//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
// Generic VMOV modified-immediate operand (printed only; no parser class).
def nModImm : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
}

// Splat immediates for the various element sizes.  Each operand pairs a
// print method with an AsmOperandClass so the asm parser can range-check.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// "Not" variants: immediate is matched in bitwise-inverted form.
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Replicated-immediate asm operand classes, parameterized by the element
// type being replicated (From) and the full operand type (To).
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Lane-index operands.  The ImmLeaf bound is the lane count for that
// element size (8 byte lanes, 4 halfword lanes, 2 word lanes, 1 dword lane).
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
                                          "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs),
// with "all lanes" subscripting.
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                       "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                        "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Register list of one D register, with byte lane subscripting.
// Indexed lists are plain operands (not register operands): the MI carries
// the D register plus a separate lane-index immediate.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// PatFrags that match loads/stores by the alignment recorded on the
// memory SDNode, so instruction selection can key on alignment.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>;

// Types for vector shift by immediates.  The "SH" version is for operations
// where the source and destination vectors have the same type.  The "SHX"
// version is for long and narrow operations where the source and destination
// vectors have different types.  The "SHINS" version is for shift and insert
// operations.
// Note: SDTARMVSHIMM was missing but is required by the SDNodes below
// (e.g. NEONvrshrsImm); without it TableGen fails with an undefined symbol.
def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

// Rounding shifts (signed/unsigned/narrowing).
def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

// Saturating shifts.
def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

// Saturating rounding narrowing shifts.
def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

// Shift-and-insert (VSLI/VSRI).
def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

// Bitwise select pseudo-node.
def NEONvbsp : SDNode<"ARMISD::VBSP",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;

def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Two-result shuffles (VZIP/VUZP/VTRN).
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Table lookups operate on v8i8 vectors only.
def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;


//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Pseudo classes for loads producing one Q register, optionally with
// address-register writeback ($wb): post-increment by the access size
// (WBfixed), or by a register (WBregister).
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

// ...and the same shapes for QQ (4 D regs) destinations.
class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


// QQQQ (8 D regs) destinations; $src is tied to $dst so the half not
// written by the expanded instruction is preserved.
class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;        // Rm == PC (0b1111) encodes "no writeback".
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d8TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d8TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d8QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d8QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
                  addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;

def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
                  addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD,
IIC_VLD2x2, 820 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 821 822def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 823def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 824def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 825 826// ...with address register writeback: 827multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, 828 RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { 829 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 830 (ins AddrMode:$Rn), itin, 831 "vld2", Dt, "$Vd, $Rn!", 832 "$Rn.addr = $wb", []> { 833 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 834 let Inst{5-4} = Rn{5-4}; 835 let DecoderMethod = "DecodeVLDST2Instruction"; 836 } 837 def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 838 (ins AddrMode:$Rn, rGPR:$Rm), itin, 839 "vld2", Dt, "$Vd, $Rn, $Rm", 840 "$Rn.addr = $wb", []> { 841 let Inst{5-4} = Rn{5-4}; 842 let DecoderMethod = "DecodeVLDST2Instruction"; 843 } 844} 845 846defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, 847 addrmode6align64or128>, Sched<[WriteVLD2]>; 848defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, 849 addrmode6align64or128>, Sched<[WriteVLD2]>; 850defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, 851 addrmode6align64or128>, Sched<[WriteVLD2]>; 852 853defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, 854 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 855defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, 856 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 857defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, 858 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 859 860def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 861def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 862def 
VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 863def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 864def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 865def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 866 867// ...with double-spaced registers 868def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, 869 addrmode6align64or128>, Sched<[WriteVLD2]>; 870def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, 871 addrmode6align64or128>, Sched<[WriteVLD2]>; 872def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, 873 addrmode6align64or128>, Sched<[WriteVLD2]>; 874defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, 875 addrmode6align64or128>, Sched<[WriteVLD2]>; 876defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, 877 addrmode6align64or128>, Sched<[WriteVLD2]>; 878defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, 879 addrmode6align64or128>, Sched<[WriteVLD2]>; 880 881// VLD3 : Vector Load (multiple 3-element structures) 882class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 883 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 884 (ins addrmode6:$Rn), IIC_VLD3, 885 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { 886 let Rm = 0b1111; 887 let Inst{4} = Rn{4}; 888 let DecoderMethod = "DecodeVLDST3Instruction"; 889} 890 891def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 892def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 893def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 894 895def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 896def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 897def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 898 899// ...with address register writeback: 900class VLD3DWB<bits<4> op11_8, bits<4> 
op7_4, string Dt> 901 : NLdSt<0, 0b10, op11_8, op7_4, 902 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 903 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 904 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 905 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 906 let Inst{4} = Rn{4}; 907 let DecoderMethod = "DecodeVLDST3Instruction"; 908} 909 910def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 911def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; 912def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; 913 914def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 915def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 916def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 917 918// ...with double-spaced registers: 919def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; 920def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; 921def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; 922def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; 923def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 924def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; 925 926def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 927def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 928def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 929 930// ...alternate versions to be allocated odd register numbers: 931def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 932def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 933def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 934 935def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 936def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 937def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 938 939// VLD4 : Vector Load (multiple 4-element structures) 940class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 941 : NLdSt<0, 0b10, op11_8, op7_4, 942 (outs DPR:$Vd, 
DPR:$dst2, DPR:$dst3, DPR:$dst4), 943 (ins addrmode6:$Rn), IIC_VLD4, 944 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, 945 Sched<[WriteVLD4]> { 946 let Rm = 0b1111; 947 let Inst{5-4} = Rn{5-4}; 948 let DecoderMethod = "DecodeVLDST4Instruction"; 949} 950 951def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 952def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 953def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 954 955def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 956def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 957def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 958 959// ...with address register writeback: 960class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 961 : NLdSt<0, 0b10, op11_8, op7_4, 962 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 963 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, 964 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 965 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 966 let Inst{5-4} = Rn{5-4}; 967 let DecoderMethod = "DecodeVLDST4Instruction"; 968} 969 970def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 971def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 972def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 973 974def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 975def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 976def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 977 978// ...with double-spaced registers: 979def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 980def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 981def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 982def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 983def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 984def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 985 986def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 987def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 988def VLD4q32Pseudo_UPD : 
VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 989 990// ...alternate versions to be allocated odd register numbers: 991def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 992def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 993def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 994 995def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 996def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 997def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 998 999} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1000 1001// Classes for VLD*LN pseudo-instructions with multi-register operands. 1002// These are expanded to real instructions after register allocation. 1003class VLDQLNPseudo<InstrItinClass itin> 1004 : PseudoNLdSt<(outs QPR:$dst), 1005 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 1006 itin, "$src = $dst">; 1007class VLDQLNWBPseudo<InstrItinClass itin> 1008 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 1009 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 1010 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1011class VLDQQLNPseudo<InstrItinClass itin> 1012 : PseudoNLdSt<(outs QQPR:$dst), 1013 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 1014 itin, "$src = $dst">; 1015class VLDQQLNWBPseudo<InstrItinClass itin> 1016 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 1017 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 1018 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1019class VLDQQQQLNPseudo<InstrItinClass itin> 1020 : PseudoNLdSt<(outs QQQQPR:$dst), 1021 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 1022 itin, "$src = $dst">; 1023class VLDQQQQLNWBPseudo<InstrItinClass itin> 1024 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 1025 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 1026 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1027 1028// VLD1LN : Vector Load (single element to one 
lane) 1029class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1030 PatFrag LoadOp> 1031 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1032 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), 1033 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1034 "$src = $Vd", 1035 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1036 (i32 (LoadOp addrmode6:$Rn)), 1037 imm:$lane))]> { 1038 let Rm = 0b1111; 1039 let DecoderMethod = "DecodeVLD1LN"; 1040} 1041class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1042 PatFrag LoadOp> 1043 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1044 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), 1045 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1046 "$src = $Vd", 1047 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1048 (i32 (LoadOp addrmode6oneL32:$Rn)), 1049 imm:$lane))]>, Sched<[WriteVLD1]> { 1050 let Rm = 0b1111; 1051 let DecoderMethod = "DecodeVLD1LN"; 1052} 1053class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>, 1054 Sched<[WriteVLD1]> { 1055 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 1056 (i32 (LoadOp addrmode6:$addr)), 1057 imm:$lane))]; 1058} 1059 1060def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 1061 let Inst{7-5} = lane{2-0}; 1062} 1063def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 1064 let Inst{7-6} = lane{1-0}; 1065 let Inst{5-4} = Rn{5-4}; 1066} 1067def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { 1068 let Inst{7} = lane{0}; 1069 let Inst{5-4} = Rn{5-4}; 1070} 1071 1072def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 1073def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 1074def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 1075 1076let Predicates = [HasNEON] in { 1077def : Pat<(vector_insert (v4f16 DPR:$src), 1078 (f16 (load addrmode6:$addr)), imm:$lane), 1079 (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>; 1080def : Pat<(vector_insert (v8f16 QPR:$src), 1081 (f16 (load 
addrmode6:$addr)), imm:$lane), 1082 (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1083def : Pat<(vector_insert (v4bf16 DPR:$src), 1084 (bf16 (load addrmode6:$addr)), imm:$lane), 1085 (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>; 1086def : Pat<(vector_insert (v8bf16 QPR:$src), 1087 (bf16 (load addrmode6:$addr)), imm:$lane), 1088 (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1089def : Pat<(vector_insert (v2f32 DPR:$src), 1090 (f32 (load addrmode6:$addr)), imm:$lane), 1091 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 1092def : Pat<(vector_insert (v4f32 QPR:$src), 1093 (f32 (load addrmode6:$addr)), imm:$lane), 1094 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1095 1096// A 64-bit subvector insert to the first 128-bit vector position 1097// is a subregister copy that needs no instruction. 1098def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)), 1099 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1100def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)), 1101 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1102def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)), 1103 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1104def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)), 1105 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1106def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)), 1107 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1108def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)), 1109 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1110} 1111 1112 1113let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 1114 1115// ...with address register writeback: 1116class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1117 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 1118 (ins addrmode6:$Rn, am6offset:$Rm, 1119 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 1120 
"\\{$Vd[$lane]\\}, $Rn$Rm", 1121 "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 1122 let DecoderMethod = "DecodeVLD1LN"; 1123} 1124 1125def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 1126 let Inst{7-5} = lane{2-0}; 1127} 1128def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 1129 let Inst{7-6} = lane{1-0}; 1130 let Inst{4} = Rn{4}; 1131} 1132def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 1133 let Inst{7} = lane{0}; 1134 let Inst{5} = Rn{4}; 1135 let Inst{4} = Rn{4}; 1136} 1137 1138def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1139def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1140def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1141 1142// VLD2LN : Vector Load (single 2-element structure to one lane) 1143class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1144 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 1145 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 1146 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 1147 "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> { 1148 let Rm = 0b1111; 1149 let Inst{4} = Rn{4}; 1150 let DecoderMethod = "DecodeVLD2LN"; 1151} 1152 1153def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 1154 let Inst{7-5} = lane{2-0}; 1155} 1156def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 1157 let Inst{7-6} = lane{1-0}; 1158} 1159def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 1160 let Inst{7} = lane{0}; 1161} 1162 1163def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1164def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1165def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1166 1167// ...with double-spaced registers: 1168def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { 1169 let Inst{7-6} = lane{1-0}; 1170} 1171def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { 1172 let Inst{7} = lane{0}; 1173} 1174 1175def VLD2LNq16Pseudo : 
VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1176def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1177 1178// ...with address register writeback: 1179class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1180 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 1181 (ins addrmode6:$Rn, am6offset:$Rm, 1182 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, 1183 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", 1184 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { 1185 let Inst{4} = Rn{4}; 1186 let DecoderMethod = "DecodeVLD2LN"; 1187} 1188 1189def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { 1190 let Inst{7-5} = lane{2-0}; 1191} 1192def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { 1193 let Inst{7-6} = lane{1-0}; 1194} 1195def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { 1196 let Inst{7} = lane{0}; 1197} 1198 1199def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1200def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1201def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1202 1203def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { 1204 let Inst{7-6} = lane{1-0}; 1205} 1206def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { 1207 let Inst{7} = lane{0}; 1208} 1209 1210def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1211def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1212 1213// VLD3LN : Vector Load (single 3-element structure to one lane) 1214class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1215 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1216 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, 1217 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, 1218 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", 1219 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> { 1220 let Rm = 0b1111; 1221 let DecoderMethod = 
"DecodeVLD3LN"; 1222} 1223 1224def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { 1225 let Inst{7-5} = lane{2-0}; 1226} 1227def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { 1228 let Inst{7-6} = lane{1-0}; 1229} 1230def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { 1231 let Inst{7} = lane{0}; 1232} 1233 1234def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1235def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1236def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1237 1238// ...with double-spaced registers: 1239def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { 1240 let Inst{7-6} = lane{1-0}; 1241} 1242def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { 1243 let Inst{7} = lane{0}; 1244} 1245 1246def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1247def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1248 1249// ...with address register writeback: 1250class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1251 : NLdStLn<1, 0b10, op11_8, op7_4, 1252 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1253 (ins addrmode6:$Rn, am6offset:$Rm, 1254 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 1255 IIC_VLD3lnu, "vld3", Dt, 1256 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", 1257 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", 1258 []>, Sched<[WriteVLD2]> { 1259 let DecoderMethod = "DecodeVLD3LN"; 1260} 1261 1262def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { 1263 let Inst{7-5} = lane{2-0}; 1264} 1265def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { 1266 let Inst{7-6} = lane{1-0}; 1267} 1268def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { 1269 let Inst{7} = lane{0}; 1270} 1271 1272def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1273def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1274def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1275 1276def VLD3LNq16_UPD : 
VLD3LNWB<0b0110, {?,?,1,0}, "16"> { 1277 let Inst{7-6} = lane{1-0}; 1278} 1279def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { 1280 let Inst{7} = lane{0}; 1281} 1282 1283def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1284def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1285 1286// VLD4LN : Vector Load (single 4-element structure to one lane) 1287class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1288 : NLdStLn<1, 0b10, op11_8, op7_4, 1289 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1290 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 1291 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, 1292 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", 1293 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>, 1294 Sched<[WriteVLD2]> { 1295 let Rm = 0b1111; 1296 let Inst{4} = Rn{4}; 1297 let DecoderMethod = "DecodeVLD4LN"; 1298} 1299 1300def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { 1301 let Inst{7-5} = lane{2-0}; 1302} 1303def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { 1304 let Inst{7-6} = lane{1-0}; 1305} 1306def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { 1307 let Inst{7} = lane{0}; 1308 let Inst{5} = Rn{5}; 1309} 1310 1311def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1312def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1313def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1314 1315// ...with double-spaced registers: 1316def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { 1317 let Inst{7-6} = lane{1-0}; 1318} 1319def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { 1320 let Inst{7} = lane{0}; 1321 let Inst{5} = Rn{5}; 1322} 1323 1324def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1325def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1326 1327// ...with address register writeback: 1328class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1329 : NLdStLn<1, 
0b10, op11_8, op7_4, 1330 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1331 (ins addrmode6:$Rn, am6offset:$Rm, 1332 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 1333 IIC_VLD4lnu, "vld4", Dt, 1334"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", 1335"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", 1336 []> { 1337 let Inst{4} = Rn{4}; 1338 let DecoderMethod = "DecodeVLD4LN" ; 1339} 1340 1341def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { 1342 let Inst{7-5} = lane{2-0}; 1343} 1344def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { 1345 let Inst{7-6} = lane{1-0}; 1346} 1347def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { 1348 let Inst{7} = lane{0}; 1349 let Inst{5} = Rn{5}; 1350} 1351 1352def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1353def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1354def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1355 1356def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { 1357 let Inst{7-6} = lane{1-0}; 1358} 1359def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { 1360 let Inst{7} = lane{0}; 1361 let Inst{5} = Rn{5}; 1362} 1363 1364def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1365def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1366 1367} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1368 1369// VLD1DUP : Vector Load (single element to all lanes) 1370class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, 1371 Operand AddrMode> 1372 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), 1373 (ins AddrMode:$Rn), 1374 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", 1375 [(set VecListOneDAllLanes:$Vd, 1376 (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>, 1377 Sched<[WriteVLD2]> { 1378 let Rm = 0b1111; 1379 let Inst{4} = Rn{4}; 1380 let DecoderMethod = 
"DecodeVLD1DupInstruction"; 1381} 1382def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, 1383 addrmode6dupalignNone>; 1384def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, 1385 addrmode6dupalign16>; 1386def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load, 1387 addrmode6dupalign32>; 1388 1389let Predicates = [HasNEON] in { 1390def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))), 1391 (VLD1DUPd32 addrmode6:$addr)>; 1392} 1393 1394class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, 1395 Operand AddrMode> 1396 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), 1397 (ins AddrMode:$Rn), IIC_VLD1dup, 1398 "vld1", Dt, "$Vd, $Rn", "", 1399 [(set VecListDPairAllLanes:$Vd, 1400 (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> { 1401 let Rm = 0b1111; 1402 let Inst{4} = Rn{4}; 1403 let DecoderMethod = "DecodeVLD1DupInstruction"; 1404} 1405 1406def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, 1407 addrmode6dupalignNone>; 1408def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, 1409 addrmode6dupalign16>; 1410def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, 1411 addrmode6dupalign32>; 1412 1413let Predicates = [HasNEON] in { 1414def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))), 1415 (VLD1DUPq32 addrmode6:$addr)>; 1416} 1417 1418let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 1419// ...with address register writeback: 1420multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1421 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1422 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1423 (ins AddrMode:$Rn), IIC_VLD1dupu, 1424 "vld1", Dt, "$Vd, $Rn!", 1425 "$Rn.addr = $wb", []> { 1426 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
// NOTE(review): the lines below complete the writeback (_fixed) variant of a
// VLD1 dup-to-all-lanes multiclass opened above this chunk (instantiated by
// the VLD1DUPd*wb defms below).
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  // _register variant: post-indexed by register $Rm.
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
// VLD1 dup-to-all-lanes of a D-register pair, with address writeback:
// _fixed post-increments $Rn by the transfer size ("$Rn!"), _register by $Rm.
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
// Rm = 0b1111 selects the no-writeback form (contrast VLD2DUPWB below).
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
                              addrmode6dupalign16>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                              addrmode6dupalign32>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                              addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

def VLD2DUPq8OddPseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudoWB_register  : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8",  addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8",  addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

def VLD3DUPq8OddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
// For size 32, Inst{6} is taken from bit 5 of the addressing-mode operand.
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;

def VLD4DUPq8OddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPq16OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPq32OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// One pseudo per register-tuple width (Q, QQ, QQQQ), each with plain,
// fixed-writeback, and register-writeback variants where needed.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
// Rm = 0b1111 selects the no-writeback form (contrast the *WB multiclasses).
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

def VST1q8  : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
// _fixed post-increments $Rn by the transfer size ("$Rn!"), _register by $Rm.
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VST1d8TPseudo             : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d8TPseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d8TPseudoWB_register  : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d16TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d16TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d16TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d32TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d32TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d32TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

def VST1q8HighTPseudo  : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

def VST1q8HighTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VST1d8QPseudo             : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d8QPseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d8QPseudoWB_register  : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d16QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d16QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d16QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d32QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d32QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d32QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

def VST1q8HighQPseudo  : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

def VST1q8HighQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;

// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                         addrmode6align64or128>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                         addrmode6align64or128>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                         addrmode6align64or128>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;

// ...with double-spaced registers
def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                         addrmode6align64or128>;

// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...with double-spaced registers:
def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...with double-spaced registers:
def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Lane-store pseudos: one per register-tuple width, plus writeback variants.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// Selected for (StoreOp (ExtractOp vector, lane), addr) DAG patterns.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
    Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the integer lane-store instructions/pseudos.
let Predicates = [HasNEON] in {
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
}

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,ARMvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16
: VST3LN<0b0110, {?,?,0,0}, "16"> { 2326 let Inst{7-6} = lane{1-0}; 2327} 2328def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { 2329 let Inst{7} = lane{0}; 2330} 2331 2332def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2333def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2334def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2335 2336// ...with double-spaced registers: 2337def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { 2338 let Inst{7-6} = lane{1-0}; 2339} 2340def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { 2341 let Inst{7} = lane{0}; 2342} 2343 2344def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2345def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2346 2347// ...with address register writeback: 2348class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2349 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2350 (ins addrmode6:$Rn, am6offset:$Rm, 2351 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), 2352 IIC_VST3lnu, "vst3", Dt, 2353 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", 2354 "$Rn.addr = $wb", []> { 2355 let DecoderMethod = "DecodeVST3LN"; 2356} 2357 2358def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { 2359 let Inst{7-5} = lane{2-0}; 2360} 2361def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { 2362 let Inst{7-6} = lane{1-0}; 2363} 2364def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { 2365 let Inst{7} = lane{0}; 2366} 2367 2368def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2369def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2370def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2371 2372def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { 2373 let Inst{7-6} = lane{1-0}; 2374} 2375def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { 2376 let Inst{7} = lane{0}; 2377} 2378 2379def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2380def VST3LNq32Pseudo_UPD 
: VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2381 2382// VST4LN : Vector Store (single 4-element structure from one lane) 2383class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2384 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2385 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, 2386 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, 2387 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", 2388 "", []>, Sched<[WriteVST2]> { 2389 let Rm = 0b1111; 2390 let Inst{4} = Rn{4}; 2391 let DecoderMethod = "DecodeVST4LN"; 2392} 2393 2394def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { 2395 let Inst{7-5} = lane{2-0}; 2396} 2397def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { 2398 let Inst{7-6} = lane{1-0}; 2399} 2400def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { 2401 let Inst{7} = lane{0}; 2402 let Inst{5} = Rn{5}; 2403} 2404 2405def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2406def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2407def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2408 2409// ...with double-spaced registers: 2410def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { 2411 let Inst{7-6} = lane{1-0}; 2412} 2413def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { 2414 let Inst{7} = lane{0}; 2415 let Inst{5} = Rn{5}; 2416} 2417 2418def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2419def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2420 2421// ...with address register writeback: 2422class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2423 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2424 (ins addrmode6:$Rn, am6offset:$Rm, 2425 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 2426 IIC_VST4lnu, "vst4", Dt, 2427 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", 2428 "$Rn.addr = $wb", []> { 2429 let Inst{4} = Rn{4}; 2430 let DecoderMethod = "DecodeVST4LN"; 2431} 2432 2433def VST4LNd8_UPD : 
VST4LNWB<0b0011, {?,?,?,?}, "8"> { 2434 let Inst{7-5} = lane{2-0}; 2435} 2436def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { 2437 let Inst{7-6} = lane{1-0}; 2438} 2439def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { 2440 let Inst{7} = lane{0}; 2441 let Inst{5} = Rn{5}; 2442} 2443 2444def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2445def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2446def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2447 2448def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { 2449 let Inst{7-6} = lane{1-0}; 2450} 2451def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { 2452 let Inst{7} = lane{0}; 2453 let Inst{5} = Rn{5}; 2454} 2455 2456def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2457def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2458 2459} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2460 2461// Use vld1/vst1 for unaligned f64 load / store 2462let Predicates = [IsLE,HasNEON] in { 2463def : Pat<(f64 (hword_alignedload addrmode6:$addr)), 2464 (VLD1d16 addrmode6:$addr)>; 2465def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 2466 (VST1d16 addrmode6:$addr, DPR:$value)>; 2467def : Pat<(f64 (byte_alignedload addrmode6:$addr)), 2468 (VLD1d8 addrmode6:$addr)>; 2469def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), 2470 (VST1d8 addrmode6:$addr, DPR:$value)>; 2471} 2472let Predicates = [IsBE,HasNEON] in { 2473def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), 2474 (VLD1d64 addrmode6:$addr)>; 2475def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), 2476 (VST1d64 addrmode6:$addr, DPR:$value)>; 2477} 2478 2479// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 2480// load / store if it's legal. 
2481let Predicates = [HasNEON] in { 2482def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), 2483 (VLD1q64 addrmode6:$addr)>; 2484def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2485 (VST1q64 addrmode6:$addr, QPR:$value)>; 2486} 2487let Predicates = [IsLE,HasNEON] in { 2488def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), 2489 (VLD1q32 addrmode6:$addr)>; 2490def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2491 (VST1q32 addrmode6:$addr, QPR:$value)>; 2492def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 2493 (VLD1q16 addrmode6:$addr)>; 2494def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2495 (VST1q16 addrmode6:$addr, QPR:$value)>; 2496def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 2497 (VLD1q8 addrmode6:$addr)>; 2498def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2499 (VST1q8 addrmode6:$addr, QPR:$value)>; 2500} 2501 2502//===----------------------------------------------------------------------===// 2503// Instruction Classes 2504//===----------------------------------------------------------------------===// 2505 2506// Basic 2-register operations: double- and quad-register. 
2507class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2508 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2509 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2510 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2511 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "", 2512 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>; 2513class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2514 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2515 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2516 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2517 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "", 2518 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>; 2519 2520// Basic 2-register intrinsics, both double- and quad-register. 2521class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2522 bits<2> op17_16, bits<5> op11_7, bit op4, 2523 InstrItinClass itin, string OpcodeStr, string Dt, 2524 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2525 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2526 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2527 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2528class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2529 bits<2> op17_16, bits<5> op11_7, bit op4, 2530 InstrItinClass itin, string OpcodeStr, string Dt, 2531 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2532 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2533 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2534 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2535 2536// Same as above, but not predicated. 
2537class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, 2538 InstrItinClass itin, string OpcodeStr, string Dt, 2539 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2540 : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), 2541 itin, OpcodeStr, Dt, 2542 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2543 2544class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, 2545 InstrItinClass itin, string OpcodeStr, string Dt, 2546 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2547 : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), 2548 itin, OpcodeStr, Dt, 2549 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2550 2551// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). 2552class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2553 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2554 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2555 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm), 2556 itin, OpcodeStr, Dt, 2557 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2558 2559// Same as N2VQIntXnp but with Vd as a src register. 2560class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2561 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2562 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2563 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, 2564 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), 2565 itin, OpcodeStr, Dt, 2566 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { 2567 let Constraints = "$src = $Vd"; 2568} 2569 2570// Narrow 2-register operations. 
2571class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2572 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2573 InstrItinClass itin, string OpcodeStr, string Dt, 2574 ValueType TyD, ValueType TyQ, SDNode OpNode> 2575 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2576 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2577 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>; 2578 2579// Narrow 2-register intrinsics. 2580class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2581 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2582 InstrItinClass itin, string OpcodeStr, string Dt, 2583 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> 2584 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2585 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2586 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>; 2587 2588// Long 2-register operations (currently only used for VMOVL). 2589class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2590 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2591 InstrItinClass itin, string OpcodeStr, string Dt, 2592 ValueType TyQ, ValueType TyD, SDNode OpNode> 2593 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2594 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2595 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>; 2596 2597// Long 2-register intrinsics. 2598class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2599 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2600 InstrItinClass itin, string OpcodeStr, string Dt, 2601 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> 2602 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2603 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2604 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>; 2605 2606// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. 
// The shuffles write both registers in place, so each output is tied to the
// corresponding input ($src1 -> $Vd, $src2 -> $Vm).
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// "SL" = scalar-lane: $Vm is a scalar selected by $lane and broadcast via
// ARMvduplane.  The scalar register class is restricted (DPR_VFP2 for
// 32-bit lanes, DPR_8 for 16-bit lanes) — NOTE(review): presumably because
// the lane index is encoded in the upper register-number bits; confirm
// against the NVMulSLFrm encoding.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated ("np") variant; no two-operand alias.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}


class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Sh" variant: note the swapped operand order ($Vm before $Vn) in both the
// ins list and the assembly string; the two-operand alias ties $Vm to $Vd.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  // The accumulator input shares a register with the result.
  let Constraints = "$src = $Vd";
  let isCommutable = 0;
}

class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Sh" variant: operand order is $Vm, $Vn (swapped), matching N3VDIntSh.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// $src1 is the accumulator, tied to $Vd; OpNode combines it with Vn*Vm.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                            (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                          imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                          imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                            (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                                   imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                                   imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
2922class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2923 InstrItinClass itin, string OpcodeStr, string Dt, 2924 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> 2925 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2926 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2927 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2928 [(set DPR:$Vd, (Ty (OpNode DPR:$src1, 2929 (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>; 2930class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2931 InstrItinClass itin, string OpcodeStr, string Dt, 2932 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> 2933 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2934 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2935 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2936 [(set QPR:$Vd, (Ty (OpNode QPR:$src1, 2937 (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>; 2938 2939// Neon 3-argument intrinsics, both double- and quad-register. 2940// The destination register is also used as the first source operand register. 
2941class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2942 InstrItinClass itin, string OpcodeStr, string Dt, 2943 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2944 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2945 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2946 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2947 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1), 2948 (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; 2949class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2950 InstrItinClass itin, string OpcodeStr, string Dt, 2951 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2952 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2953 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2954 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2955 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1), 2956 (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; 2957 2958// Long Multiply-Add/Sub operations. 2959class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2960 InstrItinClass itin, string OpcodeStr, string Dt, 2961 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> 2962 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2963 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2964 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2965 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1), 2966 (TyQ (MulOp (TyD DPR:$Vn), 2967 (TyD DPR:$Vm)))))]>; 2968class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, 2969 InstrItinClass itin, string OpcodeStr, string Dt, 2970 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> 2971 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), 2972 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2973 NVMulSLFrm, itin, 2974 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2975 [(set QPR:$Vd, 2976 (OpNode (TyQ QPR:$src1), 2977 (TyQ (MulOp (TyD DPR:$Vn), 2978 (TyD (ARMvduplane (TyD DPR_VFP2:$Vm), 2979 imm:$lane))))))]>; 
// 16-bit lane variant of N3VLMulOpSL; $Vm comes from DPR_8 with a
// VectorIndex16 lane selector.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                (OpNode (TyQ QPR:$src1),
                        (TyQ (MulOp (TyD DPR:$Vn),
                                    (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                      imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// The narrow intrinsic result is widened with ExtOp before OpNode combines
// it with the quad-register accumulator $src1.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// Lane ("scalar") variant with a 32-bit element selected by $lane.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
// Lane ("scalar") variant with a 16-bit element (DPR_8 / VectorIndex16).
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing: quad-register sources produce a double-register result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Widening: double-register sources produce a quad-register result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Lane ("scalar") form with a 32-bit element selected by $lane.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Lane ("scalar") form with a 16-bit element (DPR_8 / VectorIndex16).
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Both double-register operands are widened with ExtOp before OpNode.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDPatternOperator ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// The narrow intrinsic result is widened with ExtOp to fill the quad result.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
// Non-predicated (N3Vnp encoding) long 3-register intrinsic.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}


// Lane ("scalar") form with a 32-bit element selected by $lane.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
// Lane ("scalar") form with a 16-bit element (DPR_8 / VectorIndex16).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Wide 3-register operations.
// $Vn is already wide (quad); only $Vm is widened with ExtOp.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDPatternOperator ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift amount is the immediate operand $SIMM.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Widening shift: double-register source, quad-register result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate.
// Narrowing shift: quad-register source, double-register result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift $Vm by $SIMM with ShOp, then add the result to the accumulator $src1.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                   (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                   (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// ShOp takes the destination's prior value $src1 as an extra operand.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
// The immediate $SIMM gives the number of fractional bits for the conversion.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, PatFrag fc> {
  // 64-bit vector types.
  def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                 opc, !strconcat(Dt, "8"), asm, "",
                 [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register comparisons.
class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Neon 3-register comparisons with element sizes of 8, 16 and 32 bits.
multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       PatFrag fc, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8 : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, fc, Commutable>;
  def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, v4i16, fc, Commutable>;
  def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v2i32, v2i32, fc, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v16i8, v16i8, fc, Commutable>;
  def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v8i16, fc, Commutable>;
  def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v4i32, v4i32, fc, Commutable>;
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                     itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                  itin, OpcodeStr, !strconcat(Dt, "16"),
                  v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                     itin, OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently
// specific to VMOVL).
// Source operand element sizes of 8, 16 and 32 bits, widened to results of
// 16, 32 and 64-bit elements respectively:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                  OpcodeStr, !strconcat(Dt, "8"),
                  v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// Lane ("scalar") forms, 16- and 32-bit elements only.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same, but using the "Sh" (vector-shift) instruction variants.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// Lane ("scalar") intrinsic forms, 16- and 32-bit elements only.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                     OpcodeStr, !strconcat(Dt, "8"),
                     v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
// "Sh" (vector-shift) variant of the above.
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
// "Sh" (vector-shift) variant of the above.
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Lane ("scalar") long forms, 16- and 32-bit source elements only.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Lane ("scalar") long intrinsic forms, 16- and 32-bit source elements.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}

// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
               itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                     // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                     // imm6 = xxxxxx
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                     // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                     // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                        // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                        // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                        // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                        // imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                        // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                        // imm6 = xxxxxx
}

// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7,
                     OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15,
                     OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31,
                     OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD : Vector Add (integer and floating-point)
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                     add, 1>;
def  VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                   v2f32, v2f32, fadd, 1>;
def  VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                   v4f32, v4f32, fadd, 1>;
def  VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                   v4f16, v4f16, fadd, 1>,
              Requires<[HasNEON,HasFullFP16]>;
def  VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                   v8f16, v8f16, fadd, 1>,
              Requires<[HasNEON,HasFullFP16]>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "u", add, zanyext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "s", saddsat, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "u", uaddsat, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;

// Select VADDHN for "add, then truncate away the low half of each lane".
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                    IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                     "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                     "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                  v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                  v4f32, v4f32, fmul, 1>;
def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                  v4f16, v4f16, fmul, 1>,
             Requires<[HasNEON,HasFullFP16]>;
def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                  v8f16, v8f16, fmul, 1>,
             Requires<[HasNEON,HasFullFP16]>;
defm VMULsl  : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                      v2f32, fmul>;
def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
               Requires<[HasNEON,HasFullFP16]>;
def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                        v4f16, fmul>,
               Requires<[HasNEON,HasFullFP16]>;

// Fold a multiply by a duplicated lane into the by-lane VMUL forms.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
          (v8f16 (VMULslhq (v8f16 QPR:$src1),
                           (v4f16 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i16_reg imm:$lane))),
                           (SubReg_i16_lane imm:$lane)))>;

// Multiply by a scalar register: insert the scalar into lane 0 of an
// undefined vector and use the by-lane form with lane index 0.
def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhd DPR:$Rn,
                    (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                    (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhq QPR:$Rn,
                    (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                    (i32 0))>;
}

// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                           imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                           imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;
}

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
}

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "s", ARMvmulls, 1>;
  defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "u", ARMvmullu, 1>;
  def  VMULLp8  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                          v8i16, v8i8, int_arm_neon_vmullp, 1>;
  def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                            "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                  Requires<[HasV8, HasAES]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.
4454 4455// VMLA : Vector Multiply Accumulate (integer and floating-point) 4456defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4457 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4458def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", 4459 v2f32, fmul_su, fadd_mlx>, 4460 Requires<[HasNEON, UseFPVMLx]>; 4461def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", 4462 v4f32, fmul_su, fadd_mlx>, 4463 Requires<[HasNEON, UseFPVMLx]>; 4464def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16", 4465 v4f16, fmul_su, fadd_mlx>, 4466 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4467def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16", 4468 v8f16, fmul_su, fadd_mlx>, 4469 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4470defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, 4471 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4472def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", 4473 v2f32, fmul_su, fadd_mlx>, 4474 Requires<[HasNEON, UseFPVMLx]>; 4475def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", 4476 v4f32, v2f32, fmul_su, fadd_mlx>, 4477 Requires<[HasNEON, UseFPVMLx]>; 4478def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16", 4479 v4f16, fmul, fadd>, 4480 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4481def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16", 4482 v8f16, v4f16, fmul, fadd>, 4483 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4484 4485let Predicates = [HasNEON] in { 4486def : Pat<(v8i16 (add (v8i16 QPR:$src1), 4487 (mul (v8i16 QPR:$src2), 4488 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))), 4489 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4490 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4491 (DSubReg_i16_reg imm:$lane))), 4492 (SubReg_i16_lane imm:$lane)))>; 4493 4494def : Pat<(v4i32 (add (v4i32 QPR:$src1), 4495 (mul (v4i32 QPR:$src2), 4496 (v4i32 (ARMvduplane (v4i32 QPR:$src3), 
imm:$lane))))), 4497 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4498 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4499 (DSubReg_i32_reg imm:$lane))), 4500 (SubReg_i32_lane imm:$lane)))>; 4501} 4502 4503def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), 4504 (fmul_su (v4f32 QPR:$src2), 4505 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))), 4506 (v4f32 (VMLAslfq (v4f32 QPR:$src1), 4507 (v4f32 QPR:$src2), 4508 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4509 (DSubReg_i32_reg imm:$lane))), 4510 (SubReg_i32_lane imm:$lane)))>, 4511 Requires<[HasNEON, UseFPVMLx]>; 4512 4513// VMLAL : Vector Multiply Accumulate Long (Q += D * D) 4514defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4515 "vmlal", "s", ARMvmulls, add>; 4516defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4517 "vmlal", "u", ARMvmullu, add>; 4518 4519defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>; 4520defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>; 4521 4522let Predicates = [HasNEON, HasV8_1a] in { 4523 // v8.1a Neon Rounding Double Multiply-Op vector operations, 4524 // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long 4525 // (Q += D * D) 4526 defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, 4527 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4528 null_frag>; 4529 def : Pat<(v4i16 (saddsat 4530 (v4i16 DPR:$src1), 4531 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4532 (v4i16 DPR:$Vm))))), 4533 (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4534 def : Pat<(v2i32 (saddsat 4535 (v2i32 DPR:$src1), 4536 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4537 (v2i32 DPR:$Vm))))), 4538 (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4539 def : Pat<(v8i16 (saddsat 4540 (v8i16 QPR:$src1), 4541 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4542 (v8i16 QPR:$Vm))))), 4543 (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4544 def : Pat<(v4i32 (saddsat 4545 (v4i32 QPR:$src1), 
4546 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4547 (v4i32 QPR:$Vm))))), 4548 (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4549 4550 defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, 4551 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4552 null_frag>; 4553 def : Pat<(v4i16 (saddsat 4554 (v4i16 DPR:$src1), 4555 (v4i16 (int_arm_neon_vqrdmulh 4556 (v4i16 DPR:$Vn), 4557 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4558 imm:$lane)))))), 4559 (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, 4560 imm:$lane))>; 4561 def : Pat<(v2i32 (saddsat 4562 (v2i32 DPR:$src1), 4563 (v2i32 (int_arm_neon_vqrdmulh 4564 (v2i32 DPR:$Vn), 4565 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4566 imm:$lane)))))), 4567 (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4568 imm:$lane))>; 4569 def : Pat<(v8i16 (saddsat 4570 (v8i16 QPR:$src1), 4571 (v8i16 (int_arm_neon_vqrdmulh 4572 (v8i16 QPR:$src2), 4573 (v8i16 (ARMvduplane (v8i16 QPR:$src3), 4574 imm:$lane)))))), 4575 (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), 4576 (v8i16 QPR:$src2), 4577 (v4i16 (EXTRACT_SUBREG 4578 QPR:$src3, 4579 (DSubReg_i16_reg imm:$lane))), 4580 (SubReg_i16_lane imm:$lane)))>; 4581 def : Pat<(v4i32 (saddsat 4582 (v4i32 QPR:$src1), 4583 (v4i32 (int_arm_neon_vqrdmulh 4584 (v4i32 QPR:$src2), 4585 (v4i32 (ARMvduplane (v4i32 QPR:$src3), 4586 imm:$lane)))))), 4587 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), 4588 (v4i32 QPR:$src2), 4589 (v2i32 (EXTRACT_SUBREG 4590 QPR:$src3, 4591 (DSubReg_i32_reg imm:$lane))), 4592 (SubReg_i32_lane imm:$lane)))>; 4593 4594 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long 4595 // (Q -= D * D) 4596 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, 4597 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4598 null_frag>; 4599 def : Pat<(v4i16 (ssubsat 4600 (v4i16 DPR:$src1), 4601 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4602 (v4i16 DPR:$Vm))))), 4603 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4604 def : 
Pat<(v2i32 (ssubsat 4605 (v2i32 DPR:$src1), 4606 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4607 (v2i32 DPR:$Vm))))), 4608 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4609 def : Pat<(v8i16 (ssubsat 4610 (v8i16 QPR:$src1), 4611 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4612 (v8i16 QPR:$Vm))))), 4613 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4614 def : Pat<(v4i32 (ssubsat 4615 (v4i32 QPR:$src1), 4616 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4617 (v4i32 QPR:$Vm))))), 4618 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4619 4620 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, 4621 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4622 null_frag>; 4623 def : Pat<(v4i16 (ssubsat 4624 (v4i16 DPR:$src1), 4625 (v4i16 (int_arm_neon_vqrdmulh 4626 (v4i16 DPR:$Vn), 4627 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4628 imm:$lane)))))), 4629 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; 4630 def : Pat<(v2i32 (ssubsat 4631 (v2i32 DPR:$src1), 4632 (v2i32 (int_arm_neon_vqrdmulh 4633 (v2i32 DPR:$Vn), 4634 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4635 imm:$lane)))))), 4636 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4637 imm:$lane))>; 4638 def : Pat<(v8i16 (ssubsat 4639 (v8i16 QPR:$src1), 4640 (v8i16 (int_arm_neon_vqrdmulh 4641 (v8i16 QPR:$src2), 4642 (v8i16 (ARMvduplane (v8i16 QPR:$src3), 4643 imm:$lane)))))), 4644 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), 4645 (v8i16 QPR:$src2), 4646 (v4i16 (EXTRACT_SUBREG 4647 QPR:$src3, 4648 (DSubReg_i16_reg imm:$lane))), 4649 (SubReg_i16_lane imm:$lane)))>; 4650 def : Pat<(v4i32 (ssubsat 4651 (v4i32 QPR:$src1), 4652 (v4i32 (int_arm_neon_vqrdmulh 4653 (v4i32 QPR:$src2), 4654 (v4i32 (ARMvduplane (v4i32 QPR:$src3), 4655 imm:$lane)))))), 4656 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), 4657 (v4i32 QPR:$src2), 4658 (v2i32 (EXTRACT_SUBREG 4659 QPR:$src3, 4660 (DSubReg_i32_reg imm:$lane))), 4661 (SubReg_i32_lane imm:$lane)))>; 4662} 4663// VQDMLAL : Vector 
Saturating Doubling Multiply Accumulate Long (Q += D * D) 4664defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4665 "vqdmlal", "s", null_frag>; 4666defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; 4667 4668let Predicates = [HasNEON] in { 4669def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), 4670 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4671 (v4i16 DPR:$Vm))))), 4672 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4673def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), 4674 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4675 (v2i32 DPR:$Vm))))), 4676 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4677def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), 4678 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4679 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4680 imm:$lane)))))), 4681 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4682def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), 4683 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4684 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4685 imm:$lane)))))), 4686 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4687} 4688 4689// VMLS : Vector Multiply Subtract (integer and floating-point) 4690defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4691 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4692def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", 4693 v2f32, fmul_su, fsub_mlx>, 4694 Requires<[HasNEON, UseFPVMLx]>; 4695def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", 4696 v4f32, fmul_su, fsub_mlx>, 4697 Requires<[HasNEON, UseFPVMLx]>; 4698def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", 4699 v4f16, fmul, fsub>, 4700 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4701def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", 4702 v8f16, fmul, fsub>, 4703 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4704defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, 4705 IIC_VMACi16Q, IIC_VMACi32Q, 
"vmls", "i", sub>; 4706def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", 4707 v2f32, fmul_su, fsub_mlx>, 4708 Requires<[HasNEON, UseFPVMLx]>; 4709def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", 4710 v4f32, v2f32, fmul_su, fsub_mlx>, 4711 Requires<[HasNEON, UseFPVMLx]>; 4712def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16", 4713 v4f16, fmul, fsub>, 4714 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4715def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16", 4716 v8f16, v4f16, fmul, fsub>, 4717 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4718 4719let Predicates = [HasNEON] in { 4720def : Pat<(v8i16 (sub (v8i16 QPR:$src1), 4721 (mul (v8i16 QPR:$src2), 4722 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))), 4723 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4724 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4725 (DSubReg_i16_reg imm:$lane))), 4726 (SubReg_i16_lane imm:$lane)))>; 4727 4728def : Pat<(v4i32 (sub (v4i32 QPR:$src1), 4729 (mul (v4i32 QPR:$src2), 4730 (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))), 4731 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4732 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4733 (DSubReg_i32_reg imm:$lane))), 4734 (SubReg_i32_lane imm:$lane)))>; 4735} 4736 4737def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), 4738 (fmul_su (v4f32 QPR:$src2), 4739 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))), 4740 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), 4741 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4742 (DSubReg_i32_reg imm:$lane))), 4743 (SubReg_i32_lane imm:$lane)))>, 4744 Requires<[HasNEON, UseFPVMLx]>; 4745 4746// VMLSL : Vector Multiply Subtract Long (Q -= D * D) 4747defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4748 "vmlsl", "s", ARMvmulls, sub>; 4749defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4750 "vmlsl", "u", ARMvmullu, sub>; 4751 4752defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, 
sub>; 4753defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>; 4754 4755// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) 4756defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, 4757 "vqdmlsl", "s", null_frag>; 4758defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; 4759 4760let Predicates = [HasNEON] in { 4761def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), 4762 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4763 (v4i16 DPR:$Vm))))), 4764 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4765def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), 4766 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4767 (v2i32 DPR:$Vm))))), 4768 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4769def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), 4770 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4771 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4772 imm:$lane)))))), 4773 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4774def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), 4775 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4776 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4777 imm:$lane)))))), 4778 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4779} 4780 4781// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
4782def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", 4783 v2f32, fmul_su, fadd_mlx>, 4784 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4785 4786def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", 4787 v4f32, fmul_su, fadd_mlx>, 4788 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4789def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16", 4790 v4f16, fmul, fadd>, 4791 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4792 4793def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16", 4794 v8f16, fmul, fadd>, 4795 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4796 4797// Fused Vector Multiply Subtract (floating-point) 4798def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", 4799 v2f32, fmul_su, fsub_mlx>, 4800 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4801def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", 4802 v4f32, fmul_su, fsub_mlx>, 4803 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4804def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16", 4805 v4f16, fmul, fsub>, 4806 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4807def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16", 4808 v8f16, fmul, fsub>, 4809 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4810 4811// Match @llvm.fma.* intrinsics 4812def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4813 (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4814 Requires<[HasNEON,HasFullFP16]>; 4815def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4816 (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4817 Requires<[HasNEON,HasFullFP16]>; 4818def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4819 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4820 Requires<[HasNEON,HasVFP4]>; 4821def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4822 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4823 Requires<[HasNEON,HasVFP4]>; 4824def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), 4825 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4826 
Requires<[HasNEON,HasVFP4]>; 4827def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), 4828 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4829 Requires<[HasNEON,HasVFP4]>; 4830 4831// ARMv8.2a dot product instructions. 4832// We put them in the VFPV8 decoder namespace because the ARM and Thumb 4833// encodings are the same and thus no further bit twiddling is necessary 4834// in the disassembler. 4835class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm, 4836 string AsmTy, ValueType AccumTy, ValueType InputTy, 4837 SDPatternOperator OpNode> : 4838 N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst), 4839 (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD, 4840 Asm, AsmTy, 4841 [(set (AccumTy RegTy:$dst), 4842 (OpNode (AccumTy RegTy:$Vd), 4843 (InputTy RegTy:$Vn), 4844 (InputTy RegTy:$Vm)))]> { 4845 let Predicates = [HasDotProd]; 4846 let DecoderNamespace = "VFPV8"; 4847 let Constraints = "$dst = $Vd"; 4848} 4849 4850def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>; 4851def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>; 4852def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>; 4853def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>; 4854 4855// Indexed dot product instructions: 4856multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty, 4857 ValueType AccumType, ValueType InputType, SDPatternOperator OpNode, 4858 dag RHS> { 4859 def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst), 4860 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 4861 N3RegFrm, IIC_VDOTPROD, opc, dt, []> { 4862 bit lane; 4863 let Inst{5} = lane; 4864 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane"); 4865 let Constraints = "$dst = $Vd"; 4866 let Predicates = [HasDotProd]; 4867 let DecoderNamespace = "VFPV8"; 4868 } 4869 4870 def : Pat< 4871 (AccumType (OpNode (AccumType Ty:$Vd), 4872 (InputType 
Ty:$Vn), 4873 (InputType (bitconvert (AccumType 4874 (ARMvduplane (AccumType Ty:$Vm), 4875 VectorIndex32:$lane)))))), 4876 (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>; 4877} 4878 4879defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8, 4880 int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>; 4881defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8, 4882 int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>; 4883defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8, 4884 int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4885defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8, 4886 int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4887 4888// v8.6A matrix multiplication extension 4889let Predicates = [HasMatMulInt8] in { 4890 class N3VMatMul<bit B, bit U, string Asm, string AsmTy, 4891 SDPatternOperator OpNode> 4892 : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst), 4893 (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary, 4894 Asm, AsmTy, 4895 [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd), 4896 (v16i8 QPR:$Vn), 4897 (v16i8 QPR:$Vm)))]> { 4898 let DecoderNamespace = "VFPV8"; 4899 let Constraints = "$dst = $Vd"; 4900 } 4901 4902 multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy, RegisterClass RegTy, 4903 ValueType AccumTy, ValueType InputTy, SDPatternOperator OpNode, 4904 dag RHS> { 4905 4906 def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst), 4907 (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), N3RegFrm, 4908 NoItinerary, Asm, AsmTy, []> { 4909 bit lane; 4910 let Inst{5} = lane; 4911 let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane"); 4912 let DecoderNamespace = "VFPV8"; 4913 let Constraints = "$dst = $Vd"; 4914 } 4915 4916 def : Pat< 4917 (AccumTy (OpNode (AccumTy RegTy:$Vd), 4918 (InputTy RegTy:$Vn), 4919 (InputTy (bitconvert (AccumTy 4920 (ARMvduplane (AccumTy RegTy:$Vm), 4921 VectorIndex32:$lane)))))), 4922 (!cast<Instruction>(NAME) RegTy:$Vd, 
RegTy:$Vn, RHS, VectorIndex32:$lane)>; 4923 4924 } 4925 4926 multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy, ValueType InputTy, dag RHS> 4927 : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy, null_frag, null_frag> { 4928 def : Pat< 4929 (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd), 4930 (InputTy (bitconvert (AccumTy 4931 (ARMvduplane (AccumTy RegTy:$Vm), 4932 VectorIndex32:$lane)))), 4933 (InputTy RegTy:$Vn))), 4934 (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>; 4935 } 4936 4937 def VSMMLA : N3VMatMul<0, 0, "vsmmla", "s8", int_arm_neon_smmla>; 4938 def VUMMLA : N3VMatMul<0, 1, "vummla", "u8", int_arm_neon_ummla>; 4939 def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>; 4940 def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8, int_arm_neon_usdot>; 4941 def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8, int_arm_neon_usdot>; 4942 4943 defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8, 4944 int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>; 4945 defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8, 4946 int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4947 defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>; 4948 defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4949} 4950 4951// ARMv8.3 complex operations 4952class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q, 4953 InstrItinClass itin, dag oops, dag iops, 4954 string opc, string dt, list<dag> pattern> 4955 : N3VCP8<{?,?}, {op21,s}, q, op4, oops, 4956 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ 4957 bits<2> rot; 4958 let Inst{24-23} = rot; 4959} 4960 4961class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q, 4962 InstrItinClass itin, dag oops, dag iops, string opc, 4963 string dt, list<dag> pattern> 4964 : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, 4965 iops, itin, opc, dt, "$Vd, 
$Vn, $Vm, $rot", "", pattern> { 4966 bits<1> rot; 4967 let Inst{24} = rot; 4968} 4969 4970class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin, 4971 dag oops, dag iops, string opc, string dt, 4972 list<dag> pattern> 4973 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, 4974 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> { 4975 bits<2> rot; 4976 bit lane; 4977 4978 let Inst{21-20} = rot; 4979 let Inst{5} = lane; 4980} 4981 4982class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin, 4983 dag oops, dag iops, string opc, string dt, 4984 list<dag> pattern> 4985 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, 4986 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> { 4987 bits<2> rot; 4988 bit lane; 4989 4990 let Inst{21-20} = rot; 4991 let Inst{5} = Vm{4}; 4992 // This is needed because the lane operand does not have any bits in the 4993 // encoding (it only has one possible value), so we need to manually set it 4994 // to it's default value. 
4995 let DecoderMethod = "DecodeNEONComplexLane64Instruction"; 4996} 4997 4998multiclass N3VCP8ComplexTied<bit op21, bit op4, 4999 string OpcodeStr> { 5000 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 5001 def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd), 5002 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), 5003 OpcodeStr, "f16", []>; 5004 def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd), 5005 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), 5006 OpcodeStr, "f16", []>; 5007 } 5008 let Predicates = [HasNEON,HasV8_3a] in { 5009 def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd), 5010 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), 5011 OpcodeStr, "f32", []>; 5012 def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd), 5013 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), 5014 OpcodeStr, "f32", []>; 5015 } 5016} 5017 5018multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4, 5019 string OpcodeStr> { 5020 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 5021 def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD, 5022 (outs DPR:$Vd), 5023 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot), 5024 OpcodeStr, "f16", []>; 5025 def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ, 5026 (outs QPR:$Vd), 5027 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), 5028 OpcodeStr, "f16", []>; 5029 } 5030 let Predicates = [HasNEON,HasV8_3a] in { 5031 def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD, 5032 (outs DPR:$Vd), 5033 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot), 5034 OpcodeStr, "f32", []>; 5035 def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ, 5036 (outs QPR:$Vd), 5037 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), 5038 OpcodeStr, "f32", []>; 5039 } 5040} 5041 5042// These instructions index by pairs of lanes, so the VectorIndexes are twice 5043// as wide as the data 
types. 5044multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr> { 5045 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 5046 def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD, 5047 (outs DPR:$Vd), 5048 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 5049 VectorIndex32:$lane, complexrotateop:$rot), 5050 OpcodeStr, "f16", []>; 5051 def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ, 5052 (outs QPR:$Vd), 5053 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, 5054 VectorIndex32:$lane, complexrotateop:$rot), 5055 OpcodeStr, "f16", []>; 5056 } 5057 let Predicates = [HasNEON,HasV8_3a] in { 5058 def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD, 5059 (outs DPR:$Vd), 5060 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane, 5061 complexrotateop:$rot), 5062 OpcodeStr, "f32", []>; 5063 def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ, 5064 (outs QPR:$Vd), 5065 (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane, 5066 complexrotateop:$rot), 5067 OpcodeStr, "f32", []>; 5068 } 5069} 5070 5071defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla">; 5072defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd">; 5073defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla">; 5074 5075let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 5076 def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))), 5077 (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>; 5078 def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))), 5079 (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>; 5080 def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))), 5081 (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>; 5082 def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))), 5083 (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>; 5084} 5085let Predicates = [HasNEON,HasV8_3a] in { 5086 def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), 
5087 (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>; 5088 def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), 5089 (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>; 5090 def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), 5091 (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>; 5092 def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), 5093 (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>; 5094} 5095 5096// Vector Subtract Operations. 5097 5098// VSUB : Vector Subtract (integer and floating-point) 5099defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, 5100 "vsub", "i", sub, 0>; 5101def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", 5102 v2f32, v2f32, fsub, 0>; 5103def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", 5104 v4f32, v4f32, fsub, 0>; 5105def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16", 5106 v4f16, v4f16, fsub, 0>, 5107 Requires<[HasNEON,HasFullFP16]>; 5108def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16", 5109 v8f16, v8f16, fsub, 0>, 5110 Requires<[HasNEON,HasFullFP16]>; 5111// VSUBL : Vector Subtract Long (Q = D - D) 5112defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 5113 "vsubl", "s", sub, sext, 0>; 5114defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 5115 "vsubl", "u", sub, zanyext, 0>; 5116// VSUBW : Vector Subtract Wide (Q = Q - D) 5117defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; 5118defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>; 5119// VHSUB : Vector Halving Subtract 5120defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, 5121 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5122 "vhsub", "s", int_arm_neon_vhsubs, 0>; 5123defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, 5124 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5125 "vhsub", "u", int_arm_neon_vhsubu, 0>; 5126// VQSUB : 
Vector Saturing Subtract 5127defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, 5128 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5129 "vqsub", "s", ssubsat, 0>; 5130defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, 5131 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5132 "vqsub", "u", usubsat, 0>; 5133// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) 5134defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; 5135// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) 5136defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", 5137 int_arm_neon_vrsubhn, 0>; 5138 5139let Predicates = [HasNEON] in { 5140def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), 5141 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; 5142def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), 5143 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; 5144def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), 5145 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; 5146} 5147 5148// Vector Comparisons. 

// VCEQ : Vector Compare Equal
// Naming convention for the float variants below: "fd"/"fq" are the f32
// D- and Q-register forms; "hd"/"hq" are the f16 forms, which additionally
// require HasFullFP16.
defm VCEQ  : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>;
def  VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                       ARMCCeq, 1>;
def  VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                       ARMCCeq, 1>;
def  VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
                       ARMCCeq, 1>,
              Requires<[HasNEON, HasFullFP16]>;
def  VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
                       ARMCCeq, 1>,
              Requires<[HasNEON, HasFullFP16]>;

// Compare-against-zero form ("$Vd, $Vm, #0").
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                         "$Vd, $Vm, #0", ARMCCeq>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>;
defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>;
def  VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                       ARMCCge, 0>;
def  VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                       ARMCCge, 0>;
def  VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
                       ARMCCge, 0>,
              Requires<[HasNEON, HasFullFP16]>;
def  VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
                       ARMCCge, 0>,
              Requires<[HasNEON, HasFullFP16]>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                         "$Vd, $Vm, #0", ARMCCge>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                         "$Vd, $Vm, #0", ARMCCle>;
}

// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>;
defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>;
def  VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                       ARMCCgt, 0>;
def  VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                       ARMCCgt, 0>;
def  VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
                       ARMCCgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
def  VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
                       ARMCCgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                         "$Vd, $Vm, #0", ARMCCgt>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                         "$Vd, $Vm, #0", ARMCClt>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                       "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def  VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                       "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
def  VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                       "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
               Requires<[HasNEON, HasFullFP16]>;
def  VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                       "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
               Requires<[HasNEON, HasFullFP16]>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                       "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def  VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                       "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
def  VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                       "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
               Requires<[HasNEON, HasFullFP16]>;
def  VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                       "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
               Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST  : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                     IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// VACLT/VACLE have no encodings of their own: they are assembler aliases of
// VACGT/VACGE with the two source operands swapped.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}

// +fp16fml Floating Point Multiplication Variants
let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {

class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
           asm, "f16", "$Vd, $Vn, $Vm", "", []>;

class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
             asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// Vd, Vs, Vs[0-15], Idx[0-1]
// Scatters the Vn/Vm register numbers and the lane index across the
// instruction's encoding bits.
class VFMD<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bit idx;
  let Inst{3} = idx;
  let Inst{19-16} = Vn{4-1};
  let Inst{7} = Vn{0};
  let Inst{5} = Vm{0};
  let Inst{2-0} = Vm{3-1};
}

// Vq, Vd, Vd[0-7], Idx[0-3]
class VFMQ<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bits<2> idx;
  let Inst{5} = idx{1};
  let Inst{3} = idx{0};
}

//                                                op1   op2   op3
def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
} // HasNEON, HasFP16FML


// Two-operand forms of the VACLT/VACLE aliases: the destination doubles as
// the second source.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
}

// Vector Bitwise Operations.

// "NOT x" expressed as "x XOR all-ones", for D and Q registers respectively.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, ARMimmAllOnesD)>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, ARMimmAllOnesV)>;


// VAND : Vector Bitwise AND
def  VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                   v2i32, v2i32, and, 1>;
def  VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                   v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                   v2i32, v2i32, xor, 1>;
def  VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                   v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                   v2i32, v2i32, or, 1>;
def  VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                   v4i32, v4i32, or, 1>;

// Extend a bitwise d/q instruction pair (selected on v2i32/v4i32 above) to
// the remaining vector types of the same register width.
multiclass BitwisePatterns<string Name, SDPatternOperator OpNodeD,
                           SDPatternOperator OpNodeQ> {
  def : Pat<(v8i8 (OpNodeD DPR:$LHS, DPR:$RHS)),
            (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;
  def : Pat<(v4i16 (OpNodeD DPR:$LHS, DPR:$RHS)),
            (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;
  def : Pat<(v1i64 (OpNodeD DPR:$LHS, DPR:$RHS)),
            (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;

  def : Pat<(v16i8 (OpNodeQ QPR:$LHS, QPR:$RHS)),
            (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
  def : Pat<(v8i16 (OpNodeQ QPR:$LHS, QPR:$RHS)),
            (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
  def : Pat<(v2i64 (OpNodeQ QPR:$LHS, QPR:$RHS)),
            (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
}

let Predicates = [HasNEON] in {
  defm : BitwisePatterns<"VAND", and, and>;
  defm : BitwisePatterns<"VORR", or, or>;
  defm : BitwisePatterns<"VEOR", xor, xor>;
}

// VORR (immediate): OR a modified-immediate splat into the destination.
// The register operand is both source and destination ("$src = $Vd"); the
// "let Inst{...} = SIMM{...}" lines forward the unresolved cmode bits of the
// encoded immediate into the instruction word.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                  (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                  "vbic", "$Vd, $Vn, $Vm", "",
                  [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                             (vnotd DPR:$Vm))))]>;
def  VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                  (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                  "vbic", "$Vd, $Vn, $Vm", "",
                  [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                             (vnotq QPR:$Vm))))]>;
}

let Predicates = [HasNEON] in {
  defm : BitwisePatterns<"VBIC", BinOpFrag<(and node:$LHS, (vnotd node:$RHS))>,
                                 BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>>;
}

// VBIC (immediate): clear the bits of a modified-immediate splat in the
// destination; same read-modify-write operand structure as VORR (immediate).
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def  VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                  (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                  "vorn", "$Vd, $Vn, $Vm", "",
                  [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                            (vnotd DPR:$Vm))))]>;
def  VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                  (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                  "vorn", "$Vd, $Vn, $Vm", "",
                  [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                            (vnotq QPR:$Vm))))]>;

let Predicates = [HasNEON] in {
  defm : BitwisePatterns<"VORN", BinOpFrag<(or node:$LHS, (vnotd node:$RHS))>,
                                 BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>>;
}

// VMVN : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def  VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                  (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                  "vmvn", "$Vd, $Vm", "",
                  [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                  (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                  "vmvn", "$Vd, $Vm", "",
                  [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
// Extend register VMVN to the remaining vector types.
let Predicates = [HasNEON] in {
def : Pat<(v1i64 (vnotd DPR:$src)),
          (VMVNd DPR:$src)>;
def : Pat<(v4i16 (vnotd DPR:$src)),
          (VMVNd DPR:$src)>;
def : Pat<(v8i8 (vnotd DPR:$src)),
          (VMVNd DPR:$src)>;
def : Pat<(v2i64 (vnotq QPR:$src)),
          (VMVNq QPR:$src)>;
def : Pat<(v8i16 (vnotq QPR:$src)),
          (VMVNq QPR:$src)>;
def : Pat<(v16i8 (vnotq QPR:$src)),
          (VMVNq QPR:$src)>;
}

// The TwoAddress pass will not go looking for equivalent operations
// with different register constraints; it just inserts copies.
// That is why the pseudo VBSP is implemented.
It is expanded later into
// VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
def VBSPd
  : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                IIC_VBINiD, "",
                [(set DPR:$Vd,
                      (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// Select VBSPd both for the vbsl intrinsic and for an explicit
// (N & mask) | (M & ~mask) select tree.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;

def : Pat<(v8i8 (or (and DPR:$Vn, DPR:$Vd),
                    (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}

def VBSPq
  : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                IIC_VBINiQ, "",
                [(set QPR:$Vd,
                      (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;

def : Pat<(v16i8 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
}

// VBSL : Vector Bitwise Select
// No selection patterns: VBSL/VBIF/VBIT are produced by expanding VBSP.
def  VBSLd  : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                   (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;

def  VBSLq  : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                   (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
def  VBIFd  : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                   (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;
def  VBIFq  : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                   (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
def  VBITd  : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                   (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;
def  VBITq  : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                   (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                      "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                      "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
def  VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                      "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
              Requires<[HasNEON, HasFullFP16]>;
def  VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                      "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
              Requires<[HasNEON, HasFullFP16]>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// abs(zext(a) - zext(b)) is an unsigned absolute-difference-long.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
}

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.

def abd_shr :
    PatFrag<(ops node:$in1, node:$in2, node:$shift),
            (ARMvshrsImm (sub (zext node:$in1),
                              (zext node:$in2)), (i32 $shift))>;

let Predicates = [HasNEON] in {
def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)),
               (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                (zext (v2i32 DPR:$opB))),
                           (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
}

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                               "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                               "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", smax, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", umax, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f32",
                      v2f32, v2f32, fmaximum, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f32",
                      v4f32, v4f32, fmaximum, 1>;
def  VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f16",
                      v4f16, v4f16, fmaximum, 1>,
              Requires<[HasNEON, HasFullFP16]>;
def  VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f16",
                      v8f16, v8f16, fmaximum, 1>,
              Requires<[HasNEON, HasFullFP16]>;

// VMAXNM
// ARMv8-only "NM" forms, selected for fmaxnum; encoded/decoded in the
// v8NEON namespace with the Thumb2 v8 post-encoder.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v2f32, v2f32, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v4f32, v4f32, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v4f16, v4f16, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v8f16, v8f16, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", smin, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", umin, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f32",
                      v2f32, v2f32, fminimum, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f32",
                      v4f32, v4f32, fminimum, 1>;
def  VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f16",
                      v4f16, v4f16, fminimum, 1>,
              Requires<[HasNEON, HasFullFP16]>;
def  VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f16",
                      v8f16, v8f16, fminimum, 1>,
              Requires<[HasNEON, HasFullFP16]>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v2f32, v2f32, fminnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v4f32, v4f32, fminnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v4f16, v4f16, fminnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v8f16, v8f16, fminnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
// Pairwise operations only have D-register forms.
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f16",
                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;
def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f16",
                        v4f16, v4f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f16",
                        v8f16, v8f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f16",
                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f16",
                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;
def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;

// VRSQRTS : Vector Reciprocal Square Root Step
def  VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;

// Vector Shifts.

// VSHL : Vector Shift
// Register-controlled shifts: the signed/unsigned intrinsic selects the
// instruction directly; saturating/rounding variants follow below.
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu>;

// Map the generic ARMvshls/ARMvshlu DAG nodes onto the per-type VSHL
// instructions (D-register forms first, then Q-register forms).
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;

}

// VSHL : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",
                            ARMvshrsImm>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",
                            ARMvshruImm>;

// VSHLL : Vector Shift Left Long
// The operand is widened (sext/zext) before the immediate shift.
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// The maximum-shift form has a distinct encoding (fixed Inst{21-16}), so it
// needs a custom decoder; selection is done via the explicit patterns below
// rather than a DAG pattern on the instruction itself (null_frag).
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32>;

// Select the max-shift VSHLL for any extension kind (zext/sext/anyext) when
// the shift amount equals the source element size.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
}

// VSHRN : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           PatFrag<(ops node:$Rn, node:$amt),
                                   (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

// trunc-of-unsigned-shift selects the same VSHRN encoding: the truncation
// discards the bits where signed and unsigned shifts differ.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}

// VRSHL : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",
                            NEONvrshrsImm>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",
                            NEONvrshruImm>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrnImm>;

// VQSHL : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrnsImm>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnuImm>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsuImm>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrnsImm>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnuImm>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsuImm>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;

// VSLI : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v2f32, v2f32, fabs>;
def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v4f32, v4f32, fabs>;
def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                     v4f16, v4f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                     v8f16, v8f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;

// VQABS : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.

// Integer vector negation is expressed as a subtraction from the all-zeros
// immediate; these PatFrags give that idiom a name for use in the classes
// and patterns below.
def vnegd  : PatFrag<(ops node:$in),
                     (sub ARMimmAllZerosD, node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub ARMimmAllZerosV, node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;
def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;

// Select the element-size-specific VNEG for each vnegd/vnegq type.
let Predicates = [HasNEON] in {
def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq  QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq  QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq  QPR:$src)), (VNEGs32q QPR:$src)>;
}

// VQNEG : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           ctlz>;
// VCNT : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, ctpop>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are read and written, hence the tied in/out operand pairs.
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;

// Vector Move Operations.

// VMOV : Vector Move (Register)
// Assembled as VORR with identical source registers.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

// Although VMOVs are not strictly speaking cheap, they are as expensive
// as their copies counterpart (VORR), so we should prefer rematerialization
// over splitting when it applies.
let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;

// The i16/i32 forms leave cmode bits unspecified; they are filled from the
// encoded modified-immediate value ($SIMM) below.
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove

// Add support for bytes replication feature, so it could be GAS compatible.
multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  // E.g. instructions below:
  // "vmov.i32 d0, #0xffffffff"
  // "vmov.i32 d0, #0xabababab"
  // "vmov.i16 d0, #0xabab"
  // are incorrect, but we could deal with such cases.
  // For last two instructions, for example, it should emit:
  // "vmov.i8 d0, #0xab"
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  // Also add same support for VMVN instructions. So instruction:
  // "vmvn.i32 d0, #0xabababab"
  // actually means:
  // "vmov.i8 d0, #0x54"
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateI8InstAlias<i16>;
defm : NEONImmReplicateI8InstAlias<i32>;
defm : NEONImmReplicateI8InstAlias<i64>;

// Similar to above for types other than i8, e.g.:
// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
// In this case we do not canonicalize VMVN to VMOV
multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                     NeonI NV8, NeonI NV16, ValueType To> {
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i32>;
defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i64>;
defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                 VMVNv2i32, VMVNv4i32, i64>;
// TODO: add "VMOV <-> VMVN" conversion for cases like
// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).

let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
}

// VMOV : Vector Get Lane (move scalar to ARM core register)
// Lane index bits are scattered into the encoding (Inst{21}, Inst{6-5} or
// Inst{6}) depending on element size.

def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                           imm:$lane))]>,
                Requires<[HasFPRegs, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// Q-register lane extraction: narrow to the containing D subregister first,
// then use the D-register VGETLN with the remapped lane index.
let Predicates = [HasNEON] in {
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v8f16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4f16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v4f16 DPR:$src), imm:$lane),
          (VGETLNu16 (v4f16 DPR:$src), imm:$lane)>;
def : Pat<(ARMvgetlaneu (v8bf16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4bf16 (EXTRACT_SUBREG QPR:$src,
                              (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v4bf16 DPR:$src), imm:$lane),
          (VGETLNu16 (v4bf16 DPR:$src), imm:$lane)>;
}
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// On cores where VGETLN.32 is slow, go through an S-subregister copy instead.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
let Predicates = [HasNEON] in {
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
}

// Even-numbered f16 lanes live in the low half of an S register, so they can
// be extracted with a plain subregister copy.
multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
            (EXTRACT_SUBREG
              (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
              (SSubReg_f16_reg imm_even:$lane))>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
            (EXTRACT_SUBREG
              (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
              (SSubReg_f16_reg imm_even:$lane))>;
}

// Odd-numbered f16 lanes need VMOVH to move the high half down.
multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                       (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                       (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                       (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                       (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
}

let Predicates = [HasNEON] in {
  defm : ExtractEltEvenF16<v4f16, v8f16>;
  defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
}

let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
  // If VMOVH (vmovx.f16) is available use it to extract BF16 from the odd lanes
  defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
}

let Predicates = [HasBF16, HasNEON] in {
  defm : ExtractEltEvenF16<v4bf16, v8bf16>;

  // Otherwise, if VMOVH is not available resort to extracting the odd lane
  // into a GPR and then moving to HPR
  def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
              HPR)>;

  def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                 (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane)),
              HPR)>;
}

// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                          IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                           GPR:$R, imm:$lane))]>,
                Requires<[HasVFP2]> {
  let Inst{21} = lane{0};
  // This instruction is equivalent to
  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  let isInsertSubreg = 1;
}
}

// TODO: for odd lanes we could optimize this a bit by using the VINS
// FullFP16 instruction when it is available
multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
  def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT4 (VSETLNi16 DPR:$src1,
                 (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
  def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT8 (INSERT_SUBREG QPR:$src1,
                 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                    (DSubReg_i16_reg imm:$lane))),
                             (COPY_TO_REGCLASS HPR:$src2, GPR),
                             (SubReg_i16_lane imm:$lane))),
                 (DSubReg_i16_reg imm:$lane)))>;
}

let Predicates = [HasNEON] in {
// Q-register lane insertion: update the containing D subregister and put it
// back with INSERT_SUBREG.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                   (DSubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

defm : InsertEltF16<f16, v4f16, v8f16>;

def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v4f16 (scalar_to_vector (f16 HPR:$src))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
}

let Predicates = [HasNEON, HasBF16] in
defm : InsertEltF16<bf16, v4bf16, v8bf16>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// ARMvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
      Requires<[HasNEON]>;

// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
// Duplicating a 32-bit value into both halves of a D register can be done
// with a single VMOVDRR (two GPR -> one D move) on slow-VDUP.32 cores.
def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
                                      VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.
// VDUP (lane): duplicate one scalar lane of a source vector across every
// lane of the destination.  The lane index is encoded at the top of
// Inst{19-16}; narrower element types need more index bits.
def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

let Predicates = [HasNEON] in {
// NOTE(review): the v4f16 dup-lane is selected to the 32-bit lane form
// (VDUPLN32d); confirm the lane index is scaled appropriately upstream of
// this pattern for lanes beyond the first 32-bit element.
def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Dup-lane from a Q register: extract the D subregister that holds the
// requested lane, then dup from that D register with the remapped index.
def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
          (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Splat of an FP scalar: insert it into lane 0 of an undef vector, then
// dup lane 0.
def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
          (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                             (f16 HPR:$src), ssub_0), (i32 0)))>;
def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
          (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                             (f16 HPR:$src), ssub_0), (i32 0)))>;
}

// Same dup-lane / dup-scalar patterns for bfloat16 vectors.
let Predicates = [HasNEON, HasBF16] in {
def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
          (VDUPLN16d DPR:$Vm, imm:$lane)>;

def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
          (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src,
                                      (DSubReg_i16_reg imm:$lane))),
                             (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
          (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                              (bf16 HPR:$src), ssub_0), (i32 0)))>;
def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
          (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                              (bf16 HPR:$src), ssub_0), (i32 0)))>;
}

// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                      "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                           "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                           "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
// (VMOVL, continued): sign- and zero-extending lengthening moves.
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;

// anyext is lowered with the zero-extending form.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
}

// Vector Conversions.

// VCVT : Vector Convert Between Floating-Point and Integers
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v2f32, v2i32, uint_to_fp>;

def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v4f32, v4i32, uint_to_fp>;

// Half-precision variants require the full FP16 extension.
def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                    v4i16, v4f16, fp_to_sint>,
                Requires<[HasNEON, HasFullFP16]>;
def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                    v4i16, v4f16, fp_to_uint>,
                Requires<[HasNEON, HasFullFP16]>;
def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                    v4f16, v4i16, sint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;
def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                    v4f16, v4i16, uint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;

def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                    v8i16, v8f16, fp_to_sint>,
                Requires<[HasNEON, HasFullFP16]>;
def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                    v8i16, v8f16, fp_to_uint>,
                Requires<[HasNEON, HasFullFP16]>;
def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                    v8f16, v8i16, sint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;
def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                    v8f16, v8i16, uint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;

// VCVT{A, N, P, M} : directed-rounding conversions (ARMv8 NEON only).
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                      "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
  def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                      "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
  def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                      "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
  def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                      "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                      "s16.f16", v4i16, v4f16, IntS>,
            Requires<[HasV8, HasNEON, HasFullFP16]>;
  def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                      "s16.f16", v8i16, v8f16, IntS>,
            Requires<[HasV8, HasNEON, HasFullFP16]>;
  def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                      "u16.f16", v4i16, v4f16, IntU>,
            Requires<[HasV8, HasNEON, HasFullFP16]>;
  def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1,
                      NoItinerary, !strconcat("vcvt", op),
                      "u16.f16", v8i16, v8f16, IntU>,
            Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

// "vcvt ..., #0" (zero fractional bits) is accepted as an alias for the
// plain integer <-> FP conversion.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;


// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                      IIC_VUNAQ, "vcvt", "f16.f32",
                      v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                Requires<[HasNEON, HasFP16]>;
def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                      IIC_VUNAQ, "vcvt", "f32.f16",
                      v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
}

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;

let Predicates = [HasNEON] in {
  def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
            (VREV64q32 QPR:$Vm)>;
  def : Pat<(v8f16 (ARMvrev64
            (v8f16 QPR:$Vm))),
            (VREV64q16 QPR:$Vm)>;
  def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
            (VREV64d16 DPR:$Vm)>;
}

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

let Predicates = [HasNEON] in {
  def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
            (VREV32q16 QPR:$Vm)>;
  def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
            (VREV32d16 DPR:$Vm)>;
}

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
    Requires<[HasNEON]>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;

def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16

// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

// Per-element-size VEXT defs: wider elements use fewer index bits, the
// low immediate bits are forced to zero.
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8} = 0b0;
}
let Predicates = [HasNEON] in {
def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
          (VEXTd16
           DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10} = index{0};
  let Inst{9-8} = 0b00;
}
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8} = 0b0;
}
let Predicates = [HasNEON] in {
def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8} = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11} = index{0};
  let Inst{10-8} = 0b000;
}
let Predicates = [HasNEON] in {
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos take the table as a QQ super-register; expanded post-RA.
def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl,
                DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                              DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// Assemble the individual D-register table operands into the register
// tuples the (pseudo) instructions expect; unused slots are undef.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                      v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vm)),
          (v8i8 (VTBX2 v8i8:$orig,
                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                     v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                           v8i8:$Vn1, dsub_1,
                                           v8i8:$Vn2, dsub_2,
                                           (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;
def
  : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                  v8i8:$Vn2, v8i8:$Vm)),
        (v8i8 (VTBX3Pseudo v8i8:$orig,
                           (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                         v8i8:$Vn1, dsub_1,
                                         v8i8:$Vn2, dsub_2,
                                         (v8i8 (IMPLICIT_DEF)), dsub_3),
                           v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                           v8i8:$Vn1, dsub_1,
                                           v8i8:$Vn2, dsub_2,
                                           v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBX4Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                           v8i8:$Vn1, dsub_1,
                                           v8i8:$Vn2, dsub_2,
                                           v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
}

// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f32",
                     v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
    let Inst{9-7} = op9_7;
  }
  def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f32",
                     v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
    let Inst{9-7} = op9_7;
  }
  def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f16",
                     v4f16, v4f16, Int>,
           Requires<[HasV8, HasNEON, HasFullFP16]> {
    let Inst{9-7} = op9_7;
  }
  def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f16",
                     v8f16, v8f16, Int>,
           Requires<[HasV8, HasNEON, HasFullFP16]> {
    let Inst{9-7} = op9_7;
  }
  }

  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op,
".f32.f32\t$Qd, $Qm"), 7325 (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>; 7326 let Predicates = [HasNEON, HasFullFP16] in { 7327 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"), 7328 (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>; 7329 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"), 7330 (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>; 7331 } 7332} 7333 7334defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; 7335defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; 7336defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; 7337defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; 7338defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; 7339defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; 7340 7341// Cryptography instructions 7342let PostEncoderMethod = "NEONThumb2DataIPostEncoder", 7343 DecoderNamespace = "v8Crypto", hasSideEffects = 0 in { 7344 class AES<string op, bit op7, bit op6, SDPatternOperator Int> 7345 : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, 7346 !strconcat("aes", op), "8", v16i8, v16i8, Int>; 7347 class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int> 7348 : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, 7349 !strconcat("aes", op), "8", v16i8, v16i8, Int>; 7350 class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, 7351 SDPatternOperator Int> 7352 : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, 7353 !strconcat("sha", op), "32", v4i32, v4i32, Int>; 7354 class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, 7355 SDPatternOperator Int> 7356 : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, 7357 !strconcat("sha", op), "32", v4i32, v4i32, Int>; 7358 class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int> 7359 : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary, 7360 !strconcat("sha", op), "32", v4i32, v4i32, Int>; 7361} 7362 7363let Predicates 
= [HasV8, HasAES] in { 7364def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; 7365def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; 7366def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; 7367def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; 7368} 7369 7370let Predicates = [HasV8, HasSHA2] in { 7371def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>; 7372def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; 7373def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; 7374def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>; 7375def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>; 7376def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>; 7377def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; 7378def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; 7379def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; 7380def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; 7381} 7382 7383let Predicates = [HasNEON] in { 7384def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), 7385 (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG 7386 (SHA1H (SUBREG_TO_REG (i64 0), 7387 (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)), 7388 ssub_0)), 7389 ssub_0)), GPR)>; 7390 7391def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), 7392 (SHA1C v4i32:$hash_abcd, 7393 (SUBREG_TO_REG (i64 0), 7394 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), 7395 ssub_0), 7396 v4i32:$wk)>; 7397 7398def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), 7399 (SHA1M v4i32:$hash_abcd, 7400 (SUBREG_TO_REG (i64 0), 7401 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), 7402 ssub_0), 7403 v4i32:$wk)>; 7404 7405def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), 7406 (SHA1P v4i32:$hash_abcd, 7407 (SUBREG_TO_REG (i64 0), 7408 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), 7409 ssub_0), 7410 v4i32:$wk)>; 7411} 7412 
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// Scalar FP ops implemented with NEON: insert the scalar operand(s) into
// lane 0 of an undef D register (restricted to the VFP2-accessible half,
// DPR_VFP2, so the S sub-registers exist), run the vector op, then
// extract lane 0 as the scalar result.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
              (EXTRACT_SUBREG
               (v4f16 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Fused / chained multiply-accumulate form: acc op (a * b).
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// int -> fp and fp -> int scalar conversions via the NEON vector units.
class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                    (v2f32 (Inst
                     (INSERT_SUBREG
                      (v2f32 (IMPLICIT_DEF)),
                      (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                    ssub_0))>;
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                    (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                                SPR:$a, ssub_0))),
                    ssub_0))>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
// int-to-f64 with the integer extracted from a vector lane: select the VFP
// VSITOD/VUITOD on the S sub-register holding that lane.
def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;


// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;
def : Pat<(arm_vmovsr GPR:$a),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns or Endianness - Revert Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// 64 bit conversions
// Bitconverts between types with the same lane width keep the same in-register
// layout on either endianness, so they are plain register reinterpretations.
let Predicates = [HasNEON] in {
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;

def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>;

def : Pat<(v4i16 (bitconvert (v4bf16 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 DPR:$src))), (v4bf16 DPR:$src)>;

// 128 bit conversions
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;

def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;

def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;

def : Pat<(v8i16 (bitconvert (v8bf16 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 QPR:$src))), (v8bf16 QPR:$src)>;
}

// On little-endian targets every bitconvert is a no-op reinterpretation of
// the register, regardless of lane width.
let Predicates = [IsLE,HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;

  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;

  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (v4f16 DPR:$src)>;

  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v8i8  DPR:$src))), (v4bf16 DPR:$src)>;

  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;

  def : Pat<(v8i8 (bitconvert (f64   DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;

  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;

  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;

  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;

  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;

  def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (v8bf16 QPR:$src)>;

  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;

  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
}

// On big-endian targets a bitconvert between different lane widths changes
// the in-register lane order, so a VREV of the appropriate granularity is
// emitted to restore the expected layout.
let Predicates = [IsBE,HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;

  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;

  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;

  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;

  def : Pat<(v8i8 (bitconvert (f64   DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (VREV16d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (VREV16d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;

  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;

  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;

  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;

  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;

  def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;

  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;

  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (VREV16q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
}

let Predicates = [HasNEON] in {
  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
  // rather than the more general 'ARMVectorRegCast' which would also
  // match some bitconverts. If we use the latter in cases where the
  // input and output types are the same, the bitconvert gets elided
  // and we end up generating a nonsense match of nothing.

  foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
    foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
      def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>;

  foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
    foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
      def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>;
}

// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
let Predicates = [IsBE,HasNEON] in {
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VREV64q8 (VLD1q8 addrmode6:$addr))>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VREV64q16 (VLD1q16 addrmode6:$addr))>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
}

// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
      Requires<[HasNEON]>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load.
// Example:
// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
//                              (f64 (IMPLICIT_DEF)), (i32 0)))>;
// Emits three patterns (_Any / _Z / _S) for anyext, zext and sext loads; the
// sign-extending variant uses the signed VMOVLs form, the others VMOVLu.
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
               Requires<[HasNEON]>;

  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
             Requires<[HasNEON]>;

  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
             Requires<[HasNEON]>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available.
// Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
// Loads a single 32-bit lane, lengthens the full D register, and extracts the
// low D half of the Q result (only half the lanes carry real data).
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// Big-endian variant of Lengthen_HalfSingle: a VREV32d<RevLanes> is inserted
// after the lane load to put the data in the expected lane order.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
// Two chained VMOVL steps widen each lane twice (e.g. i8 -> i16 -> i32).
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// Big-endian variant of Lengthen_Double: a VREV32d<RevLanes> after the lane
// load restores the expected lane order before the two VMOVL steps.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
// Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//     (EXTRACT_SUBREG (VMOVLuv4i32
//       (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                               (f64 (IMPLICIT_DEF)), (i32 0))),
//                       dsub_0)),
//       dsub_0)>;
// Like Lengthen_Double but starting from a 16-bit lane load, so the final
// result occupies only the low D half of the last VMOVL's Q output.
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
// Big-endian variant of Lengthen_HalfDouble: a VREV16d8 after the 16-bit lane
// load restores the expected byte order before widening.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV16d8")
               (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
           dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV16d8")
               (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
           dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV16d8")
               (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
           dsub_0)>,
           Requires<[HasNEON]>;
}

// Full-width single-step lengthenings are endian-independent.
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

let Predicates = [HasNEON,IsLE] in {
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [HasNEON,IsBE] in {
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [HasNEON,IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [HasNEON,IsBE] in {
  // Big-endian triple lengthening (v2i8 -> v2i16 -> v2i32 -> v2i64): same as
  // the little-endian patterns above, but the half-word loaded by VLD1LNd16
  // is byte-reversed with VREV16d8 before feeding the VMOVL chain, so the
  // two i8 elements land in the lane order the lengthening expects.
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (!cast<Instruction>("VREV16d8")
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (!cast<Instruction>("VREV16d8")
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  // Sign-extending load uses the signed lengthening instructions (VMOVLs*).
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
        (!cast<Instruction>("VREV16d8")
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}

// Concatenating two 64-bit D registers into a 128-bit Q value needs no
// instruction: $Dn becomes the low half (dsub_0) and $Dm the high half
// (dsub_1) of a register sequence.
let Predicates = [HasNEON] in {
def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8bf16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
}

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// Pre-UAL VFP mnemonics for moving a GPR into the high (lane 1) / low
// (lane 0) word of a D register.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases ($Vdn is both destination and first source).
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ... immediates. "vand Vd, #imm" is encoded as VBIC with the inverted
// immediate (the nImmSplatNot* operand classes perform the inversion).
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;


// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Naming: plain Asm_* = no writeback; WB_fixed_Asm_* = the "$addr!" form;
// WB_register_Asm_* = the "$addr, $Rm" post-index form.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
               (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
               (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                    pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
               (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;

def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
               (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
               (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                    pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
               (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
               (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
               (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
               (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Operand classes mirror the VLD1 single-lane pseudos above.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
               (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
               (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                    pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
               (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
               (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
               (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                    pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
               (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
               (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
               (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
               (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// The "d" variants use adjacent D registers, the "q" variants (16/32-bit
// elements only) use every-other-register lists; permitted alignments are
// twice the element size (two elements are transferred per lane).
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
               (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                    pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
               (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
               (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
               (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
               (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
               (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                    pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
               (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
               (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
               (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
               (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
               (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// VST2 single-lane store pseudos; operand classes mirror the VLD2 single-lane
// pseudos (alignment is twice the element size, "q" variants exist for 16/32
// bit elements only).
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
               (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                    pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
               (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
               (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
               (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
               (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;

def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
               (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                    pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
               (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
               (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
               (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
               (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
               (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// "d" variants use three consecutive D registers, "q" variants use
// every-other-register lists; VLD3DUP permits no address alignment
// qualifier (addrmode6dupalignNone) for any element size.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;

def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No alignment qualifier is permitted for 3-element single-lane transfers.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;

def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list,
                    addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list,
                    addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// VLD3 multiple-structure pseudos: three consecutive D registers ("d") or
// every-other-register lists ("q"); alignment of up to 64 bits is accepted
// for every element size.
def VLD3dAsm_8  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// VST3 single-lane store pseudos; operand classes mirror the VLD3 single-lane
// pseudos (no alignment qualifier is permitted for 3-element transfers).
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;

def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list,
                    addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list,
                    addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// VST3 multiple-structure pseudos; operand classes mirror the VLD3
// multiple-structure pseudos (64-bit alignment accepted for all sizes).
def VST3dAsm_8  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// VLD4 all-lanes pseudos. Unlike VLD3DUP, alignment qualifiers are permitted
// and grow with the element size: 32-bit for .8, 64-bit for .16, and
// 64-or-128-bit for .32 (four elements are transferred at once).
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;

def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// VLD4 single-lane pseudos. Alignment is four times the element size
// (four elements per lane transfer): 32-bit for .8, 64-bit for .16,
// 64-or-128-bit for .32.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback, fixed-increment forms ("$addr!" syntax).
def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
// Writeback, register-increment forms ("$addr, $Rm" syntax).
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
                   (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                        pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                         pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                         pred:$p)>;

// Writeback, fixed-increment forms ("$addr!" syntax).
def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
                   (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                        pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                   (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                        pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                   (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                        pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                   (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                        pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                   (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                        pred:$p)>;
// Writeback, register-increment forms ("$addr, $Rm" syntax).
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                        rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                        rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListFourDWordIndexed:$list,
                        addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                        rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListFourQWordIndexed:$list,
                        addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback, fixed-increment forms ("$addr!" syntax).
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
// Writeback, register-increment forms ("$addr, $Rm" syntax).
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix
// ("vmov Dd, Dm" is encoded as VORR with both source operands equal).
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn".
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;

// ARMv8.6a BFloat16 instructions.
let Predicates = [HasBF16, HasNEON] in {
// Common encoding base for the BFDOT family; all of these decode in the
// "VFPV8" namespace.
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
               dag oops, dag iops, list<dag> pattern>
   : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
           N3RegFrm, IIC_VDOTPROD, "", "", pattern>
{
  let DecoderNamespace = "VFPV8";
}

// Vector-by-vector BFDOT: accumulates into $Vd (tied to $dst).
class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
   : BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst),
              (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
        [(set (AccumTy RegTy:$dst),
              (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                  (InputTy RegTy:$Vn),
                                  (InputTy RegTy:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

// Vector-by-indexed-element BFDOT. The lane-duplication in the source
// pattern is matched onto the instruction's $lane operand.
multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
                     ValueType InputTy, dag RHS> {

  def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
                    (ins RegTy:$Vd, RegTy:$Vn,
                         DPR_VFP2:$Vm, VectorIndex32:$lane), []> {
    bit lane;
    let Inst{5} = lane;
    let Constraints = "$dst = $Vd";
    let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (AccumTy (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                 (InputTy RegTy:$Vn),
                                 (InputTy (bitconvert (AccumTy
                                          (ARMvduplane (AccumTy RegTy:$Vm),
                                                        VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
}

def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;

defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// BFloat16 matrix multiply-accumulate (vmmla.bf16).
class BF16MM<bit Q, RegisterClass RegTy,
             string opc>
   : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
           (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
           N3RegFrm, IIC_VDOTPROD, "", "",
           [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                                        (v8bf16 QPR:$Vn),
                                                        (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

def VMMLA : BF16MM<1, QPR, "vmmla">;

// BFloat16 widening multiply-accumulate long, top (T=1) / bottom (T=0)
// halves: vfmat.bf16 / vfmab.bf16.
class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
  : N3VCP8<0b00, 0b11, T, 1,
           (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
           NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
       [(set (v4f32 QPR:$dst),
             (OpNode (v4f32 QPR:$Vd),
                     (v8bf16 QPR:$Vn),
                     (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
}

def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;

// Indexed variants of the above; the 2-bit lane index is split across
// Inst{5} and Inst{3}.
multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
  def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
            (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
            IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
    bits<2> idx;
    let Inst{5} = idx{1};
    let Inst{3} = idx{0};
    let Constraints = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (v4f32 (OpNode (v4f32 QPR:$Vd),
                   (v8bf16 QPR:$Vn),
                   (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                            VectorIndex16:$lane)))),
    (!cast<Instruction>(NAME) QPR:$Vd,
                              QPR:$Vn,
                              (EXTRACT_SUBREG QPR:$Vm,
                                (DSubReg_i16_reg VectorIndex16:$lane)),
                              (SubReg_i16_lane VectorIndex16:$lane))>;
}

defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

// vcvt.bf16.f32: narrow a Q register of f32 to a D register of bf16.
def BF16_VCVT  :  N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                      (outs DPR:$Vd), (ins QPR:$Vm),
                      NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "",
                      []>;
}
// End of BFloat16 instructions