//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Fujitsu A64FX processors.
//
//===----------------------------------------------------------------------===//

// Top-level machine description for A64FX. Every resource, write type and
// instruction binding defined for this core is attached to this record.
def A64FXModel : SchedMachineModel {
  // 6 micro-ops dispatched at a time.
  let IssueWidth = 6;
  // 180 entries in micro-op re-order buffer.
  let MicroOpBufferSize = 180;
  // Optimistic load latency.
  let LoadLatency = 5;
  // Extra cycles for mispredicted branch.
  let MispredictPenalty = 12;
  // Determined via a mix of micro-arch details and experimentation.
  let LoopMicroOpBufferSize = 128;
  // Using PostRA sched.
  let PostRAScheduler = 1;
  let CompleteModel = 1;

  // Feature predicates listed as unsupported for this model.
  list<Predicate> UnsupportedFeatures =
    [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
     HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1,
     HasSVE2p1, HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA,
     HasSMEF8F16, HasSMEF8F32, HasSMEFA64];

  // The full InstRW overlap check is switched off for this model.
  let FullInstRWOverlapCheck = 0;
}

let SchedModel = A64FXModel in {

// Define the issue ports (A64FXIP*). Each port is a single-unit resource.

def A64FXIPFLA  : ProcResource<1>; // Port 0
def A64FXIPPR   : ProcResource<1>; // Port 1
def A64FXIPEXA  : ProcResource<1>; // Port 2
def A64FXIPFLB  : ProcResource<1>; // Port 3
def A64FXIPEXB  : ProcResource<1>; // Port 4
def A64FXIPEAGA : ProcResource<1>; // Port 5
def A64FXIPEAGB : ProcResource<1>; // Port 6
def A64FXIPBR   : ProcResource<1>; // Port 7

// Define groups for the functional units on each issue port. Each group
// created will be used by a WriteRes later on.

// Single-port groups. The digit in the name is the issue-port number.
def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;

// Multi-port groups. Each digit in the name is one member port.
def A64FXGI03   : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
def A64FXGI01   : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
def A64FXGI24   : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
def A64FXGI56   : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI056  : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB,
                                A64FXIPEAGA, A64FXIPEAGB]>;

// All eight issue ports.
def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
                             A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB,
                             A64FXIPBR]>;

// Generic write types, named A64FXWrite_<latency>Cyc_<group>.

def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> { let Latency = 1; }

def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 2; }
def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 6; }
def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }

def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> { let Latency = 3; }

def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { let Latency = 5; }

def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 6; }

def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 4; }
def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }

def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 10; }
def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 12; }
def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 20; }

def A64FXWrite_5Cyc_GI5  : SchedWriteRes<[A64FXGI5]> { let Latency = 5; }
def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { let Latency = 11; }

def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> { let Latency = 5; }

def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 1; }
def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 2; }
def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
  let Latency = 4;
  let NumMicroOps = 4;
}

def A64FXWrite_1Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_5Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 5; }
def A64FXWrite_8Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 8; }
def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { let Latency = 11; }

// Load write types.

def A64FXWrite_LDNP : SchedWriteRes<[A64FXGI56]> {
  let Latency = 5;
  let NumMicroOps = 2;
}
def A64FXWrite_LDP01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5;
  let NumMicroOps = 3;
}
def A64FXWrite_LDR01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def A64FXWrite_LD102 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def A64FXWrite_LD103 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
}
def A64FXWrite_LD104 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8;
  let NumMicroOps = 3;
}
def A64FXWrite_LD105 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
}
def A64FXWrite_LD106 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8;
  let NumMicroOps = 4;
}
def A64FXWrite_LD107 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
}
def A64FXWrite_LD108 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def A64FXWrite_LD109 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
}
def A64FXWrite_LD110 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8;
  let NumMicroOps = 3;
}
def A64FXWrite_LD111 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
}
def A64FXWrite_LD112 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8;
  let NumMicroOps = 4;
}
def A64FXWrite_LD113 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
}
def A64FXWrite_LD114 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8;
  let NumMicroOps = 5;
}
def A64FXWrite_LD115 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 5;
}

def A64FXWrite_LD1I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def A64FXWrite_LD1I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8;
  let NumMicroOps = 3;
}
def A64FXWrite_LD2I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8;
  let NumMicroOps = 4;
}
def A64FXWrite_LD2I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8;
  let NumMicroOps = 5;
}
def A64FXWrite_LD3I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8;
  let NumMicroOps = 6;
}
def A64FXWrite_LD3I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8;
  let NumMicroOps = 7;
}
def A64FXWrite_LD4I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8;
  let NumMicroOps = 8;
}
def A64FXWrite_LD4I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8;
  let NumMicroOps = 9;
}

def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> { let Latency = 1; }

// Write types named after the instruction (family) that uses them.

def A64FXWrite_FMOV_GV   : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_ADDLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 12; }
def A64FXWrite_MULLE     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MULLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MADDL     : SchedWriteRes<[A64FXGI03]> { let Latency = 6; }
def A64FXWrite_ABA       : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_ABAL      : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }

def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 12;
  let NumMicroOps = 6;
}
def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14;
  let NumMicroOps = 6;
}

def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }
def A64FXWrite_PMUL     : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }

def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8;
  let NumMicroOps = 3;
}
def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10;
  let NumMicroOps = 3;
}
def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10;
  let NumMicroOps = 2;
}

def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10;
  let NumMicroOps = 3;
}
def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15;
  let NumMicroOps = 2;
}
def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15;
  let NumMicroOps = 3;
}
def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10;
  let NumMicroOps = 3;
}
def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10;
  let NumMicroOps = 2;
}
def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15;
  let NumMicroOps = 2;
}
def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14;
  let NumMicroOps = 7;
}

def A64FXWrite_BIF        : SchedWriteRes<[A64FXGI03]> { let Latency = 5; }
def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_SHA00      : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }
def A64FXWrite_SHA01      : SchedWriteRes<[A64FXGI0]> { let Latency = 12; }
def A64FXWrite_SMOV       : SchedWriteRes<[A64FXGI03]> { let Latency = 25; }

def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10;
  let NumMicroOps = 3;
}
def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10;
  let NumMicroOps = 5;
}
def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10;
  let NumMicroOps = 7;
}
def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10;
  let NumMicroOps = 9;
}

// Prefetch, swap and store write types.

def A64FXWrite_PREF0 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_PREF1 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_SWP   : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STUR  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STNP  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STP01 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }

def A64FXWrite_ST10 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST11 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST12 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST13 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST14 : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST15 : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST16 : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST17 : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }

def A64FXWrite_CAS : SchedWriteRes<[A64FXGI56]> { let Latency = 7; }

// Define commonly used read types.

// No forwarding is provided for these types.
def : ReadAdvance<ReadI, 0>;
def : ReadAdvance<ReadISReg, 0>;
def : ReadAdvance<ReadIEReg, 0>;
def : ReadAdvance<ReadIM, 0>;
def : ReadAdvance<ReadIMA, 0>;
def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadST, 0>;
def : ReadAdvance<ReadVLD, 0>;

//===----------------------------------------------------------------------===//
// 3. Instruction Tables.

//---
// 3.1 Branch Instructions
//---

// Branch, immed
// Branch and link, immed
// Compare and branch
def : WriteRes<WriteBr, [A64FXGI7]> { let Latency = 1; }

// Branch, register
// Branch and link, register != LR
// Branch and link, register = LR
def : WriteRes<WriteBrReg, [A64FXGI7]> { let Latency = 1; }

// System, barrier, hint and atomic writes occupy no modelled resource.
def : WriteRes<WriteSys, []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint, []> { let Latency = 1; }
def : WriteRes<WriteAtomic, []> { let Latency = 4; }

//---
// Branch
//---
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
def : InstRW<[A64FXWrite_1Cyc_GI7],
             (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;

//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---

// Opcode patterns shared by the three integer-ALU bindings that follow.
defvar A64FXIntALUInstrs =
  (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
             "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
             "ADC(W|X)r",
             "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
             "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
             "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
             "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
             "SBCS(W|X)r", "CCMN(W|X)(i|r)",
             "CCMP(W|X)(i|r)", "CSEL(W|X)r",
             "CSINC(W|X)r", "CSINV(W|X)r",
             "CSNEG(W|X)r");

// ALU, basic
// Conditional compare
// Conditional select
// Address generation
def : WriteRes<WriteI, [A64FXGI2456]> { let Latency = 1; }

// NOTE(review): the same opcode patterns are bound to WriteI, WriteISReg and
// WriteIEReg in turn. Confirm which of the three bindings is meant to take
// effect for these opcodes.
def : InstRW<[WriteI], A64FXIntALUInstrs>;

def : InstRW<[WriteI], (instrs COPY)>;

// ALU, extend and/or shift
def : WriteRes<WriteISReg, [A64FXGI2456]> { let Latency = 2; }

def : InstRW<[WriteISReg], A64FXIntALUInstrs>;

def : WriteRes<WriteIEReg, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[WriteIEReg], A64FXIntALUInstrs>;

// Move immed
def : WriteRes<WriteImm, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[A64FXWrite_1Cyc_GI2456],
             (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;

def : InstRW<[A64FXWrite_2Cyc_GI24],
             (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;

// Variable shift
def : WriteRes<WriteIS, [A64FXGI2456]> { let Latency = 1; }

//---
// 3.4 Divide and Multiply Instructions
//---

// Divide, W-form
def : WriteRes<WriteID32, [A64FXGI4]> {
  let Latency = 39;
  let ReleaseAtCycles = [39];
}

// Divide, X-form
def : WriteRes<WriteID64, [A64FXGI4]> {
  let Latency = 23;
  let ReleaseAtCycles = [23];
}

// Multiply accumulate, W-form
def : WriteRes<WriteIM32, [A64FXGI2456]> { let Latency = 5; }

// Multiply accumulate, X-form
def : WriteRes<WriteIM64, [A64FXGI2456]> { let Latency = 5; }

// Both the W- and X-form multiply-accumulates are bound to WriteIM32.
def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr, MADDXrrr, MSUBXrrr)>;
def : InstRW<[A64FXWrite_MADDL],
             (instregex "(S|U)(MADDL|MSUBL)rrr")>;

def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;

// Bitfield extract, two reg
def : WriteRes<WriteExtr, [A64FXGI2456]> { let Latency = 1; }

// Multiply high
def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;

// Miscellaneous Data-Processing Instructions
// Bitfield extract
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move - basic
def : InstRW<[A64FXWrite_1Cyc_GI24],
             (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;

// Bitfield move, insert
def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;

// Count leading
def : InstRW<[A64FXWrite_2Cyc_GI0],
             (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>;

// Reverse bits
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;

// Cryptography Extensions
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]", "^AESI?MC")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>;
def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0", "^SHA256SU1")>;
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>;

// CRC Instructions (CRC32 and CRC32C variants share their timings).
def : InstRW<[A64FXWrite_10Cyc_GI4],
             (instrs CRC32Brr, CRC32Hrr, CRC32CBrr, CRC32CHrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr, CRC32CWrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr, CRC32CXrr)>;

// Reverse bits/bytes
// NOTE: Handled by WriteI.

//---
// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---

// Load register, literal
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
def : WriteRes<WriteLD, [A64FXGI56]> { let Latency = 4; }

// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
def : WriteRes<WriteAdr, [A64FXGI2456]> { let Latency = 1; }

// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
// Load pair, immed offset signed words, base = SP
// LDP only breaks into *one* LS micro-op. Thus
// the resources are handled by WriteLD.
def : WriteRes<WriteLDHi, []> { let Latency = 5; }

// Load register offset, basic
// Load register, register offset, scale by 4/8
// Load register, register offset, scale by 2
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
//
// Both predicate arms of this variant select the same write type.
def A64FXWriteLDIdx : SchedWriteVariant<[
    SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
    SchedVar<NoSchedPred,   [A64FXWrite_1Cyc_GI56]>
]>;
def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;

// Both predicate arms of this variant select ReadDefault.
def A64FXReadAdrBase : SchedReadVariant<[
    SchedVar<ScaledIdxPred, [ReadDefault]>,
    SchedVar<NoSchedPred,   [ReadDefault]>
]>;
def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;

// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.

// Per-instruction load bindings. Each RW list is bound exactly once to the
// union of the instructions that use it. Earlier revisions repeated the
// pre/post-index and register-offset groups verbatim and listed LDPWpre twice
// in the slot where LDPXpre belongs; no instruction's RW list has changed.

// Load pair, immed offset (non-temporal and normal forms).
def : InstRW<[A64FXWrite_LDNP, WriteLDHi],
             (instrs LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi,
                     LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi)>;

// Load register, unsigned immed.
// NOTE(review): LDRWui and LDRXui are not bound in this section - confirm
// that leaving them on their default write type is intentional.
def : InstRW<[A64FXWrite_5Cyc_GI56],
             (instrs LDRBui, LDRDui, LDRHui, LDRQui, LDRSui)>;

// Load register, literal.
def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl, LDRQl, LDRWl, LDRXl)>;

// Load register, immed unprivileged.
def : InstRW<[A64FXWrite_5Cyc_GI56],
             (instrs LDTRBi, LDTRHi, LDTRWi, LDTRXi,
                     LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi)>;

// Load pair, immed pre-index and post-index.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
             (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre,
                     LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>;

// Load register, immed pre-index; sign-extending and narrow zero-extending
// loads use this binding for their post-index forms as well.
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
             (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre,
                     LDRWpre, LDRXpre,
                     LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost,
                     LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost,
                     LDRBBpre, LDRBBpost,
                     LDRHHpre, LDRHHpost)>;

// Load register, immed post-index.
def : InstRW<[A64FXWrite_LDR01, WriteI],
             (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost, LDRSpost,
                     LDRWpost, LDRXpost)>;

// Load register, register offset (W- and X-register index).
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
             (instrs LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW,
                     LDRSroW, LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW,
                     LDRBroX, LDRDroX, LDRHroX, LDRHHroX, LDRQroX,
                     LDRSroX, LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX)>;

// Load register, unscaled immed.
// NOTE(review): LDURWi is not bound in this section - confirm intentional.
def : InstRW<[A64FXWrite_5Cyc_GI56],
             (instrs LDURBi, LDURBBi, LDURDi, LDURHi, LDURHHi, LDURQi,
                     LDURSi, LDURXi,
                     LDURSBWi, LDURSBXi, LDURSHWi, LDURSHXi, LDURSWi)>;

//---
// Prefetch
//---
def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
def : InstRW<[A64FXWrite_PREF1],
             (instrs PRFUMi, PRFMui, PRFMroW, PRFMroX)>;

//--
// 3.7 Store Instructions
// 3.11 FP Store Instructions
//--

// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
def : WriteRes<WriteST, [A64FXGI56]> {
  let Latency = 1;
}

// Store register, immed post-index
// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase

// Store register, immed pre-index
// NOTE: Handled by WriteAdr, WriteST

// Store register, register offset, basic
// Store register, register offset, scaled by 4/8
// Store register, register offset, scaled by 2
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> {
  let Latency = 1;
}

// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
def : WriteRes<WriteSTP, [A64FXGI56]> {
  let Latency = 1;
}

// Store pair, immed post-index, W-form
// Store pair, immed post-index, X-form
// Store pair, immed pre-index, W-form
// Store pair, immed pre-index, X-form
// NOTE: Handled by WriteAdr, WriteSTP.
//---
// Per-opcode store overrides.
//
// Every record below names its opcodes explicitly and is listed exactly once
// per distinct operand list. Records are deliberately NOT merged into larger
// (instrs ...) lists: InstRW records are grouped per scheduling class, so the
// one-record-per-opcode-set layout is preserved as-is.
//---

// Store register, unscaled immediate (STUR*).
def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>;

// Store register, immediate, unprivileged (STTR*).
// NOTE(review): WriteAdr is listed ahead of A64FXWrite_STUR here even though
// these records sit next to the non-writeback STUR forms -- confirm the
// leading WriteAdr is intended for STTR*.
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>;
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>;
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>;
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>;

// Store pair, no-allocate form (STNP*).
def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>;

// Store pair, immediate offset (STP*i); same write as the STNP* forms.
def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>;

// Store register, unsigned immediate (STR*ui).
def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>;

// Store pair, immediate pre-/post-index.
// NOTE(review): each opcode pair is given two records, one without and one
// with a trailing ReadAdrBase. Both are kept, in their original relative
// order, because this model sets FullInstRWOverlapCheck = 0 and which of two
// fully overlapping records takes effect depends on that order -- confirm
// which operand list is actually intended.
def : InstRW<[A64FXWrite_STP01],
             (instrs STPDpre, STPDpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPDpre, STPDpost)>;
def : InstRW<[A64FXWrite_STP01],
             (instrs STPQpre, STPQpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPQpre, STPQpost)>;
def : InstRW<[A64FXWrite_STP01],
             (instrs STPSpre, STPSpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPSpre, STPSpost)>;
def : InstRW<[A64FXWrite_STP01],
             (instrs STPWpre, STPWpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPWpre, STPWpost)>;
def : InstRW<[A64FXWrite_STP01],
             (instrs STPXpre, STPXpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPXpre, STPXpost)>;

// Store register, immediate pre-/post-index.
// The first write in each list is WriteAdr; the second is A64FXWrite_STP01.
// As with the pair forms above, the variants without and with ReadAdrBase
// are both kept in their original relative order.
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRBpre, STRBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRBpre, STRBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRBBpre, STRBBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRBBpre, STRBBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRDpre, STRDpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRDpre, STRDpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRHpre, STRHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRHpre, STRHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRHHpre, STRHHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRHHpre, STRHHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRQpre, STRQpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRQpre, STRQpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRSpre, STRSpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRSpre, STRSpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRWpre, STRWpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRWpre, STRWpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRXpre, STRXpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRXpre, STRXpost)>;

// Store register, register offset (W- and X-register index forms).
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRBroW, STRBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRBBroW, STRBBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRDroW, STRDroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRHroW, STRHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRHHroW, STRHHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRQroW, STRQroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRSroW, STRSroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRWroW, STRWroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRXroW, STRXroX)>;
//---
// 3.8 FP Data Processing Instructions
//---

// FP absolute value
// FP min/max
// FP negate
// Default for WriteF: A64FXGI03 group (FLA/FLB pipes), latency 4, with the
// resource released at cycle 2.
def : WriteRes<WriteF, [A64FXGI03]> {
  let Latency = 4;
  let ReleaseAtCycles = [2];
}

// FP arithmetic
// NOTE(review): only the D- and H-register forms are named in these two
// records; confirm the S-register forms are meant to fall back to WriteF.

def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>;

// FP compare
def : WriteRes<WriteFCmp, [A64FXGI03]> {
  let Latency = 4;
  let ReleaseAtCycles = [2];
}

// FP Div, Sqrt
// Default for WriteFDiv: A64FXGI0 group (FLA pipe only), latency 43.
def : WriteRes<WriteFDiv, [A64FXGI0]> {
  let Latency = 43;
}

// Named divide / square-root writes, all on A64FXGI0. They differ only in
// latency: 38 (A64FXXWriteFDiv), 29 (the *SP writes), 43 (the *DP writes).
def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]> {
  let Latency = 38;
}

def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]> {
  let Latency = 29;
}

def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]> {
  let Latency = 43;
}

def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> {
  let Latency = 29;
}

def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> {
  let Latency = 43;
}

// FP divide, S-form
// FP square root, S-form
// NOTE(review): the "^FDIVSrr" / "^FSQRTSr" regex records below name the same
// opcodes as the (instrs ...) records at the top of this group and use the
// same write; they look redundant but are kept because record order matters
// with FullInstRWOverlapCheck = 0 -- confirm before removing.
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>;

// FP divide, D-form
// FP square root, D-form
// Same layout as the S-form group above, using the *DP writes.
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>;

// FP round to integral
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;

// FP select
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;

//---
// 3.9 FP Miscellaneous Instructions
//---

// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
def : WriteRes<WriteFCvt, [A64FXGI03]> {
  let Latency = 9;
  let ReleaseAtCycles = [2];
}

// FP move, immed
// FP move, register
def : WriteRes<WriteFImm, [A64FXGI0]> {
  let Latency = 4;
  let ReleaseAtCycles = [2];
}

// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
def : WriteRes<WriteFCopy, [A64FXGI0]> {
  let Latency = 4;
  let ReleaseAtCycles = [2];
}

// The high-half FMOV forms get their own writes instead of WriteFCopy.
def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;

//---
// 3.12 ASIMD Integer Instructions
//---

// ASIMD absolute diff, D-form
// ASIMD absolute diff, Q-form
// ASIMD absolute diff accum, D-form
// ASIMD absolute diff accum, Q-form
// ASIMD absolute diff accum long
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD compare
// ASIMD logical (AND, BIC, EOR)
// ASIMD max/min, basic
// ASIMD max/min, reduce, 4H/4S
// ASIMD max/min, reduce, 8B/8H
// ASIMD max/min, reduce, 16B
// ASIMD multiply, D-form
// ASIMD multiply, Q-form
// ASIMD multiply accumulate long
// ASIMD multiply accumulate saturating long
// ASIMD multiply long
// ASIMD pairwise add and accumulate
// ASIMD shift accumulate
// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic, D-form
// ASIMD shift by immed and insert, basic, Q-form
// ASIMD shift by immed, complex
// ASIMD shift by register, basic, D-form
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
// Defaults for the generic vector writes: both WriteVd and WriteVq use the
// A64FXGI03 group (FLA/FLB pipes) with latency 4. The InstRW records that
// follow override this per opcode / opcode regex.
def : WriteRes<WriteVd, [A64FXGI03]> {
  let Latency = 4;
}
def : WriteRes<WriteVq, [A64FXGI03]> {
  let Latency = 4;
}

// NOTE(review): many regexes in this section match the same opcodes as an
// earlier or later record (for example "^[SU]MOVv", "^INSv", "^SQADD",
// "^TBLv"). The model sets FullInstRWOverlapCheck = 0, so these overlaps are
// not rejected; the order of the records is therefore significant and must
// not be changed casually.

// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B

// ASIMD logical (MVN (alias for NOT), ORN, ORR)
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;

// ASIMD arith, reduce
def : InstRW<[A64FXWrite_ADDLV],
            (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;

// ASIMD polynomial (8x8) multiply long
def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
def : InstRW<[A64FXWrite_MULLV],
            (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;

// ASIMD absolute diff accum, D-form
def : InstRW<[A64FXWrite_ABA],
            (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
// ASIMD absolute diff accum, Q-form
def : InstRW<[A64FXWrite_ABA],
            (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
// ASIMD absolute diff accum long
def : InstRW<[A64FXWrite_ABAL],
            (instregex "^[SU]ABAL")>;
// ASIMD arith, reduce, 4H/4S
// NOTE(review): the arrangement lists in the three reduce regexes below do
// not line up with the arrangements named in their comments -- confirm.
def : InstRW<[A64FXWrite_ADDLV1],
            (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
// ASIMD arith, reduce, 8B
def : InstRW<[A64FXWrite_ADDLV1],
            (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
// ASIMD arith, reduce, 16B/16H
def : InstRW<[A64FXWrite_ADDLV1],
            (instregex "^[SU]?ADDL?Vv16i8v$")>;
// ASIMD max/min, reduce, 4H/4S
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
// ASIMD max/min, reduce, 8B/8H
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
// ASIMD max/min, reduce, 16B/16H
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
// ASIMD multiply, D-form
// The regex is assembled from three string pieces with the '#' paste
// operator: mnemonic, arrangement, optional "_indexed" suffix.
def : InstRW<[A64FXWrite_PMUL],
            (instregex "^(P?MUL|SQR?DMUL)" #
                       "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
                       "(_indexed)?$")>;

// ASIMD multiply, Q-form
def : InstRW<[A64FXWrite_PMUL],
            (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply, Q-form
def : InstRW<[A64FXWrite_SQRDMULH],
            (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply accumulate, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
// ASIMD multiply accumulate, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
// ASIMD shift accumulate
def : InstRW<[A64FXWrite_SRSRAV],
            (instregex "SRSRAv", "URSRAv")>;
def : InstRW<[A64FXWrite_SSRAV],
            (instregex "SSRAv", "USRAv")>;

// ASIMD shift by immed, basic
def : InstRW<[A64FXWrite_RSHRN],
            (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
def : InstRW<[A64FXWrite_SHRN],
            (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;

def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;

// ASIMD shift by immed, complex
def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
// ASIMD shift by register, basic, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
// ASIMD shift by register, complex, D-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^[SU][QR]{1,2}SHL" #
                       "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
// ASIMD shift by register, complex, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD Arithmetic
// NOTE(review): several opcode families in this group (ADDHN/SUBHN,
// RADDHN/RSUBHN, the SQADD/UQADD/... list, SABA/UABA) are matched by more
// than one record, always with the same write within this group.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
                       "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
def : InstRW<[A64FXWrite_ADDP],
            (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
                       "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
def : InstRW<[A64FXWrite_4Cyc_GI0],
            (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
def : InstRW<[A64FXWrite_ABA],
             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>;
def : InstRW<[A64FXWrite_SHRN],
            (instregex "^ADDHNv", "^SUBHNv")>;
def : InstRW<[A64FXWrite_RSHRN],
            (instregex "^RADDHNv", "^RSUBHNv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
                       "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
                       "^URHADD", "^USQADD")>;

// ASIMD compare, max/min (element-wise and pairwise), absolute difference.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^CMEQv", "^CMGEv", "^CMGTv",
                       "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
def : InstRW<[A64FXWrite_ADDP],
            (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SABDv", "^UABDv")>;
def : InstRW<[A64FXWrite_TBX1],
            (instregex "^SABDLv", "^UABDLv")>;

//---
// 3.13 ASIMD Floating-point Instructions
//---

// Default for WriteFMul: A64FXGI03 group, latency 9.
def : WriteRes<WriteFMul, [A64FXGI03]> {
  let Latency = 9;
}

// ASIMD FP absolute value
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;

// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^FABDv", "^FADDv", "^FSUBv")>;

// ASIMD FP arith, pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;

// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv",
                                                 "^FCMGTv", "^FCMLEv",
                                                 "^FCMLTv")>;
// ASIMD FP round, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^FRINT[AIMNPXZ](v2f32)")>;
// ASIMD FP round, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;

// ASIMD FP convert, long
// ASIMD FP convert, narrow
// ASIMD FP convert, other, D-form
// ASIMD FP convert, other, Q-form

// ASIMD FP convert, long and narrow
def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
// ASIMD FP convert, other, D-form
def : InstRW<[A64FXWrite_FCVTXNV],
      (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
// ASIMD FP convert, other, Q-form
def : InstRW<[A64FXWrite_FCVTXNV],
      (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;

// NOTE(review): each ASIMD FP divide opcode below is named twice, once via
// (instrs ...) and once via (instregex ...), with the same write.

// ASIMD FP divide, D-form, F32
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;
def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>;

// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv",
                                                "^FMINv", "^FMINNMv")>;

// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv",
                                            "^FMINPv", "^FMINNMPv")>;

// ASIMD FP max/min, reduce
def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv",
                                               "^FMINVv", "^FMINNMVv")>;

// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP negate
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;

//--
// 3.14 ASIMD Miscellaneous Instructions
//--

// ASIMD bit reverse
def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;

// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
def : InstRW<[A64FXWrite_BIF],
            (instregex "^BIFv", "^BITv", "^BSLv")>;

// ASIMD count, D-form
// ASIMD count, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0],
            (instregex "^CLSv", "^CLZv", "^CNTv")>;

// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;

// ASIMD extract
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;

// ASIMD extract narrow
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;

// ASIMD extract narrow, saturating
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;

// ASIMD insert, element to element
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD move, integer immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;

// ASIMD move, FP immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;

// ASIMD table lookup, D-form
// One record per table size (One..Four registers); the write changes with
// the table size and differs between TBL and TBX.
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;

// ASIMD table lookup, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;

// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0],
            (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;

// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
                       "^FRSQRTEv", "^URSQRTEv")>;

// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;

// ASIMD reverse
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^REV16v", "^REV32v", "^REV64v")>;

// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
// NOTE(review): this catch-all matches every TBL/TBX opcode already given a
// size-specific write above, with a different write (A64FXWrite_6Cyc_GI0) --
// confirm which record is meant to take effect.
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;

// ASIMD transfer, element to word or word
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>;

// ASIMD transfer gen reg to element
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transpose
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v",
                                                "^UZP1v", "^UZP2v")>;

// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;

//--
// 3.15 ASIMD Load Instructions
//--

// Throughout this section the "_POST" (post-index) records carry an extra
// WriteAdr after the load write; the plain forms list only the load write.

// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[A64FXWrite_8Cyc_GI56],
            (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_11Cyc_GI56],
            (instregex "^LD1Onev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
            (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD109, WriteAdr],
            (instregex "^LD1Onev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[A64FXWrite_LD102],
            (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD103],
            (instregex "^LD1Twov(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
            (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
            (instregex "^LD1Twov(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[A64FXWrite_LD104],
            (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD105],
            (instregex "^LD1Threev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
            (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
            (instregex "^LD1Threev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[A64FXWrite_LD106],
            (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD107],
            (instregex "^LD1Fourv(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
            (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
            (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
            (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
// NOTE(review): unlike the other plain load records in this section, this
// one uses a write on the GI03 group (A64FXWrite_8Cyc_GI03) -- confirm.
def : InstRW<[A64FXWrite_8Cyc_GI03],
            (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
            (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD103],
            (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
            (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
            (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[A64FXWrite_LD102],
            (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
            (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD105],
            (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
            (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
            (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[A64FXWrite_LD104],
            (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
            (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD107],
            (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
            (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
            (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[A64FXWrite_LD106],
            (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
            (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

//--
// 3.16 ASIMD Store Instructions
//--

// In this section the write is selected by the number of registers stored:
// ST10..ST13 for the plain forms and ST14..ST17 (plus WriteAdr) for the
// "_POST" forms, for one to four registers respectively.

// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[A64FXWrite_ST10],
            (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
            (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[A64FXWrite_ST11],
            (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
            (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[A64FXWrite_ST12],
            (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
            (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[A64FXWrite_ST13],
            (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
            (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[A64FXWrite_ST10],
            (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
            (instregex "^ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_ST11],
            (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
            (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[A64FXWrite_ST11],
            (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
            (instregex "^ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple: D-form B/H/S, Q-form B/H/S, Q-form D.
// The _POST opcodes additionally list WriteAdr.
def : InstRW<[A64FXWrite_ST12],
             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane: B/H, S, D.
def : InstRW<[A64FXWrite_ST12],
             (instregex "^ST3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
             (instregex "^ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple: D-form B/H/S, Q-form B/H/S, Q-form D.
def : InstRW<[A64FXWrite_ST13],
             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 4 element, one lane: B/H, S, D.
def : InstRW<[A64FXWrite_ST13],
             (instregex "^ST4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
             (instregex "^ST4i(8|16|32|64)_POST$")>;

// V8.1a Atomics (LSE)
//
// Every entry pairs an A64FX write with the generic WriteAtomic. Each
// operation lists its un-suffixed, A, L and AL opcodes in a single record.

def : InstRW<[A64FXWrite_CAS, WriteAtomic],
             (instrs CASB,   CASH,   CASW,   CASX,
                     CASAB,  CASAH,  CASAW,  CASAX,
                     CASLB,  CASLH,  CASLW,  CASLX,
                     CASALB, CASALH, CASALW, CASALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDADDB,   LDADDH,   LDADDW,   LDADDX,
                     LDADDAB,  LDADDAH,  LDADDAW,  LDADDAX,
                     LDADDLB,  LDADDLH,  LDADDLW,  LDADDLX,
                     LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDCLRB,   LDCLRH,   LDCLRW,   LDCLRX,
                     LDCLRAB,  LDCLRAH,  LDCLRAW,  LDCLRAX,
                     LDCLRLB,  LDCLRLH,  LDCLRLW,  LDCLRLX,
                     LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDEORB,   LDEORH,   LDEORW,   LDEORX,
                     LDEORAB,  LDEORAH,  LDEORAW,  LDEORAX,
                     LDEORLB,  LDEORLH,  LDEORLW,  LDEORLX,
                     LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDSETB,   LDSETH,   LDSETW,   LDSETX,
                     LDSETAB,  LDSETAH,  LDSETAW,  LDSETAX,
                     LDSETLB,  LDSETLH,  LDSETLW,  LDSETLX,
                     LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDSMAXB,   LDSMAXH,   LDSMAXW,   LDSMAXX,
                     LDSMAXAB,  LDSMAXAH,  LDSMAXAW,  LDSMAXAX,
                     LDSMAXLB,  LDSMAXLH,  LDSMAXLW,  LDSMAXLX,
                     LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDSMINB,   LDSMINH,   LDSMINW,   LDSMINX,
                     LDSMINAB,  LDSMINAH,  LDSMINAW,  LDSMINAX,
                     LDSMINLB,  LDSMINLH,  LDSMINLW,  LDSMINLX,
                     LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDUMAXB,   LDUMAXH,   LDUMAXW,   LDUMAXX,
                     LDUMAXAB,  LDUMAXAH,  LDUMAXAW,  LDUMAXAX,
                     LDUMAXLB,  LDUMAXLH,  LDUMAXLW,  LDUMAXLX,
                     LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDUMINB,   LDUMINH,   LDUMINW,   LDUMINX,
                     LDUMINAB,  LDUMINAH,  LDUMINAW,  LDUMINAX,
                     LDUMINLB,  LDUMINLH,  LDUMINLW,  LDUMINLX,
                     LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;

def : InstRW<[A64FXWrite_SWP, WriteAtomic],
             (instrs SWPB,   SWPH,   SWPW,   SWPX,
                     SWPAB,  SWPAH,  SWPAW,  SWPAX,
                     SWPLB,  SWPLH,  SWPLW,  SWPLX,
                     SWPALB, SWPALH, SWPALW, SWPALX)>;

def : InstRW<[A64FXWrite_STUR, WriteAtomic],
             (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;

// SVE instructions

// SVE instructions are modeled more accurately than the other instructions
// in this file.
// TODO: modify the model of other instructions similarly.

// ---------------------------------------------------------------------------
// SVE opcodes that reuse the shared A64FXWrite_<N>Cyc_<group> writes.
// ---------------------------------------------------------------------------

def : InstRW<[A64FXWrite_4Cyc_GI0],
             (instregex "^AND_ZI", "^CL[SZ]_Z", "^CPY_ZP[mz]I", "^DUP_ZZ?I",
                        "^DUPM_Z", "^EOR_ZI", "^ORR_ZI",
                        "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P", "^FCPY_Z",
                        "^F(MAX|MIN).*I_", "^NEG_Z", "^[SU](MAX|MIN)_ZI",
                        "^SUBR?_ZI")>;

def : InstRW<[A64FXWrite_6Cyc_GI0],
             (instregex "^CLAST[AB]_[VZ]", "^COMPACT_Z", "^CPY_ZPmV",
                        "^DUP_ZR", "^EXT_Z", "^FDUP_Z", "^INSR_ZV",
                        "^LAST[AB]_V", "^REV_Z", "^SPLICE_Z",
                        "^[SU]UNPK(HI|LO)_Z", "^TBL_Z", "^TRN[12]_Z")>;

def : InstRW<[A64FXWrite_9Cyc_GI0],
             (instregex "^F(ADD|SUBR?)_.*I_", "^FRECPS_Z", "^FRSQRTS_Z",
                        "^INDEX_II_[SD]", "^MUL_ZI")>;

def : InstRW<[A64FXWrite_4Cyc_GI3], (instregex "^CNT_Z")>;

def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^ABS_Z", "^ADD_Z", "^AND_Z[^I]", "^ASRR?_(WIDE_)?Z",
                        "^BIC_Z", "^ADR_[SU]XTW_Z", "^CNOT_Z",
                        "^DEC[BHWD]_Z", "^EOR_Z[^I]", "^INC[BHWD]_Z",
                        "^ORR_Z[^I]", "^FABS_Z", "^FACG[ET]_P", "^FEXPA_Z",
                        "^F(MAX|MIN)[^V]*Z_", "^FNEG_Z", "^FRECP[EX]_Z",
                        "^FRSQRTE_Z", "^FTSSEL_Z", "^LS[LR]R?(_WIDE)?_Z",
                        "^NOT_Z", "^RBIT_Z", "^REV[BHW]_Z", "^SABD_Z",
                        "^SEL_Z", "^[SU](MAX|MIN)_ZP",
                        "^[SU]Q(INC|DEC)[^P]_Z", "^SUBR?_Z[^I]",
                        "^[SU]XT._Z", "^UABD_Z")>;

def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FABD_Z", "^F(ADD|SUBR?)_.*Z_",
                        "^FN?(MAD|MLA|MLS|MSB)_ZP", "^FMUL_(ZP|ZZZ_)",
                        "^FMULX_Z", "^FCVT(ZS|ZU)?_Z", "^FRINT._Z",
                        "^FSCALE_Z", "^FTMAD_Z", "^FTSMUL_Z", "^MAD_Z",
                        "^MLA_Z", "^MLS_Z", "^MSB_Z", "^MUL_ZP",
                        "^[SU]CVTF_Z", "^[SU]DOT_ZZZ_", "^[SU]MULH_Z")>;

def : InstRW<[A64FXWrite_3Cyc_GI1],
             (instregex "^ANDS?_P", "^BICS?_P", "^BRK.*_P", "^EORS?_P",
                        "^ORRS?_P", "^NANDS?_P", "^NORS?_P", "^ORNS?_P",
                        "^PFALSE", "^PNEXT", "^PFIRST", "^PTEST", "^PTRUES?",
                        "^PUNPK(HI|LO)", "^RDFFRS?", "^REV_P", "^SEL_P",
                        "^TRN[12]_P")>;

def : InstRW<[A64FXWrite_1Cyc_GI24],
             (instregex "^ADD[PV]L", "^CNT[BHWD]_X", "^DEC[BHWD]_X",
                        "^INC[BHWD]_X", "^RDVLI")>;

def : InstRW<[A64FXWrite_11Cyc_GI5], (instregex "^LDR_[PZ]XI")>;

def : InstRW<[A64FXWrite_11Cyc_GI56],
             (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;

// ---------------------------------------------------------------------------
// SVE opcodes with a dedicated write. Each write is declared immediately
// before the InstRW that uses it; fields not listed keep the SchedWriteRes
// defaults.
// ---------------------------------------------------------------------------

// SETFFR / MOVPRFX: write with an empty resource list.
def A64FXWrite_None : SchedWriteRes<[]>;
def : InstRW<[A64FXWrite_None], (instregex "^SETFFR", "^MOVPRFX")>;

// FMLA / FMLS / FMUL, _ZZZI forms.
let Latency = 15, NumMicroOps = 2, ReleaseAtCycles = [2] in
def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FMAIndexed], (instregex "^F(MLA|MLS|MUL)_ZZZI")>;

// ADR with LSL.
let Latency = 5, NumMicroOps = 2, ReleaseAtCycles = [2] in
def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_ADR_LSL_Z], (instregex "^ADR_LSL_Z")>;

// ASRD.
let Latency = 8, NumMicroOps = 2 in
def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]>;
def : InstRW<[A64FXWrite_ASRD], (instregex "^ASRD_Z")>;

// Integer / logical reductions (*V_VPZ_*), one write per element size.
let Latency = 46, NumMicroOps = 10, ReleaseAtCycles = [10] in
def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycB],
             (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;

let Latency = 42, NumMicroOps = 9, ReleaseAtCycles = [9] in
def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycH],
             (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;

let Latency = 38, NumMicroOps = 8, ReleaseAtCycles = [8] in
def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycS],
             (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;

let Latency = 34, NumMicroOps = 7, ReleaseAtCycles = [7] in
def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycD],
             (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;

// CLASTA / CLASTB, _R forms.
let Latency = 29 in
def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_CLAST_R], (instregex "^CLAST[AB]_R")>;

// CMP*, _P forms.
let Latency = 4 in
def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]>;
def : InstRW<[A64FXWrite_CMP], (instregex "^CMP.*_P")>;

// CNTP.
let Latency = 6 in
def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]>;
def : InstRW<[A64FXWrite_CNTP], (instregex "^CNTP_X")>;

// CPY, _ZPmR form.
let Latency = 8 in
def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_CPYScalar], (instregex "^CPY_ZPmR")>;

// CTERM*.
let Latency = 2, ReleaseAtCycles = [2] in
def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]>;
def : InstRW<[A64FXWrite_CTERM], (instregex "^CTERM")>;

// INCP / DECP, _X forms.
let Latency = 7, NumMicroOps = 2 in
def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]>;
def : InstRW<[A64FXWrite_INCPScalar], (instregex "^DECP_X", "^INCP_X")>;

// INCP / DECP, _Z forms.
let Latency = 12 in
def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]>;
def : InstRW<[A64FXWrite_INCPVector], (instregex "^DECP_Z", "^INCP_Z")>;

// FADDV, one write per element size.
let Latency = 75, NumMicroOps = 11, ReleaseAtCycles = [11] in
def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDVH], (instrs FADDV_VPZ_H)>;

let Latency = 60, NumMicroOps = 9, ReleaseAtCycles = [9] in
def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDVS], (instrs FADDV_VPZ_S)>;

let Latency = 45, NumMicroOps = 7, ReleaseAtCycles = [7] in
def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDVD], (instrs FADDV_VPZ_D)>;

// FADDA, one write per element size.
let Latency = 468, NumMicroOps = 63, ReleaseAtCycles = [63] in
def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDAH], (instrs FADDA_VPZ_H)>;

let Latency = 228, NumMicroOps = 31, ReleaseAtCycles = [31] in
def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDAS], (instrs FADDA_VPZ_S)>;

let Latency = 108, NumMicroOps = 15, ReleaseAtCycles = [15] in
def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDAD], (instrs FADDA_VPZ_D)>;

// FCADD.
let Latency = 15, NumMicroOps = 2 in
def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]>;
def : InstRW<[A64FXWrite_FCADDZ], (instregex "^FCADD_Z")>;

// FCMLA.
let Latency = 15, NumMicroOps = 3, ReleaseAtCycles = [3] in
def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FCMLAZ], (instregex "^FCMLA_Z")>;

// FDIV / FDIVR / FSQRT, one write per element size. GI0 is held for the
// full latency of the operation.
let Latency = 134, ReleaseAtCycles = [134] in
def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_FDIVH], (instregex "^F(DIVR?|SQRT)_Z.*_H")>;

let Latency = 98, ReleaseAtCycles = [98] in
def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_FDIVS], (instregex "^F(DIVR?|SQRT)_Z.*_S")>;

let Latency = 154, ReleaseAtCycles = [154] in
def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_FDIVD], (instregex "^F(DIVR?|SQRT)_Z.*_D")>;

// FMAXV / FMINV / FMAXNMV / FMINNMV, H element size.
let Latency = 54, NumMicroOps = 11, ReleaseAtCycles = [11] in
def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FMAXVH], (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;

// FMAXV / FMINV / FMAXNMV / FMINNMV, S element size (write only; the
// InstRW that binds it follows).
let Latency = 44, NumMicroOps = 9, ReleaseAtCycles = [9] in
def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]>;
// FMAXV / FMINV / FMAXNMV / FMINNMV, S element size.
def : InstRW<[A64FXWrite_FMAXVS], (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;

// FMAXV / FMINV / FMAXNMV / FMINNMV, D element size.
def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
}
// Bind the D-sized reductions to the D-sized write declared just above
// (this InstRW previously referenced A64FXWrite_FMAXVH, leaving
// A64FXWrite_FMAXVD unused).
def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;

// INDEX, (RI|IR) forms, B/H element sizes.
def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 17;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_INDEX_RI_BH], (instregex "^INDEX_(RI|IR)_[BH]")>;

// INDEX, (RI|IR) forms, S/D element sizes.
def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 13;
  let NumMicroOps = 1;
}
def : InstRW<[A64FXWrite_INDEX_RI_SD], (instregex "^INDEX_(RI|IR)_[SD]")>;

// INDEX, II form, B/H element sizes.
def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]> {
  let Latency = 13;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_INDEX_II_BH], (instregex "^INDEX_II_[BH]")>;

// INDEX, RR form, B/H element sizes.
def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]> {
  let Latency = 17;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_BH], (instregex "^INDEX_RR_[BH]")>;

// INDEX, RR form, S/D element sizes.
def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 17;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_SD], (instregex "^INDEX_RR_[SD]")>;

// INSR, _ZR form.
def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 10;
}
def : InstRW<[A64FXWrite_INSR_ZR], (instregex "^INSR_ZR")>;

// LASTA / LASTB, _R forms.
def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 25;
}
// Bind LAST[AB]_R to its own write declared just above (this InstRW
// previously referenced A64FXWrite_CLAST_R, leaving A64FXWrite_LAST_R
// unused).
def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>;

// Gather loads, _IMM opcode names, W/_S forms.
def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 19;
  let ReleaseAtCycles = [2, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_ZI],
  (instregex "^GLD(FF)?1W_IMM", "^GLD(FF)?1S?[BHW]_S_IMM")>;

// Gather loads, _IMM opcode names, D/_D forms.
def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let ReleaseAtCycles = [1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_ZI],
  (instregex "^GLD(FF)?1D_IMM", "^GLD(FF)?1S?[BHW]_D_IMM")>;

// Gather loads, non-_IMM opcode names, W/_S forms.
def A64FXWrite_GLD_S_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 23;
  let ReleaseAtCycles = [2, 1, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_RZ],
  (instregex "^GLD(FF)?1W_[^DI]", "^GLD(FF)?1S?[BHW]_S_[^I]")>;

// Gather loads, non-_IMM opcode names, D/_D forms.
def A64FXWrite_GLD_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let ReleaseAtCycles = [1, 1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_RZ],
  (instregex "^GLD(FF)?1D_[^I]", "^GLD(FF)?1D$", "^GLD(FF)?1S?[BHW]_D_[^I]",
             "^GLD(FF)?1S?[BHW]_D$")>;

// LD2, B/H element sizes.
def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_LD2_BH], (instregex "^LD2[BH]")>;

// LD2, W/D element sizes, _IMM opcodes.
def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_LD2_WD_IMM], (instregex "^LD2[WD]_IMM")>;

// LD2, W/D element sizes, opcodes without a suffix.
def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_LD2_WD], (instregex "^LD2[WD]$")>;

// LD3, B/H element sizes.
def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [13];
}
def : InstRW<[A64FXWrite_LD3_BH], (instregex "^LD3[BH]")>;

// LD3, W/D element sizes, _IMM opcodes.
def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_LD3_WD_IMM], (instregex "^LD3[WD]_IMM")>;

// LD3, W/D element sizes, opcodes without a suffix.
def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4];
}
def : InstRW<[A64FXWrite_LD3_WD], (instregex "^LD3[WD]$")>;

// LD4, B/H element sizes.
def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [17];
}
def : InstRW<[A64FXWrite_LD4_BH], (instregex "^LD4[BH]")>;

// LD4, W/D element sizes, _IMM opcodes.
def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4];
}
def : InstRW<[A64FXWrite_LD4_WD_IMM], (instregex "^LD4[WD]_IMM")>;

// LD4, W/D element sizes, opcodes without a suffix.
def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [5];
}
def : InstRW<[A64FXWrite_LD4_WD], (instregex "^LD4[WD]$")>;

// Prefetches. None of the PRF writes sets Latency.
def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]> {
}
def : InstRW<[A64FXWrite_PRF], (instregex "^PRF._PR")>;

def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [2, 1, 4];
}
def : InstRW<[A64FXWrite_PRF_W_RZ], (instregex "^PRF._S_[^P]")>;

def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [2, 4];
}
def : InstRW<[A64FXWrite_PRF_W_ZI], (instregex "^PRF._S_PZI")>;

def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_RZ], (instregex "^PRF._D_[^P]")>;

def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_ZI], (instregex "^PRF._D_PZI")>;

// SDIV / UDIV / SDIVR / UDIVR, S element size. GI0 is held for the full
// latency.
def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]> {
  let Latency = 114;
  let ReleaseAtCycles = [114];
}
def : InstRW<[A64FXWrite_SDIV_S], (instregex "^[SU]DIVR?.*_S")>;

// SDIV / UDIV / SDIVR / UDIVR, D element size.
def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]> {
  let Latency = 178;
  let ReleaseAtCycles = [178];
}
def : InstRW<[A64FXWrite_SDIV_D], (instregex "^[SU]DIVR?.*_D")>;

// SDOT / UDOT, _ZZZI forms.
def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let Latency = 15;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_SDOT_I], (instregex "^[SU]DOT_ZZZI")>;

// [SU]Q(INC|DEC)[BHWD], _W/_X forms.
def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_SQINC_Scalar], (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;

// [SU]Q(INC|DEC)P, _W/_X forms.
def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [3, 1];
}
def : InstRW<[A64FXWrite_SQINCP_X], (instregex "^[SU]Q(INC|DEC)P_[WX]")>;

// [SU]Q(INC|DEC)P, _Z forms.
def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let Latency = 12;
}
def : InstRW<[A64FXWrite_SQINCP_Z], (instregex "^[SU]Q(INC|DEC)P_Z")>;

// ST1 / STNT1.
def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_ST1], (instregex "^ST(NT)?1[BHWD]")>;

// Scatter stores, non-_I opcode names, W/_S forms.
def A64FXWrite_SST1_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [8, 8, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_RZ],
  (instregex "^SST1[BH]_S(_[^I]|$)", "^SST1W(_[^ID]|$)")>;

// Scatter stores, non-_I opcode names, D/_D forms.
def A64FXWrite_SST1_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_RZ],
  (instregex "^SST1[BHW]_D(_[^I]|$)", "^SST1D(_[^I]|$)")>;

// Scatter stores, _I opcode names, W/_S forms.
def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [12, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_ZI],
  (instregex "^SST1[BH]_S_I", "^SST1W_I")>;

// Scatter stores, _I opcode names, D/_D forms.
def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_ZI],
  (instregex "^SST1[BHW]_D_I", "^SST1D_I")>;

// ST2 / ST3 / ST4.
//
// NOTE(review): for LD2/LD3/LD4 the lower-latency write is attached to the
// `_IMM` opcode names. Here the lower-latency `_RI` writes match the
// suffix-less names (`^ST<n>[WD]$`) and the `_RR` writes match
// `^ST<n>[WD]_I`, which looks swapped. The mapping is left unchanged;
// confirm against the A64FX microarchitecture manual.

def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [8, 9];
}
def : InstRW<[A64FXWrite_ST2_BH], (instregex "^ST2[BH]")>;

def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]$")>;

def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 3];
}
def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]_I")>;

def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [12, 13];
}
def : InstRW<[A64FXWrite_ST3_BH], (instregex "^ST3[BH]")>;

def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3, 3];
}
def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]$")>;

def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3, 4];
}
def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]_I")>;

def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [16, 17];
}
def : InstRW<[A64FXWrite_ST4_BH], (instregex "^ST4[BH]")>;

def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4];
}
def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]$")>;

def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [4, 5];
}
def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]_I")>;

// STR, _PXI form.
def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_P], (instrs STR_PXI)>;

// STR, _ZXI form.
def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_Z], (instrs STR_ZXI)>;

// WHILEL*.
def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 4;
}
def : InstRW<[A64FXWrite_WHILE], (instregex "^WHILEL._P")>;

// WRFFR.
def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_WRFFR], (instrs WRFFR)>;

} // SchedModel = A64FXModel