//===- TargetSchedule.td - Target Independent Scheduling ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the target-independent scheduling interfaces which should
// be implemented by each target which is using TableGen based scheduling.
//
// The SchedMachineModel is defined by subtargets for three categories of data:
// 1. Basic properties for coarse grained instruction cost model.
// 2. Scheduler Read/Write resources for simple per-opcode cost model.
// 3. Instruction itineraries for detailed reservation tables.
//
// (1) Basic properties are defined by the SchedMachineModel
//     class. Target hooks allow subtargets to associate opcodes with
//     those properties.
//
// (2) A per-operand machine model can be implemented in any
//     combination of the following ways:
//
//     A. Associate per-operand SchedReadWrite types with Instructions by
//     modifying the Instruction definition to inherit from Sched. For
//     each subtarget, define WriteRes and ReadAdvance to associate
//     processor resources and latency with each SchedReadWrite type.
//
//     B. In each instruction definition, name an ItineraryClass. For each
//     subtarget, define ItinRW entries to map ItineraryClass to
//     per-operand SchedReadWrite types. Unlike method A, these types may
//     be subtarget specific and can be directly associated with resources
//     by defining SchedWriteRes and SchedReadAdvance.
//
//     C. In the subtarget, map SchedReadWrite types to specific
//     opcodes. This overrides any SchedReadWrite types or
//     ItineraryClasses defined by the Instruction. As in method B, the
//     subtarget can directly associate resources with SchedReadWrite
//     types by defining SchedWriteRes and SchedReadAdvance.
//
//     D. In either the target or subtarget, define SchedWriteVariant or
//     SchedReadVariant to map one SchedReadWrite type onto another
//     sequence of SchedReadWrite types. This allows dynamic selection of
//     an instruction's machine model via custom C++ code. It also allows
//     a machine-independent SchedReadWrite type to map to a sequence of
//     machine-dependent types.
//
// (3) A per-pipeline-stage machine model can be implemented by providing
//     Itineraries in addition to mapping instructions to ItineraryClasses.
//===----------------------------------------------------------------------===//

// Include legacy support for instruction itineraries.
include "llvm/Target/TargetItinerary.td"

class Predicate; // Forward def

// DAG operator that interprets the DAG args as Instruction defs.
def instrs;

// DAG operator that interprets each DAG arg as a regex pattern for
// matching Instruction opcode names.
// The regex must match the beginning of the opcode (as in Python re.match).
// To avoid matching prefixes, append '$' to the pattern.
def instregex;

// Define the SchedMachineModel and provide basic properties for
// coarse grained instruction cost model. Default values for the
// properties are defined in MCSchedModel. A value of "-1" in the
// target description's SchedMachineModel indicates that the property
// is not overridden by the target.
//
// Target hooks allow subtargets to associate LoadLatency and
// HighLatency with groups of opcodes.
//
// See MCSchedule.h for detailed comments.
class SchedMachineModel {
  int IssueWidth = -1; // Max micro-ops that may be scheduled per cycle.
  int MicroOpBufferSize = -1; // Max micro-ops that can be buffered.
  int LoopMicroOpBufferSize = -1; // Max micro-ops that can be buffered for
                                  // optimized loop dispatch/execution.
  int LoadLatency = -1; // Cycles for loads to access the cache.
  int HighLatency = -1; // Approximation of cycles for "high latency" ops.
  int MispredictPenalty = -1; // Extra cycles for a mispredicted branch.

  // Per-cycle resources tables.
  ProcessorItineraries Itineraries = NoItineraries;

  bit PostRAScheduler = false; // Enable Post RegAlloc Scheduler pass.

  // Subtargets that define a model for only a subset of instructions
  // that have a scheduling class (itinerary class or SchedRW list)
  // and may actually be generated for that subtarget must clear this
  // bit. Otherwise, the scheduler considers an unmodelled opcode to
  // be an error. This bit should only be cleared during initial bringup,
  // or there will be no way to catch simple errors in the model
  // resulting from changes to the instruction definitions.
  bit CompleteModel = true;

  // Indicates that we should do full overlap checking for multiple InstrRWs
  // defining the same instructions within the same SchedMachineModel.
  // FIXME: Remove when all in tree targets are clean with the full check
  // enabled.
  bit FullInstRWOverlapCheck = true;

  // A processor may only implement part of the published ISA, due to either
  // new ISA extensions (e.g. Pentium 4 doesn't have AVX) or implementation
  // (ARM/MIPS/PowerPC/SPARC soft float cores).
  //
  // For a processor which doesn't support some feature(s), the schedule model
  // can use:
  //
  // let UnsupportedFeatures = [HaveA,..,HaveY];
  //
  // to skip the checks for scheduling information when building LLVM for
  // instructions which have any of the listed predicates in their Predicates
  // field.
  list<Predicate> UnsupportedFeatures = [];

  bit NoModel = false; // Special tag to indicate missing machine model.

  // Tells the MachineScheduler whether or not to track resource usage
  // using intervals via ResourceSegments (see
  // llvm/include/llvm/CodeGen/MachineScheduler.h).
  bit EnableIntervals = false;
}

def NoSchedModel : SchedMachineModel {
  let NoModel = true;
  let CompleteModel = false;
}

// Define a kind of processor resource that may be common across
// similar subtargets.
class ProcResourceKind;

// Define a number of interchangeable processor resources. NumUnits
// determines the throughput of instructions that require the resource.
//
// An optional Super resource may be given to model these resources as
// a subset of the more general super resources. Using one of these
// resources implies using one of the super resources.
//
// ProcResourceUnits normally model a few buffered resources within an
// out-of-order engine. Buffered resources may be held for multiple
// clock cycles, but the scheduler does not pin them to a particular
// clock cycle relative to instruction dispatch. Setting BufferSize=0
// changes this to an in-order issue/dispatch resource. In this case,
// the scheduler counts down from the cycle that the instruction
// issues in-order, forcing a stall whenever a subsequent instruction
// requires the same resource until the number of ReleaseAtCycles
// specified in WriteRes expire. Setting BufferSize=1 changes this to
// an in-order latency resource. In this case, the scheduler models
// producer/consumer stalls between instructions that use the
// resource.
//
// Examples (all assume an out-of-order engine):
//
// Use BufferSize = -1 for "issue ports" fed by a unified reservation
// station. Here the size of the reservation station is modeled by
// MicroOpBufferSize, which should be the minimum size of either the
// register rename pool, unified reservation station, or reorder
// buffer.
//
// Use BufferSize = 0 for resources that force "dispatch/issue
// groups". (Different processors define dispatch/issue
// differently. Here we refer to the stage between decoding into micro-ops
// and moving them into a reservation station.) Normally NumMicroOps
// is sufficient to limit dispatch/issue groups. However, some
// processors can form groups with only certain combinations of
// instruction types. e.g. POWER7.
//
// Use BufferSize = 1 for in-order execution units. This is used for
// an in-order pipeline within an out-of-order core where scheduling
// dependent operations back-to-back is guaranteed to cause a
// bubble. e.g. Cortex-a9 floating-point.
//
// Use BufferSize > 1 for out-of-order execution units with a
// separate reservation station. This simply models the size of the
// reservation station.
//
// To model both dispatch/issue groups and in-order execution units,
// create two types of units, one with BufferSize=0 and one with
// BufferSize=1.
//
// SchedModel ties these units to a processor for any stand-alone defs
// of this class.
class ProcResourceUnits<ProcResourceKind kind, int num> {
  ProcResourceKind Kind = kind;
  int NumUnits = num;
  ProcResourceKind Super = ?;
  int BufferSize = -1;
  SchedMachineModel SchedModel = ?;
}

// EponymousProcResourceKind helps implement ProcResourceUnits by
// allowing a ProcResourceUnits definition to reference itself. It
// should not be referenced anywhere else.
def EponymousProcResourceKind : ProcResourceKind;

// Subtargets typically define processor resource kind and number of
// units in one place.
class ProcResource<int num> : ProcResourceKind,
  ProcResourceUnits<EponymousProcResourceKind, num>;

// Group a list of ProcResources under a single ProcResourceKind, so that
// the group as a whole can be named wherever a ProcResourceKind is
// accepted. The group carries its own BufferSize and SchedModel fields.
class ProcResGroup<list<ProcResource> resources> : ProcResourceKind {
  list<ProcResource> Resources = resources;
  SchedMachineModel SchedModel = ?;
  int BufferSize = -1;
}

// A target architecture may define SchedReadWrite types and associate
// them with instruction operands.
class SchedReadWrite;

// List the per-operand types that map to the machine model of an
// instruction. One SchedWrite type must be listed for each explicit
// def operand in order. Additional SchedWrite types may optionally be
// listed for implicit def operands.  SchedRead types may optionally
// be listed for use operands in order. The order of defs relative to
// uses is insignificant. This way, the same SchedReadWrite list may
// be used for multiple forms of an operation. For example, a
// two-address instruction could have two tied operands or a single
// operand that both reads and writes a reg. In both cases we have a
// single SchedWrite and single SchedRead in any order.
class Sched<list<SchedReadWrite> schedrw> {
  list<SchedReadWrite> SchedRW = schedrw;
}

// Define a scheduler resource associated with a def operand.
class SchedWrite : SchedReadWrite;
def NoWrite : SchedWrite;

// Define a scheduler resource associated with a use operand.
class SchedRead  : SchedReadWrite;

// Define a SchedWrite that is modeled as a sequence of other
// SchedWrites with additive latency. This allows a single operand to
// be mapped to the resources composed from a set of previously defined
// SchedWrites.
//
// If the final write in this sequence is a SchedWriteVariant marked
// Variadic, then the list of prior writes are distributed across all
// operands after resolving the predicate for the final write.
//
// SchedModel silences warnings but is ignored.
class WriteSequence<list<SchedWrite> writes, int rep = 1> : SchedWrite {
  list<SchedWrite> Writes = writes;
  int Repeat = rep;
  SchedMachineModel SchedModel = ?;
}

// Define values common to WriteRes and SchedWriteRes.
//
// SchedModel ties these resources to a processor.
class ProcWriteResources<list<ProcResourceKind> resources> {
  list<ProcResourceKind> ProcResources = resources;
  /// Cycle at which the resource will be released by an instruction,
  /// relative to the cycle in which the instruction is issued
  /// (assuming no stalls in between).
  list<int> ReleaseAtCycles = [];
  /// Cycle at which the resource will be acquired by an instruction,
  /// relative to the cycle in which the instruction is issued
  /// (assuming no stalls in between).
  list<int> AcquireAtCycles = [];
  int Latency = 1;
  int NumMicroOps = 1;
  bit BeginGroup = false;
  bit EndGroup = false;
  // Allow a processor to mark some scheduling classes as unsupported
  // for stronger verification.
  bit Unsupported = false;
  // Allow a processor to mark some scheduling classes as single-issue.
  // SingleIssue is an alias for Begin/End Group.
  bit SingleIssue = false;
  // An instruction is allowed to retire out-of-order if RetireOOO is
  // true for at least one of its writes. This field is only used by
  // MCA for in-order subtargets, and is ignored for other targets.
  bit RetireOOO = false;
  SchedMachineModel SchedModel = ?;
}

// Define the resources and latency of a SchedWrite. This will be used
// directly by targets that have no itinerary classes. In this case,
// SchedWrite is defined by the target, while WriteRes is
// defined by the subtarget, and maps the SchedWrite to processor
// resources.
//
// If a target already has itinerary classes, SchedWriteRes can
// be used instead to define subtarget specific SchedWrites and map
// them to processor resources in one place. Then ItinRW can map
// itinerary classes to the subtarget's SchedWrites.
//
// ProcResources indicates the set of resources consumed by the write.
// Optionally, ReleaseAtCycles indicates the number of cycles the
// resource is consumed. Each ReleaseAtCycles item is paired with the
// ProcResource item at the same position in its list. ReleaseAtCycles
// can be `[]`: in that case, all resources are consumed for a single
// cycle, regardless of latency, which models a fully pipelined processing
// unit. A value of 0 for ReleaseAtCycles means that the resource must
// be available but is not consumed, which is only relevant for
// unbuffered resources.
//
// By default, each SchedWrite takes one micro-op, which is counted
// against the processor's IssueWidth limit. If an instruction can
// write multiple registers with a single micro-op, the subtarget
// should define one of the writes to be zero micro-ops. If a
// subtarget requires multiple micro-ops to write a single result, it
// should either override the write's NumMicroOps to be greater than 1
// or require additional writes. Extra writes can be required either
// by defining a WriteSequence, or simply listing extra writes in the
// instruction's list of writers beyond the number of "def"
// operands. The scheduler assumes that all micro-ops must be
// dispatched in the same cycle. These micro-ops may be required to
// begin or end the current dispatch group.
class WriteRes<SchedWrite write, list<ProcResourceKind> resources>
  : ProcWriteResources<resources> {
  SchedWrite WriteType = write;
}

// Directly name a set of WriteResources defining a new SchedWrite
// type at the same time. This class is unaware of its SchedModel so
// must be referenced by InstRW or ItinRW.
class SchedWriteRes<list<ProcResourceKind> resources> : SchedWrite,
  ProcWriteResources<resources>;

// Define values common to ReadAdvance and SchedReadAdvance.
//
// SchedModel ties these resources to a processor.
class ProcReadAdvance<int cycles, list<SchedWrite> writes = []> {
  int Cycles = cycles;
  list<SchedWrite> ValidWrites = writes;
  // Allow a processor to mark some scheduling classes as unsupported
  // for stronger verification.
  bit Unsupported = false;
  SchedMachineModel SchedModel = ?;
}

// A processor may define a ReadAdvance associated with a SchedRead
// to reduce latency of a prior write by N cycles. A negative advance
// effectively increases latency, which may be used for cross-domain
// stalls.
//
// A ReadAdvance may be associated with a list of SchedWrites
// to implement pipeline bypass. The Writes list may be empty to
// indicate operands that are always read this number of Cycles later
// than a normal register read, allowing the read's parent instruction
// to issue earlier relative to the writer.
class ReadAdvance<SchedRead read, int cycles, list<SchedWrite> writes = []>
  : ProcReadAdvance<cycles, writes> {
  SchedRead ReadType = read;
}

// Directly associate a new SchedRead type with a delay and optional
// pipeline bypass. For use with InstRW or ItinRW.
class SchedReadAdvance<int cycles, list<SchedWrite> writes = []> : SchedRead,
  ProcReadAdvance<cycles, writes>;

// Define SchedRead defaults. Reads seldom need special treatment.
def ReadDefault : SchedRead;
def NoReadAdvance : SchedReadAdvance<0>;

// Define shared code that will be in the same scope as all
// SchedPredicates. Available variables are:
// (const MachineInstr *MI, const TargetSchedModel *SchedModel)
class PredicateProlog<code c> {
  code Code = c;
}

// Base class for scheduling predicates.
class SchedPredicateBase;

// A scheduling predicate whose logic is defined by a MCInstPredicate.
// This can directly be used by SchedWriteVariant definitions.
class MCSchedPredicate<MCInstPredicate P> : SchedPredicateBase {
  MCInstPredicate Pred = P;
  SchedMachineModel SchedModel = ?;
}

// Define a predicate to determine which SchedVariant applies to a
// particular MachineInstr. The code snippet is used as an
// if-statement's expression. Available variables are MI, SchedModel,
// and anything defined in a PredicateProlog.
//
// SchedModel silences warnings but is ignored.
class SchedPredicate<code pred> : SchedPredicateBase {
  SchedMachineModel SchedModel = ?;
  code Predicate = pred;
}

// Define a predicate to be typically used as the default case in a
// SchedVariant.  If the SchedVariant does not use any other predicate based on
// MCSchedPredicate, this is the default scheduling case used by llvm-mca.
def NoSchedPred : MCSchedPredicate<TruePred>;

// Associate a predicate with a list of SchedReadWrites. By default,
// the selected SchedReadWrites are still associated with a single
// operand and assumed to execute sequentially with additive
// latency. However, if the parent SchedWriteVariant or
// SchedReadVariant is marked "Variadic", then each Selected
// SchedReadWrite is mapped in place to the instruction's variadic
// operands. In this case, latency is not additive. If the current Variant
// is already part of a Sequence, then that entire chain leading up to
// the Variant is distributed over the variadic operands.
class SchedVar<SchedPredicateBase pred, list<SchedReadWrite> selected> {
  SchedPredicateBase Predicate = pred;
  list<SchedReadWrite> Selected = selected;
}

// SchedModel silences warnings but is ignored.
class SchedVariant<list<SchedVar> variants> {
  list<SchedVar> Variants = variants;
  bit Variadic = false;
  SchedMachineModel SchedModel = ?;
}

// A SchedWriteVariant is a single SchedWrite type that maps to a list
// of SchedWrite types under the conditions defined by its predicates.
//
// A Variadic write is expanded to cover multiple "def" operands. The
// Selected list of the chosen SchedVar is then interpreted as one write
// per-operand instead of the usual sequential writes feeding a single
// operand.
class SchedWriteVariant<list<SchedVar> variants> : SchedWrite,
  SchedVariant<variants> {
}

// A SchedReadVariant is a single SchedRead type that maps to a list
// of SchedRead types under the conditions defined by its predicates.
//
// A Variadic read is expanded to cover multiple "readsReg" operands as
// explained above.
class SchedReadVariant<list<SchedVar> variants> : SchedRead,
  SchedVariant<variants> {
}

// Map a set of opcodes to a list of SchedReadWrite types. This allows
// the subtarget to easily override specific operations.
//
// SchedModel ties this opcode mapping to a processor.
class InstRW<list<SchedReadWrite> rw, dag instrlist> {
  list<SchedReadWrite> OperandReadWrites = rw;
  dag Instrs = instrlist;
  SchedMachineModel SchedModel = ?;
  // Allow a subtarget to mark some instructions as unsupported.
  bit Unsupported = false;
}

// Map a set of itinerary classes to SchedReadWrite resources. This is
// used to bootstrap a target (e.g. ARM) when itineraries already
// exist and changing InstrInfo is undesirable.
//
// SchedModel ties this ItineraryClass mapping to a processor.
class ItinRW<list<SchedReadWrite> rw, list<InstrItinClass> iic> {
  list<InstrItinClass> MatchedItinClasses = iic;
  list<SchedReadWrite> OperandReadWrites = rw;
  SchedMachineModel SchedModel = ?;
}

// Alias a target-defined SchedReadWrite to a processor specific
// SchedReadWrite. This allows a subtarget to easily map a
// SchedReadWrite type onto a WriteSequence, SchedWriteVariant, or
// SchedReadVariant.
//
// SchedModel will usually be provided by surrounding let statement
// and ties this SchedAlias mapping to a processor.
class SchedAlias<SchedReadWrite match, SchedReadWrite alias> {
  SchedReadWrite MatchRW = match;
  SchedReadWrite AliasRW = alias;
  SchedMachineModel SchedModel = ?;
}

// Allow the definition of processor register files for register renaming
// purposes.
//
// Each processor register file declares:
//  - The set of registers that can be renamed.
//  - The number of physical registers which can be used for register renaming
//    purpose.
//  - The cost of a register rename.
//  - The set of registers that allow move elimination.
//  - The maximum number of moves that can be eliminated every cycle.
//  - Whether move elimination is limited to register moves whose input
//    is known to be zero.
//
// The cost of a rename is the number of physical registers allocated by the
// register alias table to map the new definition. By default, a register can
// be renamed at the cost of a single physical register.  Note that register
// costs are defined at register class granularity (see field `Costs`).
//
// The set of registers that are subject to register renaming is declared using
// a list of register classes (see field `RegClasses`). An empty list of
// register classes means: all the logical registers defined by the target can
// be fully renamed.
//
// A register R can be renamed if its register class appears in the `RegClasses`
// set. When R is written, a new alias is allocated at the cost of one or more
// physical registers; as a result, false dependencies on R are removed.
//
// A sub-register V of register R is implicitly part of the same register file.
// However, V is only renamed if its register class is part of `RegClasses`.
// Otherwise, the processor keeps it (as well as any other different part
// of R) together with R, and a write of V always causes a compulsory read of R.
//
// This is what happens for example on AMD processors (at least from Bulldozer
// onwards), where AL and AH are not treated as independent from AX, and AX is
// not treated as independent from EAX. A write to AL has an implicit false
// dependency on the last write to EAX (or a portion of EAX).  As a consequence,
// a write to AL cannot go in parallel with a write to AH.
//
// There is no false dependency if the partial register write belongs to a
// register class that is in `RegClasses`.
// There is also no penalty for writes that "clear the content of a
// super-register" (see MC/MCInstrAnalysis.h - method
// MCInstrAnalysis::clearsSuperRegisters()).
// On x86-64, 32-bit GPR writes implicitly zero the upper half of the underlying
// physical register, effectively removing any false dependencies with the
// previous register definition.
//
// TODO: This implementation assumes that there is no limit in the number of
// renames per cycle, which might not be true for all hardware or register
// classes. Also, there is no limit to how many times the same logical register
// can be renamed during the same cycle.
//
// TODO: we don't currently model merge penalties for the case where a write to
// a part of a register is followed by a read from a larger part of the same
// register. On some Intel chips, different parts of a GPR can be stored in
// different physical registers. However, there is a cost to pay for when the
// partial write is combined with the previous super-register definition.  We
// should add support for these cases, and correctly model merge problems with
// partial register accesses.
//
// Field MaxMovesEliminatedPerCycle specifies how many moves can be eliminated
// every cycle. A default value of zero for that field means: there is no limit
// to the number of moves that can be eliminated by this register file.
//
// An instruction MI is a candidate for move elimination if a call to
// method TargetSubtargetInfo::isOptimizableRegisterMove(MI) returns true (see
// llvm/CodeGen/TargetSubtargetInfo.h, and llvm/MC/MCInstrAnalysis.h).
//
// Subtargets can instantiate tablegen class IsOptimizableRegisterMove (see
// llvm/Target/TargetInstrPredicate.td) to customize the set of move elimination
// candidates. By default, no instruction is a valid move elimination candidate.
//
// A register move MI is eliminated only if:
//  - MI is a move elimination candidate.
//  - The destination register is from a register class that allows move
//    elimination (see field `AllowMoveElimination` below).
//  - Constraints on the move kind, and the maximum number of moves that can be
//    eliminated per cycle are all met.

class RegisterFile<int numPhysRegs, list<RegisterClass> Classes = [],
                   list<int> Costs = [], list<bit> AllowMoveElim = [],
                   int MaxMoveElimPerCy = 0, bit AllowZeroMoveElimOnly = false> {
  list<RegisterClass> RegClasses = Classes;
  list<int> RegCosts = Costs;
  list<bit> AllowMoveElimination = AllowMoveElim;
  int NumPhysRegs = numPhysRegs;
  int MaxMovesEliminatedPerCycle = MaxMoveElimPerCy;
  bit AllowZeroMoveEliminationOnly = AllowZeroMoveElimOnly;
  SchedMachineModel SchedModel = ?;
}

// Describe the retire control unit.
// A retire control unit specifies the size of the reorder buffer, as well as
// the maximum number of opcodes that can be retired every cycle.
// A value less-than-or-equal-to zero for field 'ReorderBufferSize' means: "the
// size is unknown". The idea is that external tools can fall-back to using
// field MicroOpBufferSize in SchedModel if the reorder buffer size is unknown.
// A zero or negative value for field 'MaxRetirePerCycle' means "no
// restrictions on the number of instructions retired per cycle".
// Models can optionally specify up to one instance of RetireControlUnit per
// scheduling model.
class RetireControlUnit<int bufferSize, int retirePerCycle> {
  int ReorderBufferSize = bufferSize;
  int MaxRetirePerCycle = retirePerCycle;
  SchedMachineModel SchedModel = ?;
}

// Base class for Load/StoreQueue.  It is used to identify processor resources
// which describe load/store queues in the LS unit.
class MemoryQueue<ProcResourceKind PR> {
  ProcResourceKind QueueDescriptor = PR;
  SchedMachineModel SchedModel = ?;
}

class LoadQueue<ProcResourceKind LDQueue> : MemoryQueue<LDQueue>;
class StoreQueue<ProcResourceKind STQueue> : MemoryQueue<STQueue>;

// The target instruction that FusionPredicate will be evaluated on.
class FusionTarget;
def first_fusion_target : FusionTarget;
def second_fusion_target : FusionTarget;
def both_fusion_target : FusionTarget;

// Base class of FusionPredicate, etc. The available variables are:
// * const TargetInstrInfo &TII
// * const TargetSubtargetInfo &STI
// * const MachineRegisterInfo &MRI
// * const MachineInstr *FirstMI
// * const MachineInstr &SecondMI
class FusionPredicate<FusionTarget target> {
  FusionTarget Target = target;
}
class FirstFusionPredicate: FusionPredicate<first_fusion_target>;
class SecondFusionPredicate: FusionPredicate<second_fusion_target>;
class BothFusionPredicate: FusionPredicate<both_fusion_target>;

// FusionPredicate with raw code predicate.
class FusionPredicateWithCode<code pred> : FusionPredicate<both_fusion_target> {
  code Predicate = pred;
}

// FusionPredicate with MCInstPredicate.
class FusionPredicateWithMCInstPredicate<FusionTarget target, MCInstPredicate pred>
  : FusionPredicate<target> {
  MCInstPredicate Predicate = pred;
}
class FirstFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
  : FusionPredicateWithMCInstPredicate<first_fusion_target, pred>;
class SecondFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
  : FusionPredicateWithMCInstPredicate<second_fusion_target, pred>;
// The pred will be applied on both firstMI and secondMI.
class BothFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
  : FusionPredicateWithMCInstPredicate<both_fusion_target, pred>;

// Tie firstOpIdx and secondOpIdx. The operand of `FirstMI` at position
// `firstOpIdx` should be the same as the operand of `SecondMI` at position
// `secondOpIdx`.
class TieReg<int firstOpIdx, int secondOpIdx> : BothFusionPredicate {
  int FirstOpIdx = firstOpIdx;
  int SecondOpIdx = secondOpIdx;
}

// A predicate for wildcard. The generated code will be like:
// ```
// if (!FirstMI)
//   return ReturnValue;
// ```
class WildcardPred<bit ret> : FirstFusionPredicate {
  bit ReturnValue = ret;
}
def WildcardFalse : WildcardPred<0>;
def WildcardTrue : WildcardPred<1>;

// Indicates that the destination register of `FirstMI` should have one use if
// it is a virtual register.
class OneUsePred : FirstFusionPredicate;
def OneUse : OneUsePred;

// Handled by MacroFusionPredicatorEmitter backend.
// The generated predicator will be like:
// ```
// bool isNAME(const TargetInstrInfo &TII,
//             const TargetSubtargetInfo &STI,
//             const MachineInstr *FirstMI,
//             const MachineInstr &SecondMI) {
//   auto &MRI = SecondMI.getMF()->getRegInfo();
//   /* Predicates */
//   return true;
// }
// ```
class Fusion<string name, string fieldName, string desc, list<FusionPredicate> predicates>
  : SubtargetFeature<name, fieldName, "true", desc> {
  list<FusionPredicate> Predicates = predicates;
}

// The generated predicator will be like:
// ```
// bool isNAME(const TargetInstrInfo &TII,
//             const TargetSubtargetInfo &STI,
//             const MachineInstr *FirstMI,
//             const MachineInstr &SecondMI) {
//   auto &MRI = SecondMI.getMF()->getRegInfo();
//   /* Prolog */
//   /* Predicate for `SecondMI` */
//   /* Wildcard */
//   /* Predicate for `FirstMI` */
//   /* Check One Use */
//   /* Tie registers */
//   /* Epilog */
//   return true;
// }
// ```
class SimpleFusion<string name, string fieldName, string desc,
                   MCInstPredicate firstPred, MCInstPredicate secondPred,
                   list<FusionPredicate> prolog = [],
                   list<FusionPredicate> epilog = []>
  : Fusion<name, fieldName, desc,
           !listconcat(
              prolog,
              [
                SecondFusionPredicateWithMCInstPredicate<secondPred>,
                WildcardTrue,
                FirstFusionPredicateWithMCInstPredicate<firstPred>,
                SecondFusionPredicateWithMCInstPredicate<
                  CheckAny<[
                    CheckIsVRegOperand<0>,
                    CheckSameRegOperand<0, 1>
                  ]>>,
                OneUse,
                TieReg<0, 1>,
              ],
              epilog)>;