//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting the memory fold tables of
// the X86 backend instructions.
//
//===----------------------------------------------------------------------===//

#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "X86RecognizableInstr.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/X86FoldTablesUtils.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <set>

using namespace llvm;
using namespace X86Disassembler;

namespace {
// Represents an entry in the manual mapped instructions set.
struct ManualMapEntry {
  const char *RegInstStr;
  const char *MemInstStr;
  uint16_t Strategy;
};

// List of instructions requiring explicitly aligned memory.
const char *ExplicitAlign[] = {
    "MOVDQA",    "MOVAPS",     "MOVAPD",     "MOVNTPS",   "MOVNTPD",
    "MOVNTDQ",   "MOVNTDQA",   "SHA1MSG1",   "SHA1MSG2",  "SHA1NEXTE",
    "SHA1RNDS4", "SHA256MSG1", "SHA256MSG2", "SHA256RNDS2"};

// List of instructions NOT requiring explicit memory alignment.
const char *ExplicitUnalign[] = {"MOVDQU",    "MOVUPS",    "MOVUPD",
                                 "PCMPESTRM", "PCMPESTRI", "PCMPISTRM",
                                 "PCMPISTRI"};

// Manually specified (RegInst, MemInst, flags) folding entries, expanded from
// the ENTRY records in X86ManualFoldTables.def via X-macro inclusion.
const ManualMapEntry ManualMapSet[] = {
#define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},
#include "X86ManualFoldTables.def"
};

// Instructions that must never be folded, expanded from the NOFOLD records in
// X86ManualFoldTables.def.
const std::set<StringRef> NoFoldSet = {
#define NOFOLD(INSN) #INSN,
#include "X86ManualFoldTables.def"
};

// Returns true if the instruction's name contains any of the ExplicitAlign
// substrings, i.e. its memory operand requires explicit alignment.
static bool isExplicitAlign(const CodeGenInstruction *Inst) {
  return any_of(ExplicitAlign, [Inst](const char *InstStr) {
    return Inst->TheDef->getName().contains(InstStr);
  });
}

// Returns true if the instruction's name contains any of the ExplicitUnalign
// substrings, i.e. its memory operand explicitly tolerates unaligned access.
static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
  return any_of(ExplicitUnalign, [Inst](const char *InstStr) {
    return Inst->TheDef->getName().contains(InstStr);
  });
}

class X86FoldTablesEmitter {
  RecordKeeper &Records;
  CodeGenTarget Target;

  // Represents an entry in the folding table
  class X86FoldTableEntry {
    const CodeGenInstruction *RegInst;
    const CodeGenInstruction *MemInst;

  public:
    bool NoReverse = false;
    bool NoForward = false;
    bool FoldLoad = false;
    bool FoldStore = false;
    // Element type broadcast by the memory form (BCAST_NONE if the entry is
    // not a broadcast fold).
    enum BcastType {
      BCAST_NONE,
      BCAST_D,
      BCAST_Q,
      BCAST_SS,
      BCAST_SD,
      BCAST_SH,
    };
    BcastType BroadcastKind = BCAST_NONE;

    Align Alignment;

    X86FoldTableEntry() = default;
    X86FoldTableEntry(const CodeGenInstruction *RegInst,
                      const CodeGenInstruction *MemInst)
        : RegInst(RegInst), MemInst(MemInst) {}

    // Prints this entry as one C++ initializer line:
    //   {X86::RegOpc, X86::MemOpc, <TB_* flags or 0>},
    void print(formatted_raw_ostream &OS) const {
      OS.indent(2);
      OS << "{X86::" << RegInst->TheDef->getName() << ", ";
      OS << "X86::" << MemInst->TheDef->getName() << ", ";

      std::string Attrs;
      if (FoldLoad)
        Attrs += "TB_FOLDED_LOAD|";
      if (FoldStore)
        Attrs += "TB_FOLDED_STORE|";
      if (NoReverse)
        Attrs += "TB_NO_REVERSE|";
      if (NoForward)
        Attrs += "TB_NO_FORWARD|";
      if (Alignment != Align(1))
        Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|";
      switch (BroadcastKind) {
      case BCAST_NONE:
        break;
      case BCAST_D:
        Attrs += "TB_BCAST_D|";
        break;
      case BCAST_Q:
        Attrs += "TB_BCAST_Q|";
        break;
      case BCAST_SS:
        Attrs += "TB_BCAST_SS|";
        break;
      case BCAST_SD:
        Attrs += "TB_BCAST_SD|";
        break;
      case BCAST_SH:
        Attrs += "TB_BCAST_SH|";
        break;
      }

      // Drop the trailing '|'; emit "0" when no flag was set.
      StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
      if (SimplifiedAttrs.empty())
        SimplifiedAttrs = "0";

      OS << SimplifiedAttrs << "},\n";
    }

#ifndef NDEBUG
    // Check that Uses and Defs are same after memory fold.
    void checkCorrectness() const {
      auto &RegInstRec = *RegInst->TheDef;
      auto &MemInstRec = *MemInst->TheDef;
      auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses");
      auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses");
      auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs");
      auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs");
      if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem)
        report_fatal_error("Uses/Defs couldn't be changed after folding " +
                           RegInstRec.getName() + " to " +
                           MemInstRec.getName());
    }
#endif
  };

  // NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the
  // enum of the instruction, which is computed in
  // CodeGenTarget::ComputeInstrsByEnum. So we should use the same comparator
  // here.
  // FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum?
  struct CompareInstrsByEnum {
    bool operator()(const CodeGenInstruction *LHS,
                    const CodeGenInstruction *RHS) const {
      assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");
      const auto &D1 = *LHS->TheDef;
      const auto &D2 = *RHS->TheDef;
      // Non-pseudo instructions sort before pseudos, then by name — the same
      // order ComputeInstrsByEnum produces for the instruction enum.
      return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
             std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
    }
  };

  typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,
                   CompareInstrsByEnum>
      FoldTable;
  // Table2Addr - Holds instructions whose memory form performs
  // load+store.
  //
  // Table#i - Holds instructions whose memory form
  // performs a load OR a store, and their #i'th operand is folded.
  //
  // BroadcastTable#i - Holds instructions whose memory form performs
  // a broadcast load and their #i'th operand is folded.
  FoldTable Table2Addr;
  FoldTable Table0;
  FoldTable Table1;
  FoldTable Table2;
  FoldTable Table3;
  FoldTable Table4;
  FoldTable BroadcastTable1;
  FoldTable BroadcastTable2;
  FoldTable BroadcastTable3;
  FoldTable BroadcastTable4;

public:
  X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}

  // run - Generate the 6 X86 memory fold tables.
  void run(raw_ostream &OS);

private:
  // Decides to which table to add the entry with the given instructions.
  // S sets the strategy of adding the TB_NO_REVERSE flag.
  void updateTables(const CodeGenInstruction *RegInst,
                    const CodeGenInstruction *MemInst, uint16_t S = 0,
                    bool IsManual = false, bool IsBroadcast = false);

  // Generates X86FoldTableEntry with the given instructions and fill it with
  // the appropriate flags, then adds it to a memory fold table.
  void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst,
                         const CodeGenInstruction *MemInst, uint16_t S,
                         unsigned FoldedIdx, bool IsManual);
  // Generates X86FoldTableEntry with the given instructions and adds it to a
  // broadcast table.
  void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst,
                         const CodeGenInstruction *MemInst);

  // Print the given table as a static const C++ array of type
  // X86FoldTableEntry.
  void printTable(const FoldTable &Table, StringRef TableName,
                  formatted_raw_ostream &OS) {
    OS << "static const X86FoldTableEntry " << TableName << "[] = {\n";

    for (auto &E : Table)
      E.second.print(OS);

    OS << "};\n\n";
  }
};

// Return true if one of the instruction's operands is a RST register class
static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
  return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
    return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";
  });
}

// Return true if one of the instruction's operands is a ptr_rc_tailcall
static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) {
  return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
    return OpIn.Rec->getName() == "ptr_rc_tailcall";
  });
}

// Decodes a tablegen BitsInit field (at most 8 bits, LSB first) into a byte.
static uint8_t byteFromBitsInit(const BitsInit *B) {
  unsigned N = B->getNumBits();
  assert(N <= 8 && "Field is too large for uint8_t!");

  uint8_t Value = 0;
  for (unsigned I = 0; I != N; ++I) {
    BitInit *Bit = cast<BitInit>(B->getBit(I));
    Value |= Bit->getValue() << I;
  }
  return Value;
}

// Returns true if the encoding Form is a register form that may serve as the
// source of a memory fold.
static bool mayFoldFromForm(uint8_t Form) {
  switch (Form) {
  default:
    return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r;
  case X86Local::MRMXr:
  case X86Local::MRMXrCC:
  case X86Local::MRMDestReg:
  case X86Local::MRMSrcReg:
  case X86Local::MRMSrcReg4VOp3:
  case X86Local::MRMSrcRegOp4:
  case X86Local::MRMSrcRegCC:
    return true;
  }
}

// Returns true if the encoding Form is a memory form that may serve as the
// destination of a memory fold.
static bool mayFoldToForm(uint8_t Form) {
  switch (Form) {
  default:
    return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m;
  case X86Local::MRMXm:
  case X86Local::MRMXmCC:
  case X86Local::MRMDestMem:
  case X86Local::MRMSrcMem:
  case X86Local::MRMSrcMem4VOp3:
  case X86Local::MRMSrcMemOp4:
  case X86Local::MRMSrcMemCC:
    return true;
  }
}

// Returns true if register form LHS may fold into the corresponding memory
// form RHS (each MRM*r form pairs with exactly one MRM*m form).
static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) {
  switch (LHS) {
  default:
    llvm_unreachable("Unexpected Form!");
  case X86Local::MRM0r:
    return RHS == X86Local::MRM0m;
  case X86Local::MRM1r:
    return RHS == X86Local::MRM1m;
  case X86Local::MRM2r:
    return RHS == X86Local::MRM2m;
  case X86Local::MRM3r:
    return RHS == X86Local::MRM3m;
  case X86Local::MRM4r:
    return RHS == X86Local::MRM4m;
  case X86Local::MRM5r:
    return RHS == X86Local::MRM5m;
  case X86Local::MRM6r:
    return RHS == X86Local::MRM6m;
  case X86Local::MRM7r:
    return RHS == X86Local::MRM7m;
  case X86Local::MRMXr:
    return RHS == X86Local::MRMXm;
  case X86Local::MRMXrCC:
    return RHS == X86Local::MRMXmCC;
  case X86Local::MRMDestReg:
    return RHS == X86Local::MRMDestMem;
  case X86Local::MRMSrcReg:
    return RHS == X86Local::MRMSrcMem;
  case X86Local::MRMSrcReg4VOp3:
    return RHS == X86Local::MRMSrcMem4VOp3;
  case X86Local::MRMSrcRegOp4:
    return RHS == X86Local::MRMSrcMemOp4;
  case X86Local::MRMSrcRegCC:
    return RHS == X86Local::MRMSrcMemCC;
  }
}

// Returns true if the operand's register class forbids the REX prefix.
static bool isNOREXRegClass(const Record *Op) {
  return Op->getName().contains("_NOREX");
}

// Function object - Operator() returns true if the given Reg instruction
// matches the Mem instruction of this object.
331 class IsMatch { 332 const CodeGenInstruction *MemInst; 333 const X86Disassembler::RecognizableInstrBase MemRI; 334 bool IsBroadcast; 335 const unsigned Variant; 336 337 public: 338 IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V) 339 : MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {} 340 341 bool operator()(const CodeGenInstruction *RegInst) { 342 X86Disassembler::RecognizableInstrBase RegRI(*RegInst); 343 const Record *RegRec = RegInst->TheDef; 344 const Record *MemRec = MemInst->TheDef; 345 346 // EVEX_B means different things for memory and register forms. 347 // register form: rounding control or SAE 348 // memory form: broadcast 349 if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B)) 350 return false; 351 if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B)) 352 return false; 353 354 if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form)) 355 return false; 356 357 // X86 encoding is crazy, e.g 358 // 359 // f3 0f c7 30 vmxon (%rax) 360 // f3 0f c7 f0 senduipi %rax 361 // 362 // This two instruction have similiar encoding fields but are unrelated 363 if (X86Disassembler::getMnemonic(MemInst, Variant) != 364 X86Disassembler::getMnemonic(RegInst, Variant)) 365 return false; 366 367 // Return false if any of the following fields of does not match. 
368 if (std::make_tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix, 369 RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, 370 RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L, 371 RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, 372 RegRI.HasEVEX_L2, RegRec->getValueAsBit("hasEVEX_RC"), 373 RegRec->getValueAsBit("hasLockPrefix"), 374 RegRec->getValueAsBit("hasNoTrackPrefix"), 375 RegRec->getValueAsBit("EVEX_W1_VEX_W0")) != 376 std::make_tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix, 377 MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, 378 MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L, 379 MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, 380 MemRI.HasEVEX_L2, MemRec->getValueAsBit("hasEVEX_RC"), 381 MemRec->getValueAsBit("hasLockPrefix"), 382 MemRec->getValueAsBit("hasNoTrackPrefix"), 383 MemRec->getValueAsBit("EVEX_W1_VEX_W0"))) 384 return false; 385 386 // Make sure the sizes of the operands of both instructions suit each other. 387 // This is needed for instructions with intrinsic version (_Int). 388 // Where the only difference is the size of the operands. 389 // For example: VUCOMISDZrm and VUCOMISDrm_Int 390 // Also for instructions that their EVEX version was upgraded to work with 391 // k-registers. For example VPCMPEQBrm (xmm output register) and 392 // VPCMPEQBZ128rm (k register output register). 393 unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); 394 unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); 395 unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); 396 unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); 397 398 // Instructions with one output in their memory form use the memory folded 399 // operand as source and destination (Read-Modify-Write). 400 unsigned RegStartIdx = 401 (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 
1 : 0; 402 403 bool FoundFoldedOp = false; 404 for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) { 405 Record *MemOpRec = MemInst->Operands[I].Rec; 406 Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec; 407 408 if (MemOpRec == RegOpRec) 409 continue; 410 411 if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) && 412 ((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) || 413 (isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec)))) 414 return false; 415 416 if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) && 417 (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec))) 418 return false; 419 420 if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) && 421 (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type"))) 422 return false; 423 424 // Only one operand can be folded. 425 if (FoundFoldedOp) 426 return false; 427 428 assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)); 429 FoundFoldedOp = true; 430 } 431 432 return FoundFoldedOp; 433 } 434 }; 435 436 } // end anonymous namespace 437 438 void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table, 439 const CodeGenInstruction *RegInst, 440 const CodeGenInstruction *MemInst, 441 uint16_t S, unsigned FoldedIdx, 442 bool IsManual) { 443 444 assert((IsManual || Table.find(RegInst) == Table.end()) && 445 "Override entry unexpectedly"); 446 X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst); 447 Record *RegRec = RegInst->TheDef; 448 Record *MemRec = MemInst->TheDef; 449 450 Result.NoReverse = S & TB_NO_REVERSE; 451 Result.NoForward = S & TB_NO_FORWARD; 452 Result.FoldLoad = S & TB_FOLDED_LOAD; 453 Result.FoldStore = S & TB_FOLDED_STORE; 454 Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT)); 455 if (IsManual) { 456 Table[RegInst] = Result; 457 return; 458 } 459 460 // Only table0 entries should explicitly specify a load or store flag. 
461 if (&Table == &Table0) { 462 unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs(); 463 unsigned RegInOpsNum = RegRec->getValueAsDag("InOperandList")->getNumArgs(); 464 // If the instruction writes to the folded operand, it will appear as an 465 // output in the register form instruction and as an input in the memory 466 // form instruction. 467 // If the instruction reads from the folded operand, it well appear as in 468 // input in both forms. 469 if (MemInOpsNum == RegInOpsNum) 470 Result.FoldLoad = true; 471 else 472 Result.FoldStore = true; 473 } 474 475 Record *RegOpRec = RegInst->Operands[FoldedIdx].Rec; 476 Record *MemOpRec = MemInst->Operands[FoldedIdx].Rec; 477 478 // Unfolding code generates a load/store instruction according to the size of 479 // the register in the register form instruction. 480 // If the register's size is greater than the memory's operand size, do not 481 // allow unfolding. 482 483 // the unfolded load size will be based on the register size. If that’s bigger 484 // than the memory operand size, the unfolded load will load more memory and 485 // potentially cause a memory fault. 486 if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec)) 487 Result.NoReverse = true; 488 489 // Check no-kz version's isMoveReg 490 StringRef RegInstName = RegRec->getName(); 491 unsigned DropLen = 492 RegInstName.ends_with("rkz") ? 2 : (RegInstName.ends_with("rk") ? 1 : 0); 493 Record *BaseDef = 494 DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr; 495 bool IsMoveReg = 496 BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInst->isMoveReg; 497 // A masked load can not be unfolded to a full load, otherwise it would access 498 // unexpected memory. A simple store can not be unfolded. 
499 if (IsMoveReg && (BaseDef || Result.FoldStore)) 500 Result.NoReverse = true; 501 502 uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits")); 503 if (isExplicitAlign(RegInst)) { 504 // The instruction require explicitly aligned memory. 505 BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize"); 506 Result.Alignment = Align(byteFromBitsInit(VectSize)); 507 } else if (!Enc && !isExplicitUnalign(RegInst) && 508 getMemOperandSize(MemOpRec) > 64) { 509 // Instructions with XOP/VEX/EVEX encoding do not require alignment while 510 // SSE packed vector instructions require a 16 byte alignment. 511 Result.Alignment = Align(16); 512 } 513 // Expand is only ever created as a masked instruction. It is not safe to 514 // unfold a masked expand because we don't know if it came from an expand load 515 // intrinsic or folding a plain load. If it is from a expand load intrinsic, 516 // Unfolding to plain load would read more elements and could trigger a fault. 517 if (RegRec->getName().contains("EXPAND")) 518 Result.NoReverse = true; 519 520 Table[RegInst] = Result; 521 } 522 523 void X86FoldTablesEmitter::addBroadcastEntry( 524 FoldTable &Table, const CodeGenInstruction *RegInst, 525 const CodeGenInstruction *MemInst) { 526 527 assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly"); 528 X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst); 529 530 Record *RegRec = RegInst->TheDef; 531 StringRef RegInstName = RegRec->getName(); 532 StringRef MemInstName = MemInst->TheDef->getName(); 533 Record *Domain = RegRec->getValueAsDef("ExeDomain"); 534 bool IsSSEPackedInt = Domain->getName() == "SSEPackedInt"; 535 if ((RegInstName.contains("DZ") || RegInstName.contains("DWZ") || 536 RegInstName.contains("Dr") || RegInstName.contains("I32")) && 537 IsSSEPackedInt) { 538 assert((MemInstName.contains("DZ") || RegInstName.contains("DWZ") || 539 MemInstName.contains("Dr") || MemInstName.contains("I32")) && 540 "Unmatched names for 
broadcast"); 541 Result.BroadcastKind = X86FoldTableEntry::BCAST_D; 542 } else if ((RegInstName.contains("QZ") || RegInstName.contains("QBZ") || 543 RegInstName.contains("Qr") || RegInstName.contains("I64")) && 544 IsSSEPackedInt) { 545 assert((MemInstName.contains("QZ") || MemInstName.contains("QBZ") || 546 MemInstName.contains("Qr") || MemInstName.contains("I64")) && 547 "Unmatched names for broadcast"); 548 Result.BroadcastKind = X86FoldTableEntry::BCAST_Q; 549 } else if ((RegInstName.contains("PS") || RegInstName.contains("F32") || 550 RegInstName.contains("CPH")) && 551 !RegInstName.contains("PH2PS")) { 552 assert((MemInstName.contains("PS") || MemInstName.contains("F32") || 553 MemInstName.contains("CPH")) && 554 "Unmatched names for broadcast"); 555 Result.BroadcastKind = X86FoldTableEntry::BCAST_SS; 556 } else if ((RegInstName.contains("PD") || RegInstName.contains("F64")) && 557 !RegInstName.contains("PH2PD")) { 558 assert((MemInstName.contains("PD") || MemInstName.contains("F64")) && 559 "Unmatched names for broadcast"); 560 Result.BroadcastKind = X86FoldTableEntry::BCAST_SD; 561 } else if (RegInstName.contains("PH")) { 562 assert(MemInstName.contains("PH") && "Unmatched names for broadcast"); 563 Result.BroadcastKind = X86FoldTableEntry::BCAST_SH; 564 } else { 565 errs() << RegInstName << ", " << MemInstName << "\n"; 566 llvm_unreachable("Name is not canoicalized for broadcast or " 567 "ExeDomain is incorrect"); 568 } 569 570 Table[RegInst] = Result; 571 } 572 573 void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst, 574 const CodeGenInstruction *MemInst, 575 uint16_t S, bool IsManual, 576 bool IsBroadcast) { 577 578 Record *RegRec = RegInst->TheDef; 579 Record *MemRec = MemInst->TheDef; 580 unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); 581 unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); 582 unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); 583 
unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); 584 585 // Instructions which Read-Modify-Write should be added to Table2Addr. 586 if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) { 587 assert(!IsBroadcast && "Read-Modify-Write can not be broadcast"); 588 // X86 would not unfold Read-Modify-Write instructions so add TB_NO_REVERSE. 589 addEntryWithFlags(Table2Addr, RegInst, MemInst, S | TB_NO_REVERSE, 0, 590 IsManual); 591 return; 592 } 593 594 if (MemInSize == RegInSize && MemOutSize == RegOutSize) { 595 // Load-Folding cases. 596 // If the i'th register form operand is a register and the i'th memory form 597 // operand is a memory operand, add instructions to Table#i. 598 for (unsigned I = RegOutSize, E = RegInst->Operands.size(); I < E; I++) { 599 Record *RegOpRec = RegInst->Operands[I].Rec; 600 Record *MemOpRec = MemInst->Operands[I].Rec; 601 // PointerLikeRegClass: For instructions like TAILJMPr, TAILJMPr64, 602 // TAILJMPr64_REX 603 if ((isRegisterOperand(RegOpRec) || 604 RegOpRec->isSubClassOf("PointerLikeRegClass")) && 605 isMemoryOperand(MemOpRec)) { 606 switch (I) { 607 case 0: 608 assert(!IsBroadcast && "BroadcastTable0 needs to be added"); 609 addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual); 610 return; 611 case 1: 612 IsBroadcast 613 ? addBroadcastEntry(BroadcastTable1, RegInst, MemInst) 614 : addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual); 615 return; 616 case 2: 617 IsBroadcast 618 ? addBroadcastEntry(BroadcastTable2, RegInst, MemInst) 619 : addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual); 620 return; 621 case 3: 622 IsBroadcast 623 ? addBroadcastEntry(BroadcastTable3, RegInst, MemInst) 624 : addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual); 625 return; 626 case 4: 627 IsBroadcast 628 ? 
addBroadcastEntry(BroadcastTable4, RegInst, MemInst) 629 : addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual); 630 return; 631 } 632 } 633 } 634 } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) { 635 // Store-Folding cases. 636 // If the memory form instruction performs a store, the *output* 637 // register of the register form instructions disappear and instead a 638 // memory *input* operand appears in the memory form instruction. 639 // For example: 640 // MOVAPSrr => (outs VR128:$dst), (ins VR128:$src) 641 // MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src) 642 Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec; 643 Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec; 644 if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) && 645 getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec)) { 646 assert(!IsBroadcast && "Store can not be broadcast"); 647 addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual); 648 } 649 } 650 } 651 652 void X86FoldTablesEmitter::run(raw_ostream &O) { 653 formatted_raw_ostream OS(O); 654 655 // Holds all memory instructions 656 std::vector<const CodeGenInstruction *> MemInsts; 657 // Holds all register instructions - divided according to opcode. 658 std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts; 659 660 ArrayRef<const CodeGenInstruction *> NumberedInstructions = 661 Target.getInstructionsByEnumValue(); 662 663 for (const CodeGenInstruction *Inst : NumberedInstructions) { 664 const Record *Rec = Inst->TheDef; 665 if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly")) 666 continue; 667 668 if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end()) 669 continue; 670 671 // - Instructions including RST register class operands are not relevant 672 // for memory folding (for further details check the explanation in 673 // lib/Target/X86/X86InstrFPStack.td file). 
674 // - Some instructions (listed in the manual map above) use the register 675 // class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure 676 // safe mapping of these instruction we manually map them and exclude 677 // them from the automation. 678 if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst)) 679 continue; 680 681 // Add all the memory form instructions to MemInsts, and all the register 682 // form instructions to RegInsts[Opc], where Opc is the opcode of each 683 // instructions. this helps reducing the runtime of the backend. 684 const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits"); 685 uint8_t Form = byteFromBitsInit(FormBits); 686 if (mayFoldToForm(Form)) 687 MemInsts.push_back(Inst); 688 else if (mayFoldFromForm(Form)) { 689 uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode")); 690 RegInsts[Opc].push_back(Inst); 691 } 692 } 693 694 // Create a copy b/c the register instruction will removed when a new entry is 695 // added into memory fold tables. 696 auto RegInstsForBroadcast = RegInsts; 697 698 Record *AsmWriter = Target.getAsmWriter(); 699 unsigned Variant = AsmWriter->getValueAsInt("Variant"); 700 auto FixUp = [&](const CodeGenInstruction *RegInst) { 701 StringRef RegInstName = RegInst->TheDef->getName(); 702 if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt")) 703 if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4))) 704 RegInst = &Target.getInstruction(RegAltRec); 705 return RegInst; 706 }; 707 // For each memory form instruction, try to find its register form 708 // instruction. 709 for (const CodeGenInstruction *MemInst : MemInsts) { 710 uint8_t Opc = 711 byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode")); 712 713 auto RegInstsIt = RegInsts.find(Opc); 714 if (RegInstsIt == RegInsts.end()) 715 continue; 716 717 // Two forms (memory & register) of the same instruction must have the same 718 // opcode. 
719 std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second; 720 721 // Memory fold tables 722 auto Match = 723 find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant)); 724 if (Match != OpcRegInsts.end()) { 725 updateTables(FixUp(*Match), MemInst); 726 OpcRegInsts.erase(Match); 727 } 728 729 // Broadcast tables 730 StringRef MemInstName = MemInst->TheDef->getName(); 731 if (!MemInstName.contains("mb") && !MemInstName.contains("mib")) 732 continue; 733 RegInstsIt = RegInstsForBroadcast.find(Opc); 734 assert(RegInstsIt != RegInstsForBroadcast.end() && 735 "Unexpected control flow"); 736 std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast = 737 RegInstsIt->second; 738 Match = find_if(OpcRegInstsForBroadcast, 739 IsMatch(MemInst, /*IsBroadcast=*/true, Variant)); 740 if (Match != OpcRegInstsForBroadcast.end()) { 741 updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false, 742 /*IsBroadcast=*/true); 743 OpcRegInstsForBroadcast.erase(Match); 744 } 745 } 746 747 // Add the manually mapped instructions listed above. 
748 for (const ManualMapEntry &Entry : ManualMapSet) { 749 Record *RegInstIter = Records.getDef(Entry.RegInstStr); 750 Record *MemInstIter = Records.getDef(Entry.MemInstStr); 751 752 updateTables(&(Target.getInstruction(RegInstIter)), 753 &(Target.getInstruction(MemInstIter)), Entry.Strategy, true); 754 } 755 756 #ifndef NDEBUG 757 auto CheckMemFoldTable = [](const FoldTable &Table) -> void { 758 for (const auto &Record : Table) { 759 auto &FoldEntry = Record.second; 760 FoldEntry.checkCorrectness(); 761 } 762 }; 763 CheckMemFoldTable(Table2Addr); 764 CheckMemFoldTable(Table0); 765 CheckMemFoldTable(Table1); 766 CheckMemFoldTable(Table2); 767 CheckMemFoldTable(Table3); 768 CheckMemFoldTable(Table4); 769 CheckMemFoldTable(BroadcastTable1); 770 CheckMemFoldTable(BroadcastTable2); 771 CheckMemFoldTable(BroadcastTable3); 772 CheckMemFoldTable(BroadcastTable4); 773 #endif 774 #define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS); 775 // Print all tables. 776 PRINT_TABLE(Table2Addr) 777 PRINT_TABLE(Table0) 778 PRINT_TABLE(Table1) 779 PRINT_TABLE(Table2) 780 PRINT_TABLE(Table3) 781 PRINT_TABLE(Table4) 782 PRINT_TABLE(BroadcastTable1) 783 PRINT_TABLE(BroadcastTable2) 784 PRINT_TABLE(BroadcastTable3) 785 PRINT_TABLE(BroadcastTable4) 786 } 787 788 static TableGen::Emitter::OptClass<X86FoldTablesEmitter> 789 X("gen-x86-fold-tables", "Generate X86 fold tables"); 790