//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../Target.h"

#include "../Error.h"
#include "../ParallelSnippetGenerator.h"
#include "../SerialSnippetGenerator.h"
#include "../SnippetGenerator.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"

namespace llvm {
namespace exegesis {

// Returns a non-null reason if we cannot handle the memory references in this
// instruction.
static const char *isInvalidMemoryInstr(const Instruction &Instr) {
  switch (Instr.Description.TSFlags & X86II::FormMask) {
  default:
    llvm_unreachable("Unknown FormMask value");
  // These have no memory access.
  case X86II::Pseudo:
  case X86II::RawFrm:
  case X86II::AddCCFrm:
  case X86II::PrefixByte:
  case X86II::MRMDestReg:
  case X86II::MRMSrcReg:
  case X86II::MRMSrcReg4VOp3:
  case X86II::MRMSrcRegOp4:
  case X86II::MRMSrcRegCC:
  case X86II::MRMXrCC:
  case X86II::MRMr0:
  case X86II::MRMXr:
  case X86II::MRM0r:
  case X86II::MRM1r:
  case X86II::MRM2r:
  case X86II::MRM3r:
  case X86II::MRM4r:
  case X86II::MRM5r:
  case X86II::MRM6r:
  case X86II::MRM7r:
  case X86II::MRM0X:
  case X86II::MRM1X:
  case X86II::MRM2X:
  case X86II::MRM3X:
  case X86II::MRM4X:
  case X86II::MRM5X:
  case X86II::MRM6X:
  case X86II::MRM7X:
  case X86II::MRM_C0:
  case X86II::MRM_C1:
  case X86II::MRM_C2:
  case X86II::MRM_C3:
  case X86II::MRM_C4:
  case X86II::MRM_C5:
  case X86II::MRM_C6:
  case X86II::MRM_C7:
  case X86II::MRM_C8:
  case X86II::MRM_C9:
  case X86II::MRM_CA:
  case X86II::MRM_CB:
  case X86II::MRM_CC:
  case X86II::MRM_CD:
  case X86II::MRM_CE:
  case X86II::MRM_CF:
  case X86II::MRM_D0:
  case X86II::MRM_D1:
  case X86II::MRM_D2:
  case X86II::MRM_D3:
  case X86II::MRM_D4:
  case X86II::MRM_D5:
  case X86II::MRM_D6:
  case X86II::MRM_D7:
  case X86II::MRM_D8:
  case X86II::MRM_D9:
  case X86II::MRM_DA:
  case X86II::MRM_DB:
  case X86II::MRM_DC:
  case X86II::MRM_DD:
  case X86II::MRM_DE:
  case X86II::MRM_DF:
  case X86II::MRM_E0:
  case X86II::MRM_E1:
  case X86II::MRM_E2:
  case X86II::MRM_E3:
  case X86II::MRM_E4:
  case X86II::MRM_E5:
  case X86II::MRM_E6:
  case X86II::MRM_E7:
  case X86II::MRM_E8:
  case X86II::MRM_E9:
  case X86II::MRM_EA:
  case X86II::MRM_EB:
  case X86II::MRM_EC:
  case X86II::MRM_ED:
  case X86II::MRM_EE:
  case X86II::MRM_EF:
  case X86II::MRM_F0:
  case X86II::MRM_F1:
  case X86II::MRM_F2:
  case X86II::MRM_F3:
  case X86II::MRM_F4:
  case X86II::MRM_F5:
  case X86II::MRM_F6:
  case X86II::MRM_F7:
  case X86II::MRM_F8:
  case X86II::MRM_F9:
  case X86II::MRM_FA:
  case X86II::MRM_FB:
  case X86II::MRM_FC:
  case X86II::MRM_FD:
  case X86II::MRM_FE:
  case X86II::MRM_FF:
  case X86II::RawFrmImm8:
    return nullptr;
  case X86II::AddRegFrm:
    return (Instr.Description.Opcode == X86::POP16r ||
            Instr.Description.Opcode == X86::POP32r ||
            Instr.Description.Opcode == X86::PUSH16r ||
            Instr.Description.Opcode == X86::PUSH32r)
               ? "unsupported opcode: unsupported memory access"
               : nullptr;
  // These access memory and are handled.
  case X86II::MRMDestMem:
  case X86II::MRMSrcMem:
  case X86II::MRMSrcMem4VOp3:
  case X86II::MRMSrcMemOp4:
  case X86II::MRMSrcMemCC:
  case X86II::MRMXmCC:
  case X86II::MRMXm:
  case X86II::MRM0m:
  case X86II::MRM1m:
  case X86II::MRM2m:
  case X86II::MRM3m:
  case X86II::MRM4m:
  case X86II::MRM5m:
  case X86II::MRM6m:
  case X86II::MRM7m:
    return nullptr;
  // These access memory and are not handled yet.
  case X86II::RawFrmImm16:
  case X86II::RawFrmMemOffs:
  case X86II::RawFrmSrc:
  case X86II::RawFrmDst:
  case X86II::RawFrmDstSrc:
    return "unsupported opcode: non uniform memory access";
  }
}

// If the opcode is invalid, returns a pointer to a string literal describing
// the reason. nullptr indicates a valid opcode.
static const char *isInvalidOpcode(const Instruction &Instr) {
  const auto OpcodeName = Instr.Name;
  if ((Instr.Description.TSFlags & X86II::FormMask) == X86II::Pseudo)
    return "unsupported opcode: pseudo instruction";
  if (OpcodeName.startswith("POP") || OpcodeName.startswith("PUSH") ||
      OpcodeName.startswith("ADJCALLSTACK") || OpcodeName.startswith("LEAVE"))
    return "unsupported opcode: Push/Pop/AdjCallStack/Leave";
  if (const auto reason = isInvalidMemoryInstr(Instr))
    return reason;
  // We do not handle instructions with OPERAND_PCREL.
  for (const Operand &Op : Instr.Operands)
    if (Op.isExplicit() &&
        Op.getExplicitOperandInfo().OperandType == MCOI::OPERAND_PCREL)
      return "unsupported opcode: PC relative operand";
  // We do not handle second-form X87 instructions. We only handle first-form
  // ones (_Fp), see comment in X86InstrFPStack.td.
  for (const Operand &Op : Instr.Operands)
    if (Op.isReg() && Op.isExplicit() &&
        Op.getExplicitOperandInfo().RegClass == X86::RSTRegClassID)
      return "unsupported second-form X87 instruction";
  return nullptr;
}

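// Returns the x87 FP form of the instruction (Instr.Description.TSFlags
// masked with X86II::FPTypeMask), e.g. NotFP, OneArgFPRW or TwoArgFP.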
static unsigned getX86FPFlags(const Instruction &Instr) {
  return Instr.Description.TSFlags & X86II::FPTypeMask;
}

// Helper to fill a memory operand with a value.
static void setMemOp(InstructionTemplate &IT, int OpIdx,
                     const MCOperand &OpVal) {
  const auto Op = IT.getInstr().Operands[OpIdx];
  assert(Op.isExplicit() && "invalid memory pattern");
  IT.getValueFor(Op) = OpVal;
}

// Common (latency, uops) code for LEA templates. `RestrictDestRegs` is given
// the chosen base and index registers and restricts the set of candidate
// destination registers for the LEA.
static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
    const Instruction &Instr, const BitVector &ForbiddenRegisters,
    const LLVMState &State, const SnippetGenerator::Options &Opts,
    std::function<void(unsigned, unsigned, BitVector &CandidateDestRegs)>
        RestrictDestRegs) {
  assert(Instr.Operands.size() == 6 && "invalid LEA");
  assert(X86II::getMemoryOperandNo(Instr.Description.TSFlags) == 1 &&
         "invalid LEA");

  constexpr const int kDestOp = 0;
  constexpr const int kBaseOp = 1;
  constexpr const int kIndexOp = 3;
  auto PossibleDestRegs =
      Instr.Operands[kDestOp].getRegisterAliasing().sourceBits();
  remove(PossibleDestRegs, ForbiddenRegisters);
  auto PossibleBaseRegs =
      Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits();
  remove(PossibleBaseRegs, ForbiddenRegisters);
  auto PossibleIndexRegs =
      Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits();
  remove(PossibleIndexRegs, ForbiddenRegisters);

  const auto &RegInfo = State.getRegInfo();
  std::vector<CodeTemplate> Result;
  for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) {
    for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) {
      for (int LogScale = 0; LogScale <= 3; ++LogScale) {
        // FIXME: Add an option for controlling how we explore immediates.
        for (const int Disp : {0, 42}) {
          InstructionTemplate IT(&Instr);
          const int64_t Scale = 1ull << LogScale;
          setMemOp(IT, 1, MCOperand::createReg(BaseReg));
          setMemOp(IT, 2, MCOperand::createImm(Scale));
          setMemOp(IT, 3, MCOperand::createReg(IndexReg));
          setMemOp(IT, 4, MCOperand::createImm(Disp));
          // SegmentReg must be 0 for LEA.
          setMemOp(IT, 5, MCOperand::createReg(0));

          // Output reg candidates are selected by the caller.
          auto PossibleDestRegsNow = PossibleDestRegs;
          RestrictDestRegs(BaseReg, IndexReg, PossibleDestRegsNow);
          assert(PossibleDestRegsNow.set_bits().begin() !=
                     PossibleDestRegsNow.set_bits().end() &&
                 "no remaining registers");
          setMemOp(
              IT, 0,
              MCOperand::createReg(*PossibleDestRegsNow.set_bits().begin()));

          CodeTemplate CT;
          CT.Instructions.push_back(std::move(IT));
          CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg),
                              RegInfo.getName(IndexReg), Scale, Disp)
                          .str();
          Result.push_back(std::move(CT));
          if (Result.size() >= Opts.MaxConfigsPerOpcode)
            return std::move(Result);
        }
      }
    }
  }

  return std::move(Result);
}

namespace {
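// Snippet generator used for serial (latency) measurements. It adds
// X86-specific handling of LEA and x87 instructions on top of the generic
// SerialSnippetGenerator.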
class X86SerialSnippetGenerator : public SerialSnippetGenerator {
public:
  using SerialSnippetGenerator::SerialSnippetGenerator;

  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(InstructionTemplate Variant,
                        const BitVector &ForbiddenRegisters) const override;
};
} // namespace

Expected<std::vector<CodeTemplate>>
X86SerialSnippetGenerator::generateCodeTemplates(
    InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
  const Instruction &Instr = Variant.getInstr();

  if (const auto reason = isInvalidOpcode(Instr))
    return make_error<Failure>(reason);

  // LEA gets special attention.
  const auto Opcode = Instr.Description.getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [this](unsigned BaseReg, unsigned IndexReg,
               BitVector &CandidateDestRegs) {
          // We just select a destination register that aliases the base
          // register.
          CandidateDestRegs &=
              State.getRATC().getRegister(BaseReg).aliasedBits();
        });
  }

  if (Instr.hasMemoryOperands())
    return make_error<Failure>(
        "unsupported memory operand in latency measurements");

  switch (getX86FPFlags(Instr)) {
  case X86II::NotFP:
    return SerialSnippetGenerator::generateCodeTemplates(Variant,
                                                         ForbiddenRegisters);
  case X86II::ZeroArgFP:
  case X86II::OneArgFP:
  case X86II::SpecialFP:
  case X86II::CompareFP:
  case X86II::CondMovFP:
    return make_error<Failure>("Unsupported x87 Instruction");
  case X86II::OneArgFPRW:
  case X86II::TwoArgFP:
    // These are instructions like
    //   - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    //   - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    return generateSelfAliasingCodeTemplates(Variant);
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}

namespace {
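// Snippet generator used for parallel (uops / inverse throughput)
// measurements, with the same X86-specific handling of LEA and x87
// instructions.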
class X86ParallelSnippetGenerator : public ParallelSnippetGenerator {
public:
  using ParallelSnippetGenerator::ParallelSnippetGenerator;

  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(InstructionTemplate Variant,
                        const BitVector &ForbiddenRegisters) const override;
};

} // namespace

Expected<std::vector<CodeTemplate>>
X86ParallelSnippetGenerator::generateCodeTemplates(
    InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
  const Instruction &Instr = Variant.getInstr();

  if (const auto reason = isInvalidOpcode(Instr))
    return make_error<Failure>(reason);

  // LEA gets special attention.
  const auto Opcode = Instr.Description.getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [this](unsigned BaseReg, unsigned IndexReg,
               BitVector &CandidateDestRegs) {
          // Any destination register that is not used for addressing is fine.
          remove(CandidateDestRegs,
                 State.getRATC().getRegister(BaseReg).aliasedBits());
          remove(CandidateDestRegs,
                 State.getRATC().getRegister(IndexReg).aliasedBits());
        });
  }

  switch (getX86FPFlags(Instr)) {
  case X86II::NotFP:
    return ParallelSnippetGenerator::generateCodeTemplates(Variant,
                                                           ForbiddenRegisters);
  case X86II::ZeroArgFP:
  case X86II::OneArgFP:
  case X86II::SpecialFP:
    return make_error<Failure>("Unsupported x87 Instruction");
  case X86II::OneArgFPRW:
  case X86II::TwoArgFP:
    // These are instructions like
    //   - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    //   - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    // We generate the same code for latency and uops.
    return generateSelfAliasingCodeTemplates(Variant);
  case X86II::CompareFP:
  case X86II::CondMovFP:
    // We can compute uops for any FP instruction that does not grow or shrink
    // the stack (it either does not touch the stack or pushes as much as it
    // pops).
    return generateUnconstrainedCodeTemplates(
        Variant, "instruction does not grow/shrink the FP stack");
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}

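// Returns the MOVri opcode used to load an immediate into a register of the
// given bit width.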
static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
  switch (RegBitWidth) {
  case 8:
    return X86::MOV8ri;
  case 16:
    return X86::MOV16ri;
  case 32:
    return X86::MOV32ri;
  case 64:
    return X86::MOV64ri;
  }
  llvm_unreachable("Invalid Value Width");
}

// Generates an instruction to load an immediate value into a register.
static MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
                            const APInt &Value) {
  if (Value.getBitWidth() > RegBitWidth)
    llvm_unreachable("Value must fit in the Register");
  return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}

// Allocates scratch memory on the stack.
static MCInst allocateStackSpace(unsigned Bytes) {
  return MCInstBuilder(X86::SUB64ri8)
      .addReg(X86::RSP)
      .addReg(X86::RSP)
      .addImm(Bytes);
}

// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
static MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                             uint64_t Imm) {
  return MCInstBuilder(MovOpcode)
      // Address = RSP
      .addReg(X86::RSP)    // BaseReg
      .addImm(1)           // ScaleAmt
      .addReg(0)           // IndexReg
      .addImm(OffsetBytes) // Disp
      .addReg(0)           // Segment
      // Immediate.
      .addImm(Imm);
}

// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
static MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  return MCInstBuilder(RMOpcode)
      .addReg(Reg)
      // Address = RSP
      .addReg(X86::RSP) // BaseReg
      .addImm(1)        // ScaleAmt
      .addReg(0)        // IndexReg
      .addImm(0)        // Disp
      .addReg(0);       // Segment
}

// Releases scratch memory.
static MCInst releaseStackSpace(unsigned Bytes) {
  return MCInstBuilder(X86::ADD64ri8)
      .addReg(X86::RSP)
      .addReg(X86::RSP)
      .addImm(Bytes);
}

// Reserves some space on the stack, fills it with the content of the provided
// constant, and provides methods to load the stack value into a register.
namespace {
struct ConstantInliner {
  explicit ConstantInliner(const APInt &Constant) : Constant_(Constant) {}

  std::vector<MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
                                      unsigned Opcode);

  std::vector<MCInst> loadX87STAndFinalize(unsigned Reg);

  std::vector<MCInst> loadX87FPAndFinalize(unsigned Reg);

  std::vector<MCInst> popFlagAndFinalize();

  std::vector<MCInst> loadImplicitRegAndFinalize(unsigned Opcode,
                                                 unsigned Value);

private:
  ConstantInliner &add(const MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

  void initStack(unsigned Bytes);

  static constexpr const unsigned kF80Bytes = 10; // 80 bits.

  APInt Constant_;
  std::vector<MCInst> Instructions;
};
} // namespace

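// Materializes the constant on the stack, loads it into `Reg` with the given
// memory-to-register opcode, and releases the scratch stack space.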
std::vector<MCInst> ConstantInliner::loadAndFinalize(unsigned Reg,
                                                     unsigned RegBitWidth,
                                                     unsigned Opcode) {
  assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits");
  initStack(RegBitWidth / 8);
  add(loadToReg(Reg, Opcode));
  add(releaseStackSpace(RegBitWidth / 8));
  return std::move(Instructions);
}

std::vector<MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) {
  initStack(kF80Bytes);
  add(MCInstBuilder(X86::LD_F80m)
          // Address = RSP
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  if (Reg != X86::ST0)
    add(MCInstBuilder(X86::ST_Frr).addReg(Reg));
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}

std::vector<MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) {
  initStack(kF80Bytes);
  add(MCInstBuilder(X86::LD_Fp80m)
          .addReg(Reg)
          // Address = RSP
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}

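// Sets EFLAGS: materializes the constant on the stack, then pops it with
// POPF64.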
std::vector<MCInst> ConstantInliner::popFlagAndFinalize() {
  initStack(8);
  add(MCInstBuilder(X86::POPF64));
  return std::move(Instructions);
}

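// Loads `Value` into an implicit register (e.g. MXCSR or FPCW): writes the
// value to a stack slot, then issues `Opcode` with that slot as its memory
// operand.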
std::vector<MCInst>
ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) {
  add(allocateStackSpace(4));
  add(fillStackSpace(X86::MOV32mi, 0, Value)); // Mask all FP exceptions
  add(MCInstBuilder(Opcode)
          // Address = RSP
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  add(releaseStackSpace(4));
  return std::move(Instructions);
}

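// Allocates `Bytes` of stack space and fills it with the constant
// (sign-extended to the full width), using 4-, 2- and 1-byte stores.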
void ConstantInliner::initStack(unsigned Bytes) {
  assert(Constant_.getBitWidth() <= Bytes * 8 &&
         "Value does not have the correct size");
  const APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
                                 ? Constant_.sext(Bytes * 8)
                                 : Constant_;
  add(allocateStackSpace(Bytes));
  size_t ByteOffset = 0;
  for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
    add(fillStackSpace(
        X86::MOV32mi, ByteOffset,
        WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
  if (Bytes - ByteOffset >= 2) {
    add(fillStackSpace(
        X86::MOV16mi, ByteOffset,
        WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
    ByteOffset += 2;
  }
  if (Bytes - ByteOffset >= 1)
    add(fillStackSpace(
        X86::MOV8mi, ByteOffset,
        WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
}

#include "X86GenExegesis.inc"

namespace {
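// X86-specific implementation of the llvm-exegesis target hooks.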
class ExegesisX86Target : public ExegesisTarget {
public:
  ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {}

private:
  void addTargetSpecificPasses(PassManagerBase &PM) const override;

  unsigned getScratchMemoryRegister(const Triple &TT) const override;

  unsigned getLoopCounterRegister(const Triple &) const override;

  unsigned getMaxMemoryAccessSize() const override { return 64; }

  Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var,
                                 MCOperand &AssignedValue,
                                 const BitVector &ForbiddenRegs) const override;

  void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
                          unsigned Offset) const override;

  void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
                                   MachineBasicBlock &TargetMBB,
                                   const MCInstrInfo &MII) const override;

  std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
                               const APInt &Value) const override;

  ArrayRef<unsigned> getUnavailableRegisters() const override {
    return makeArrayRef(kUnavailableRegisters,
                        sizeof(kUnavailableRegisters) /
                            sizeof(kUnavailableRegisters[0]));
  }

  bool allowAsBackToBack(const Instruction &Instr) const override {
    const unsigned Opcode = Instr.Description.Opcode;
    return !isInvalidOpcode(Instr) && Opcode != X86::LEA64r &&
           Opcode != X86::LEA64_32r && Opcode != X86::LEA16r;
  }

  std::vector<InstructionTemplate>
  generateInstructionVariants(const Instruction &Instr,
                              unsigned MaxConfigsPerOpcode) const override;

  std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<X86SerialSnippetGenerator>(State, Opts);
  }

  std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<X86ParallelSnippetGenerator>(State, Opts);
  }

  bool matchesArch(Triple::ArchType Arch) const override {
    return Arch == Triple::x86_64 || Arch == Triple::x86;
  }

  static const unsigned kUnavailableRegisters[4];
};

// We disable a few registers that cannot be encoded on instructions with a REX
// prefix.
const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH,
                                                              X86::CH, X86::DH};

// We're using one of R8-R15 because these registers are never hardcoded in
// instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have fewer
// conflicts.
constexpr const unsigned kLoopCounterReg = X86::R8;

} // namespace

void ExegesisX86Target::addTargetSpecificPasses(PassManagerBase &PM) const {
  // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
  PM.add(createX86FloatingPointStackifierPass());
}

unsigned ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const {
  if (!TT.isArch64Bit()) {
    // FIXME: This would require popping from the stack, so we would have to
    // add some additional setup code.
    return 0;
  }
  return TT.isOSWindows() ? X86::RCX : X86::RDI;
}

unsigned ExegesisX86Target::getLoopCounterRegister(const Triple &TT) const {
  if (!TT.isArch64Bit()) {
    return 0;
  }
  return kLoopCounterReg;
}

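// Assigns a random legal value to operand types we know how to randomize.
// Currently this only handles the embedded rounding-control immediate; any
// other operand type is reported as unimplemented.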
Error ExegesisX86Target::randomizeTargetMCOperand(
    const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
    const BitVector &ForbiddenRegs) const {
  const Operand &Op = Instr.getPrimaryOperand(Var);
  switch (Op.getExplicitOperandInfo().OperandType) {
  case X86::OperandType::OPERAND_ROUNDING_CONTROL:
    AssignedValue =
        MCOperand::createImm(randomIndex(X86::STATIC_ROUNDING::TO_ZERO));
    return Error::success();
  default:
    break;
  }
  return make_error<Failure>(
      Twine("unimplemented operand type ")
          .concat(Twine(Op.getExplicitOperandInfo().OperandType)));
}

void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
                                           unsigned Reg,
                                           unsigned Offset) const {
  assert(!isInvalidMemoryInstr(IT.getInstr()) &&
         "fillMemoryOperands requires a valid memory instruction");
  int MemOpIdx = X86II::getMemoryOperandNo(IT.getInstr().Description.TSFlags);
  assert(MemOpIdx >= 0 && "invalid memory operand index");
  // getMemoryOperandNo() ignores tied operands, so we have to add them back.
  MemOpIdx += X86II::getOperandBias(IT.getInstr().Description);
  setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg
  setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt
  setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg
  setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
  setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0));      // Segment
}

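// Decrements the loop counter and jumps back to TargetMBB while it is not
// zero.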
void ExegesisX86Target::decrementLoopCounterAndJump(
    MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
    const MCInstrInfo &MII) const {
  BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8))
      .addDef(kLoopCounterReg)
      .addUse(kLoopCounterReg)
      .addImm(-1);
  BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1))
      .addMBB(&TargetMBB)
      .addImm(X86::COND_NE);
}

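// Returns the instruction sequence that sets `Reg` to `Value`, choosing a
// strategy based on the register class and the available ISA extensions.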
std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
                                                unsigned Reg,
                                                const APInt &Value) const {
  if (X86::GR8RegClass.contains(Reg))
    return {loadImmediate(Reg, 8, Value)};
  if (X86::GR16RegClass.contains(Reg))
    return {loadImmediate(Reg, 16, Value)};
  if (X86::GR32RegClass.contains(Reg))
    return {loadImmediate(Reg, 32, Value)};
  if (X86::GR64RegClass.contains(Reg))
    return {loadImmediate(Reg, 64, Value)};
  ConstantInliner CI(Value);
  if (X86::VR64RegClass.contains(Reg))
    return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm);
  if (X86::VR128XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm);
    if (STI.getFeatureBits()[X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
    return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
  }
  if (X86::VR256XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm);
    if (STI.getFeatureBits()[X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
  }
  if (X86::VR512RegClass.contains(Reg))
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 512, X86::VMOVDQU32Zrm);
  if (X86::RSTRegClass.contains(Reg)) {
    return CI.loadX87STAndFinalize(Reg);
  }
  if (X86::RFP32RegClass.contains(Reg) || X86::RFP64RegClass.contains(Reg) ||
      X86::RFP80RegClass.contains(Reg)) {
    return CI.loadX87FPAndFinalize(Reg);
  }
  if (Reg == X86::EFLAGS)
    return CI.popFlagAndFinalize();
  if (Reg == X86::MXCSR)
    return CI.loadImplicitRegAndFinalize(
        STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR : X86::LDMXCSR,
        0x1f80);
  if (Reg == X86::FPCW)
    return CI.loadImplicitRegAndFinalize(X86::FLDCW16m, 0x37f);
  return {}; // Not yet implemented.
}

// Instructions can have variable operands, and we may want to see how
// different operand values affect performance. So for each operand position,
// precompute all the possible choices we might care about,
// and greedily generate all the possible combinations of choices.
std::vector<InstructionTemplate> ExegesisX86Target::generateInstructionVariants(
    const Instruction &Instr, unsigned MaxConfigsPerOpcode) const {
  bool Exploration = false;
  SmallVector<SmallVector<MCOperand, 1>, 4> VariableChoices;
  VariableChoices.resize(Instr.Variables.size());
  for (auto I : llvm::zip(Instr.Variables, VariableChoices)) {
    const Variable &Var = std::get<0>(I);
    SmallVectorImpl<MCOperand> &Choices = std::get<1>(I);

    switch (Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType) {
    default:
      // We don't wish to explicitly explore this variable.
      Choices.emplace_back(); // But add invalid MCOperand to simplify logic.
      continue;
    case X86::OperandType::OPERAND_COND_CODE: {
      Exploration = true;
      auto CondCodes = seq((int)X86::CondCode::COND_O,
                           1 + (int)X86::CondCode::LAST_VALID_COND);
      Choices.reserve(std::distance(CondCodes.begin(), CondCodes.end()));
      for (int CondCode : CondCodes)
        Choices.emplace_back(MCOperand::createImm(CondCode));
      break;
    }
    }
  }

  // If we don't wish to explore any variables, defer to the baseline method.
  if (!Exploration)
    return ExegesisTarget::generateInstructionVariants(Instr,
                                                       MaxConfigsPerOpcode);

  std::vector<InstructionTemplate> Variants;
  size_t NumVariants;
  CombinationGenerator<MCOperand, decltype(VariableChoices)::value_type, 4> G(
      VariableChoices);

  // How many operand combinations can we produce, within the limit?
  NumVariants = std::min(G.numCombinations(), (size_t)MaxConfigsPerOpcode);
  // And actually produce all the wanted operand combinations.
  Variants.reserve(NumVariants);
  G.generate([&](ArrayRef<MCOperand> State) -> bool {
    Variants.emplace_back(&Instr);
    Variants.back().setVariableValues(State);
    // Did we run out of space for variants?
    return Variants.size() >= NumVariants;
  });

  assert(Variants.size() == NumVariants &&
         Variants.size() <= MaxConfigsPerOpcode &&
         "Should not produce too many variants");
  return Variants;
}

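// Returns the singleton ExegesisX86Target instance.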
static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis
} // namespace llvm