1 //===-- Target.cpp ----------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "../Target.h"
9
10 #include "../Error.h"
11 #include "../ParallelSnippetGenerator.h"
12 #include "../SerialSnippetGenerator.h"
13 #include "../SnippetGenerator.h"
14 #include "MCTargetDesc/X86BaseInfo.h"
15 #include "MCTargetDesc/X86MCTargetDesc.h"
16 #include "X86.h"
17 #include "X86RegisterInfo.h"
18 #include "X86Subtarget.h"
19 #include "llvm/ADT/Sequence.h"
20 #include "llvm/MC/MCInstBuilder.h"
21 #include "llvm/Support/FormatVariadic.h"
22
23 namespace llvm {
24 namespace exegesis {
25
26 // Returns a non-null reason if we cannot handle the memory references in this
27 // instruction.
isInvalidMemoryInstr(const Instruction & Instr)28 static const char *isInvalidMemoryInstr(const Instruction &Instr) {
29 switch (Instr.Description.TSFlags & X86II::FormMask) {
30 default:
31 llvm_unreachable("Unknown FormMask value");
32 // These have no memory access.
33 case X86II::Pseudo:
34 case X86II::RawFrm:
35 case X86II::AddCCFrm:
36 case X86II::PrefixByte:
37 case X86II::MRMDestReg:
38 case X86II::MRMSrcReg:
39 case X86II::MRMSrcReg4VOp3:
40 case X86II::MRMSrcRegOp4:
41 case X86II::MRMSrcRegCC:
42 case X86II::MRMXrCC:
43 case X86II::MRMr0:
44 case X86II::MRMXr:
45 case X86II::MRM0r:
46 case X86II::MRM1r:
47 case X86II::MRM2r:
48 case X86II::MRM3r:
49 case X86II::MRM4r:
50 case X86II::MRM5r:
51 case X86II::MRM6r:
52 case X86II::MRM7r:
53 case X86II::MRM0X:
54 case X86II::MRM1X:
55 case X86II::MRM2X:
56 case X86II::MRM3X:
57 case X86II::MRM4X:
58 case X86II::MRM5X:
59 case X86II::MRM6X:
60 case X86II::MRM7X:
61 case X86II::MRM_C0:
62 case X86II::MRM_C1:
63 case X86II::MRM_C2:
64 case X86II::MRM_C3:
65 case X86II::MRM_C4:
66 case X86II::MRM_C5:
67 case X86II::MRM_C6:
68 case X86II::MRM_C7:
69 case X86II::MRM_C8:
70 case X86II::MRM_C9:
71 case X86II::MRM_CA:
72 case X86II::MRM_CB:
73 case X86II::MRM_CC:
74 case X86II::MRM_CD:
75 case X86II::MRM_CE:
76 case X86II::MRM_CF:
77 case X86II::MRM_D0:
78 case X86II::MRM_D1:
79 case X86II::MRM_D2:
80 case X86II::MRM_D3:
81 case X86II::MRM_D4:
82 case X86II::MRM_D5:
83 case X86II::MRM_D6:
84 case X86II::MRM_D7:
85 case X86II::MRM_D8:
86 case X86II::MRM_D9:
87 case X86II::MRM_DA:
88 case X86II::MRM_DB:
89 case X86II::MRM_DC:
90 case X86II::MRM_DD:
91 case X86II::MRM_DE:
92 case X86II::MRM_DF:
93 case X86II::MRM_E0:
94 case X86II::MRM_E1:
95 case X86II::MRM_E2:
96 case X86II::MRM_E3:
97 case X86II::MRM_E4:
98 case X86II::MRM_E5:
99 case X86II::MRM_E6:
100 case X86II::MRM_E7:
101 case X86II::MRM_E8:
102 case X86II::MRM_E9:
103 case X86II::MRM_EA:
104 case X86II::MRM_EB:
105 case X86II::MRM_EC:
106 case X86II::MRM_ED:
107 case X86II::MRM_EE:
108 case X86II::MRM_EF:
109 case X86II::MRM_F0:
110 case X86II::MRM_F1:
111 case X86II::MRM_F2:
112 case X86II::MRM_F3:
113 case X86II::MRM_F4:
114 case X86II::MRM_F5:
115 case X86II::MRM_F6:
116 case X86II::MRM_F7:
117 case X86II::MRM_F8:
118 case X86II::MRM_F9:
119 case X86II::MRM_FA:
120 case X86II::MRM_FB:
121 case X86II::MRM_FC:
122 case X86II::MRM_FD:
123 case X86II::MRM_FE:
124 case X86II::MRM_FF:
125 case X86II::RawFrmImm8:
126 return nullptr;
127 case X86II::AddRegFrm:
128 return (Instr.Description.Opcode == X86::POP16r ||
129 Instr.Description.Opcode == X86::POP32r ||
130 Instr.Description.Opcode == X86::PUSH16r ||
131 Instr.Description.Opcode == X86::PUSH32r)
132 ? "unsupported opcode: unsupported memory access"
133 : nullptr;
134 // These access memory and are handled.
135 case X86II::MRMDestMem:
136 case X86II::MRMSrcMem:
137 case X86II::MRMSrcMem4VOp3:
138 case X86II::MRMSrcMemOp4:
139 case X86II::MRMSrcMemCC:
140 case X86II::MRMXmCC:
141 case X86II::MRMXm:
142 case X86II::MRM0m:
143 case X86II::MRM1m:
144 case X86II::MRM2m:
145 case X86II::MRM3m:
146 case X86II::MRM4m:
147 case X86II::MRM5m:
148 case X86II::MRM6m:
149 case X86II::MRM7m:
150 return nullptr;
151 // These access memory and are not handled yet.
152 case X86II::RawFrmImm16:
153 case X86II::RawFrmMemOffs:
154 case X86II::RawFrmSrc:
155 case X86II::RawFrmDst:
156 case X86II::RawFrmDstSrc:
157 return "unsupported opcode: non uniform memory access";
158 }
159 }
160
161 // If the opcode is invalid, returns a pointer to a character literal indicating
162 // the reason. nullptr indicates a valid opcode.
isInvalidOpcode(const Instruction & Instr)163 static const char *isInvalidOpcode(const Instruction &Instr) {
164 const auto OpcodeName = Instr.Name;
165 if ((Instr.Description.TSFlags & X86II::FormMask) == X86II::Pseudo)
166 return "unsupported opcode: pseudo instruction";
167 if (OpcodeName.startswith("POP") || OpcodeName.startswith("PUSH") ||
168 OpcodeName.startswith("ADJCALLSTACK") || OpcodeName.startswith("LEAVE"))
169 return "unsupported opcode: Push/Pop/AdjCallStack/Leave";
170 if (const auto reason = isInvalidMemoryInstr(Instr))
171 return reason;
172 // We do not handle instructions with OPERAND_PCREL.
173 for (const Operand &Op : Instr.Operands)
174 if (Op.isExplicit() &&
175 Op.getExplicitOperandInfo().OperandType == MCOI::OPERAND_PCREL)
176 return "unsupported opcode: PC relative operand";
177 // We do not handle second-form X87 instructions. We only handle first-form
178 // ones (_Fp), see comment in X86InstrFPStack.td.
179 for (const Operand &Op : Instr.Operands)
180 if (Op.isReg() && Op.isExplicit() &&
181 Op.getExplicitOperandInfo().RegClass == X86::RSTRegClassID)
182 return "unsupported second-form X87 instruction";
183 return nullptr;
184 }
185
getX86FPFlags(const Instruction & Instr)186 static unsigned getX86FPFlags(const Instruction &Instr) {
187 return Instr.Description.TSFlags & X86II::FPTypeMask;
188 }
189
190 // Helper to fill a memory operand with a value.
setMemOp(InstructionTemplate & IT,int OpIdx,const MCOperand & OpVal)191 static void setMemOp(InstructionTemplate &IT, int OpIdx,
192 const MCOperand &OpVal) {
193 const auto Op = IT.getInstr().Operands[OpIdx];
194 assert(Op.isExplicit() && "invalid memory pattern");
195 IT.getValueFor(Op) = OpVal;
196 }
197
198 // Common (latency, uops) code for LEA templates. `GetDestReg` takes the
199 // addressing base and index registers and returns the LEA destination register.
generateLEATemplatesCommon(const Instruction & Instr,const BitVector & ForbiddenRegisters,const LLVMState & State,const SnippetGenerator::Options & Opts,std::function<void (unsigned,unsigned,BitVector & CandidateDestRegs)> RestrictDestRegs)200 static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
201 const Instruction &Instr, const BitVector &ForbiddenRegisters,
202 const LLVMState &State, const SnippetGenerator::Options &Opts,
203 std::function<void(unsigned, unsigned, BitVector &CandidateDestRegs)>
204 RestrictDestRegs) {
205 assert(Instr.Operands.size() == 6 && "invalid LEA");
206 assert(X86II::getMemoryOperandNo(Instr.Description.TSFlags) == 1 &&
207 "invalid LEA");
208
209 constexpr const int kDestOp = 0;
210 constexpr const int kBaseOp = 1;
211 constexpr const int kIndexOp = 3;
212 auto PossibleDestRegs =
213 Instr.Operands[kDestOp].getRegisterAliasing().sourceBits();
214 remove(PossibleDestRegs, ForbiddenRegisters);
215 auto PossibleBaseRegs =
216 Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits();
217 remove(PossibleBaseRegs, ForbiddenRegisters);
218 auto PossibleIndexRegs =
219 Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits();
220 remove(PossibleIndexRegs, ForbiddenRegisters);
221
222 const auto &RegInfo = State.getRegInfo();
223 std::vector<CodeTemplate> Result;
224 for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) {
225 for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) {
226 for (int LogScale = 0; LogScale <= 3; ++LogScale) {
227 // FIXME: Add an option for controlling how we explore immediates.
228 for (const int Disp : {0, 42}) {
229 InstructionTemplate IT(&Instr);
230 const int64_t Scale = 1ull << LogScale;
231 setMemOp(IT, 1, MCOperand::createReg(BaseReg));
232 setMemOp(IT, 2, MCOperand::createImm(Scale));
233 setMemOp(IT, 3, MCOperand::createReg(IndexReg));
234 setMemOp(IT, 4, MCOperand::createImm(Disp));
235 // SegmentReg must be 0 for LEA.
236 setMemOp(IT, 5, MCOperand::createReg(0));
237
238 // Output reg candidates are selected by the caller.
239 auto PossibleDestRegsNow = PossibleDestRegs;
240 RestrictDestRegs(BaseReg, IndexReg, PossibleDestRegsNow);
241 assert(PossibleDestRegsNow.set_bits().begin() !=
242 PossibleDestRegsNow.set_bits().end() &&
243 "no remaining registers");
244 setMemOp(
245 IT, 0,
246 MCOperand::createReg(*PossibleDestRegsNow.set_bits().begin()));
247
248 CodeTemplate CT;
249 CT.Instructions.push_back(std::move(IT));
250 CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg),
251 RegInfo.getName(IndexReg), Scale, Disp)
252 .str();
253 Result.push_back(std::move(CT));
254 if (Result.size() >= Opts.MaxConfigsPerOpcode)
255 return std::move(Result);
256 }
257 }
258 }
259 }
260
261 return std::move(Result);
262 }
263
264 namespace {
265 class X86SerialSnippetGenerator : public SerialSnippetGenerator {
266 public:
267 using SerialSnippetGenerator::SerialSnippetGenerator;
268
269 Expected<std::vector<CodeTemplate>>
270 generateCodeTemplates(InstructionTemplate Variant,
271 const BitVector &ForbiddenRegisters) const override;
272 };
273 } // namespace
274
275 Expected<std::vector<CodeTemplate>>
generateCodeTemplates(InstructionTemplate Variant,const BitVector & ForbiddenRegisters) const276 X86SerialSnippetGenerator::generateCodeTemplates(
277 InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
278 const Instruction &Instr = Variant.getInstr();
279
280 if (const auto reason = isInvalidOpcode(Instr))
281 return make_error<Failure>(reason);
282
283 // LEA gets special attention.
284 const auto Opcode = Instr.Description.getOpcode();
285 if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
286 return generateLEATemplatesCommon(
287 Instr, ForbiddenRegisters, State, Opts,
288 [this](unsigned BaseReg, unsigned IndexReg,
289 BitVector &CandidateDestRegs) {
290 // We just select a destination register that aliases the base
291 // register.
292 CandidateDestRegs &=
293 State.getRATC().getRegister(BaseReg).aliasedBits();
294 });
295 }
296
297 if (Instr.hasMemoryOperands())
298 return make_error<Failure>(
299 "unsupported memory operand in latency measurements");
300
301 switch (getX86FPFlags(Instr)) {
302 case X86II::NotFP:
303 return SerialSnippetGenerator::generateCodeTemplates(Variant,
304 ForbiddenRegisters);
305 case X86II::ZeroArgFP:
306 case X86II::OneArgFP:
307 case X86II::SpecialFP:
308 case X86II::CompareFP:
309 case X86II::CondMovFP:
310 return make_error<Failure>("Unsupported x87 Instruction");
311 case X86II::OneArgFPRW:
312 case X86II::TwoArgFP:
313 // These are instructions like
314 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
315 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
316 // They are intrinsically serial and do not modify the state of the stack.
317 return generateSelfAliasingCodeTemplates(Variant);
318 default:
319 llvm_unreachable("Unknown FP Type!");
320 }
321 }
322
323 namespace {
324 class X86ParallelSnippetGenerator : public ParallelSnippetGenerator {
325 public:
326 using ParallelSnippetGenerator::ParallelSnippetGenerator;
327
328 Expected<std::vector<CodeTemplate>>
329 generateCodeTemplates(InstructionTemplate Variant,
330 const BitVector &ForbiddenRegisters) const override;
331 };
332
333 } // namespace
334
335 Expected<std::vector<CodeTemplate>>
generateCodeTemplates(InstructionTemplate Variant,const BitVector & ForbiddenRegisters) const336 X86ParallelSnippetGenerator::generateCodeTemplates(
337 InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
338 const Instruction &Instr = Variant.getInstr();
339
340 if (const auto reason = isInvalidOpcode(Instr))
341 return make_error<Failure>(reason);
342
343 // LEA gets special attention.
344 const auto Opcode = Instr.Description.getOpcode();
345 if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
346 return generateLEATemplatesCommon(
347 Instr, ForbiddenRegisters, State, Opts,
348 [this](unsigned BaseReg, unsigned IndexReg,
349 BitVector &CandidateDestRegs) {
350 // Any destination register that is not used for addressing is fine.
351 remove(CandidateDestRegs,
352 State.getRATC().getRegister(BaseReg).aliasedBits());
353 remove(CandidateDestRegs,
354 State.getRATC().getRegister(IndexReg).aliasedBits());
355 });
356 }
357
358 switch (getX86FPFlags(Instr)) {
359 case X86II::NotFP:
360 return ParallelSnippetGenerator::generateCodeTemplates(Variant,
361 ForbiddenRegisters);
362 case X86II::ZeroArgFP:
363 case X86II::OneArgFP:
364 case X86II::SpecialFP:
365 return make_error<Failure>("Unsupported x87 Instruction");
366 case X86II::OneArgFPRW:
367 case X86II::TwoArgFP:
368 // These are instructions like
369 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
370 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
371 // They are intrinsically serial and do not modify the state of the stack.
372 // We generate the same code for latency and uops.
373 return generateSelfAliasingCodeTemplates(Variant);
374 case X86II::CompareFP:
375 case X86II::CondMovFP:
376 // We can compute uops for any FP instruction that does not grow or shrink
377 // the stack (either do not touch the stack or push as much as they pop).
378 return generateUnconstrainedCodeTemplates(
379 Variant, "instruction does not grow/shrink the FP stack");
380 default:
381 llvm_unreachable("Unknown FP Type!");
382 }
383 }
384
getLoadImmediateOpcode(unsigned RegBitWidth)385 static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
386 switch (RegBitWidth) {
387 case 8:
388 return X86::MOV8ri;
389 case 16:
390 return X86::MOV16ri;
391 case 32:
392 return X86::MOV32ri;
393 case 64:
394 return X86::MOV64ri;
395 }
396 llvm_unreachable("Invalid Value Width");
397 }
398
399 // Generates instruction to load an immediate value into a register.
loadImmediate(unsigned Reg,unsigned RegBitWidth,const APInt & Value)400 static MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
401 const APInt &Value) {
402 if (Value.getBitWidth() > RegBitWidth)
403 llvm_unreachable("Value must fit in the Register");
404 return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
405 .addReg(Reg)
406 .addImm(Value.getZExtValue());
407 }
408
409 // Allocates scratch memory on the stack.
allocateStackSpace(unsigned Bytes)410 static MCInst allocateStackSpace(unsigned Bytes) {
411 return MCInstBuilder(X86::SUB64ri8)
412 .addReg(X86::RSP)
413 .addReg(X86::RSP)
414 .addImm(Bytes);
415 }
416
417 // Fills scratch memory at offset `OffsetBytes` with value `Imm`.
fillStackSpace(unsigned MovOpcode,unsigned OffsetBytes,uint64_t Imm)418 static MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
419 uint64_t Imm) {
420 return MCInstBuilder(MovOpcode)
421 // Address = ESP
422 .addReg(X86::RSP) // BaseReg
423 .addImm(1) // ScaleAmt
424 .addReg(0) // IndexReg
425 .addImm(OffsetBytes) // Disp
426 .addReg(0) // Segment
427 // Immediate.
428 .addImm(Imm);
429 }
430
431 // Loads scratch memory into register `Reg` using opcode `RMOpcode`.
loadToReg(unsigned Reg,unsigned RMOpcode)432 static MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
433 return MCInstBuilder(RMOpcode)
434 .addReg(Reg)
435 // Address = ESP
436 .addReg(X86::RSP) // BaseReg
437 .addImm(1) // ScaleAmt
438 .addReg(0) // IndexReg
439 .addImm(0) // Disp
440 .addReg(0); // Segment
441 }
442
443 // Releases scratch memory.
releaseStackSpace(unsigned Bytes)444 static MCInst releaseStackSpace(unsigned Bytes) {
445 return MCInstBuilder(X86::ADD64ri8)
446 .addReg(X86::RSP)
447 .addReg(X86::RSP)
448 .addImm(Bytes);
449 }
450
// Reserves some space on the stack, fills it with the content of the provided
// constant and provides methods to load the stack value into a register.
453 namespace {
454 struct ConstantInliner {
ConstantInlinerllvm::exegesis::__anon50abce280511::ConstantInliner455 explicit ConstantInliner(const APInt &Constant) : Constant_(Constant) {}
456
457 std::vector<MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
458 unsigned Opcode);
459
460 std::vector<MCInst> loadX87STAndFinalize(unsigned Reg);
461
462 std::vector<MCInst> loadX87FPAndFinalize(unsigned Reg);
463
464 std::vector<MCInst> popFlagAndFinalize();
465
466 std::vector<MCInst> loadImplicitRegAndFinalize(unsigned Opcode,
467 unsigned Value);
468
469 private:
addllvm::exegesis::__anon50abce280511::ConstantInliner470 ConstantInliner &add(const MCInst &Inst) {
471 Instructions.push_back(Inst);
472 return *this;
473 }
474
475 void initStack(unsigned Bytes);
476
477 static constexpr const unsigned kF80Bytes = 10; // 80 bits.
478
479 APInt Constant_;
480 std::vector<MCInst> Instructions;
481 };
482 } // namespace
483
loadAndFinalize(unsigned Reg,unsigned RegBitWidth,unsigned Opcode)484 std::vector<MCInst> ConstantInliner::loadAndFinalize(unsigned Reg,
485 unsigned RegBitWidth,
486 unsigned Opcode) {
487 assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits");
488 initStack(RegBitWidth / 8);
489 add(loadToReg(Reg, Opcode));
490 add(releaseStackSpace(RegBitWidth / 8));
491 return std::move(Instructions);
492 }
493
loadX87STAndFinalize(unsigned Reg)494 std::vector<MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) {
495 initStack(kF80Bytes);
496 add(MCInstBuilder(X86::LD_F80m)
497 // Address = ESP
498 .addReg(X86::RSP) // BaseReg
499 .addImm(1) // ScaleAmt
500 .addReg(0) // IndexReg
501 .addImm(0) // Disp
502 .addReg(0)); // Segment
503 if (Reg != X86::ST0)
504 add(MCInstBuilder(X86::ST_Frr).addReg(Reg));
505 add(releaseStackSpace(kF80Bytes));
506 return std::move(Instructions);
507 }
508
loadX87FPAndFinalize(unsigned Reg)509 std::vector<MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) {
510 initStack(kF80Bytes);
511 add(MCInstBuilder(X86::LD_Fp80m)
512 .addReg(Reg)
513 // Address = ESP
514 .addReg(X86::RSP) // BaseReg
515 .addImm(1) // ScaleAmt
516 .addReg(0) // IndexReg
517 .addImm(0) // Disp
518 .addReg(0)); // Segment
519 add(releaseStackSpace(kF80Bytes));
520 return std::move(Instructions);
521 }
522
popFlagAndFinalize()523 std::vector<MCInst> ConstantInliner::popFlagAndFinalize() {
524 initStack(8);
525 add(MCInstBuilder(X86::POPF64));
526 return std::move(Instructions);
527 }
528
529 std::vector<MCInst>
loadImplicitRegAndFinalize(unsigned Opcode,unsigned Value)530 ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) {
531 add(allocateStackSpace(4));
532 add(fillStackSpace(X86::MOV32mi, 0, Value)); // Mask all FP exceptions
533 add(MCInstBuilder(Opcode)
534 // Address = ESP
535 .addReg(X86::RSP) // BaseReg
536 .addImm(1) // ScaleAmt
537 .addReg(0) // IndexReg
538 .addImm(0) // Disp
539 .addReg(0)); // Segment
540 add(releaseStackSpace(4));
541 return std::move(Instructions);
542 }
543
initStack(unsigned Bytes)544 void ConstantInliner::initStack(unsigned Bytes) {
545 assert(Constant_.getBitWidth() <= Bytes * 8 &&
546 "Value does not have the correct size");
547 const APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
548 ? Constant_.sext(Bytes * 8)
549 : Constant_;
550 add(allocateStackSpace(Bytes));
551 size_t ByteOffset = 0;
552 for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
553 add(fillStackSpace(
554 X86::MOV32mi, ByteOffset,
555 WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
556 if (Bytes - ByteOffset >= 2) {
557 add(fillStackSpace(
558 X86::MOV16mi, ByteOffset,
559 WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
560 ByteOffset += 2;
561 }
562 if (Bytes - ByteOffset >= 1)
563 add(fillStackSpace(
564 X86::MOV8mi, ByteOffset,
565 WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
566 }
567
568 #include "X86GenExegesis.inc"
569
570 namespace {
571 class ExegesisX86Target : public ExegesisTarget {
572 public:
ExegesisX86Target()573 ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {}
574
575 private:
576 void addTargetSpecificPasses(PassManagerBase &PM) const override;
577
578 unsigned getScratchMemoryRegister(const Triple &TT) const override;
579
580 unsigned getLoopCounterRegister(const Triple &) const override;
581
getMaxMemoryAccessSize() const582 unsigned getMaxMemoryAccessSize() const override { return 64; }
583
584 Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var,
585 MCOperand &AssignedValue,
586 const BitVector &ForbiddenRegs) const override;
587
588 void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
589 unsigned Offset) const override;
590
591 void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
592 MachineBasicBlock &TargetMBB,
593 const MCInstrInfo &MII) const override;
594
595 std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
596 const APInt &Value) const override;
597
getUnavailableRegisters() const598 ArrayRef<unsigned> getUnavailableRegisters() const override {
599 return makeArrayRef(kUnavailableRegisters,
600 sizeof(kUnavailableRegisters) /
601 sizeof(kUnavailableRegisters[0]));
602 }
603
allowAsBackToBack(const Instruction & Instr) const604 bool allowAsBackToBack(const Instruction &Instr) const override {
605 const unsigned Opcode = Instr.Description.Opcode;
606 return !isInvalidOpcode(Instr) && Opcode != X86::LEA64r &&
607 Opcode != X86::LEA64_32r && Opcode != X86::LEA16r;
608 }
609
610 std::vector<InstructionTemplate>
611 generateInstructionVariants(const Instruction &Instr,
612 unsigned MaxConfigsPerOpcode) const override;
613
createSerialSnippetGenerator(const LLVMState & State,const SnippetGenerator::Options & Opts) const614 std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
615 const LLVMState &State,
616 const SnippetGenerator::Options &Opts) const override {
617 return std::make_unique<X86SerialSnippetGenerator>(State, Opts);
618 }
619
createParallelSnippetGenerator(const LLVMState & State,const SnippetGenerator::Options & Opts) const620 std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
621 const LLVMState &State,
622 const SnippetGenerator::Options &Opts) const override {
623 return std::make_unique<X86ParallelSnippetGenerator>(State, Opts);
624 }
625
matchesArch(Triple::ArchType Arch) const626 bool matchesArch(Triple::ArchType Arch) const override {
627 return Arch == Triple::x86_64 || Arch == Triple::x86;
628 }
629
630 static const unsigned kUnavailableRegisters[4];
631 };
632
633 // We disable a few registers that cannot be encoded on instructions with a REX
634 // prefix.
635 const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH,
636 X86::CH, X86::DH};
637
638 // We're using one of R8-R15 because these registers are never hardcoded in
639 // instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have less
640 // conflicts.
641 constexpr const unsigned kLoopCounterReg = X86::R8;
642
643 } // namespace
644
addTargetSpecificPasses(PassManagerBase & PM) const645 void ExegesisX86Target::addTargetSpecificPasses(PassManagerBase &PM) const {
646 // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
647 PM.add(createX86FloatingPointStackifierPass());
648 }
649
getScratchMemoryRegister(const Triple & TT) const650 unsigned ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const {
651 if (!TT.isArch64Bit()) {
652 // FIXME: This would require popping from the stack, so we would have to
653 // add some additional setup code.
654 return 0;
655 }
656 return TT.isOSWindows() ? X86::RCX : X86::RDI;
657 }
658
getLoopCounterRegister(const Triple & TT) const659 unsigned ExegesisX86Target::getLoopCounterRegister(const Triple &TT) const {
660 if (!TT.isArch64Bit()) {
661 return 0;
662 }
663 return kLoopCounterReg;
664 }
665
randomizeTargetMCOperand(const Instruction & Instr,const Variable & Var,MCOperand & AssignedValue,const BitVector & ForbiddenRegs) const666 Error ExegesisX86Target::randomizeTargetMCOperand(
667 const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
668 const BitVector &ForbiddenRegs) const {
669 const Operand &Op = Instr.getPrimaryOperand(Var);
670 switch (Op.getExplicitOperandInfo().OperandType) {
671 case X86::OperandType::OPERAND_ROUNDING_CONTROL:
672 AssignedValue =
673 MCOperand::createImm(randomIndex(X86::STATIC_ROUNDING::TO_ZERO));
674 return Error::success();
675 default:
676 break;
677 }
678 return make_error<Failure>(
679 Twine("unimplemented operand type ")
680 .concat(Twine(Op.getExplicitOperandInfo().OperandType)));
681 }
682
fillMemoryOperands(InstructionTemplate & IT,unsigned Reg,unsigned Offset) const683 void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
684 unsigned Reg,
685 unsigned Offset) const {
686 assert(!isInvalidMemoryInstr(IT.getInstr()) &&
687 "fillMemoryOperands requires a valid memory instruction");
688 int MemOpIdx = X86II::getMemoryOperandNo(IT.getInstr().Description.TSFlags);
689 assert(MemOpIdx >= 0 && "invalid memory operand index");
690 // getMemoryOperandNo() ignores tied operands, so we have to add them back.
691 MemOpIdx += X86II::getOperandBias(IT.getInstr().Description);
692 setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg)); // BaseReg
693 setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1)); // ScaleAmt
694 setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0)); // IndexReg
695 setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
696 setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0)); // Segment
697 }
698
decrementLoopCounterAndJump(MachineBasicBlock & MBB,MachineBasicBlock & TargetMBB,const MCInstrInfo & MII) const699 void ExegesisX86Target::decrementLoopCounterAndJump(
700 MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
701 const MCInstrInfo &MII) const {
702 BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8))
703 .addDef(kLoopCounterReg)
704 .addUse(kLoopCounterReg)
705 .addImm(-1);
706 BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1))
707 .addMBB(&TargetMBB)
708 .addImm(X86::COND_NE);
709 }
710
setRegTo(const MCSubtargetInfo & STI,unsigned Reg,const APInt & Value) const711 std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
712 unsigned Reg,
713 const APInt &Value) const {
714 if (X86::GR8RegClass.contains(Reg))
715 return {loadImmediate(Reg, 8, Value)};
716 if (X86::GR16RegClass.contains(Reg))
717 return {loadImmediate(Reg, 16, Value)};
718 if (X86::GR32RegClass.contains(Reg))
719 return {loadImmediate(Reg, 32, Value)};
720 if (X86::GR64RegClass.contains(Reg))
721 return {loadImmediate(Reg, 64, Value)};
722 ConstantInliner CI(Value);
723 if (X86::VR64RegClass.contains(Reg))
724 return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm);
725 if (X86::VR128XRegClass.contains(Reg)) {
726 if (STI.getFeatureBits()[X86::FeatureAVX512])
727 return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm);
728 if (STI.getFeatureBits()[X86::FeatureAVX])
729 return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
730 return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
731 }
732 if (X86::VR256XRegClass.contains(Reg)) {
733 if (STI.getFeatureBits()[X86::FeatureAVX512])
734 return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm);
735 if (STI.getFeatureBits()[X86::FeatureAVX])
736 return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
737 }
738 if (X86::VR512RegClass.contains(Reg))
739 if (STI.getFeatureBits()[X86::FeatureAVX512])
740 return CI.loadAndFinalize(Reg, 512, X86::VMOVDQU32Zrm);
741 if (X86::RSTRegClass.contains(Reg)) {
742 return CI.loadX87STAndFinalize(Reg);
743 }
744 if (X86::RFP32RegClass.contains(Reg) || X86::RFP64RegClass.contains(Reg) ||
745 X86::RFP80RegClass.contains(Reg)) {
746 return CI.loadX87FPAndFinalize(Reg);
747 }
748 if (Reg == X86::EFLAGS)
749 return CI.popFlagAndFinalize();
750 if (Reg == X86::MXCSR)
751 return CI.loadImplicitRegAndFinalize(
752 STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR : X86::LDMXCSR,
753 0x1f80);
754 if (Reg == X86::FPCW)
755 return CI.loadImplicitRegAndFinalize(X86::FLDCW16m, 0x37f);
756 return {}; // Not yet implemented.
757 }
758
759 // Instruction can have some variable operands, and we may want to see how
760 // different operands affect performance. So for each operand position,
761 // precompute all the possible choices we might care about,
762 // and greedily generate all the possible combinations of choices.
generateInstructionVariants(const Instruction & Instr,unsigned MaxConfigsPerOpcode) const763 std::vector<InstructionTemplate> ExegesisX86Target::generateInstructionVariants(
764 const Instruction &Instr, unsigned MaxConfigsPerOpcode) const {
765 bool Exploration = false;
766 SmallVector<SmallVector<MCOperand, 1>, 4> VariableChoices;
767 VariableChoices.resize(Instr.Variables.size());
768 for (auto I : llvm::zip(Instr.Variables, VariableChoices)) {
769 const Variable &Var = std::get<0>(I);
770 SmallVectorImpl<MCOperand> &Choices = std::get<1>(I);
771
772 switch (Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType) {
773 default:
774 // We don't wish to explicitly explore this variable.
775 Choices.emplace_back(); // But add invalid MCOperand to simplify logic.
776 continue;
777 case X86::OperandType::OPERAND_COND_CODE: {
778 Exploration = true;
779 auto CondCodes = seq((int)X86::CondCode::COND_O,
780 1 + (int)X86::CondCode::LAST_VALID_COND);
781 Choices.reserve(std::distance(CondCodes.begin(), CondCodes.end()));
782 for (int CondCode : CondCodes)
783 Choices.emplace_back(MCOperand::createImm(CondCode));
784 break;
785 }
786 }
787 }
788
789 // If we don't wish to explore any variables, defer to the baseline method.
790 if (!Exploration)
791 return ExegesisTarget::generateInstructionVariants(Instr,
792 MaxConfigsPerOpcode);
793
794 std::vector<InstructionTemplate> Variants;
795 size_t NumVariants;
796 CombinationGenerator<MCOperand, decltype(VariableChoices)::value_type, 4> G(
797 VariableChoices);
798
799 // How many operand combinations can we produce, within the limit?
800 NumVariants = std::min(G.numCombinations(), (size_t)MaxConfigsPerOpcode);
801 // And actually produce all the wanted operand combinations.
802 Variants.reserve(NumVariants);
803 G.generate([&](ArrayRef<MCOperand> State) -> bool {
804 Variants.emplace_back(&Instr);
805 Variants.back().setVariableValues(State);
806 // Did we run out of space for variants?
807 return Variants.size() >= NumVariants;
808 });
809
810 assert(Variants.size() == NumVariants &&
811 Variants.size() <= MaxConfigsPerOpcode &&
812 "Should not produce too many variants");
813 return Variants;
814 }
815
getTheExegesisX86Target()816 static ExegesisTarget *getTheExegesisX86Target() {
817 static ExegesisX86Target Target;
818 return &Target;
819 }
820
InitializeX86ExegesisTarget()821 void InitializeX86ExegesisTarget() {
822 ExegesisTarget::registerTarget(getTheExegesisX86Target());
823 }
824
825 } // namespace exegesis
826 } // namespace llvm
827