1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function that, given an MCInst,
// produces the binary encoding of the instruction (returned as a uint64_t, or
// written to an APInt when an encoding is wider than 64 bits).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "CodeGenInstruction.h"
16 #include "CodeGenTarget.h"
17 #include "SubtargetFeatureInfo.h"
18 #include "Types.h"
19 #include "VarLenCodeEmitterGen.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/TableGen/Error.h"
26 #include "llvm/TableGen/Record.h"
27 #include "llvm/TableGen/TableGenBackend.h"
28 #include <cassert>
29 #include <cstdint>
30 #include <map>
31 #include <set>
32 #include <string>
33 #include <utility>
34 #include <vector>
35 
36 using namespace llvm;
37 
38 namespace {
39 
40 class CodeEmitterGen {
41   RecordKeeper &Records;
42 
43 public:
44   CodeEmitterGen(RecordKeeper &R) : Records(R) {}
45 
46   void run(raw_ostream &o);
47 
48 private:
49   int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
50   std::string getInstructionCase(Record *R, CodeGenTarget &Target);
51   std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
52                                             CodeGenTarget &Target);
53   void AddCodeToMergeInOperand(Record *R, BitsInit *BI,
54                                const std::string &VarName,
55                                unsigned &NumberedOp,
56                                std::set<unsigned> &NamedOpIndices,
57                                std::string &Case, CodeGenTarget &Target);
58 
59   void emitInstructionBaseValues(
60       raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
61       CodeGenTarget &Target, int HwMode = -1);
62   unsigned BitWidth;
63   bool UseAPInt;
64 };
65 
66 // If the VarBitInit at position 'bit' matches the specified variable then
67 // return the variable bit position.  Otherwise return -1.
68 int CodeEmitterGen::getVariableBit(const std::string &VarName,
69                                    BitsInit *BI, int bit) {
70   if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
71     if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
72       if (VI->getName() == VarName)
73         return VBI->getBitNum();
74   } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
75     if (VI->getName() == VarName)
76       return 0;
77   }
78 
79   return -1;
80 }
81 
82 void CodeEmitterGen::
83 AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
84                         unsigned &NumberedOp,
85                         std::set<unsigned> &NamedOpIndices,
86                         std::string &Case, CodeGenTarget &Target) {
87   CodeGenInstruction &CGI = Target.getInstruction(R);
88 
89   // Determine if VarName actually contributes to the Inst encoding.
90   int bit = BI->getNumBits()-1;
91 
92   // Scan for a bit that this contributed to.
93   for (; bit >= 0; ) {
94     if (getVariableBit(VarName, BI, bit) != -1)
95       break;
96 
97     --bit;
98   }
99 
100   // If we found no bits, ignore this value, otherwise emit the call to get the
101   // operand encoding.
102   if (bit < 0) return;
103 
104   // If the operand matches by name, reference according to that
105   // operand number. Non-matching operands are assumed to be in
106   // order.
107   unsigned OpIdx;
108   if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
109     // Get the machine operand number for the indicated operand.
110     OpIdx = CGI.Operands[OpIdx].MIOperandNo;
111     assert(!CGI.Operands.isFlatOperandNotEmitted(OpIdx) &&
112            "Explicitly used operand also marked as not emitted!");
113   } else {
114     unsigned NumberOps = CGI.Operands.size();
115     /// If this operand is not supposed to be emitted by the
116     /// generated emitter, skip it.
117     while (NumberedOp < NumberOps &&
118            (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
119               (!NamedOpIndices.empty() && NamedOpIndices.count(
120                 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) {
121       ++NumberedOp;
122     }
123 
124     if (NumberedOp >=
125         CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) {
126       std::string E;
127       raw_string_ostream S(E);
128       S << "Too few operands in record " << R->getName()
129         << " (no match for variable " << VarName << "):\n";
130       S << *R;
131       PrintFatalError(R, E);
132     }
133 
134     OpIdx = NumberedOp++;
135   }
136 
137   std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
138   std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName;
139 
140   if (UseAPInt)
141     Case += "      op.clearAllBits();\n";
142 
143   // If the source operand has a custom encoder, use it. This will
144   // get the encoding for all of the suboperands.
145   if (!EncoderMethodName.empty()) {
146     // A custom encoder has all of the information for the
147     // sub-operands, if there are more than one, so only
148     // query the encoder once per source operand.
149     if (SO.second == 0) {
150       Case += "      // op: " + VarName + "\n";
151       if (UseAPInt) {
152         Case += "      " + EncoderMethodName + "(MI, " + utostr(OpIdx);
153         Case += ", op";
154       } else {
155         Case += "      op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
156       }
157       Case += ", Fixups, STI);\n";
158     }
159   } else {
160     Case += "      // op: " + VarName + "\n";
161     if (UseAPInt) {
162       Case += "      getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
163       Case += ", op, Fixups, STI";
164     } else {
165       Case += "      op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
166       Case += ", Fixups, STI";
167     }
168     Case += ");\n";
169   }
170 
171   // Precalculate the number of lits this variable contributes to in the
172   // operand. If there is a single lit (consecutive range of bits) we can use a
173   // destructive sequence on APInt that reduces memory allocations.
174   int numOperandLits = 0;
175   for (int tmpBit = bit; tmpBit >= 0;) {
176     int varBit = getVariableBit(VarName, BI, tmpBit);
177 
178     // If this bit isn't from a variable, skip it.
179     if (varBit == -1) {
180       --tmpBit;
181       continue;
182     }
183 
184     // Figure out the consecutive range of bits covered by this operand, in
185     // order to generate better encoding code.
186     int beginVarBit = varBit;
187     int N = 1;
188     for (--tmpBit; tmpBit >= 0;) {
189       varBit = getVariableBit(VarName, BI, tmpBit);
190       if (varBit == -1 || varBit != (beginVarBit - N))
191         break;
192       ++N;
193       --tmpBit;
194     }
195     ++numOperandLits;
196   }
197 
198   for (; bit >= 0; ) {
199     int varBit = getVariableBit(VarName, BI, bit);
200 
201     // If this bit isn't from a variable, skip it.
202     if (varBit == -1) {
203       --bit;
204       continue;
205     }
206 
207     // Figure out the consecutive range of bits covered by this operand, in
208     // order to generate better encoding code.
209     int beginInstBit = bit;
210     int beginVarBit = varBit;
211     int N = 1;
212     for (--bit; bit >= 0;) {
213       varBit = getVariableBit(VarName, BI, bit);
214       if (varBit == -1 || varBit != (beginVarBit - N)) break;
215       ++N;
216       --bit;
217     }
218 
219     std::string maskStr;
220     int opShift;
221 
222     unsigned loBit = beginVarBit - N + 1;
223     unsigned hiBit = loBit + N;
224     unsigned loInstBit = beginInstBit - N + 1;
225     if (UseAPInt) {
226       std::string extractStr;
227       if (N >= 64) {
228         extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
229                      itostr(loBit) + ")";
230         Case += "      Value.insertBits(" + extractStr + ", " +
231                 itostr(loInstBit) + ");\n";
232       } else {
233         extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
234                      ", " + itostr(loBit) + ")";
235         Case += "      Value.insertBits(" + extractStr + ", " +
236                 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
237       }
238     } else {
239       uint64_t opMask = ~(uint64_t)0 >> (64 - N);
240       opShift = beginVarBit - N + 1;
241       opMask <<= opShift;
242       maskStr = "UINT64_C(" + utostr(opMask) + ")";
243       opShift = beginInstBit - beginVarBit;
244 
245       if (numOperandLits == 1) {
246         Case += "      op &= " + maskStr + ";\n";
247         if (opShift > 0) {
248           Case += "      op <<= " + itostr(opShift) + ";\n";
249         } else if (opShift < 0) {
250           Case += "      op >>= " + itostr(-opShift) + ";\n";
251         }
252         Case += "      Value |= op;\n";
253       } else {
254         if (opShift > 0) {
255           Case += "      Value |= (op & " + maskStr + ") << " +
256                   itostr(opShift) + ";\n";
257         } else if (opShift < 0) {
258           Case += "      Value |= (op & " + maskStr + ") >> " +
259                   itostr(-opShift) + ";\n";
260         } else {
261           Case += "      Value |= (op & " + maskStr + ");\n";
262         }
263       }
264     }
265   }
266 }
267 
268 std::string CodeEmitterGen::getInstructionCase(Record *R,
269                                                CodeGenTarget &Target) {
270   std::string Case;
271   if (const RecordVal *RV = R->getValue("EncodingInfos")) {
272     if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
273       const CodeGenHwModes &HWM = Target.getHwModes();
274       EncodingInfoByHwMode EBM(DI->getDef(), HWM);
275       Case += "      switch (HwMode) {\n";
276       Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
277       for (auto &KV : EBM) {
278         Case += "      case " + itostr(KV.first) + ": {\n";
279         Case += getInstructionCaseForEncoding(R, KV.second, Target);
280         Case += "      break;\n";
281         Case += "      }\n";
282       }
283       Case += "      }\n";
284       return Case;
285     }
286   }
287   return getInstructionCaseForEncoding(R, R, Target);
288 }
289 
290 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
291                                                           CodeGenTarget &Target) {
292   std::string Case;
293   BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
294   unsigned NumberedOp = 0;
295   std::set<unsigned> NamedOpIndices;
296 
297   // Collect the set of operand indices that might correspond to named
298   // operand, and skip these when assigning operands based on position.
299   if (Target.getInstructionSet()->
300        getValueAsBit("noNamedPositionallyEncodedOperands")) {
301     CodeGenInstruction &CGI = Target.getInstruction(R);
302     for (const RecordVal &RV : R->getValues()) {
303       unsigned OpIdx;
304       if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx))
305         continue;
306 
307       NamedOpIndices.insert(OpIdx);
308     }
309   }
310 
311   // Loop over all of the fields in the instruction, determining which are the
312   // operands to the instruction.
313   for (const RecordVal &RV : EncodingDef->getValues()) {
314     // Ignore fixed fields in the record, we're looking for values like:
315     //    bits<5> RST = { ?, ?, ?, ?, ? };
316     if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
317       continue;
318 
319     AddCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp,
320                             NamedOpIndices, Case, Target);
321   }
322 
323   StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
324   if (!PostEmitter.empty()) {
325     Case += "      Value = ";
326     Case += PostEmitter;
327     Case += "(MI, Value";
328     Case += ", STI";
329     Case += ");\n";
330   }
331 
332   return Case;
333 }
334 
335 static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
336   for (unsigned I = 0; I < Bits.getNumWords(); ++I)
337     OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])
338        << ")";
339 }
340 
341 void CodeEmitterGen::emitInstructionBaseValues(
342     raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
343     CodeGenTarget &Target, int HwMode) {
344   const CodeGenHwModes &HWM = Target.getHwModes();
345   if (HwMode == -1)
346     o << "  static const uint64_t InstBits[] = {\n";
347   else
348     o << "  static const uint64_t InstBits_" << HWM.getMode(HwMode).Name
349       << "[] = {\n";
350 
351   for (const CodeGenInstruction *CGI : NumberedInstructions) {
352     Record *R = CGI->TheDef;
353 
354     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
355         R->getValueAsBit("isPseudo")) {
356       o << "    "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n";
357       continue;
358     }
359 
360     Record *EncodingDef = R;
361     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
362       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
363         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
364         if (EBM.hasMode(HwMode))
365           EncodingDef = EBM.get(HwMode);
366       }
367     }
368     BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
369 
370     // Start by filling in fixed values.
371     APInt Value(BitWidth, 0);
372     for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
373       if (BitInit *B = dyn_cast<BitInit>(BI->getBit(e - i - 1)))
374         Value |= APInt(BitWidth, (uint64_t)B->getValue()) << (e - i - 1);
375     }
376     o << "    ";
377     emitInstBits(o, Value);
378     o << "," << '\t' << "// " << R->getName() << "\n";
379   }
380   o << "    UINT64_C(0)\n  };\n";
381 }
382 
383 void CodeEmitterGen::run(raw_ostream &o) {
384   CodeGenTarget Target(Records);
385   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
386 
387   // For little-endian instruction bit encodings, reverse the bit order
388   Target.reverseBitsForLittleEndianEncoding();
389 
390   ArrayRef<const CodeGenInstruction*> NumberedInstructions =
391     Target.getInstructionsByEnumValue();
392 
393   if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
394         Record *R = CGI->TheDef;
395         return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
396       })) {
397     emitVarLenCodeEmitter(Records, o);
398   } else {
399     const CodeGenHwModes &HWM = Target.getHwModes();
400     // The set of HwModes used by instruction encodings.
401     std::set<unsigned> HwModes;
402     BitWidth = 0;
403     for (const CodeGenInstruction *CGI : NumberedInstructions) {
404       Record *R = CGI->TheDef;
405       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
406           R->getValueAsBit("isPseudo"))
407         continue;
408 
409       if (const RecordVal *RV = R->getValue("EncodingInfos")) {
410         if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
411           EncodingInfoByHwMode EBM(DI->getDef(), HWM);
412           for (auto &KV : EBM) {
413             BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
414             BitWidth = std::max(BitWidth, BI->getNumBits());
415             HwModes.insert(KV.first);
416           }
417           continue;
418         }
419       }
420       BitsInit *BI = R->getValueAsBitsInit("Inst");
421       BitWidth = std::max(BitWidth, BI->getNumBits());
422     }
423     UseAPInt = BitWidth > 64;
424 
425     // Emit function declaration
426     if (UseAPInt) {
427       o << "void " << Target.getName()
428         << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
429         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
430         << "    APInt &Inst,\n"
431         << "    APInt &Scratch,\n"
432         << "    const MCSubtargetInfo &STI) const {\n";
433     } else {
434       o << "uint64_t " << Target.getName();
435       o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
436         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
437         << "    const MCSubtargetInfo &STI) const {\n";
438     }
439 
440     // Emit instruction base values
441     if (HwModes.empty()) {
442       emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
443     } else {
444       for (unsigned HwMode : HwModes)
445         emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
446     }
447 
448     if (!HwModes.empty()) {
449       o << "  const uint64_t *InstBits;\n";
450       o << "  unsigned HwMode = STI.getHwMode();\n";
451       o << "  switch (HwMode) {\n";
452       o << "  default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
453       for (unsigned I : HwModes) {
454         o << "  case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
455           << "; break;\n";
456       }
457       o << "  };\n";
458     }
459 
460     // Map to accumulate all the cases.
461     std::map<std::string, std::vector<std::string>> CaseMap;
462 
463     // Construct all cases statement for each opcode
464     for (Record *R : Insts) {
465       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
466           R->getValueAsBit("isPseudo"))
467         continue;
468       std::string InstName =
469           (R->getValueAsString("Namespace") + "::" + R->getName()).str();
470       std::string Case = getInstructionCase(R, Target);
471 
472       CaseMap[Case].push_back(std::move(InstName));
473     }
474 
475     // Emit initial function code
476     if (UseAPInt) {
477       int NumWords = APInt::getNumWords(BitWidth);
478       o << "  const unsigned opcode = MI.getOpcode();\n"
479         << "  if (Scratch.getBitWidth() != " << BitWidth << ")\n"
480         << "    Scratch = Scratch.zext(" << BitWidth << ");\n"
481         << "  Inst = APInt(" << BitWidth
482         << ", makeArrayRef(InstBits + opcode * " << NumWords << ", " << NumWords
483         << "));\n"
484         << "  APInt &Value = Inst;\n"
485         << "  APInt &op = Scratch;\n"
486         << "  switch (opcode) {\n";
487     } else {
488       o << "  const unsigned opcode = MI.getOpcode();\n"
489         << "  uint64_t Value = InstBits[opcode];\n"
490         << "  uint64_t op = 0;\n"
491         << "  (void)op;  // suppress warning\n"
492         << "  switch (opcode) {\n";
493     }
494 
495     // Emit each case statement
496     std::map<std::string, std::vector<std::string>>::iterator IE, EE;
497     for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
498       const std::string &Case = IE->first;
499       std::vector<std::string> &InstList = IE->second;
500 
501       for (int i = 0, N = InstList.size(); i < N; i++) {
502         if (i)
503           o << "\n";
504         o << "    case " << InstList[i] << ":";
505       }
506       o << " {\n";
507       o << Case;
508       o << "      break;\n"
509         << "    }\n";
510     }
511 
512     // Default case: unhandled opcode
513     o << "  default:\n"
514       << "    std::string msg;\n"
515       << "    raw_string_ostream Msg(msg);\n"
516       << "    Msg << \"Not supported instr: \" << MI;\n"
517       << "    report_fatal_error(Msg.str().c_str());\n"
518       << "  }\n";
519     if (UseAPInt)
520       o << "  Inst = Value;\n";
521     else
522       o << "  return Value;\n";
523     o << "}\n\n";
524   }
525 }
526 
527 } // end anonymous namespace
528 
529 namespace llvm {
530 
531 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
532   emitSourceFileHeader("Machine Code Emitter", OS);
533   CodeEmitterGen(RK).run(OS);
534 }
535 
536 } // end namespace llvm
537