1 //===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the inline assembler pieces of the AsmPrinter class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/CodeGen/AsmPrinter.h"
18 #include "llvm/CodeGen/MachineBasicBlock.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineModuleInfo.h"
21 #include "llvm/CodeGen/TargetRegisterInfo.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/DiagnosticInfo.h"
26 #include "llvm/IR/InlineAsm.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCInstrInfo.h"
31 #include "llvm/MC/MCParser/MCAsmLexer.h"
32 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/SourceMgr.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Target/TargetMachine.h"
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "asm-printer"
44 
addInlineAsmDiagBuffer(StringRef AsmStr,const MDNode * LocMDNode) const45 unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
46                                             const MDNode *LocMDNode) const {
47   MCContext &Context = MMI->getContext();
48   Context.initInlineSourceManager();
49   SourceMgr &SrcMgr = *Context.getInlineSourceManager();
50   std::vector<const MDNode *> &LocInfos = Context.getLocInfos();
51 
52   std::unique_ptr<MemoryBuffer> Buffer;
53   // The inline asm source manager will outlive AsmStr, so make a copy of the
54   // string for SourceMgr to own.
55   Buffer = MemoryBuffer::getMemBufferCopy(AsmStr, "<inline asm>");
56 
57   // Tell SrcMgr about this buffer, it takes ownership of the buffer.
58   unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
59 
60   // Store LocMDNode in DiagInfo, using BufNum as an identifier.
61   if (LocMDNode) {
62     LocInfos.resize(BufNum);
63     LocInfos[BufNum - 1] = LocMDNode;
64   }
65 
66   return BufNum;
67 }
68 
69 
70 /// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
emitInlineAsm(StringRef Str,const MCSubtargetInfo & STI,const MCTargetOptions & MCOptions,const MDNode * LocMDNode,InlineAsm::AsmDialect Dialect) const71 void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
72                                const MCTargetOptions &MCOptions,
73                                const MDNode *LocMDNode,
74                                InlineAsm::AsmDialect Dialect) const {
75   assert(!Str.empty() && "Can't emit empty inline asm block");
76 
77   // Remember if the buffer is nul terminated or not so we can avoid a copy.
78   bool isNullTerminated = Str.back() == 0;
79   if (isNullTerminated)
80     Str = Str.substr(0, Str.size()-1);
81 
82   // If the output streamer does not have mature MC support or the integrated
83   // assembler has been disabled or not required, just emit the blob textually.
84   // Otherwise parse the asm and emit it via MC support.
85   // This is useful in case the asm parser doesn't handle something but the
86   // system assembler does.
87   const MCAsmInfo *MCAI = TM.getMCAsmInfo();
88   assert(MCAI && "No MCAsmInfo");
89   if (!MCAI->useIntegratedAssembler() &&
90       !MCAI->parseInlineAsmUsingAsmParser() &&
91       !OutStreamer->isIntegratedAssemblerRequired()) {
92     emitInlineAsmStart();
93     OutStreamer->emitRawText(Str);
94     emitInlineAsmEnd(STI, nullptr);
95     return;
96   }
97 
98   unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
99   SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager();
100   SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
101 
102   std::unique_ptr<MCAsmParser> Parser(
103       createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
104 
105   // Do not use assembler-level information for parsing inline assembly.
106   OutStreamer->setUseAssemblerInfoForParsing(false);
107 
108   // We create a new MCInstrInfo here since we might be at the module level
109   // and not have a MachineFunction to initialize the TargetInstrInfo from and
110   // we only need MCInstrInfo for asm parsing. We create one unconditionally
111   // because it's not subtarget dependent.
112   std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
113   assert(MII && "Failed to create instruction info");
114   std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
115       STI, *Parser, *MII, MCOptions));
116   if (!TAP)
117     report_fatal_error("Inline asm not supported by this streamer because"
118                        " we don't have an asm parser for this target\n");
119   Parser->setAssemblerDialect(Dialect);
120   Parser->setTargetParser(*TAP);
121   // Enable lexing Masm binary and hex integer literals in intel inline
122   // assembly.
123   if (Dialect == InlineAsm::AD_Intel)
124     Parser->getLexer().setLexMasmIntegers(true);
125 
126   emitInlineAsmStart();
127   // Don't implicitly switch to the text section before the asm.
128   (void)Parser->Run(/*NoInitialTextSection*/ true,
129                     /*NoFinalize*/ true);
130   emitInlineAsmEnd(STI, &TAP->getSTI());
131 }
132 
EmitInlineAsmStr(const char * AsmStr,const MachineInstr * MI,MachineModuleInfo * MMI,const MCAsmInfo * MAI,AsmPrinter * AP,uint64_t LocCookie,raw_ostream & OS)133 static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
134                              MachineModuleInfo *MMI, const MCAsmInfo *MAI,
135                              AsmPrinter *AP, uint64_t LocCookie,
136                              raw_ostream &OS) {
137   bool InputIsIntelDialect = MI->getInlineAsmDialect() == InlineAsm::AD_Intel;
138 
139   if (InputIsIntelDialect) {
140     // Switch to the inline assembly variant.
141     OS << "\t.intel_syntax\n\t";
142   }
143 
144   int CurVariant = -1; // The number of the {.|.|.} region we are in.
145   const char *LastEmitted = AsmStr; // One past the last character emitted.
146   unsigned NumOperands = MI->getNumOperands();
147 
148   int AsmPrinterVariant;
149   if (InputIsIntelDialect)
150     AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
151   else
152     AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
153 
154   // FIXME: Should this happen for `asm inteldialect` as well?
155   if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker())
156     OS << '\t';
157 
158   while (*LastEmitted) {
159     switch (*LastEmitted) {
160     default: {
161       // Not a special case, emit the string section literally.
162       const char *LiteralEnd = LastEmitted+1;
163       while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
164              *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
165         ++LiteralEnd;
166       if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
167         OS.write(LastEmitted, LiteralEnd - LastEmitted);
168       LastEmitted = LiteralEnd;
169       break;
170     }
171     case '\n':
172       ++LastEmitted;   // Consume newline character.
173       OS << '\n';      // Indent code with newline.
174       break;
175     case '$': {
176       ++LastEmitted;   // Consume '$' character.
177       bool Done = true;
178 
179       // Handle escapes.
180       switch (*LastEmitted) {
181       default: Done = false; break;
182       case '$':     // $$ -> $
183         if (!InputIsIntelDialect)
184           if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
185             OS << '$';
186         ++LastEmitted;  // Consume second '$' character.
187         break;
188       case '(':        // $( -> same as GCC's { character.
189         ++LastEmitted; // Consume '(' character.
190         if (CurVariant != -1)
191           report_fatal_error("Nested variants found in inline asm string: '" +
192                              Twine(AsmStr) + "'");
193         CurVariant = 0; // We're in the first variant now.
194         break;
195       case '|':
196         ++LastEmitted; // Consume '|' character.
197         if (CurVariant == -1)
198           OS << '|'; // This is gcc's behavior for | outside a variant.
199         else
200           ++CurVariant; // We're in the next variant.
201         break;
202       case ')':        // $) -> same as GCC's } char.
203         ++LastEmitted; // Consume ')' character.
204         if (CurVariant == -1)
205           OS << '}'; // This is gcc's behavior for } outside a variant.
206         else
207           CurVariant = -1;
208         break;
209       }
210       if (Done) break;
211 
212       bool HasCurlyBraces = false;
213       if (*LastEmitted == '{') {     // ${variable}
214         ++LastEmitted;               // Consume '{' character.
215         HasCurlyBraces = true;
216       }
217 
218       // If we have ${:foo}, then this is not a real operand reference, it is a
219       // "magic" string reference, just like in .td files.  Arrange to call
220       // PrintSpecial.
221       if (HasCurlyBraces && *LastEmitted == ':') {
222         ++LastEmitted;
223         const char *StrStart = LastEmitted;
224         const char *StrEnd = strchr(StrStart, '}');
225         if (!StrEnd)
226           report_fatal_error("Unterminated ${:foo} operand in inline asm"
227                              " string: '" + Twine(AsmStr) + "'");
228         if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
229           AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
230         LastEmitted = StrEnd+1;
231         break;
232       }
233 
234       const char *IDStart = LastEmitted;
235       const char *IDEnd = IDStart;
236       while (isDigit(*IDEnd))
237         ++IDEnd;
238 
239       unsigned Val;
240       if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
241         report_fatal_error("Bad $ operand number in inline asm string: '" +
242                            Twine(AsmStr) + "'");
243       LastEmitted = IDEnd;
244 
245       if (Val >= NumOperands - 1)
246         report_fatal_error("Invalid $ operand number in inline asm string: '" +
247                            Twine(AsmStr) + "'");
248 
249       char Modifier[2] = { 0, 0 };
250 
251       if (HasCurlyBraces) {
252         // If we have curly braces, check for a modifier character.  This
253         // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
254         if (*LastEmitted == ':') {
255           ++LastEmitted;    // Consume ':' character.
256           if (*LastEmitted == 0)
257             report_fatal_error("Bad ${:} expression in inline asm string: '" +
258                                Twine(AsmStr) + "'");
259 
260           Modifier[0] = *LastEmitted;
261           ++LastEmitted;    // Consume modifier character.
262         }
263 
264         if (*LastEmitted != '}')
265           report_fatal_error("Bad ${} expression in inline asm string: '" +
266                              Twine(AsmStr) + "'");
267         ++LastEmitted;    // Consume '}' character.
268       }
269 
270       // Okay, we finally have a value number.  Ask the target to print this
271       // operand!
272       if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
273         unsigned OpNo = InlineAsm::MIOp_FirstOperand;
274 
275         bool Error = false;
276 
277         // Scan to find the machine operand number for the operand.
278         for (; Val; --Val) {
279           if (OpNo >= MI->getNumOperands())
280             break;
281           const InlineAsm::Flag F(MI->getOperand(OpNo).getImm());
282           OpNo += F.getNumOperandRegisters() + 1;
283         }
284 
285         // We may have a location metadata attached to the end of the
286         // instruction, and at no point should see metadata at any
287         // other point while processing. It's an error if so.
288         if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
289           Error = true;
290         } else {
291           const InlineAsm::Flag F(MI->getOperand(OpNo).getImm());
292           ++OpNo; // Skip over the ID number.
293 
294           // FIXME: Shouldn't arch-independent output template handling go into
295           // PrintAsmOperand?
296           // Labels are target independent.
297           if (MI->getOperand(OpNo).isBlockAddress()) {
298             const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
299             MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
300             Sym->print(OS, AP->MAI);
301             MMI->getContext().registerInlineAsmLabel(Sym);
302           } else if (MI->getOperand(OpNo).isMBB()) {
303             const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
304             Sym->print(OS, AP->MAI);
305           } else if (F.isMemKind()) {
306             Error = AP->PrintAsmMemoryOperand(
307                 MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
308           } else {
309             Error = AP->PrintAsmOperand(MI, OpNo,
310                                         Modifier[0] ? Modifier : nullptr, OS);
311           }
312         }
313         if (Error) {
314           std::string msg;
315           raw_string_ostream Msg(msg);
316           Msg << "invalid operand in inline asm: '" << AsmStr << "'";
317           MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
318         }
319       }
320       break;
321     }
322     }
323   }
324   if (InputIsIntelDialect)
325     OS << "\n\t.att_syntax";
326   OS << '\n' << (char)0;  // null terminate string.
327 }
328 
329 /// This method formats and emits the specified machine instruction that is an
330 /// inline asm.
emitInlineAsm(const MachineInstr * MI) const331 void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
332   assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
333 
334   // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
335   const char *AsmStr = MI->getOperand(0).getSymbolName();
336 
337   // If this asmstr is empty, just print the #APP/#NOAPP markers.
338   // These are useful to see where empty asm's wound up.
339   if (AsmStr[0] == 0) {
340     OutStreamer->emitRawComment(MAI->getInlineAsmStart());
341     OutStreamer->emitRawComment(MAI->getInlineAsmEnd());
342     return;
343   }
344 
345   // Emit the #APP start marker.  This has to happen even if verbose-asm isn't
346   // enabled, so we use emitRawComment.
347   OutStreamer->emitRawComment(MAI->getInlineAsmStart());
348 
349   // Get the !srcloc metadata node if we have it, and decode the loc cookie from
350   // it.
351   uint64_t LocCookie = 0;
352   const MDNode *LocMD = nullptr;
353   for (const MachineOperand &MO : llvm::reverse(MI->operands())) {
354     if (MO.isMetadata() && (LocMD = MO.getMetadata()) &&
355         LocMD->getNumOperands() != 0) {
356       if (const ConstantInt *CI =
357               mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
358         LocCookie = CI->getZExtValue();
359         break;
360       }
361     }
362   }
363 
364   // Emit the inline asm to a temporary string so we can emit it through
365   // EmitInlineAsm.
366   SmallString<256> StringData;
367   raw_svector_ostream OS(StringData);
368 
369   AsmPrinter *AP = const_cast<AsmPrinter*>(this);
370   EmitInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
371 
372   // Emit warnings if we use reserved registers on the clobber list, as
373   // that might lead to undefined behaviour.
374   SmallVector<Register, 8> RestrRegs;
375   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
376   // Start with the first operand descriptor, and iterate over them.
377   for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands();
378        I < NumOps; ++I) {
379     const MachineOperand &MO = MI->getOperand(I);
380     if (!MO.isImm())
381       continue;
382     const InlineAsm::Flag F(MO.getImm());
383     if (F.isClobberKind()) {
384       Register Reg = MI->getOperand(I + 1).getReg();
385       if (!TRI->isAsmClobberable(*MF, Reg))
386         RestrRegs.push_back(Reg);
387     }
388     // Skip to one before the next operand descriptor, if it exists.
389     I += F.getNumOperandRegisters();
390   }
391 
392   if (!RestrRegs.empty()) {
393     std::string Msg = "inline asm clobber list contains reserved registers: ";
394     ListSeparator LS;
395     for (const Register RR : RestrRegs) {
396       Msg += LS;
397       Msg += TRI->getRegAsmName(RR);
398     }
399     const char *Note =
400         "Reserved registers on the clobber list may not be "
401         "preserved across the asm statement, and clobbering them may "
402         "lead to undefined behaviour.";
403     MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
404         LocCookie, Msg, DiagnosticSeverity::DS_Warning));
405     MMI->getModule()->getContext().diagnose(
406         DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note));
407 
408     for (const Register RR : RestrRegs) {
409       if (std::optional<std::string> reason =
410               TRI->explainReservedReg(*MF, RR)) {
411         MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
412             LocCookie, *reason, DiagnosticSeverity::DS_Note));
413       }
414     }
415   }
416 
417   emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
418                 MI->getInlineAsmDialect());
419 
420   // Emit the #NOAPP end marker.  This has to happen even if verbose-asm isn't
421   // enabled, so we use emitRawComment.
422   OutStreamer->emitRawComment(MAI->getInlineAsmEnd());
423 }
424 
425 /// PrintSpecial - Print information related to the specified machine instr
426 /// that is independent of the operand, and may be independent of the instr
427 /// itself.  This can be useful for portably encoding the comment character
428 /// or other bits of target-specific knowledge into the asmstrings.  The
429 /// syntax used is ${:comment}.  Targets can override this to add support
430 /// for their own strange codes.
PrintSpecial(const MachineInstr * MI,raw_ostream & OS,StringRef Code) const431 void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
432                               StringRef Code) const {
433   if (Code == "private") {
434     const DataLayout &DL = MF->getDataLayout();
435     OS << DL.getPrivateGlobalPrefix();
436   } else if (Code == "comment") {
437     OS << MAI->getCommentString();
438   } else if (Code == "uid") {
439     // Comparing the address of MI isn't sufficient, because machineinstrs may
440     // be allocated to the same address across functions.
441 
442     // If this is a new LastFn instruction, bump the counter.
443     if (LastMI != MI || LastFn != getFunctionNumber()) {
444       ++Counter;
445       LastMI = MI;
446       LastFn = getFunctionNumber();
447     }
448     OS << Counter;
449   } else {
450     std::string msg;
451     raw_string_ostream Msg(msg);
452     Msg << "Unknown special formatter '" << Code
453          << "' for machine instr: " << *MI;
454     report_fatal_error(Twine(Msg.str()));
455   }
456 }
457 
PrintSymbolOperand(const MachineOperand & MO,raw_ostream & OS)458 void AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS) {
459   assert(MO.isGlobal() && "caller should check MO.isGlobal");
460   getSymbolPreferLocal(*MO.getGlobal())->print(OS, MAI);
461   printOffset(MO.getOffset(), OS);
462 }
463 
464 /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
465 /// instruction, using the specified assembler variant.  Targets should
466 /// override this to format as appropriate for machine specific ExtraCodes
467 /// or when the arch-independent handling would be too complex otherwise.
PrintAsmOperand(const MachineInstr * MI,unsigned OpNo,const char * ExtraCode,raw_ostream & O)468 bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
469                                  const char *ExtraCode, raw_ostream &O) {
470   // Does this asm operand have a single letter operand modifier?
471   if (ExtraCode && ExtraCode[0]) {
472     if (ExtraCode[1] != 0) return true; // Unknown modifier.
473 
474     // https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html
475     const MachineOperand &MO = MI->getOperand(OpNo);
476     switch (ExtraCode[0]) {
477     default:
478       return true;  // Unknown modifier.
479     case 'a': // Print as memory address.
480       if (MO.isReg()) {
481         PrintAsmMemoryOperand(MI, OpNo, nullptr, O);
482         return false;
483       }
484       [[fallthrough]]; // GCC allows '%a' to behave like '%c' with immediates.
485     case 'c': // Substitute immediate value without immediate syntax
486       if (MO.isImm()) {
487         O << MO.getImm();
488         return false;
489       }
490       if (MO.isGlobal()) {
491         PrintSymbolOperand(MO, O);
492         return false;
493       }
494       return true;
495     case 'n':  // Negate the immediate constant.
496       if (!MO.isImm())
497         return true;
498       O << -MO.getImm();
499       return false;
500     case 's':  // The GCC deprecated s modifier
501       if (!MO.isImm())
502         return true;
503       O << ((32 - MO.getImm()) & 31);
504       return false;
505     }
506   }
507   return true;
508 }
509 
PrintAsmMemoryOperand(const MachineInstr * MI,unsigned OpNo,const char * ExtraCode,raw_ostream & O)510 bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
511                                        const char *ExtraCode, raw_ostream &O) {
512   // Target doesn't support this yet!
513   return true;
514 }
515 
/// Hook called by emitInlineAsm immediately before the inline asm text is
/// emitted or parsed.  This base implementation does nothing.
void AsmPrinter::emitInlineAsmStart() const {}
517 
/// Hook called by emitInlineAsm immediately after the inline asm text has been
/// emitted or parsed.  This base implementation does nothing.
///
/// \param StartInfo  subtarget info that was passed in for the asm block.
/// \param EndInfo    subtarget info of the target asm parser after parsing, or
///                   null when the text was emitted raw without parsing.
void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
                                  const MCSubtargetInfo *EndInfo) const {}
520