1 //===-- WebAssemblyAsmPrinter.cpp - WebAssembly LLVM assembly writer ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains a printer that converts from our internal
11 /// representation of machine-dependent LLVM code to the WebAssembly assembly
12 /// language.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "WebAssemblyAsmPrinter.h"
17 #include "MCTargetDesc/WebAssemblyInstPrinter.h"
18 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
19 #include "MCTargetDesc/WebAssemblyTargetStreamer.h"
20 #include "TargetInfo/WebAssemblyTargetInfo.h"
21 #include "WebAssembly.h"
22 #include "WebAssemblyMCInstLower.h"
23 #include "WebAssemblyMachineFunctionInfo.h"
24 #include "WebAssemblyRegisterInfo.h"
25 #include "WebAssemblyTargetMachine.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/BinaryFormat/Wasm.h"
29 #include "llvm/CodeGen/Analysis.h"
30 #include "llvm/CodeGen/AsmPrinter.h"
31 #include "llvm/CodeGen/MachineConstantPool.h"
32 #include "llvm/CodeGen/MachineInstr.h"
33 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DebugInfoMetadata.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/Metadata.h"
38 #include "llvm/MC/MCContext.h"
39 #include "llvm/MC/MCSectionWasm.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSymbol.h"
42 #include "llvm/MC/MCSymbolWasm.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Support/TargetRegistry.h"
45 #include "llvm/Support/raw_ostream.h"
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "asm-printer"
50 
// Command-line options that are only declared here; their definitions are not
// part of this file.
// NOTE(review): WasmKeepRegisters is not referenced in the portion of this
// file under review -- confirm it is still needed.
extern cl::opt<bool> WasmKeepRegisters;
// When either of the following two options is set, emitEndOfAsmFile asks
// getMCSymbolForFunction to map "__invoke_"-prefixed function names to
// Emscripten invoke symbols.
extern cl::opt<bool> EnableEmException;
extern cl::opt<bool> EnableEmSjLj;
54 
55 //===----------------------------------------------------------------------===//
56 // Helpers.
57 //===----------------------------------------------------------------------===//
58 
59 MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
60   const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
61   const TargetRegisterClass *TRC = MRI->getRegClass(RegNo);
62   for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16,
63                 MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64})
64     if (TRI->isTypeLegalForClass(*TRC, T))
65       return T;
66   LLVM_DEBUG(errs() << "Unknown type for register number: " << RegNo);
67   llvm_unreachable("Unknown register type");
68   return MVT::Other;
69 }
70 
71 std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
72   Register RegNo = MO.getReg();
73   assert(Register::isVirtualRegister(RegNo) &&
74          "Unlowered physical register encountered during assembly printing");
75   assert(!MFI->isVRegStackified(RegNo));
76   unsigned WAReg = MFI->getWAReg(RegNo);
77   assert(WAReg != WebAssemblyFunctionInfo::UnusedReg);
78   return '$' + utostr(WAReg);
79 }
80 
81 WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() {
82   MCTargetStreamer *TS = OutStreamer->getTargetStreamer();
83   return static_cast<WebAssemblyTargetStreamer *>(TS);
84 }
85 
// Emscripten exception handling helpers
//
// These helpers convert the invoke names generated by the
// LowerEmscriptenEHSjLj pass into the real names expected by the JavaScript
// glue code. The invoke names used by the Emscripten JS glue code are based on
// the wasm argument and return types; for example, the invoke for a function
// that takes an i32 and returns nothing is 'invoke_vi'. The names generated by
// the LowerEmscriptenEHSjLj pass, however, contain a string mangled from the
// IR types, for example "__invoke_void_%struct.mystruct*_int", because the
// final wasm types are not available in the IR pass. So we convert those names
// to the form that the Emscripten JS code expects.
//
// Refer to the LowerEmscriptenEHSjLj pass for more details.
99 
100 // Returns true if the given function name is an invoke name generated by
101 // LowerEmscriptenEHSjLj pass.
102 static bool isEmscriptenInvokeName(StringRef Name) {
103   if (Name.front() == '"' && Name.back() == '"')
104     Name = Name.substr(1, Name.size() - 2);
105   return Name.startswith("__invoke_");
106 }
107 
108 // Returns a character that represents the given wasm value type in invoke
109 // signatures.
110 static char getInvokeSig(wasm::ValType VT) {
111   switch (VT) {
112   case wasm::ValType::I32:
113     return 'i';
114   case wasm::ValType::I64:
115     return 'j';
116   case wasm::ValType::F32:
117     return 'f';
118   case wasm::ValType::F64:
119     return 'd';
120   case wasm::ValType::V128:
121     return 'V';
122   case wasm::ValType::FUNCREF:
123     return 'F';
124   case wasm::ValType::EXTERNREF:
125     return 'X';
126   }
127   llvm_unreachable("Unhandled wasm::ValType enum");
128 }
129 
130 // Given the wasm signature, generate the invoke name in the format JS glue code
131 // expects.
132 static std::string getEmscriptenInvokeSymbolName(wasm::WasmSignature *Sig) {
133   assert(Sig->Returns.size() <= 1);
134   std::string Ret = "invoke_";
135   if (!Sig->Returns.empty())
136     for (auto VT : Sig->Returns)
137       Ret += getInvokeSig(VT);
138   else
139     Ret += 'v';
140   // Invokes' first argument is a pointer to the original function, so skip it
141   for (unsigned I = 1, E = Sig->Params.size(); I < E; I++)
142     Ret += getInvokeSig(Sig->Params[I]);
143   return Ret;
144 }
145 
146 //===----------------------------------------------------------------------===//
147 // WebAssemblyAsmPrinter Implementation.
148 //===----------------------------------------------------------------------===//
149 
150 MCSymbolWasm *WebAssemblyAsmPrinter::getMCSymbolForFunction(
151     const Function *F, bool EnableEmEH, wasm::WasmSignature *Sig,
152     bool &InvokeDetected) {
153   MCSymbolWasm *WasmSym = nullptr;
154   if (EnableEmEH && isEmscriptenInvokeName(F->getName())) {
155     assert(Sig);
156     InvokeDetected = true;
157     if (Sig->Returns.size() > 1) {
158       std::string Msg =
159           "Emscripten EH/SjLj does not support multivalue returns: " +
160           std::string(F->getName()) + ": " +
161           WebAssembly::signatureToString(Sig);
162       report_fatal_error(Msg);
163     }
164     WasmSym = cast<MCSymbolWasm>(
165         GetExternalSymbolSymbol(getEmscriptenInvokeSymbolName(Sig)));
166   } else {
167     WasmSym = cast<MCSymbolWasm>(getSymbol(F));
168   }
169   return WasmSym;
170 }
171 
172 void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
173   for (auto &It : OutContext.getSymbols()) {
174     // Emit a .globaltype and .eventtype declaration.
175     auto Sym = cast<MCSymbolWasm>(It.getValue());
176     if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_GLOBAL)
177       getTargetStreamer()->emitGlobalType(Sym);
178     else if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_EVENT)
179       getTargetStreamer()->emitEventType(Sym);
180   }
181 
182   DenseSet<MCSymbol *> InvokeSymbols;
183   for (const auto &F : M) {
184     if (F.isIntrinsic())
185       continue;
186 
187     // Emit function type info for all undefined functions
188     if (F.isDeclarationForLinker()) {
189       SmallVector<MVT, 4> Results;
190       SmallVector<MVT, 4> Params;
191       computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results);
192       // At this point these MCSymbols may or may not have been created already
193       // and thus also contain a signature, but we need to get the signature
194       // anyway here in case it is an invoke that has not yet been created. We
195       // will discard it later if it turns out not to be necessary.
196       auto Signature = signatureFromMVTs(Results, Params);
197       bool InvokeDetected = false;
198       auto *Sym = getMCSymbolForFunction(&F, EnableEmException || EnableEmSjLj,
199                                          Signature.get(), InvokeDetected);
200 
201       // Multiple functions can be mapped to the same invoke symbol. For
202       // example, two IR functions '__invoke_void_i8*' and '__invoke_void_i32'
203       // are both mapped to '__invoke_vi'. We keep them in a set once we emit an
204       // Emscripten EH symbol so we don't emit the same symbol twice.
205       if (InvokeDetected && !InvokeSymbols.insert(Sym).second)
206         continue;
207 
208       Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
209       if (!Sym->getSignature()) {
210         Sym->setSignature(Signature.get());
211         addSignature(std::move(Signature));
212       } else {
213         // This symbol has already been created and had a signature. Discard it.
214         Signature.reset();
215       }
216 
217       getTargetStreamer()->emitFunctionType(Sym);
218 
219       if (F.hasFnAttribute("wasm-import-module")) {
220         StringRef Name =
221             F.getFnAttribute("wasm-import-module").getValueAsString();
222         Sym->setImportModule(storeName(Name));
223         getTargetStreamer()->emitImportModule(Sym, Name);
224       }
225       if (F.hasFnAttribute("wasm-import-name")) {
226         // If this is a converted Emscripten EH/SjLj symbol, we shouldn't use
227         // the original function name but the converted symbol name.
228         StringRef Name =
229             InvokeDetected
230                 ? Sym->getName()
231                 : F.getFnAttribute("wasm-import-name").getValueAsString();
232         Sym->setImportName(storeName(Name));
233         getTargetStreamer()->emitImportName(Sym, Name);
234       }
235     }
236 
237     if (F.hasFnAttribute("wasm-export-name")) {
238       auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
239       StringRef Name = F.getFnAttribute("wasm-export-name").getValueAsString();
240       Sym->setExportName(storeName(Name));
241       getTargetStreamer()->emitExportName(Sym, Name);
242     }
243   }
244 
245   for (const auto &G : M.globals()) {
246     if (!G.hasInitializer() && G.hasExternalLinkage()) {
247       if (G.getValueType()->isSized()) {
248         uint16_t Size = M.getDataLayout().getTypeAllocSize(G.getValueType());
249         OutStreamer->emitELFSize(getSymbol(&G),
250                                  MCConstantExpr::create(Size, OutContext));
251       }
252     }
253   }
254 
255   if (const NamedMDNode *Named = M.getNamedMetadata("wasm.custom_sections")) {
256     for (const Metadata *MD : Named->operands()) {
257       const auto *Tuple = dyn_cast<MDTuple>(MD);
258       if (!Tuple || Tuple->getNumOperands() != 2)
259         continue;
260       const MDString *Name = dyn_cast<MDString>(Tuple->getOperand(0));
261       const MDString *Contents = dyn_cast<MDString>(Tuple->getOperand(1));
262       if (!Name || !Contents)
263         continue;
264 
265       OutStreamer->PushSection();
266       std::string SectionName = (".custom_section." + Name->getString()).str();
267       MCSectionWasm *MySection =
268           OutContext.getWasmSection(SectionName, SectionKind::getMetadata());
269       OutStreamer->SwitchSection(MySection);
270       OutStreamer->emitBytes(Contents->getString());
271       OutStreamer->PopSection();
272     }
273   }
274 
275   EmitProducerInfo(M);
276   EmitTargetFeatures(M);
277 }
278 
279 void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
280   llvm::SmallVector<std::pair<std::string, std::string>, 4> Languages;
281   if (const NamedMDNode *Debug = M.getNamedMetadata("llvm.dbg.cu")) {
282     llvm::SmallSet<StringRef, 4> SeenLanguages;
283     for (size_t I = 0, E = Debug->getNumOperands(); I < E; ++I) {
284       const auto *CU = cast<DICompileUnit>(Debug->getOperand(I));
285       StringRef Language = dwarf::LanguageString(CU->getSourceLanguage());
286       Language.consume_front("DW_LANG_");
287       if (SeenLanguages.insert(Language).second)
288         Languages.emplace_back(Language.str(), "");
289     }
290   }
291 
292   llvm::SmallVector<std::pair<std::string, std::string>, 4> Tools;
293   if (const NamedMDNode *Ident = M.getNamedMetadata("llvm.ident")) {
294     llvm::SmallSet<StringRef, 4> SeenTools;
295     for (size_t I = 0, E = Ident->getNumOperands(); I < E; ++I) {
296       const auto *S = cast<MDString>(Ident->getOperand(I)->getOperand(0));
297       std::pair<StringRef, StringRef> Field = S->getString().split("version");
298       StringRef Name = Field.first.trim();
299       StringRef Version = Field.second.trim();
300       if (SeenTools.insert(Name).second)
301         Tools.emplace_back(Name.str(), Version.str());
302     }
303   }
304 
305   int FieldCount = int(!Languages.empty()) + int(!Tools.empty());
306   if (FieldCount != 0) {
307     MCSectionWasm *Producers = OutContext.getWasmSection(
308         ".custom_section.producers", SectionKind::getMetadata());
309     OutStreamer->PushSection();
310     OutStreamer->SwitchSection(Producers);
311     OutStreamer->emitULEB128IntValue(FieldCount);
312     for (auto &Producers : {std::make_pair("language", &Languages),
313             std::make_pair("processed-by", &Tools)}) {
314       if (Producers.second->empty())
315         continue;
316       OutStreamer->emitULEB128IntValue(strlen(Producers.first));
317       OutStreamer->emitBytes(Producers.first);
318       OutStreamer->emitULEB128IntValue(Producers.second->size());
319       for (auto &Producer : *Producers.second) {
320         OutStreamer->emitULEB128IntValue(Producer.first.size());
321         OutStreamer->emitBytes(Producer.first);
322         OutStreamer->emitULEB128IntValue(Producer.second.size());
323         OutStreamer->emitBytes(Producer.second);
324       }
325     }
326     OutStreamer->PopSection();
327   }
328 }
329 
330 void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
331   struct FeatureEntry {
332     uint8_t Prefix;
333     std::string Name;
334   };
335 
336   // Read target features and linkage policies from module metadata
337   SmallVector<FeatureEntry, 4> EmittedFeatures;
338   auto EmitFeature = [&](std::string Feature) {
339     std::string MDKey = (StringRef("wasm-feature-") + Feature).str();
340     Metadata *Policy = M.getModuleFlag(MDKey);
341     if (Policy == nullptr)
342       return;
343 
344     FeatureEntry Entry;
345     Entry.Prefix = 0;
346     Entry.Name = Feature;
347 
348     if (auto *MD = cast<ConstantAsMetadata>(Policy))
349       if (auto *I = cast<ConstantInt>(MD->getValue()))
350         Entry.Prefix = I->getZExtValue();
351 
352     // Silently ignore invalid metadata
353     if (Entry.Prefix != wasm::WASM_FEATURE_PREFIX_USED &&
354         Entry.Prefix != wasm::WASM_FEATURE_PREFIX_REQUIRED &&
355         Entry.Prefix != wasm::WASM_FEATURE_PREFIX_DISALLOWED)
356       return;
357 
358     EmittedFeatures.push_back(Entry);
359   };
360 
361   for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
362     EmitFeature(KV.Key);
363   }
364   // This pseudo-feature tells the linker whether shared memory would be safe
365   EmitFeature("shared-mem");
366 
367   if (EmittedFeatures.size() == 0)
368     return;
369 
370   // Emit features and linkage policies into the "target_features" section
371   MCSectionWasm *FeaturesSection = OutContext.getWasmSection(
372       ".custom_section.target_features", SectionKind::getMetadata());
373   OutStreamer->PushSection();
374   OutStreamer->SwitchSection(FeaturesSection);
375 
376   OutStreamer->emitULEB128IntValue(EmittedFeatures.size());
377   for (auto &F : EmittedFeatures) {
378     OutStreamer->emitIntValue(F.Prefix, 1);
379     OutStreamer->emitULEB128IntValue(F.Name.size());
380     OutStreamer->emitBytes(F.Name);
381   }
382 
383   OutStreamer->PopSection();
384 }
385 
386 void WebAssemblyAsmPrinter::emitConstantPool() {
387   assert(MF->getConstantPool()->getConstants().empty() &&
388          "WebAssembly disables constant pools");
389 }
390 
391 void WebAssemblyAsmPrinter::emitJumpTableInfo() {
392   // Nothing to do; jump tables are incorporated into the instruction stream.
393 }
394 
395 void WebAssemblyAsmPrinter::emitFunctionBodyStart() {
396   const Function &F = MF->getFunction();
397   SmallVector<MVT, 1> ResultVTs;
398   SmallVector<MVT, 4> ParamVTs;
399   computeSignatureVTs(F.getFunctionType(), &F, F, TM, ParamVTs, ResultVTs);
400 
401   auto Signature = signatureFromMVTs(ResultVTs, ParamVTs);
402   auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
403   WasmSym->setSignature(Signature.get());
404   addSignature(std::move(Signature));
405   WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
406 
407   getTargetStreamer()->emitFunctionType(WasmSym);
408 
409   // Emit the function index.
410   if (MDNode *Idx = F.getMetadata("wasm.index")) {
411     assert(Idx->getNumOperands() == 1);
412 
413     getTargetStreamer()->emitIndIdx(AsmPrinter::lowerConstant(
414         cast<ConstantAsMetadata>(Idx->getOperand(0))->getValue()));
415   }
416 
417   SmallVector<wasm::ValType, 16> Locals;
418   valTypesFromMVTs(MFI->getLocals(), Locals);
419   getTargetStreamer()->emitLocal(Locals);
420 
421   AsmPrinter::emitFunctionBodyStart();
422 }
423 
424 void WebAssemblyAsmPrinter::emitInstruction(const MachineInstr *MI) {
425   LLVM_DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
426 
427   switch (MI->getOpcode()) {
428   case WebAssembly::ARGUMENT_i32:
429   case WebAssembly::ARGUMENT_i32_S:
430   case WebAssembly::ARGUMENT_i64:
431   case WebAssembly::ARGUMENT_i64_S:
432   case WebAssembly::ARGUMENT_f32:
433   case WebAssembly::ARGUMENT_f32_S:
434   case WebAssembly::ARGUMENT_f64:
435   case WebAssembly::ARGUMENT_f64_S:
436   case WebAssembly::ARGUMENT_v16i8:
437   case WebAssembly::ARGUMENT_v16i8_S:
438   case WebAssembly::ARGUMENT_v8i16:
439   case WebAssembly::ARGUMENT_v8i16_S:
440   case WebAssembly::ARGUMENT_v4i32:
441   case WebAssembly::ARGUMENT_v4i32_S:
442   case WebAssembly::ARGUMENT_v2i64:
443   case WebAssembly::ARGUMENT_v2i64_S:
444   case WebAssembly::ARGUMENT_v4f32:
445   case WebAssembly::ARGUMENT_v4f32_S:
446   case WebAssembly::ARGUMENT_v2f64:
447   case WebAssembly::ARGUMENT_v2f64_S:
448     // These represent values which are live into the function entry, so there's
449     // no instruction to emit.
450     break;
451   case WebAssembly::FALLTHROUGH_RETURN: {
452     // These instructions represent the implicit return at the end of a
453     // function body.
454     if (isVerbose()) {
455       OutStreamer->AddComment("fallthrough-return");
456       OutStreamer->AddBlankLine();
457     }
458     break;
459   }
460   case WebAssembly::COMPILER_FENCE:
461     // This is a compiler barrier that prevents instruction reordering during
462     // backend compilation, and should not be emitted.
463     break;
464   default: {
465     WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
466     MCInst TmpInst;
467     MCInstLowering.lower(MI, TmpInst);
468     EmitToStreamer(*OutStreamer, TmpInst);
469     break;
470   }
471   }
472 }
473 
474 bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
475                                             unsigned OpNo,
476                                             const char *ExtraCode,
477                                             raw_ostream &OS) {
478   // First try the generic code, which knows about modifiers like 'c' and 'n'.
479   if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS))
480     return false;
481 
482   if (!ExtraCode) {
483     const MachineOperand &MO = MI->getOperand(OpNo);
484     switch (MO.getType()) {
485     case MachineOperand::MO_Immediate:
486       OS << MO.getImm();
487       return false;
488     case MachineOperand::MO_Register:
489       // FIXME: only opcode that still contains registers, as required by
490       // MachineInstr::getDebugVariable().
491       assert(MI->getOpcode() == WebAssembly::INLINEASM);
492       OS << regToString(MO);
493       return false;
494     case MachineOperand::MO_GlobalAddress:
495       PrintSymbolOperand(MO, OS);
496       return false;
497     case MachineOperand::MO_ExternalSymbol:
498       GetExternalSymbolSymbol(MO.getSymbolName())->print(OS, MAI);
499       printOffset(MO.getOffset(), OS);
500       return false;
501     case MachineOperand::MO_MachineBasicBlock:
502       MO.getMBB()->getSymbol()->print(OS, MAI);
503       return false;
504     default:
505       break;
506     }
507   }
508 
509   return true;
510 }
511 
512 bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
513                                                   unsigned OpNo,
514                                                   const char *ExtraCode,
515                                                   raw_ostream &OS) {
516   // The current approach to inline asm is that "r" constraints are expressed
517   // as local indices, rather than values on the operand stack. This simplifies
518   // using "r" as it eliminates the need to push and pop the values in a
519   // particular order, however it also makes it impossible to have an "m"
520   // constraint. So we don't support it.
521 
522   return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
523 }
524 
525 // Force static initialization.
526 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmPrinter() {
527   RegisterAsmPrinter<WebAssemblyAsmPrinter> X(getTheWebAssemblyTarget32());
528   RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(getTheWebAssemblyTarget64());
529 }
530