1 //===--- DLangDemangle.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines a demangler for the D programming language as specified
11 /// in the ABI specification, available at:
12 /// https://dlang.org/spec/abi.html#name_mangling
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Demangle/Demangle.h"
17 #include "llvm/Demangle/StringViewExtras.h"
18 #include "llvm/Demangle/Utility.h"
19 
20 #include <cctype>
21 #include <cstring>
22 #include <limits>
23 #include <string_view>
24 
25 using namespace llvm;
26 using llvm::itanium_demangle::OutputBuffer;
27 using llvm::itanium_demangle::starts_with;
28 
29 namespace {
30 
31 /// Demangle information structure.
32 struct Demangler {
33   /// Initialize the information structure we use to pass around information.
34   ///
35   /// \param Mangled String to demangle.
36   Demangler(std::string_view Mangled);
37 
38   /// Extract and demangle the mangled symbol and append it to the output
39   /// string.
40   ///
41   /// \param Demangled Output buffer to write the demangled name.
42   ///
43   /// \return The remaining string on success or nullptr on failure.
44   ///
45   /// \see https://dlang.org/spec/abi.html#name_mangling .
46   /// \see https://dlang.org/spec/abi.html#MangledName .
47   const char *parseMangle(OutputBuffer *Demangled);
48 
49 private:
50   /// Extract and demangle a given mangled symbol and append it to the output
51   /// string.
52   ///
53   /// \param Demangled output buffer to write the demangled name.
54   /// \param Mangled mangled symbol to be demangled.
55   ///
56   /// \see https://dlang.org/spec/abi.html#name_mangling .
57   /// \see https://dlang.org/spec/abi.html#MangledName .
58   void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled);
59 
60   /// Extract the number from a given string.
61   ///
62   /// \param Mangled string to extract the number.
63   /// \param Ret assigned result value.
64   ///
65   /// \note Ret larger than UINT_MAX is considered a failure.
66   ///
67   /// \see https://dlang.org/spec/abi.html#Number .
68   void decodeNumber(std::string_view &Mangled, unsigned long &Ret);
69 
70   /// Extract the back reference position from a given string.
71   ///
72   /// \param Mangled string to extract the back reference position.
73   /// \param Ret assigned result value.
74   ///
75   /// \return true on success, false on error.
76   ///
77   /// \note Ret is always >= 0 on success, and unspecified on failure
78   ///
79   /// \see https://dlang.org/spec/abi.html#back_ref .
80   /// \see https://dlang.org/spec/abi.html#NumberBackRef .
81   bool decodeBackrefPos(std::string_view &Mangled, long &Ret);
82 
83   /// Extract the symbol pointed by the back reference form a given string.
84   ///
85   /// \param Mangled string to extract the back reference position.
86   /// \param Ret assigned result value.
87   ///
88   /// \return true on success, false on error.
89   ///
90   /// \see https://dlang.org/spec/abi.html#back_ref .
91   bool decodeBackref(std::string_view &Mangled, std::string_view &Ret);
92 
93   /// Extract and demangle backreferenced symbol from a given mangled symbol
94   /// and append it to the output string.
95   ///
96   /// \param Demangled output buffer to write the demangled name.
97   /// \param Mangled mangled symbol to be demangled.
98   ///
99   /// \see https://dlang.org/spec/abi.html#back_ref .
100   /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
101   void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled);
102 
103   /// Extract and demangle backreferenced type from a given mangled symbol
104   /// and append it to the output string.
105   ///
106   /// \param Mangled mangled symbol to be demangled.
107   ///
108   /// \see https://dlang.org/spec/abi.html#back_ref .
109   /// \see https://dlang.org/spec/abi.html#TypeBackRef .
110   void parseTypeBackref(std::string_view &Mangled);
111 
112   /// Check whether it is the beginning of a symbol name.
113   ///
114   /// \param Mangled string to extract the symbol name.
115   ///
116   /// \return true on success, false otherwise.
117   ///
118   /// \see https://dlang.org/spec/abi.html#SymbolName .
119   bool isSymbolName(std::string_view Mangled);
120 
121   /// Extract and demangle an identifier from a given mangled symbol append it
122   /// to the output string.
123   ///
124   /// \param Demangled Output buffer to write the demangled name.
125   /// \param Mangled Mangled symbol to be demangled.
126   ///
127   /// \see https://dlang.org/spec/abi.html#SymbolName .
128   void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled);
129 
130   /// Extract and demangle the plain identifier from a given mangled symbol and
131   /// prepend/append it to the output string, with a special treatment for some
132   /// magic compiler generated symbols.
133   ///
134   /// \param Demangled Output buffer to write the demangled name.
135   /// \param Mangled Mangled symbol to be demangled.
136   /// \param Len Length of the mangled symbol name.
137   ///
138   /// \see https://dlang.org/spec/abi.html#LName .
139   void parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
140                   unsigned long Len);
141 
142   /// Extract and demangle the qualified symbol from a given mangled symbol
143   /// append it to the output string.
144   ///
145   /// \param Demangled Output buffer to write the demangled name.
146   /// \param Mangled Mangled symbol to be demangled.
147   ///
148   /// \see https://dlang.org/spec/abi.html#QualifiedName .
149   void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled);
150 
151   /// Extract and demangle a type from a given mangled symbol append it to
152   /// the output string.
153   ///
154   /// \param Mangled mangled symbol to be demangled.
155   ///
156   /// \return true on success, false on error.
157   ///
158   /// \see https://dlang.org/spec/abi.html#Type .
159   bool parseType(std::string_view &Mangled);
160 
161   /// An immutable view of the string we are demangling.
162   const std::string_view Str;
163   /// The index of the last back reference.
164   int LastBackref;
165 };
166 
167 } // namespace
168 
169 void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) {
170   // Clear Mangled if trying to extract something that isn't a digit.
171   if (Mangled.empty()) {
172     Mangled = {};
173     return;
174   }
175 
176   if (!std::isdigit(Mangled.front())) {
177     Mangled = {};
178     return;
179   }
180 
181   unsigned long Val = 0;
182 
183   do {
184     unsigned long Digit = Mangled[0] - '0';
185 
186     // Check for overflow.
187     if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) {
188       Mangled = {};
189       return;
190     }
191 
192     Val = Val * 10 + Digit;
193     Mangled.remove_prefix(1);
194   } while (!Mangled.empty() && std::isdigit(Mangled.front()));
195 
196   if (Mangled.empty()) {
197     Mangled = {};
198     return;
199   }
200 
201   Ret = Val;
202 }
203 
204 bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) {
205   // Return nullptr if trying to extract something that isn't a digit
206   if (Mangled.empty()) {
207     Mangled = {};
208     return false;
209   }
210   // Any identifier or non-basic type that has been emitted to the mangled
211   // symbol before will not be emitted again, but is referenced by a special
212   // sequence encoding the relative position of the original occurrence in the
213   // mangled symbol name.
214   // Numbers in back references are encoded with base 26 by upper case letters
215   // A-Z for higher digits but lower case letters a-z for the last digit.
216   //    NumberBackRef:
217   //        [a-z]
218   //        [A-Z] NumberBackRef
219   //        ^
220   unsigned long Val = 0;
221 
222   while (!Mangled.empty() && std::isalpha(Mangled.front())) {
223     // Check for overflow
224     if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
225       break;
226 
227     Val *= 26;
228 
229     if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
230       Val += Mangled[0] - 'a';
231       if ((long)Val <= 0)
232         break;
233       Ret = Val;
234       Mangled.remove_prefix(1);
235       return true;
236     }
237 
238     Val += Mangled[0] - 'A';
239     Mangled.remove_prefix(1);
240   }
241 
242   Mangled = {};
243   return false;
244 }
245 
246 bool Demangler::decodeBackref(std::string_view &Mangled,
247                               std::string_view &Ret) {
248   assert(!Mangled.empty() && Mangled.front() == 'Q' &&
249          "Invalid back reference!");
250   Ret = {};
251 
252   // Position of 'Q'
253   const char *Qpos = Mangled.data();
254   long RefPos;
255   Mangled.remove_prefix(1);
256 
257   if (!decodeBackrefPos(Mangled, RefPos)) {
258     Mangled = {};
259     return false;
260   }
261 
262   if (RefPos > Qpos - Str.data()) {
263     Mangled = {};
264     return false;
265   }
266 
267   // Set the position of the back reference.
268   Ret = Qpos - RefPos;
269 
270   return true;
271 }
272 
273 void Demangler::parseSymbolBackref(OutputBuffer *Demangled,
274                                    std::string_view &Mangled) {
275   // An identifier back reference always points to a digit 0 to 9.
276   //    IdentifierBackRef:
277   //        Q NumberBackRef
278   //        ^
279   unsigned long Len;
280 
281   // Get position of the back reference
282   std::string_view Backref;
283   if (!decodeBackref(Mangled, Backref)) {
284     Mangled = {};
285     return;
286   }
287 
288   // Must point to a simple identifier
289   decodeNumber(Backref, Len);
290   if (Backref.empty() || Backref.length() < Len) {
291     Mangled = {};
292     return;
293   }
294 
295   parseLName(Demangled, Backref, Len);
296   if (Backref.empty())
297     Mangled = {};
298 }
299 
300 void Demangler::parseTypeBackref(std::string_view &Mangled) {
301   // A type back reference always points to a letter.
302   //    TypeBackRef:
303   //        Q NumberBackRef
304   //        ^
305 
306   // If we appear to be moving backwards through the mangle string, then
307   // bail as this may be a recursive back reference.
308   if (Mangled.data() - Str.data() >= LastBackref) {
309     Mangled = {};
310     return;
311   }
312 
313   int SaveRefPos = LastBackref;
314   LastBackref = Mangled.data() - Str.data();
315 
316   // Get position of the back reference.
317   std::string_view Backref;
318   if (!decodeBackref(Mangled, Backref)) {
319     Mangled = {};
320     return;
321   }
322 
323   // Can't decode back reference.
324   if (Backref.empty()) {
325     Mangled = {};
326     return;
327   }
328 
329   // TODO: Add support for function type back references.
330   if (!parseType(Backref))
331     Mangled = {};
332 
333   LastBackref = SaveRefPos;
334 
335   if (Backref.empty())
336     Mangled = {};
337 }
338 
339 bool Demangler::isSymbolName(std::string_view Mangled) {
340   long Ret;
341   const char *Qref = Mangled.data();
342 
343   if (std::isdigit(Mangled.front()))
344     return true;
345 
346   // TODO: Handle template instances.
347 
348   if (Mangled.front() != 'Q')
349     return false;
350 
351   Mangled.remove_prefix(1);
352   bool Valid = decodeBackrefPos(Mangled, Ret);
353   if (!Valid || Ret > Qref - Str.data())
354     return false;
355 
356   return std::isdigit(Qref[-Ret]);
357 }
358 
359 void Demangler::parseMangle(OutputBuffer *Demangled,
360                             std::string_view &Mangled) {
361   // A D mangled symbol is comprised of both scope and type information.
362   //    MangleName:
363   //        _D QualifiedName Type
364   //        _D QualifiedName Z
365   //        ^
366   // The caller should have guaranteed that the start pointer is at the
367   // above location.
368   // Note that type is never a function type, but only the return type of
369   // a function or the type of a variable.
370   Mangled.remove_prefix(2);
371 
372   parseQualified(Demangled, Mangled);
373 
374   if (Mangled.empty()) {
375     Mangled = {};
376     return;
377   }
378 
379   // Artificial symbols end with 'Z' and have no type.
380   if (Mangled.front() == 'Z') {
381     Mangled.remove_prefix(1);
382   } else if (!parseType(Mangled))
383     Mangled = {};
384 }
385 
386 void Demangler::parseQualified(OutputBuffer *Demangled,
387                                std::string_view &Mangled) {
388   // Qualified names are identifiers separated by their encoded length.
389   // Nested functions also encode their argument types without specifying
390   // what they return.
391   //    QualifiedName:
392   //        SymbolFunctionName
393   //        SymbolFunctionName QualifiedName
394   //        ^
395   //    SymbolFunctionName:
396   //        SymbolName
397   //        SymbolName TypeFunctionNoReturn
398   //        SymbolName M TypeFunctionNoReturn
399   //        SymbolName M TypeModifiers TypeFunctionNoReturn
400   // The start pointer should be at the above location.
401 
402   // Whether it has more than one symbol
403   size_t NotFirst = false;
404   do {
405     // Skip over anonymous symbols.
406     if (!Mangled.empty() && Mangled.front() == '0') {
407       do
408         Mangled.remove_prefix(1);
409       while (!Mangled.empty() && Mangled.front() == '0');
410 
411       continue;
412     }
413 
414     if (NotFirst)
415       *Demangled << '.';
416     NotFirst = true;
417 
418     parseIdentifier(Demangled, Mangled);
419   } while (!Mangled.empty() && isSymbolName(Mangled));
420 }
421 
422 void Demangler::parseIdentifier(OutputBuffer *Demangled,
423                                 std::string_view &Mangled) {
424   if (Mangled.empty()) {
425     Mangled = {};
426     return;
427   }
428 
429   if (Mangled.front() == 'Q')
430     return parseSymbolBackref(Demangled, Mangled);
431 
432   // TODO: Parse lengthless template instances.
433 
434   unsigned long Len;
435   decodeNumber(Mangled, Len);
436 
437   if (Mangled.empty()) {
438     Mangled = {};
439     return;
440   }
441   if (!Len || Mangled.length() < Len) {
442     Mangled = {};
443     return;
444   }
445 
446   // TODO: Parse template instances with a length prefix.
447 
448   // There can be multiple different declarations in the same function that
449   // have the same mangled name.  To make the mangled names unique, a fake
450   // parent in the form `__Sddd' is added to the symbol.
451   if (Len >= 4 && starts_with(Mangled, "__S")) {
452     const size_t SuffixLen = Mangled.length() - Len;
453     std::string_view P = Mangled.substr(3);
454     while (P.length() > SuffixLen && std::isdigit(P.front()))
455       P.remove_prefix(1);
456     if (P.length() == SuffixLen) {
457       // Skip over the fake parent.
458       Mangled.remove_prefix(Len);
459       return parseIdentifier(Demangled, Mangled);
460     }
461 
462     // Else demangle it as a plain identifier.
463   }
464 
465   parseLName(Demangled, Mangled, Len);
466 }
467 
468 bool Demangler::parseType(std::string_view &Mangled) {
469   if (Mangled.empty()) {
470     Mangled = {};
471     return false;
472   }
473 
474   switch (Mangled.front()) {
475   // TODO: Parse type qualifiers.
476   // TODO: Parse function types.
477   // TODO: Parse compound types.
478   // TODO: Parse delegate types.
479   // TODO: Parse tuple types.
480 
481   // Basic types.
482   case 'i':
483     Mangled.remove_prefix(1);
484     // TODO: Add type name dumping
485     return true;
486 
487     // TODO: Add support for the rest of the basic types.
488 
489   // Back referenced type.
490   case 'Q': {
491     parseTypeBackref(Mangled);
492     return true;
493   }
494 
495   default: // unhandled.
496     Mangled = {};
497     return false;
498   }
499 }
500 
501 void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
502                            unsigned long Len) {
503   switch (Len) {
504   case 6:
505     if (starts_with(Mangled, "__initZ")) {
506       // The static initializer for a given symbol.
507       Demangled->prepend("initializer for ");
508       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
509       Mangled.remove_prefix(Len);
510       return;
511     }
512     if (starts_with(Mangled, "__vtblZ")) {
513       // The vtable symbol for a given class.
514       Demangled->prepend("vtable for ");
515       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
516       Mangled.remove_prefix(Len);
517       return;
518     }
519     break;
520 
521   case 7:
522     if (starts_with(Mangled, "__ClassZ")) {
523       // The classinfo symbol for a given class.
524       Demangled->prepend("ClassInfo for ");
525       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
526       Mangled.remove_prefix(Len);
527       return;
528     }
529     break;
530 
531   case 11:
532     if (starts_with(Mangled, "__InterfaceZ")) {
533       // The interface symbol for a given class.
534       Demangled->prepend("Interface for ");
535       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
536       Mangled.remove_prefix(Len);
537       return;
538     }
539     break;
540 
541   case 12:
542     if (starts_with(Mangled, "__ModuleInfoZ")) {
543       // The ModuleInfo symbol for a given module.
544       Demangled->prepend("ModuleInfo for ");
545       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
546       Mangled.remove_prefix(Len);
547       return;
548     }
549     break;
550   }
551 
552   *Demangled << Mangled.substr(0, Len);
553   Mangled.remove_prefix(Len);
554 }
555 
556 Demangler::Demangler(std::string_view Mangled)
557     : Str(Mangled), LastBackref(Mangled.length()) {}
558 
559 const char *Demangler::parseMangle(OutputBuffer *Demangled) {
560   std::string_view M(this->Str);
561   parseMangle(Demangled, M);
562   return M.data();
563 }
564 
565 char *llvm::dlangDemangle(std::string_view MangledName) {
566   if (MangledName.empty() || !starts_with(MangledName, "_D"))
567     return nullptr;
568 
569   OutputBuffer Demangled;
570   if (MangledName == "_Dmain") {
571     Demangled << "D main";
572   } else {
573 
574     Demangler D(MangledName);
575     const char *M = D.parseMangle(&Demangled);
576 
577     // Check that the entire symbol was successfully demangled.
578     if (M == nullptr || *M != '\0') {
579       std::free(Demangled.getBuffer());
580       return nullptr;
581     }
582   }
583 
584   // OutputBuffer's internal buffer is not null terminated and therefore we need
585   // to add it to comply with C null terminated strings.
586   if (Demangled.getCurrentPosition() > 0) {
587     Demangled << '\0';
588     Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
589     return Demangled.getBuffer();
590   }
591 
592   std::free(Demangled.getBuffer());
593   return nullptr;
594 }
595