1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringView.h"
16 #include "llvm/Demangle/Utility.h"
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 
26 using llvm::itanium_demangle::OutputStream;
27 using llvm::itanium_demangle::StringView;
28 using llvm::itanium_demangle::SwapAndRestore;
29 
30 namespace {
31 
32 struct Identifier {
33   StringView Name;
34   bool Punycode;
35 
empty__anonfdfe6f610111::Identifier36   bool empty() const { return Name.empty(); }
37 };
38 
39 enum class BasicType {
40   Bool,
41   Char,
42   I8,
43   I16,
44   I32,
45   I64,
46   I128,
47   ISize,
48   U8,
49   U16,
50   U32,
51   U64,
52   U128,
53   USize,
54   F32,
55   F64,
56   Str,
57   Placeholder,
58   Unit,
59   Variadic,
60   Never,
61 };
62 
63 enum class IsInType {
64   No,
65   Yes,
66 };
67 
68 enum class LeaveGenericsOpen {
69   No,
70   Yes,
71 };
72 
73 class Demangler {
74   // Maximum recursion level. Used to avoid stack overflow.
75   size_t MaxRecursionLevel;
76   // Current recursion level.
77   size_t RecursionLevel;
78   size_t BoundLifetimes;
79   // Input string that is being demangled with "_R" prefix removed.
80   StringView Input;
81   // Position in the input string.
82   size_t Position;
83   // When true, print methods append the output to the stream.
84   // When false, the output is suppressed.
85   bool Print;
86   // True if an error occurred.
87   bool Error;
88 
89 public:
90   // Demangled output.
91   OutputStream Output;
92 
93   Demangler(size_t MaxRecursionLevel = 500);
94 
95   bool demangle(StringView MangledName);
96 
97 private:
98   bool demanglePath(IsInType Type,
99                     LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100   void demangleImplPath(IsInType InType);
101   void demangleGenericArg();
102   void demangleType();
103   void demangleFnSig();
104   void demangleDynBounds();
105   void demangleDynTrait();
106   void demangleOptionalBinder();
107   void demangleConst();
108   void demangleConstInt();
109   void demangleConstBool();
110   void demangleConstChar();
111 
demangleBackref(Callable Demangler)112   template <typename Callable> void demangleBackref(Callable Demangler) {
113     uint64_t Backref = parseBase62Number();
114     if (Error || Backref >= Position) {
115       Error = true;
116       return;
117     }
118 
119     if (!Print)
120       return;
121 
122     SwapAndRestore<size_t> SavePosition(Position, Position);
123     Position = Backref;
124     Demangler();
125   }
126 
127   Identifier parseIdentifier();
128   uint64_t parseOptionalBase62Number(char Tag);
129   uint64_t parseBase62Number();
130   uint64_t parseDecimalNumber();
131   uint64_t parseHexNumber(StringView &HexDigits);
132 
133   void print(char C);
134   void print(StringView S);
135   void printDecimalNumber(uint64_t N);
136   void printBasicType(BasicType);
137   void printLifetime(uint64_t Index);
138 
139   char look() const;
140   char consume();
141   bool consumeIf(char Prefix);
142 
143   bool addAssign(uint64_t &A, uint64_t B);
144   bool mulAssign(uint64_t &A, uint64_t B);
145 };
146 
147 } // namespace
148 
rustDemangle(const char * MangledName,char * Buf,size_t * N,int * Status)149 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
150                          int *Status) {
151   if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
152     if (Status != nullptr)
153       *Status = demangle_invalid_args;
154     return nullptr;
155   }
156 
157   // Return early if mangled name doesn't look like a Rust symbol.
158   StringView Mangled(MangledName);
159   if (!Mangled.startsWith("_R")) {
160     if (Status != nullptr)
161       *Status = demangle_invalid_mangled_name;
162     return nullptr;
163   }
164 
165   Demangler D;
166   if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
167     if (Status != nullptr)
168       *Status = demangle_memory_alloc_failure;
169     return nullptr;
170   }
171 
172   if (!D.demangle(Mangled)) {
173     if (Status != nullptr)
174       *Status = demangle_invalid_mangled_name;
175     std::free(D.Output.getBuffer());
176     return nullptr;
177   }
178 
179   D.Output += '\0';
180   char *Demangled = D.Output.getBuffer();
181   size_t DemangledLen = D.Output.getCurrentPosition();
182 
183   if (Buf != nullptr) {
184     if (DemangledLen <= *N) {
185       std::memcpy(Buf, Demangled, DemangledLen);
186       std::free(Demangled);
187       Demangled = Buf;
188     } else {
189       std::free(Buf);
190     }
191   }
192 
193   if (N != nullptr)
194     *N = DemangledLen;
195 
196   if (Status != nullptr)
197     *Status = demangle_success;
198 
199   return Demangled;
200 }
201 
Demangler(size_t MaxRecursionLevel)202 Demangler::Demangler(size_t MaxRecursionLevel)
203     : MaxRecursionLevel(MaxRecursionLevel) {}
204 
isDigit(const char C)205 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
206 
isHexDigit(const char C)207 static inline bool isHexDigit(const char C) {
208   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
209 }
210 
isLower(const char C)211 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
212 
isUpper(const char C)213 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
214 
215 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)216 static inline bool isValid(const char C) {
217   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
218 }
219 
220 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
221 // otherwise. The demangled symbol is stored in Output field. It is
222 // responsibility of the caller to free the memory behind the output stream.
223 //
224 // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(StringView Mangled)225 bool Demangler::demangle(StringView Mangled) {
226   Position = 0;
227   Error = false;
228   Print = true;
229   RecursionLevel = 0;
230   BoundLifetimes = 0;
231 
232   if (!Mangled.consumeFront("_R")) {
233     Error = true;
234     return false;
235   }
236   size_t Dot = Mangled.find('.');
237   Input = Mangled.substr(0, Dot);
238   StringView Suffix = Mangled.dropFront(Dot);
239 
240   demanglePath(IsInType::No);
241 
242   if (Position != Input.size()) {
243     SwapAndRestore<bool> SavePrint(Print, false);
244     demanglePath(IsInType::No);
245   }
246 
247   if (Position != Input.size())
248     Error = true;
249 
250   if (!Suffix.empty()) {
251     print(" (");
252     print(Suffix);
253     print(")");
254   }
255 
256   return !Error;
257 }
258 
259 // Demangles a path. InType indicates whether a path is inside a type. When
260 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
261 // output. Return value indicates whether generics arguments have been left
262 // open.
263 //
264 // <path> = "C" <identifier>               // crate root
265 //        | "M" <impl-path> <type>         // <T> (inherent impl)
266 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
267 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
268 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
269 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
270 //        | <backref>
271 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
272 // <ns> = "C"      // closure
273 //      | "S"      // shim
274 //      | <A-Z>    // other special namespaces
275 //      | <a-z>    // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)276 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
277   if (Error || RecursionLevel >= MaxRecursionLevel) {
278     Error = true;
279     return false;
280   }
281   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
282 
283   switch (consume()) {
284   case 'C': {
285     parseOptionalBase62Number('s');
286     Identifier Ident = parseIdentifier();
287     print(Ident.Name);
288     break;
289   }
290   case 'M': {
291     demangleImplPath(InType);
292     print("<");
293     demangleType();
294     print(">");
295     break;
296   }
297   case 'X': {
298     demangleImplPath(InType);
299     print("<");
300     demangleType();
301     print(" as ");
302     demanglePath(IsInType::Yes);
303     print(">");
304     break;
305   }
306   case 'Y': {
307     print("<");
308     demangleType();
309     print(" as ");
310     demanglePath(IsInType::Yes);
311     print(">");
312     break;
313   }
314   case 'N': {
315     char NS = consume();
316     if (!isLower(NS) && !isUpper(NS)) {
317       Error = true;
318       break;
319     }
320     demanglePath(InType);
321 
322     uint64_t Disambiguator = parseOptionalBase62Number('s');
323     Identifier Ident = parseIdentifier();
324 
325     if (isUpper(NS)) {
326       // Special namespaces
327       print("::{");
328       if (NS == 'C')
329         print("closure");
330       else if (NS == 'S')
331         print("shim");
332       else
333         print(NS);
334       if (!Ident.empty()) {
335         print(":");
336         print(Ident.Name);
337       }
338       print('#');
339       printDecimalNumber(Disambiguator);
340       print('}');
341     } else {
342       // Implementation internal namespaces.
343       if (!Ident.empty()) {
344         print("::");
345         print(Ident.Name);
346       }
347     }
348     break;
349   }
350   case 'I': {
351     demanglePath(InType);
352     // Omit "::" when in a type, where it is optional.
353     if (InType == IsInType::No)
354       print("::");
355     print("<");
356     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
357       if (I > 0)
358         print(", ");
359       demangleGenericArg();
360     }
361     if (LeaveOpen == LeaveGenericsOpen::Yes)
362       return true;
363     else
364       print(">");
365     break;
366   }
367   case 'B': {
368     bool IsOpen = false;
369     demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
370     return IsOpen;
371   }
372   default:
373     Error = true;
374     break;
375   }
376 
377   return false;
378 }
379 
380 // <impl-path> = [<disambiguator>] <path>
381 // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)382 void Demangler::demangleImplPath(IsInType InType) {
383   SwapAndRestore<bool> SavePrint(Print, false);
384   parseOptionalBase62Number('s');
385   demanglePath(InType);
386 }
387 
388 // <generic-arg> = <lifetime>
389 //               | <type>
390 //               | "K" <const>
391 // <lifetime> = "L" <base-62-number>
demangleGenericArg()392 void Demangler::demangleGenericArg() {
393   if (consumeIf('L'))
394     printLifetime(parseBase62Number());
395   else if (consumeIf('K'))
396     demangleConst();
397   else
398     demangleType();
399 }
400 
401 // <basic-type> = "a"      // i8
402 //              | "b"      // bool
403 //              | "c"      // char
404 //              | "d"      // f64
405 //              | "e"      // str
406 //              | "f"      // f32
407 //              | "h"      // u8
408 //              | "i"      // isize
409 //              | "j"      // usize
410 //              | "l"      // i32
411 //              | "m"      // u32
412 //              | "n"      // i128
413 //              | "o"      // u128
414 //              | "s"      // i16
415 //              | "t"      // u16
416 //              | "u"      // ()
417 //              | "v"      // ...
418 //              | "x"      // i64
419 //              | "y"      // u64
420 //              | "z"      // !
421 //              | "p"      // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)422 static bool parseBasicType(char C, BasicType &Type) {
423   switch (C) {
424   case 'a':
425     Type = BasicType::I8;
426     return true;
427   case 'b':
428     Type = BasicType::Bool;
429     return true;
430   case 'c':
431     Type = BasicType::Char;
432     return true;
433   case 'd':
434     Type = BasicType::F64;
435     return true;
436   case 'e':
437     Type = BasicType::Str;
438     return true;
439   case 'f':
440     Type = BasicType::F32;
441     return true;
442   case 'h':
443     Type = BasicType::U8;
444     return true;
445   case 'i':
446     Type = BasicType::ISize;
447     return true;
448   case 'j':
449     Type = BasicType::USize;
450     return true;
451   case 'l':
452     Type = BasicType::I32;
453     return true;
454   case 'm':
455     Type = BasicType::U32;
456     return true;
457   case 'n':
458     Type = BasicType::I128;
459     return true;
460   case 'o':
461     Type = BasicType::U128;
462     return true;
463   case 'p':
464     Type = BasicType::Placeholder;
465     return true;
466   case 's':
467     Type = BasicType::I16;
468     return true;
469   case 't':
470     Type = BasicType::U16;
471     return true;
472   case 'u':
473     Type = BasicType::Unit;
474     return true;
475   case 'v':
476     Type = BasicType::Variadic;
477     return true;
478   case 'x':
479     Type = BasicType::I64;
480     return true;
481   case 'y':
482     Type = BasicType::U64;
483     return true;
484   case 'z':
485     Type = BasicType::Never;
486     return true;
487   default:
488     return false;
489   }
490 }
491 
printBasicType(BasicType Type)492 void Demangler::printBasicType(BasicType Type) {
493   switch (Type) {
494   case BasicType::Bool:
495     print("bool");
496     break;
497   case BasicType::Char:
498     print("char");
499     break;
500   case BasicType::I8:
501     print("i8");
502     break;
503   case BasicType::I16:
504     print("i16");
505     break;
506   case BasicType::I32:
507     print("i32");
508     break;
509   case BasicType::I64:
510     print("i64");
511     break;
512   case BasicType::I128:
513     print("i128");
514     break;
515   case BasicType::ISize:
516     print("isize");
517     break;
518   case BasicType::U8:
519     print("u8");
520     break;
521   case BasicType::U16:
522     print("u16");
523     break;
524   case BasicType::U32:
525     print("u32");
526     break;
527   case BasicType::U64:
528     print("u64");
529     break;
530   case BasicType::U128:
531     print("u128");
532     break;
533   case BasicType::USize:
534     print("usize");
535     break;
536   case BasicType::F32:
537     print("f32");
538     break;
539   case BasicType::F64:
540     print("f64");
541     break;
542   case BasicType::Str:
543     print("str");
544     break;
545   case BasicType::Placeholder:
546     print("_");
547     break;
548   case BasicType::Unit:
549     print("()");
550     break;
551   case BasicType::Variadic:
552     print("...");
553     break;
554   case BasicType::Never:
555     print("!");
556     break;
557   }
558 }
559 
560 // <type> = | <basic-type>
561 //          | <path>                      // named type
562 //          | "A" <type> <const>          // [T; N]
563 //          | "S" <type>                  // [T]
564 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
565 //          | "R" [<lifetime>] <type>     // &T
566 //          | "Q" [<lifetime>] <type>     // &mut T
567 //          | "P" <type>                  // *const T
568 //          | "O" <type>                  // *mut T
569 //          | "F" <fn-sig>                // fn(...) -> ...
570 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
571 //          | <backref>                   // backref
demangleType()572 void Demangler::demangleType() {
573   if (Error || RecursionLevel >= MaxRecursionLevel) {
574     Error = true;
575     return;
576   }
577   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
578 
579   size_t Start = Position;
580   char C = consume();
581   BasicType Type;
582   if (parseBasicType(C, Type))
583     return printBasicType(Type);
584 
585   switch (C) {
586   case 'A':
587     print("[");
588     demangleType();
589     print("; ");
590     demangleConst();
591     print("]");
592     break;
593   case 'S':
594     print("[");
595     demangleType();
596     print("]");
597     break;
598   case 'T': {
599     print("(");
600     size_t I = 0;
601     for (; !Error && !consumeIf('E'); ++I) {
602       if (I > 0)
603         print(", ");
604       demangleType();
605     }
606     if (I == 1)
607       print(",");
608     print(")");
609     break;
610   }
611   case 'R':
612   case 'Q':
613     print('&');
614     if (consumeIf('L')) {
615       if (auto Lifetime = parseBase62Number()) {
616         printLifetime(Lifetime);
617         print(' ');
618       }
619     }
620     if (C == 'Q')
621       print("mut ");
622     demangleType();
623     break;
624   case 'P':
625     print("*const ");
626     demangleType();
627     break;
628   case 'O':
629     print("*mut ");
630     demangleType();
631     break;
632   case 'F':
633     demangleFnSig();
634     break;
635   case 'D':
636     demangleDynBounds();
637     if (consumeIf('L')) {
638       if (auto Lifetime = parseBase62Number()) {
639         print(" + ");
640         printLifetime(Lifetime);
641       }
642     } else {
643       Error = true;
644     }
645     break;
646   case 'B':
647     demangleBackref([&] { demangleType(); });
648     break;
649   default:
650     Position = Start;
651     demanglePath(IsInType::Yes);
652     break;
653   }
654 }
655 
656 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
657 // <abi> = "C"
658 //       | <undisambiguated-identifier>
demangleFnSig()659 void Demangler::demangleFnSig() {
660   SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
661   demangleOptionalBinder();
662 
663   if (consumeIf('U'))
664     print("unsafe ");
665 
666   if (consumeIf('K')) {
667     print("extern \"");
668     if (consumeIf('C')) {
669       print("C");
670     } else {
671       Identifier Ident = parseIdentifier();
672       for (char C : Ident.Name) {
673         // When mangling ABI string, the "-" is replaced with "_".
674         if (C == '_')
675           C = '-';
676         print(C);
677       }
678     }
679     print("\" ");
680   }
681 
682   print("fn(");
683   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
684     if (I > 0)
685       print(", ");
686     demangleType();
687   }
688   print(")");
689 
690   if (consumeIf('u')) {
691     // Skip the unit type from the output.
692   } else {
693     print(" -> ");
694     demangleType();
695   }
696 }
697 
698 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()699 void Demangler::demangleDynBounds() {
700   SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
701   print("dyn ");
702   demangleOptionalBinder();
703   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
704     if (I > 0)
705       print(" + ");
706     demangleDynTrait();
707   }
708 }
709 
710 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
711 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()712 void Demangler::demangleDynTrait() {
713   bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
714   while (!Error && consumeIf('p')) {
715     if (!IsOpen) {
716       IsOpen = true;
717       print('<');
718     } else {
719       print(", ");
720     }
721     print(parseIdentifier().Name);
722     print(" = ");
723     demangleType();
724   }
725   if (IsOpen)
726     print(">");
727 }
728 
729 // Demangles optional binder and updates the number of bound lifetimes.
730 //
731 // <binder> = "G" <base-62-number>
demangleOptionalBinder()732 void Demangler::demangleOptionalBinder() {
733   uint64_t Binder = parseOptionalBase62Number('G');
734   if (Error || Binder == 0)
735     return;
736 
737   // In valid inputs each bound lifetime is referenced later. Referencing a
738   // lifetime requires at least one byte of input. Reject inputs that are too
739   // short to reference all bound lifetimes. Otherwise demangling of invalid
740   // binders could generate excessive amounts of output.
741   if (Binder >= Input.size() - BoundLifetimes) {
742     Error = true;
743     return;
744   }
745 
746   print("for<");
747   for (size_t I = 0; I != Binder; ++I) {
748     BoundLifetimes += 1;
749     if (I > 0)
750       print(", ");
751     printLifetime(1);
752   }
753   print("> ");
754 }
755 
756 // <const> = <basic-type> <const-data>
757 //         | "p"                          // placeholder
758 //         | <backref>
demangleConst()759 void Demangler::demangleConst() {
760   if (Error || RecursionLevel >= MaxRecursionLevel) {
761     Error = true;
762     return;
763   }
764   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
765 
766   char C = consume();
767   BasicType Type;
768   if (parseBasicType(C, Type)) {
769     switch (Type) {
770     case BasicType::I8:
771     case BasicType::I16:
772     case BasicType::I32:
773     case BasicType::I64:
774     case BasicType::I128:
775     case BasicType::ISize:
776     case BasicType::U8:
777     case BasicType::U16:
778     case BasicType::U32:
779     case BasicType::U64:
780     case BasicType::U128:
781     case BasicType::USize:
782       demangleConstInt();
783       break;
784     case BasicType::Bool:
785       demangleConstBool();
786       break;
787     case BasicType::Char:
788       demangleConstChar();
789       break;
790     case BasicType::Placeholder:
791       print('_');
792       break;
793     default:
794       Error = true;
795       break;
796     }
797   } else if (C == 'B') {
798     demangleBackref([&] { demangleConst(); });
799   } else {
800     Error = true;
801   }
802 }
803 
804 // <const-data> = ["n"] <hex-number>
demangleConstInt()805 void Demangler::demangleConstInt() {
806   if (consumeIf('n'))
807     print('-');
808 
809   StringView HexDigits;
810   uint64_t Value = parseHexNumber(HexDigits);
811   if (HexDigits.size() <= 16) {
812     printDecimalNumber(Value);
813   } else {
814     print("0x");
815     print(HexDigits);
816   }
817 }
818 
819 // <const-data> = "0_" // false
820 //              | "1_" // true
demangleConstBool()821 void Demangler::demangleConstBool() {
822   StringView HexDigits;
823   parseHexNumber(HexDigits);
824   if (HexDigits == "0")
825     print("false");
826   else if (HexDigits == "1")
827     print("true");
828   else
829     Error = true;
830 }
831 
832 /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)833 static bool isAsciiPrintable(uint64_t CodePoint) {
834   return 0x20 <= CodePoint && CodePoint <= 0x7e;
835 }
836 
837 // <const-data> = <hex-number>
demangleConstChar()838 void Demangler::demangleConstChar() {
839   StringView HexDigits;
840   uint64_t CodePoint = parseHexNumber(HexDigits);
841   if (Error || HexDigits.size() > 6) {
842     Error = true;
843     return;
844   }
845 
846   print("'");
847   switch (CodePoint) {
848   case '\t':
849     print(R"(\t)");
850     break;
851   case '\r':
852     print(R"(\r)");
853     break;
854   case '\n':
855     print(R"(\n)");
856     break;
857   case '\\':
858     print(R"(\\)");
859     break;
860   case '"':
861     print(R"(")");
862     break;
863   case '\'':
864     print(R"(\')");
865     break;
866   default:
867     if (isAsciiPrintable(CodePoint)) {
868       char C = CodePoint;
869       print(C);
870     } else {
871       print(R"(\u{)");
872       print(HexDigits);
873       print('}');
874     }
875     break;
876   }
877   print('\'');
878 }
879 
880 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()881 Identifier Demangler::parseIdentifier() {
882   bool Punycode = consumeIf('u');
883   uint64_t Bytes = parseDecimalNumber();
884 
885   // Underscore resolves the ambiguity when identifier starts with a decimal
886   // digit or another underscore.
887   consumeIf('_');
888 
889   if (Error || Bytes > Input.size() - Position) {
890     Error = true;
891     return {};
892   }
893   StringView S = Input.substr(Position, Bytes);
894   Position += Bytes;
895 
896   if (!std::all_of(S.begin(), S.end(), isValid)) {
897     Error = true;
898     return {};
899   }
900 
901   return {S, Punycode};
902 }
903 
904 // Parses optional base 62 number. The presence of a number is determined using
905 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
906 //
907 // This function is indended for parsing disambiguators and binders which when
908 // not present have their value interpreted as 0, and otherwise as decoded
909 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
910 // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)911 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
912   if (!consumeIf(Tag))
913     return 0;
914 
915   uint64_t N = parseBase62Number();
916   if (Error || !addAssign(N, 1))
917     return 0;
918 
919   return N;
920 }
921 
922 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
923 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
924 // "1_" encodes 2, etc.
925 //
926 // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()927 uint64_t Demangler::parseBase62Number() {
928   if (consumeIf('_'))
929     return 0;
930 
931   uint64_t Value = 0;
932 
933   while (true) {
934     uint64_t Digit;
935     char C = consume();
936 
937     if (C == '_') {
938       break;
939     } else if (isDigit(C)) {
940       Digit = C - '0';
941     } else if (isLower(C)) {
942       Digit = 10 + (C - 'a');
943     } else if (isUpper(C)) {
944       Digit = 10 + 26 + (C - 'A');
945     } else {
946       Error = true;
947       return 0;
948     }
949 
950     if (!mulAssign(Value, 62))
951       return 0;
952 
953     if (!addAssign(Value, Digit))
954       return 0;
955   }
956 
957   if (!addAssign(Value, 1))
958     return 0;
959 
960   return Value;
961 }
962 
963 // Parses a decimal number that had been encoded without any leading zeros.
964 //
965 // <decimal-number> = "0"
966 //                  | <1-9> {<0-9>}
parseDecimalNumber()967 uint64_t Demangler::parseDecimalNumber() {
968   char C = look();
969   if (!isDigit(C)) {
970     Error = true;
971     return 0;
972   }
973 
974   if (C == '0') {
975     consume();
976     return 0;
977   }
978 
979   uint64_t Value = 0;
980 
981   while (isDigit(look())) {
982     if (!mulAssign(Value, 10)) {
983       Error = true;
984       return 0;
985     }
986 
987     uint64_t D = consume() - '0';
988     if (!addAssign(Value, D))
989       return 0;
990   }
991 
992   return Value;
993 }
994 
995 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
996 // value and stores hex digits in HexDigits. The return value is unspecified if
997 // HexDigits.size() > 16.
998 //
999 // <hex-number> = "0_"
1000 //              | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(StringView & HexDigits)1001 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
1002   size_t Start = Position;
1003   uint64_t Value = 0;
1004 
1005   if (!isHexDigit(look()))
1006     Error = true;
1007 
1008   if (consumeIf('0')) {
1009     if (!consumeIf('_'))
1010       Error = true;
1011   } else {
1012     while (!Error && !consumeIf('_')) {
1013       char C = consume();
1014       Value *= 16;
1015       if (isDigit(C))
1016         Value += C - '0';
1017       else if ('a' <= C && C <= 'f')
1018         Value += 10 + (C - 'a');
1019       else
1020         Error = true;
1021     }
1022   }
1023 
1024   if (Error) {
1025     HexDigits = StringView();
1026     return 0;
1027   }
1028 
1029   size_t End = Position - 1;
1030   assert(Start < End);
1031   HexDigits = Input.substr(Start, End - Start);
1032   return Value;
1033 }
1034 
print(char C)1035 void Demangler::print(char C) {
1036   if (Error || !Print)
1037     return;
1038 
1039   Output += C;
1040 }
1041 
print(StringView S)1042 void Demangler::print(StringView S) {
1043   if (Error || !Print)
1044     return;
1045 
1046   Output += S;
1047 }
1048 
printDecimalNumber(uint64_t N)1049 void Demangler::printDecimalNumber(uint64_t N) {
1050   if (Error || !Print)
1051     return;
1052 
1053   Output << N;
1054 }
1055 
1056 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1057 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1058 // bound by one of the enclosing binders.
printLifetime(uint64_t Index)1059 void Demangler::printLifetime(uint64_t Index) {
1060   if (Index == 0) {
1061     print("'_");
1062     return;
1063   }
1064 
1065   if (Index - 1 >= BoundLifetimes) {
1066     Error = true;
1067     return;
1068   }
1069 
1070   uint64_t Depth = BoundLifetimes - Index;
1071   print('\'');
1072   if (Depth < 26) {
1073     char C = 'a' + Depth;
1074     print(C);
1075   } else {
1076     print('z');
1077     printDecimalNumber(Depth - 26 + 1);
1078   }
1079 }
1080 
look() const1081 char Demangler::look() const {
1082   if (Error || Position >= Input.size())
1083     return 0;
1084 
1085   return Input[Position];
1086 }
1087 
consume()1088 char Demangler::consume() {
1089   if (Error || Position >= Input.size()) {
1090     Error = true;
1091     return 0;
1092   }
1093 
1094   return Input[Position++];
1095 }
1096 
consumeIf(char Prefix)1097 bool Demangler::consumeIf(char Prefix) {
1098   if (Error || Position >= Input.size() || Input[Position] != Prefix)
1099     return false;
1100 
1101   Position += 1;
1102   return true;
1103 }
1104 
1105 /// Computes A + B. When computation wraps around sets the error and returns
1106 /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)1107 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1108   if (A > std::numeric_limits<uint64_t>::max() - B) {
1109     Error = true;
1110     return false;
1111   }
1112 
1113   A += B;
1114   return true;
1115 }
1116 
1117 /// Computes A * B. When computation wraps around sets the error and returns
1118 /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)1119 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1120   if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1121     Error = true;
1122     return false;
1123   }
1124 
1125   A *= B;
1126   return true;
1127 }
1128