1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringView.h"
16 #include "llvm/Demangle/Utility.h"
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 
26 using llvm::itanium_demangle::OutputBuffer;
27 using llvm::itanium_demangle::ScopedOverride;
28 using llvm::itanium_demangle::StringView;
29 
30 namespace {
31 
32 struct Identifier {
33   StringView Name;
34   bool Punycode;
35 
empty__anond672b4d40111::Identifier36   bool empty() const { return Name.empty(); }
37 };
38 
39 enum class BasicType {
40   Bool,
41   Char,
42   I8,
43   I16,
44   I32,
45   I64,
46   I128,
47   ISize,
48   U8,
49   U16,
50   U32,
51   U64,
52   U128,
53   USize,
54   F32,
55   F64,
56   Str,
57   Placeholder,
58   Unit,
59   Variadic,
60   Never,
61 };
62 
63 enum class IsInType {
64   No,
65   Yes,
66 };
67 
68 enum class LeaveGenericsOpen {
69   No,
70   Yes,
71 };
72 
73 class Demangler {
74   // Maximum recursion level. Used to avoid stack overflow.
75   size_t MaxRecursionLevel;
76   // Current recursion level.
77   size_t RecursionLevel;
78   size_t BoundLifetimes;
79   // Input string that is being demangled with "_R" prefix removed.
80   StringView Input;
81   // Position in the input string.
82   size_t Position;
83   // When true, print methods append the output to the stream.
84   // When false, the output is suppressed.
85   bool Print;
86   // True if an error occurred.
87   bool Error;
88 
89 public:
90   // Demangled output.
91   OutputBuffer Output;
92 
93   Demangler(size_t MaxRecursionLevel = 500);
94 
95   bool demangle(StringView MangledName);
96 
97 private:
98   bool demanglePath(IsInType Type,
99                     LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100   void demangleImplPath(IsInType InType);
101   void demangleGenericArg();
102   void demangleType();
103   void demangleFnSig();
104   void demangleDynBounds();
105   void demangleDynTrait();
106   void demangleOptionalBinder();
107   void demangleConst();
108   void demangleConstInt();
109   void demangleConstBool();
110   void demangleConstChar();
111 
demangleBackref(Callable Demangler)112   template <typename Callable> void demangleBackref(Callable Demangler) {
113     uint64_t Backref = parseBase62Number();
114     if (Error || Backref >= Position) {
115       Error = true;
116       return;
117     }
118 
119     if (!Print)
120       return;
121 
122     ScopedOverride<size_t> SavePosition(Position, Position);
123     Position = Backref;
124     Demangler();
125   }
126 
127   Identifier parseIdentifier();
128   uint64_t parseOptionalBase62Number(char Tag);
129   uint64_t parseBase62Number();
130   uint64_t parseDecimalNumber();
131   uint64_t parseHexNumber(StringView &HexDigits);
132 
133   void print(char C);
134   void print(StringView S);
135   void printDecimalNumber(uint64_t N);
136   void printBasicType(BasicType);
137   void printLifetime(uint64_t Index);
138   void printIdentifier(Identifier Ident);
139 
140   char look() const;
141   char consume();
142   bool consumeIf(char Prefix);
143 
144   bool addAssign(uint64_t &A, uint64_t B);
145   bool mulAssign(uint64_t &A, uint64_t B);
146 };
147 
148 } // namespace
149 
rustDemangle(const char * MangledName)150 char *llvm::rustDemangle(const char *MangledName) {
151   if (MangledName == nullptr)
152     return nullptr;
153 
154   // Return early if mangled name doesn't look like a Rust symbol.
155   StringView Mangled(MangledName);
156   if (!Mangled.startsWith("_R"))
157     return nullptr;
158 
159   Demangler D;
160   if (!D.demangle(Mangled)) {
161     std::free(D.Output.getBuffer());
162     return nullptr;
163   }
164 
165   D.Output += '\0';
166 
167   return D.Output.getBuffer();
168 }
169 
Demangler(size_t MaxRecursionLevel)170 Demangler::Demangler(size_t MaxRecursionLevel)
171     : MaxRecursionLevel(MaxRecursionLevel) {}
172 
isDigit(const char C)173 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
174 
isHexDigit(const char C)175 static inline bool isHexDigit(const char C) {
176   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
177 }
178 
isLower(const char C)179 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
180 
isUpper(const char C)181 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
182 
183 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)184 static inline bool isValid(const char C) {
185   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
186 }
187 
188 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
189 // otherwise. The demangled symbol is stored in Output field. It is
190 // responsibility of the caller to free the memory behind the output stream.
191 //
192 // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(StringView Mangled)193 bool Demangler::demangle(StringView Mangled) {
194   Position = 0;
195   Error = false;
196   Print = true;
197   RecursionLevel = 0;
198   BoundLifetimes = 0;
199 
200   if (!Mangled.consumeFront("_R")) {
201     Error = true;
202     return false;
203   }
204   size_t Dot = Mangled.find('.');
205   Input = Mangled.substr(0, Dot);
206   StringView Suffix = Mangled.dropFront(Dot);
207 
208   demanglePath(IsInType::No);
209 
210   if (Position != Input.size()) {
211     ScopedOverride<bool> SavePrint(Print, false);
212     demanglePath(IsInType::No);
213   }
214 
215   if (Position != Input.size())
216     Error = true;
217 
218   if (!Suffix.empty()) {
219     print(" (");
220     print(Suffix);
221     print(")");
222   }
223 
224   return !Error;
225 }
226 
227 // Demangles a path. InType indicates whether a path is inside a type. When
228 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
229 // output. Return value indicates whether generics arguments have been left
230 // open.
231 //
232 // <path> = "C" <identifier>               // crate root
233 //        | "M" <impl-path> <type>         // <T> (inherent impl)
234 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
235 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
236 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
237 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
238 //        | <backref>
239 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
240 // <ns> = "C"      // closure
241 //      | "S"      // shim
242 //      | <A-Z>    // other special namespaces
243 //      | <a-z>    // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)244 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
245   if (Error || RecursionLevel >= MaxRecursionLevel) {
246     Error = true;
247     return false;
248   }
249   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
250 
251   switch (consume()) {
252   case 'C': {
253     parseOptionalBase62Number('s');
254     printIdentifier(parseIdentifier());
255     break;
256   }
257   case 'M': {
258     demangleImplPath(InType);
259     print("<");
260     demangleType();
261     print(">");
262     break;
263   }
264   case 'X': {
265     demangleImplPath(InType);
266     print("<");
267     demangleType();
268     print(" as ");
269     demanglePath(IsInType::Yes);
270     print(">");
271     break;
272   }
273   case 'Y': {
274     print("<");
275     demangleType();
276     print(" as ");
277     demanglePath(IsInType::Yes);
278     print(">");
279     break;
280   }
281   case 'N': {
282     char NS = consume();
283     if (!isLower(NS) && !isUpper(NS)) {
284       Error = true;
285       break;
286     }
287     demanglePath(InType);
288 
289     uint64_t Disambiguator = parseOptionalBase62Number('s');
290     Identifier Ident = parseIdentifier();
291 
292     if (isUpper(NS)) {
293       // Special namespaces
294       print("::{");
295       if (NS == 'C')
296         print("closure");
297       else if (NS == 'S')
298         print("shim");
299       else
300         print(NS);
301       if (!Ident.empty()) {
302         print(":");
303         printIdentifier(Ident);
304       }
305       print('#');
306       printDecimalNumber(Disambiguator);
307       print('}');
308     } else {
309       // Implementation internal namespaces.
310       if (!Ident.empty()) {
311         print("::");
312         printIdentifier(Ident);
313       }
314     }
315     break;
316   }
317   case 'I': {
318     demanglePath(InType);
319     // Omit "::" when in a type, where it is optional.
320     if (InType == IsInType::No)
321       print("::");
322     print("<");
323     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
324       if (I > 0)
325         print(", ");
326       demangleGenericArg();
327     }
328     if (LeaveOpen == LeaveGenericsOpen::Yes)
329       return true;
330     else
331       print(">");
332     break;
333   }
334   case 'B': {
335     bool IsOpen = false;
336     demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
337     return IsOpen;
338   }
339   default:
340     Error = true;
341     break;
342   }
343 
344   return false;
345 }
346 
347 // <impl-path> = [<disambiguator>] <path>
348 // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)349 void Demangler::demangleImplPath(IsInType InType) {
350   ScopedOverride<bool> SavePrint(Print, false);
351   parseOptionalBase62Number('s');
352   demanglePath(InType);
353 }
354 
355 // <generic-arg> = <lifetime>
356 //               | <type>
357 //               | "K" <const>
358 // <lifetime> = "L" <base-62-number>
demangleGenericArg()359 void Demangler::demangleGenericArg() {
360   if (consumeIf('L'))
361     printLifetime(parseBase62Number());
362   else if (consumeIf('K'))
363     demangleConst();
364   else
365     demangleType();
366 }
367 
368 // <basic-type> = "a"      // i8
369 //              | "b"      // bool
370 //              | "c"      // char
371 //              | "d"      // f64
372 //              | "e"      // str
373 //              | "f"      // f32
374 //              | "h"      // u8
375 //              | "i"      // isize
376 //              | "j"      // usize
377 //              | "l"      // i32
378 //              | "m"      // u32
379 //              | "n"      // i128
380 //              | "o"      // u128
381 //              | "s"      // i16
382 //              | "t"      // u16
383 //              | "u"      // ()
384 //              | "v"      // ...
385 //              | "x"      // i64
386 //              | "y"      // u64
387 //              | "z"      // !
388 //              | "p"      // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)389 static bool parseBasicType(char C, BasicType &Type) {
390   switch (C) {
391   case 'a':
392     Type = BasicType::I8;
393     return true;
394   case 'b':
395     Type = BasicType::Bool;
396     return true;
397   case 'c':
398     Type = BasicType::Char;
399     return true;
400   case 'd':
401     Type = BasicType::F64;
402     return true;
403   case 'e':
404     Type = BasicType::Str;
405     return true;
406   case 'f':
407     Type = BasicType::F32;
408     return true;
409   case 'h':
410     Type = BasicType::U8;
411     return true;
412   case 'i':
413     Type = BasicType::ISize;
414     return true;
415   case 'j':
416     Type = BasicType::USize;
417     return true;
418   case 'l':
419     Type = BasicType::I32;
420     return true;
421   case 'm':
422     Type = BasicType::U32;
423     return true;
424   case 'n':
425     Type = BasicType::I128;
426     return true;
427   case 'o':
428     Type = BasicType::U128;
429     return true;
430   case 'p':
431     Type = BasicType::Placeholder;
432     return true;
433   case 's':
434     Type = BasicType::I16;
435     return true;
436   case 't':
437     Type = BasicType::U16;
438     return true;
439   case 'u':
440     Type = BasicType::Unit;
441     return true;
442   case 'v':
443     Type = BasicType::Variadic;
444     return true;
445   case 'x':
446     Type = BasicType::I64;
447     return true;
448   case 'y':
449     Type = BasicType::U64;
450     return true;
451   case 'z':
452     Type = BasicType::Never;
453     return true;
454   default:
455     return false;
456   }
457 }
458 
printBasicType(BasicType Type)459 void Demangler::printBasicType(BasicType Type) {
460   switch (Type) {
461   case BasicType::Bool:
462     print("bool");
463     break;
464   case BasicType::Char:
465     print("char");
466     break;
467   case BasicType::I8:
468     print("i8");
469     break;
470   case BasicType::I16:
471     print("i16");
472     break;
473   case BasicType::I32:
474     print("i32");
475     break;
476   case BasicType::I64:
477     print("i64");
478     break;
479   case BasicType::I128:
480     print("i128");
481     break;
482   case BasicType::ISize:
483     print("isize");
484     break;
485   case BasicType::U8:
486     print("u8");
487     break;
488   case BasicType::U16:
489     print("u16");
490     break;
491   case BasicType::U32:
492     print("u32");
493     break;
494   case BasicType::U64:
495     print("u64");
496     break;
497   case BasicType::U128:
498     print("u128");
499     break;
500   case BasicType::USize:
501     print("usize");
502     break;
503   case BasicType::F32:
504     print("f32");
505     break;
506   case BasicType::F64:
507     print("f64");
508     break;
509   case BasicType::Str:
510     print("str");
511     break;
512   case BasicType::Placeholder:
513     print("_");
514     break;
515   case BasicType::Unit:
516     print("()");
517     break;
518   case BasicType::Variadic:
519     print("...");
520     break;
521   case BasicType::Never:
522     print("!");
523     break;
524   }
525 }
526 
527 // <type> = | <basic-type>
528 //          | <path>                      // named type
529 //          | "A" <type> <const>          // [T; N]
530 //          | "S" <type>                  // [T]
531 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
532 //          | "R" [<lifetime>] <type>     // &T
533 //          | "Q" [<lifetime>] <type>     // &mut T
534 //          | "P" <type>                  // *const T
535 //          | "O" <type>                  // *mut T
536 //          | "F" <fn-sig>                // fn(...) -> ...
537 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
538 //          | <backref>                   // backref
demangleType()539 void Demangler::demangleType() {
540   if (Error || RecursionLevel >= MaxRecursionLevel) {
541     Error = true;
542     return;
543   }
544   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
545 
546   size_t Start = Position;
547   char C = consume();
548   BasicType Type;
549   if (parseBasicType(C, Type))
550     return printBasicType(Type);
551 
552   switch (C) {
553   case 'A':
554     print("[");
555     demangleType();
556     print("; ");
557     demangleConst();
558     print("]");
559     break;
560   case 'S':
561     print("[");
562     demangleType();
563     print("]");
564     break;
565   case 'T': {
566     print("(");
567     size_t I = 0;
568     for (; !Error && !consumeIf('E'); ++I) {
569       if (I > 0)
570         print(", ");
571       demangleType();
572     }
573     if (I == 1)
574       print(",");
575     print(")");
576     break;
577   }
578   case 'R':
579   case 'Q':
580     print('&');
581     if (consumeIf('L')) {
582       if (auto Lifetime = parseBase62Number()) {
583         printLifetime(Lifetime);
584         print(' ');
585       }
586     }
587     if (C == 'Q')
588       print("mut ");
589     demangleType();
590     break;
591   case 'P':
592     print("*const ");
593     demangleType();
594     break;
595   case 'O':
596     print("*mut ");
597     demangleType();
598     break;
599   case 'F':
600     demangleFnSig();
601     break;
602   case 'D':
603     demangleDynBounds();
604     if (consumeIf('L')) {
605       if (auto Lifetime = parseBase62Number()) {
606         print(" + ");
607         printLifetime(Lifetime);
608       }
609     } else {
610       Error = true;
611     }
612     break;
613   case 'B':
614     demangleBackref([&] { demangleType(); });
615     break;
616   default:
617     Position = Start;
618     demanglePath(IsInType::Yes);
619     break;
620   }
621 }
622 
623 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
624 // <abi> = "C"
625 //       | <undisambiguated-identifier>
demangleFnSig()626 void Demangler::demangleFnSig() {
627   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
628   demangleOptionalBinder();
629 
630   if (consumeIf('U'))
631     print("unsafe ");
632 
633   if (consumeIf('K')) {
634     print("extern \"");
635     if (consumeIf('C')) {
636       print("C");
637     } else {
638       Identifier Ident = parseIdentifier();
639       if (Ident.Punycode)
640         Error = true;
641       for (char C : Ident.Name) {
642         // When mangling ABI string, the "-" is replaced with "_".
643         if (C == '_')
644           C = '-';
645         print(C);
646       }
647     }
648     print("\" ");
649   }
650 
651   print("fn(");
652   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
653     if (I > 0)
654       print(", ");
655     demangleType();
656   }
657   print(")");
658 
659   if (consumeIf('u')) {
660     // Skip the unit type from the output.
661   } else {
662     print(" -> ");
663     demangleType();
664   }
665 }
666 
667 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()668 void Demangler::demangleDynBounds() {
669   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
670   print("dyn ");
671   demangleOptionalBinder();
672   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
673     if (I > 0)
674       print(" + ");
675     demangleDynTrait();
676   }
677 }
678 
679 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
680 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()681 void Demangler::demangleDynTrait() {
682   bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
683   while (!Error && consumeIf('p')) {
684     if (!IsOpen) {
685       IsOpen = true;
686       print('<');
687     } else {
688       print(", ");
689     }
690     print(parseIdentifier().Name);
691     print(" = ");
692     demangleType();
693   }
694   if (IsOpen)
695     print(">");
696 }
697 
698 // Demangles optional binder and updates the number of bound lifetimes.
699 //
700 // <binder> = "G" <base-62-number>
demangleOptionalBinder()701 void Demangler::demangleOptionalBinder() {
702   uint64_t Binder = parseOptionalBase62Number('G');
703   if (Error || Binder == 0)
704     return;
705 
706   // In valid inputs each bound lifetime is referenced later. Referencing a
707   // lifetime requires at least one byte of input. Reject inputs that are too
708   // short to reference all bound lifetimes. Otherwise demangling of invalid
709   // binders could generate excessive amounts of output.
710   if (Binder >= Input.size() - BoundLifetimes) {
711     Error = true;
712     return;
713   }
714 
715   print("for<");
716   for (size_t I = 0; I != Binder; ++I) {
717     BoundLifetimes += 1;
718     if (I > 0)
719       print(", ");
720     printLifetime(1);
721   }
722   print("> ");
723 }
724 
725 // <const> = <basic-type> <const-data>
726 //         | "p"                          // placeholder
727 //         | <backref>
demangleConst()728 void Demangler::demangleConst() {
729   if (Error || RecursionLevel >= MaxRecursionLevel) {
730     Error = true;
731     return;
732   }
733   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
734 
735   char C = consume();
736   BasicType Type;
737   if (parseBasicType(C, Type)) {
738     switch (Type) {
739     case BasicType::I8:
740     case BasicType::I16:
741     case BasicType::I32:
742     case BasicType::I64:
743     case BasicType::I128:
744     case BasicType::ISize:
745     case BasicType::U8:
746     case BasicType::U16:
747     case BasicType::U32:
748     case BasicType::U64:
749     case BasicType::U128:
750     case BasicType::USize:
751       demangleConstInt();
752       break;
753     case BasicType::Bool:
754       demangleConstBool();
755       break;
756     case BasicType::Char:
757       demangleConstChar();
758       break;
759     case BasicType::Placeholder:
760       print('_');
761       break;
762     default:
763       Error = true;
764       break;
765     }
766   } else if (C == 'B') {
767     demangleBackref([&] { demangleConst(); });
768   } else {
769     Error = true;
770   }
771 }
772 
773 // <const-data> = ["n"] <hex-number>
demangleConstInt()774 void Demangler::demangleConstInt() {
775   if (consumeIf('n'))
776     print('-');
777 
778   StringView HexDigits;
779   uint64_t Value = parseHexNumber(HexDigits);
780   if (HexDigits.size() <= 16) {
781     printDecimalNumber(Value);
782   } else {
783     print("0x");
784     print(HexDigits);
785   }
786 }
787 
788 // <const-data> = "0_" // false
789 //              | "1_" // true
demangleConstBool()790 void Demangler::demangleConstBool() {
791   StringView HexDigits;
792   parseHexNumber(HexDigits);
793   if (HexDigits == "0")
794     print("false");
795   else if (HexDigits == "1")
796     print("true");
797   else
798     Error = true;
799 }
800 
801 /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)802 static bool isAsciiPrintable(uint64_t CodePoint) {
803   return 0x20 <= CodePoint && CodePoint <= 0x7e;
804 }
805 
806 // <const-data> = <hex-number>
demangleConstChar()807 void Demangler::demangleConstChar() {
808   StringView HexDigits;
809   uint64_t CodePoint = parseHexNumber(HexDigits);
810   if (Error || HexDigits.size() > 6) {
811     Error = true;
812     return;
813   }
814 
815   print("'");
816   switch (CodePoint) {
817   case '\t':
818     print(R"(\t)");
819     break;
820   case '\r':
821     print(R"(\r)");
822     break;
823   case '\n':
824     print(R"(\n)");
825     break;
826   case '\\':
827     print(R"(\\)");
828     break;
829   case '"':
830     print(R"(")");
831     break;
832   case '\'':
833     print(R"(\')");
834     break;
835   default:
836     if (isAsciiPrintable(CodePoint)) {
837       char C = CodePoint;
838       print(C);
839     } else {
840       print(R"(\u{)");
841       print(HexDigits);
842       print('}');
843     }
844     break;
845   }
846   print('\'');
847 }
848 
849 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()850 Identifier Demangler::parseIdentifier() {
851   bool Punycode = consumeIf('u');
852   uint64_t Bytes = parseDecimalNumber();
853 
854   // Underscore resolves the ambiguity when identifier starts with a decimal
855   // digit or another underscore.
856   consumeIf('_');
857 
858   if (Error || Bytes > Input.size() - Position) {
859     Error = true;
860     return {};
861   }
862   StringView S = Input.substr(Position, Bytes);
863   Position += Bytes;
864 
865   if (!std::all_of(S.begin(), S.end(), isValid)) {
866     Error = true;
867     return {};
868   }
869 
870   return {S, Punycode};
871 }
872 
873 // Parses optional base 62 number. The presence of a number is determined using
874 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
875 //
876 // This function is indended for parsing disambiguators and binders which when
877 // not present have their value interpreted as 0, and otherwise as decoded
878 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
879 // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)880 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
881   if (!consumeIf(Tag))
882     return 0;
883 
884   uint64_t N = parseBase62Number();
885   if (Error || !addAssign(N, 1))
886     return 0;
887 
888   return N;
889 }
890 
891 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
892 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
893 // "1_" encodes 2, etc.
894 //
895 // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()896 uint64_t Demangler::parseBase62Number() {
897   if (consumeIf('_'))
898     return 0;
899 
900   uint64_t Value = 0;
901 
902   while (true) {
903     uint64_t Digit;
904     char C = consume();
905 
906     if (C == '_') {
907       break;
908     } else if (isDigit(C)) {
909       Digit = C - '0';
910     } else if (isLower(C)) {
911       Digit = 10 + (C - 'a');
912     } else if (isUpper(C)) {
913       Digit = 10 + 26 + (C - 'A');
914     } else {
915       Error = true;
916       return 0;
917     }
918 
919     if (!mulAssign(Value, 62))
920       return 0;
921 
922     if (!addAssign(Value, Digit))
923       return 0;
924   }
925 
926   if (!addAssign(Value, 1))
927     return 0;
928 
929   return Value;
930 }
931 
932 // Parses a decimal number that had been encoded without any leading zeros.
933 //
934 // <decimal-number> = "0"
935 //                  | <1-9> {<0-9>}
parseDecimalNumber()936 uint64_t Demangler::parseDecimalNumber() {
937   char C = look();
938   if (!isDigit(C)) {
939     Error = true;
940     return 0;
941   }
942 
943   if (C == '0') {
944     consume();
945     return 0;
946   }
947 
948   uint64_t Value = 0;
949 
950   while (isDigit(look())) {
951     if (!mulAssign(Value, 10)) {
952       Error = true;
953       return 0;
954     }
955 
956     uint64_t D = consume() - '0';
957     if (!addAssign(Value, D))
958       return 0;
959   }
960 
961   return Value;
962 }
963 
964 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
965 // value and stores hex digits in HexDigits. The return value is unspecified if
966 // HexDigits.size() > 16.
967 //
968 // <hex-number> = "0_"
969 //              | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(StringView & HexDigits)970 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
971   size_t Start = Position;
972   uint64_t Value = 0;
973 
974   if (!isHexDigit(look()))
975     Error = true;
976 
977   if (consumeIf('0')) {
978     if (!consumeIf('_'))
979       Error = true;
980   } else {
981     while (!Error && !consumeIf('_')) {
982       char C = consume();
983       Value *= 16;
984       if (isDigit(C))
985         Value += C - '0';
986       else if ('a' <= C && C <= 'f')
987         Value += 10 + (C - 'a');
988       else
989         Error = true;
990     }
991   }
992 
993   if (Error) {
994     HexDigits = StringView();
995     return 0;
996   }
997 
998   size_t End = Position - 1;
999   assert(Start < End);
1000   HexDigits = Input.substr(Start, End - Start);
1001   return Value;
1002 }
1003 
print(char C)1004 void Demangler::print(char C) {
1005   if (Error || !Print)
1006     return;
1007 
1008   Output += C;
1009 }
1010 
print(StringView S)1011 void Demangler::print(StringView S) {
1012   if (Error || !Print)
1013     return;
1014 
1015   Output += S;
1016 }
1017 
printDecimalNumber(uint64_t N)1018 void Demangler::printDecimalNumber(uint64_t N) {
1019   if (Error || !Print)
1020     return;
1021 
1022   Output << N;
1023 }
1024 
1025 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1026 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1027 // bound by one of the enclosing binders.
printLifetime(uint64_t Index)1028 void Demangler::printLifetime(uint64_t Index) {
1029   if (Index == 0) {
1030     print("'_");
1031     return;
1032   }
1033 
1034   if (Index - 1 >= BoundLifetimes) {
1035     Error = true;
1036     return;
1037   }
1038 
1039   uint64_t Depth = BoundLifetimes - Index;
1040   print('\'');
1041   if (Depth < 26) {
1042     char C = 'a' + Depth;
1043     print(C);
1044   } else {
1045     print('z');
1046     printDecimalNumber(Depth - 26 + 1);
1047   }
1048 }
1049 
decodePunycodeDigit(char C,size_t & Value)1050 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1051   if (isLower(C)) {
1052     Value = C - 'a';
1053     return true;
1054   }
1055 
1056   if (isDigit(C)) {
1057     Value = 26 + (C - '0');
1058     return true;
1059   }
1060 
1061   return false;
1062 }
1063 
removeNullBytes(OutputBuffer & Output,size_t StartIdx)1064 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1065   char *Buffer = Output.getBuffer();
1066   char *Start = Buffer + StartIdx;
1067   char *End = Buffer + Output.getCurrentPosition();
1068   Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1069 }
1070 
1071 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1072 // CodePoint is not a valid unicode scalar value.
encodeUTF8(size_t CodePoint,char * Output)1073 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1074   if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1075     return false;
1076 
1077   if (CodePoint <= 0x7F) {
1078     Output[0] = CodePoint;
1079     return true;
1080   }
1081 
1082   if (CodePoint <= 0x7FF) {
1083     Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1084     Output[1] = 0x80 | (CodePoint & 0x3F);
1085     return true;
1086   }
1087 
1088   if (CodePoint <= 0xFFFF) {
1089     Output[0] = 0xE0 | (CodePoint >> 12);
1090     Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1091     Output[2] = 0x80 | (CodePoint & 0x3F);
1092     return true;
1093   }
1094 
1095   if (CodePoint <= 0x10FFFF) {
1096     Output[0] = 0xF0 | (CodePoint >> 18);
1097     Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1098     Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1099     Output[3] = 0x80 | (CodePoint & 0x3F);
1100     return true;
1101   }
1102 
1103   return false;
1104 }
1105 
1106 // Decodes string encoded using punycode and appends results to Output.
1107 // Returns true if decoding was successful.
decodePunycode(StringView Input,OutputBuffer & Output)1108 static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1109   size_t OutputSize = Output.getCurrentPosition();
1110   size_t InputIdx = 0;
1111 
1112   // Rust uses an underscore as a delimiter.
1113   size_t DelimiterPos = StringView::npos;
1114   for (size_t I = 0; I != Input.size(); ++I)
1115     if (Input[I] == '_')
1116       DelimiterPos = I;
1117 
1118   if (DelimiterPos != StringView::npos) {
1119     // Copy basic code points before the last delimiter to the output.
1120     for (; InputIdx != DelimiterPos; ++InputIdx) {
1121       char C = Input[InputIdx];
1122       if (!isValid(C))
1123         return false;
1124       // Code points are padded with zeros while decoding is in progress.
1125       char UTF8[4] = {C};
1126       Output += StringView(UTF8, UTF8 + 4);
1127     }
1128     // Skip over the delimiter.
1129     ++InputIdx;
1130   }
1131 
1132   size_t Base = 36;
1133   size_t Skew = 38;
1134   size_t Bias = 72;
1135   size_t N = 0x80;
1136   size_t TMin = 1;
1137   size_t TMax = 26;
1138   size_t Damp = 700;
1139 
1140   auto Adapt = [&](size_t Delta, size_t NumPoints) {
1141     Delta /= Damp;
1142     Delta += Delta / NumPoints;
1143     Damp = 2;
1144 
1145     size_t K = 0;
1146     while (Delta > (Base - TMin) * TMax / 2) {
1147       Delta /= Base - TMin;
1148       K += Base;
1149     }
1150     return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1151   };
1152 
1153   // Main decoding loop.
1154   for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1155     size_t OldI = I;
1156     size_t W = 1;
1157     size_t Max = std::numeric_limits<size_t>::max();
1158     for (size_t K = Base; true; K += Base) {
1159       if (InputIdx == Input.size())
1160         return false;
1161       char C = Input[InputIdx++];
1162       size_t Digit = 0;
1163       if (!decodePunycodeDigit(C, Digit))
1164         return false;
1165 
1166       if (Digit > (Max - I) / W)
1167         return false;
1168       I += Digit * W;
1169 
1170       size_t T;
1171       if (K <= Bias)
1172         T = TMin;
1173       else if (K >= Bias + TMax)
1174         T = TMax;
1175       else
1176         T = K - Bias;
1177 
1178       if (Digit < T)
1179         break;
1180 
1181       if (W > Max / (Base - T))
1182         return false;
1183       W *= (Base - T);
1184     }
1185     size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1186     Bias = Adapt(I - OldI, NumPoints);
1187 
1188     if (I / NumPoints > Max - N)
1189       return false;
1190     N += I / NumPoints;
1191     I = I % NumPoints;
1192 
1193     // Insert N at position I in the output.
1194     char UTF8[4] = {};
1195     if (!encodeUTF8(N, UTF8))
1196       return false;
1197     Output.insert(OutputSize + I * 4, UTF8, 4);
1198   }
1199 
1200   removeNullBytes(Output, OutputSize);
1201   return true;
1202 }
1203 
printIdentifier(Identifier Ident)1204 void Demangler::printIdentifier(Identifier Ident) {
1205   if (Error || !Print)
1206     return;
1207 
1208   if (Ident.Punycode) {
1209     if (!decodePunycode(Ident.Name, Output))
1210       Error = true;
1211   } else {
1212     print(Ident.Name);
1213   }
1214 }
1215 
look() const1216 char Demangler::look() const {
1217   if (Error || Position >= Input.size())
1218     return 0;
1219 
1220   return Input[Position];
1221 }
1222 
consume()1223 char Demangler::consume() {
1224   if (Error || Position >= Input.size()) {
1225     Error = true;
1226     return 0;
1227   }
1228 
1229   return Input[Position++];
1230 }
1231 
consumeIf(char Prefix)1232 bool Demangler::consumeIf(char Prefix) {
1233   if (Error || Position >= Input.size() || Input[Position] != Prefix)
1234     return false;
1235 
1236   Position += 1;
1237   return true;
1238 }
1239 
1240 /// Computes A + B. When computation wraps around sets the error and returns
1241 /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)1242 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1243   if (A > std::numeric_limits<uint64_t>::max() - B) {
1244     Error = true;
1245     return false;
1246   }
1247 
1248   A += B;
1249   return true;
1250 }
1251 
1252 /// Computes A * B. When computation wraps around sets the error and returns
1253 /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)1254 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1255   if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1256     Error = true;
1257     return false;
1258   }
1259 
1260   A *= B;
1261   return true;
1262 }
1263