1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringView.h"
16 #include "llvm/Demangle/Utility.h"
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 
26 using llvm::itanium_demangle::OutputBuffer;
27 using llvm::itanium_demangle::ScopedOverride;
28 using llvm::itanium_demangle::StringView;
29 
30 namespace {
31 
32 struct Identifier {
33   StringView Name;
34   bool Punycode;
35 
36   bool empty() const { return Name.empty(); }
37 };
38 
39 enum class BasicType {
40   Bool,
41   Char,
42   I8,
43   I16,
44   I32,
45   I64,
46   I128,
47   ISize,
48   U8,
49   U16,
50   U32,
51   U64,
52   U128,
53   USize,
54   F32,
55   F64,
56   Str,
57   Placeholder,
58   Unit,
59   Variadic,
60   Never,
61 };
62 
63 enum class IsInType {
64   No,
65   Yes,
66 };
67 
68 enum class LeaveGenericsOpen {
69   No,
70   Yes,
71 };
72 
73 class Demangler {
74   // Maximum recursion level. Used to avoid stack overflow.
75   size_t MaxRecursionLevel;
76   // Current recursion level.
77   size_t RecursionLevel;
78   size_t BoundLifetimes;
79   // Input string that is being demangled with "_R" prefix removed.
80   StringView Input;
81   // Position in the input string.
82   size_t Position;
83   // When true, print methods append the output to the stream.
84   // When false, the output is suppressed.
85   bool Print;
86   // True if an error occurred.
87   bool Error;
88 
89 public:
90   // Demangled output.
91   OutputBuffer Output;
92 
93   Demangler(size_t MaxRecursionLevel = 500);
94 
95   bool demangle(StringView MangledName);
96 
97 private:
98   bool demanglePath(IsInType Type,
99                     LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100   void demangleImplPath(IsInType InType);
101   void demangleGenericArg();
102   void demangleType();
103   void demangleFnSig();
104   void demangleDynBounds();
105   void demangleDynTrait();
106   void demangleOptionalBinder();
107   void demangleConst();
108   void demangleConstInt();
109   void demangleConstBool();
110   void demangleConstChar();
111 
112   template <typename Callable> void demangleBackref(Callable Demangler) {
113     uint64_t Backref = parseBase62Number();
114     if (Error || Backref >= Position) {
115       Error = true;
116       return;
117     }
118 
119     if (!Print)
120       return;
121 
122     ScopedOverride<size_t> SavePosition(Position, Position);
123     Position = Backref;
124     Demangler();
125   }
126 
127   Identifier parseIdentifier();
128   uint64_t parseOptionalBase62Number(char Tag);
129   uint64_t parseBase62Number();
130   uint64_t parseDecimalNumber();
131   uint64_t parseHexNumber(StringView &HexDigits);
132 
133   void print(char C);
134   void print(StringView S);
135   void printDecimalNumber(uint64_t N);
136   void printBasicType(BasicType);
137   void printLifetime(uint64_t Index);
138   void printIdentifier(Identifier Ident);
139 
140   char look() const;
141   char consume();
142   bool consumeIf(char Prefix);
143 
144   bool addAssign(uint64_t &A, uint64_t B);
145   bool mulAssign(uint64_t &A, uint64_t B);
146 };
147 
148 } // namespace
149 
150 char *llvm::rustDemangle(const char *MangledName) {
151   if (MangledName == nullptr)
152     return nullptr;
153 
154   // Return early if mangled name doesn't look like a Rust symbol.
155   StringView Mangled(MangledName);
156   if (!Mangled.startsWith("_R"))
157     return nullptr;
158 
159   Demangler D;
160   if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024))
161     return nullptr;
162 
163   if (!D.demangle(Mangled)) {
164     std::free(D.Output.getBuffer());
165     return nullptr;
166   }
167 
168   D.Output += '\0';
169 
170   return D.Output.getBuffer();
171 }
172 
173 Demangler::Demangler(size_t MaxRecursionLevel)
174     : MaxRecursionLevel(MaxRecursionLevel) {}
175 
176 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
177 
178 static inline bool isHexDigit(const char C) {
179   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
180 }
181 
182 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
183 
184 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
185 
186 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
187 static inline bool isValid(const char C) {
188   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
189 }
190 
191 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
192 // otherwise. The demangled symbol is stored in Output field. It is
193 // responsibility of the caller to free the memory behind the output stream.
194 //
195 // <symbol-name> = "_R" <path> [<instantiating-crate>]
196 bool Demangler::demangle(StringView Mangled) {
197   Position = 0;
198   Error = false;
199   Print = true;
200   RecursionLevel = 0;
201   BoundLifetimes = 0;
202 
203   if (!Mangled.consumeFront("_R")) {
204     Error = true;
205     return false;
206   }
207   size_t Dot = Mangled.find('.');
208   Input = Mangled.substr(0, Dot);
209   StringView Suffix = Mangled.dropFront(Dot);
210 
211   demanglePath(IsInType::No);
212 
213   if (Position != Input.size()) {
214     ScopedOverride<bool> SavePrint(Print, false);
215     demanglePath(IsInType::No);
216   }
217 
218   if (Position != Input.size())
219     Error = true;
220 
221   if (!Suffix.empty()) {
222     print(" (");
223     print(Suffix);
224     print(")");
225   }
226 
227   return !Error;
228 }
229 
230 // Demangles a path. InType indicates whether a path is inside a type. When
231 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
232 // output. Return value indicates whether generics arguments have been left
233 // open.
234 //
235 // <path> = "C" <identifier>               // crate root
236 //        | "M" <impl-path> <type>         // <T> (inherent impl)
237 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
238 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
239 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
240 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
241 //        | <backref>
242 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
243 // <ns> = "C"      // closure
244 //      | "S"      // shim
245 //      | <A-Z>    // other special namespaces
246 //      | <a-z>    // internal namespaces
247 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
248   if (Error || RecursionLevel >= MaxRecursionLevel) {
249     Error = true;
250     return false;
251   }
252   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
253 
254   switch (consume()) {
255   case 'C': {
256     parseOptionalBase62Number('s');
257     printIdentifier(parseIdentifier());
258     break;
259   }
260   case 'M': {
261     demangleImplPath(InType);
262     print("<");
263     demangleType();
264     print(">");
265     break;
266   }
267   case 'X': {
268     demangleImplPath(InType);
269     print("<");
270     demangleType();
271     print(" as ");
272     demanglePath(IsInType::Yes);
273     print(">");
274     break;
275   }
276   case 'Y': {
277     print("<");
278     demangleType();
279     print(" as ");
280     demanglePath(IsInType::Yes);
281     print(">");
282     break;
283   }
284   case 'N': {
285     char NS = consume();
286     if (!isLower(NS) && !isUpper(NS)) {
287       Error = true;
288       break;
289     }
290     demanglePath(InType);
291 
292     uint64_t Disambiguator = parseOptionalBase62Number('s');
293     Identifier Ident = parseIdentifier();
294 
295     if (isUpper(NS)) {
296       // Special namespaces
297       print("::{");
298       if (NS == 'C')
299         print("closure");
300       else if (NS == 'S')
301         print("shim");
302       else
303         print(NS);
304       if (!Ident.empty()) {
305         print(":");
306         printIdentifier(Ident);
307       }
308       print('#');
309       printDecimalNumber(Disambiguator);
310       print('}');
311     } else {
312       // Implementation internal namespaces.
313       if (!Ident.empty()) {
314         print("::");
315         printIdentifier(Ident);
316       }
317     }
318     break;
319   }
320   case 'I': {
321     demanglePath(InType);
322     // Omit "::" when in a type, where it is optional.
323     if (InType == IsInType::No)
324       print("::");
325     print("<");
326     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
327       if (I > 0)
328         print(", ");
329       demangleGenericArg();
330     }
331     if (LeaveOpen == LeaveGenericsOpen::Yes)
332       return true;
333     else
334       print(">");
335     break;
336   }
337   case 'B': {
338     bool IsOpen = false;
339     demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
340     return IsOpen;
341   }
342   default:
343     Error = true;
344     break;
345   }
346 
347   return false;
348 }
349 
350 // <impl-path> = [<disambiguator>] <path>
351 // <disambiguator> = "s" <base-62-number>
352 void Demangler::demangleImplPath(IsInType InType) {
353   ScopedOverride<bool> SavePrint(Print, false);
354   parseOptionalBase62Number('s');
355   demanglePath(InType);
356 }
357 
358 // <generic-arg> = <lifetime>
359 //               | <type>
360 //               | "K" <const>
361 // <lifetime> = "L" <base-62-number>
362 void Demangler::demangleGenericArg() {
363   if (consumeIf('L'))
364     printLifetime(parseBase62Number());
365   else if (consumeIf('K'))
366     demangleConst();
367   else
368     demangleType();
369 }
370 
371 // <basic-type> = "a"      // i8
372 //              | "b"      // bool
373 //              | "c"      // char
374 //              | "d"      // f64
375 //              | "e"      // str
376 //              | "f"      // f32
377 //              | "h"      // u8
378 //              | "i"      // isize
379 //              | "j"      // usize
380 //              | "l"      // i32
381 //              | "m"      // u32
382 //              | "n"      // i128
383 //              | "o"      // u128
384 //              | "s"      // i16
385 //              | "t"      // u16
386 //              | "u"      // ()
387 //              | "v"      // ...
388 //              | "x"      // i64
389 //              | "y"      // u64
390 //              | "z"      // !
391 //              | "p"      // placeholder (e.g. for generic params), shown as _
392 static bool parseBasicType(char C, BasicType &Type) {
393   switch (C) {
394   case 'a':
395     Type = BasicType::I8;
396     return true;
397   case 'b':
398     Type = BasicType::Bool;
399     return true;
400   case 'c':
401     Type = BasicType::Char;
402     return true;
403   case 'd':
404     Type = BasicType::F64;
405     return true;
406   case 'e':
407     Type = BasicType::Str;
408     return true;
409   case 'f':
410     Type = BasicType::F32;
411     return true;
412   case 'h':
413     Type = BasicType::U8;
414     return true;
415   case 'i':
416     Type = BasicType::ISize;
417     return true;
418   case 'j':
419     Type = BasicType::USize;
420     return true;
421   case 'l':
422     Type = BasicType::I32;
423     return true;
424   case 'm':
425     Type = BasicType::U32;
426     return true;
427   case 'n':
428     Type = BasicType::I128;
429     return true;
430   case 'o':
431     Type = BasicType::U128;
432     return true;
433   case 'p':
434     Type = BasicType::Placeholder;
435     return true;
436   case 's':
437     Type = BasicType::I16;
438     return true;
439   case 't':
440     Type = BasicType::U16;
441     return true;
442   case 'u':
443     Type = BasicType::Unit;
444     return true;
445   case 'v':
446     Type = BasicType::Variadic;
447     return true;
448   case 'x':
449     Type = BasicType::I64;
450     return true;
451   case 'y':
452     Type = BasicType::U64;
453     return true;
454   case 'z':
455     Type = BasicType::Never;
456     return true;
457   default:
458     return false;
459   }
460 }
461 
462 void Demangler::printBasicType(BasicType Type) {
463   switch (Type) {
464   case BasicType::Bool:
465     print("bool");
466     break;
467   case BasicType::Char:
468     print("char");
469     break;
470   case BasicType::I8:
471     print("i8");
472     break;
473   case BasicType::I16:
474     print("i16");
475     break;
476   case BasicType::I32:
477     print("i32");
478     break;
479   case BasicType::I64:
480     print("i64");
481     break;
482   case BasicType::I128:
483     print("i128");
484     break;
485   case BasicType::ISize:
486     print("isize");
487     break;
488   case BasicType::U8:
489     print("u8");
490     break;
491   case BasicType::U16:
492     print("u16");
493     break;
494   case BasicType::U32:
495     print("u32");
496     break;
497   case BasicType::U64:
498     print("u64");
499     break;
500   case BasicType::U128:
501     print("u128");
502     break;
503   case BasicType::USize:
504     print("usize");
505     break;
506   case BasicType::F32:
507     print("f32");
508     break;
509   case BasicType::F64:
510     print("f64");
511     break;
512   case BasicType::Str:
513     print("str");
514     break;
515   case BasicType::Placeholder:
516     print("_");
517     break;
518   case BasicType::Unit:
519     print("()");
520     break;
521   case BasicType::Variadic:
522     print("...");
523     break;
524   case BasicType::Never:
525     print("!");
526     break;
527   }
528 }
529 
530 // <type> = | <basic-type>
531 //          | <path>                      // named type
532 //          | "A" <type> <const>          // [T; N]
533 //          | "S" <type>                  // [T]
534 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
535 //          | "R" [<lifetime>] <type>     // &T
536 //          | "Q" [<lifetime>] <type>     // &mut T
537 //          | "P" <type>                  // *const T
538 //          | "O" <type>                  // *mut T
539 //          | "F" <fn-sig>                // fn(...) -> ...
540 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
541 //          | <backref>                   // backref
542 void Demangler::demangleType() {
543   if (Error || RecursionLevel >= MaxRecursionLevel) {
544     Error = true;
545     return;
546   }
547   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
548 
549   size_t Start = Position;
550   char C = consume();
551   BasicType Type;
552   if (parseBasicType(C, Type))
553     return printBasicType(Type);
554 
555   switch (C) {
556   case 'A':
557     print("[");
558     demangleType();
559     print("; ");
560     demangleConst();
561     print("]");
562     break;
563   case 'S':
564     print("[");
565     demangleType();
566     print("]");
567     break;
568   case 'T': {
569     print("(");
570     size_t I = 0;
571     for (; !Error && !consumeIf('E'); ++I) {
572       if (I > 0)
573         print(", ");
574       demangleType();
575     }
576     if (I == 1)
577       print(",");
578     print(")");
579     break;
580   }
581   case 'R':
582   case 'Q':
583     print('&');
584     if (consumeIf('L')) {
585       if (auto Lifetime = parseBase62Number()) {
586         printLifetime(Lifetime);
587         print(' ');
588       }
589     }
590     if (C == 'Q')
591       print("mut ");
592     demangleType();
593     break;
594   case 'P':
595     print("*const ");
596     demangleType();
597     break;
598   case 'O':
599     print("*mut ");
600     demangleType();
601     break;
602   case 'F':
603     demangleFnSig();
604     break;
605   case 'D':
606     demangleDynBounds();
607     if (consumeIf('L')) {
608       if (auto Lifetime = parseBase62Number()) {
609         print(" + ");
610         printLifetime(Lifetime);
611       }
612     } else {
613       Error = true;
614     }
615     break;
616   case 'B':
617     demangleBackref([&] { demangleType(); });
618     break;
619   default:
620     Position = Start;
621     demanglePath(IsInType::Yes);
622     break;
623   }
624 }
625 
626 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
627 // <abi> = "C"
628 //       | <undisambiguated-identifier>
629 void Demangler::demangleFnSig() {
630   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
631   demangleOptionalBinder();
632 
633   if (consumeIf('U'))
634     print("unsafe ");
635 
636   if (consumeIf('K')) {
637     print("extern \"");
638     if (consumeIf('C')) {
639       print("C");
640     } else {
641       Identifier Ident = parseIdentifier();
642       if (Ident.Punycode)
643         Error = true;
644       for (char C : Ident.Name) {
645         // When mangling ABI string, the "-" is replaced with "_".
646         if (C == '_')
647           C = '-';
648         print(C);
649       }
650     }
651     print("\" ");
652   }
653 
654   print("fn(");
655   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
656     if (I > 0)
657       print(", ");
658     demangleType();
659   }
660   print(")");
661 
662   if (consumeIf('u')) {
663     // Skip the unit type from the output.
664   } else {
665     print(" -> ");
666     demangleType();
667   }
668 }
669 
670 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
671 void Demangler::demangleDynBounds() {
672   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
673   print("dyn ");
674   demangleOptionalBinder();
675   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
676     if (I > 0)
677       print(" + ");
678     demangleDynTrait();
679   }
680 }
681 
682 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
683 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
684 void Demangler::demangleDynTrait() {
685   bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
686   while (!Error && consumeIf('p')) {
687     if (!IsOpen) {
688       IsOpen = true;
689       print('<');
690     } else {
691       print(", ");
692     }
693     print(parseIdentifier().Name);
694     print(" = ");
695     demangleType();
696   }
697   if (IsOpen)
698     print(">");
699 }
700 
701 // Demangles optional binder and updates the number of bound lifetimes.
702 //
703 // <binder> = "G" <base-62-number>
704 void Demangler::demangleOptionalBinder() {
705   uint64_t Binder = parseOptionalBase62Number('G');
706   if (Error || Binder == 0)
707     return;
708 
709   // In valid inputs each bound lifetime is referenced later. Referencing a
710   // lifetime requires at least one byte of input. Reject inputs that are too
711   // short to reference all bound lifetimes. Otherwise demangling of invalid
712   // binders could generate excessive amounts of output.
713   if (Binder >= Input.size() - BoundLifetimes) {
714     Error = true;
715     return;
716   }
717 
718   print("for<");
719   for (size_t I = 0; I != Binder; ++I) {
720     BoundLifetimes += 1;
721     if (I > 0)
722       print(", ");
723     printLifetime(1);
724   }
725   print("> ");
726 }
727 
728 // <const> = <basic-type> <const-data>
729 //         | "p"                          // placeholder
730 //         | <backref>
731 void Demangler::demangleConst() {
732   if (Error || RecursionLevel >= MaxRecursionLevel) {
733     Error = true;
734     return;
735   }
736   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
737 
738   char C = consume();
739   BasicType Type;
740   if (parseBasicType(C, Type)) {
741     switch (Type) {
742     case BasicType::I8:
743     case BasicType::I16:
744     case BasicType::I32:
745     case BasicType::I64:
746     case BasicType::I128:
747     case BasicType::ISize:
748     case BasicType::U8:
749     case BasicType::U16:
750     case BasicType::U32:
751     case BasicType::U64:
752     case BasicType::U128:
753     case BasicType::USize:
754       demangleConstInt();
755       break;
756     case BasicType::Bool:
757       demangleConstBool();
758       break;
759     case BasicType::Char:
760       demangleConstChar();
761       break;
762     case BasicType::Placeholder:
763       print('_');
764       break;
765     default:
766       Error = true;
767       break;
768     }
769   } else if (C == 'B') {
770     demangleBackref([&] { demangleConst(); });
771   } else {
772     Error = true;
773   }
774 }
775 
776 // <const-data> = ["n"] <hex-number>
777 void Demangler::demangleConstInt() {
778   if (consumeIf('n'))
779     print('-');
780 
781   StringView HexDigits;
782   uint64_t Value = parseHexNumber(HexDigits);
783   if (HexDigits.size() <= 16) {
784     printDecimalNumber(Value);
785   } else {
786     print("0x");
787     print(HexDigits);
788   }
789 }
790 
791 // <const-data> = "0_" // false
792 //              | "1_" // true
793 void Demangler::demangleConstBool() {
794   StringView HexDigits;
795   parseHexNumber(HexDigits);
796   if (HexDigits == "0")
797     print("false");
798   else if (HexDigits == "1")
799     print("true");
800   else
801     Error = true;
802 }
803 
804 /// Returns true if CodePoint represents a printable ASCII character.
805 static bool isAsciiPrintable(uint64_t CodePoint) {
806   return 0x20 <= CodePoint && CodePoint <= 0x7e;
807 }
808 
809 // <const-data> = <hex-number>
810 void Demangler::demangleConstChar() {
811   StringView HexDigits;
812   uint64_t CodePoint = parseHexNumber(HexDigits);
813   if (Error || HexDigits.size() > 6) {
814     Error = true;
815     return;
816   }
817 
818   print("'");
819   switch (CodePoint) {
820   case '\t':
821     print(R"(\t)");
822     break;
823   case '\r':
824     print(R"(\r)");
825     break;
826   case '\n':
827     print(R"(\n)");
828     break;
829   case '\\':
830     print(R"(\\)");
831     break;
832   case '"':
833     print(R"(")");
834     break;
835   case '\'':
836     print(R"(\')");
837     break;
838   default:
839     if (isAsciiPrintable(CodePoint)) {
840       char C = CodePoint;
841       print(C);
842     } else {
843       print(R"(\u{)");
844       print(HexDigits);
845       print('}');
846     }
847     break;
848   }
849   print('\'');
850 }
851 
852 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
853 Identifier Demangler::parseIdentifier() {
854   bool Punycode = consumeIf('u');
855   uint64_t Bytes = parseDecimalNumber();
856 
857   // Underscore resolves the ambiguity when identifier starts with a decimal
858   // digit or another underscore.
859   consumeIf('_');
860 
861   if (Error || Bytes > Input.size() - Position) {
862     Error = true;
863     return {};
864   }
865   StringView S = Input.substr(Position, Bytes);
866   Position += Bytes;
867 
868   if (!std::all_of(S.begin(), S.end(), isValid)) {
869     Error = true;
870     return {};
871   }
872 
873   return {S, Punycode};
874 }
875 
876 // Parses optional base 62 number. The presence of a number is determined using
877 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
878 //
879 // This function is indended for parsing disambiguators and binders which when
880 // not present have their value interpreted as 0, and otherwise as decoded
881 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
882 // 2. When "G" is absent value is 0.
883 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
884   if (!consumeIf(Tag))
885     return 0;
886 
887   uint64_t N = parseBase62Number();
888   if (Error || !addAssign(N, 1))
889     return 0;
890 
891   return N;
892 }
893 
894 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
895 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
896 // "1_" encodes 2, etc.
897 //
898 // <base-62-number> = {<0-9a-zA-Z>} "_"
899 uint64_t Demangler::parseBase62Number() {
900   if (consumeIf('_'))
901     return 0;
902 
903   uint64_t Value = 0;
904 
905   while (true) {
906     uint64_t Digit;
907     char C = consume();
908 
909     if (C == '_') {
910       break;
911     } else if (isDigit(C)) {
912       Digit = C - '0';
913     } else if (isLower(C)) {
914       Digit = 10 + (C - 'a');
915     } else if (isUpper(C)) {
916       Digit = 10 + 26 + (C - 'A');
917     } else {
918       Error = true;
919       return 0;
920     }
921 
922     if (!mulAssign(Value, 62))
923       return 0;
924 
925     if (!addAssign(Value, Digit))
926       return 0;
927   }
928 
929   if (!addAssign(Value, 1))
930     return 0;
931 
932   return Value;
933 }
934 
935 // Parses a decimal number that had been encoded without any leading zeros.
936 //
937 // <decimal-number> = "0"
938 //                  | <1-9> {<0-9>}
939 uint64_t Demangler::parseDecimalNumber() {
940   char C = look();
941   if (!isDigit(C)) {
942     Error = true;
943     return 0;
944   }
945 
946   if (C == '0') {
947     consume();
948     return 0;
949   }
950 
951   uint64_t Value = 0;
952 
953   while (isDigit(look())) {
954     if (!mulAssign(Value, 10)) {
955       Error = true;
956       return 0;
957     }
958 
959     uint64_t D = consume() - '0';
960     if (!addAssign(Value, D))
961       return 0;
962   }
963 
964   return Value;
965 }
966 
967 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
968 // value and stores hex digits in HexDigits. The return value is unspecified if
969 // HexDigits.size() > 16.
970 //
971 // <hex-number> = "0_"
972 //              | <1-9a-f> {<0-9a-f>} "_"
973 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
974   size_t Start = Position;
975   uint64_t Value = 0;
976 
977   if (!isHexDigit(look()))
978     Error = true;
979 
980   if (consumeIf('0')) {
981     if (!consumeIf('_'))
982       Error = true;
983   } else {
984     while (!Error && !consumeIf('_')) {
985       char C = consume();
986       Value *= 16;
987       if (isDigit(C))
988         Value += C - '0';
989       else if ('a' <= C && C <= 'f')
990         Value += 10 + (C - 'a');
991       else
992         Error = true;
993     }
994   }
995 
996   if (Error) {
997     HexDigits = StringView();
998     return 0;
999   }
1000 
1001   size_t End = Position - 1;
1002   assert(Start < End);
1003   HexDigits = Input.substr(Start, End - Start);
1004   return Value;
1005 }
1006 
1007 void Demangler::print(char C) {
1008   if (Error || !Print)
1009     return;
1010 
1011   Output += C;
1012 }
1013 
1014 void Demangler::print(StringView S) {
1015   if (Error || !Print)
1016     return;
1017 
1018   Output += S;
1019 }
1020 
1021 void Demangler::printDecimalNumber(uint64_t N) {
1022   if (Error || !Print)
1023     return;
1024 
1025   Output << N;
1026 }
1027 
1028 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1029 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1030 // bound by one of the enclosing binders.
1031 void Demangler::printLifetime(uint64_t Index) {
1032   if (Index == 0) {
1033     print("'_");
1034     return;
1035   }
1036 
1037   if (Index - 1 >= BoundLifetimes) {
1038     Error = true;
1039     return;
1040   }
1041 
1042   uint64_t Depth = BoundLifetimes - Index;
1043   print('\'');
1044   if (Depth < 26) {
1045     char C = 'a' + Depth;
1046     print(C);
1047   } else {
1048     print('z');
1049     printDecimalNumber(Depth - 26 + 1);
1050   }
1051 }
1052 
1053 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1054   if (isLower(C)) {
1055     Value = C - 'a';
1056     return true;
1057   }
1058 
1059   if (isDigit(C)) {
1060     Value = 26 + (C - '0');
1061     return true;
1062   }
1063 
1064   return false;
1065 }
1066 
1067 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1068   char *Buffer = Output.getBuffer();
1069   char *Start = Buffer + StartIdx;
1070   char *End = Buffer + Output.getCurrentPosition();
1071   Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1072 }
1073 
1074 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1075 // CodePoint is not a valid unicode scalar value.
1076 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1077   if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1078     return false;
1079 
1080   if (CodePoint <= 0x7F) {
1081     Output[0] = CodePoint;
1082     return true;
1083   }
1084 
1085   if (CodePoint <= 0x7FF) {
1086     Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1087     Output[1] = 0x80 | (CodePoint & 0x3F);
1088     return true;
1089   }
1090 
1091   if (CodePoint <= 0xFFFF) {
1092     Output[0] = 0xE0 | (CodePoint >> 12);
1093     Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1094     Output[2] = 0x80 | (CodePoint & 0x3F);
1095     return true;
1096   }
1097 
1098   if (CodePoint <= 0x10FFFF) {
1099     Output[0] = 0xF0 | (CodePoint >> 18);
1100     Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1101     Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1102     Output[3] = 0x80 | (CodePoint & 0x3F);
1103     return true;
1104   }
1105 
1106   return false;
1107 }
1108 
1109 // Decodes string encoded using punycode and appends results to Output.
1110 // Returns true if decoding was successful.
1111 static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1112   size_t OutputSize = Output.getCurrentPosition();
1113   size_t InputIdx = 0;
1114 
1115   // Rust uses an underscore as a delimiter.
1116   size_t DelimiterPos = StringView::npos;
1117   for (size_t I = 0; I != Input.size(); ++I)
1118     if (Input[I] == '_')
1119       DelimiterPos = I;
1120 
1121   if (DelimiterPos != StringView::npos) {
1122     // Copy basic code points before the last delimiter to the output.
1123     for (; InputIdx != DelimiterPos; ++InputIdx) {
1124       char C = Input[InputIdx];
1125       if (!isValid(C))
1126         return false;
1127       // Code points are padded with zeros while decoding is in progress.
1128       char UTF8[4] = {C};
1129       Output += StringView(UTF8, UTF8 + 4);
1130     }
1131     // Skip over the delimiter.
1132     ++InputIdx;
1133   }
1134 
1135   size_t Base = 36;
1136   size_t Skew = 38;
1137   size_t Bias = 72;
1138   size_t N = 0x80;
1139   size_t TMin = 1;
1140   size_t TMax = 26;
1141   size_t Damp = 700;
1142 
1143   auto Adapt = [&](size_t Delta, size_t NumPoints) {
1144     Delta /= Damp;
1145     Delta += Delta / NumPoints;
1146     Damp = 2;
1147 
1148     size_t K = 0;
1149     while (Delta > (Base - TMin) * TMax / 2) {
1150       Delta /= Base - TMin;
1151       K += Base;
1152     }
1153     return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1154   };
1155 
1156   // Main decoding loop.
1157   for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1158     size_t OldI = I;
1159     size_t W = 1;
1160     size_t Max = std::numeric_limits<size_t>::max();
1161     for (size_t K = Base; true; K += Base) {
1162       if (InputIdx == Input.size())
1163         return false;
1164       char C = Input[InputIdx++];
1165       size_t Digit = 0;
1166       if (!decodePunycodeDigit(C, Digit))
1167         return false;
1168 
1169       if (Digit > (Max - I) / W)
1170         return false;
1171       I += Digit * W;
1172 
1173       size_t T;
1174       if (K <= Bias)
1175         T = TMin;
1176       else if (K >= Bias + TMax)
1177         T = TMax;
1178       else
1179         T = K - Bias;
1180 
1181       if (Digit < T)
1182         break;
1183 
1184       if (W > Max / (Base - T))
1185         return false;
1186       W *= (Base - T);
1187     }
1188     size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1189     Bias = Adapt(I - OldI, NumPoints);
1190 
1191     if (I / NumPoints > Max - N)
1192       return false;
1193     N += I / NumPoints;
1194     I = I % NumPoints;
1195 
1196     // Insert N at position I in the output.
1197     char UTF8[4] = {};
1198     if (!encodeUTF8(N, UTF8))
1199       return false;
1200     Output.insert(OutputSize + I * 4, UTF8, 4);
1201   }
1202 
1203   removeNullBytes(Output, OutputSize);
1204   return true;
1205 }
1206 
1207 void Demangler::printIdentifier(Identifier Ident) {
1208   if (Error || !Print)
1209     return;
1210 
1211   if (Ident.Punycode) {
1212     if (!decodePunycode(Ident.Name, Output))
1213       Error = true;
1214   } else {
1215     print(Ident.Name);
1216   }
1217 }
1218 
1219 char Demangler::look() const {
1220   if (Error || Position >= Input.size())
1221     return 0;
1222 
1223   return Input[Position];
1224 }
1225 
1226 char Demangler::consume() {
1227   if (Error || Position >= Input.size()) {
1228     Error = true;
1229     return 0;
1230   }
1231 
1232   return Input[Position++];
1233 }
1234 
1235 bool Demangler::consumeIf(char Prefix) {
1236   if (Error || Position >= Input.size() || Input[Position] != Prefix)
1237     return false;
1238 
1239   Position += 1;
1240   return true;
1241 }
1242 
1243 /// Computes A + B. When computation wraps around sets the error and returns
1244 /// false. Otherwise assigns the result to A and returns true.
1245 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1246   if (A > std::numeric_limits<uint64_t>::max() - B) {
1247     Error = true;
1248     return false;
1249   }
1250 
1251   A += B;
1252   return true;
1253 }
1254 
1255 /// Computes A * B. When computation wraps around sets the error and returns
1256 /// false. Otherwise assigns the result to A and returns true.
1257 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1258   if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1259     Error = true;
1260     return false;
1261   }
1262 
1263   A *= B;
1264   return true;
1265 }
1266