173471bf0Spatrick //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
273471bf0Spatrick //
373471bf0Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
473471bf0Spatrick // See https://llvm.org/LICENSE.txt for license information.
573471bf0Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
673471bf0Spatrick //
773471bf0Spatrick //===----------------------------------------------------------------------===//
873471bf0Spatrick //
973471bf0Spatrick // This file defines a demangler for Rust v0 mangled symbols as specified in
1073471bf0Spatrick // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
1173471bf0Spatrick //
1273471bf0Spatrick //===----------------------------------------------------------------------===//
1373471bf0Spatrick 
1473471bf0Spatrick #include "llvm/Demangle/Demangle.h"
1573471bf0Spatrick #include "llvm/Demangle/StringView.h"
1673471bf0Spatrick #include "llvm/Demangle/Utility.h"
1773471bf0Spatrick 
1873471bf0Spatrick #include <algorithm>
1973471bf0Spatrick #include <cassert>
2073471bf0Spatrick #include <cstdint>
2173471bf0Spatrick #include <cstring>
2273471bf0Spatrick #include <limits>
2373471bf0Spatrick 
2473471bf0Spatrick using namespace llvm;
2573471bf0Spatrick 
26*d415bd75Srobert using llvm::itanium_demangle::OutputBuffer;
27*d415bd75Srobert using llvm::itanium_demangle::ScopedOverride;
2873471bf0Spatrick using llvm::itanium_demangle::StringView;
2973471bf0Spatrick 
3073471bf0Spatrick namespace {
3173471bf0Spatrick 
3273471bf0Spatrick struct Identifier {
3373471bf0Spatrick   StringView Name;
3473471bf0Spatrick   bool Punycode;
3573471bf0Spatrick 
empty__anond672b4d40111::Identifier3673471bf0Spatrick   bool empty() const { return Name.empty(); }
3773471bf0Spatrick };
3873471bf0Spatrick 
3973471bf0Spatrick enum class BasicType {
4073471bf0Spatrick   Bool,
4173471bf0Spatrick   Char,
4273471bf0Spatrick   I8,
4373471bf0Spatrick   I16,
4473471bf0Spatrick   I32,
4573471bf0Spatrick   I64,
4673471bf0Spatrick   I128,
4773471bf0Spatrick   ISize,
4873471bf0Spatrick   U8,
4973471bf0Spatrick   U16,
5073471bf0Spatrick   U32,
5173471bf0Spatrick   U64,
5273471bf0Spatrick   U128,
5373471bf0Spatrick   USize,
5473471bf0Spatrick   F32,
5573471bf0Spatrick   F64,
5673471bf0Spatrick   Str,
5773471bf0Spatrick   Placeholder,
5873471bf0Spatrick   Unit,
5973471bf0Spatrick   Variadic,
6073471bf0Spatrick   Never,
6173471bf0Spatrick };
6273471bf0Spatrick 
6373471bf0Spatrick enum class IsInType {
6473471bf0Spatrick   No,
6573471bf0Spatrick   Yes,
6673471bf0Spatrick };
6773471bf0Spatrick 
6873471bf0Spatrick enum class LeaveGenericsOpen {
6973471bf0Spatrick   No,
7073471bf0Spatrick   Yes,
7173471bf0Spatrick };
7273471bf0Spatrick 
7373471bf0Spatrick class Demangler {
7473471bf0Spatrick   // Maximum recursion level. Used to avoid stack overflow.
7573471bf0Spatrick   size_t MaxRecursionLevel;
7673471bf0Spatrick   // Current recursion level.
7773471bf0Spatrick   size_t RecursionLevel;
7873471bf0Spatrick   size_t BoundLifetimes;
7973471bf0Spatrick   // Input string that is being demangled with "_R" prefix removed.
8073471bf0Spatrick   StringView Input;
8173471bf0Spatrick   // Position in the input string.
8273471bf0Spatrick   size_t Position;
8373471bf0Spatrick   // When true, print methods append the output to the stream.
8473471bf0Spatrick   // When false, the output is suppressed.
8573471bf0Spatrick   bool Print;
8673471bf0Spatrick   // True if an error occurred.
8773471bf0Spatrick   bool Error;
8873471bf0Spatrick 
8973471bf0Spatrick public:
9073471bf0Spatrick   // Demangled output.
91*d415bd75Srobert   OutputBuffer Output;
9273471bf0Spatrick 
9373471bf0Spatrick   Demangler(size_t MaxRecursionLevel = 500);
9473471bf0Spatrick 
9573471bf0Spatrick   bool demangle(StringView MangledName);
9673471bf0Spatrick 
9773471bf0Spatrick private:
9873471bf0Spatrick   bool demanglePath(IsInType Type,
9973471bf0Spatrick                     LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
10073471bf0Spatrick   void demangleImplPath(IsInType InType);
10173471bf0Spatrick   void demangleGenericArg();
10273471bf0Spatrick   void demangleType();
10373471bf0Spatrick   void demangleFnSig();
10473471bf0Spatrick   void demangleDynBounds();
10573471bf0Spatrick   void demangleDynTrait();
10673471bf0Spatrick   void demangleOptionalBinder();
10773471bf0Spatrick   void demangleConst();
10873471bf0Spatrick   void demangleConstInt();
10973471bf0Spatrick   void demangleConstBool();
11073471bf0Spatrick   void demangleConstChar();
11173471bf0Spatrick 
demangleBackref(Callable Demangler)11273471bf0Spatrick   template <typename Callable> void demangleBackref(Callable Demangler) {
11373471bf0Spatrick     uint64_t Backref = parseBase62Number();
11473471bf0Spatrick     if (Error || Backref >= Position) {
11573471bf0Spatrick       Error = true;
11673471bf0Spatrick       return;
11773471bf0Spatrick     }
11873471bf0Spatrick 
11973471bf0Spatrick     if (!Print)
12073471bf0Spatrick       return;
12173471bf0Spatrick 
122*d415bd75Srobert     ScopedOverride<size_t> SavePosition(Position, Position);
12373471bf0Spatrick     Position = Backref;
12473471bf0Spatrick     Demangler();
12573471bf0Spatrick   }
12673471bf0Spatrick 
12773471bf0Spatrick   Identifier parseIdentifier();
12873471bf0Spatrick   uint64_t parseOptionalBase62Number(char Tag);
12973471bf0Spatrick   uint64_t parseBase62Number();
13073471bf0Spatrick   uint64_t parseDecimalNumber();
13173471bf0Spatrick   uint64_t parseHexNumber(StringView &HexDigits);
13273471bf0Spatrick 
13373471bf0Spatrick   void print(char C);
13473471bf0Spatrick   void print(StringView S);
13573471bf0Spatrick   void printDecimalNumber(uint64_t N);
13673471bf0Spatrick   void printBasicType(BasicType);
13773471bf0Spatrick   void printLifetime(uint64_t Index);
138*d415bd75Srobert   void printIdentifier(Identifier Ident);
13973471bf0Spatrick 
14073471bf0Spatrick   char look() const;
14173471bf0Spatrick   char consume();
14273471bf0Spatrick   bool consumeIf(char Prefix);
14373471bf0Spatrick 
14473471bf0Spatrick   bool addAssign(uint64_t &A, uint64_t B);
14573471bf0Spatrick   bool mulAssign(uint64_t &A, uint64_t B);
14673471bf0Spatrick };
14773471bf0Spatrick 
14873471bf0Spatrick } // namespace
14973471bf0Spatrick 
rustDemangle(const char * MangledName)150*d415bd75Srobert char *llvm::rustDemangle(const char *MangledName) {
151*d415bd75Srobert   if (MangledName == nullptr)
15273471bf0Spatrick     return nullptr;
15373471bf0Spatrick 
15473471bf0Spatrick   // Return early if mangled name doesn't look like a Rust symbol.
15573471bf0Spatrick   StringView Mangled(MangledName);
156*d415bd75Srobert   if (!Mangled.startsWith("_R"))
15773471bf0Spatrick     return nullptr;
15873471bf0Spatrick 
15973471bf0Spatrick   Demangler D;
16073471bf0Spatrick   if (!D.demangle(Mangled)) {
16173471bf0Spatrick     std::free(D.Output.getBuffer());
16273471bf0Spatrick     return nullptr;
16373471bf0Spatrick   }
16473471bf0Spatrick 
16573471bf0Spatrick   D.Output += '\0';
16673471bf0Spatrick 
167*d415bd75Srobert   return D.Output.getBuffer();
16873471bf0Spatrick }
16973471bf0Spatrick 
Demangler(size_t MaxRecursionLevel)17073471bf0Spatrick Demangler::Demangler(size_t MaxRecursionLevel)
17173471bf0Spatrick     : MaxRecursionLevel(MaxRecursionLevel) {}
17273471bf0Spatrick 
isDigit(const char C)17373471bf0Spatrick static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
17473471bf0Spatrick 
isHexDigit(const char C)17573471bf0Spatrick static inline bool isHexDigit(const char C) {
17673471bf0Spatrick   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
17773471bf0Spatrick }
17873471bf0Spatrick 
isLower(const char C)17973471bf0Spatrick static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
18073471bf0Spatrick 
isUpper(const char C)18173471bf0Spatrick static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
18273471bf0Spatrick 
18373471bf0Spatrick /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)18473471bf0Spatrick static inline bool isValid(const char C) {
18573471bf0Spatrick   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
18673471bf0Spatrick }
18773471bf0Spatrick 
18873471bf0Spatrick // Demangles Rust v0 mangled symbol. Returns true when successful, and false
18973471bf0Spatrick // otherwise. The demangled symbol is stored in Output field. It is
19073471bf0Spatrick // responsibility of the caller to free the memory behind the output stream.
19173471bf0Spatrick //
19273471bf0Spatrick // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(StringView Mangled)19373471bf0Spatrick bool Demangler::demangle(StringView Mangled) {
19473471bf0Spatrick   Position = 0;
19573471bf0Spatrick   Error = false;
19673471bf0Spatrick   Print = true;
19773471bf0Spatrick   RecursionLevel = 0;
19873471bf0Spatrick   BoundLifetimes = 0;
19973471bf0Spatrick 
20073471bf0Spatrick   if (!Mangled.consumeFront("_R")) {
20173471bf0Spatrick     Error = true;
20273471bf0Spatrick     return false;
20373471bf0Spatrick   }
20473471bf0Spatrick   size_t Dot = Mangled.find('.');
20573471bf0Spatrick   Input = Mangled.substr(0, Dot);
20673471bf0Spatrick   StringView Suffix = Mangled.dropFront(Dot);
20773471bf0Spatrick 
20873471bf0Spatrick   demanglePath(IsInType::No);
20973471bf0Spatrick 
21073471bf0Spatrick   if (Position != Input.size()) {
211*d415bd75Srobert     ScopedOverride<bool> SavePrint(Print, false);
21273471bf0Spatrick     demanglePath(IsInType::No);
21373471bf0Spatrick   }
21473471bf0Spatrick 
21573471bf0Spatrick   if (Position != Input.size())
21673471bf0Spatrick     Error = true;
21773471bf0Spatrick 
21873471bf0Spatrick   if (!Suffix.empty()) {
21973471bf0Spatrick     print(" (");
22073471bf0Spatrick     print(Suffix);
22173471bf0Spatrick     print(")");
22273471bf0Spatrick   }
22373471bf0Spatrick 
22473471bf0Spatrick   return !Error;
22573471bf0Spatrick }
22673471bf0Spatrick 
22773471bf0Spatrick // Demangles a path. InType indicates whether a path is inside a type. When
22873471bf0Spatrick // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
22973471bf0Spatrick // output. Return value indicates whether generics arguments have been left
23073471bf0Spatrick // open.
23173471bf0Spatrick //
23273471bf0Spatrick // <path> = "C" <identifier>               // crate root
23373471bf0Spatrick //        | "M" <impl-path> <type>         // <T> (inherent impl)
23473471bf0Spatrick //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
23573471bf0Spatrick //        | "Y" <type> <path>              // <T as Trait> (trait definition)
23673471bf0Spatrick //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
23773471bf0Spatrick //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
23873471bf0Spatrick //        | <backref>
23973471bf0Spatrick // <identifier> = [<disambiguator>] <undisambiguated-identifier>
24073471bf0Spatrick // <ns> = "C"      // closure
24173471bf0Spatrick //      | "S"      // shim
24273471bf0Spatrick //      | <A-Z>    // other special namespaces
24373471bf0Spatrick //      | <a-z>    // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)24473471bf0Spatrick bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
24573471bf0Spatrick   if (Error || RecursionLevel >= MaxRecursionLevel) {
24673471bf0Spatrick     Error = true;
24773471bf0Spatrick     return false;
24873471bf0Spatrick   }
249*d415bd75Srobert   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
25073471bf0Spatrick 
25173471bf0Spatrick   switch (consume()) {
25273471bf0Spatrick   case 'C': {
25373471bf0Spatrick     parseOptionalBase62Number('s');
254*d415bd75Srobert     printIdentifier(parseIdentifier());
25573471bf0Spatrick     break;
25673471bf0Spatrick   }
25773471bf0Spatrick   case 'M': {
25873471bf0Spatrick     demangleImplPath(InType);
25973471bf0Spatrick     print("<");
26073471bf0Spatrick     demangleType();
26173471bf0Spatrick     print(">");
26273471bf0Spatrick     break;
26373471bf0Spatrick   }
26473471bf0Spatrick   case 'X': {
26573471bf0Spatrick     demangleImplPath(InType);
26673471bf0Spatrick     print("<");
26773471bf0Spatrick     demangleType();
26873471bf0Spatrick     print(" as ");
26973471bf0Spatrick     demanglePath(IsInType::Yes);
27073471bf0Spatrick     print(">");
27173471bf0Spatrick     break;
27273471bf0Spatrick   }
27373471bf0Spatrick   case 'Y': {
27473471bf0Spatrick     print("<");
27573471bf0Spatrick     demangleType();
27673471bf0Spatrick     print(" as ");
27773471bf0Spatrick     demanglePath(IsInType::Yes);
27873471bf0Spatrick     print(">");
27973471bf0Spatrick     break;
28073471bf0Spatrick   }
28173471bf0Spatrick   case 'N': {
28273471bf0Spatrick     char NS = consume();
28373471bf0Spatrick     if (!isLower(NS) && !isUpper(NS)) {
28473471bf0Spatrick       Error = true;
28573471bf0Spatrick       break;
28673471bf0Spatrick     }
28773471bf0Spatrick     demanglePath(InType);
28873471bf0Spatrick 
28973471bf0Spatrick     uint64_t Disambiguator = parseOptionalBase62Number('s');
29073471bf0Spatrick     Identifier Ident = parseIdentifier();
29173471bf0Spatrick 
29273471bf0Spatrick     if (isUpper(NS)) {
29373471bf0Spatrick       // Special namespaces
29473471bf0Spatrick       print("::{");
29573471bf0Spatrick       if (NS == 'C')
29673471bf0Spatrick         print("closure");
29773471bf0Spatrick       else if (NS == 'S')
29873471bf0Spatrick         print("shim");
29973471bf0Spatrick       else
30073471bf0Spatrick         print(NS);
30173471bf0Spatrick       if (!Ident.empty()) {
30273471bf0Spatrick         print(":");
303*d415bd75Srobert         printIdentifier(Ident);
30473471bf0Spatrick       }
30573471bf0Spatrick       print('#');
30673471bf0Spatrick       printDecimalNumber(Disambiguator);
30773471bf0Spatrick       print('}');
30873471bf0Spatrick     } else {
30973471bf0Spatrick       // Implementation internal namespaces.
31073471bf0Spatrick       if (!Ident.empty()) {
31173471bf0Spatrick         print("::");
312*d415bd75Srobert         printIdentifier(Ident);
31373471bf0Spatrick       }
31473471bf0Spatrick     }
31573471bf0Spatrick     break;
31673471bf0Spatrick   }
31773471bf0Spatrick   case 'I': {
31873471bf0Spatrick     demanglePath(InType);
31973471bf0Spatrick     // Omit "::" when in a type, where it is optional.
32073471bf0Spatrick     if (InType == IsInType::No)
32173471bf0Spatrick       print("::");
32273471bf0Spatrick     print("<");
32373471bf0Spatrick     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
32473471bf0Spatrick       if (I > 0)
32573471bf0Spatrick         print(", ");
32673471bf0Spatrick       demangleGenericArg();
32773471bf0Spatrick     }
32873471bf0Spatrick     if (LeaveOpen == LeaveGenericsOpen::Yes)
32973471bf0Spatrick       return true;
33073471bf0Spatrick     else
33173471bf0Spatrick       print(">");
33273471bf0Spatrick     break;
33373471bf0Spatrick   }
33473471bf0Spatrick   case 'B': {
33573471bf0Spatrick     bool IsOpen = false;
33673471bf0Spatrick     demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
33773471bf0Spatrick     return IsOpen;
33873471bf0Spatrick   }
33973471bf0Spatrick   default:
34073471bf0Spatrick     Error = true;
34173471bf0Spatrick     break;
34273471bf0Spatrick   }
34373471bf0Spatrick 
34473471bf0Spatrick   return false;
34573471bf0Spatrick }
34673471bf0Spatrick 
34773471bf0Spatrick // <impl-path> = [<disambiguator>] <path>
34873471bf0Spatrick // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)34973471bf0Spatrick void Demangler::demangleImplPath(IsInType InType) {
350*d415bd75Srobert   ScopedOverride<bool> SavePrint(Print, false);
35173471bf0Spatrick   parseOptionalBase62Number('s');
35273471bf0Spatrick   demanglePath(InType);
35373471bf0Spatrick }
35473471bf0Spatrick 
35573471bf0Spatrick // <generic-arg> = <lifetime>
35673471bf0Spatrick //               | <type>
35773471bf0Spatrick //               | "K" <const>
35873471bf0Spatrick // <lifetime> = "L" <base-62-number>
demangleGenericArg()35973471bf0Spatrick void Demangler::demangleGenericArg() {
36073471bf0Spatrick   if (consumeIf('L'))
36173471bf0Spatrick     printLifetime(parseBase62Number());
36273471bf0Spatrick   else if (consumeIf('K'))
36373471bf0Spatrick     demangleConst();
36473471bf0Spatrick   else
36573471bf0Spatrick     demangleType();
36673471bf0Spatrick }
36773471bf0Spatrick 
36873471bf0Spatrick // <basic-type> = "a"      // i8
36973471bf0Spatrick //              | "b"      // bool
37073471bf0Spatrick //              | "c"      // char
37173471bf0Spatrick //              | "d"      // f64
37273471bf0Spatrick //              | "e"      // str
37373471bf0Spatrick //              | "f"      // f32
37473471bf0Spatrick //              | "h"      // u8
37573471bf0Spatrick //              | "i"      // isize
37673471bf0Spatrick //              | "j"      // usize
37773471bf0Spatrick //              | "l"      // i32
37873471bf0Spatrick //              | "m"      // u32
37973471bf0Spatrick //              | "n"      // i128
38073471bf0Spatrick //              | "o"      // u128
38173471bf0Spatrick //              | "s"      // i16
38273471bf0Spatrick //              | "t"      // u16
38373471bf0Spatrick //              | "u"      // ()
38473471bf0Spatrick //              | "v"      // ...
38573471bf0Spatrick //              | "x"      // i64
38673471bf0Spatrick //              | "y"      // u64
38773471bf0Spatrick //              | "z"      // !
38873471bf0Spatrick //              | "p"      // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)38973471bf0Spatrick static bool parseBasicType(char C, BasicType &Type) {
39073471bf0Spatrick   switch (C) {
39173471bf0Spatrick   case 'a':
39273471bf0Spatrick     Type = BasicType::I8;
39373471bf0Spatrick     return true;
39473471bf0Spatrick   case 'b':
39573471bf0Spatrick     Type = BasicType::Bool;
39673471bf0Spatrick     return true;
39773471bf0Spatrick   case 'c':
39873471bf0Spatrick     Type = BasicType::Char;
39973471bf0Spatrick     return true;
40073471bf0Spatrick   case 'd':
40173471bf0Spatrick     Type = BasicType::F64;
40273471bf0Spatrick     return true;
40373471bf0Spatrick   case 'e':
40473471bf0Spatrick     Type = BasicType::Str;
40573471bf0Spatrick     return true;
40673471bf0Spatrick   case 'f':
40773471bf0Spatrick     Type = BasicType::F32;
40873471bf0Spatrick     return true;
40973471bf0Spatrick   case 'h':
41073471bf0Spatrick     Type = BasicType::U8;
41173471bf0Spatrick     return true;
41273471bf0Spatrick   case 'i':
41373471bf0Spatrick     Type = BasicType::ISize;
41473471bf0Spatrick     return true;
41573471bf0Spatrick   case 'j':
41673471bf0Spatrick     Type = BasicType::USize;
41773471bf0Spatrick     return true;
41873471bf0Spatrick   case 'l':
41973471bf0Spatrick     Type = BasicType::I32;
42073471bf0Spatrick     return true;
42173471bf0Spatrick   case 'm':
42273471bf0Spatrick     Type = BasicType::U32;
42373471bf0Spatrick     return true;
42473471bf0Spatrick   case 'n':
42573471bf0Spatrick     Type = BasicType::I128;
42673471bf0Spatrick     return true;
42773471bf0Spatrick   case 'o':
42873471bf0Spatrick     Type = BasicType::U128;
42973471bf0Spatrick     return true;
43073471bf0Spatrick   case 'p':
43173471bf0Spatrick     Type = BasicType::Placeholder;
43273471bf0Spatrick     return true;
43373471bf0Spatrick   case 's':
43473471bf0Spatrick     Type = BasicType::I16;
43573471bf0Spatrick     return true;
43673471bf0Spatrick   case 't':
43773471bf0Spatrick     Type = BasicType::U16;
43873471bf0Spatrick     return true;
43973471bf0Spatrick   case 'u':
44073471bf0Spatrick     Type = BasicType::Unit;
44173471bf0Spatrick     return true;
44273471bf0Spatrick   case 'v':
44373471bf0Spatrick     Type = BasicType::Variadic;
44473471bf0Spatrick     return true;
44573471bf0Spatrick   case 'x':
44673471bf0Spatrick     Type = BasicType::I64;
44773471bf0Spatrick     return true;
44873471bf0Spatrick   case 'y':
44973471bf0Spatrick     Type = BasicType::U64;
45073471bf0Spatrick     return true;
45173471bf0Spatrick   case 'z':
45273471bf0Spatrick     Type = BasicType::Never;
45373471bf0Spatrick     return true;
45473471bf0Spatrick   default:
45573471bf0Spatrick     return false;
45673471bf0Spatrick   }
45773471bf0Spatrick }
45873471bf0Spatrick 
printBasicType(BasicType Type)45973471bf0Spatrick void Demangler::printBasicType(BasicType Type) {
46073471bf0Spatrick   switch (Type) {
46173471bf0Spatrick   case BasicType::Bool:
46273471bf0Spatrick     print("bool");
46373471bf0Spatrick     break;
46473471bf0Spatrick   case BasicType::Char:
46573471bf0Spatrick     print("char");
46673471bf0Spatrick     break;
46773471bf0Spatrick   case BasicType::I8:
46873471bf0Spatrick     print("i8");
46973471bf0Spatrick     break;
47073471bf0Spatrick   case BasicType::I16:
47173471bf0Spatrick     print("i16");
47273471bf0Spatrick     break;
47373471bf0Spatrick   case BasicType::I32:
47473471bf0Spatrick     print("i32");
47573471bf0Spatrick     break;
47673471bf0Spatrick   case BasicType::I64:
47773471bf0Spatrick     print("i64");
47873471bf0Spatrick     break;
47973471bf0Spatrick   case BasicType::I128:
48073471bf0Spatrick     print("i128");
48173471bf0Spatrick     break;
48273471bf0Spatrick   case BasicType::ISize:
48373471bf0Spatrick     print("isize");
48473471bf0Spatrick     break;
48573471bf0Spatrick   case BasicType::U8:
48673471bf0Spatrick     print("u8");
48773471bf0Spatrick     break;
48873471bf0Spatrick   case BasicType::U16:
48973471bf0Spatrick     print("u16");
49073471bf0Spatrick     break;
49173471bf0Spatrick   case BasicType::U32:
49273471bf0Spatrick     print("u32");
49373471bf0Spatrick     break;
49473471bf0Spatrick   case BasicType::U64:
49573471bf0Spatrick     print("u64");
49673471bf0Spatrick     break;
49773471bf0Spatrick   case BasicType::U128:
49873471bf0Spatrick     print("u128");
49973471bf0Spatrick     break;
50073471bf0Spatrick   case BasicType::USize:
50173471bf0Spatrick     print("usize");
50273471bf0Spatrick     break;
50373471bf0Spatrick   case BasicType::F32:
50473471bf0Spatrick     print("f32");
50573471bf0Spatrick     break;
50673471bf0Spatrick   case BasicType::F64:
50773471bf0Spatrick     print("f64");
50873471bf0Spatrick     break;
50973471bf0Spatrick   case BasicType::Str:
51073471bf0Spatrick     print("str");
51173471bf0Spatrick     break;
51273471bf0Spatrick   case BasicType::Placeholder:
51373471bf0Spatrick     print("_");
51473471bf0Spatrick     break;
51573471bf0Spatrick   case BasicType::Unit:
51673471bf0Spatrick     print("()");
51773471bf0Spatrick     break;
51873471bf0Spatrick   case BasicType::Variadic:
51973471bf0Spatrick     print("...");
52073471bf0Spatrick     break;
52173471bf0Spatrick   case BasicType::Never:
52273471bf0Spatrick     print("!");
52373471bf0Spatrick     break;
52473471bf0Spatrick   }
52573471bf0Spatrick }
52673471bf0Spatrick 
52773471bf0Spatrick // <type> = | <basic-type>
52873471bf0Spatrick //          | <path>                      // named type
52973471bf0Spatrick //          | "A" <type> <const>          // [T; N]
53073471bf0Spatrick //          | "S" <type>                  // [T]
53173471bf0Spatrick //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
53273471bf0Spatrick //          | "R" [<lifetime>] <type>     // &T
53373471bf0Spatrick //          | "Q" [<lifetime>] <type>     // &mut T
53473471bf0Spatrick //          | "P" <type>                  // *const T
53573471bf0Spatrick //          | "O" <type>                  // *mut T
53673471bf0Spatrick //          | "F" <fn-sig>                // fn(...) -> ...
53773471bf0Spatrick //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
53873471bf0Spatrick //          | <backref>                   // backref
demangleType()53973471bf0Spatrick void Demangler::demangleType() {
54073471bf0Spatrick   if (Error || RecursionLevel >= MaxRecursionLevel) {
54173471bf0Spatrick     Error = true;
54273471bf0Spatrick     return;
54373471bf0Spatrick   }
544*d415bd75Srobert   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
54573471bf0Spatrick 
54673471bf0Spatrick   size_t Start = Position;
54773471bf0Spatrick   char C = consume();
54873471bf0Spatrick   BasicType Type;
54973471bf0Spatrick   if (parseBasicType(C, Type))
55073471bf0Spatrick     return printBasicType(Type);
55173471bf0Spatrick 
55273471bf0Spatrick   switch (C) {
55373471bf0Spatrick   case 'A':
55473471bf0Spatrick     print("[");
55573471bf0Spatrick     demangleType();
55673471bf0Spatrick     print("; ");
55773471bf0Spatrick     demangleConst();
55873471bf0Spatrick     print("]");
55973471bf0Spatrick     break;
56073471bf0Spatrick   case 'S':
56173471bf0Spatrick     print("[");
56273471bf0Spatrick     demangleType();
56373471bf0Spatrick     print("]");
56473471bf0Spatrick     break;
56573471bf0Spatrick   case 'T': {
56673471bf0Spatrick     print("(");
56773471bf0Spatrick     size_t I = 0;
56873471bf0Spatrick     for (; !Error && !consumeIf('E'); ++I) {
56973471bf0Spatrick       if (I > 0)
57073471bf0Spatrick         print(", ");
57173471bf0Spatrick       demangleType();
57273471bf0Spatrick     }
57373471bf0Spatrick     if (I == 1)
57473471bf0Spatrick       print(",");
57573471bf0Spatrick     print(")");
57673471bf0Spatrick     break;
57773471bf0Spatrick   }
57873471bf0Spatrick   case 'R':
57973471bf0Spatrick   case 'Q':
58073471bf0Spatrick     print('&');
58173471bf0Spatrick     if (consumeIf('L')) {
58273471bf0Spatrick       if (auto Lifetime = parseBase62Number()) {
58373471bf0Spatrick         printLifetime(Lifetime);
58473471bf0Spatrick         print(' ');
58573471bf0Spatrick       }
58673471bf0Spatrick     }
58773471bf0Spatrick     if (C == 'Q')
58873471bf0Spatrick       print("mut ");
58973471bf0Spatrick     demangleType();
59073471bf0Spatrick     break;
59173471bf0Spatrick   case 'P':
59273471bf0Spatrick     print("*const ");
59373471bf0Spatrick     demangleType();
59473471bf0Spatrick     break;
59573471bf0Spatrick   case 'O':
59673471bf0Spatrick     print("*mut ");
59773471bf0Spatrick     demangleType();
59873471bf0Spatrick     break;
59973471bf0Spatrick   case 'F':
60073471bf0Spatrick     demangleFnSig();
60173471bf0Spatrick     break;
60273471bf0Spatrick   case 'D':
60373471bf0Spatrick     demangleDynBounds();
60473471bf0Spatrick     if (consumeIf('L')) {
60573471bf0Spatrick       if (auto Lifetime = parseBase62Number()) {
60673471bf0Spatrick         print(" + ");
60773471bf0Spatrick         printLifetime(Lifetime);
60873471bf0Spatrick       }
60973471bf0Spatrick     } else {
61073471bf0Spatrick       Error = true;
61173471bf0Spatrick     }
61273471bf0Spatrick     break;
61373471bf0Spatrick   case 'B':
61473471bf0Spatrick     demangleBackref([&] { demangleType(); });
61573471bf0Spatrick     break;
61673471bf0Spatrick   default:
61773471bf0Spatrick     Position = Start;
61873471bf0Spatrick     demanglePath(IsInType::Yes);
61973471bf0Spatrick     break;
62073471bf0Spatrick   }
62173471bf0Spatrick }
62273471bf0Spatrick 
62373471bf0Spatrick // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
62473471bf0Spatrick // <abi> = "C"
62573471bf0Spatrick //       | <undisambiguated-identifier>
demangleFnSig()62673471bf0Spatrick void Demangler::demangleFnSig() {
627*d415bd75Srobert   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
62873471bf0Spatrick   demangleOptionalBinder();
62973471bf0Spatrick 
63073471bf0Spatrick   if (consumeIf('U'))
63173471bf0Spatrick     print("unsafe ");
63273471bf0Spatrick 
63373471bf0Spatrick   if (consumeIf('K')) {
63473471bf0Spatrick     print("extern \"");
63573471bf0Spatrick     if (consumeIf('C')) {
63673471bf0Spatrick       print("C");
63773471bf0Spatrick     } else {
63873471bf0Spatrick       Identifier Ident = parseIdentifier();
639*d415bd75Srobert       if (Ident.Punycode)
640*d415bd75Srobert         Error = true;
64173471bf0Spatrick       for (char C : Ident.Name) {
64273471bf0Spatrick         // When mangling ABI string, the "-" is replaced with "_".
64373471bf0Spatrick         if (C == '_')
64473471bf0Spatrick           C = '-';
64573471bf0Spatrick         print(C);
64673471bf0Spatrick       }
64773471bf0Spatrick     }
64873471bf0Spatrick     print("\" ");
64973471bf0Spatrick   }
65073471bf0Spatrick 
65173471bf0Spatrick   print("fn(");
65273471bf0Spatrick   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
65373471bf0Spatrick     if (I > 0)
65473471bf0Spatrick       print(", ");
65573471bf0Spatrick     demangleType();
65673471bf0Spatrick   }
65773471bf0Spatrick   print(")");
65873471bf0Spatrick 
65973471bf0Spatrick   if (consumeIf('u')) {
66073471bf0Spatrick     // Skip the unit type from the output.
66173471bf0Spatrick   } else {
66273471bf0Spatrick     print(" -> ");
66373471bf0Spatrick     demangleType();
66473471bf0Spatrick   }
66573471bf0Spatrick }
66673471bf0Spatrick 
66773471bf0Spatrick // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()66873471bf0Spatrick void Demangler::demangleDynBounds() {
669*d415bd75Srobert   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
67073471bf0Spatrick   print("dyn ");
67173471bf0Spatrick   demangleOptionalBinder();
67273471bf0Spatrick   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
67373471bf0Spatrick     if (I > 0)
67473471bf0Spatrick       print(" + ");
67573471bf0Spatrick     demangleDynTrait();
67673471bf0Spatrick   }
67773471bf0Spatrick }
67873471bf0Spatrick 
67973471bf0Spatrick // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
68073471bf0Spatrick // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()68173471bf0Spatrick void Demangler::demangleDynTrait() {
68273471bf0Spatrick   bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
68373471bf0Spatrick   while (!Error && consumeIf('p')) {
68473471bf0Spatrick     if (!IsOpen) {
68573471bf0Spatrick       IsOpen = true;
68673471bf0Spatrick       print('<');
68773471bf0Spatrick     } else {
68873471bf0Spatrick       print(", ");
68973471bf0Spatrick     }
69073471bf0Spatrick     print(parseIdentifier().Name);
69173471bf0Spatrick     print(" = ");
69273471bf0Spatrick     demangleType();
69373471bf0Spatrick   }
69473471bf0Spatrick   if (IsOpen)
69573471bf0Spatrick     print(">");
69673471bf0Spatrick }
69773471bf0Spatrick 
69873471bf0Spatrick // Demangles optional binder and updates the number of bound lifetimes.
69973471bf0Spatrick //
70073471bf0Spatrick // <binder> = "G" <base-62-number>
demangleOptionalBinder()70173471bf0Spatrick void Demangler::demangleOptionalBinder() {
70273471bf0Spatrick   uint64_t Binder = parseOptionalBase62Number('G');
70373471bf0Spatrick   if (Error || Binder == 0)
70473471bf0Spatrick     return;
70573471bf0Spatrick 
70673471bf0Spatrick   // In valid inputs each bound lifetime is referenced later. Referencing a
70773471bf0Spatrick   // lifetime requires at least one byte of input. Reject inputs that are too
70873471bf0Spatrick   // short to reference all bound lifetimes. Otherwise demangling of invalid
70973471bf0Spatrick   // binders could generate excessive amounts of output.
71073471bf0Spatrick   if (Binder >= Input.size() - BoundLifetimes) {
71173471bf0Spatrick     Error = true;
71273471bf0Spatrick     return;
71373471bf0Spatrick   }
71473471bf0Spatrick 
71573471bf0Spatrick   print("for<");
71673471bf0Spatrick   for (size_t I = 0; I != Binder; ++I) {
71773471bf0Spatrick     BoundLifetimes += 1;
71873471bf0Spatrick     if (I > 0)
71973471bf0Spatrick       print(", ");
72073471bf0Spatrick     printLifetime(1);
72173471bf0Spatrick   }
72273471bf0Spatrick   print("> ");
72373471bf0Spatrick }
72473471bf0Spatrick 
72573471bf0Spatrick // <const> = <basic-type> <const-data>
72673471bf0Spatrick //         | "p"                          // placeholder
72773471bf0Spatrick //         | <backref>
demangleConst()72873471bf0Spatrick void Demangler::demangleConst() {
72973471bf0Spatrick   if (Error || RecursionLevel >= MaxRecursionLevel) {
73073471bf0Spatrick     Error = true;
73173471bf0Spatrick     return;
73273471bf0Spatrick   }
733*d415bd75Srobert   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
73473471bf0Spatrick 
73573471bf0Spatrick   char C = consume();
73673471bf0Spatrick   BasicType Type;
73773471bf0Spatrick   if (parseBasicType(C, Type)) {
73873471bf0Spatrick     switch (Type) {
73973471bf0Spatrick     case BasicType::I8:
74073471bf0Spatrick     case BasicType::I16:
74173471bf0Spatrick     case BasicType::I32:
74273471bf0Spatrick     case BasicType::I64:
74373471bf0Spatrick     case BasicType::I128:
74473471bf0Spatrick     case BasicType::ISize:
74573471bf0Spatrick     case BasicType::U8:
74673471bf0Spatrick     case BasicType::U16:
74773471bf0Spatrick     case BasicType::U32:
74873471bf0Spatrick     case BasicType::U64:
74973471bf0Spatrick     case BasicType::U128:
75073471bf0Spatrick     case BasicType::USize:
75173471bf0Spatrick       demangleConstInt();
75273471bf0Spatrick       break;
75373471bf0Spatrick     case BasicType::Bool:
75473471bf0Spatrick       demangleConstBool();
75573471bf0Spatrick       break;
75673471bf0Spatrick     case BasicType::Char:
75773471bf0Spatrick       demangleConstChar();
75873471bf0Spatrick       break;
75973471bf0Spatrick     case BasicType::Placeholder:
76073471bf0Spatrick       print('_');
76173471bf0Spatrick       break;
76273471bf0Spatrick     default:
76373471bf0Spatrick       Error = true;
76473471bf0Spatrick       break;
76573471bf0Spatrick     }
76673471bf0Spatrick   } else if (C == 'B') {
76773471bf0Spatrick     demangleBackref([&] { demangleConst(); });
76873471bf0Spatrick   } else {
76973471bf0Spatrick     Error = true;
77073471bf0Spatrick   }
77173471bf0Spatrick }
77273471bf0Spatrick 
77373471bf0Spatrick // <const-data> = ["n"] <hex-number>
demangleConstInt()77473471bf0Spatrick void Demangler::demangleConstInt() {
77573471bf0Spatrick   if (consumeIf('n'))
77673471bf0Spatrick     print('-');
77773471bf0Spatrick 
77873471bf0Spatrick   StringView HexDigits;
77973471bf0Spatrick   uint64_t Value = parseHexNumber(HexDigits);
78073471bf0Spatrick   if (HexDigits.size() <= 16) {
78173471bf0Spatrick     printDecimalNumber(Value);
78273471bf0Spatrick   } else {
78373471bf0Spatrick     print("0x");
78473471bf0Spatrick     print(HexDigits);
78573471bf0Spatrick   }
78673471bf0Spatrick }
78773471bf0Spatrick 
78873471bf0Spatrick // <const-data> = "0_" // false
78973471bf0Spatrick //              | "1_" // true
demangleConstBool()79073471bf0Spatrick void Demangler::demangleConstBool() {
79173471bf0Spatrick   StringView HexDigits;
79273471bf0Spatrick   parseHexNumber(HexDigits);
79373471bf0Spatrick   if (HexDigits == "0")
79473471bf0Spatrick     print("false");
79573471bf0Spatrick   else if (HexDigits == "1")
79673471bf0Spatrick     print("true");
79773471bf0Spatrick   else
79873471bf0Spatrick     Error = true;
79973471bf0Spatrick }
80073471bf0Spatrick 
80173471bf0Spatrick /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)80273471bf0Spatrick static bool isAsciiPrintable(uint64_t CodePoint) {
80373471bf0Spatrick   return 0x20 <= CodePoint && CodePoint <= 0x7e;
80473471bf0Spatrick }
80573471bf0Spatrick 
80673471bf0Spatrick // <const-data> = <hex-number>
demangleConstChar()80773471bf0Spatrick void Demangler::demangleConstChar() {
80873471bf0Spatrick   StringView HexDigits;
80973471bf0Spatrick   uint64_t CodePoint = parseHexNumber(HexDigits);
81073471bf0Spatrick   if (Error || HexDigits.size() > 6) {
81173471bf0Spatrick     Error = true;
81273471bf0Spatrick     return;
81373471bf0Spatrick   }
81473471bf0Spatrick 
81573471bf0Spatrick   print("'");
81673471bf0Spatrick   switch (CodePoint) {
81773471bf0Spatrick   case '\t':
81873471bf0Spatrick     print(R"(\t)");
81973471bf0Spatrick     break;
82073471bf0Spatrick   case '\r':
82173471bf0Spatrick     print(R"(\r)");
82273471bf0Spatrick     break;
82373471bf0Spatrick   case '\n':
82473471bf0Spatrick     print(R"(\n)");
82573471bf0Spatrick     break;
82673471bf0Spatrick   case '\\':
82773471bf0Spatrick     print(R"(\\)");
82873471bf0Spatrick     break;
82973471bf0Spatrick   case '"':
83073471bf0Spatrick     print(R"(")");
83173471bf0Spatrick     break;
83273471bf0Spatrick   case '\'':
83373471bf0Spatrick     print(R"(\')");
83473471bf0Spatrick     break;
83573471bf0Spatrick   default:
83673471bf0Spatrick     if (isAsciiPrintable(CodePoint)) {
83773471bf0Spatrick       char C = CodePoint;
83873471bf0Spatrick       print(C);
83973471bf0Spatrick     } else {
84073471bf0Spatrick       print(R"(\u{)");
84173471bf0Spatrick       print(HexDigits);
84273471bf0Spatrick       print('}');
84373471bf0Spatrick     }
84473471bf0Spatrick     break;
84573471bf0Spatrick   }
84673471bf0Spatrick   print('\'');
84773471bf0Spatrick }
84873471bf0Spatrick 
84973471bf0Spatrick // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()85073471bf0Spatrick Identifier Demangler::parseIdentifier() {
85173471bf0Spatrick   bool Punycode = consumeIf('u');
85273471bf0Spatrick   uint64_t Bytes = parseDecimalNumber();
85373471bf0Spatrick 
85473471bf0Spatrick   // Underscore resolves the ambiguity when identifier starts with a decimal
85573471bf0Spatrick   // digit or another underscore.
85673471bf0Spatrick   consumeIf('_');
85773471bf0Spatrick 
85873471bf0Spatrick   if (Error || Bytes > Input.size() - Position) {
85973471bf0Spatrick     Error = true;
86073471bf0Spatrick     return {};
86173471bf0Spatrick   }
86273471bf0Spatrick   StringView S = Input.substr(Position, Bytes);
86373471bf0Spatrick   Position += Bytes;
86473471bf0Spatrick 
86573471bf0Spatrick   if (!std::all_of(S.begin(), S.end(), isValid)) {
86673471bf0Spatrick     Error = true;
86773471bf0Spatrick     return {};
86873471bf0Spatrick   }
86973471bf0Spatrick 
87073471bf0Spatrick   return {S, Punycode};
87173471bf0Spatrick }
87273471bf0Spatrick 
87373471bf0Spatrick // Parses optional base 62 number. The presence of a number is determined using
87473471bf0Spatrick // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
87573471bf0Spatrick //
87673471bf0Spatrick // This function is indended for parsing disambiguators and binders which when
87773471bf0Spatrick // not present have their value interpreted as 0, and otherwise as decoded
87873471bf0Spatrick // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
87973471bf0Spatrick // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)88073471bf0Spatrick uint64_t Demangler::parseOptionalBase62Number(char Tag) {
88173471bf0Spatrick   if (!consumeIf(Tag))
88273471bf0Spatrick     return 0;
88373471bf0Spatrick 
88473471bf0Spatrick   uint64_t N = parseBase62Number();
88573471bf0Spatrick   if (Error || !addAssign(N, 1))
88673471bf0Spatrick     return 0;
88773471bf0Spatrick 
88873471bf0Spatrick   return N;
88973471bf0Spatrick }
89073471bf0Spatrick 
89173471bf0Spatrick // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
89273471bf0Spatrick // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
89373471bf0Spatrick // "1_" encodes 2, etc.
89473471bf0Spatrick //
89573471bf0Spatrick // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()89673471bf0Spatrick uint64_t Demangler::parseBase62Number() {
89773471bf0Spatrick   if (consumeIf('_'))
89873471bf0Spatrick     return 0;
89973471bf0Spatrick 
90073471bf0Spatrick   uint64_t Value = 0;
90173471bf0Spatrick 
90273471bf0Spatrick   while (true) {
90373471bf0Spatrick     uint64_t Digit;
90473471bf0Spatrick     char C = consume();
90573471bf0Spatrick 
90673471bf0Spatrick     if (C == '_') {
90773471bf0Spatrick       break;
90873471bf0Spatrick     } else if (isDigit(C)) {
90973471bf0Spatrick       Digit = C - '0';
91073471bf0Spatrick     } else if (isLower(C)) {
91173471bf0Spatrick       Digit = 10 + (C - 'a');
91273471bf0Spatrick     } else if (isUpper(C)) {
91373471bf0Spatrick       Digit = 10 + 26 + (C - 'A');
91473471bf0Spatrick     } else {
91573471bf0Spatrick       Error = true;
91673471bf0Spatrick       return 0;
91773471bf0Spatrick     }
91873471bf0Spatrick 
91973471bf0Spatrick     if (!mulAssign(Value, 62))
92073471bf0Spatrick       return 0;
92173471bf0Spatrick 
92273471bf0Spatrick     if (!addAssign(Value, Digit))
92373471bf0Spatrick       return 0;
92473471bf0Spatrick   }
92573471bf0Spatrick 
92673471bf0Spatrick   if (!addAssign(Value, 1))
92773471bf0Spatrick     return 0;
92873471bf0Spatrick 
92973471bf0Spatrick   return Value;
93073471bf0Spatrick }
93173471bf0Spatrick 
93273471bf0Spatrick // Parses a decimal number that had been encoded without any leading zeros.
93373471bf0Spatrick //
93473471bf0Spatrick // <decimal-number> = "0"
93573471bf0Spatrick //                  | <1-9> {<0-9>}
parseDecimalNumber()93673471bf0Spatrick uint64_t Demangler::parseDecimalNumber() {
93773471bf0Spatrick   char C = look();
93873471bf0Spatrick   if (!isDigit(C)) {
93973471bf0Spatrick     Error = true;
94073471bf0Spatrick     return 0;
94173471bf0Spatrick   }
94273471bf0Spatrick 
94373471bf0Spatrick   if (C == '0') {
94473471bf0Spatrick     consume();
94573471bf0Spatrick     return 0;
94673471bf0Spatrick   }
94773471bf0Spatrick 
94873471bf0Spatrick   uint64_t Value = 0;
94973471bf0Spatrick 
95073471bf0Spatrick   while (isDigit(look())) {
95173471bf0Spatrick     if (!mulAssign(Value, 10)) {
95273471bf0Spatrick       Error = true;
95373471bf0Spatrick       return 0;
95473471bf0Spatrick     }
95573471bf0Spatrick 
95673471bf0Spatrick     uint64_t D = consume() - '0';
95773471bf0Spatrick     if (!addAssign(Value, D))
95873471bf0Spatrick       return 0;
95973471bf0Spatrick   }
96073471bf0Spatrick 
96173471bf0Spatrick   return Value;
96273471bf0Spatrick }
96373471bf0Spatrick 
96473471bf0Spatrick // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
96573471bf0Spatrick // value and stores hex digits in HexDigits. The return value is unspecified if
96673471bf0Spatrick // HexDigits.size() > 16.
96773471bf0Spatrick //
96873471bf0Spatrick // <hex-number> = "0_"
96973471bf0Spatrick //              | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(StringView & HexDigits)97073471bf0Spatrick uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
97173471bf0Spatrick   size_t Start = Position;
97273471bf0Spatrick   uint64_t Value = 0;
97373471bf0Spatrick 
97473471bf0Spatrick   if (!isHexDigit(look()))
97573471bf0Spatrick     Error = true;
97673471bf0Spatrick 
97773471bf0Spatrick   if (consumeIf('0')) {
97873471bf0Spatrick     if (!consumeIf('_'))
97973471bf0Spatrick       Error = true;
98073471bf0Spatrick   } else {
98173471bf0Spatrick     while (!Error && !consumeIf('_')) {
98273471bf0Spatrick       char C = consume();
98373471bf0Spatrick       Value *= 16;
98473471bf0Spatrick       if (isDigit(C))
98573471bf0Spatrick         Value += C - '0';
98673471bf0Spatrick       else if ('a' <= C && C <= 'f')
98773471bf0Spatrick         Value += 10 + (C - 'a');
98873471bf0Spatrick       else
98973471bf0Spatrick         Error = true;
99073471bf0Spatrick     }
99173471bf0Spatrick   }
99273471bf0Spatrick 
99373471bf0Spatrick   if (Error) {
99473471bf0Spatrick     HexDigits = StringView();
99573471bf0Spatrick     return 0;
99673471bf0Spatrick   }
99773471bf0Spatrick 
99873471bf0Spatrick   size_t End = Position - 1;
99973471bf0Spatrick   assert(Start < End);
100073471bf0Spatrick   HexDigits = Input.substr(Start, End - Start);
100173471bf0Spatrick   return Value;
100273471bf0Spatrick }
100373471bf0Spatrick 
print(char C)100473471bf0Spatrick void Demangler::print(char C) {
100573471bf0Spatrick   if (Error || !Print)
100673471bf0Spatrick     return;
100773471bf0Spatrick 
100873471bf0Spatrick   Output += C;
100973471bf0Spatrick }
101073471bf0Spatrick 
print(StringView S)101173471bf0Spatrick void Demangler::print(StringView S) {
101273471bf0Spatrick   if (Error || !Print)
101373471bf0Spatrick     return;
101473471bf0Spatrick 
101573471bf0Spatrick   Output += S;
101673471bf0Spatrick }
101773471bf0Spatrick 
printDecimalNumber(uint64_t N)101873471bf0Spatrick void Demangler::printDecimalNumber(uint64_t N) {
101973471bf0Spatrick   if (Error || !Print)
102073471bf0Spatrick     return;
102173471bf0Spatrick 
102273471bf0Spatrick   Output << N;
102373471bf0Spatrick }
102473471bf0Spatrick 
102573471bf0Spatrick // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
102673471bf0Spatrick // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
102773471bf0Spatrick // bound by one of the enclosing binders.
printLifetime(uint64_t Index)102873471bf0Spatrick void Demangler::printLifetime(uint64_t Index) {
102973471bf0Spatrick   if (Index == 0) {
103073471bf0Spatrick     print("'_");
103173471bf0Spatrick     return;
103273471bf0Spatrick   }
103373471bf0Spatrick 
103473471bf0Spatrick   if (Index - 1 >= BoundLifetimes) {
103573471bf0Spatrick     Error = true;
103673471bf0Spatrick     return;
103773471bf0Spatrick   }
103873471bf0Spatrick 
103973471bf0Spatrick   uint64_t Depth = BoundLifetimes - Index;
104073471bf0Spatrick   print('\'');
104173471bf0Spatrick   if (Depth < 26) {
104273471bf0Spatrick     char C = 'a' + Depth;
104373471bf0Spatrick     print(C);
104473471bf0Spatrick   } else {
104573471bf0Spatrick     print('z');
104673471bf0Spatrick     printDecimalNumber(Depth - 26 + 1);
104773471bf0Spatrick   }
104873471bf0Spatrick }
104973471bf0Spatrick 
decodePunycodeDigit(char C,size_t & Value)1050*d415bd75Srobert static inline bool decodePunycodeDigit(char C, size_t &Value) {
1051*d415bd75Srobert   if (isLower(C)) {
1052*d415bd75Srobert     Value = C - 'a';
1053*d415bd75Srobert     return true;
1054*d415bd75Srobert   }
1055*d415bd75Srobert 
1056*d415bd75Srobert   if (isDigit(C)) {
1057*d415bd75Srobert     Value = 26 + (C - '0');
1058*d415bd75Srobert     return true;
1059*d415bd75Srobert   }
1060*d415bd75Srobert 
1061*d415bd75Srobert   return false;
1062*d415bd75Srobert }
1063*d415bd75Srobert 
removeNullBytes(OutputBuffer & Output,size_t StartIdx)1064*d415bd75Srobert static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1065*d415bd75Srobert   char *Buffer = Output.getBuffer();
1066*d415bd75Srobert   char *Start = Buffer + StartIdx;
1067*d415bd75Srobert   char *End = Buffer + Output.getCurrentPosition();
1068*d415bd75Srobert   Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1069*d415bd75Srobert }
1070*d415bd75Srobert 
1071*d415bd75Srobert // Encodes code point as UTF-8 and stores results in Output. Returns false if
1072*d415bd75Srobert // CodePoint is not a valid unicode scalar value.
encodeUTF8(size_t CodePoint,char * Output)1073*d415bd75Srobert static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1074*d415bd75Srobert   if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1075*d415bd75Srobert     return false;
1076*d415bd75Srobert 
1077*d415bd75Srobert   if (CodePoint <= 0x7F) {
1078*d415bd75Srobert     Output[0] = CodePoint;
1079*d415bd75Srobert     return true;
1080*d415bd75Srobert   }
1081*d415bd75Srobert 
1082*d415bd75Srobert   if (CodePoint <= 0x7FF) {
1083*d415bd75Srobert     Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1084*d415bd75Srobert     Output[1] = 0x80 | (CodePoint & 0x3F);
1085*d415bd75Srobert     return true;
1086*d415bd75Srobert   }
1087*d415bd75Srobert 
1088*d415bd75Srobert   if (CodePoint <= 0xFFFF) {
1089*d415bd75Srobert     Output[0] = 0xE0 | (CodePoint >> 12);
1090*d415bd75Srobert     Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1091*d415bd75Srobert     Output[2] = 0x80 | (CodePoint & 0x3F);
1092*d415bd75Srobert     return true;
1093*d415bd75Srobert   }
1094*d415bd75Srobert 
1095*d415bd75Srobert   if (CodePoint <= 0x10FFFF) {
1096*d415bd75Srobert     Output[0] = 0xF0 | (CodePoint >> 18);
1097*d415bd75Srobert     Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1098*d415bd75Srobert     Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1099*d415bd75Srobert     Output[3] = 0x80 | (CodePoint & 0x3F);
1100*d415bd75Srobert     return true;
1101*d415bd75Srobert   }
1102*d415bd75Srobert 
1103*d415bd75Srobert   return false;
1104*d415bd75Srobert }
1105*d415bd75Srobert 
1106*d415bd75Srobert // Decodes string encoded using punycode and appends results to Output.
1107*d415bd75Srobert // Returns true if decoding was successful.
decodePunycode(StringView Input,OutputBuffer & Output)1108*d415bd75Srobert static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1109*d415bd75Srobert   size_t OutputSize = Output.getCurrentPosition();
1110*d415bd75Srobert   size_t InputIdx = 0;
1111*d415bd75Srobert 
1112*d415bd75Srobert   // Rust uses an underscore as a delimiter.
1113*d415bd75Srobert   size_t DelimiterPos = StringView::npos;
1114*d415bd75Srobert   for (size_t I = 0; I != Input.size(); ++I)
1115*d415bd75Srobert     if (Input[I] == '_')
1116*d415bd75Srobert       DelimiterPos = I;
1117*d415bd75Srobert 
1118*d415bd75Srobert   if (DelimiterPos != StringView::npos) {
1119*d415bd75Srobert     // Copy basic code points before the last delimiter to the output.
1120*d415bd75Srobert     for (; InputIdx != DelimiterPos; ++InputIdx) {
1121*d415bd75Srobert       char C = Input[InputIdx];
1122*d415bd75Srobert       if (!isValid(C))
1123*d415bd75Srobert         return false;
1124*d415bd75Srobert       // Code points are padded with zeros while decoding is in progress.
1125*d415bd75Srobert       char UTF8[4] = {C};
1126*d415bd75Srobert       Output += StringView(UTF8, UTF8 + 4);
1127*d415bd75Srobert     }
1128*d415bd75Srobert     // Skip over the delimiter.
1129*d415bd75Srobert     ++InputIdx;
1130*d415bd75Srobert   }
1131*d415bd75Srobert 
1132*d415bd75Srobert   size_t Base = 36;
1133*d415bd75Srobert   size_t Skew = 38;
1134*d415bd75Srobert   size_t Bias = 72;
1135*d415bd75Srobert   size_t N = 0x80;
1136*d415bd75Srobert   size_t TMin = 1;
1137*d415bd75Srobert   size_t TMax = 26;
1138*d415bd75Srobert   size_t Damp = 700;
1139*d415bd75Srobert 
1140*d415bd75Srobert   auto Adapt = [&](size_t Delta, size_t NumPoints) {
1141*d415bd75Srobert     Delta /= Damp;
1142*d415bd75Srobert     Delta += Delta / NumPoints;
1143*d415bd75Srobert     Damp = 2;
1144*d415bd75Srobert 
1145*d415bd75Srobert     size_t K = 0;
1146*d415bd75Srobert     while (Delta > (Base - TMin) * TMax / 2) {
1147*d415bd75Srobert       Delta /= Base - TMin;
1148*d415bd75Srobert       K += Base;
1149*d415bd75Srobert     }
1150*d415bd75Srobert     return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1151*d415bd75Srobert   };
1152*d415bd75Srobert 
1153*d415bd75Srobert   // Main decoding loop.
1154*d415bd75Srobert   for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1155*d415bd75Srobert     size_t OldI = I;
1156*d415bd75Srobert     size_t W = 1;
1157*d415bd75Srobert     size_t Max = std::numeric_limits<size_t>::max();
1158*d415bd75Srobert     for (size_t K = Base; true; K += Base) {
1159*d415bd75Srobert       if (InputIdx == Input.size())
1160*d415bd75Srobert         return false;
1161*d415bd75Srobert       char C = Input[InputIdx++];
1162*d415bd75Srobert       size_t Digit = 0;
1163*d415bd75Srobert       if (!decodePunycodeDigit(C, Digit))
1164*d415bd75Srobert         return false;
1165*d415bd75Srobert 
1166*d415bd75Srobert       if (Digit > (Max - I) / W)
1167*d415bd75Srobert         return false;
1168*d415bd75Srobert       I += Digit * W;
1169*d415bd75Srobert 
1170*d415bd75Srobert       size_t T;
1171*d415bd75Srobert       if (K <= Bias)
1172*d415bd75Srobert         T = TMin;
1173*d415bd75Srobert       else if (K >= Bias + TMax)
1174*d415bd75Srobert         T = TMax;
1175*d415bd75Srobert       else
1176*d415bd75Srobert         T = K - Bias;
1177*d415bd75Srobert 
1178*d415bd75Srobert       if (Digit < T)
1179*d415bd75Srobert         break;
1180*d415bd75Srobert 
1181*d415bd75Srobert       if (W > Max / (Base - T))
1182*d415bd75Srobert         return false;
1183*d415bd75Srobert       W *= (Base - T);
1184*d415bd75Srobert     }
1185*d415bd75Srobert     size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1186*d415bd75Srobert     Bias = Adapt(I - OldI, NumPoints);
1187*d415bd75Srobert 
1188*d415bd75Srobert     if (I / NumPoints > Max - N)
1189*d415bd75Srobert       return false;
1190*d415bd75Srobert     N += I / NumPoints;
1191*d415bd75Srobert     I = I % NumPoints;
1192*d415bd75Srobert 
1193*d415bd75Srobert     // Insert N at position I in the output.
1194*d415bd75Srobert     char UTF8[4] = {};
1195*d415bd75Srobert     if (!encodeUTF8(N, UTF8))
1196*d415bd75Srobert       return false;
1197*d415bd75Srobert     Output.insert(OutputSize + I * 4, UTF8, 4);
1198*d415bd75Srobert   }
1199*d415bd75Srobert 
1200*d415bd75Srobert   removeNullBytes(Output, OutputSize);
1201*d415bd75Srobert   return true;
1202*d415bd75Srobert }
1203*d415bd75Srobert 
printIdentifier(Identifier Ident)1204*d415bd75Srobert void Demangler::printIdentifier(Identifier Ident) {
1205*d415bd75Srobert   if (Error || !Print)
1206*d415bd75Srobert     return;
1207*d415bd75Srobert 
1208*d415bd75Srobert   if (Ident.Punycode) {
1209*d415bd75Srobert     if (!decodePunycode(Ident.Name, Output))
1210*d415bd75Srobert       Error = true;
1211*d415bd75Srobert   } else {
1212*d415bd75Srobert     print(Ident.Name);
1213*d415bd75Srobert   }
1214*d415bd75Srobert }
1215*d415bd75Srobert 
look() const121673471bf0Spatrick char Demangler::look() const {
121773471bf0Spatrick   if (Error || Position >= Input.size())
121873471bf0Spatrick     return 0;
121973471bf0Spatrick 
122073471bf0Spatrick   return Input[Position];
122173471bf0Spatrick }
122273471bf0Spatrick 
consume()122373471bf0Spatrick char Demangler::consume() {
122473471bf0Spatrick   if (Error || Position >= Input.size()) {
122573471bf0Spatrick     Error = true;
122673471bf0Spatrick     return 0;
122773471bf0Spatrick   }
122873471bf0Spatrick 
122973471bf0Spatrick   return Input[Position++];
123073471bf0Spatrick }
123173471bf0Spatrick 
consumeIf(char Prefix)123273471bf0Spatrick bool Demangler::consumeIf(char Prefix) {
123373471bf0Spatrick   if (Error || Position >= Input.size() || Input[Position] != Prefix)
123473471bf0Spatrick     return false;
123573471bf0Spatrick 
123673471bf0Spatrick   Position += 1;
123773471bf0Spatrick   return true;
123873471bf0Spatrick }
123973471bf0Spatrick 
124073471bf0Spatrick /// Computes A + B. When computation wraps around sets the error and returns
124173471bf0Spatrick /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)124273471bf0Spatrick bool Demangler::addAssign(uint64_t &A, uint64_t B) {
124373471bf0Spatrick   if (A > std::numeric_limits<uint64_t>::max() - B) {
124473471bf0Spatrick     Error = true;
124573471bf0Spatrick     return false;
124673471bf0Spatrick   }
124773471bf0Spatrick 
124873471bf0Spatrick   A += B;
124973471bf0Spatrick   return true;
125073471bf0Spatrick }
125173471bf0Spatrick 
125273471bf0Spatrick /// Computes A * B. When computation wraps around sets the error and returns
125373471bf0Spatrick /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)125473471bf0Spatrick bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
125573471bf0Spatrick   if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
125673471bf0Spatrick     Error = true;
125773471bf0Spatrick     return false;
125873471bf0Spatrick   }
125973471bf0Spatrick 
126073471bf0Spatrick   A *= B;
126173471bf0Spatrick   return true;
126273471bf0Spatrick }
1263