173471bf0Spatrick //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
273471bf0Spatrick //
373471bf0Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
473471bf0Spatrick // See https://llvm.org/LICENSE.txt for license information.
573471bf0Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
673471bf0Spatrick //
773471bf0Spatrick //===----------------------------------------------------------------------===//
873471bf0Spatrick //
973471bf0Spatrick // This file defines a demangler for Rust v0 mangled symbols as specified in
1073471bf0Spatrick // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
1173471bf0Spatrick //
1273471bf0Spatrick //===----------------------------------------------------------------------===//
1373471bf0Spatrick
1473471bf0Spatrick #include "llvm/Demangle/Demangle.h"
1573471bf0Spatrick #include "llvm/Demangle/StringView.h"
1673471bf0Spatrick #include "llvm/Demangle/Utility.h"
1773471bf0Spatrick
1873471bf0Spatrick #include <algorithm>
1973471bf0Spatrick #include <cassert>
2073471bf0Spatrick #include <cstdint>
2173471bf0Spatrick #include <cstring>
2273471bf0Spatrick #include <limits>
2373471bf0Spatrick
2473471bf0Spatrick using namespace llvm;
2573471bf0Spatrick
26*d415bd75Srobert using llvm::itanium_demangle::OutputBuffer;
27*d415bd75Srobert using llvm::itanium_demangle::ScopedOverride;
2873471bf0Spatrick using llvm::itanium_demangle::StringView;
2973471bf0Spatrick
3073471bf0Spatrick namespace {
3173471bf0Spatrick
3273471bf0Spatrick struct Identifier {
3373471bf0Spatrick StringView Name;
3473471bf0Spatrick bool Punycode;
3573471bf0Spatrick
empty__anond672b4d40111::Identifier3673471bf0Spatrick bool empty() const { return Name.empty(); }
3773471bf0Spatrick };
3873471bf0Spatrick
3973471bf0Spatrick enum class BasicType {
4073471bf0Spatrick Bool,
4173471bf0Spatrick Char,
4273471bf0Spatrick I8,
4373471bf0Spatrick I16,
4473471bf0Spatrick I32,
4573471bf0Spatrick I64,
4673471bf0Spatrick I128,
4773471bf0Spatrick ISize,
4873471bf0Spatrick U8,
4973471bf0Spatrick U16,
5073471bf0Spatrick U32,
5173471bf0Spatrick U64,
5273471bf0Spatrick U128,
5373471bf0Spatrick USize,
5473471bf0Spatrick F32,
5573471bf0Spatrick F64,
5673471bf0Spatrick Str,
5773471bf0Spatrick Placeholder,
5873471bf0Spatrick Unit,
5973471bf0Spatrick Variadic,
6073471bf0Spatrick Never,
6173471bf0Spatrick };
6273471bf0Spatrick
6373471bf0Spatrick enum class IsInType {
6473471bf0Spatrick No,
6573471bf0Spatrick Yes,
6673471bf0Spatrick };
6773471bf0Spatrick
6873471bf0Spatrick enum class LeaveGenericsOpen {
6973471bf0Spatrick No,
7073471bf0Spatrick Yes,
7173471bf0Spatrick };
7273471bf0Spatrick
7373471bf0Spatrick class Demangler {
7473471bf0Spatrick // Maximum recursion level. Used to avoid stack overflow.
7573471bf0Spatrick size_t MaxRecursionLevel;
7673471bf0Spatrick // Current recursion level.
7773471bf0Spatrick size_t RecursionLevel;
7873471bf0Spatrick size_t BoundLifetimes;
7973471bf0Spatrick // Input string that is being demangled with "_R" prefix removed.
8073471bf0Spatrick StringView Input;
8173471bf0Spatrick // Position in the input string.
8273471bf0Spatrick size_t Position;
8373471bf0Spatrick // When true, print methods append the output to the stream.
8473471bf0Spatrick // When false, the output is suppressed.
8573471bf0Spatrick bool Print;
8673471bf0Spatrick // True if an error occurred.
8773471bf0Spatrick bool Error;
8873471bf0Spatrick
8973471bf0Spatrick public:
9073471bf0Spatrick // Demangled output.
91*d415bd75Srobert OutputBuffer Output;
9273471bf0Spatrick
9373471bf0Spatrick Demangler(size_t MaxRecursionLevel = 500);
9473471bf0Spatrick
9573471bf0Spatrick bool demangle(StringView MangledName);
9673471bf0Spatrick
9773471bf0Spatrick private:
9873471bf0Spatrick bool demanglePath(IsInType Type,
9973471bf0Spatrick LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
10073471bf0Spatrick void demangleImplPath(IsInType InType);
10173471bf0Spatrick void demangleGenericArg();
10273471bf0Spatrick void demangleType();
10373471bf0Spatrick void demangleFnSig();
10473471bf0Spatrick void demangleDynBounds();
10573471bf0Spatrick void demangleDynTrait();
10673471bf0Spatrick void demangleOptionalBinder();
10773471bf0Spatrick void demangleConst();
10873471bf0Spatrick void demangleConstInt();
10973471bf0Spatrick void demangleConstBool();
11073471bf0Spatrick void demangleConstChar();
11173471bf0Spatrick
demangleBackref(Callable Demangler)11273471bf0Spatrick template <typename Callable> void demangleBackref(Callable Demangler) {
11373471bf0Spatrick uint64_t Backref = parseBase62Number();
11473471bf0Spatrick if (Error || Backref >= Position) {
11573471bf0Spatrick Error = true;
11673471bf0Spatrick return;
11773471bf0Spatrick }
11873471bf0Spatrick
11973471bf0Spatrick if (!Print)
12073471bf0Spatrick return;
12173471bf0Spatrick
122*d415bd75Srobert ScopedOverride<size_t> SavePosition(Position, Position);
12373471bf0Spatrick Position = Backref;
12473471bf0Spatrick Demangler();
12573471bf0Spatrick }
12673471bf0Spatrick
12773471bf0Spatrick Identifier parseIdentifier();
12873471bf0Spatrick uint64_t parseOptionalBase62Number(char Tag);
12973471bf0Spatrick uint64_t parseBase62Number();
13073471bf0Spatrick uint64_t parseDecimalNumber();
13173471bf0Spatrick uint64_t parseHexNumber(StringView &HexDigits);
13273471bf0Spatrick
13373471bf0Spatrick void print(char C);
13473471bf0Spatrick void print(StringView S);
13573471bf0Spatrick void printDecimalNumber(uint64_t N);
13673471bf0Spatrick void printBasicType(BasicType);
13773471bf0Spatrick void printLifetime(uint64_t Index);
138*d415bd75Srobert void printIdentifier(Identifier Ident);
13973471bf0Spatrick
14073471bf0Spatrick char look() const;
14173471bf0Spatrick char consume();
14273471bf0Spatrick bool consumeIf(char Prefix);
14373471bf0Spatrick
14473471bf0Spatrick bool addAssign(uint64_t &A, uint64_t B);
14573471bf0Spatrick bool mulAssign(uint64_t &A, uint64_t B);
14673471bf0Spatrick };
14773471bf0Spatrick
14873471bf0Spatrick } // namespace
14973471bf0Spatrick
rustDemangle(const char * MangledName)150*d415bd75Srobert char *llvm::rustDemangle(const char *MangledName) {
151*d415bd75Srobert if (MangledName == nullptr)
15273471bf0Spatrick return nullptr;
15373471bf0Spatrick
15473471bf0Spatrick // Return early if mangled name doesn't look like a Rust symbol.
15573471bf0Spatrick StringView Mangled(MangledName);
156*d415bd75Srobert if (!Mangled.startsWith("_R"))
15773471bf0Spatrick return nullptr;
15873471bf0Spatrick
15973471bf0Spatrick Demangler D;
16073471bf0Spatrick if (!D.demangle(Mangled)) {
16173471bf0Spatrick std::free(D.Output.getBuffer());
16273471bf0Spatrick return nullptr;
16373471bf0Spatrick }
16473471bf0Spatrick
16573471bf0Spatrick D.Output += '\0';
16673471bf0Spatrick
167*d415bd75Srobert return D.Output.getBuffer();
16873471bf0Spatrick }
16973471bf0Spatrick
Demangler(size_t MaxRecursionLevel)17073471bf0Spatrick Demangler::Demangler(size_t MaxRecursionLevel)
17173471bf0Spatrick : MaxRecursionLevel(MaxRecursionLevel) {}
17273471bf0Spatrick
isDigit(const char C)17373471bf0Spatrick static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
17473471bf0Spatrick
isHexDigit(const char C)17573471bf0Spatrick static inline bool isHexDigit(const char C) {
17673471bf0Spatrick return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
17773471bf0Spatrick }
17873471bf0Spatrick
isLower(const char C)17973471bf0Spatrick static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
18073471bf0Spatrick
isUpper(const char C)18173471bf0Spatrick static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
18273471bf0Spatrick
18373471bf0Spatrick /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)18473471bf0Spatrick static inline bool isValid(const char C) {
18573471bf0Spatrick return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
18673471bf0Spatrick }
18773471bf0Spatrick
18873471bf0Spatrick // Demangles Rust v0 mangled symbol. Returns true when successful, and false
18973471bf0Spatrick // otherwise. The demangled symbol is stored in Output field. It is
19073471bf0Spatrick // responsibility of the caller to free the memory behind the output stream.
19173471bf0Spatrick //
19273471bf0Spatrick // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(StringView Mangled)19373471bf0Spatrick bool Demangler::demangle(StringView Mangled) {
19473471bf0Spatrick Position = 0;
19573471bf0Spatrick Error = false;
19673471bf0Spatrick Print = true;
19773471bf0Spatrick RecursionLevel = 0;
19873471bf0Spatrick BoundLifetimes = 0;
19973471bf0Spatrick
20073471bf0Spatrick if (!Mangled.consumeFront("_R")) {
20173471bf0Spatrick Error = true;
20273471bf0Spatrick return false;
20373471bf0Spatrick }
20473471bf0Spatrick size_t Dot = Mangled.find('.');
20573471bf0Spatrick Input = Mangled.substr(0, Dot);
20673471bf0Spatrick StringView Suffix = Mangled.dropFront(Dot);
20773471bf0Spatrick
20873471bf0Spatrick demanglePath(IsInType::No);
20973471bf0Spatrick
21073471bf0Spatrick if (Position != Input.size()) {
211*d415bd75Srobert ScopedOverride<bool> SavePrint(Print, false);
21273471bf0Spatrick demanglePath(IsInType::No);
21373471bf0Spatrick }
21473471bf0Spatrick
21573471bf0Spatrick if (Position != Input.size())
21673471bf0Spatrick Error = true;
21773471bf0Spatrick
21873471bf0Spatrick if (!Suffix.empty()) {
21973471bf0Spatrick print(" (");
22073471bf0Spatrick print(Suffix);
22173471bf0Spatrick print(")");
22273471bf0Spatrick }
22373471bf0Spatrick
22473471bf0Spatrick return !Error;
22573471bf0Spatrick }
22673471bf0Spatrick
22773471bf0Spatrick // Demangles a path. InType indicates whether a path is inside a type. When
22873471bf0Spatrick // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
22973471bf0Spatrick // output. Return value indicates whether generics arguments have been left
23073471bf0Spatrick // open.
23173471bf0Spatrick //
23273471bf0Spatrick // <path> = "C" <identifier> // crate root
23373471bf0Spatrick // | "M" <impl-path> <type> // <T> (inherent impl)
23473471bf0Spatrick // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
23573471bf0Spatrick // | "Y" <type> <path> // <T as Trait> (trait definition)
23673471bf0Spatrick // | "N" <ns> <path> <identifier> // ...::ident (nested path)
23773471bf0Spatrick // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
23873471bf0Spatrick // | <backref>
23973471bf0Spatrick // <identifier> = [<disambiguator>] <undisambiguated-identifier>
24073471bf0Spatrick // <ns> = "C" // closure
24173471bf0Spatrick // | "S" // shim
24273471bf0Spatrick // | <A-Z> // other special namespaces
24373471bf0Spatrick // | <a-z> // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)24473471bf0Spatrick bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
24573471bf0Spatrick if (Error || RecursionLevel >= MaxRecursionLevel) {
24673471bf0Spatrick Error = true;
24773471bf0Spatrick return false;
24873471bf0Spatrick }
249*d415bd75Srobert ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
25073471bf0Spatrick
25173471bf0Spatrick switch (consume()) {
25273471bf0Spatrick case 'C': {
25373471bf0Spatrick parseOptionalBase62Number('s');
254*d415bd75Srobert printIdentifier(parseIdentifier());
25573471bf0Spatrick break;
25673471bf0Spatrick }
25773471bf0Spatrick case 'M': {
25873471bf0Spatrick demangleImplPath(InType);
25973471bf0Spatrick print("<");
26073471bf0Spatrick demangleType();
26173471bf0Spatrick print(">");
26273471bf0Spatrick break;
26373471bf0Spatrick }
26473471bf0Spatrick case 'X': {
26573471bf0Spatrick demangleImplPath(InType);
26673471bf0Spatrick print("<");
26773471bf0Spatrick demangleType();
26873471bf0Spatrick print(" as ");
26973471bf0Spatrick demanglePath(IsInType::Yes);
27073471bf0Spatrick print(">");
27173471bf0Spatrick break;
27273471bf0Spatrick }
27373471bf0Spatrick case 'Y': {
27473471bf0Spatrick print("<");
27573471bf0Spatrick demangleType();
27673471bf0Spatrick print(" as ");
27773471bf0Spatrick demanglePath(IsInType::Yes);
27873471bf0Spatrick print(">");
27973471bf0Spatrick break;
28073471bf0Spatrick }
28173471bf0Spatrick case 'N': {
28273471bf0Spatrick char NS = consume();
28373471bf0Spatrick if (!isLower(NS) && !isUpper(NS)) {
28473471bf0Spatrick Error = true;
28573471bf0Spatrick break;
28673471bf0Spatrick }
28773471bf0Spatrick demanglePath(InType);
28873471bf0Spatrick
28973471bf0Spatrick uint64_t Disambiguator = parseOptionalBase62Number('s');
29073471bf0Spatrick Identifier Ident = parseIdentifier();
29173471bf0Spatrick
29273471bf0Spatrick if (isUpper(NS)) {
29373471bf0Spatrick // Special namespaces
29473471bf0Spatrick print("::{");
29573471bf0Spatrick if (NS == 'C')
29673471bf0Spatrick print("closure");
29773471bf0Spatrick else if (NS == 'S')
29873471bf0Spatrick print("shim");
29973471bf0Spatrick else
30073471bf0Spatrick print(NS);
30173471bf0Spatrick if (!Ident.empty()) {
30273471bf0Spatrick print(":");
303*d415bd75Srobert printIdentifier(Ident);
30473471bf0Spatrick }
30573471bf0Spatrick print('#');
30673471bf0Spatrick printDecimalNumber(Disambiguator);
30773471bf0Spatrick print('}');
30873471bf0Spatrick } else {
30973471bf0Spatrick // Implementation internal namespaces.
31073471bf0Spatrick if (!Ident.empty()) {
31173471bf0Spatrick print("::");
312*d415bd75Srobert printIdentifier(Ident);
31373471bf0Spatrick }
31473471bf0Spatrick }
31573471bf0Spatrick break;
31673471bf0Spatrick }
31773471bf0Spatrick case 'I': {
31873471bf0Spatrick demanglePath(InType);
31973471bf0Spatrick // Omit "::" when in a type, where it is optional.
32073471bf0Spatrick if (InType == IsInType::No)
32173471bf0Spatrick print("::");
32273471bf0Spatrick print("<");
32373471bf0Spatrick for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
32473471bf0Spatrick if (I > 0)
32573471bf0Spatrick print(", ");
32673471bf0Spatrick demangleGenericArg();
32773471bf0Spatrick }
32873471bf0Spatrick if (LeaveOpen == LeaveGenericsOpen::Yes)
32973471bf0Spatrick return true;
33073471bf0Spatrick else
33173471bf0Spatrick print(">");
33273471bf0Spatrick break;
33373471bf0Spatrick }
33473471bf0Spatrick case 'B': {
33573471bf0Spatrick bool IsOpen = false;
33673471bf0Spatrick demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
33773471bf0Spatrick return IsOpen;
33873471bf0Spatrick }
33973471bf0Spatrick default:
34073471bf0Spatrick Error = true;
34173471bf0Spatrick break;
34273471bf0Spatrick }
34373471bf0Spatrick
34473471bf0Spatrick return false;
34573471bf0Spatrick }
34673471bf0Spatrick
34773471bf0Spatrick // <impl-path> = [<disambiguator>] <path>
34873471bf0Spatrick // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)34973471bf0Spatrick void Demangler::demangleImplPath(IsInType InType) {
350*d415bd75Srobert ScopedOverride<bool> SavePrint(Print, false);
35173471bf0Spatrick parseOptionalBase62Number('s');
35273471bf0Spatrick demanglePath(InType);
35373471bf0Spatrick }
35473471bf0Spatrick
35573471bf0Spatrick // <generic-arg> = <lifetime>
35673471bf0Spatrick // | <type>
35773471bf0Spatrick // | "K" <const>
35873471bf0Spatrick // <lifetime> = "L" <base-62-number>
demangleGenericArg()35973471bf0Spatrick void Demangler::demangleGenericArg() {
36073471bf0Spatrick if (consumeIf('L'))
36173471bf0Spatrick printLifetime(parseBase62Number());
36273471bf0Spatrick else if (consumeIf('K'))
36373471bf0Spatrick demangleConst();
36473471bf0Spatrick else
36573471bf0Spatrick demangleType();
36673471bf0Spatrick }
36773471bf0Spatrick
36873471bf0Spatrick // <basic-type> = "a" // i8
36973471bf0Spatrick // | "b" // bool
37073471bf0Spatrick // | "c" // char
37173471bf0Spatrick // | "d" // f64
37273471bf0Spatrick // | "e" // str
37373471bf0Spatrick // | "f" // f32
37473471bf0Spatrick // | "h" // u8
37573471bf0Spatrick // | "i" // isize
37673471bf0Spatrick // | "j" // usize
37773471bf0Spatrick // | "l" // i32
37873471bf0Spatrick // | "m" // u32
37973471bf0Spatrick // | "n" // i128
38073471bf0Spatrick // | "o" // u128
38173471bf0Spatrick // | "s" // i16
38273471bf0Spatrick // | "t" // u16
38373471bf0Spatrick // | "u" // ()
38473471bf0Spatrick // | "v" // ...
38573471bf0Spatrick // | "x" // i64
38673471bf0Spatrick // | "y" // u64
38773471bf0Spatrick // | "z" // !
38873471bf0Spatrick // | "p" // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)38973471bf0Spatrick static bool parseBasicType(char C, BasicType &Type) {
39073471bf0Spatrick switch (C) {
39173471bf0Spatrick case 'a':
39273471bf0Spatrick Type = BasicType::I8;
39373471bf0Spatrick return true;
39473471bf0Spatrick case 'b':
39573471bf0Spatrick Type = BasicType::Bool;
39673471bf0Spatrick return true;
39773471bf0Spatrick case 'c':
39873471bf0Spatrick Type = BasicType::Char;
39973471bf0Spatrick return true;
40073471bf0Spatrick case 'd':
40173471bf0Spatrick Type = BasicType::F64;
40273471bf0Spatrick return true;
40373471bf0Spatrick case 'e':
40473471bf0Spatrick Type = BasicType::Str;
40573471bf0Spatrick return true;
40673471bf0Spatrick case 'f':
40773471bf0Spatrick Type = BasicType::F32;
40873471bf0Spatrick return true;
40973471bf0Spatrick case 'h':
41073471bf0Spatrick Type = BasicType::U8;
41173471bf0Spatrick return true;
41273471bf0Spatrick case 'i':
41373471bf0Spatrick Type = BasicType::ISize;
41473471bf0Spatrick return true;
41573471bf0Spatrick case 'j':
41673471bf0Spatrick Type = BasicType::USize;
41773471bf0Spatrick return true;
41873471bf0Spatrick case 'l':
41973471bf0Spatrick Type = BasicType::I32;
42073471bf0Spatrick return true;
42173471bf0Spatrick case 'm':
42273471bf0Spatrick Type = BasicType::U32;
42373471bf0Spatrick return true;
42473471bf0Spatrick case 'n':
42573471bf0Spatrick Type = BasicType::I128;
42673471bf0Spatrick return true;
42773471bf0Spatrick case 'o':
42873471bf0Spatrick Type = BasicType::U128;
42973471bf0Spatrick return true;
43073471bf0Spatrick case 'p':
43173471bf0Spatrick Type = BasicType::Placeholder;
43273471bf0Spatrick return true;
43373471bf0Spatrick case 's':
43473471bf0Spatrick Type = BasicType::I16;
43573471bf0Spatrick return true;
43673471bf0Spatrick case 't':
43773471bf0Spatrick Type = BasicType::U16;
43873471bf0Spatrick return true;
43973471bf0Spatrick case 'u':
44073471bf0Spatrick Type = BasicType::Unit;
44173471bf0Spatrick return true;
44273471bf0Spatrick case 'v':
44373471bf0Spatrick Type = BasicType::Variadic;
44473471bf0Spatrick return true;
44573471bf0Spatrick case 'x':
44673471bf0Spatrick Type = BasicType::I64;
44773471bf0Spatrick return true;
44873471bf0Spatrick case 'y':
44973471bf0Spatrick Type = BasicType::U64;
45073471bf0Spatrick return true;
45173471bf0Spatrick case 'z':
45273471bf0Spatrick Type = BasicType::Never;
45373471bf0Spatrick return true;
45473471bf0Spatrick default:
45573471bf0Spatrick return false;
45673471bf0Spatrick }
45773471bf0Spatrick }
45873471bf0Spatrick
printBasicType(BasicType Type)45973471bf0Spatrick void Demangler::printBasicType(BasicType Type) {
46073471bf0Spatrick switch (Type) {
46173471bf0Spatrick case BasicType::Bool:
46273471bf0Spatrick print("bool");
46373471bf0Spatrick break;
46473471bf0Spatrick case BasicType::Char:
46573471bf0Spatrick print("char");
46673471bf0Spatrick break;
46773471bf0Spatrick case BasicType::I8:
46873471bf0Spatrick print("i8");
46973471bf0Spatrick break;
47073471bf0Spatrick case BasicType::I16:
47173471bf0Spatrick print("i16");
47273471bf0Spatrick break;
47373471bf0Spatrick case BasicType::I32:
47473471bf0Spatrick print("i32");
47573471bf0Spatrick break;
47673471bf0Spatrick case BasicType::I64:
47773471bf0Spatrick print("i64");
47873471bf0Spatrick break;
47973471bf0Spatrick case BasicType::I128:
48073471bf0Spatrick print("i128");
48173471bf0Spatrick break;
48273471bf0Spatrick case BasicType::ISize:
48373471bf0Spatrick print("isize");
48473471bf0Spatrick break;
48573471bf0Spatrick case BasicType::U8:
48673471bf0Spatrick print("u8");
48773471bf0Spatrick break;
48873471bf0Spatrick case BasicType::U16:
48973471bf0Spatrick print("u16");
49073471bf0Spatrick break;
49173471bf0Spatrick case BasicType::U32:
49273471bf0Spatrick print("u32");
49373471bf0Spatrick break;
49473471bf0Spatrick case BasicType::U64:
49573471bf0Spatrick print("u64");
49673471bf0Spatrick break;
49773471bf0Spatrick case BasicType::U128:
49873471bf0Spatrick print("u128");
49973471bf0Spatrick break;
50073471bf0Spatrick case BasicType::USize:
50173471bf0Spatrick print("usize");
50273471bf0Spatrick break;
50373471bf0Spatrick case BasicType::F32:
50473471bf0Spatrick print("f32");
50573471bf0Spatrick break;
50673471bf0Spatrick case BasicType::F64:
50773471bf0Spatrick print("f64");
50873471bf0Spatrick break;
50973471bf0Spatrick case BasicType::Str:
51073471bf0Spatrick print("str");
51173471bf0Spatrick break;
51273471bf0Spatrick case BasicType::Placeholder:
51373471bf0Spatrick print("_");
51473471bf0Spatrick break;
51573471bf0Spatrick case BasicType::Unit:
51673471bf0Spatrick print("()");
51773471bf0Spatrick break;
51873471bf0Spatrick case BasicType::Variadic:
51973471bf0Spatrick print("...");
52073471bf0Spatrick break;
52173471bf0Spatrick case BasicType::Never:
52273471bf0Spatrick print("!");
52373471bf0Spatrick break;
52473471bf0Spatrick }
52573471bf0Spatrick }
52673471bf0Spatrick
52773471bf0Spatrick // <type> = | <basic-type>
52873471bf0Spatrick // | <path> // named type
52973471bf0Spatrick // | "A" <type> <const> // [T; N]
53073471bf0Spatrick // | "S" <type> // [T]
53173471bf0Spatrick // | "T" {<type>} "E" // (T1, T2, T3, ...)
53273471bf0Spatrick // | "R" [<lifetime>] <type> // &T
53373471bf0Spatrick // | "Q" [<lifetime>] <type> // &mut T
53473471bf0Spatrick // | "P" <type> // *const T
53573471bf0Spatrick // | "O" <type> // *mut T
53673471bf0Spatrick // | "F" <fn-sig> // fn(...) -> ...
53773471bf0Spatrick // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
53873471bf0Spatrick // | <backref> // backref
demangleType()53973471bf0Spatrick void Demangler::demangleType() {
54073471bf0Spatrick if (Error || RecursionLevel >= MaxRecursionLevel) {
54173471bf0Spatrick Error = true;
54273471bf0Spatrick return;
54373471bf0Spatrick }
544*d415bd75Srobert ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
54573471bf0Spatrick
54673471bf0Spatrick size_t Start = Position;
54773471bf0Spatrick char C = consume();
54873471bf0Spatrick BasicType Type;
54973471bf0Spatrick if (parseBasicType(C, Type))
55073471bf0Spatrick return printBasicType(Type);
55173471bf0Spatrick
55273471bf0Spatrick switch (C) {
55373471bf0Spatrick case 'A':
55473471bf0Spatrick print("[");
55573471bf0Spatrick demangleType();
55673471bf0Spatrick print("; ");
55773471bf0Spatrick demangleConst();
55873471bf0Spatrick print("]");
55973471bf0Spatrick break;
56073471bf0Spatrick case 'S':
56173471bf0Spatrick print("[");
56273471bf0Spatrick demangleType();
56373471bf0Spatrick print("]");
56473471bf0Spatrick break;
56573471bf0Spatrick case 'T': {
56673471bf0Spatrick print("(");
56773471bf0Spatrick size_t I = 0;
56873471bf0Spatrick for (; !Error && !consumeIf('E'); ++I) {
56973471bf0Spatrick if (I > 0)
57073471bf0Spatrick print(", ");
57173471bf0Spatrick demangleType();
57273471bf0Spatrick }
57373471bf0Spatrick if (I == 1)
57473471bf0Spatrick print(",");
57573471bf0Spatrick print(")");
57673471bf0Spatrick break;
57773471bf0Spatrick }
57873471bf0Spatrick case 'R':
57973471bf0Spatrick case 'Q':
58073471bf0Spatrick print('&');
58173471bf0Spatrick if (consumeIf('L')) {
58273471bf0Spatrick if (auto Lifetime = parseBase62Number()) {
58373471bf0Spatrick printLifetime(Lifetime);
58473471bf0Spatrick print(' ');
58573471bf0Spatrick }
58673471bf0Spatrick }
58773471bf0Spatrick if (C == 'Q')
58873471bf0Spatrick print("mut ");
58973471bf0Spatrick demangleType();
59073471bf0Spatrick break;
59173471bf0Spatrick case 'P':
59273471bf0Spatrick print("*const ");
59373471bf0Spatrick demangleType();
59473471bf0Spatrick break;
59573471bf0Spatrick case 'O':
59673471bf0Spatrick print("*mut ");
59773471bf0Spatrick demangleType();
59873471bf0Spatrick break;
59973471bf0Spatrick case 'F':
60073471bf0Spatrick demangleFnSig();
60173471bf0Spatrick break;
60273471bf0Spatrick case 'D':
60373471bf0Spatrick demangleDynBounds();
60473471bf0Spatrick if (consumeIf('L')) {
60573471bf0Spatrick if (auto Lifetime = parseBase62Number()) {
60673471bf0Spatrick print(" + ");
60773471bf0Spatrick printLifetime(Lifetime);
60873471bf0Spatrick }
60973471bf0Spatrick } else {
61073471bf0Spatrick Error = true;
61173471bf0Spatrick }
61273471bf0Spatrick break;
61373471bf0Spatrick case 'B':
61473471bf0Spatrick demangleBackref([&] { demangleType(); });
61573471bf0Spatrick break;
61673471bf0Spatrick default:
61773471bf0Spatrick Position = Start;
61873471bf0Spatrick demanglePath(IsInType::Yes);
61973471bf0Spatrick break;
62073471bf0Spatrick }
62173471bf0Spatrick }
62273471bf0Spatrick
62373471bf0Spatrick // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
62473471bf0Spatrick // <abi> = "C"
62573471bf0Spatrick // | <undisambiguated-identifier>
demangleFnSig()62673471bf0Spatrick void Demangler::demangleFnSig() {
627*d415bd75Srobert ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
62873471bf0Spatrick demangleOptionalBinder();
62973471bf0Spatrick
63073471bf0Spatrick if (consumeIf('U'))
63173471bf0Spatrick print("unsafe ");
63273471bf0Spatrick
63373471bf0Spatrick if (consumeIf('K')) {
63473471bf0Spatrick print("extern \"");
63573471bf0Spatrick if (consumeIf('C')) {
63673471bf0Spatrick print("C");
63773471bf0Spatrick } else {
63873471bf0Spatrick Identifier Ident = parseIdentifier();
639*d415bd75Srobert if (Ident.Punycode)
640*d415bd75Srobert Error = true;
64173471bf0Spatrick for (char C : Ident.Name) {
64273471bf0Spatrick // When mangling ABI string, the "-" is replaced with "_".
64373471bf0Spatrick if (C == '_')
64473471bf0Spatrick C = '-';
64573471bf0Spatrick print(C);
64673471bf0Spatrick }
64773471bf0Spatrick }
64873471bf0Spatrick print("\" ");
64973471bf0Spatrick }
65073471bf0Spatrick
65173471bf0Spatrick print("fn(");
65273471bf0Spatrick for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
65373471bf0Spatrick if (I > 0)
65473471bf0Spatrick print(", ");
65573471bf0Spatrick demangleType();
65673471bf0Spatrick }
65773471bf0Spatrick print(")");
65873471bf0Spatrick
65973471bf0Spatrick if (consumeIf('u')) {
66073471bf0Spatrick // Skip the unit type from the output.
66173471bf0Spatrick } else {
66273471bf0Spatrick print(" -> ");
66373471bf0Spatrick demangleType();
66473471bf0Spatrick }
66573471bf0Spatrick }
66673471bf0Spatrick
66773471bf0Spatrick // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()66873471bf0Spatrick void Demangler::demangleDynBounds() {
669*d415bd75Srobert ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
67073471bf0Spatrick print("dyn ");
67173471bf0Spatrick demangleOptionalBinder();
67273471bf0Spatrick for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
67373471bf0Spatrick if (I > 0)
67473471bf0Spatrick print(" + ");
67573471bf0Spatrick demangleDynTrait();
67673471bf0Spatrick }
67773471bf0Spatrick }
67873471bf0Spatrick
67973471bf0Spatrick // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
68073471bf0Spatrick // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()68173471bf0Spatrick void Demangler::demangleDynTrait() {
68273471bf0Spatrick bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
68373471bf0Spatrick while (!Error && consumeIf('p')) {
68473471bf0Spatrick if (!IsOpen) {
68573471bf0Spatrick IsOpen = true;
68673471bf0Spatrick print('<');
68773471bf0Spatrick } else {
68873471bf0Spatrick print(", ");
68973471bf0Spatrick }
69073471bf0Spatrick print(parseIdentifier().Name);
69173471bf0Spatrick print(" = ");
69273471bf0Spatrick demangleType();
69373471bf0Spatrick }
69473471bf0Spatrick if (IsOpen)
69573471bf0Spatrick print(">");
69673471bf0Spatrick }
69773471bf0Spatrick
69873471bf0Spatrick // Demangles optional binder and updates the number of bound lifetimes.
69973471bf0Spatrick //
70073471bf0Spatrick // <binder> = "G" <base-62-number>
demangleOptionalBinder()70173471bf0Spatrick void Demangler::demangleOptionalBinder() {
70273471bf0Spatrick uint64_t Binder = parseOptionalBase62Number('G');
70373471bf0Spatrick if (Error || Binder == 0)
70473471bf0Spatrick return;
70573471bf0Spatrick
70673471bf0Spatrick // In valid inputs each bound lifetime is referenced later. Referencing a
70773471bf0Spatrick // lifetime requires at least one byte of input. Reject inputs that are too
70873471bf0Spatrick // short to reference all bound lifetimes. Otherwise demangling of invalid
70973471bf0Spatrick // binders could generate excessive amounts of output.
71073471bf0Spatrick if (Binder >= Input.size() - BoundLifetimes) {
71173471bf0Spatrick Error = true;
71273471bf0Spatrick return;
71373471bf0Spatrick }
71473471bf0Spatrick
71573471bf0Spatrick print("for<");
71673471bf0Spatrick for (size_t I = 0; I != Binder; ++I) {
71773471bf0Spatrick BoundLifetimes += 1;
71873471bf0Spatrick if (I > 0)
71973471bf0Spatrick print(", ");
72073471bf0Spatrick printLifetime(1);
72173471bf0Spatrick }
72273471bf0Spatrick print("> ");
72373471bf0Spatrick }
72473471bf0Spatrick
72573471bf0Spatrick // <const> = <basic-type> <const-data>
72673471bf0Spatrick // | "p" // placeholder
72773471bf0Spatrick // | <backref>
demangleConst()72873471bf0Spatrick void Demangler::demangleConst() {
72973471bf0Spatrick if (Error || RecursionLevel >= MaxRecursionLevel) {
73073471bf0Spatrick Error = true;
73173471bf0Spatrick return;
73273471bf0Spatrick }
733*d415bd75Srobert ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
73473471bf0Spatrick
73573471bf0Spatrick char C = consume();
73673471bf0Spatrick BasicType Type;
73773471bf0Spatrick if (parseBasicType(C, Type)) {
73873471bf0Spatrick switch (Type) {
73973471bf0Spatrick case BasicType::I8:
74073471bf0Spatrick case BasicType::I16:
74173471bf0Spatrick case BasicType::I32:
74273471bf0Spatrick case BasicType::I64:
74373471bf0Spatrick case BasicType::I128:
74473471bf0Spatrick case BasicType::ISize:
74573471bf0Spatrick case BasicType::U8:
74673471bf0Spatrick case BasicType::U16:
74773471bf0Spatrick case BasicType::U32:
74873471bf0Spatrick case BasicType::U64:
74973471bf0Spatrick case BasicType::U128:
75073471bf0Spatrick case BasicType::USize:
75173471bf0Spatrick demangleConstInt();
75273471bf0Spatrick break;
75373471bf0Spatrick case BasicType::Bool:
75473471bf0Spatrick demangleConstBool();
75573471bf0Spatrick break;
75673471bf0Spatrick case BasicType::Char:
75773471bf0Spatrick demangleConstChar();
75873471bf0Spatrick break;
75973471bf0Spatrick case BasicType::Placeholder:
76073471bf0Spatrick print('_');
76173471bf0Spatrick break;
76273471bf0Spatrick default:
76373471bf0Spatrick Error = true;
76473471bf0Spatrick break;
76573471bf0Spatrick }
76673471bf0Spatrick } else if (C == 'B') {
76773471bf0Spatrick demangleBackref([&] { demangleConst(); });
76873471bf0Spatrick } else {
76973471bf0Spatrick Error = true;
77073471bf0Spatrick }
77173471bf0Spatrick }
77273471bf0Spatrick
77373471bf0Spatrick // <const-data> = ["n"] <hex-number>
demangleConstInt()77473471bf0Spatrick void Demangler::demangleConstInt() {
77573471bf0Spatrick if (consumeIf('n'))
77673471bf0Spatrick print('-');
77773471bf0Spatrick
77873471bf0Spatrick StringView HexDigits;
77973471bf0Spatrick uint64_t Value = parseHexNumber(HexDigits);
78073471bf0Spatrick if (HexDigits.size() <= 16) {
78173471bf0Spatrick printDecimalNumber(Value);
78273471bf0Spatrick } else {
78373471bf0Spatrick print("0x");
78473471bf0Spatrick print(HexDigits);
78573471bf0Spatrick }
78673471bf0Spatrick }
78773471bf0Spatrick
78873471bf0Spatrick // <const-data> = "0_" // false
78973471bf0Spatrick // | "1_" // true
demangleConstBool()79073471bf0Spatrick void Demangler::demangleConstBool() {
79173471bf0Spatrick StringView HexDigits;
79273471bf0Spatrick parseHexNumber(HexDigits);
79373471bf0Spatrick if (HexDigits == "0")
79473471bf0Spatrick print("false");
79573471bf0Spatrick else if (HexDigits == "1")
79673471bf0Spatrick print("true");
79773471bf0Spatrick else
79873471bf0Spatrick Error = true;
79973471bf0Spatrick }
80073471bf0Spatrick
80173471bf0Spatrick /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)80273471bf0Spatrick static bool isAsciiPrintable(uint64_t CodePoint) {
80373471bf0Spatrick return 0x20 <= CodePoint && CodePoint <= 0x7e;
80473471bf0Spatrick }
80573471bf0Spatrick
80673471bf0Spatrick // <const-data> = <hex-number>
demangleConstChar()80773471bf0Spatrick void Demangler::demangleConstChar() {
80873471bf0Spatrick StringView HexDigits;
80973471bf0Spatrick uint64_t CodePoint = parseHexNumber(HexDigits);
81073471bf0Spatrick if (Error || HexDigits.size() > 6) {
81173471bf0Spatrick Error = true;
81273471bf0Spatrick return;
81373471bf0Spatrick }
81473471bf0Spatrick
81573471bf0Spatrick print("'");
81673471bf0Spatrick switch (CodePoint) {
81773471bf0Spatrick case '\t':
81873471bf0Spatrick print(R"(\t)");
81973471bf0Spatrick break;
82073471bf0Spatrick case '\r':
82173471bf0Spatrick print(R"(\r)");
82273471bf0Spatrick break;
82373471bf0Spatrick case '\n':
82473471bf0Spatrick print(R"(\n)");
82573471bf0Spatrick break;
82673471bf0Spatrick case '\\':
82773471bf0Spatrick print(R"(\\)");
82873471bf0Spatrick break;
82973471bf0Spatrick case '"':
83073471bf0Spatrick print(R"(")");
83173471bf0Spatrick break;
83273471bf0Spatrick case '\'':
83373471bf0Spatrick print(R"(\')");
83473471bf0Spatrick break;
83573471bf0Spatrick default:
83673471bf0Spatrick if (isAsciiPrintable(CodePoint)) {
83773471bf0Spatrick char C = CodePoint;
83873471bf0Spatrick print(C);
83973471bf0Spatrick } else {
84073471bf0Spatrick print(R"(\u{)");
84173471bf0Spatrick print(HexDigits);
84273471bf0Spatrick print('}');
84373471bf0Spatrick }
84473471bf0Spatrick break;
84573471bf0Spatrick }
84673471bf0Spatrick print('\'');
84773471bf0Spatrick }
84873471bf0Spatrick
84973471bf0Spatrick // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()85073471bf0Spatrick Identifier Demangler::parseIdentifier() {
85173471bf0Spatrick bool Punycode = consumeIf('u');
85273471bf0Spatrick uint64_t Bytes = parseDecimalNumber();
85373471bf0Spatrick
85473471bf0Spatrick // Underscore resolves the ambiguity when identifier starts with a decimal
85573471bf0Spatrick // digit or another underscore.
85673471bf0Spatrick consumeIf('_');
85773471bf0Spatrick
85873471bf0Spatrick if (Error || Bytes > Input.size() - Position) {
85973471bf0Spatrick Error = true;
86073471bf0Spatrick return {};
86173471bf0Spatrick }
86273471bf0Spatrick StringView S = Input.substr(Position, Bytes);
86373471bf0Spatrick Position += Bytes;
86473471bf0Spatrick
86573471bf0Spatrick if (!std::all_of(S.begin(), S.end(), isValid)) {
86673471bf0Spatrick Error = true;
86773471bf0Spatrick return {};
86873471bf0Spatrick }
86973471bf0Spatrick
87073471bf0Spatrick return {S, Punycode};
87173471bf0Spatrick }
87273471bf0Spatrick
87373471bf0Spatrick // Parses optional base 62 number. The presence of a number is determined using
87473471bf0Spatrick // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
87573471bf0Spatrick //
87673471bf0Spatrick // This function is indended for parsing disambiguators and binders which when
87773471bf0Spatrick // not present have their value interpreted as 0, and otherwise as decoded
87873471bf0Spatrick // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
87973471bf0Spatrick // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)88073471bf0Spatrick uint64_t Demangler::parseOptionalBase62Number(char Tag) {
88173471bf0Spatrick if (!consumeIf(Tag))
88273471bf0Spatrick return 0;
88373471bf0Spatrick
88473471bf0Spatrick uint64_t N = parseBase62Number();
88573471bf0Spatrick if (Error || !addAssign(N, 1))
88673471bf0Spatrick return 0;
88773471bf0Spatrick
88873471bf0Spatrick return N;
88973471bf0Spatrick }
89073471bf0Spatrick
89173471bf0Spatrick // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
89273471bf0Spatrick // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
89373471bf0Spatrick // "1_" encodes 2, etc.
89473471bf0Spatrick //
89573471bf0Spatrick // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()89673471bf0Spatrick uint64_t Demangler::parseBase62Number() {
89773471bf0Spatrick if (consumeIf('_'))
89873471bf0Spatrick return 0;
89973471bf0Spatrick
90073471bf0Spatrick uint64_t Value = 0;
90173471bf0Spatrick
90273471bf0Spatrick while (true) {
90373471bf0Spatrick uint64_t Digit;
90473471bf0Spatrick char C = consume();
90573471bf0Spatrick
90673471bf0Spatrick if (C == '_') {
90773471bf0Spatrick break;
90873471bf0Spatrick } else if (isDigit(C)) {
90973471bf0Spatrick Digit = C - '0';
91073471bf0Spatrick } else if (isLower(C)) {
91173471bf0Spatrick Digit = 10 + (C - 'a');
91273471bf0Spatrick } else if (isUpper(C)) {
91373471bf0Spatrick Digit = 10 + 26 + (C - 'A');
91473471bf0Spatrick } else {
91573471bf0Spatrick Error = true;
91673471bf0Spatrick return 0;
91773471bf0Spatrick }
91873471bf0Spatrick
91973471bf0Spatrick if (!mulAssign(Value, 62))
92073471bf0Spatrick return 0;
92173471bf0Spatrick
92273471bf0Spatrick if (!addAssign(Value, Digit))
92373471bf0Spatrick return 0;
92473471bf0Spatrick }
92573471bf0Spatrick
92673471bf0Spatrick if (!addAssign(Value, 1))
92773471bf0Spatrick return 0;
92873471bf0Spatrick
92973471bf0Spatrick return Value;
93073471bf0Spatrick }
93173471bf0Spatrick
93273471bf0Spatrick // Parses a decimal number that had been encoded without any leading zeros.
93373471bf0Spatrick //
93473471bf0Spatrick // <decimal-number> = "0"
93573471bf0Spatrick // | <1-9> {<0-9>}
parseDecimalNumber()93673471bf0Spatrick uint64_t Demangler::parseDecimalNumber() {
93773471bf0Spatrick char C = look();
93873471bf0Spatrick if (!isDigit(C)) {
93973471bf0Spatrick Error = true;
94073471bf0Spatrick return 0;
94173471bf0Spatrick }
94273471bf0Spatrick
94373471bf0Spatrick if (C == '0') {
94473471bf0Spatrick consume();
94573471bf0Spatrick return 0;
94673471bf0Spatrick }
94773471bf0Spatrick
94873471bf0Spatrick uint64_t Value = 0;
94973471bf0Spatrick
95073471bf0Spatrick while (isDigit(look())) {
95173471bf0Spatrick if (!mulAssign(Value, 10)) {
95273471bf0Spatrick Error = true;
95373471bf0Spatrick return 0;
95473471bf0Spatrick }
95573471bf0Spatrick
95673471bf0Spatrick uint64_t D = consume() - '0';
95773471bf0Spatrick if (!addAssign(Value, D))
95873471bf0Spatrick return 0;
95973471bf0Spatrick }
96073471bf0Spatrick
96173471bf0Spatrick return Value;
96273471bf0Spatrick }
96373471bf0Spatrick
96473471bf0Spatrick // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
96573471bf0Spatrick // value and stores hex digits in HexDigits. The return value is unspecified if
96673471bf0Spatrick // HexDigits.size() > 16.
96773471bf0Spatrick //
96873471bf0Spatrick // <hex-number> = "0_"
96973471bf0Spatrick // | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(StringView & HexDigits)97073471bf0Spatrick uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
97173471bf0Spatrick size_t Start = Position;
97273471bf0Spatrick uint64_t Value = 0;
97373471bf0Spatrick
97473471bf0Spatrick if (!isHexDigit(look()))
97573471bf0Spatrick Error = true;
97673471bf0Spatrick
97773471bf0Spatrick if (consumeIf('0')) {
97873471bf0Spatrick if (!consumeIf('_'))
97973471bf0Spatrick Error = true;
98073471bf0Spatrick } else {
98173471bf0Spatrick while (!Error && !consumeIf('_')) {
98273471bf0Spatrick char C = consume();
98373471bf0Spatrick Value *= 16;
98473471bf0Spatrick if (isDigit(C))
98573471bf0Spatrick Value += C - '0';
98673471bf0Spatrick else if ('a' <= C && C <= 'f')
98773471bf0Spatrick Value += 10 + (C - 'a');
98873471bf0Spatrick else
98973471bf0Spatrick Error = true;
99073471bf0Spatrick }
99173471bf0Spatrick }
99273471bf0Spatrick
99373471bf0Spatrick if (Error) {
99473471bf0Spatrick HexDigits = StringView();
99573471bf0Spatrick return 0;
99673471bf0Spatrick }
99773471bf0Spatrick
99873471bf0Spatrick size_t End = Position - 1;
99973471bf0Spatrick assert(Start < End);
100073471bf0Spatrick HexDigits = Input.substr(Start, End - Start);
100173471bf0Spatrick return Value;
100273471bf0Spatrick }
100373471bf0Spatrick
print(char C)100473471bf0Spatrick void Demangler::print(char C) {
100573471bf0Spatrick if (Error || !Print)
100673471bf0Spatrick return;
100773471bf0Spatrick
100873471bf0Spatrick Output += C;
100973471bf0Spatrick }
101073471bf0Spatrick
print(StringView S)101173471bf0Spatrick void Demangler::print(StringView S) {
101273471bf0Spatrick if (Error || !Print)
101373471bf0Spatrick return;
101473471bf0Spatrick
101573471bf0Spatrick Output += S;
101673471bf0Spatrick }
101773471bf0Spatrick
printDecimalNumber(uint64_t N)101873471bf0Spatrick void Demangler::printDecimalNumber(uint64_t N) {
101973471bf0Spatrick if (Error || !Print)
102073471bf0Spatrick return;
102173471bf0Spatrick
102273471bf0Spatrick Output << N;
102373471bf0Spatrick }
102473471bf0Spatrick
102573471bf0Spatrick // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
102673471bf0Spatrick // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
102773471bf0Spatrick // bound by one of the enclosing binders.
printLifetime(uint64_t Index)102873471bf0Spatrick void Demangler::printLifetime(uint64_t Index) {
102973471bf0Spatrick if (Index == 0) {
103073471bf0Spatrick print("'_");
103173471bf0Spatrick return;
103273471bf0Spatrick }
103373471bf0Spatrick
103473471bf0Spatrick if (Index - 1 >= BoundLifetimes) {
103573471bf0Spatrick Error = true;
103673471bf0Spatrick return;
103773471bf0Spatrick }
103873471bf0Spatrick
103973471bf0Spatrick uint64_t Depth = BoundLifetimes - Index;
104073471bf0Spatrick print('\'');
104173471bf0Spatrick if (Depth < 26) {
104273471bf0Spatrick char C = 'a' + Depth;
104373471bf0Spatrick print(C);
104473471bf0Spatrick } else {
104573471bf0Spatrick print('z');
104673471bf0Spatrick printDecimalNumber(Depth - 26 + 1);
104773471bf0Spatrick }
104873471bf0Spatrick }
104973471bf0Spatrick
decodePunycodeDigit(char C,size_t & Value)1050*d415bd75Srobert static inline bool decodePunycodeDigit(char C, size_t &Value) {
1051*d415bd75Srobert if (isLower(C)) {
1052*d415bd75Srobert Value = C - 'a';
1053*d415bd75Srobert return true;
1054*d415bd75Srobert }
1055*d415bd75Srobert
1056*d415bd75Srobert if (isDigit(C)) {
1057*d415bd75Srobert Value = 26 + (C - '0');
1058*d415bd75Srobert return true;
1059*d415bd75Srobert }
1060*d415bd75Srobert
1061*d415bd75Srobert return false;
1062*d415bd75Srobert }
1063*d415bd75Srobert
removeNullBytes(OutputBuffer & Output,size_t StartIdx)1064*d415bd75Srobert static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1065*d415bd75Srobert char *Buffer = Output.getBuffer();
1066*d415bd75Srobert char *Start = Buffer + StartIdx;
1067*d415bd75Srobert char *End = Buffer + Output.getCurrentPosition();
1068*d415bd75Srobert Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1069*d415bd75Srobert }
1070*d415bd75Srobert
1071*d415bd75Srobert // Encodes code point as UTF-8 and stores results in Output. Returns false if
1072*d415bd75Srobert // CodePoint is not a valid unicode scalar value.
encodeUTF8(size_t CodePoint,char * Output)1073*d415bd75Srobert static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1074*d415bd75Srobert if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1075*d415bd75Srobert return false;
1076*d415bd75Srobert
1077*d415bd75Srobert if (CodePoint <= 0x7F) {
1078*d415bd75Srobert Output[0] = CodePoint;
1079*d415bd75Srobert return true;
1080*d415bd75Srobert }
1081*d415bd75Srobert
1082*d415bd75Srobert if (CodePoint <= 0x7FF) {
1083*d415bd75Srobert Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1084*d415bd75Srobert Output[1] = 0x80 | (CodePoint & 0x3F);
1085*d415bd75Srobert return true;
1086*d415bd75Srobert }
1087*d415bd75Srobert
1088*d415bd75Srobert if (CodePoint <= 0xFFFF) {
1089*d415bd75Srobert Output[0] = 0xE0 | (CodePoint >> 12);
1090*d415bd75Srobert Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1091*d415bd75Srobert Output[2] = 0x80 | (CodePoint & 0x3F);
1092*d415bd75Srobert return true;
1093*d415bd75Srobert }
1094*d415bd75Srobert
1095*d415bd75Srobert if (CodePoint <= 0x10FFFF) {
1096*d415bd75Srobert Output[0] = 0xF0 | (CodePoint >> 18);
1097*d415bd75Srobert Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1098*d415bd75Srobert Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1099*d415bd75Srobert Output[3] = 0x80 | (CodePoint & 0x3F);
1100*d415bd75Srobert return true;
1101*d415bd75Srobert }
1102*d415bd75Srobert
1103*d415bd75Srobert return false;
1104*d415bd75Srobert }
1105*d415bd75Srobert
1106*d415bd75Srobert // Decodes string encoded using punycode and appends results to Output.
1107*d415bd75Srobert // Returns true if decoding was successful.
decodePunycode(StringView Input,OutputBuffer & Output)1108*d415bd75Srobert static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1109*d415bd75Srobert size_t OutputSize = Output.getCurrentPosition();
1110*d415bd75Srobert size_t InputIdx = 0;
1111*d415bd75Srobert
1112*d415bd75Srobert // Rust uses an underscore as a delimiter.
1113*d415bd75Srobert size_t DelimiterPos = StringView::npos;
1114*d415bd75Srobert for (size_t I = 0; I != Input.size(); ++I)
1115*d415bd75Srobert if (Input[I] == '_')
1116*d415bd75Srobert DelimiterPos = I;
1117*d415bd75Srobert
1118*d415bd75Srobert if (DelimiterPos != StringView::npos) {
1119*d415bd75Srobert // Copy basic code points before the last delimiter to the output.
1120*d415bd75Srobert for (; InputIdx != DelimiterPos; ++InputIdx) {
1121*d415bd75Srobert char C = Input[InputIdx];
1122*d415bd75Srobert if (!isValid(C))
1123*d415bd75Srobert return false;
1124*d415bd75Srobert // Code points are padded with zeros while decoding is in progress.
1125*d415bd75Srobert char UTF8[4] = {C};
1126*d415bd75Srobert Output += StringView(UTF8, UTF8 + 4);
1127*d415bd75Srobert }
1128*d415bd75Srobert // Skip over the delimiter.
1129*d415bd75Srobert ++InputIdx;
1130*d415bd75Srobert }
1131*d415bd75Srobert
1132*d415bd75Srobert size_t Base = 36;
1133*d415bd75Srobert size_t Skew = 38;
1134*d415bd75Srobert size_t Bias = 72;
1135*d415bd75Srobert size_t N = 0x80;
1136*d415bd75Srobert size_t TMin = 1;
1137*d415bd75Srobert size_t TMax = 26;
1138*d415bd75Srobert size_t Damp = 700;
1139*d415bd75Srobert
1140*d415bd75Srobert auto Adapt = [&](size_t Delta, size_t NumPoints) {
1141*d415bd75Srobert Delta /= Damp;
1142*d415bd75Srobert Delta += Delta / NumPoints;
1143*d415bd75Srobert Damp = 2;
1144*d415bd75Srobert
1145*d415bd75Srobert size_t K = 0;
1146*d415bd75Srobert while (Delta > (Base - TMin) * TMax / 2) {
1147*d415bd75Srobert Delta /= Base - TMin;
1148*d415bd75Srobert K += Base;
1149*d415bd75Srobert }
1150*d415bd75Srobert return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1151*d415bd75Srobert };
1152*d415bd75Srobert
1153*d415bd75Srobert // Main decoding loop.
1154*d415bd75Srobert for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1155*d415bd75Srobert size_t OldI = I;
1156*d415bd75Srobert size_t W = 1;
1157*d415bd75Srobert size_t Max = std::numeric_limits<size_t>::max();
1158*d415bd75Srobert for (size_t K = Base; true; K += Base) {
1159*d415bd75Srobert if (InputIdx == Input.size())
1160*d415bd75Srobert return false;
1161*d415bd75Srobert char C = Input[InputIdx++];
1162*d415bd75Srobert size_t Digit = 0;
1163*d415bd75Srobert if (!decodePunycodeDigit(C, Digit))
1164*d415bd75Srobert return false;
1165*d415bd75Srobert
1166*d415bd75Srobert if (Digit > (Max - I) / W)
1167*d415bd75Srobert return false;
1168*d415bd75Srobert I += Digit * W;
1169*d415bd75Srobert
1170*d415bd75Srobert size_t T;
1171*d415bd75Srobert if (K <= Bias)
1172*d415bd75Srobert T = TMin;
1173*d415bd75Srobert else if (K >= Bias + TMax)
1174*d415bd75Srobert T = TMax;
1175*d415bd75Srobert else
1176*d415bd75Srobert T = K - Bias;
1177*d415bd75Srobert
1178*d415bd75Srobert if (Digit < T)
1179*d415bd75Srobert break;
1180*d415bd75Srobert
1181*d415bd75Srobert if (W > Max / (Base - T))
1182*d415bd75Srobert return false;
1183*d415bd75Srobert W *= (Base - T);
1184*d415bd75Srobert }
1185*d415bd75Srobert size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1186*d415bd75Srobert Bias = Adapt(I - OldI, NumPoints);
1187*d415bd75Srobert
1188*d415bd75Srobert if (I / NumPoints > Max - N)
1189*d415bd75Srobert return false;
1190*d415bd75Srobert N += I / NumPoints;
1191*d415bd75Srobert I = I % NumPoints;
1192*d415bd75Srobert
1193*d415bd75Srobert // Insert N at position I in the output.
1194*d415bd75Srobert char UTF8[4] = {};
1195*d415bd75Srobert if (!encodeUTF8(N, UTF8))
1196*d415bd75Srobert return false;
1197*d415bd75Srobert Output.insert(OutputSize + I * 4, UTF8, 4);
1198*d415bd75Srobert }
1199*d415bd75Srobert
1200*d415bd75Srobert removeNullBytes(Output, OutputSize);
1201*d415bd75Srobert return true;
1202*d415bd75Srobert }
1203*d415bd75Srobert
printIdentifier(Identifier Ident)1204*d415bd75Srobert void Demangler::printIdentifier(Identifier Ident) {
1205*d415bd75Srobert if (Error || !Print)
1206*d415bd75Srobert return;
1207*d415bd75Srobert
1208*d415bd75Srobert if (Ident.Punycode) {
1209*d415bd75Srobert if (!decodePunycode(Ident.Name, Output))
1210*d415bd75Srobert Error = true;
1211*d415bd75Srobert } else {
1212*d415bd75Srobert print(Ident.Name);
1213*d415bd75Srobert }
1214*d415bd75Srobert }
1215*d415bd75Srobert
look() const121673471bf0Spatrick char Demangler::look() const {
121773471bf0Spatrick if (Error || Position >= Input.size())
121873471bf0Spatrick return 0;
121973471bf0Spatrick
122073471bf0Spatrick return Input[Position];
122173471bf0Spatrick }
122273471bf0Spatrick
consume()122373471bf0Spatrick char Demangler::consume() {
122473471bf0Spatrick if (Error || Position >= Input.size()) {
122573471bf0Spatrick Error = true;
122673471bf0Spatrick return 0;
122773471bf0Spatrick }
122873471bf0Spatrick
122973471bf0Spatrick return Input[Position++];
123073471bf0Spatrick }
123173471bf0Spatrick
consumeIf(char Prefix)123273471bf0Spatrick bool Demangler::consumeIf(char Prefix) {
123373471bf0Spatrick if (Error || Position >= Input.size() || Input[Position] != Prefix)
123473471bf0Spatrick return false;
123573471bf0Spatrick
123673471bf0Spatrick Position += 1;
123773471bf0Spatrick return true;
123873471bf0Spatrick }
123973471bf0Spatrick
124073471bf0Spatrick /// Computes A + B. When computation wraps around sets the error and returns
124173471bf0Spatrick /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)124273471bf0Spatrick bool Demangler::addAssign(uint64_t &A, uint64_t B) {
124373471bf0Spatrick if (A > std::numeric_limits<uint64_t>::max() - B) {
124473471bf0Spatrick Error = true;
124573471bf0Spatrick return false;
124673471bf0Spatrick }
124773471bf0Spatrick
124873471bf0Spatrick A += B;
124973471bf0Spatrick return true;
125073471bf0Spatrick }
125173471bf0Spatrick
125273471bf0Spatrick /// Computes A * B. When computation wraps around sets the error and returns
125373471bf0Spatrick /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)125473471bf0Spatrick bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
125573471bf0Spatrick if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
125673471bf0Spatrick Error = true;
125773471bf0Spatrick return false;
125873471bf0Spatrick }
125973471bf0Spatrick
126073471bf0Spatrick A *= B;
126173471bf0Spatrick return true;
126273471bf0Spatrick }
1263