1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringView.h"
16 #include "llvm/Demangle/Utility.h"
17
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23
24 using namespace llvm;
25
26 using llvm::itanium_demangle::OutputBuffer;
27 using llvm::itanium_demangle::ScopedOverride;
28 using llvm::itanium_demangle::StringView;
29
30 namespace {
31
32 struct Identifier {
33 StringView Name;
34 bool Punycode;
35
empty__anond672b4d40111::Identifier36 bool empty() const { return Name.empty(); }
37 };
38
39 enum class BasicType {
40 Bool,
41 Char,
42 I8,
43 I16,
44 I32,
45 I64,
46 I128,
47 ISize,
48 U8,
49 U16,
50 U32,
51 U64,
52 U128,
53 USize,
54 F32,
55 F64,
56 Str,
57 Placeholder,
58 Unit,
59 Variadic,
60 Never,
61 };
62
63 enum class IsInType {
64 No,
65 Yes,
66 };
67
68 enum class LeaveGenericsOpen {
69 No,
70 Yes,
71 };
72
73 class Demangler {
74 // Maximum recursion level. Used to avoid stack overflow.
75 size_t MaxRecursionLevel;
76 // Current recursion level.
77 size_t RecursionLevel;
78 size_t BoundLifetimes;
79 // Input string that is being demangled with "_R" prefix removed.
80 StringView Input;
81 // Position in the input string.
82 size_t Position;
83 // When true, print methods append the output to the stream.
84 // When false, the output is suppressed.
85 bool Print;
86 // True if an error occurred.
87 bool Error;
88
89 public:
90 // Demangled output.
91 OutputBuffer Output;
92
93 Demangler(size_t MaxRecursionLevel = 500);
94
95 bool demangle(StringView MangledName);
96
97 private:
98 bool demanglePath(IsInType Type,
99 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100 void demangleImplPath(IsInType InType);
101 void demangleGenericArg();
102 void demangleType();
103 void demangleFnSig();
104 void demangleDynBounds();
105 void demangleDynTrait();
106 void demangleOptionalBinder();
107 void demangleConst();
108 void demangleConstInt();
109 void demangleConstBool();
110 void demangleConstChar();
111
demangleBackref(Callable Demangler)112 template <typename Callable> void demangleBackref(Callable Demangler) {
113 uint64_t Backref = parseBase62Number();
114 if (Error || Backref >= Position) {
115 Error = true;
116 return;
117 }
118
119 if (!Print)
120 return;
121
122 ScopedOverride<size_t> SavePosition(Position, Position);
123 Position = Backref;
124 Demangler();
125 }
126
127 Identifier parseIdentifier();
128 uint64_t parseOptionalBase62Number(char Tag);
129 uint64_t parseBase62Number();
130 uint64_t parseDecimalNumber();
131 uint64_t parseHexNumber(StringView &HexDigits);
132
133 void print(char C);
134 void print(StringView S);
135 void printDecimalNumber(uint64_t N);
136 void printBasicType(BasicType);
137 void printLifetime(uint64_t Index);
138 void printIdentifier(Identifier Ident);
139
140 char look() const;
141 char consume();
142 bool consumeIf(char Prefix);
143
144 bool addAssign(uint64_t &A, uint64_t B);
145 bool mulAssign(uint64_t &A, uint64_t B);
146 };
147
148 } // namespace
149
rustDemangle(const char * MangledName)150 char *llvm::rustDemangle(const char *MangledName) {
151 if (MangledName == nullptr)
152 return nullptr;
153
154 // Return early if mangled name doesn't look like a Rust symbol.
155 StringView Mangled(MangledName);
156 if (!Mangled.startsWith("_R"))
157 return nullptr;
158
159 Demangler D;
160 if (!D.demangle(Mangled)) {
161 std::free(D.Output.getBuffer());
162 return nullptr;
163 }
164
165 D.Output += '\0';
166
167 return D.Output.getBuffer();
168 }
169
Demangler(size_t MaxRecursionLevel)170 Demangler::Demangler(size_t MaxRecursionLevel)
171 : MaxRecursionLevel(MaxRecursionLevel) {}
172
isDigit(const char C)173 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
174
isHexDigit(const char C)175 static inline bool isHexDigit(const char C) {
176 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
177 }
178
isLower(const char C)179 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
180
isUpper(const char C)181 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
182
183 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)184 static inline bool isValid(const char C) {
185 return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
186 }
187
188 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
189 // otherwise. The demangled symbol is stored in Output field. It is
190 // responsibility of the caller to free the memory behind the output stream.
191 //
192 // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(StringView Mangled)193 bool Demangler::demangle(StringView Mangled) {
194 Position = 0;
195 Error = false;
196 Print = true;
197 RecursionLevel = 0;
198 BoundLifetimes = 0;
199
200 if (!Mangled.consumeFront("_R")) {
201 Error = true;
202 return false;
203 }
204 size_t Dot = Mangled.find('.');
205 Input = Mangled.substr(0, Dot);
206 StringView Suffix = Mangled.dropFront(Dot);
207
208 demanglePath(IsInType::No);
209
210 if (Position != Input.size()) {
211 ScopedOverride<bool> SavePrint(Print, false);
212 demanglePath(IsInType::No);
213 }
214
215 if (Position != Input.size())
216 Error = true;
217
218 if (!Suffix.empty()) {
219 print(" (");
220 print(Suffix);
221 print(")");
222 }
223
224 return !Error;
225 }
226
227 // Demangles a path. InType indicates whether a path is inside a type. When
228 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
229 // output. Return value indicates whether generics arguments have been left
230 // open.
231 //
232 // <path> = "C" <identifier> // crate root
233 // | "M" <impl-path> <type> // <T> (inherent impl)
234 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
235 // | "Y" <type> <path> // <T as Trait> (trait definition)
236 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
237 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
238 // | <backref>
239 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
240 // <ns> = "C" // closure
241 // | "S" // shim
242 // | <A-Z> // other special namespaces
243 // | <a-z> // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)244 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
245 if (Error || RecursionLevel >= MaxRecursionLevel) {
246 Error = true;
247 return false;
248 }
249 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
250
251 switch (consume()) {
252 case 'C': {
253 parseOptionalBase62Number('s');
254 printIdentifier(parseIdentifier());
255 break;
256 }
257 case 'M': {
258 demangleImplPath(InType);
259 print("<");
260 demangleType();
261 print(">");
262 break;
263 }
264 case 'X': {
265 demangleImplPath(InType);
266 print("<");
267 demangleType();
268 print(" as ");
269 demanglePath(IsInType::Yes);
270 print(">");
271 break;
272 }
273 case 'Y': {
274 print("<");
275 demangleType();
276 print(" as ");
277 demanglePath(IsInType::Yes);
278 print(">");
279 break;
280 }
281 case 'N': {
282 char NS = consume();
283 if (!isLower(NS) && !isUpper(NS)) {
284 Error = true;
285 break;
286 }
287 demanglePath(InType);
288
289 uint64_t Disambiguator = parseOptionalBase62Number('s');
290 Identifier Ident = parseIdentifier();
291
292 if (isUpper(NS)) {
293 // Special namespaces
294 print("::{");
295 if (NS == 'C')
296 print("closure");
297 else if (NS == 'S')
298 print("shim");
299 else
300 print(NS);
301 if (!Ident.empty()) {
302 print(":");
303 printIdentifier(Ident);
304 }
305 print('#');
306 printDecimalNumber(Disambiguator);
307 print('}');
308 } else {
309 // Implementation internal namespaces.
310 if (!Ident.empty()) {
311 print("::");
312 printIdentifier(Ident);
313 }
314 }
315 break;
316 }
317 case 'I': {
318 demanglePath(InType);
319 // Omit "::" when in a type, where it is optional.
320 if (InType == IsInType::No)
321 print("::");
322 print("<");
323 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
324 if (I > 0)
325 print(", ");
326 demangleGenericArg();
327 }
328 if (LeaveOpen == LeaveGenericsOpen::Yes)
329 return true;
330 else
331 print(">");
332 break;
333 }
334 case 'B': {
335 bool IsOpen = false;
336 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
337 return IsOpen;
338 }
339 default:
340 Error = true;
341 break;
342 }
343
344 return false;
345 }
346
347 // <impl-path> = [<disambiguator>] <path>
348 // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)349 void Demangler::demangleImplPath(IsInType InType) {
350 ScopedOverride<bool> SavePrint(Print, false);
351 parseOptionalBase62Number('s');
352 demanglePath(InType);
353 }
354
355 // <generic-arg> = <lifetime>
356 // | <type>
357 // | "K" <const>
358 // <lifetime> = "L" <base-62-number>
demangleGenericArg()359 void Demangler::demangleGenericArg() {
360 if (consumeIf('L'))
361 printLifetime(parseBase62Number());
362 else if (consumeIf('K'))
363 demangleConst();
364 else
365 demangleType();
366 }
367
368 // <basic-type> = "a" // i8
369 // | "b" // bool
370 // | "c" // char
371 // | "d" // f64
372 // | "e" // str
373 // | "f" // f32
374 // | "h" // u8
375 // | "i" // isize
376 // | "j" // usize
377 // | "l" // i32
378 // | "m" // u32
379 // | "n" // i128
380 // | "o" // u128
381 // | "s" // i16
382 // | "t" // u16
383 // | "u" // ()
384 // | "v" // ...
385 // | "x" // i64
386 // | "y" // u64
387 // | "z" // !
388 // | "p" // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)389 static bool parseBasicType(char C, BasicType &Type) {
390 switch (C) {
391 case 'a':
392 Type = BasicType::I8;
393 return true;
394 case 'b':
395 Type = BasicType::Bool;
396 return true;
397 case 'c':
398 Type = BasicType::Char;
399 return true;
400 case 'd':
401 Type = BasicType::F64;
402 return true;
403 case 'e':
404 Type = BasicType::Str;
405 return true;
406 case 'f':
407 Type = BasicType::F32;
408 return true;
409 case 'h':
410 Type = BasicType::U8;
411 return true;
412 case 'i':
413 Type = BasicType::ISize;
414 return true;
415 case 'j':
416 Type = BasicType::USize;
417 return true;
418 case 'l':
419 Type = BasicType::I32;
420 return true;
421 case 'm':
422 Type = BasicType::U32;
423 return true;
424 case 'n':
425 Type = BasicType::I128;
426 return true;
427 case 'o':
428 Type = BasicType::U128;
429 return true;
430 case 'p':
431 Type = BasicType::Placeholder;
432 return true;
433 case 's':
434 Type = BasicType::I16;
435 return true;
436 case 't':
437 Type = BasicType::U16;
438 return true;
439 case 'u':
440 Type = BasicType::Unit;
441 return true;
442 case 'v':
443 Type = BasicType::Variadic;
444 return true;
445 case 'x':
446 Type = BasicType::I64;
447 return true;
448 case 'y':
449 Type = BasicType::U64;
450 return true;
451 case 'z':
452 Type = BasicType::Never;
453 return true;
454 default:
455 return false;
456 }
457 }
458
printBasicType(BasicType Type)459 void Demangler::printBasicType(BasicType Type) {
460 switch (Type) {
461 case BasicType::Bool:
462 print("bool");
463 break;
464 case BasicType::Char:
465 print("char");
466 break;
467 case BasicType::I8:
468 print("i8");
469 break;
470 case BasicType::I16:
471 print("i16");
472 break;
473 case BasicType::I32:
474 print("i32");
475 break;
476 case BasicType::I64:
477 print("i64");
478 break;
479 case BasicType::I128:
480 print("i128");
481 break;
482 case BasicType::ISize:
483 print("isize");
484 break;
485 case BasicType::U8:
486 print("u8");
487 break;
488 case BasicType::U16:
489 print("u16");
490 break;
491 case BasicType::U32:
492 print("u32");
493 break;
494 case BasicType::U64:
495 print("u64");
496 break;
497 case BasicType::U128:
498 print("u128");
499 break;
500 case BasicType::USize:
501 print("usize");
502 break;
503 case BasicType::F32:
504 print("f32");
505 break;
506 case BasicType::F64:
507 print("f64");
508 break;
509 case BasicType::Str:
510 print("str");
511 break;
512 case BasicType::Placeholder:
513 print("_");
514 break;
515 case BasicType::Unit:
516 print("()");
517 break;
518 case BasicType::Variadic:
519 print("...");
520 break;
521 case BasicType::Never:
522 print("!");
523 break;
524 }
525 }
526
527 // <type> = | <basic-type>
528 // | <path> // named type
529 // | "A" <type> <const> // [T; N]
530 // | "S" <type> // [T]
531 // | "T" {<type>} "E" // (T1, T2, T3, ...)
532 // | "R" [<lifetime>] <type> // &T
533 // | "Q" [<lifetime>] <type> // &mut T
534 // | "P" <type> // *const T
535 // | "O" <type> // *mut T
536 // | "F" <fn-sig> // fn(...) -> ...
537 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
538 // | <backref> // backref
demangleType()539 void Demangler::demangleType() {
540 if (Error || RecursionLevel >= MaxRecursionLevel) {
541 Error = true;
542 return;
543 }
544 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
545
546 size_t Start = Position;
547 char C = consume();
548 BasicType Type;
549 if (parseBasicType(C, Type))
550 return printBasicType(Type);
551
552 switch (C) {
553 case 'A':
554 print("[");
555 demangleType();
556 print("; ");
557 demangleConst();
558 print("]");
559 break;
560 case 'S':
561 print("[");
562 demangleType();
563 print("]");
564 break;
565 case 'T': {
566 print("(");
567 size_t I = 0;
568 for (; !Error && !consumeIf('E'); ++I) {
569 if (I > 0)
570 print(", ");
571 demangleType();
572 }
573 if (I == 1)
574 print(",");
575 print(")");
576 break;
577 }
578 case 'R':
579 case 'Q':
580 print('&');
581 if (consumeIf('L')) {
582 if (auto Lifetime = parseBase62Number()) {
583 printLifetime(Lifetime);
584 print(' ');
585 }
586 }
587 if (C == 'Q')
588 print("mut ");
589 demangleType();
590 break;
591 case 'P':
592 print("*const ");
593 demangleType();
594 break;
595 case 'O':
596 print("*mut ");
597 demangleType();
598 break;
599 case 'F':
600 demangleFnSig();
601 break;
602 case 'D':
603 demangleDynBounds();
604 if (consumeIf('L')) {
605 if (auto Lifetime = parseBase62Number()) {
606 print(" + ");
607 printLifetime(Lifetime);
608 }
609 } else {
610 Error = true;
611 }
612 break;
613 case 'B':
614 demangleBackref([&] { demangleType(); });
615 break;
616 default:
617 Position = Start;
618 demanglePath(IsInType::Yes);
619 break;
620 }
621 }
622
623 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
624 // <abi> = "C"
625 // | <undisambiguated-identifier>
demangleFnSig()626 void Demangler::demangleFnSig() {
627 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
628 demangleOptionalBinder();
629
630 if (consumeIf('U'))
631 print("unsafe ");
632
633 if (consumeIf('K')) {
634 print("extern \"");
635 if (consumeIf('C')) {
636 print("C");
637 } else {
638 Identifier Ident = parseIdentifier();
639 if (Ident.Punycode)
640 Error = true;
641 for (char C : Ident.Name) {
642 // When mangling ABI string, the "-" is replaced with "_".
643 if (C == '_')
644 C = '-';
645 print(C);
646 }
647 }
648 print("\" ");
649 }
650
651 print("fn(");
652 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
653 if (I > 0)
654 print(", ");
655 demangleType();
656 }
657 print(")");
658
659 if (consumeIf('u')) {
660 // Skip the unit type from the output.
661 } else {
662 print(" -> ");
663 demangleType();
664 }
665 }
666
667 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()668 void Demangler::demangleDynBounds() {
669 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
670 print("dyn ");
671 demangleOptionalBinder();
672 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
673 if (I > 0)
674 print(" + ");
675 demangleDynTrait();
676 }
677 }
678
679 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
680 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()681 void Demangler::demangleDynTrait() {
682 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
683 while (!Error && consumeIf('p')) {
684 if (!IsOpen) {
685 IsOpen = true;
686 print('<');
687 } else {
688 print(", ");
689 }
690 print(parseIdentifier().Name);
691 print(" = ");
692 demangleType();
693 }
694 if (IsOpen)
695 print(">");
696 }
697
698 // Demangles optional binder and updates the number of bound lifetimes.
699 //
700 // <binder> = "G" <base-62-number>
demangleOptionalBinder()701 void Demangler::demangleOptionalBinder() {
702 uint64_t Binder = parseOptionalBase62Number('G');
703 if (Error || Binder == 0)
704 return;
705
706 // In valid inputs each bound lifetime is referenced later. Referencing a
707 // lifetime requires at least one byte of input. Reject inputs that are too
708 // short to reference all bound lifetimes. Otherwise demangling of invalid
709 // binders could generate excessive amounts of output.
710 if (Binder >= Input.size() - BoundLifetimes) {
711 Error = true;
712 return;
713 }
714
715 print("for<");
716 for (size_t I = 0; I != Binder; ++I) {
717 BoundLifetimes += 1;
718 if (I > 0)
719 print(", ");
720 printLifetime(1);
721 }
722 print("> ");
723 }
724
725 // <const> = <basic-type> <const-data>
726 // | "p" // placeholder
727 // | <backref>
demangleConst()728 void Demangler::demangleConst() {
729 if (Error || RecursionLevel >= MaxRecursionLevel) {
730 Error = true;
731 return;
732 }
733 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
734
735 char C = consume();
736 BasicType Type;
737 if (parseBasicType(C, Type)) {
738 switch (Type) {
739 case BasicType::I8:
740 case BasicType::I16:
741 case BasicType::I32:
742 case BasicType::I64:
743 case BasicType::I128:
744 case BasicType::ISize:
745 case BasicType::U8:
746 case BasicType::U16:
747 case BasicType::U32:
748 case BasicType::U64:
749 case BasicType::U128:
750 case BasicType::USize:
751 demangleConstInt();
752 break;
753 case BasicType::Bool:
754 demangleConstBool();
755 break;
756 case BasicType::Char:
757 demangleConstChar();
758 break;
759 case BasicType::Placeholder:
760 print('_');
761 break;
762 default:
763 Error = true;
764 break;
765 }
766 } else if (C == 'B') {
767 demangleBackref([&] { demangleConst(); });
768 } else {
769 Error = true;
770 }
771 }
772
773 // <const-data> = ["n"] <hex-number>
demangleConstInt()774 void Demangler::demangleConstInt() {
775 if (consumeIf('n'))
776 print('-');
777
778 StringView HexDigits;
779 uint64_t Value = parseHexNumber(HexDigits);
780 if (HexDigits.size() <= 16) {
781 printDecimalNumber(Value);
782 } else {
783 print("0x");
784 print(HexDigits);
785 }
786 }
787
788 // <const-data> = "0_" // false
789 // | "1_" // true
demangleConstBool()790 void Demangler::demangleConstBool() {
791 StringView HexDigits;
792 parseHexNumber(HexDigits);
793 if (HexDigits == "0")
794 print("false");
795 else if (HexDigits == "1")
796 print("true");
797 else
798 Error = true;
799 }
800
801 /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)802 static bool isAsciiPrintable(uint64_t CodePoint) {
803 return 0x20 <= CodePoint && CodePoint <= 0x7e;
804 }
805
806 // <const-data> = <hex-number>
demangleConstChar()807 void Demangler::demangleConstChar() {
808 StringView HexDigits;
809 uint64_t CodePoint = parseHexNumber(HexDigits);
810 if (Error || HexDigits.size() > 6) {
811 Error = true;
812 return;
813 }
814
815 print("'");
816 switch (CodePoint) {
817 case '\t':
818 print(R"(\t)");
819 break;
820 case '\r':
821 print(R"(\r)");
822 break;
823 case '\n':
824 print(R"(\n)");
825 break;
826 case '\\':
827 print(R"(\\)");
828 break;
829 case '"':
830 print(R"(")");
831 break;
832 case '\'':
833 print(R"(\')");
834 break;
835 default:
836 if (isAsciiPrintable(CodePoint)) {
837 char C = CodePoint;
838 print(C);
839 } else {
840 print(R"(\u{)");
841 print(HexDigits);
842 print('}');
843 }
844 break;
845 }
846 print('\'');
847 }
848
849 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()850 Identifier Demangler::parseIdentifier() {
851 bool Punycode = consumeIf('u');
852 uint64_t Bytes = parseDecimalNumber();
853
854 // Underscore resolves the ambiguity when identifier starts with a decimal
855 // digit or another underscore.
856 consumeIf('_');
857
858 if (Error || Bytes > Input.size() - Position) {
859 Error = true;
860 return {};
861 }
862 StringView S = Input.substr(Position, Bytes);
863 Position += Bytes;
864
865 if (!std::all_of(S.begin(), S.end(), isValid)) {
866 Error = true;
867 return {};
868 }
869
870 return {S, Punycode};
871 }
872
873 // Parses optional base 62 number. The presence of a number is determined using
874 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
875 //
876 // This function is indended for parsing disambiguators and binders which when
877 // not present have their value interpreted as 0, and otherwise as decoded
878 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
879 // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)880 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
881 if (!consumeIf(Tag))
882 return 0;
883
884 uint64_t N = parseBase62Number();
885 if (Error || !addAssign(N, 1))
886 return 0;
887
888 return N;
889 }
890
891 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
892 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
893 // "1_" encodes 2, etc.
894 //
895 // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()896 uint64_t Demangler::parseBase62Number() {
897 if (consumeIf('_'))
898 return 0;
899
900 uint64_t Value = 0;
901
902 while (true) {
903 uint64_t Digit;
904 char C = consume();
905
906 if (C == '_') {
907 break;
908 } else if (isDigit(C)) {
909 Digit = C - '0';
910 } else if (isLower(C)) {
911 Digit = 10 + (C - 'a');
912 } else if (isUpper(C)) {
913 Digit = 10 + 26 + (C - 'A');
914 } else {
915 Error = true;
916 return 0;
917 }
918
919 if (!mulAssign(Value, 62))
920 return 0;
921
922 if (!addAssign(Value, Digit))
923 return 0;
924 }
925
926 if (!addAssign(Value, 1))
927 return 0;
928
929 return Value;
930 }
931
932 // Parses a decimal number that had been encoded without any leading zeros.
933 //
934 // <decimal-number> = "0"
935 // | <1-9> {<0-9>}
parseDecimalNumber()936 uint64_t Demangler::parseDecimalNumber() {
937 char C = look();
938 if (!isDigit(C)) {
939 Error = true;
940 return 0;
941 }
942
943 if (C == '0') {
944 consume();
945 return 0;
946 }
947
948 uint64_t Value = 0;
949
950 while (isDigit(look())) {
951 if (!mulAssign(Value, 10)) {
952 Error = true;
953 return 0;
954 }
955
956 uint64_t D = consume() - '0';
957 if (!addAssign(Value, D))
958 return 0;
959 }
960
961 return Value;
962 }
963
964 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
965 // value and stores hex digits in HexDigits. The return value is unspecified if
966 // HexDigits.size() > 16.
967 //
968 // <hex-number> = "0_"
969 // | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(StringView & HexDigits)970 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
971 size_t Start = Position;
972 uint64_t Value = 0;
973
974 if (!isHexDigit(look()))
975 Error = true;
976
977 if (consumeIf('0')) {
978 if (!consumeIf('_'))
979 Error = true;
980 } else {
981 while (!Error && !consumeIf('_')) {
982 char C = consume();
983 Value *= 16;
984 if (isDigit(C))
985 Value += C - '0';
986 else if ('a' <= C && C <= 'f')
987 Value += 10 + (C - 'a');
988 else
989 Error = true;
990 }
991 }
992
993 if (Error) {
994 HexDigits = StringView();
995 return 0;
996 }
997
998 size_t End = Position - 1;
999 assert(Start < End);
1000 HexDigits = Input.substr(Start, End - Start);
1001 return Value;
1002 }
1003
print(char C)1004 void Demangler::print(char C) {
1005 if (Error || !Print)
1006 return;
1007
1008 Output += C;
1009 }
1010
print(StringView S)1011 void Demangler::print(StringView S) {
1012 if (Error || !Print)
1013 return;
1014
1015 Output += S;
1016 }
1017
printDecimalNumber(uint64_t N)1018 void Demangler::printDecimalNumber(uint64_t N) {
1019 if (Error || !Print)
1020 return;
1021
1022 Output << N;
1023 }
1024
1025 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1026 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1027 // bound by one of the enclosing binders.
printLifetime(uint64_t Index)1028 void Demangler::printLifetime(uint64_t Index) {
1029 if (Index == 0) {
1030 print("'_");
1031 return;
1032 }
1033
1034 if (Index - 1 >= BoundLifetimes) {
1035 Error = true;
1036 return;
1037 }
1038
1039 uint64_t Depth = BoundLifetimes - Index;
1040 print('\'');
1041 if (Depth < 26) {
1042 char C = 'a' + Depth;
1043 print(C);
1044 } else {
1045 print('z');
1046 printDecimalNumber(Depth - 26 + 1);
1047 }
1048 }
1049
decodePunycodeDigit(char C,size_t & Value)1050 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1051 if (isLower(C)) {
1052 Value = C - 'a';
1053 return true;
1054 }
1055
1056 if (isDigit(C)) {
1057 Value = 26 + (C - '0');
1058 return true;
1059 }
1060
1061 return false;
1062 }
1063
removeNullBytes(OutputBuffer & Output,size_t StartIdx)1064 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1065 char *Buffer = Output.getBuffer();
1066 char *Start = Buffer + StartIdx;
1067 char *End = Buffer + Output.getCurrentPosition();
1068 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1069 }
1070
1071 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1072 // CodePoint is not a valid unicode scalar value.
encodeUTF8(size_t CodePoint,char * Output)1073 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1074 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1075 return false;
1076
1077 if (CodePoint <= 0x7F) {
1078 Output[0] = CodePoint;
1079 return true;
1080 }
1081
1082 if (CodePoint <= 0x7FF) {
1083 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1084 Output[1] = 0x80 | (CodePoint & 0x3F);
1085 return true;
1086 }
1087
1088 if (CodePoint <= 0xFFFF) {
1089 Output[0] = 0xE0 | (CodePoint >> 12);
1090 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1091 Output[2] = 0x80 | (CodePoint & 0x3F);
1092 return true;
1093 }
1094
1095 if (CodePoint <= 0x10FFFF) {
1096 Output[0] = 0xF0 | (CodePoint >> 18);
1097 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1098 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1099 Output[3] = 0x80 | (CodePoint & 0x3F);
1100 return true;
1101 }
1102
1103 return false;
1104 }
1105
1106 // Decodes string encoded using punycode and appends results to Output.
1107 // Returns true if decoding was successful.
decodePunycode(StringView Input,OutputBuffer & Output)1108 static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1109 size_t OutputSize = Output.getCurrentPosition();
1110 size_t InputIdx = 0;
1111
1112 // Rust uses an underscore as a delimiter.
1113 size_t DelimiterPos = StringView::npos;
1114 for (size_t I = 0; I != Input.size(); ++I)
1115 if (Input[I] == '_')
1116 DelimiterPos = I;
1117
1118 if (DelimiterPos != StringView::npos) {
1119 // Copy basic code points before the last delimiter to the output.
1120 for (; InputIdx != DelimiterPos; ++InputIdx) {
1121 char C = Input[InputIdx];
1122 if (!isValid(C))
1123 return false;
1124 // Code points are padded with zeros while decoding is in progress.
1125 char UTF8[4] = {C};
1126 Output += StringView(UTF8, UTF8 + 4);
1127 }
1128 // Skip over the delimiter.
1129 ++InputIdx;
1130 }
1131
1132 size_t Base = 36;
1133 size_t Skew = 38;
1134 size_t Bias = 72;
1135 size_t N = 0x80;
1136 size_t TMin = 1;
1137 size_t TMax = 26;
1138 size_t Damp = 700;
1139
1140 auto Adapt = [&](size_t Delta, size_t NumPoints) {
1141 Delta /= Damp;
1142 Delta += Delta / NumPoints;
1143 Damp = 2;
1144
1145 size_t K = 0;
1146 while (Delta > (Base - TMin) * TMax / 2) {
1147 Delta /= Base - TMin;
1148 K += Base;
1149 }
1150 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1151 };
1152
1153 // Main decoding loop.
1154 for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1155 size_t OldI = I;
1156 size_t W = 1;
1157 size_t Max = std::numeric_limits<size_t>::max();
1158 for (size_t K = Base; true; K += Base) {
1159 if (InputIdx == Input.size())
1160 return false;
1161 char C = Input[InputIdx++];
1162 size_t Digit = 0;
1163 if (!decodePunycodeDigit(C, Digit))
1164 return false;
1165
1166 if (Digit > (Max - I) / W)
1167 return false;
1168 I += Digit * W;
1169
1170 size_t T;
1171 if (K <= Bias)
1172 T = TMin;
1173 else if (K >= Bias + TMax)
1174 T = TMax;
1175 else
1176 T = K - Bias;
1177
1178 if (Digit < T)
1179 break;
1180
1181 if (W > Max / (Base - T))
1182 return false;
1183 W *= (Base - T);
1184 }
1185 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1186 Bias = Adapt(I - OldI, NumPoints);
1187
1188 if (I / NumPoints > Max - N)
1189 return false;
1190 N += I / NumPoints;
1191 I = I % NumPoints;
1192
1193 // Insert N at position I in the output.
1194 char UTF8[4] = {};
1195 if (!encodeUTF8(N, UTF8))
1196 return false;
1197 Output.insert(OutputSize + I * 4, UTF8, 4);
1198 }
1199
1200 removeNullBytes(Output, OutputSize);
1201 return true;
1202 }
1203
printIdentifier(Identifier Ident)1204 void Demangler::printIdentifier(Identifier Ident) {
1205 if (Error || !Print)
1206 return;
1207
1208 if (Ident.Punycode) {
1209 if (!decodePunycode(Ident.Name, Output))
1210 Error = true;
1211 } else {
1212 print(Ident.Name);
1213 }
1214 }
1215
look() const1216 char Demangler::look() const {
1217 if (Error || Position >= Input.size())
1218 return 0;
1219
1220 return Input[Position];
1221 }
1222
consume()1223 char Demangler::consume() {
1224 if (Error || Position >= Input.size()) {
1225 Error = true;
1226 return 0;
1227 }
1228
1229 return Input[Position++];
1230 }
1231
consumeIf(char Prefix)1232 bool Demangler::consumeIf(char Prefix) {
1233 if (Error || Position >= Input.size() || Input[Position] != Prefix)
1234 return false;
1235
1236 Position += 1;
1237 return true;
1238 }
1239
1240 /// Computes A + B. When computation wraps around sets the error and returns
1241 /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)1242 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1243 if (A > std::numeric_limits<uint64_t>::max() - B) {
1244 Error = true;
1245 return false;
1246 }
1247
1248 A += B;
1249 return true;
1250 }
1251
1252 /// Computes A * B. When computation wraps around sets the error and returns
1253 /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)1254 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1255 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1256 Error = true;
1257 return false;
1258 }
1259
1260 A *= B;
1261 return true;
1262 }
1263