1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringView.h"
16 #include "llvm/Demangle/Utility.h"
17
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23
24 using namespace llvm;
25
26 using llvm::itanium_demangle::OutputStream;
27 using llvm::itanium_demangle::StringView;
28 using llvm::itanium_demangle::SwapAndRestore;
29
30 namespace {
31
32 struct Identifier {
33 StringView Name;
34 bool Punycode;
35
empty__anonadfcf2c00111::Identifier36 bool empty() const { return Name.empty(); }
37 };
38
39 enum class BasicType {
40 Bool,
41 Char,
42 I8,
43 I16,
44 I32,
45 I64,
46 I128,
47 ISize,
48 U8,
49 U16,
50 U32,
51 U64,
52 U128,
53 USize,
54 F32,
55 F64,
56 Str,
57 Placeholder,
58 Unit,
59 Variadic,
60 Never,
61 };
62
63 enum class IsInType {
64 No,
65 Yes,
66 };
67
68 enum class LeaveGenericsOpen {
69 No,
70 Yes,
71 };
72
73 class Demangler {
74 // Maximum recursion level. Used to avoid stack overflow.
75 size_t MaxRecursionLevel;
76 // Current recursion level.
77 size_t RecursionLevel;
78 size_t BoundLifetimes;
79 // Input string that is being demangled with "_R" prefix removed.
80 StringView Input;
81 // Position in the input string.
82 size_t Position;
83 // When true, print methods append the output to the stream.
84 // When false, the output is suppressed.
85 bool Print;
86 // True if an error occurred.
87 bool Error;
88
89 public:
90 // Demangled output.
91 OutputStream Output;
92
93 Demangler(size_t MaxRecursionLevel = 500);
94
95 bool demangle(StringView MangledName);
96
97 private:
98 bool demanglePath(IsInType Type,
99 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100 void demangleImplPath(IsInType InType);
101 void demangleGenericArg();
102 void demangleType();
103 void demangleFnSig();
104 void demangleDynBounds();
105 void demangleDynTrait();
106 void demangleOptionalBinder();
107 void demangleConst();
108 void demangleConstInt();
109 void demangleConstBool();
110 void demangleConstChar();
111
demangleBackref(Callable Demangler)112 template <typename Callable> void demangleBackref(Callable Demangler) {
113 uint64_t Backref = parseBase62Number();
114 if (Error || Backref >= Position) {
115 Error = true;
116 return;
117 }
118
119 if (!Print)
120 return;
121
122 SwapAndRestore<size_t> SavePosition(Position, Position);
123 Position = Backref;
124 Demangler();
125 }
126
127 Identifier parseIdentifier();
128 uint64_t parseOptionalBase62Number(char Tag);
129 uint64_t parseBase62Number();
130 uint64_t parseDecimalNumber();
131 uint64_t parseHexNumber(StringView &HexDigits);
132
133 void print(char C);
134 void print(StringView S);
135 void printDecimalNumber(uint64_t N);
136 void printBasicType(BasicType);
137 void printLifetime(uint64_t Index);
138 void printIdentifier(Identifier Ident);
139
140 char look() const;
141 char consume();
142 bool consumeIf(char Prefix);
143
144 bool addAssign(uint64_t &A, uint64_t B);
145 bool mulAssign(uint64_t &A, uint64_t B);
146 };
147
148 } // namespace
149
rustDemangle(const char * MangledName,char * Buf,size_t * N,int * Status)150 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
151 int *Status) {
152 if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
153 if (Status != nullptr)
154 *Status = demangle_invalid_args;
155 return nullptr;
156 }
157
158 // Return early if mangled name doesn't look like a Rust symbol.
159 StringView Mangled(MangledName);
160 if (!Mangled.startsWith("_R")) {
161 if (Status != nullptr)
162 *Status = demangle_invalid_mangled_name;
163 return nullptr;
164 }
165
166 Demangler D;
167 if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
168 if (Status != nullptr)
169 *Status = demangle_memory_alloc_failure;
170 return nullptr;
171 }
172
173 if (!D.demangle(Mangled)) {
174 if (Status != nullptr)
175 *Status = demangle_invalid_mangled_name;
176 std::free(D.Output.getBuffer());
177 return nullptr;
178 }
179
180 D.Output += '\0';
181 char *Demangled = D.Output.getBuffer();
182 size_t DemangledLen = D.Output.getCurrentPosition();
183
184 if (Buf != nullptr) {
185 if (DemangledLen <= *N) {
186 std::memcpy(Buf, Demangled, DemangledLen);
187 std::free(Demangled);
188 Demangled = Buf;
189 } else {
190 std::free(Buf);
191 }
192 }
193
194 if (N != nullptr)
195 *N = DemangledLen;
196
197 if (Status != nullptr)
198 *Status = demangle_success;
199
200 return Demangled;
201 }
202
Demangler(size_t MaxRecursionLevel)203 Demangler::Demangler(size_t MaxRecursionLevel)
204 : MaxRecursionLevel(MaxRecursionLevel) {}
205
isDigit(const char C)206 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
207
isHexDigit(const char C)208 static inline bool isHexDigit(const char C) {
209 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
210 }
211
isLower(const char C)212 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
213
isUpper(const char C)214 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
215
216 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)217 static inline bool isValid(const char C) {
218 return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
219 }
220
221 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
222 // otherwise. The demangled symbol is stored in Output field. It is
223 // responsibility of the caller to free the memory behind the output stream.
224 //
225 // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(StringView Mangled)226 bool Demangler::demangle(StringView Mangled) {
227 Position = 0;
228 Error = false;
229 Print = true;
230 RecursionLevel = 0;
231 BoundLifetimes = 0;
232
233 if (!Mangled.consumeFront("_R")) {
234 Error = true;
235 return false;
236 }
237 size_t Dot = Mangled.find('.');
238 Input = Mangled.substr(0, Dot);
239 StringView Suffix = Mangled.dropFront(Dot);
240
241 demanglePath(IsInType::No);
242
243 if (Position != Input.size()) {
244 SwapAndRestore<bool> SavePrint(Print, false);
245 demanglePath(IsInType::No);
246 }
247
248 if (Position != Input.size())
249 Error = true;
250
251 if (!Suffix.empty()) {
252 print(" (");
253 print(Suffix);
254 print(")");
255 }
256
257 return !Error;
258 }
259
260 // Demangles a path. InType indicates whether a path is inside a type. When
261 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
262 // output. Return value indicates whether generics arguments have been left
263 // open.
264 //
265 // <path> = "C" <identifier> // crate root
266 // | "M" <impl-path> <type> // <T> (inherent impl)
267 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
268 // | "Y" <type> <path> // <T as Trait> (trait definition)
269 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
270 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
271 // | <backref>
272 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
273 // <ns> = "C" // closure
274 // | "S" // shim
275 // | <A-Z> // other special namespaces
276 // | <a-z> // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)277 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
278 if (Error || RecursionLevel >= MaxRecursionLevel) {
279 Error = true;
280 return false;
281 }
282 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
283
284 switch (consume()) {
285 case 'C': {
286 parseOptionalBase62Number('s');
287 printIdentifier(parseIdentifier());
288 break;
289 }
290 case 'M': {
291 demangleImplPath(InType);
292 print("<");
293 demangleType();
294 print(">");
295 break;
296 }
297 case 'X': {
298 demangleImplPath(InType);
299 print("<");
300 demangleType();
301 print(" as ");
302 demanglePath(IsInType::Yes);
303 print(">");
304 break;
305 }
306 case 'Y': {
307 print("<");
308 demangleType();
309 print(" as ");
310 demanglePath(IsInType::Yes);
311 print(">");
312 break;
313 }
314 case 'N': {
315 char NS = consume();
316 if (!isLower(NS) && !isUpper(NS)) {
317 Error = true;
318 break;
319 }
320 demanglePath(InType);
321
322 uint64_t Disambiguator = parseOptionalBase62Number('s');
323 Identifier Ident = parseIdentifier();
324
325 if (isUpper(NS)) {
326 // Special namespaces
327 print("::{");
328 if (NS == 'C')
329 print("closure");
330 else if (NS == 'S')
331 print("shim");
332 else
333 print(NS);
334 if (!Ident.empty()) {
335 print(":");
336 printIdentifier(Ident);
337 }
338 print('#');
339 printDecimalNumber(Disambiguator);
340 print('}');
341 } else {
342 // Implementation internal namespaces.
343 if (!Ident.empty()) {
344 print("::");
345 printIdentifier(Ident);
346 }
347 }
348 break;
349 }
350 case 'I': {
351 demanglePath(InType);
352 // Omit "::" when in a type, where it is optional.
353 if (InType == IsInType::No)
354 print("::");
355 print("<");
356 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
357 if (I > 0)
358 print(", ");
359 demangleGenericArg();
360 }
361 if (LeaveOpen == LeaveGenericsOpen::Yes)
362 return true;
363 else
364 print(">");
365 break;
366 }
367 case 'B': {
368 bool IsOpen = false;
369 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
370 return IsOpen;
371 }
372 default:
373 Error = true;
374 break;
375 }
376
377 return false;
378 }
379
380 // <impl-path> = [<disambiguator>] <path>
381 // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)382 void Demangler::demangleImplPath(IsInType InType) {
383 SwapAndRestore<bool> SavePrint(Print, false);
384 parseOptionalBase62Number('s');
385 demanglePath(InType);
386 }
387
388 // <generic-arg> = <lifetime>
389 // | <type>
390 // | "K" <const>
391 // <lifetime> = "L" <base-62-number>
demangleGenericArg()392 void Demangler::demangleGenericArg() {
393 if (consumeIf('L'))
394 printLifetime(parseBase62Number());
395 else if (consumeIf('K'))
396 demangleConst();
397 else
398 demangleType();
399 }
400
401 // <basic-type> = "a" // i8
402 // | "b" // bool
403 // | "c" // char
404 // | "d" // f64
405 // | "e" // str
406 // | "f" // f32
407 // | "h" // u8
408 // | "i" // isize
409 // | "j" // usize
410 // | "l" // i32
411 // | "m" // u32
412 // | "n" // i128
413 // | "o" // u128
414 // | "s" // i16
415 // | "t" // u16
416 // | "u" // ()
417 // | "v" // ...
418 // | "x" // i64
419 // | "y" // u64
420 // | "z" // !
421 // | "p" // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)422 static bool parseBasicType(char C, BasicType &Type) {
423 switch (C) {
424 case 'a':
425 Type = BasicType::I8;
426 return true;
427 case 'b':
428 Type = BasicType::Bool;
429 return true;
430 case 'c':
431 Type = BasicType::Char;
432 return true;
433 case 'd':
434 Type = BasicType::F64;
435 return true;
436 case 'e':
437 Type = BasicType::Str;
438 return true;
439 case 'f':
440 Type = BasicType::F32;
441 return true;
442 case 'h':
443 Type = BasicType::U8;
444 return true;
445 case 'i':
446 Type = BasicType::ISize;
447 return true;
448 case 'j':
449 Type = BasicType::USize;
450 return true;
451 case 'l':
452 Type = BasicType::I32;
453 return true;
454 case 'm':
455 Type = BasicType::U32;
456 return true;
457 case 'n':
458 Type = BasicType::I128;
459 return true;
460 case 'o':
461 Type = BasicType::U128;
462 return true;
463 case 'p':
464 Type = BasicType::Placeholder;
465 return true;
466 case 's':
467 Type = BasicType::I16;
468 return true;
469 case 't':
470 Type = BasicType::U16;
471 return true;
472 case 'u':
473 Type = BasicType::Unit;
474 return true;
475 case 'v':
476 Type = BasicType::Variadic;
477 return true;
478 case 'x':
479 Type = BasicType::I64;
480 return true;
481 case 'y':
482 Type = BasicType::U64;
483 return true;
484 case 'z':
485 Type = BasicType::Never;
486 return true;
487 default:
488 return false;
489 }
490 }
491
printBasicType(BasicType Type)492 void Demangler::printBasicType(BasicType Type) {
493 switch (Type) {
494 case BasicType::Bool:
495 print("bool");
496 break;
497 case BasicType::Char:
498 print("char");
499 break;
500 case BasicType::I8:
501 print("i8");
502 break;
503 case BasicType::I16:
504 print("i16");
505 break;
506 case BasicType::I32:
507 print("i32");
508 break;
509 case BasicType::I64:
510 print("i64");
511 break;
512 case BasicType::I128:
513 print("i128");
514 break;
515 case BasicType::ISize:
516 print("isize");
517 break;
518 case BasicType::U8:
519 print("u8");
520 break;
521 case BasicType::U16:
522 print("u16");
523 break;
524 case BasicType::U32:
525 print("u32");
526 break;
527 case BasicType::U64:
528 print("u64");
529 break;
530 case BasicType::U128:
531 print("u128");
532 break;
533 case BasicType::USize:
534 print("usize");
535 break;
536 case BasicType::F32:
537 print("f32");
538 break;
539 case BasicType::F64:
540 print("f64");
541 break;
542 case BasicType::Str:
543 print("str");
544 break;
545 case BasicType::Placeholder:
546 print("_");
547 break;
548 case BasicType::Unit:
549 print("()");
550 break;
551 case BasicType::Variadic:
552 print("...");
553 break;
554 case BasicType::Never:
555 print("!");
556 break;
557 }
558 }
559
560 // <type> = | <basic-type>
561 // | <path> // named type
562 // | "A" <type> <const> // [T; N]
563 // | "S" <type> // [T]
564 // | "T" {<type>} "E" // (T1, T2, T3, ...)
565 // | "R" [<lifetime>] <type> // &T
566 // | "Q" [<lifetime>] <type> // &mut T
567 // | "P" <type> // *const T
568 // | "O" <type> // *mut T
569 // | "F" <fn-sig> // fn(...) -> ...
570 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
571 // | <backref> // backref
demangleType()572 void Demangler::demangleType() {
573 if (Error || RecursionLevel >= MaxRecursionLevel) {
574 Error = true;
575 return;
576 }
577 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
578
579 size_t Start = Position;
580 char C = consume();
581 BasicType Type;
582 if (parseBasicType(C, Type))
583 return printBasicType(Type);
584
585 switch (C) {
586 case 'A':
587 print("[");
588 demangleType();
589 print("; ");
590 demangleConst();
591 print("]");
592 break;
593 case 'S':
594 print("[");
595 demangleType();
596 print("]");
597 break;
598 case 'T': {
599 print("(");
600 size_t I = 0;
601 for (; !Error && !consumeIf('E'); ++I) {
602 if (I > 0)
603 print(", ");
604 demangleType();
605 }
606 if (I == 1)
607 print(",");
608 print(")");
609 break;
610 }
611 case 'R':
612 case 'Q':
613 print('&');
614 if (consumeIf('L')) {
615 if (auto Lifetime = parseBase62Number()) {
616 printLifetime(Lifetime);
617 print(' ');
618 }
619 }
620 if (C == 'Q')
621 print("mut ");
622 demangleType();
623 break;
624 case 'P':
625 print("*const ");
626 demangleType();
627 break;
628 case 'O':
629 print("*mut ");
630 demangleType();
631 break;
632 case 'F':
633 demangleFnSig();
634 break;
635 case 'D':
636 demangleDynBounds();
637 if (consumeIf('L')) {
638 if (auto Lifetime = parseBase62Number()) {
639 print(" + ");
640 printLifetime(Lifetime);
641 }
642 } else {
643 Error = true;
644 }
645 break;
646 case 'B':
647 demangleBackref([&] { demangleType(); });
648 break;
649 default:
650 Position = Start;
651 demanglePath(IsInType::Yes);
652 break;
653 }
654 }
655
656 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
657 // <abi> = "C"
658 // | <undisambiguated-identifier>
demangleFnSig()659 void Demangler::demangleFnSig() {
660 SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
661 demangleOptionalBinder();
662
663 if (consumeIf('U'))
664 print("unsafe ");
665
666 if (consumeIf('K')) {
667 print("extern \"");
668 if (consumeIf('C')) {
669 print("C");
670 } else {
671 Identifier Ident = parseIdentifier();
672 if (Ident.Punycode)
673 Error = true;
674 for (char C : Ident.Name) {
675 // When mangling ABI string, the "-" is replaced with "_".
676 if (C == '_')
677 C = '-';
678 print(C);
679 }
680 }
681 print("\" ");
682 }
683
684 print("fn(");
685 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
686 if (I > 0)
687 print(", ");
688 demangleType();
689 }
690 print(")");
691
692 if (consumeIf('u')) {
693 // Skip the unit type from the output.
694 } else {
695 print(" -> ");
696 demangleType();
697 }
698 }
699
700 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()701 void Demangler::demangleDynBounds() {
702 SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
703 print("dyn ");
704 demangleOptionalBinder();
705 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
706 if (I > 0)
707 print(" + ");
708 demangleDynTrait();
709 }
710 }
711
712 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
713 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()714 void Demangler::demangleDynTrait() {
715 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
716 while (!Error && consumeIf('p')) {
717 if (!IsOpen) {
718 IsOpen = true;
719 print('<');
720 } else {
721 print(", ");
722 }
723 print(parseIdentifier().Name);
724 print(" = ");
725 demangleType();
726 }
727 if (IsOpen)
728 print(">");
729 }
730
731 // Demangles optional binder and updates the number of bound lifetimes.
732 //
733 // <binder> = "G" <base-62-number>
demangleOptionalBinder()734 void Demangler::demangleOptionalBinder() {
735 uint64_t Binder = parseOptionalBase62Number('G');
736 if (Error || Binder == 0)
737 return;
738
739 // In valid inputs each bound lifetime is referenced later. Referencing a
740 // lifetime requires at least one byte of input. Reject inputs that are too
741 // short to reference all bound lifetimes. Otherwise demangling of invalid
742 // binders could generate excessive amounts of output.
743 if (Binder >= Input.size() - BoundLifetimes) {
744 Error = true;
745 return;
746 }
747
748 print("for<");
749 for (size_t I = 0; I != Binder; ++I) {
750 BoundLifetimes += 1;
751 if (I > 0)
752 print(", ");
753 printLifetime(1);
754 }
755 print("> ");
756 }
757
758 // <const> = <basic-type> <const-data>
759 // | "p" // placeholder
760 // | <backref>
demangleConst()761 void Demangler::demangleConst() {
762 if (Error || RecursionLevel >= MaxRecursionLevel) {
763 Error = true;
764 return;
765 }
766 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
767
768 char C = consume();
769 BasicType Type;
770 if (parseBasicType(C, Type)) {
771 switch (Type) {
772 case BasicType::I8:
773 case BasicType::I16:
774 case BasicType::I32:
775 case BasicType::I64:
776 case BasicType::I128:
777 case BasicType::ISize:
778 case BasicType::U8:
779 case BasicType::U16:
780 case BasicType::U32:
781 case BasicType::U64:
782 case BasicType::U128:
783 case BasicType::USize:
784 demangleConstInt();
785 break;
786 case BasicType::Bool:
787 demangleConstBool();
788 break;
789 case BasicType::Char:
790 demangleConstChar();
791 break;
792 case BasicType::Placeholder:
793 print('_');
794 break;
795 default:
796 Error = true;
797 break;
798 }
799 } else if (C == 'B') {
800 demangleBackref([&] { demangleConst(); });
801 } else {
802 Error = true;
803 }
804 }
805
806 // <const-data> = ["n"] <hex-number>
demangleConstInt()807 void Demangler::demangleConstInt() {
808 if (consumeIf('n'))
809 print('-');
810
811 StringView HexDigits;
812 uint64_t Value = parseHexNumber(HexDigits);
813 if (HexDigits.size() <= 16) {
814 printDecimalNumber(Value);
815 } else {
816 print("0x");
817 print(HexDigits);
818 }
819 }
820
821 // <const-data> = "0_" // false
822 // | "1_" // true
demangleConstBool()823 void Demangler::demangleConstBool() {
824 StringView HexDigits;
825 parseHexNumber(HexDigits);
826 if (HexDigits == "0")
827 print("false");
828 else if (HexDigits == "1")
829 print("true");
830 else
831 Error = true;
832 }
833
834 /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)835 static bool isAsciiPrintable(uint64_t CodePoint) {
836 return 0x20 <= CodePoint && CodePoint <= 0x7e;
837 }
838
839 // <const-data> = <hex-number>
demangleConstChar()840 void Demangler::demangleConstChar() {
841 StringView HexDigits;
842 uint64_t CodePoint = parseHexNumber(HexDigits);
843 if (Error || HexDigits.size() > 6) {
844 Error = true;
845 return;
846 }
847
848 print("'");
849 switch (CodePoint) {
850 case '\t':
851 print(R"(\t)");
852 break;
853 case '\r':
854 print(R"(\r)");
855 break;
856 case '\n':
857 print(R"(\n)");
858 break;
859 case '\\':
860 print(R"(\\)");
861 break;
862 case '"':
863 print(R"(")");
864 break;
865 case '\'':
866 print(R"(\')");
867 break;
868 default:
869 if (isAsciiPrintable(CodePoint)) {
870 char C = CodePoint;
871 print(C);
872 } else {
873 print(R"(\u{)");
874 print(HexDigits);
875 print('}');
876 }
877 break;
878 }
879 print('\'');
880 }
881
882 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()883 Identifier Demangler::parseIdentifier() {
884 bool Punycode = consumeIf('u');
885 uint64_t Bytes = parseDecimalNumber();
886
887 // Underscore resolves the ambiguity when identifier starts with a decimal
888 // digit or another underscore.
889 consumeIf('_');
890
891 if (Error || Bytes > Input.size() - Position) {
892 Error = true;
893 return {};
894 }
895 StringView S = Input.substr(Position, Bytes);
896 Position += Bytes;
897
898 if (!std::all_of(S.begin(), S.end(), isValid)) {
899 Error = true;
900 return {};
901 }
902
903 return {S, Punycode};
904 }
905
906 // Parses optional base 62 number. The presence of a number is determined using
907 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
908 //
909 // This function is indended for parsing disambiguators and binders which when
910 // not present have their value interpreted as 0, and otherwise as decoded
911 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
912 // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)913 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
914 if (!consumeIf(Tag))
915 return 0;
916
917 uint64_t N = parseBase62Number();
918 if (Error || !addAssign(N, 1))
919 return 0;
920
921 return N;
922 }
923
924 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
925 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
926 // "1_" encodes 2, etc.
927 //
928 // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()929 uint64_t Demangler::parseBase62Number() {
930 if (consumeIf('_'))
931 return 0;
932
933 uint64_t Value = 0;
934
935 while (true) {
936 uint64_t Digit;
937 char C = consume();
938
939 if (C == '_') {
940 break;
941 } else if (isDigit(C)) {
942 Digit = C - '0';
943 } else if (isLower(C)) {
944 Digit = 10 + (C - 'a');
945 } else if (isUpper(C)) {
946 Digit = 10 + 26 + (C - 'A');
947 } else {
948 Error = true;
949 return 0;
950 }
951
952 if (!mulAssign(Value, 62))
953 return 0;
954
955 if (!addAssign(Value, Digit))
956 return 0;
957 }
958
959 if (!addAssign(Value, 1))
960 return 0;
961
962 return Value;
963 }
964
965 // Parses a decimal number that had been encoded without any leading zeros.
966 //
967 // <decimal-number> = "0"
968 // | <1-9> {<0-9>}
parseDecimalNumber()969 uint64_t Demangler::parseDecimalNumber() {
970 char C = look();
971 if (!isDigit(C)) {
972 Error = true;
973 return 0;
974 }
975
976 if (C == '0') {
977 consume();
978 return 0;
979 }
980
981 uint64_t Value = 0;
982
983 while (isDigit(look())) {
984 if (!mulAssign(Value, 10)) {
985 Error = true;
986 return 0;
987 }
988
989 uint64_t D = consume() - '0';
990 if (!addAssign(Value, D))
991 return 0;
992 }
993
994 return Value;
995 }
996
997 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
998 // value and stores hex digits in HexDigits. The return value is unspecified if
999 // HexDigits.size() > 16.
1000 //
1001 // <hex-number> = "0_"
1002 // | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(StringView & HexDigits)1003 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
1004 size_t Start = Position;
1005 uint64_t Value = 0;
1006
1007 if (!isHexDigit(look()))
1008 Error = true;
1009
1010 if (consumeIf('0')) {
1011 if (!consumeIf('_'))
1012 Error = true;
1013 } else {
1014 while (!Error && !consumeIf('_')) {
1015 char C = consume();
1016 Value *= 16;
1017 if (isDigit(C))
1018 Value += C - '0';
1019 else if ('a' <= C && C <= 'f')
1020 Value += 10 + (C - 'a');
1021 else
1022 Error = true;
1023 }
1024 }
1025
1026 if (Error) {
1027 HexDigits = StringView();
1028 return 0;
1029 }
1030
1031 size_t End = Position - 1;
1032 assert(Start < End);
1033 HexDigits = Input.substr(Start, End - Start);
1034 return Value;
1035 }
1036
print(char C)1037 void Demangler::print(char C) {
1038 if (Error || !Print)
1039 return;
1040
1041 Output += C;
1042 }
1043
print(StringView S)1044 void Demangler::print(StringView S) {
1045 if (Error || !Print)
1046 return;
1047
1048 Output += S;
1049 }
1050
printDecimalNumber(uint64_t N)1051 void Demangler::printDecimalNumber(uint64_t N) {
1052 if (Error || !Print)
1053 return;
1054
1055 Output << N;
1056 }
1057
1058 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1059 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1060 // bound by one of the enclosing binders.
printLifetime(uint64_t Index)1061 void Demangler::printLifetime(uint64_t Index) {
1062 if (Index == 0) {
1063 print("'_");
1064 return;
1065 }
1066
1067 if (Index - 1 >= BoundLifetimes) {
1068 Error = true;
1069 return;
1070 }
1071
1072 uint64_t Depth = BoundLifetimes - Index;
1073 print('\'');
1074 if (Depth < 26) {
1075 char C = 'a' + Depth;
1076 print(C);
1077 } else {
1078 print('z');
1079 printDecimalNumber(Depth - 26 + 1);
1080 }
1081 }
1082
decodePunycodeDigit(char C,size_t & Value)1083 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1084 if (isLower(C)) {
1085 Value = C - 'a';
1086 return true;
1087 }
1088
1089 if (isDigit(C)) {
1090 Value = 26 + (C - '0');
1091 return true;
1092 }
1093
1094 return false;
1095 }
1096
removeNullBytes(OutputStream & Output,size_t StartIdx)1097 static void removeNullBytes(OutputStream &Output, size_t StartIdx) {
1098 char *Buffer = Output.getBuffer();
1099 char *Start = Buffer + StartIdx;
1100 char *End = Buffer + Output.getCurrentPosition();
1101 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1102 }
1103
1104 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1105 // CodePoint is not a valid unicode scalar value.
encodeUTF8(size_t CodePoint,char * Output)1106 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1107 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1108 return false;
1109
1110 if (CodePoint <= 0x7F) {
1111 Output[0] = CodePoint;
1112 return true;
1113 }
1114
1115 if (CodePoint <= 0x7FF) {
1116 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1117 Output[1] = 0x80 | (CodePoint & 0x3F);
1118 return true;
1119 }
1120
1121 if (CodePoint <= 0xFFFF) {
1122 Output[0] = 0xE0 | (CodePoint >> 12);
1123 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1124 Output[2] = 0x80 | (CodePoint & 0x3F);
1125 return true;
1126 }
1127
1128 if (CodePoint <= 0x10FFFF) {
1129 Output[0] = 0xF0 | (CodePoint >> 18);
1130 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1131 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1132 Output[3] = 0x80 | (CodePoint & 0x3F);
1133 return true;
1134 }
1135
1136 return false;
1137 }
1138
1139 // Decodes string encoded using punycode and appends results to Output.
1140 // Returns true if decoding was successful.
decodePunycode(StringView Input,OutputStream & Output)1141 static bool decodePunycode(StringView Input, OutputStream &Output) {
1142 size_t OutputSize = Output.getCurrentPosition();
1143 size_t InputIdx = 0;
1144
1145 // Rust uses an underscore as a delimiter.
1146 size_t DelimiterPos = StringView::npos;
1147 for (size_t I = 0; I != Input.size(); ++I)
1148 if (Input[I] == '_')
1149 DelimiterPos = I;
1150
1151 if (DelimiterPos != StringView::npos) {
1152 // Copy basic code points before the last delimiter to the output.
1153 for (; InputIdx != DelimiterPos; ++InputIdx) {
1154 char C = Input[InputIdx];
1155 if (!isValid(C))
1156 return false;
1157 // Code points are padded with zeros while decoding is in progress.
1158 char UTF8[4] = {C};
1159 Output += StringView(UTF8, UTF8 + 4);
1160 }
1161 // Skip over the delimiter.
1162 ++InputIdx;
1163 }
1164
1165 size_t Base = 36;
1166 size_t Skew = 38;
1167 size_t Bias = 72;
1168 size_t N = 0x80;
1169 size_t TMin = 1;
1170 size_t TMax = 26;
1171 size_t Damp = 700;
1172
1173 auto Adapt = [&](size_t Delta, size_t NumPoints) {
1174 Delta /= Damp;
1175 Delta += Delta / NumPoints;
1176 Damp = 2;
1177
1178 size_t K = 0;
1179 while (Delta > (Base - TMin) * TMax / 2) {
1180 Delta /= Base - TMin;
1181 K += Base;
1182 }
1183 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1184 };
1185
1186 // Main decoding loop.
1187 for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1188 size_t OldI = I;
1189 size_t W = 1;
1190 size_t Max = std::numeric_limits<size_t>::max();
1191 for (size_t K = Base; true; K += Base) {
1192 if (InputIdx == Input.size())
1193 return false;
1194 char C = Input[InputIdx++];
1195 size_t Digit = 0;
1196 if (!decodePunycodeDigit(C, Digit))
1197 return false;
1198
1199 if (Digit > (Max - I) / W)
1200 return false;
1201 I += Digit * W;
1202
1203 size_t T;
1204 if (K <= Bias)
1205 T = TMin;
1206 else if (K >= Bias + TMax)
1207 T = TMax;
1208 else
1209 T = K - Bias;
1210
1211 if (Digit < T)
1212 break;
1213
1214 if (W > Max / (Base - T))
1215 return false;
1216 W *= (Base - T);
1217 }
1218 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1219 Bias = Adapt(I - OldI, NumPoints);
1220
1221 if (I / NumPoints > Max - N)
1222 return false;
1223 N += I / NumPoints;
1224 I = I % NumPoints;
1225
1226 // Insert N at position I in the output.
1227 char UTF8[4] = {};
1228 if (!encodeUTF8(N, UTF8))
1229 return false;
1230 Output.insert(OutputSize + I * 4, UTF8, 4);
1231 }
1232
1233 removeNullBytes(Output, OutputSize);
1234 return true;
1235 }
1236
printIdentifier(Identifier Ident)1237 void Demangler::printIdentifier(Identifier Ident) {
1238 if (Error || !Print)
1239 return;
1240
1241 if (Ident.Punycode) {
1242 if (!decodePunycode(Ident.Name, Output))
1243 Error = true;
1244 } else {
1245 print(Ident.Name);
1246 }
1247 }
1248
look() const1249 char Demangler::look() const {
1250 if (Error || Position >= Input.size())
1251 return 0;
1252
1253 return Input[Position];
1254 }
1255
consume()1256 char Demangler::consume() {
1257 if (Error || Position >= Input.size()) {
1258 Error = true;
1259 return 0;
1260 }
1261
1262 return Input[Position++];
1263 }
1264
consumeIf(char Prefix)1265 bool Demangler::consumeIf(char Prefix) {
1266 if (Error || Position >= Input.size() || Input[Position] != Prefix)
1267 return false;
1268
1269 Position += 1;
1270 return true;
1271 }
1272
1273 /// Computes A + B. When computation wraps around sets the error and returns
1274 /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)1275 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1276 if (A > std::numeric_limits<uint64_t>::max() - B) {
1277 Error = true;
1278 return false;
1279 }
1280
1281 A += B;
1282 return true;
1283 }
1284
1285 /// Computes A * B. When computation wraps around sets the error and returns
1286 /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)1287 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1288 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1289 Error = true;
1290 return false;
1291 }
1292
1293 A *= B;
1294 return true;
1295 }
1296