1 /* Demangler for the Rust programming language
2 Copyright (C) 2016-2021 Free Software Foundation, Inc.
3 Written by David Tolnay (dtolnay@gmail.com).
4 Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
5
6 This file is part of the libiberty library.
7 Libiberty is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
11
12 In addition to the permissions in the GNU Library General Public
13 License, the Free Software Foundation gives you unlimited permission
14 to link the compiled version of this file into combinations with other
15 programs, and to distribute those combinations without any restriction
16 coming from the use of this file. (The Library Public License
17 restrictions do apply in other respects; for example, they cover
18 modification of the file, and distribution when not linked into a
19 combined executable.)
20
21 Libiberty is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 Library General Public License for more details.
25
26 You should have received a copy of the GNU Library General Public
27 License along with libiberty; see the file COPYING.LIB.
28 If not, see <http://www.gnu.org/licenses/>. */
29
30
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34
35 #include "safe-ctype.h"
36
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42
43 #ifdef HAVE_STRING_H
44 #include <string.h>
45 #else
46 extern size_t strlen(const char *s);
47 extern int strncmp(const char *s1, const char *s2, size_t n);
48 extern void *memset(void *s, int c, size_t n);
49 #endif
50
51 #include <demangle.h>
52 #include "libiberty.h"
53
54 struct rust_demangler
55 {
56 const char *sym;
57 size_t sym_len;
58
59 void *callback_opaque;
60 demangle_callbackref callback;
61
62 /* Position of the next character to read from the symbol. */
63 size_t next;
64
65 /* Non-zero if any error occurred. */
66 int errored;
67
68 /* Non-zero if nothing should be printed. */
69 int skipping_printing;
70
71 /* Non-zero if printing should be verbose (e.g. include hashes). */
72 int verbose;
73
74 /* Rust mangling version, with legacy mangling being -1. */
75 int version;
76
77 uint64_t bound_lifetime_depth;
78 };
79
80 /* Parsing functions. */
81
82 static char
peek(const struct rust_demangler * rdm)83 peek (const struct rust_demangler *rdm)
84 {
85 if (rdm->next < rdm->sym_len)
86 return rdm->sym[rdm->next];
87 return 0;
88 }
89
90 static int
eat(struct rust_demangler * rdm,char c)91 eat (struct rust_demangler *rdm, char c)
92 {
93 if (peek (rdm) == c)
94 {
95 rdm->next++;
96 return 1;
97 }
98 else
99 return 0;
100 }
101
102 static char
next(struct rust_demangler * rdm)103 next (struct rust_demangler *rdm)
104 {
105 char c = peek (rdm);
106 if (!c)
107 rdm->errored = 1;
108 else
109 rdm->next++;
110 return c;
111 }
112
113 static uint64_t
parse_integer_62(struct rust_demangler * rdm)114 parse_integer_62 (struct rust_demangler *rdm)
115 {
116 char c;
117 uint64_t x;
118
119 if (eat (rdm, '_'))
120 return 0;
121
122 x = 0;
123 while (!eat (rdm, '_'))
124 {
125 c = next (rdm);
126 x *= 62;
127 if (ISDIGIT (c))
128 x += c - '0';
129 else if (ISLOWER (c))
130 x += 10 + (c - 'a');
131 else if (ISUPPER (c))
132 x += 10 + 26 + (c - 'A');
133 else
134 {
135 rdm->errored = 1;
136 return 0;
137 }
138 }
139 return x + 1;
140 }
141
142 static uint64_t
parse_opt_integer_62(struct rust_demangler * rdm,char tag)143 parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
144 {
145 if (!eat (rdm, tag))
146 return 0;
147 return 1 + parse_integer_62 (rdm);
148 }
149
/* Parse an optional disambiguator, i.e. an optional base-62 number
   introduced by 's'.  Return 0 when there is none.  */
static uint64_t
parse_disambiguator (struct rust_demangler *rdm)
{
  uint64_t dis = parse_opt_integer_62 (rdm, 's');

  return dis;
}
155
156 static size_t
parse_hex_nibbles(struct rust_demangler * rdm,uint64_t * value)157 parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
158 {
159 char c;
160 size_t hex_len;
161
162 hex_len = 0;
163 *value = 0;
164
165 while (!eat (rdm, '_'))
166 {
167 *value <<= 4;
168
169 c = next (rdm);
170 if (ISDIGIT (c))
171 *value |= c - '0';
172 else if (c >= 'a' && c <= 'f')
173 *value |= 10 + (c - 'a');
174 else
175 {
176 rdm->errored = 1;
177 return 0;
178 }
179 hex_len++;
180 }
181
182 return hex_len;
183 }
184
/* An identifier as it appears in a mangled symbol.  Both parts are
   (pointer, length) views and are not NUL-terminated.  */
struct rust_mangled_ident
{
  /* The plain ASCII portion of the identifier.  */
  const char *ascii;
  size_t ascii_len;

  /* Punycode insertion codes describing the Unicode codepoints of the
     identifier, if it has any.  */
  const char *punycode;
  size_t punycode_len;
};
195
196 static struct rust_mangled_ident
parse_ident(struct rust_demangler * rdm)197 parse_ident (struct rust_demangler *rdm)
198 {
199 char c;
200 size_t start, len;
201 int is_punycode = 0;
202 struct rust_mangled_ident ident;
203
204 ident.ascii = NULL;
205 ident.ascii_len = 0;
206 ident.punycode = NULL;
207 ident.punycode_len = 0;
208
209 if (rdm->version != -1)
210 is_punycode = eat (rdm, 'u');
211
212 c = next (rdm);
213 if (!ISDIGIT (c))
214 {
215 rdm->errored = 1;
216 return ident;
217 }
218 len = c - '0';
219
220 if (c != '0')
221 while (ISDIGIT (peek (rdm)))
222 len = len * 10 + (next (rdm) - '0');
223
224 /* Skip past the optional `_` separator (v0). */
225 if (rdm->version != -1)
226 eat (rdm, '_');
227
228 start = rdm->next;
229 rdm->next += len;
230 /* Check for overflows. */
231 if ((start > rdm->next) || (rdm->next > rdm->sym_len))
232 {
233 rdm->errored = 1;
234 return ident;
235 }
236
237 ident.ascii = rdm->sym + start;
238 ident.ascii_len = len;
239
240 if (is_punycode)
241 {
242 ident.punycode_len = 0;
243 while (ident.ascii_len > 0)
244 {
245 ident.ascii_len--;
246
247 /* The last '_' is a separator between ascii & punycode. */
248 if (ident.ascii[ident.ascii_len] == '_')
249 break;
250
251 ident.punycode_len++;
252 }
253 if (!ident.punycode_len)
254 {
255 rdm->errored = 1;
256 return ident;
257 }
258 ident.punycode = ident.ascii + (len - ident.punycode_len);
259 }
260
261 if (ident.ascii_len == 0)
262 ident.ascii = NULL;
263
264 return ident;
265 }
266
267 /* Printing functions. */
268
269 static void
print_str(struct rust_demangler * rdm,const char * data,size_t len)270 print_str (struct rust_demangler *rdm, const char *data, size_t len)
271 {
272 if (!rdm->errored && !rdm->skipping_printing)
273 rdm->callback (data, len, rdm->callback_opaque);
274 }
275
/* Print the NUL-terminated string S.  This expects a variable named
   `rdm' (the demangler state) to be in scope at the point of use, and
   it evaluates S twice.  */
#define PRINT(s) print_str (rdm, (s), strlen (s))
277
/* Print X in decimal.  */
static void
print_uint64 (struct rust_demangler *rdm, uint64_t x)
{
  /* Room for the 20 decimal digits a uint64_t can have, plus the NUL.  */
  char digits[21];

  snprintf (digits, sizeof digits, "%" PRIu64, x);
  PRINT (digits);
}
285
/* Print X in lowercase hexadecimal, without any prefix.  */
static void
print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
{
  /* Room for the 16 hex digits a uint64_t can have, plus the NUL.  */
  char digits[17];

  snprintf (digits, sizeof digits, "%" PRIx64, x);
  PRINT (digits);
}
293
/* Convert one lowercase hexadecimal digit (0-9, a-f) to its value in
   the range 0x0-0xf.  Return -1 for any other character, uppercase
   A-F included.  */
static int
decode_lower_hex_nibble (char nibble)
{
  if (nibble >= '0' && nibble <= '9')
    return nibble - '0';

  if (nibble >= 'a' && nibble <= 'f')
    return (nibble - 'a') + 0xa;

  return -1;
}
304
/* Decode the legacy "$...$" escape sequence at the start of E, which
   holds LEN characters.

   Recognized escapes are $C$ (','), $SP$ ('@'), $BP$ ('*'), $RF$ ('&'),
   $LT$ ('<'), $GT$ ('>'), $LP$ ('('), $RP$ (')') and $uXX$, where XX
   are two lowercase hex digits naming a non-control ASCII character
   (0x20-0x7e).

   Return the unescaped character and store the length of the whole
   escape, both '$' included, in *OUT_LEN.  Return 0 if E does not start
   with a valid escape; *OUT_LEN is then left untouched.  */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  char c = 0;
  size_t escape_len = 0;
  int lo_nibble = -1, hi_nibble = -1;

  /* The shortest escape, "$C$", is three characters long.  */
  if (len < 3 || e[0] != '$')
    return 0;

  /* Step over the opening '$'.  */
  e++;
  len--;

  if (e[0] == 'C')
    {
      escape_len = 1;

      c = ',';
    }
  else if (len > 2)
    {
      escape_len = 2;

      if (e[0] == 'S' && e[1] == 'P')
	c = '@';
      else if (e[0] == 'B' && e[1] == 'P')
	c = '*';
      else if (e[0] == 'R' && e[1] == 'F')
	c = '&';
      else if (e[0] == 'L' && e[1] == 'T')
	c = '<';
      else if (e[0] == 'G' && e[1] == 'T')
	c = '>';
      else if (e[0] == 'L' && e[1] == 'P')
	c = '(';
      else if (e[0] == 'R' && e[1] == 'P')
	c = ')';
      else if (e[0] == 'u' && len > 3)
	{
	  escape_len = 3;

	  hi_nibble = decode_lower_hex_nibble (e[1]);
	  if (hi_nibble < 0)
	    return 0;
	  lo_nibble = decode_lower_hex_nibble (e[2]);
	  if (lo_nibble < 0)
	    return 0;

	  /* Only allow non-control ASCII characters: nothing above
	     0x7f, nothing below 0x20, and not DEL (0x7f) itself.  */
	  if (hi_nibble > 7)
	    return 0;
	  c = (hi_nibble << 4) | lo_nibble;
	  if (c < 0x20 || c == 0x7f)
	    return 0;
	}
    }

  /* The escape must be known and closed by a second '$'.  */
  if (!c || len <= escape_len || e[escape_len] != '$')
    return 0;

  *out_len = 2 + escape_len;
  return c;
}
369
370 static void
print_ident(struct rust_demangler * rdm,struct rust_mangled_ident ident)371 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
372 {
373 char unescaped;
374 uint8_t *out, *p, d;
375 size_t len, cap, punycode_pos, j;
376 /* Punycode parameters and state. */
377 uint32_t c;
378 size_t base, t_min, t_max, skew, damp, bias, i;
379 size_t delta, w, k, t;
380
381 if (rdm->errored || rdm->skipping_printing)
382 return;
383
384 if (rdm->version == -1)
385 {
386 /* Ignore leading underscores preceding escape sequences.
387 The mangler inserts an underscore to make sure the
388 identifier begins with a XID_Start character. */
389 if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
390 && ident.ascii[1] == '$')
391 {
392 ident.ascii++;
393 ident.ascii_len--;
394 }
395
396 while (ident.ascii_len > 0)
397 {
398 /* Handle legacy escape sequences ("$...$", ".." or "."). */
399 if (ident.ascii[0] == '$')
400 {
401 unescaped
402 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
403 if (unescaped)
404 print_str (rdm, &unescaped, 1);
405 else
406 {
407 /* Unexpected escape sequence, print the rest verbatim. */
408 print_str (rdm, ident.ascii, ident.ascii_len);
409 return;
410 }
411 }
412 else if (ident.ascii[0] == '.')
413 {
414 if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
415 {
416 /* ".." becomes "::" */
417 PRINT ("::");
418 len = 2;
419 }
420 else
421 {
422 PRINT (".");
423 len = 1;
424 }
425 }
426 else
427 {
428 /* Print everything before the next escape sequence, at once. */
429 for (len = 0; len < ident.ascii_len; len++)
430 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
431 break;
432
433 print_str (rdm, ident.ascii, len);
434 }
435
436 ident.ascii += len;
437 ident.ascii_len -= len;
438 }
439
440 return;
441 }
442
443 if (!ident.punycode)
444 {
445 print_str (rdm, ident.ascii, ident.ascii_len);
446 return;
447 }
448
449 len = 0;
450 cap = 4;
451 while (cap < ident.ascii_len)
452 {
453 cap *= 2;
454 /* Check for overflows. */
455 if ((cap * 4) / 4 != cap)
456 {
457 rdm->errored = 1;
458 return;
459 }
460 }
461
462 /* Store the output codepoints as groups of 4 UTF-8 bytes. */
463 out = (uint8_t *)malloc (cap * 4);
464 if (!out)
465 {
466 rdm->errored = 1;
467 return;
468 }
469
470 /* Populate initial output from ASCII fragment. */
471 for (len = 0; len < ident.ascii_len; len++)
472 {
473 p = out + 4 * len;
474 p[0] = 0;
475 p[1] = 0;
476 p[2] = 0;
477 p[3] = ident.ascii[len];
478 }
479
480 /* Punycode parameters and initial state. */
481 base = 36;
482 t_min = 1;
483 t_max = 26;
484 skew = 38;
485 damp = 700;
486 bias = 72;
487 i = 0;
488 c = 0x80;
489
490 punycode_pos = 0;
491 while (punycode_pos < ident.punycode_len)
492 {
493 /* Read one delta value. */
494 delta = 0;
495 w = 1;
496 k = 0;
497 do
498 {
499 k += base;
500 t = k < bias ? 0 : (k - bias);
501 if (t < t_min)
502 t = t_min;
503 if (t > t_max)
504 t = t_max;
505
506 if (punycode_pos >= ident.punycode_len)
507 goto cleanup;
508 d = ident.punycode[punycode_pos++];
509
510 if (ISLOWER (d))
511 d = d - 'a';
512 else if (ISDIGIT (d))
513 d = 26 + (d - '0');
514 else
515 {
516 rdm->errored = 1;
517 goto cleanup;
518 }
519
520 delta += d * w;
521 w *= base - t;
522 }
523 while (d >= t);
524
525 /* Compute the new insert position and character. */
526 len++;
527 i += delta;
528 c += i / len;
529 i %= len;
530
531 /* Ensure enough space is available. */
532 if (cap < len)
533 {
534 cap *= 2;
535 /* Check for overflows. */
536 if ((cap * 4) / 4 != cap || cap < len)
537 {
538 rdm->errored = 1;
539 goto cleanup;
540 }
541 }
542 p = (uint8_t *)realloc (out, cap * 4);
543 if (!p)
544 {
545 rdm->errored = 1;
546 goto cleanup;
547 }
548 out = p;
549
550 /* Move the characters after the insert position. */
551 p = out + i * 4;
552 memmove (p + 4, p, (len - i - 1) * 4);
553
554 /* Insert the new character, as UTF-8 bytes. */
555 p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
556 p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
557 p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
558 p[3] = 0x80 | (c & 0x3f);
559
560 /* If there are no more deltas, decoding is complete. */
561 if (punycode_pos == ident.punycode_len)
562 break;
563
564 i++;
565
566 /* Perform bias adaptation. */
567 delta /= damp;
568 damp = 2;
569
570 delta += delta / len;
571 k = 0;
572 while (delta > ((base - t_min) * t_max) / 2)
573 {
574 delta /= base - t_min;
575 k += base;
576 }
577 bias = k + ((base - t_min + 1) * delta) / (delta + skew);
578 }
579
580 /* Remove all the 0 bytes to leave behind an UTF-8 string. */
581 for (i = 0, j = 0; i < len * 4; i++)
582 if (out[i] != 0)
583 out[j++] = out[i];
584
585 print_str (rdm, (const char *)out, j);
586
587 cleanup:
588 free (out);
589 }
590
591 /* Print the lifetime according to the previously decoded index.
592 An index of `0` always refers to `'_`, but starting with `1`,
593 indices refer to late-bound lifetimes introduced by a binder. */
594 static void
print_lifetime_from_index(struct rust_demangler * rdm,uint64_t lt)595 print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
596 {
597 char c;
598 uint64_t depth;
599
600 PRINT ("'");
601 if (lt == 0)
602 {
603 PRINT ("_");
604 return;
605 }
606
607 depth = rdm->bound_lifetime_depth - lt;
608 /* Try to print lifetimes alphabetically first. */
609 if (depth < 26)
610 {
611 c = 'a' + depth;
612 print_str (rdm, &c, 1);
613 }
614 else
615 {
616 /* Use `'_123` after running out of letters. */
617 PRINT ("_");
618 print_uint64 (rdm, depth);
619 }
620 }
621
622 /* Demangling functions. */
623
/* The demangling routines call one another recursively, so all of them
   are declared ahead of their definitions.  */

/* Binders, paths, generic arguments and types.  */
static void demangle_binder (struct rust_demangler *rdm);
static void demangle_path (struct rust_demangler *rdm, int in_value);
static void demangle_generic_arg (struct rust_demangler *rdm);
static void demangle_type (struct rust_demangler *rdm);

/* Trait objects.  */
static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
static void demangle_dyn_trait (struct rust_demangler *rdm);

/* Constants.  */
static void demangle_const (struct rust_demangler *rdm);
static void demangle_const_uint (struct rust_demangler *rdm);
static void demangle_const_int (struct rust_demangler *rdm);
static void demangle_const_bool (struct rust_demangler *rdm);
static void demangle_const_char (struct rust_demangler *rdm);
635
636 /* Optionally enter a binder ('G') for late-bound lifetimes,
637 printing e.g. `for<'a, 'b> `, and make those lifetimes visible
638 to the caller (via depth level, which the caller should reset). */
639 static void
demangle_binder(struct rust_demangler * rdm)640 demangle_binder (struct rust_demangler *rdm)
641 {
642 uint64_t i, bound_lifetimes;
643
644 if (rdm->errored)
645 return;
646
647 bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
648 if (bound_lifetimes > 0)
649 {
650 PRINT ("for<");
651 for (i = 0; i < bound_lifetimes; i++)
652 {
653 if (i > 0)
654 PRINT (", ");
655 rdm->bound_lifetime_depth++;
656 print_lifetime_from_index (rdm, 1);
657 }
658 PRINT ("> ");
659 }
660 }
661
662 static void
demangle_path(struct rust_demangler * rdm,int in_value)663 demangle_path (struct rust_demangler *rdm, int in_value)
664 {
665 char tag, ns;
666 int was_skipping_printing;
667 size_t i, backref, old_next;
668 uint64_t dis;
669 struct rust_mangled_ident name;
670
671 if (rdm->errored)
672 return;
673
674 switch (tag = next (rdm))
675 {
676 case 'C':
677 dis = parse_disambiguator (rdm);
678 name = parse_ident (rdm);
679
680 print_ident (rdm, name);
681 if (rdm->verbose)
682 {
683 PRINT ("[");
684 print_uint64_hex (rdm, dis);
685 PRINT ("]");
686 }
687 break;
688 case 'N':
689 ns = next (rdm);
690 if (!ISLOWER (ns) && !ISUPPER (ns))
691 {
692 rdm->errored = 1;
693 return;
694 }
695
696 demangle_path (rdm, in_value);
697
698 dis = parse_disambiguator (rdm);
699 name = parse_ident (rdm);
700
701 if (ISUPPER (ns))
702 {
703 /* Special namespaces, like closures and shims. */
704 PRINT ("::{");
705 switch (ns)
706 {
707 case 'C':
708 PRINT ("closure");
709 break;
710 case 'S':
711 PRINT ("shim");
712 break;
713 default:
714 print_str (rdm, &ns, 1);
715 }
716 if (name.ascii || name.punycode)
717 {
718 PRINT (":");
719 print_ident (rdm, name);
720 }
721 PRINT ("#");
722 print_uint64 (rdm, dis);
723 PRINT ("}");
724 }
725 else
726 {
727 /* Implementation-specific/unspecified namespaces. */
728
729 if (name.ascii || name.punycode)
730 {
731 PRINT ("::");
732 print_ident (rdm, name);
733 }
734 }
735 break;
736 case 'M':
737 case 'X':
738 /* Ignore the `impl`'s own path.*/
739 parse_disambiguator (rdm);
740 was_skipping_printing = rdm->skipping_printing;
741 rdm->skipping_printing = 1;
742 demangle_path (rdm, in_value);
743 rdm->skipping_printing = was_skipping_printing;
744 /* fallthrough */
745 case 'Y':
746 PRINT ("<");
747 demangle_type (rdm);
748 if (tag != 'M')
749 {
750 PRINT (" as ");
751 demangle_path (rdm, 0);
752 }
753 PRINT (">");
754 break;
755 case 'I':
756 demangle_path (rdm, in_value);
757 if (in_value)
758 PRINT ("::");
759 PRINT ("<");
760 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
761 {
762 if (i > 0)
763 PRINT (", ");
764 demangle_generic_arg (rdm);
765 }
766 PRINT (">");
767 break;
768 case 'B':
769 backref = parse_integer_62 (rdm);
770 if (!rdm->skipping_printing)
771 {
772 old_next = rdm->next;
773 rdm->next = backref;
774 demangle_path (rdm, in_value);
775 rdm->next = old_next;
776 }
777 break;
778 default:
779 rdm->errored = 1;
780 return;
781 }
782 }
783
/* Demangle and print one generic argument: a lifetime (introduced by
   'L'), a constant (introduced by 'K') or, failing both, a type.  */
static void
demangle_generic_arg (struct rust_demangler *rdm)
{
  if (eat (rdm, 'L'))
    print_lifetime_from_index (rdm, parse_integer_62 (rdm));
  else if (eat (rdm, 'K'))
    demangle_const (rdm);
  else
    demangle_type (rdm);
}
798
/* Map the one-letter TAG of a basic type to the way that type is
   written in Rust.  Return NULL when TAG does not name a basic type.  */
static const char *
basic_type (char tag)
{
  static const struct
  {
    char tag;
    const char *name;
  } basic_types[] = {
    { 'b', "bool" },
    { 'c', "char" },
    { 'e', "str" },
    { 'u', "()" },
    { 'a', "i8" },
    { 's', "i16" },
    { 'l', "i32" },
    { 'x', "i64" },
    { 'n', "i128" },
    { 'i', "isize" },
    { 'h', "u8" },
    { 't', "u16" },
    { 'm', "u32" },
    { 'y', "u64" },
    { 'o', "u128" },
    { 'j', "usize" },
    { 'f', "f32" },
    { 'd', "f64" },
    { 'z', "!" },
    { 'p', "_" },
    { 'v', "..." }
  };
  size_t n;

  for (n = 0; n < sizeof basic_types / sizeof basic_types[0]; n++)
    if (basic_types[n].tag == tag)
      return basic_types[n].name;

  return NULL;
}
851
852 static void
demangle_type(struct rust_demangler * rdm)853 demangle_type (struct rust_demangler *rdm)
854 {
855 char tag;
856 size_t i, old_next, backref;
857 uint64_t lt, old_bound_lifetime_depth;
858 const char *basic;
859 struct rust_mangled_ident abi;
860
861 if (rdm->errored)
862 return;
863
864 tag = next (rdm);
865
866 basic = basic_type (tag);
867 if (basic)
868 {
869 PRINT (basic);
870 return;
871 }
872
873 switch (tag)
874 {
875 case 'R':
876 case 'Q':
877 PRINT ("&");
878 if (eat (rdm, 'L'))
879 {
880 lt = parse_integer_62 (rdm);
881 if (lt)
882 {
883 print_lifetime_from_index (rdm, lt);
884 PRINT (" ");
885 }
886 }
887 if (tag != 'R')
888 PRINT ("mut ");
889 demangle_type (rdm);
890 break;
891 case 'P':
892 case 'O':
893 PRINT ("*");
894 if (tag != 'P')
895 PRINT ("mut ");
896 else
897 PRINT ("const ");
898 demangle_type (rdm);
899 break;
900 case 'A':
901 case 'S':
902 PRINT ("[");
903 demangle_type (rdm);
904 if (tag == 'A')
905 {
906 PRINT ("; ");
907 demangle_const (rdm);
908 }
909 PRINT ("]");
910 break;
911 case 'T':
912 PRINT ("(");
913 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
914 {
915 if (i > 0)
916 PRINT (", ");
917 demangle_type (rdm);
918 }
919 if (i == 1)
920 PRINT (",");
921 PRINT (")");
922 break;
923 case 'F':
924 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
925 demangle_binder (rdm);
926
927 if (eat (rdm, 'U'))
928 PRINT ("unsafe ");
929
930 if (eat (rdm, 'K'))
931 {
932 if (eat (rdm, 'C'))
933 {
934 abi.ascii = "C";
935 abi.ascii_len = 1;
936 }
937 else
938 {
939 abi = parse_ident (rdm);
940 if (!abi.ascii || abi.punycode)
941 {
942 rdm->errored = 1;
943 goto restore;
944 }
945 }
946
947 PRINT ("extern \"");
948
949 /* If the ABI had any `-`, they were replaced with `_`,
950 so the parts between `_` have to be re-joined with `-`. */
951 for (i = 0; i < abi.ascii_len; i++)
952 {
953 if (abi.ascii[i] == '_')
954 {
955 print_str (rdm, abi.ascii, i);
956 PRINT ("-");
957 abi.ascii += i + 1;
958 abi.ascii_len -= i + 1;
959 i = 0;
960 }
961 }
962 print_str (rdm, abi.ascii, abi.ascii_len);
963
964 PRINT ("\" ");
965 }
966
967 PRINT ("fn(");
968 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
969 {
970 if (i > 0)
971 PRINT (", ");
972 demangle_type (rdm);
973 }
974 PRINT (")");
975
976 if (eat (rdm, 'u'))
977 {
978 /* Skip printing the return type if it's 'u', i.e. `()`. */
979 }
980 else
981 {
982 PRINT (" -> ");
983 demangle_type (rdm);
984 }
985
986 /* Restore `bound_lifetime_depth` to outside the binder. */
987 restore:
988 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
989 break;
990 case 'D':
991 PRINT ("dyn ");
992
993 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
994 demangle_binder (rdm);
995
996 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
997 {
998 if (i > 0)
999 PRINT (" + ");
1000 demangle_dyn_trait (rdm);
1001 }
1002
1003 /* Restore `bound_lifetime_depth` to outside the binder. */
1004 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1005
1006 if (!eat (rdm, 'L'))
1007 {
1008 rdm->errored = 1;
1009 return;
1010 }
1011 lt = parse_integer_62 (rdm);
1012 if (lt)
1013 {
1014 PRINT (" + ");
1015 print_lifetime_from_index (rdm, lt);
1016 }
1017 break;
1018 case 'B':
1019 backref = parse_integer_62 (rdm);
1020 if (!rdm->skipping_printing)
1021 {
1022 old_next = rdm->next;
1023 rdm->next = backref;
1024 demangle_type (rdm);
1025 rdm->next = old_next;
1026 }
1027 break;
1028 default:
1029 /* Go back to the tag, so `demangle_path` also sees it. */
1030 rdm->next--;
1031 demangle_path (rdm, 0);
1032 }
1033 }
1034
1035 /* A trait in a trait object may have some "existential projections"
1036 (i.e. associated type bindings) after it, which should be printed
1037 in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1038 To this end, this method will keep the `<...>` of an 'I' path
1039 open, by omitting the `>`, and return `Ok(true)` in that case. */
1040 static int
demangle_path_maybe_open_generics(struct rust_demangler * rdm)1041 demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1042 {
1043 int open;
1044 size_t i, old_next, backref;
1045
1046 open = 0;
1047
1048 if (rdm->errored)
1049 return open;
1050
1051 if (eat (rdm, 'B'))
1052 {
1053 backref = parse_integer_62 (rdm);
1054 if (!rdm->skipping_printing)
1055 {
1056 old_next = rdm->next;
1057 rdm->next = backref;
1058 open = demangle_path_maybe_open_generics (rdm);
1059 rdm->next = old_next;
1060 }
1061 }
1062 else if (eat (rdm, 'I'))
1063 {
1064 demangle_path (rdm, 0);
1065 PRINT ("<");
1066 open = 1;
1067 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1068 {
1069 if (i > 0)
1070 PRINT (", ");
1071 demangle_generic_arg (rdm);
1072 }
1073 }
1074 else
1075 demangle_path (rdm, 0);
1076 return open;
1077 }
1078
1079 static void
demangle_dyn_trait(struct rust_demangler * rdm)1080 demangle_dyn_trait (struct rust_demangler *rdm)
1081 {
1082 int open;
1083 struct rust_mangled_ident name;
1084
1085 if (rdm->errored)
1086 return;
1087
1088 open = demangle_path_maybe_open_generics (rdm);
1089
1090 while (eat (rdm, 'p'))
1091 {
1092 if (!open)
1093 PRINT ("<");
1094 else
1095 PRINT (", ");
1096 open = 1;
1097
1098 name = parse_ident (rdm);
1099 print_ident (rdm, name);
1100 PRINT (" = ");
1101 demangle_type (rdm);
1102 }
1103
1104 if (open)
1105 PRINT (">");
1106 }
1107
1108 static void
demangle_const(struct rust_demangler * rdm)1109 demangle_const (struct rust_demangler *rdm)
1110 {
1111 char ty_tag;
1112 size_t old_next, backref;
1113
1114 if (rdm->errored)
1115 return;
1116
1117 if (eat (rdm, 'B'))
1118 {
1119 backref = parse_integer_62 (rdm);
1120 if (!rdm->skipping_printing)
1121 {
1122 old_next = rdm->next;
1123 rdm->next = backref;
1124 demangle_const (rdm);
1125 rdm->next = old_next;
1126 }
1127 return;
1128 }
1129
1130 ty_tag = next (rdm);
1131 switch (ty_tag)
1132 {
1133 /* Placeholder. */
1134 case 'p':
1135 PRINT ("_");
1136 return;
1137
1138 /* Unsigned integer types. */
1139 case 'h':
1140 case 't':
1141 case 'm':
1142 case 'y':
1143 case 'o':
1144 case 'j':
1145 demangle_const_uint (rdm);
1146 break;
1147
1148 /* Signed integer types. */
1149 case 'a':
1150 case 's':
1151 case 'l':
1152 case 'x':
1153 case 'n':
1154 case 'i':
1155 demangle_const_int (rdm);
1156 break;
1157
1158 /* Boolean. */
1159 case 'b':
1160 demangle_const_bool (rdm);
1161 break;
1162
1163 /* Character. */
1164 case 'c':
1165 demangle_const_char (rdm);
1166 break;
1167
1168 default:
1169 rdm->errored = 1;
1170 return;
1171 }
1172
1173 if (rdm->errored)
1174 return;
1175
1176 if (rdm->verbose)
1177 {
1178 PRINT (": ");
1179 PRINT (basic_type (ty_tag));
1180 }
1181 }
1182
1183 static void
demangle_const_uint(struct rust_demangler * rdm)1184 demangle_const_uint (struct rust_demangler *rdm)
1185 {
1186 size_t hex_len;
1187 uint64_t value;
1188
1189 if (rdm->errored)
1190 return;
1191
1192 hex_len = parse_hex_nibbles (rdm, &value);
1193
1194 if (hex_len > 16)
1195 {
1196 /* Print anything that doesn't fit in `uint64_t` verbatim. */
1197 PRINT ("0x");
1198 print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1199 }
1200 else if (hex_len > 0)
1201 print_uint64 (rdm, value);
1202 else
1203 rdm->errored = 1;
1204 }
1205
/* Demangle and print a signed integer constant: an optional 'n', which
   is printed as a minus sign, followed by the magnitude in the same
   form as an unsigned constant.  */
static void
demangle_const_int (struct rust_demangler *rdm)
{
  int negative = eat (rdm, 'n');

  if (negative)
    PRINT ("-");
  demangle_const_uint (rdm);
}
1213
1214 static void
demangle_const_bool(struct rust_demangler * rdm)1215 demangle_const_bool (struct rust_demangler *rdm)
1216 {
1217 uint64_t value;
1218
1219 if (parse_hex_nibbles (rdm, &value) != 1)
1220 {
1221 rdm->errored = 1;
1222 return;
1223 }
1224
1225 if (value == 0)
1226 PRINT ("false");
1227 else if (value == 1)
1228 PRINT ("true");
1229 else
1230 rdm->errored = 1;
1231 }
1232
1233 static void
demangle_const_char(struct rust_demangler * rdm)1234 demangle_const_char (struct rust_demangler *rdm)
1235 {
1236 size_t hex_len;
1237 uint64_t value;
1238
1239 hex_len = parse_hex_nibbles (rdm, &value);
1240
1241 if (hex_len == 0 || hex_len > 8)
1242 {
1243 rdm->errored = 1;
1244 return;
1245 }
1246
1247 /* Match Rust's character "debug" output as best as we can. */
1248 PRINT ("'");
1249 if (value == '\t')
1250 PRINT ("\\t");
1251 else if (value == '\r')
1252 PRINT ("\\r");
1253 else if (value == '\n')
1254 PRINT ("\\n");
1255 else if (value > ' ' && value < '~')
1256 {
1257 /* Rust also considers many non-ASCII codepoints to be printable, but
1258 that logic is not easily ported to C. */
1259 char c = value;
1260 print_str (rdm, &c, 1);
1261 }
1262 else
1263 {
1264 PRINT ("\\u{");
1265 print_uint64_hex (rdm, value);
1266 PRINT ("}");
1267 }
1268 PRINT ("'");
1269 }
1270
1271 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1272 The hex digits must contain at least 5 distinct digits. */
1273 static int
is_legacy_prefixed_hash(struct rust_mangled_ident ident)1274 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1275 {
1276 uint16_t seen;
1277 int nibble;
1278 size_t i, count;
1279
1280 if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1281 return 0;
1282
1283 seen = 0;
1284 for (i = 0; i < 16; i++)
1285 {
1286 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1287 if (nibble < 0)
1288 return 0;
1289 seen |= (uint16_t)1 << nibble;
1290 }
1291
1292 /* Count how many distinct digits were seen. */
1293 count = 0;
1294 while (seen)
1295 {
1296 if (seen & 1)
1297 count++;
1298 seen >>= 1;
1299 }
1300
1301 return count >= 5;
1302 }
1303
1304 int
rust_demangle_callback(const char * mangled,int options,demangle_callbackref callback,void * opaque)1305 rust_demangle_callback (const char *mangled, int options,
1306 demangle_callbackref callback, void *opaque)
1307 {
1308 const char *p;
1309 struct rust_demangler rdm;
1310 struct rust_mangled_ident ident;
1311
1312 rdm.sym = mangled;
1313 rdm.sym_len = 0;
1314
1315 rdm.callback_opaque = opaque;
1316 rdm.callback = callback;
1317
1318 rdm.next = 0;
1319 rdm.errored = 0;
1320 rdm.skipping_printing = 0;
1321 rdm.verbose = (options & DMGL_VERBOSE) != 0;
1322 rdm.version = 0;
1323 rdm.bound_lifetime_depth = 0;
1324
1325 /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1326 if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1327 rdm.sym += 2;
1328 else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1329 {
1330 rdm.sym += 3;
1331 rdm.version = -1;
1332 }
1333 else
1334 return 0;
1335
1336 /* Paths (v0) always start with uppercase characters. */
1337 if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1338 return 0;
1339
1340 /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1341 for (p = rdm.sym; *p; p++)
1342 {
1343 rdm.sym_len++;
1344
1345 if (*p == '_' || ISALNUM (*p))
1346 continue;
1347
1348 /* Legacy Rust symbols can also contain [.:$] characters. */
1349 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
1350 continue;
1351
1352 return 0;
1353 }
1354
1355 /* Legacy Rust symbols need to be handled separately. */
1356 if (rdm.version == -1)
1357 {
1358 /* Legacy Rust symbols always end with E. */
1359 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1360 return 0;
1361 rdm.sym_len--;
1362
1363 /* Legacy Rust symbols also always end with a path segment
1364 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1365 This early check, before any parse_ident calls, should
1366 quickly filter out most C++ symbols unrelated to Rust. */
1367 if (!(rdm.sym_len > 19
1368 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1369 return 0;
1370
1371 do
1372 {
1373 ident = parse_ident (&rdm);
1374 if (rdm.errored || !ident.ascii)
1375 return 0;
1376 }
1377 while (rdm.next < rdm.sym_len);
1378
1379 /* The last path segment should be the hash. */
1380 if (!is_legacy_prefixed_hash (ident))
1381 return 0;
1382
1383 /* Reset the state for a second pass, to print the symbol. */
1384 rdm.next = 0;
1385 if (!rdm.verbose && rdm.sym_len > 19)
1386 {
1387 /* Hide the last segment, containing the hash, if not verbose. */
1388 rdm.sym_len -= 19;
1389 }
1390
1391 do
1392 {
1393 if (rdm.next > 0)
1394 print_str (&rdm, "::", 2);
1395
1396 ident = parse_ident (&rdm);
1397 print_ident (&rdm, ident);
1398 }
1399 while (rdm.next < rdm.sym_len);
1400 }
1401 else
1402 {
1403 demangle_path (&rdm, 1);
1404
1405 /* Skip instantiating crate. */
1406 if (!rdm.errored && rdm.next < rdm.sym_len)
1407 {
1408 rdm.skipping_printing = 1;
1409 demangle_path (&rdm, 0);
1410 }
1411
1412 /* It's an error to not reach the end. */
1413 rdm.errored |= rdm.next != rdm.sym_len;
1414 }
1415
1416 return !rdm.errored;
1417 }
1418
/* Growable string buffers.  */
struct str_buf
{
  /* The storage, or NULL while nothing has been allocated.  */
  char *ptr;

  /* Number of bytes of PTR that are in use.  */
  size_t len;

  /* Number of bytes PTR has room for.  */
  size_t cap;

  /* Non-zero once growing the buffer has failed; every later request
     to grow it or append to it is then ignored.  */
  int errored;
};
1427
1428 static void
str_buf_reserve(struct str_buf * buf,size_t extra)1429 str_buf_reserve (struct str_buf *buf, size_t extra)
1430 {
1431 size_t available, min_new_cap, new_cap;
1432 char *new_ptr;
1433
1434 /* Allocation failed before. */
1435 if (buf->errored)
1436 return;
1437
1438 available = buf->cap - buf->len;
1439
1440 if (extra <= available)
1441 return;
1442
1443 min_new_cap = buf->cap + (extra - available);
1444
1445 /* Check for overflows. */
1446 if (min_new_cap < buf->cap)
1447 {
1448 buf->errored = 1;
1449 return;
1450 }
1451
1452 new_cap = buf->cap;
1453
1454 if (new_cap == 0)
1455 new_cap = 4;
1456
1457 /* Double capacity until sufficiently large. */
1458 while (new_cap < min_new_cap)
1459 {
1460 new_cap *= 2;
1461
1462 /* Check for overflows. */
1463 if (new_cap < buf->cap)
1464 {
1465 buf->errored = 1;
1466 return;
1467 }
1468 }
1469
1470 new_ptr = (char *)realloc (buf->ptr, new_cap);
1471 if (new_ptr == NULL)
1472 {
1473 free (buf->ptr);
1474 buf->ptr = NULL;
1475 buf->len = 0;
1476 buf->cap = 0;
1477 buf->errored = 1;
1478 }
1479 else
1480 {
1481 buf->ptr = new_ptr;
1482 buf->cap = new_cap;
1483 }
1484 }
1485
1486 static void
str_buf_append(struct str_buf * buf,const char * data,size_t len)1487 str_buf_append (struct str_buf *buf, const char *data, size_t len)
1488 {
1489 str_buf_reserve (buf, len);
1490 if (buf->errored)
1491 return;
1492
1493 memcpy (buf->ptr + buf->len, data, len);
1494 buf->len += len;
1495 }
1496
/* Demangler callback that collects the output in a string buffer:
   OPAQUE is the `struct str_buf' that the LEN bytes at DATA are
   appended to.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  struct str_buf *buf = (struct str_buf *) opaque;

  str_buf_append (buf, data, len);
}
1502
1503 char *
rust_demangle(const char * mangled,int options)1504 rust_demangle (const char *mangled, int options)
1505 {
1506 struct str_buf out;
1507 int success;
1508
1509 out.ptr = NULL;
1510 out.len = 0;
1511 out.cap = 0;
1512 out.errored = 0;
1513
1514 success = rust_demangle_callback (mangled, options,
1515 str_buf_demangle_callback, &out);
1516
1517 if (!success)
1518 {
1519 free (out.ptr);
1520 return NULL;
1521 }
1522
1523 str_buf_append (&out, "\0", 1);
1524 return out.ptr;
1525 }
1526