1 /* Demangler for the Rust programming language
2    Copyright (C) 2016-2021 Free Software Foundation, Inc.
3    Written by David Tolnay (dtolnay@gmail.com).
4    Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
5 
6 This file is part of the libiberty library.
7 Libiberty is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
11 
12 In addition to the permissions in the GNU Library General Public
13 License, the Free Software Foundation gives you unlimited permission
14 to link the compiled version of this file into combinations with other
15 programs, and to distribute those combinations without any restriction
16 coming from the use of this file.  (The Library Public License
17 restrictions do apply in other respects; for example, they cover
18 modification of the file, and distribution when not linked into a
19 combined executable.)
20 
21 Libiberty is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24 Library General Public License for more details.
25 
26 You should have received a copy of the GNU Library General Public
27 License along with libiberty; see the file COPYING.LIB.
28 If not, see <http://www.gnu.org/licenses/>.  */
29 
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include "safe-ctype.h"
36 
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 
43 #ifdef HAVE_STRING_H
44 #include <string.h>
45 #else
46 extern size_t strlen(const char *s);
47 extern int strncmp(const char *s1, const char *s2, size_t n);
48 extern void *memset(void *s, int c, size_t n);
49 #endif
50 
51 #include <demangle.h>
52 #include "libiberty.h"
53 
54 struct rust_demangler
55 {
56   const char *sym;
57   size_t sym_len;
58 
59   void *callback_opaque;
60   demangle_callbackref callback;
61 
62   /* Position of the next character to read from the symbol. */
63   size_t next;
64 
65   /* Non-zero if any error occurred. */
66   int errored;
67 
68   /* Non-zero if nothing should be printed. */
69   int skipping_printing;
70 
71   /* Non-zero if printing should be verbose (e.g. include hashes). */
72   int verbose;
73 
74   /* Rust mangling version, with legacy mangling being -1. */
75   int version;
76 
77   uint64_t bound_lifetime_depth;
78 };
79 
80 /* Parsing functions. */
81 
82 static char
peek(const struct rust_demangler * rdm)83 peek (const struct rust_demangler *rdm)
84 {
85   if (rdm->next < rdm->sym_len)
86     return rdm->sym[rdm->next];
87   return 0;
88 }
89 
90 static int
eat(struct rust_demangler * rdm,char c)91 eat (struct rust_demangler *rdm, char c)
92 {
93   if (peek (rdm) == c)
94     {
95       rdm->next++;
96       return 1;
97     }
98   else
99     return 0;
100 }
101 
102 static char
next(struct rust_demangler * rdm)103 next (struct rust_demangler *rdm)
104 {
105   char c = peek (rdm);
106   if (!c)
107     rdm->errored = 1;
108   else
109     rdm->next++;
110   return c;
111 }
112 
113 static uint64_t
parse_integer_62(struct rust_demangler * rdm)114 parse_integer_62 (struct rust_demangler *rdm)
115 {
116   char c;
117   uint64_t x;
118 
119   if (eat (rdm, '_'))
120     return 0;
121 
122   x = 0;
123   while (!eat (rdm, '_'))
124     {
125       c = next (rdm);
126       x *= 62;
127       if (ISDIGIT (c))
128         x += c - '0';
129       else if (ISLOWER (c))
130         x += 10 + (c - 'a');
131       else if (ISUPPER (c))
132         x += 10 + 26 + (c - 'A');
133       else
134         {
135           rdm->errored = 1;
136           return 0;
137         }
138     }
139   return x + 1;
140 }
141 
142 static uint64_t
parse_opt_integer_62(struct rust_demangler * rdm,char tag)143 parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
144 {
145   if (!eat (rdm, tag))
146     return 0;
147   return 1 + parse_integer_62 (rdm);
148 }
149 
/* Parse an optional disambiguator, i.e. an optional base-62 number
   introduced by the `s` tag.  Returns 0 when the tag is absent. */
static uint64_t
parse_disambiguator (struct rust_demangler *rdm)
{
  const char disambiguator_tag = 's';

  return parse_opt_integer_62 (rdm, disambiguator_tag);
}
155 
156 static size_t
parse_hex_nibbles(struct rust_demangler * rdm,uint64_t * value)157 parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
158 {
159   char c;
160   size_t hex_len;
161 
162   hex_len = 0;
163   *value = 0;
164 
165   while (!eat (rdm, '_'))
166     {
167       *value <<= 4;
168 
169       c = next (rdm);
170       if (ISDIGIT (c))
171         *value |= c - '0';
172       else if (c >= 'a' && c <= 'f')
173         *value |= 10 + (c - 'a');
174       else
175         {
176           rdm->errored = 1;
177           return 0;
178         }
179       hex_len++;
180     }
181 
182   return hex_len;
183 }
184 
/* An identifier as it appears in the mangled symbol.  Both parts point
   into the symbol text; `parse_ident` sets a part to NULL (with length
   0) when it is absent. */
struct rust_mangled_ident
{
  /* The plain ASCII portion of the identifier. */
  const char *ascii;
  size_t ascii_len;

  /* Punycode insertion codes for the Unicode codepoints, if any. */
  const char *punycode;
  size_t punycode_len;
};
195 
196 static struct rust_mangled_ident
parse_ident(struct rust_demangler * rdm)197 parse_ident (struct rust_demangler *rdm)
198 {
199   char c;
200   size_t start, len;
201   int is_punycode = 0;
202   struct rust_mangled_ident ident;
203 
204   ident.ascii = NULL;
205   ident.ascii_len = 0;
206   ident.punycode = NULL;
207   ident.punycode_len = 0;
208 
209   if (rdm->version != -1)
210     is_punycode = eat (rdm, 'u');
211 
212   c = next (rdm);
213   if (!ISDIGIT (c))
214     {
215       rdm->errored = 1;
216       return ident;
217     }
218   len = c - '0';
219 
220   if (c != '0')
221     while (ISDIGIT (peek (rdm)))
222       len = len * 10 + (next (rdm) - '0');
223 
224   /* Skip past the optional `_` separator (v0). */
225   if (rdm->version != -1)
226     eat (rdm, '_');
227 
228   start = rdm->next;
229   rdm->next += len;
230   /* Check for overflows. */
231   if ((start > rdm->next) || (rdm->next > rdm->sym_len))
232     {
233       rdm->errored = 1;
234       return ident;
235     }
236 
237   ident.ascii = rdm->sym + start;
238   ident.ascii_len = len;
239 
240   if (is_punycode)
241     {
242       ident.punycode_len = 0;
243       while (ident.ascii_len > 0)
244         {
245           ident.ascii_len--;
246 
247           /* The last '_' is a separator between ascii & punycode. */
248           if (ident.ascii[ident.ascii_len] == '_')
249             break;
250 
251           ident.punycode_len++;
252         }
253       if (!ident.punycode_len)
254         {
255           rdm->errored = 1;
256           return ident;
257         }
258       ident.punycode = ident.ascii + (len - ident.punycode_len);
259     }
260 
261   if (ident.ascii_len == 0)
262     ident.ascii = NULL;
263 
264   return ident;
265 }
266 
267 /* Printing functions. */
268 
269 static void
print_str(struct rust_demangler * rdm,const char * data,size_t len)270 print_str (struct rust_demangler *rdm, const char *data, size_t len)
271 {
272   if (!rdm->errored && !rdm->skipping_printing)
273     rdm->callback (data, len, rdm->callback_opaque);
274 }
275 
276 #define PRINT(s) print_str (rdm, s, strlen (s))
277 
/* Print X in decimal. */
static void
print_uint64 (struct rust_demangler *rdm, uint64_t x)
{
  /* Room for the 20 digits of the largest 64-bit value plus the NUL. */
  char digits[21];

  snprintf (digits, sizeof digits, "%" PRIu64, x);
  print_str (rdm, digits, strlen (digits));
}
285 
/* Print X in lowercase hexadecimal, without any prefix or padding. */
static void
print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
{
  /* Room for the 16 hex digits of a 64-bit value plus the NUL. */
  char digits[17];

  snprintf (digits, sizeof digits, "%" PRIx64, x);
  print_str (rdm, digits, strlen (digits));
}
293 
/* Map one of the characters 0-9 or a-f to its value (0x0-0xf).
   Any other character, uppercase A-F included, yields -1. */
static int
decode_lower_hex_nibble (char nibble)
{
  int value = -1;

  if (nibble >= '0' && nibble <= '9')
    value = nibble - '0';
  else if (nibble >= 'a' && nibble <= 'f')
    value = 0xa + (nibble - 'a');

  return value;
}
304 
/* Decode one legacy "$...$" escape found at the start of the LEN bytes
   at E.

   Recognised escapes are `$C$` (comma), the two-letter codes listed in
   the table below, and `$uXX$` where XX are two lowercase hex digits
   encoding a value from 0x20 to 0x7f.  On success the unescaped
   character is returned and *OUT_LEN receives the total length of the
   escape, both `$` included.  Otherwise 0 is returned and *OUT_LEN is
   left untouched. */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  static const struct
  {
    char first;
    char second;
    char unescaped;
  } two_letter[] = {
    { 'S', 'P', '@' },
    { 'B', 'P', '*' },
    { 'R', 'F', '&' },
    { 'L', 'T', '<' },
    { 'G', 'T', '>' },
    { 'L', 'P', '(' },
    { 'R', 'P', ')' },
  };
  const char *body;
  size_t body_len, code_len, n;
  char result;
  int hi, lo;

  if (len < 3 || e[0] != '$')
    return 0;

  /* Everything after the opening `$`. */
  body = e + 1;
  body_len = len - 1;

  result = 0;
  code_len = 0;

  if (body[0] == 'C')
    {
      code_len = 1;
      result = ',';
    }
  else if (body_len > 2)
    {
      code_len = 2;

      for (n = 0; n < sizeof two_letter / sizeof two_letter[0]; n++)
        if (body[0] == two_letter[n].first
            && body[1] == two_letter[n].second)
          {
            result = two_letter[n].unescaped;
            break;
          }

      if (!result && body[0] == 'u' && body_len > 3)
        {
          code_len = 3;

          hi = decode_lower_hex_nibble (body[1]);
          if (hi < 0)
            return 0;
          lo = decode_lower_hex_nibble (body[2]);
          if (lo < 0)
            return 0;

          /* Only values in the range 0x20-0x7f are accepted. */
          if (hi > 7)
            return 0;
          result = (hi << 4) | lo;
          if (result < 0x20)
            return 0;
        }
    }

  /* The code must be recognised and followed by the closing `$`. */
  if (!result || body_len <= code_len || body[code_len] != '$')
    return 0;

  *out_len = 2 + code_len;
  return result;
}
369 
370 static void
print_ident(struct rust_demangler * rdm,struct rust_mangled_ident ident)371 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
372 {
373   char unescaped;
374   uint8_t *out, *p, d;
375   size_t len, cap, punycode_pos, j;
376   /* Punycode parameters and state. */
377   uint32_t c;
378   size_t base, t_min, t_max, skew, damp, bias, i;
379   size_t delta, w, k, t;
380 
381   if (rdm->errored || rdm->skipping_printing)
382     return;
383 
384   if (rdm->version == -1)
385     {
386       /* Ignore leading underscores preceding escape sequences.
387          The mangler inserts an underscore to make sure the
388          identifier begins with a XID_Start character. */
389       if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
390           && ident.ascii[1] == '$')
391         {
392           ident.ascii++;
393           ident.ascii_len--;
394         }
395 
396       while (ident.ascii_len > 0)
397         {
398           /* Handle legacy escape sequences ("$...$", ".." or "."). */
399           if (ident.ascii[0] == '$')
400             {
401               unescaped
402                   = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
403               if (unescaped)
404                 print_str (rdm, &unescaped, 1);
405               else
406                 {
407                   /* Unexpected escape sequence, print the rest verbatim. */
408                   print_str (rdm, ident.ascii, ident.ascii_len);
409                   return;
410                 }
411             }
412           else if (ident.ascii[0] == '.')
413             {
414               if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
415                 {
416                   /* ".." becomes "::" */
417                   PRINT ("::");
418                   len = 2;
419                 }
420               else
421                 {
422                   PRINT (".");
423                   len = 1;
424                 }
425             }
426           else
427             {
428               /* Print everything before the next escape sequence, at once. */
429               for (len = 0; len < ident.ascii_len; len++)
430                 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
431                   break;
432 
433               print_str (rdm, ident.ascii, len);
434             }
435 
436           ident.ascii += len;
437           ident.ascii_len -= len;
438         }
439 
440       return;
441     }
442 
443   if (!ident.punycode)
444     {
445       print_str (rdm, ident.ascii, ident.ascii_len);
446       return;
447     }
448 
449   len = 0;
450   cap = 4;
451   while (cap < ident.ascii_len)
452     {
453       cap *= 2;
454       /* Check for overflows. */
455       if ((cap * 4) / 4 != cap)
456         {
457           rdm->errored = 1;
458           return;
459         }
460     }
461 
462   /* Store the output codepoints as groups of 4 UTF-8 bytes. */
463   out = (uint8_t *)malloc (cap * 4);
464   if (!out)
465     {
466       rdm->errored = 1;
467       return;
468     }
469 
470   /* Populate initial output from ASCII fragment. */
471   for (len = 0; len < ident.ascii_len; len++)
472     {
473       p = out + 4 * len;
474       p[0] = 0;
475       p[1] = 0;
476       p[2] = 0;
477       p[3] = ident.ascii[len];
478     }
479 
480   /* Punycode parameters and initial state. */
481   base = 36;
482   t_min = 1;
483   t_max = 26;
484   skew = 38;
485   damp = 700;
486   bias = 72;
487   i = 0;
488   c = 0x80;
489 
490   punycode_pos = 0;
491   while (punycode_pos < ident.punycode_len)
492     {
493       /* Read one delta value. */
494       delta = 0;
495       w = 1;
496       k = 0;
497       do
498         {
499           k += base;
500           t = k < bias ? 0 : (k - bias);
501           if (t < t_min)
502             t = t_min;
503           if (t > t_max)
504             t = t_max;
505 
506           if (punycode_pos >= ident.punycode_len)
507             goto cleanup;
508           d = ident.punycode[punycode_pos++];
509 
510           if (ISLOWER (d))
511             d = d - 'a';
512           else if (ISDIGIT (d))
513             d = 26 + (d - '0');
514           else
515             {
516               rdm->errored = 1;
517               goto cleanup;
518             }
519 
520           delta += d * w;
521           w *= base - t;
522         }
523       while (d >= t);
524 
525       /* Compute the new insert position and character. */
526       len++;
527       i += delta;
528       c += i / len;
529       i %= len;
530 
531       /* Ensure enough space is available. */
532       if (cap < len)
533         {
534           cap *= 2;
535           /* Check for overflows. */
536           if ((cap * 4) / 4 != cap || cap < len)
537             {
538               rdm->errored = 1;
539               goto cleanup;
540             }
541         }
542       p = (uint8_t *)realloc (out, cap * 4);
543       if (!p)
544         {
545           rdm->errored = 1;
546           goto cleanup;
547         }
548       out = p;
549 
550       /* Move the characters after the insert position. */
551       p = out + i * 4;
552       memmove (p + 4, p, (len - i - 1) * 4);
553 
554       /* Insert the new character, as UTF-8 bytes. */
555       p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
556       p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
557       p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
558       p[3] = 0x80 | (c & 0x3f);
559 
560       /* If there are no more deltas, decoding is complete. */
561       if (punycode_pos == ident.punycode_len)
562         break;
563 
564       i++;
565 
566       /* Perform bias adaptation. */
567       delta /= damp;
568       damp = 2;
569 
570       delta += delta / len;
571       k = 0;
572       while (delta > ((base - t_min) * t_max) / 2)
573         {
574           delta /= base - t_min;
575           k += base;
576         }
577       bias = k + ((base - t_min + 1) * delta) / (delta + skew);
578     }
579 
580   /* Remove all the 0 bytes to leave behind an UTF-8 string. */
581   for (i = 0, j = 0; i < len * 4; i++)
582     if (out[i] != 0)
583       out[j++] = out[i];
584 
585   print_str (rdm, (const char *)out, j);
586 
587 cleanup:
588   free (out);
589 }
590 
591 /* Print the lifetime according to the previously decoded index.
592    An index of `0` always refers to `'_`, but starting with `1`,
593    indices refer to late-bound lifetimes introduced by a binder. */
594 static void
print_lifetime_from_index(struct rust_demangler * rdm,uint64_t lt)595 print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
596 {
597   char c;
598   uint64_t depth;
599 
600   PRINT ("'");
601   if (lt == 0)
602     {
603       PRINT ("_");
604       return;
605     }
606 
607   depth = rdm->bound_lifetime_depth - lt;
608   /* Try to print lifetimes alphabetically first. */
609   if (depth < 26)
610     {
611       c = 'a' + depth;
612       print_str (rdm, &c, 1);
613     }
614   else
615     {
616       /* Use `'_123` after running out of letters. */
617       PRINT ("_");
618       print_uint64 (rdm, depth);
619     }
620 }
621 
622 /* Demangling functions. */
623 
/* Forward declarations: the demangling routines below call each other
   recursively. */

/* Paths, binders and generic arguments. */
static void demangle_binder (struct rust_demangler *rdm);
static void demangle_path (struct rust_demangler *rdm, int in_value);
static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
static void demangle_generic_arg (struct rust_demangler *rdm);

/* Types. */
static void demangle_type (struct rust_demangler *rdm);
static void demangle_dyn_trait (struct rust_demangler *rdm);

/* Constants. */
static void demangle_const (struct rust_demangler *rdm);
static void demangle_const_uint (struct rust_demangler *rdm);
static void demangle_const_int (struct rust_demangler *rdm);
static void demangle_const_bool (struct rust_demangler *rdm);
static void demangle_const_char (struct rust_demangler *rdm);
635 
636 /* Optionally enter a binder ('G') for late-bound lifetimes,
637    printing e.g. `for<'a, 'b> `, and make those lifetimes visible
638    to the caller (via depth level, which the caller should reset). */
639 static void
demangle_binder(struct rust_demangler * rdm)640 demangle_binder (struct rust_demangler *rdm)
641 {
642   uint64_t i, bound_lifetimes;
643 
644   if (rdm->errored)
645     return;
646 
647   bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
648   if (bound_lifetimes > 0)
649     {
650       PRINT ("for<");
651       for (i = 0; i < bound_lifetimes; i++)
652         {
653           if (i > 0)
654             PRINT (", ");
655           rdm->bound_lifetime_depth++;
656           print_lifetime_from_index (rdm, 1);
657         }
658       PRINT ("> ");
659     }
660 }
661 
662 static void
demangle_path(struct rust_demangler * rdm,int in_value)663 demangle_path (struct rust_demangler *rdm, int in_value)
664 {
665   char tag, ns;
666   int was_skipping_printing;
667   size_t i, backref, old_next;
668   uint64_t dis;
669   struct rust_mangled_ident name;
670 
671   if (rdm->errored)
672     return;
673 
674   switch (tag = next (rdm))
675     {
676     case 'C':
677       dis = parse_disambiguator (rdm);
678       name = parse_ident (rdm);
679 
680       print_ident (rdm, name);
681       if (rdm->verbose)
682         {
683           PRINT ("[");
684           print_uint64_hex (rdm, dis);
685           PRINT ("]");
686         }
687       break;
688     case 'N':
689       ns = next (rdm);
690       if (!ISLOWER (ns) && !ISUPPER (ns))
691         {
692           rdm->errored = 1;
693           return;
694         }
695 
696       demangle_path (rdm, in_value);
697 
698       dis = parse_disambiguator (rdm);
699       name = parse_ident (rdm);
700 
701       if (ISUPPER (ns))
702         {
703           /* Special namespaces, like closures and shims. */
704           PRINT ("::{");
705           switch (ns)
706             {
707             case 'C':
708               PRINT ("closure");
709               break;
710             case 'S':
711               PRINT ("shim");
712               break;
713             default:
714               print_str (rdm, &ns, 1);
715             }
716           if (name.ascii || name.punycode)
717             {
718               PRINT (":");
719               print_ident (rdm, name);
720             }
721           PRINT ("#");
722           print_uint64 (rdm, dis);
723           PRINT ("}");
724         }
725       else
726         {
727           /* Implementation-specific/unspecified namespaces. */
728 
729           if (name.ascii || name.punycode)
730             {
731               PRINT ("::");
732               print_ident (rdm, name);
733             }
734         }
735       break;
736     case 'M':
737     case 'X':
738       /* Ignore the `impl`'s own path.*/
739       parse_disambiguator (rdm);
740       was_skipping_printing = rdm->skipping_printing;
741       rdm->skipping_printing = 1;
742       demangle_path (rdm, in_value);
743       rdm->skipping_printing = was_skipping_printing;
744       /* fallthrough */
745     case 'Y':
746       PRINT ("<");
747       demangle_type (rdm);
748       if (tag != 'M')
749         {
750           PRINT (" as ");
751           demangle_path (rdm, 0);
752         }
753       PRINT (">");
754       break;
755     case 'I':
756       demangle_path (rdm, in_value);
757       if (in_value)
758         PRINT ("::");
759       PRINT ("<");
760       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
761         {
762           if (i > 0)
763             PRINT (", ");
764           demangle_generic_arg (rdm);
765         }
766       PRINT (">");
767       break;
768     case 'B':
769       backref = parse_integer_62 (rdm);
770       if (!rdm->skipping_printing)
771         {
772           old_next = rdm->next;
773           rdm->next = backref;
774           demangle_path (rdm, in_value);
775           rdm->next = old_next;
776         }
777       break;
778     default:
779       rdm->errored = 1;
780       return;
781     }
782 }
783 
/* Demangle one generic argument: a lifetime (introduced by `L`), a
   constant (introduced by `K`), or otherwise a type. */
static void
demangle_generic_arg (struct rust_demangler *rdm)
{
  uint64_t lifetime;

  if (eat (rdm, 'L'))
    {
      lifetime = parse_integer_62 (rdm);
      print_lifetime_from_index (rdm, lifetime);
      return;
    }

  if (eat (rdm, 'K'))
    {
      demangle_const (rdm);
      return;
    }

  demangle_type (rdm);
}
798 
/* Return the printed form of the basic type whose one-letter code is
   TAG, or NULL if TAG is not a basic type code. */
static const char *
basic_type (char tag)
{
  static const struct
  {
    char tag;
    const char *name;
  } basic_types[] = {
    { 'b', "bool" },
    { 'c', "char" },
    { 'e', "str" },
    { 'u', "()" },
    { 'a', "i8" },
    { 's', "i16" },
    { 'l', "i32" },
    { 'x', "i64" },
    { 'n', "i128" },
    { 'i', "isize" },
    { 'h', "u8" },
    { 't', "u16" },
    { 'm', "u32" },
    { 'y', "u64" },
    { 'o', "u128" },
    { 'j', "usize" },
    { 'f', "f32" },
    { 'd', "f64" },
    { 'z', "!" },
    { 'p', "_" },
    { 'v', "..." },
  };
  size_t n;

  for (n = 0; n < sizeof basic_types / sizeof basic_types[0]; n++)
    if (basic_types[n].tag == tag)
      return basic_types[n].name;

  return NULL;
}
851 
852 static void
demangle_type(struct rust_demangler * rdm)853 demangle_type (struct rust_demangler *rdm)
854 {
855   char tag;
856   size_t i, old_next, backref;
857   uint64_t lt, old_bound_lifetime_depth;
858   const char *basic;
859   struct rust_mangled_ident abi;
860 
861   if (rdm->errored)
862     return;
863 
864   tag = next (rdm);
865 
866   basic = basic_type (tag);
867   if (basic)
868     {
869       PRINT (basic);
870       return;
871     }
872 
873   switch (tag)
874     {
875     case 'R':
876     case 'Q':
877       PRINT ("&");
878       if (eat (rdm, 'L'))
879         {
880           lt = parse_integer_62 (rdm);
881           if (lt)
882             {
883               print_lifetime_from_index (rdm, lt);
884               PRINT (" ");
885             }
886         }
887       if (tag != 'R')
888         PRINT ("mut ");
889       demangle_type (rdm);
890       break;
891     case 'P':
892     case 'O':
893       PRINT ("*");
894       if (tag != 'P')
895         PRINT ("mut ");
896       else
897         PRINT ("const ");
898       demangle_type (rdm);
899       break;
900     case 'A':
901     case 'S':
902       PRINT ("[");
903       demangle_type (rdm);
904       if (tag == 'A')
905         {
906           PRINT ("; ");
907           demangle_const (rdm);
908         }
909       PRINT ("]");
910       break;
911     case 'T':
912       PRINT ("(");
913       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
914         {
915           if (i > 0)
916             PRINT (", ");
917           demangle_type (rdm);
918         }
919       if (i == 1)
920         PRINT (",");
921       PRINT (")");
922       break;
923     case 'F':
924       old_bound_lifetime_depth = rdm->bound_lifetime_depth;
925       demangle_binder (rdm);
926 
927       if (eat (rdm, 'U'))
928         PRINT ("unsafe ");
929 
930       if (eat (rdm, 'K'))
931         {
932           if (eat (rdm, 'C'))
933             {
934               abi.ascii = "C";
935               abi.ascii_len = 1;
936             }
937           else
938             {
939               abi = parse_ident (rdm);
940               if (!abi.ascii || abi.punycode)
941                 {
942                   rdm->errored = 1;
943                   goto restore;
944                 }
945             }
946 
947           PRINT ("extern \"");
948 
949           /* If the ABI had any `-`, they were replaced with `_`,
950              so the parts between `_` have to be re-joined with `-`. */
951           for (i = 0; i < abi.ascii_len; i++)
952             {
953               if (abi.ascii[i] == '_')
954                 {
955                   print_str (rdm, abi.ascii, i);
956                   PRINT ("-");
957                   abi.ascii += i + 1;
958                   abi.ascii_len -= i + 1;
959                   i = 0;
960                 }
961             }
962           print_str (rdm, abi.ascii, abi.ascii_len);
963 
964           PRINT ("\" ");
965         }
966 
967       PRINT ("fn(");
968       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
969         {
970           if (i > 0)
971             PRINT (", ");
972           demangle_type (rdm);
973         }
974       PRINT (")");
975 
976       if (eat (rdm, 'u'))
977         {
978           /* Skip printing the return type if it's 'u', i.e. `()`. */
979         }
980       else
981         {
982           PRINT (" -> ");
983           demangle_type (rdm);
984         }
985 
986     /* Restore `bound_lifetime_depth` to outside the binder. */
987     restore:
988       rdm->bound_lifetime_depth = old_bound_lifetime_depth;
989       break;
990     case 'D':
991       PRINT ("dyn ");
992 
993       old_bound_lifetime_depth = rdm->bound_lifetime_depth;
994       demangle_binder (rdm);
995 
996       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
997         {
998           if (i > 0)
999             PRINT (" + ");
1000           demangle_dyn_trait (rdm);
1001         }
1002 
1003       /* Restore `bound_lifetime_depth` to outside the binder. */
1004       rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1005 
1006       if (!eat (rdm, 'L'))
1007         {
1008           rdm->errored = 1;
1009           return;
1010         }
1011       lt = parse_integer_62 (rdm);
1012       if (lt)
1013         {
1014           PRINT (" + ");
1015           print_lifetime_from_index (rdm, lt);
1016         }
1017       break;
1018     case 'B':
1019       backref = parse_integer_62 (rdm);
1020       if (!rdm->skipping_printing)
1021         {
1022           old_next = rdm->next;
1023           rdm->next = backref;
1024           demangle_type (rdm);
1025           rdm->next = old_next;
1026         }
1027       break;
1028     default:
1029       /* Go back to the tag, so `demangle_path` also sees it. */
1030       rdm->next--;
1031       demangle_path (rdm, 0);
1032     }
1033 }
1034 
1035 /* A trait in a trait object may have some "existential projections"
1036    (i.e. associated type bindings) after it, which should be printed
1037    in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1038    To this end, this method will keep the `<...>` of an 'I' path
1039    open, by omitting the `>`, and return `Ok(true)` in that case. */
1040 static int
demangle_path_maybe_open_generics(struct rust_demangler * rdm)1041 demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1042 {
1043   int open;
1044   size_t i, old_next, backref;
1045 
1046   open = 0;
1047 
1048   if (rdm->errored)
1049     return open;
1050 
1051   if (eat (rdm, 'B'))
1052     {
1053       backref = parse_integer_62 (rdm);
1054       if (!rdm->skipping_printing)
1055         {
1056           old_next = rdm->next;
1057           rdm->next = backref;
1058           open = demangle_path_maybe_open_generics (rdm);
1059           rdm->next = old_next;
1060         }
1061     }
1062   else if (eat (rdm, 'I'))
1063     {
1064       demangle_path (rdm, 0);
1065       PRINT ("<");
1066       open = 1;
1067       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1068         {
1069           if (i > 0)
1070             PRINT (", ");
1071           demangle_generic_arg (rdm);
1072         }
1073     }
1074   else
1075     demangle_path (rdm, 0);
1076   return open;
1077 }
1078 
1079 static void
demangle_dyn_trait(struct rust_demangler * rdm)1080 demangle_dyn_trait (struct rust_demangler *rdm)
1081 {
1082   int open;
1083   struct rust_mangled_ident name;
1084 
1085   if (rdm->errored)
1086     return;
1087 
1088   open = demangle_path_maybe_open_generics (rdm);
1089 
1090   while (eat (rdm, 'p'))
1091     {
1092       if (!open)
1093         PRINT ("<");
1094       else
1095         PRINT (", ");
1096       open = 1;
1097 
1098       name = parse_ident (rdm);
1099       print_ident (rdm, name);
1100       PRINT (" = ");
1101       demangle_type (rdm);
1102     }
1103 
1104   if (open)
1105     PRINT (">");
1106 }
1107 
1108 static void
demangle_const(struct rust_demangler * rdm)1109 demangle_const (struct rust_demangler *rdm)
1110 {
1111   char ty_tag;
1112   size_t old_next, backref;
1113 
1114   if (rdm->errored)
1115     return;
1116 
1117   if (eat (rdm, 'B'))
1118     {
1119       backref = parse_integer_62 (rdm);
1120       if (!rdm->skipping_printing)
1121         {
1122           old_next = rdm->next;
1123           rdm->next = backref;
1124           demangle_const (rdm);
1125           rdm->next = old_next;
1126         }
1127       return;
1128     }
1129 
1130   ty_tag = next (rdm);
1131   switch (ty_tag)
1132     {
1133     /* Placeholder. */
1134     case 'p':
1135       PRINT ("_");
1136       return;
1137 
1138     /* Unsigned integer types. */
1139     case 'h':
1140     case 't':
1141     case 'm':
1142     case 'y':
1143     case 'o':
1144     case 'j':
1145       demangle_const_uint (rdm);
1146       break;
1147 
1148     /* Signed integer types. */
1149     case 'a':
1150     case 's':
1151     case 'l':
1152     case 'x':
1153     case 'n':
1154     case 'i':
1155       demangle_const_int (rdm);
1156       break;
1157 
1158     /* Boolean. */
1159     case 'b':
1160       demangle_const_bool (rdm);
1161       break;
1162 
1163     /* Character. */
1164     case 'c':
1165       demangle_const_char (rdm);
1166       break;
1167 
1168     default:
1169       rdm->errored = 1;
1170       return;
1171     }
1172 
1173   if (rdm->errored)
1174     return;
1175 
1176   if (rdm->verbose)
1177     {
1178       PRINT (": ");
1179       PRINT (basic_type (ty_tag));
1180     }
1181 }
1182 
1183 static void
demangle_const_uint(struct rust_demangler * rdm)1184 demangle_const_uint (struct rust_demangler *rdm)
1185 {
1186   size_t hex_len;
1187   uint64_t value;
1188 
1189   if (rdm->errored)
1190     return;
1191 
1192   hex_len = parse_hex_nibbles (rdm, &value);
1193 
1194   if (hex_len > 16)
1195     {
1196       /* Print anything that doesn't fit in `uint64_t` verbatim. */
1197       PRINT ("0x");
1198       print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1199     }
1200   else if (hex_len > 0)
1201     print_uint64 (rdm, value);
1202   else
1203     rdm->errored = 1;
1204 }
1205 
/* Demangle a signed integer const: an optional `n` marking a negative
   value (printed as `-`), followed by the magnitude, which is handled
   by demangle_const_uint.  */
static void
demangle_const_int (struct rust_demangler *rdm)
{
  int negative = eat (rdm, 'n');

  if (negative)
    PRINT ("-");

  demangle_const_uint (rdm);
}
1213 
1214 static void
demangle_const_bool(struct rust_demangler * rdm)1215 demangle_const_bool (struct rust_demangler *rdm)
1216 {
1217   uint64_t value;
1218 
1219   if (parse_hex_nibbles (rdm, &value) != 1)
1220     {
1221       rdm->errored = 1;
1222       return;
1223     }
1224 
1225   if (value == 0)
1226     PRINT ("false");
1227   else if (value == 1)
1228     PRINT ("true");
1229   else
1230     rdm->errored = 1;
1231 }
1232 
1233 static void
demangle_const_char(struct rust_demangler * rdm)1234 demangle_const_char (struct rust_demangler *rdm)
1235 {
1236   size_t hex_len;
1237   uint64_t value;
1238 
1239   hex_len = parse_hex_nibbles (rdm, &value);
1240 
1241   if (hex_len == 0 || hex_len > 8)
1242     {
1243       rdm->errored = 1;
1244       return;
1245     }
1246 
1247   /* Match Rust's character "debug" output as best as we can. */
1248   PRINT ("'");
1249   if (value == '\t')
1250     PRINT ("\\t");
1251   else if (value == '\r')
1252     PRINT ("\\r");
1253   else if (value == '\n')
1254     PRINT ("\\n");
1255   else if (value > ' ' && value < '~')
1256     {
1257       /* Rust also considers many non-ASCII codepoints to be printable, but
1258 	 that logic is not easily ported to C. */
1259       char c = value;
1260       print_str (rdm, &c, 1);
1261     }
1262   else
1263     {
1264       PRINT ("\\u{");
1265       print_uint64_hex (rdm, value);
1266       PRINT ("}");
1267     }
1268   PRINT ("'");
1269 }
1270 
1271 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1272    The hex digits must contain at least 5 distinct digits. */
1273 static int
is_legacy_prefixed_hash(struct rust_mangled_ident ident)1274 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1275 {
1276   uint16_t seen;
1277   int nibble;
1278   size_t i, count;
1279 
1280   if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1281     return 0;
1282 
1283   seen = 0;
1284   for (i = 0; i < 16; i++)
1285     {
1286       nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1287       if (nibble < 0)
1288         return 0;
1289       seen |= (uint16_t)1 << nibble;
1290     }
1291 
1292   /* Count how many distinct digits were seen. */
1293   count = 0;
1294   while (seen)
1295     {
1296       if (seen & 1)
1297         count++;
1298       seen >>= 1;
1299     }
1300 
1301   return count >= 5;
1302 }
1303 
1304 int
rust_demangle_callback(const char * mangled,int options,demangle_callbackref callback,void * opaque)1305 rust_demangle_callback (const char *mangled, int options,
1306                         demangle_callbackref callback, void *opaque)
1307 {
1308   const char *p;
1309   struct rust_demangler rdm;
1310   struct rust_mangled_ident ident;
1311 
1312   rdm.sym = mangled;
1313   rdm.sym_len = 0;
1314 
1315   rdm.callback_opaque = opaque;
1316   rdm.callback = callback;
1317 
1318   rdm.next = 0;
1319   rdm.errored = 0;
1320   rdm.skipping_printing = 0;
1321   rdm.verbose = (options & DMGL_VERBOSE) != 0;
1322   rdm.version = 0;
1323   rdm.bound_lifetime_depth = 0;
1324 
1325   /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1326   if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1327     rdm.sym += 2;
1328   else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1329     {
1330       rdm.sym += 3;
1331       rdm.version = -1;
1332     }
1333   else
1334     return 0;
1335 
1336   /* Paths (v0) always start with uppercase characters. */
1337   if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1338     return 0;
1339 
1340   /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1341   for (p = rdm.sym; *p; p++)
1342     {
1343       rdm.sym_len++;
1344 
1345       if (*p == '_' || ISALNUM (*p))
1346         continue;
1347 
1348       /* Legacy Rust symbols can also contain [.:$] characters. */
1349       if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
1350         continue;
1351 
1352       return 0;
1353     }
1354 
1355   /* Legacy Rust symbols need to be handled separately. */
1356   if (rdm.version == -1)
1357     {
1358       /* Legacy Rust symbols always end with E. */
1359       if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1360         return 0;
1361       rdm.sym_len--;
1362 
1363       /* Legacy Rust symbols also always end with a path segment
1364          that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1365          This early check, before any parse_ident calls, should
1366          quickly filter out most C++ symbols unrelated to Rust. */
1367       if (!(rdm.sym_len > 19
1368             && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1369         return 0;
1370 
1371       do
1372         {
1373           ident = parse_ident (&rdm);
1374           if (rdm.errored || !ident.ascii)
1375             return 0;
1376         }
1377       while (rdm.next < rdm.sym_len);
1378 
1379       /* The last path segment should be the hash. */
1380       if (!is_legacy_prefixed_hash (ident))
1381         return 0;
1382 
1383       /* Reset the state for a second pass, to print the symbol. */
1384       rdm.next = 0;
1385       if (!rdm.verbose && rdm.sym_len > 19)
1386         {
1387           /* Hide the last segment, containing the hash, if not verbose. */
1388           rdm.sym_len -= 19;
1389         }
1390 
1391       do
1392         {
1393           if (rdm.next > 0)
1394             print_str (&rdm, "::", 2);
1395 
1396           ident = parse_ident (&rdm);
1397           print_ident (&rdm, ident);
1398         }
1399       while (rdm.next < rdm.sym_len);
1400     }
1401   else
1402     {
1403       demangle_path (&rdm, 1);
1404 
1405       /* Skip instantiating crate. */
1406       if (!rdm.errored && rdm.next < rdm.sym_len)
1407         {
1408           rdm.skipping_printing = 1;
1409           demangle_path (&rdm, 0);
1410         }
1411 
1412       /* It's an error to not reach the end. */
1413       rdm.errored |= rdm.next != rdm.sym_len;
1414     }
1415 
1416   return !rdm.errored;
1417 }
1418 
/* Growable string buffers. */
struct str_buf
{
  /* Storage obtained from realloc, or NULL while nothing has been
     allocated (and again after an allocation failure).  */
  char *ptr;
  /* Number of bytes of PTR currently in use.  */
  size_t len;
  /* Number of bytes allocated at PTR.  */
  size_t cap;
  /* Nonzero once growing the buffer has failed; later requests are
     then ignored.  */
  int errored;
};

/* Make sure BUF has room for at least EXTRA more bytes after its
   current contents, growing the allocation by doubling if needed.

   On failure (size overflow or out of memory) BUF->errored is set.
   If realloc itself fails, the storage is also released and BUF is
   reset to empty.  Does nothing if BUF has already errored.  */
static void
str_buf_reserve (struct str_buf *buf, size_t extra)
{
  size_t available, min_new_cap, new_cap;
  char *new_ptr;

  /* Allocation failed before. */
  if (buf->errored)
    return;

  available = buf->cap - buf->len;

  if (extra <= available)
    return;

  min_new_cap = buf->cap + (extra - available);

  /* Check for overflows. */
  if (min_new_cap < buf->cap)
    {
      buf->errored = 1;
      return;
    }

  new_cap = buf->cap;

  if (new_cap == 0)
    new_cap = 4;

  /* Double capacity until sufficiently large. */
  while (new_cap < min_new_cap)
    {
      /* Check for overflows before doubling.  Testing the doubled
         value against BUF->cap afterwards misses the wrap-around when
         BUF->cap is 0, and the loop would then never terminate.  */
      if (new_cap > (size_t) -1 / 2)
        {
          buf->errored = 1;
          return;
        }

      new_cap *= 2;
    }

  new_ptr = (char *)realloc (buf->ptr, new_cap);
  if (new_ptr == NULL)
    {
      /* realloc leaves the old block untouched on failure; drop it so
         the buffer is left in a consistent empty state.  */
      free (buf->ptr);
      buf->ptr = NULL;
      buf->len = 0;
      buf->cap = 0;
      buf->errored = 1;
    }
  else
    {
      buf->ptr = new_ptr;
      buf->cap = new_cap;
    }
}
1485 
1486 static void
str_buf_append(struct str_buf * buf,const char * data,size_t len)1487 str_buf_append (struct str_buf *buf, const char *data, size_t len)
1488 {
1489   str_buf_reserve (buf, len);
1490   if (buf->errored)
1491     return;
1492 
1493   memcpy (buf->ptr + buf->len, data, len);
1494   buf->len += len;
1495 }
1496 
/* Demangler output callback that collects the output in a string
   buffer.  OPAQUE must point to the `struct str_buf` to append the LEN
   bytes at DATA to.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  struct str_buf *out = (struct str_buf *) opaque;

  str_buf_append (out, data, len);
}
1502 
1503 char *
rust_demangle(const char * mangled,int options)1504 rust_demangle (const char *mangled, int options)
1505 {
1506   struct str_buf out;
1507   int success;
1508 
1509   out.ptr = NULL;
1510   out.len = 0;
1511   out.cap = 0;
1512   out.errored = 0;
1513 
1514   success = rust_demangle_callback (mangled, options,
1515                                     str_buf_demangle_callback, &out);
1516 
1517   if (!success)
1518     {
1519       free (out.ptr);
1520       return NULL;
1521     }
1522 
1523   str_buf_append (&out, "\0", 1);
1524   return out.ptr;
1525 }
1526