1 /* Demangler for the Rust programming language
2    Copyright (C) 2016-2020 Free Software Foundation, Inc.
3    Written by David Tolnay (dtolnay@gmail.com).
4 
5 This file is part of the libiberty library.
6 Libiberty is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
10 
11 In addition to the permissions in the GNU Library General Public
12 License, the Free Software Foundation gives you unlimited permission
13 to link the compiled version of this file into combinations with other
14 programs, and to distribute those combinations without any restriction
15 coming from the use of this file.  (The Library Public License
16 restrictions do apply in other respects; for example, they cover
17 modification of the file, and distribution when not linked into a
18 combined executable.)
19 
20 Libiberty is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23 Library General Public License for more details.
24 
25 You should have received a copy of the GNU Library General Public
26 License along with libiberty; see the file COPYING.LIB.
27 If not, see <http://www.gnu.org/licenses/>.  */
28 
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "safe-ctype.h"
35 
36 #include <inttypes.h>
37 #include <sys/types.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 
42 #ifdef HAVE_STRING_H
43 #include <string.h>
44 #else
45 extern size_t strlen(const char *s);
46 extern int strncmp(const char *s1, const char *s2, size_t n);
47 extern void *memset(void *s, int c, size_t n);
48 #endif
49 
50 #include <demangle.h>
51 #include "libiberty.h"
52 
53 struct rust_demangler
54 {
55   const char *sym;
56   size_t sym_len;
57 
58   void *callback_opaque;
59   demangle_callbackref callback;
60 
61   /* Position of the next character to read from the symbol. */
62   size_t next;
63 
64   /* Non-zero if any error occurred. */
65   int errored;
66 
67   /* Non-zero if printing should be verbose (e.g. include hashes). */
68   int verbose;
69 
70   /* Rust mangling version, with legacy mangling being -1. */
71   int version;
72 };
73 
74 /* Parsing functions. */
75 
76 static char
77 peek (const struct rust_demangler *rdm)
78 {
79   if (rdm->next < rdm->sym_len)
80     return rdm->sym[rdm->next];
81   return 0;
82 }
83 
84 static char
85 next (struct rust_demangler *rdm)
86 {
87   char c = peek (rdm);
88   if (!c)
89     rdm->errored = 1;
90   else
91     rdm->next++;
92   return c;
93 }
94 
/* One identifier taken from a mangled symbol.  The bytes are not
   copied: ASCII points at ASCII_LEN bytes that are not necessarily
   NUL-terminated.  */
struct rust_mangled_ident
{
  /* Start of the ASCII part of the identifier.  */
  const char *ascii;
  /* Number of bytes at ASCII that belong to the identifier.  */
  size_t ascii_len;
};
101 
102 static struct rust_mangled_ident
103 parse_ident (struct rust_demangler *rdm)
104 {
105   char c;
106   size_t start, len;
107   struct rust_mangled_ident ident;
108 
109   ident.ascii = NULL;
110   ident.ascii_len = 0;
111 
112   c = next (rdm);
113   if (!ISDIGIT (c))
114     {
115       rdm->errored = 1;
116       return ident;
117     }
118   len = c - '0';
119 
120   if (c != '0')
121     while (ISDIGIT (peek (rdm)))
122       len = len * 10 + (next (rdm) - '0');
123 
124   start = rdm->next;
125   rdm->next += len;
126   /* Check for overflows. */
127   if ((start > rdm->next) || (rdm->next > rdm->sym_len))
128     {
129       rdm->errored = 1;
130       return ident;
131     }
132 
133   ident.ascii = rdm->sym + start;
134   ident.ascii_len = len;
135 
136   if (ident.ascii_len == 0)
137     ident.ascii = NULL;
138 
139   return ident;
140 }
141 
142 /* Printing functions. */
143 
144 static void
145 print_str (struct rust_demangler *rdm, const char *data, size_t len)
146 {
147   if (!rdm->errored)
148     rdm->callback (data, len, rdm->callback_opaque);
149 }
150 
151 #define PRINT(s) print_str (rdm, s, strlen (s))
152 
/* Decode a single lowercase hexadecimal digit.
   Returns its value (0..15) for '0'-'9' and 'a'-'f', and -1 for any
   other character; uppercase 'A'-'F' are not accepted.  */
static int
decode_lower_hex_nibble (char nibble)
{
  if (nibble >= 'a' && nibble <= 'f')
    return 10 + (nibble - 'a');

  if (nibble >= '0' && nibble <= '9')
    return nibble - '0';

  return -1;
}
163 
/* Decode the "$...$" escape sequence found at the start of E, where
   LEN is the number of bytes available at E.

   Recognized forms are "$C$", the two-letter codes listed in the table
   below, and "$uXX$" with XX being two lowercase hex digits whose value
   lies in 0x20..0x7f.

   On success the unescaped character is returned and *OUT_LEN receives
   the total length of the escape, both '$' included.  On failure 0 is
   returned and *OUT_LEN is left untouched.  */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  static const struct
  {
    char first;
    char second;
    char value;
  } two_letter[] = {
    { 'S', 'P', '@' },
    { 'B', 'P', '*' },
    { 'R', 'F', '&' },
    { 'L', 'T', '<' },
    { 'G', 'T', '>' },
    { 'L', 'P', '(' },
    { 'R', 'P', ')' }
  };
  const char *body;
  size_t remaining, body_len = 0, i;
  char result = 0;

  /* The shortest possible escape is "$C$".  */
  if (len < 3 || e[0] != '$')
    return 0;

  /* Step over the opening '$'.  */
  body = e + 1;
  remaining = len - 1;

  if (body[0] == 'C')
    {
      body_len = 1;
      result = ',';
    }
  else if (remaining > 2)
    {
      body_len = 2;

      for (i = 0; i < sizeof two_letter / sizeof two_letter[0]; i++)
        if (body[0] == two_letter[i].first
            && body[1] == two_letter[i].second)
          {
            result = two_letter[i].value;
            break;
          }

      if (!result && body[0] == 'u' && remaining > 3)
        {
          int hi, lo;

          body_len = 3;

          hi = decode_lower_hex_nibble (body[1]);
          lo = decode_lower_hex_nibble (body[2]);
          if (hi < 0 || lo < 0)
            return 0;

          /* Values of 0x80 and above, and values below 0x20, are
             rejected.  */
          if (hi > 7)
            return 0;
          result = (hi << 4) | lo;
          if (result < 0x20)
            return 0;
        }
    }

  /* Unknown escape.  */
  if (!result)
    return 0;

  /* The closing '$' must follow the escape body.  */
  if (remaining <= body_len || body[body_len] != '$')
    return 0;

  *out_len = 2 + body_len;
  return result;
}
228 
229 static void
230 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
231 {
232   char unescaped;
233   size_t len;
234 
235   if (rdm->errored)
236     return;
237 
238   if (rdm->version == -1)
239     {
240       /* Ignore leading underscores preceding escape sequences.
241          The mangler inserts an underscore to make sure the
242          identifier begins with a XID_Start character. */
243       if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
244           && ident.ascii[1] == '$')
245         {
246           ident.ascii++;
247           ident.ascii_len--;
248         }
249 
250       while (ident.ascii_len > 0)
251         {
252           /* Handle legacy escape sequences ("$...$", ".." or "."). */
253           if (ident.ascii[0] == '$')
254             {
255               unescaped
256                   = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
257               if (unescaped)
258                 print_str (rdm, &unescaped, 1);
259               else
260                 {
261                   /* Unexpected escape sequence, print the rest verbatim. */
262                   print_str (rdm, ident.ascii, ident.ascii_len);
263                   return;
264                 }
265             }
266           else if (ident.ascii[0] == '.')
267             {
268               if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
269                 {
270                   /* ".." becomes "::" */
271                   PRINT ("::");
272                   len = 2;
273                 }
274               else
275                 {
276                   /* "." becomes "-" */
277                   PRINT ("-");
278                   len = 1;
279                 }
280             }
281           else
282             {
283               /* Print everything before the next escape sequence, at once. */
284               for (len = 0; len < ident.ascii_len; len++)
285                 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
286                   break;
287 
288               print_str (rdm, ident.ascii, len);
289             }
290 
291           ident.ascii += len;
292           ident.ascii_len -= len;
293         }
294 
295       return;
296     }
297 }
298 
299 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
300    The hex digits must contain at least 5 distinct digits. */
301 static int
302 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
303 {
304   uint16_t seen;
305   int nibble;
306   size_t i, count;
307 
308   if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
309     return 0;
310 
311   seen = 0;
312   for (i = 0; i < 16; i++)
313     {
314       nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
315       if (nibble < 0)
316         return 0;
317       seen |= (uint16_t)1 << nibble;
318     }
319 
320   /* Count how many distinct digits were seen. */
321   count = 0;
322   while (seen)
323     {
324       if (seen & 1)
325         count++;
326       seen >>= 1;
327     }
328 
329   return count >= 5;
330 }
331 
332 int
333 rust_demangle_callback (const char *mangled, int options,
334                         demangle_callbackref callback, void *opaque)
335 {
336   const char *p;
337   struct rust_demangler rdm;
338   struct rust_mangled_ident ident;
339 
340   rdm.sym = mangled;
341   rdm.sym_len = 0;
342 
343   rdm.callback_opaque = opaque;
344   rdm.callback = callback;
345 
346   rdm.next = 0;
347   rdm.errored = 0;
348   rdm.verbose = (options & DMGL_VERBOSE) != 0;
349   rdm.version = 0;
350 
351   /* Rust symbols always start with _ZN (legacy). */
352   if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
353     {
354       rdm.sym += 3;
355       rdm.version = -1;
356     }
357   else
358     return 0;
359 
360   /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */
361   for (p = rdm.sym; *p; p++)
362     {
363       rdm.sym_len++;
364 
365       if (*p == '_' || ISALNUM (*p))
366         continue;
367 
368       if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
369         continue;
370 
371       return 0;
372     }
373 
374   /* Legacy Rust symbols need to be handled separately. */
375   if (rdm.version == -1)
376     {
377       /* Legacy Rust symbols always end with E. */
378       if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
379         return 0;
380       rdm.sym_len--;
381 
382       /* Legacy Rust symbols also always end with a path segment
383          that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
384          This early check, before any parse_ident calls, should
385          quickly filter out most C++ symbols unrelated to Rust. */
386       if (!(rdm.sym_len > 19
387             && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
388         return 0;
389 
390       do
391         {
392           ident = parse_ident (&rdm);
393           if (rdm.errored || !ident.ascii)
394             return 0;
395         }
396       while (rdm.next < rdm.sym_len);
397 
398       /* The last path segment should be the hash. */
399       if (!is_legacy_prefixed_hash (ident))
400         return 0;
401 
402       /* Reset the state for a second pass, to print the symbol. */
403       rdm.next = 0;
404       if (!rdm.verbose && rdm.sym_len > 19)
405         {
406           /* Hide the last segment, containing the hash, if not verbose. */
407           rdm.sym_len -= 19;
408         }
409 
410       do
411         {
412           if (rdm.next > 0)
413             print_str (&rdm, "::", 2);
414 
415           ident = parse_ident (&rdm);
416           print_ident (&rdm, ident);
417         }
418       while (rdm.next < rdm.sym_len);
419     }
420   else
421     return 0;
422 
423   return !rdm.errored;
424 }
425 
/* Growable string buffer.  */
struct str_buf
{
  /* Storage holding the bytes written so far (NULL while empty).  */
  char *ptr;
  /* Number of bytes currently stored at PTR.  */
  size_t len;
  /* Number of bytes allocated at PTR.  */
  size_t cap;
  /* Non-zero once growing the buffer has failed.  */
  int errored;
};
434 
435 static void
436 str_buf_reserve (struct str_buf *buf, size_t extra)
437 {
438   size_t available, min_new_cap, new_cap;
439   char *new_ptr;
440 
441   /* Allocation failed before. */
442   if (buf->errored)
443     return;
444 
445   available = buf->cap - buf->len;
446 
447   if (extra <= available)
448     return;
449 
450   min_new_cap = buf->cap + (extra - available);
451 
452   /* Check for overflows. */
453   if (min_new_cap < buf->cap)
454     {
455       buf->errored = 1;
456       return;
457     }
458 
459   new_cap = buf->cap;
460 
461   if (new_cap == 0)
462     new_cap = 4;
463 
464   /* Double capacity until sufficiently large. */
465   while (new_cap < min_new_cap)
466     {
467       new_cap *= 2;
468 
469       /* Check for overflows. */
470       if (new_cap < buf->cap)
471         {
472           buf->errored = 1;
473           return;
474         }
475     }
476 
477   new_ptr = (char *)realloc (buf->ptr, new_cap);
478   if (new_ptr == NULL)
479     {
480       free (buf->ptr);
481       buf->ptr = NULL;
482       buf->len = 0;
483       buf->cap = 0;
484       buf->errored = 1;
485     }
486   else
487     {
488       buf->ptr = new_ptr;
489       buf->cap = new_cap;
490     }
491 }
492 
493 static void
494 str_buf_append (struct str_buf *buf, const char *data, size_t len)
495 {
496   str_buf_reserve (buf, len);
497   if (buf->errored)
498     return;
499 
500   memcpy (buf->ptr + buf->len, data, len);
501   buf->len += len;
502 }
503 
/* Output callback that collects demangled text into a string buffer;
   OPAQUE is the struct str_buf to append to.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  struct str_buf *buf = (struct str_buf *) opaque;

  str_buf_append (buf, data, len);
}
509 
510 char *
511 rust_demangle (const char *mangled, int options)
512 {
513   struct str_buf out;
514   int success;
515 
516   out.ptr = NULL;
517   out.len = 0;
518   out.cap = 0;
519   out.errored = 0;
520 
521   success = rust_demangle_callback (mangled, options,
522                                     str_buf_demangle_callback, &out);
523 
524   if (!success)
525     {
526       free (out.ptr);
527       return NULL;
528     }
529 
530   str_buf_append (&out, "\0", 1);
531   return out.ptr;
532 }
533