1 /* Demangler for the Rust programming language 2 Copyright (C) 2016-2020 Free Software Foundation, Inc. 3 Written by David Tolnay (dtolnay@gmail.com). 4 5 This file is part of the libiberty library. 6 Libiberty is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Library General Public 8 License as published by the Free Software Foundation; either 9 version 2 of the License, or (at your option) any later version. 10 11 In addition to the permissions in the GNU Library General Public 12 License, the Free Software Foundation gives you unlimited permission 13 to link the compiled version of this file into combinations with other 14 programs, and to distribute those combinations without any restriction 15 coming from the use of this file. (The Library Public License 16 restrictions do apply in other respects; for example, they cover 17 modification of the file, and distribution when not linked into a 18 combined executable.) 19 20 Libiberty is distributed in the hope that it will be useful, 21 but WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 Library General Public License for more details. 24 25 You should have received a copy of the GNU Library General Public 26 License along with libiberty; see the file COPYING.LIB. 27 If not, see <http://www.gnu.org/licenses/>. */ 28 29 30 #ifdef HAVE_CONFIG_H 31 #include "config.h" 32 #endif 33 34 #include "safe-ctype.h" 35 36 #include <inttypes.h> 37 #include <sys/types.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 42 #ifdef HAVE_STRING_H 43 #include <string.h> 44 #else 45 extern size_t strlen(const char *s); 46 extern int strncmp(const char *s1, const char *s2, size_t n); 47 extern void *memset(void *s, int c, size_t n); 48 #endif 49 50 #include <demangle.h> 51 #include "libiberty.h" 52 53 struct rust_demangler 54 { 55 const char *sym; 56 size_t sym_len; 57 58 void *callback_opaque; 59 demangle_callbackref callback; 60 61 /* Position of the next character to read from the symbol. */ 62 size_t next; 63 64 /* Non-zero if any error occurred. */ 65 int errored; 66 67 /* Non-zero if printing should be verbose (e.g. include hashes). */ 68 int verbose; 69 70 /* Rust mangling version, with legacy mangling being -1. */ 71 int version; 72 }; 73 74 /* Parsing functions. */ 75 76 static char 77 peek (const struct rust_demangler *rdm) 78 { 79 if (rdm->next < rdm->sym_len) 80 return rdm->sym[rdm->next]; 81 return 0; 82 } 83 84 static char 85 next (struct rust_demangler *rdm) 86 { 87 char c = peek (rdm); 88 if (!c) 89 rdm->errored = 1; 90 else 91 rdm->next++; 92 return c; 93 } 94 95 struct rust_mangled_ident 96 { 97 /* ASCII part of the identifier. */ 98 const char *ascii; 99 size_t ascii_len; 100 }; 101 102 static struct rust_mangled_ident 103 parse_ident (struct rust_demangler *rdm) 104 { 105 char c; 106 size_t start, len; 107 struct rust_mangled_ident ident; 108 109 ident.ascii = NULL; 110 ident.ascii_len = 0; 111 112 c = next (rdm); 113 if (!ISDIGIT (c)) 114 { 115 rdm->errored = 1; 116 return ident; 117 } 118 len = c - '0'; 119 120 if (c != '0') 121 while (ISDIGIT (peek (rdm))) 122 len = len * 10 + (next (rdm) - '0'); 123 124 start = rdm->next; 125 rdm->next += len; 126 /* Check for overflows. */ 127 if ((start > rdm->next) || (rdm->next > rdm->sym_len)) 128 { 129 rdm->errored = 1; 130 return ident; 131 } 132 133 ident.ascii = rdm->sym + start; 134 ident.ascii_len = len; 135 136 if (ident.ascii_len == 0) 137 ident.ascii = NULL; 138 139 return ident; 140 } 141 142 /* Printing functions. */ 143 144 static void 145 print_str (struct rust_demangler *rdm, const char *data, size_t len) 146 { 147 if (!rdm->errored) 148 rdm->callback (data, len, rdm->callback_opaque); 149 } 150 151 #define PRINT(s) print_str (rdm, s, strlen (s)) 152 153 /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */ 154 static int 155 decode_lower_hex_nibble (char nibble) 156 { 157 if ('0' <= nibble && nibble <= '9') 158 return nibble - '0'; 159 if ('a' <= nibble && nibble <= 'f') 160 return 0xa + (nibble - 'a'); 161 return -1; 162 } 163 164 /* Return the unescaped character for a "$...$" escape, or 0 if invalid. */ 165 static char 166 decode_legacy_escape (const char *e, size_t len, size_t *out_len) 167 { 168 char c = 0; 169 size_t escape_len = 0; 170 int lo_nibble = -1, hi_nibble = -1; 171 172 if (len < 3 || e[0] != '$') 173 return 0; 174 175 e++; 176 len--; 177 178 if (e[0] == 'C') 179 { 180 escape_len = 1; 181 182 c = ','; 183 } 184 else if (len > 2) 185 { 186 escape_len = 2; 187 188 if (e[0] == 'S' && e[1] == 'P') 189 c = '@'; 190 else if (e[0] == 'B' && e[1] == 'P') 191 c = '*'; 192 else if (e[0] == 'R' && e[1] == 'F') 193 c = '&'; 194 else if (e[0] == 'L' && e[1] == 'T') 195 c = '<'; 196 else if (e[0] == 'G' && e[1] == 'T') 197 c = '>'; 198 else if (e[0] == 'L' && e[1] == 'P') 199 c = '('; 200 else if (e[0] == 'R' && e[1] == 'P') 201 c = ')'; 202 else if (e[0] == 'u' && len > 3) 203 { 204 escape_len = 3; 205 206 hi_nibble = decode_lower_hex_nibble (e[1]); 207 if (hi_nibble < 0) 208 return 0; 209 lo_nibble = decode_lower_hex_nibble (e[2]); 210 if (lo_nibble < 0) 211 return 0; 212 213 /* Only allow non-control ASCII characters. */ 214 if (hi_nibble > 7) 215 return 0; 216 c = (hi_nibble << 4) | lo_nibble; 217 if (c < 0x20) 218 return 0; 219 } 220 } 221 222 if (!c || len <= escape_len || e[escape_len] != '$') 223 return 0; 224 225 *out_len = 2 + escape_len; 226 return c; 227 } 228 229 static void 230 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) 231 { 232 char unescaped; 233 size_t len; 234 235 if (rdm->errored) 236 return; 237 238 if (rdm->version == -1) 239 { 240 /* Ignore leading underscores preceding escape sequences. 241 The mangler inserts an underscore to make sure the 242 identifier begins with a XID_Start character. */ 243 if (ident.ascii_len >= 2 && ident.ascii[0] == '_' 244 && ident.ascii[1] == '$') 245 { 246 ident.ascii++; 247 ident.ascii_len--; 248 } 249 250 while (ident.ascii_len > 0) 251 { 252 /* Handle legacy escape sequences ("$...$", ".." or "."). */ 253 if (ident.ascii[0] == '$') 254 { 255 unescaped 256 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len); 257 if (unescaped) 258 print_str (rdm, &unescaped, 1); 259 else 260 { 261 /* Unexpected escape sequence, print the rest verbatim. */ 262 print_str (rdm, ident.ascii, ident.ascii_len); 263 return; 264 } 265 } 266 else if (ident.ascii[0] == '.') 267 { 268 if (ident.ascii_len >= 2 && ident.ascii[1] == '.') 269 { 270 /* ".." becomes "::" */ 271 PRINT ("::"); 272 len = 2; 273 } 274 else 275 { 276 /* "." becomes "-" */ 277 PRINT ("-"); 278 len = 1; 279 } 280 } 281 else 282 { 283 /* Print everything before the next escape sequence, at once. */ 284 for (len = 0; len < ident.ascii_len; len++) 285 if (ident.ascii[len] == '$' || ident.ascii[len] == '.') 286 break; 287 288 print_str (rdm, ident.ascii, len); 289 } 290 291 ident.ascii += len; 292 ident.ascii_len -= len; 293 } 294 295 return; 296 } 297 } 298 299 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits. 300 The hex digits must contain at least 5 distinct digits. */ 301 static int 302 is_legacy_prefixed_hash (struct rust_mangled_ident ident) 303 { 304 uint16_t seen; 305 int nibble; 306 size_t i, count; 307 308 if (ident.ascii_len != 17 || ident.ascii[0] != 'h') 309 return 0; 310 311 seen = 0; 312 for (i = 0; i < 16; i++) 313 { 314 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]); 315 if (nibble < 0) 316 return 0; 317 seen |= (uint16_t)1 << nibble; 318 } 319 320 /* Count how many distinct digits were seen. */ 321 count = 0; 322 while (seen) 323 { 324 if (seen & 1) 325 count++; 326 seen >>= 1; 327 } 328 329 return count >= 5; 330 } 331 332 int 333 rust_demangle_callback (const char *mangled, int options, 334 demangle_callbackref callback, void *opaque) 335 { 336 const char *p; 337 struct rust_demangler rdm; 338 struct rust_mangled_ident ident; 339 340 rdm.sym = mangled; 341 rdm.sym_len = 0; 342 343 rdm.callback_opaque = opaque; 344 rdm.callback = callback; 345 346 rdm.next = 0; 347 rdm.errored = 0; 348 rdm.verbose = (options & DMGL_VERBOSE) != 0; 349 rdm.version = 0; 350 351 /* Rust symbols always start with _ZN (legacy). */ 352 if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N') 353 { 354 rdm.sym += 3; 355 rdm.version = -1; 356 } 357 else 358 return 0; 359 360 /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */ 361 for (p = rdm.sym; *p; p++) 362 { 363 rdm.sym_len++; 364 365 if (*p == '_' || ISALNUM (*p)) 366 continue; 367 368 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':')) 369 continue; 370 371 return 0; 372 } 373 374 /* Legacy Rust symbols need to be handled separately. */ 375 if (rdm.version == -1) 376 { 377 /* Legacy Rust symbols always end with E. */ 378 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E')) 379 return 0; 380 rdm.sym_len--; 381 382 /* Legacy Rust symbols also always end with a path segment 383 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'. 384 This early check, before any parse_ident calls, should 385 quickly filter out most C++ symbols unrelated to Rust. */ 386 if (!(rdm.sym_len > 19 387 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3))) 388 return 0; 389 390 do 391 { 392 ident = parse_ident (&rdm); 393 if (rdm.errored || !ident.ascii) 394 return 0; 395 } 396 while (rdm.next < rdm.sym_len); 397 398 /* The last path segment should be the hash. */ 399 if (!is_legacy_prefixed_hash (ident)) 400 return 0; 401 402 /* Reset the state for a second pass, to print the symbol. */ 403 rdm.next = 0; 404 if (!rdm.verbose && rdm.sym_len > 19) 405 { 406 /* Hide the last segment, containing the hash, if not verbose. */ 407 rdm.sym_len -= 19; 408 } 409 410 do 411 { 412 if (rdm.next > 0) 413 print_str (&rdm, "::", 2); 414 415 ident = parse_ident (&rdm); 416 print_ident (&rdm, ident); 417 } 418 while (rdm.next < rdm.sym_len); 419 } 420 else 421 return 0; 422 423 return !rdm.errored; 424 } 425 426 /* Growable string buffers. */ 427 struct str_buf 428 { 429 char *ptr; 430 size_t len; 431 size_t cap; 432 int errored; 433 }; 434 435 static void 436 str_buf_reserve (struct str_buf *buf, size_t extra) 437 { 438 size_t available, min_new_cap, new_cap; 439 char *new_ptr; 440 441 /* Allocation failed before. */ 442 if (buf->errored) 443 return; 444 445 available = buf->cap - buf->len; 446 447 if (extra <= available) 448 return; 449 450 min_new_cap = buf->cap + (extra - available); 451 452 /* Check for overflows. */ 453 if (min_new_cap < buf->cap) 454 { 455 buf->errored = 1; 456 return; 457 } 458 459 new_cap = buf->cap; 460 461 if (new_cap == 0) 462 new_cap = 4; 463 464 /* Double capacity until sufficiently large. */ 465 while (new_cap < min_new_cap) 466 { 467 new_cap *= 2; 468 469 /* Check for overflows. */ 470 if (new_cap < buf->cap) 471 { 472 buf->errored = 1; 473 return; 474 } 475 } 476 477 new_ptr = (char *)realloc (buf->ptr, new_cap); 478 if (new_ptr == NULL) 479 { 480 free (buf->ptr); 481 buf->ptr = NULL; 482 buf->len = 0; 483 buf->cap = 0; 484 buf->errored = 1; 485 } 486 else 487 { 488 buf->ptr = new_ptr; 489 buf->cap = new_cap; 490 } 491 } 492 493 static void 494 str_buf_append (struct str_buf *buf, const char *data, size_t len) 495 { 496 str_buf_reserve (buf, len); 497 if (buf->errored) 498 return; 499 500 memcpy (buf->ptr + buf->len, data, len); 501 buf->len += len; 502 } 503 504 static void 505 str_buf_demangle_callback (const char *data, size_t len, void *opaque) 506 { 507 str_buf_append ((struct str_buf *)opaque, data, len); 508 } 509 510 char * 511 rust_demangle (const char *mangled, int options) 512 { 513 struct str_buf out; 514 int success; 515 516 out.ptr = NULL; 517 out.len = 0; 518 out.cap = 0; 519 out.errored = 0; 520 521 success = rust_demangle_callback (mangled, options, 522 str_buf_demangle_callback, &out); 523 524 if (!success) 525 { 526 free (out.ptr); 527 return NULL; 528 } 529 530 str_buf_append (&out, "\0", 1); 531 return out.ptr; 532 } 533