1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Satoru Takabayashi
31 //
32 // For reference check out:
33 // http://www.codesourcery.com/public/cxx-abi/abi.html#mangling
34 //
35 // Note that we only have partial C++0x support yet.
36 
37 #include <stdio.h>  // for NULL
38 #include "demangle.h"
39 
40 _START_GOOGLE_NAMESPACE_
41 
42 typedef struct {
43   const char *abbrev;
44   const char *real_name;
45 } AbbrevPair;
46 
47 // List of operators from Itanium C++ ABI.
48 static const AbbrevPair kOperatorList[] = {
49   { "nw", "new" },
50   { "na", "new[]" },
51   { "dl", "delete" },
52   { "da", "delete[]" },
53   { "ps", "+" },
54   { "ng", "-" },
55   { "ad", "&" },
56   { "de", "*" },
57   { "co", "~" },
58   { "pl", "+" },
59   { "mi", "-" },
60   { "ml", "*" },
61   { "dv", "/" },
62   { "rm", "%" },
63   { "an", "&" },
64   { "or", "|" },
65   { "eo", "^" },
66   { "aS", "=" },
67   { "pL", "+=" },
68   { "mI", "-=" },
69   { "mL", "*=" },
70   { "dV", "/=" },
71   { "rM", "%=" },
72   { "aN", "&=" },
73   { "oR", "|=" },
74   { "eO", "^=" },
75   { "ls", "<<" },
76   { "rs", ">>" },
77   { "lS", "<<=" },
78   { "rS", ">>=" },
79   { "eq", "==" },
80   { "ne", "!=" },
81   { "lt", "<" },
82   { "gt", ">" },
83   { "le", "<=" },
84   { "ge", ">=" },
85   { "nt", "!" },
86   { "aa", "&&" },
87   { "oo", "||" },
88   { "pp", "++" },
89   { "mm", "--" },
90   { "cm", "," },
91   { "pm", "->*" },
92   { "pt", "->" },
93   { "cl", "()" },
94   { "ix", "[]" },
95   { "qu", "?" },
96   { "st", "sizeof" },
97   { "sz", "sizeof" },
98   { NULL, NULL },
99 };
100 
101 // List of builtin types from Itanium C++ ABI.
102 static const AbbrevPair kBuiltinTypeList[] = {
103   { "v", "void" },
104   { "w", "wchar_t" },
105   { "b", "bool" },
106   { "c", "char" },
107   { "a", "signed char" },
108   { "h", "unsigned char" },
109   { "s", "short" },
110   { "t", "unsigned short" },
111   { "i", "int" },
112   { "j", "unsigned int" },
113   { "l", "long" },
114   { "m", "unsigned long" },
115   { "x", "long long" },
116   { "y", "unsigned long long" },
117   { "n", "__int128" },
118   { "o", "unsigned __int128" },
119   { "f", "float" },
120   { "d", "double" },
121   { "e", "long double" },
122   { "g", "__float128" },
123   { "z", "ellipsis" },
124   { NULL, NULL }
125 };
126 
127 // List of substitutions Itanium C++ ABI.
128 static const AbbrevPair kSubstitutionList[] = {
129   { "St", "" },
130   { "Sa", "allocator" },
131   { "Sb", "basic_string" },
132   // std::basic_string<char, std::char_traits<char>,std::allocator<char> >
133   { "Ss", "string"},
134   // std::basic_istream<char, std::char_traits<char> >
135   { "Si", "istream" },
136   // std::basic_ostream<char, std::char_traits<char> >
137   { "So", "ostream" },
138   // std::basic_iostream<char, std::char_traits<char> >
139   { "Sd", "iostream" },
140   { NULL, NULL }
141 };
142 
143 // State needed for demangling.
144 typedef struct {
145   const char *mangled_cur;  // Cursor of mangled name.
146   char *out_cur;            // Cursor of output string.
147   const char *out_begin;    // Beginning of output string.
148   const char *out_end;      // End of output string.
149   const char *prev_name;    // For constructors/destructors.
150   int prev_name_length;     // For constructors/destructors.
151   short nest_level;         // For nested names.
152   bool append;              // Append flag.
153   bool overflowed;          // True if output gets overflowed.
154 } State;
155 
156 // We don't use strlen() in libc since it's not guaranteed to be async
157 // signal safe.
StrLen(const char * str)158 static size_t StrLen(const char *str) {
159   size_t len = 0;
160   while (*str != '\0') {
161     ++str;
162     ++len;
163   }
164   return len;
165 }
166 
167 // Returns true if "str" has at least "n" characters remaining.
AtLeastNumCharsRemaining(const char * str,int n)168 static bool AtLeastNumCharsRemaining(const char *str, int n) {
169   for (int i = 0; i < n; ++i) {
170     if (str[i] == '\0') {
171       return false;
172     }
173   }
174   return true;
175 }
176 
177 // Returns true if "str" has "prefix" as a prefix.
StrPrefix(const char * str,const char * prefix)178 static bool StrPrefix(const char *str, const char *prefix) {
179   size_t i = 0;
180   while (str[i] != '\0' && prefix[i] != '\0' &&
181          str[i] == prefix[i]) {
182     ++i;
183   }
184   return prefix[i] == '\0';  // Consumed everything in "prefix".
185 }
186 
InitState(State * state,const char * mangled,char * out,int out_size)187 static void InitState(State *state, const char *mangled,
188                       char *out, int out_size) {
189   state->mangled_cur = mangled;
190   state->out_cur = out;
191   state->out_begin = out;
192   state->out_end = out + out_size;
193   state->prev_name  = NULL;
194   state->prev_name_length = -1;
195   state->nest_level = -1;
196   state->append = true;
197   state->overflowed = false;
198 }
199 
200 // Returns true and advances "mangled_cur" if we find "one_char_token"
201 // at "mangled_cur" position.  It is assumed that "one_char_token" does
202 // not contain '\0'.
ParseOneCharToken(State * state,const char one_char_token)203 static bool ParseOneCharToken(State *state, const char one_char_token) {
204   if (state->mangled_cur[0] == one_char_token) {
205     ++state->mangled_cur;
206     return true;
207   }
208   return false;
209 }
210 
211 // Returns true and advances "mangled_cur" if we find "two_char_token"
212 // at "mangled_cur" position.  It is assumed that "two_char_token" does
213 // not contain '\0'.
ParseTwoCharToken(State * state,const char * two_char_token)214 static bool ParseTwoCharToken(State *state, const char *two_char_token) {
215   if (state->mangled_cur[0] == two_char_token[0] &&
216       state->mangled_cur[1] == two_char_token[1]) {
217     state->mangled_cur += 2;
218     return true;
219   }
220   return false;
221 }
222 
223 // Returns true and advances "mangled_cur" if we find any character in
224 // "char_class" at "mangled_cur" position.
ParseCharClass(State * state,const char * char_class)225 static bool ParseCharClass(State *state, const char *char_class) {
226   const char *p = char_class;
227   for (; *p != '\0'; ++p) {
228     if (state->mangled_cur[0] == *p) {
229       ++state->mangled_cur;
230       return true;
231     }
232   }
233   return false;
234 }
235 
236 // This function is used for handling an optional non-terminal.
Optional(bool)237 static bool Optional(bool) {
238   return true;
239 }
240 
241 // This function is used for handling <non-terminal>+ syntax.
242 typedef bool (*ParseFunc)(State *);
OneOrMore(ParseFunc parse_func,State * state)243 static bool OneOrMore(ParseFunc parse_func, State *state) {
244   if (parse_func(state)) {
245     while (parse_func(state)) {
246     }
247     return true;
248   }
249   return false;
250 }
251 
252 // This function is used for handling <non-terminal>* syntax. The function
253 // always returns true and must be followed by a termination token or a
254 // terminating sequence not handled by parse_func (e.g.
255 // ParseOneCharToken(state, 'E')).
ZeroOrMore(ParseFunc parse_func,State * state)256 static bool ZeroOrMore(ParseFunc parse_func, State *state) {
257   while (parse_func(state)) {
258   }
259   return true;
260 }
261 
262 // Append "str" at "out_cur".  If there is an overflow, "overflowed"
263 // is set to true for later use.  The output string is ensured to
264 // always terminate with '\0' as long as there is no overflow.
Append(State * state,const char * const str,const int length)265 static void Append(State *state, const char * const str, const int length) {
266   int i;
267   for (i = 0; i < length; ++i) {
268     if (state->out_cur + 1 < state->out_end) {  // +1 for '\0'
269       *state->out_cur = str[i];
270       ++state->out_cur;
271     } else {
272       state->overflowed = true;
273       break;
274     }
275   }
276   if (!state->overflowed) {
277     *state->out_cur = '\0';  // Terminate it with '\0'
278   }
279 }
280 
281 // We don't use equivalents in libc to avoid locale issues.
IsLower(char c)282 static bool IsLower(char c) {
283   return c >= 'a' && c <= 'z';
284 }
285 
IsAlpha(char c)286 static bool IsAlpha(char c) {
287   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
288 }
289 
IsDigit(char c)290 static bool IsDigit(char c) {
291   return c >= '0' && c <= '9';
292 }
293 
294 // Returns true if "str" is a function clone suffix.  These suffixes are used
295 // by GCC 4.5.x and later versions to indicate functions which have been
296 // cloned during optimization.  We treat any sequence (.<alpha>+.<digit>+)+ as
297 // a function clone suffix.
IsFunctionCloneSuffix(const char * str)298 static bool IsFunctionCloneSuffix(const char *str) {
299   size_t i = 0;
300   while (str[i] != '\0') {
301     // Consume a single .<alpha>+.<digit>+ sequence.
302     if (str[i] != '.' || !IsAlpha(str[i + 1])) {
303       return false;
304     }
305     i += 2;
306     while (IsAlpha(str[i])) {
307       ++i;
308     }
309     if (str[i] != '.' || !IsDigit(str[i + 1])) {
310       return false;
311     }
312     i += 2;
313     while (IsDigit(str[i])) {
314       ++i;
315     }
316   }
317   return true;  // Consumed everything in "str".
318 }
319 
320 // Append "str" with some tweaks, iff "append" state is true.
321 // Returns true so that it can be placed in "if" conditions.
MaybeAppendWithLength(State * state,const char * const str,const int length)322 static void MaybeAppendWithLength(State *state, const char * const str,
323                                   const int length) {
324   if (state->append && length > 0) {
325     // Append a space if the output buffer ends with '<' and "str"
326     // starts with '<' to avoid <<<.
327     if (str[0] == '<' && state->out_begin < state->out_cur  &&
328         state->out_cur[-1] == '<') {
329       Append(state, " ", 1);
330     }
331     // Remember the last identifier name for ctors/dtors.
332     if (IsAlpha(str[0]) || str[0] == '_') {
333       state->prev_name = state->out_cur;
334       state->prev_name_length = length;
335     }
336     Append(state, str, length);
337   }
338 }
339 
340 // A convenient wrapper arount MaybeAppendWithLength().
MaybeAppend(State * state,const char * const str)341 static bool MaybeAppend(State *state, const char * const str) {
342   if (state->append) {
343     int length = StrLen(str);
344     MaybeAppendWithLength(state, str, length);
345   }
346   return true;
347 }
348 
349 // This function is used for handling nested names.
EnterNestedName(State * state)350 static bool EnterNestedName(State *state) {
351   state->nest_level = 0;
352   return true;
353 }
354 
355 // This function is used for handling nested names.
LeaveNestedName(State * state,short prev_value)356 static bool LeaveNestedName(State *state, short prev_value) {
357   state->nest_level = prev_value;
358   return true;
359 }
360 
361 // Disable the append mode not to print function parameters, etc.
DisableAppend(State * state)362 static bool DisableAppend(State *state) {
363   state->append = false;
364   return true;
365 }
366 
367 // Restore the append mode to the previous state.
RestoreAppend(State * state,bool prev_value)368 static bool RestoreAppend(State *state, bool prev_value) {
369   state->append = prev_value;
370   return true;
371 }
372 
373 // Increase the nest level for nested names.
MaybeIncreaseNestLevel(State * state)374 static void MaybeIncreaseNestLevel(State *state) {
375   if (state->nest_level > -1) {
376     ++state->nest_level;
377   }
378 }
379 
380 // Appends :: for nested names if necessary.
MaybeAppendSeparator(State * state)381 static void MaybeAppendSeparator(State *state) {
382   if (state->nest_level >= 1) {
383     MaybeAppend(state, "::");
384   }
385 }
386 
387 // Cancel the last separator if necessary.
MaybeCancelLastSeparator(State * state)388 static void MaybeCancelLastSeparator(State *state) {
389   if (state->nest_level >= 1 && state->append &&
390       state->out_begin <= state->out_cur - 2) {
391     state->out_cur -= 2;
392     *state->out_cur = '\0';
393   }
394 }
395 
396 // Returns true if the identifier of the given length pointed to by
397 // "mangled_cur" is anonymous namespace.
IdentifierIsAnonymousNamespace(State * state,int length)398 static bool IdentifierIsAnonymousNamespace(State *state, int length) {
399   static const char anon_prefix[] = "_GLOBAL__N_";
400   return (length > (int)sizeof(anon_prefix) - 1 &&  // Should be longer.
401           StrPrefix(state->mangled_cur, anon_prefix));
402 }
403 
404 // Forward declarations of our parsing functions.
405 static bool ParseMangledName(State *state);
406 static bool ParseEncoding(State *state);
407 static bool ParseName(State *state);
408 static bool ParseUnscopedName(State *state);
409 static bool ParseUnscopedTemplateName(State *state);
410 static bool ParseNestedName(State *state);
411 static bool ParsePrefix(State *state);
412 static bool ParseUnqualifiedName(State *state);
413 static bool ParseSourceName(State *state);
414 static bool ParseLocalSourceName(State *state);
415 static bool ParseNumber(State *state, int *number_out);
416 static bool ParseFloatNumber(State *state);
417 static bool ParseSeqId(State *state);
418 static bool ParseIdentifier(State *state, int length);
419 static bool ParseOperatorName(State *state);
420 static bool ParseSpecialName(State *state);
421 static bool ParseCallOffset(State *state);
422 static bool ParseNVOffset(State *state);
423 static bool ParseVOffset(State *state);
424 static bool ParseCtorDtorName(State *state);
425 static bool ParseType(State *state);
426 static bool ParseCVQualifiers(State *state);
427 static bool ParseBuiltinType(State *state);
428 static bool ParseFunctionType(State *state);
429 static bool ParseBareFunctionType(State *state);
430 static bool ParseClassEnumType(State *state);
431 static bool ParseArrayType(State *state);
432 static bool ParsePointerToMemberType(State *state);
433 static bool ParseTemplateParam(State *state);
434 static bool ParseTemplateTemplateParam(State *state);
435 static bool ParseTemplateArgs(State *state);
436 static bool ParseTemplateArg(State *state);
437 static bool ParseExpression(State *state);
438 static bool ParseExprPrimary(State *state);
439 static bool ParseLocalName(State *state);
440 static bool ParseDiscriminator(State *state);
441 static bool ParseSubstitution(State *state);
442 
443 // Implementation note: the following code is a straightforward
444 // translation of the Itanium C++ ABI defined in BNF with a couple of
445 // exceptions.
446 //
447 // - Support GNU extensions not defined in the Itanium C++ ABI
448 // - <prefix> and <template-prefix> are combined to avoid infinite loop
449 // - Reorder patterns to shorten the code
450 // - Reorder patterns to give greedier functions precedence
451 //   We'll mark "Less greedy than" for these cases in the code
452 //
453 // Each parsing function changes the state and returns true on
454 // success.  Otherwise, don't change the state and returns false.  To
455 // ensure that the state isn't changed in the latter case, we save the
456 // original state before we call more than one parsing functions
457 // consecutively with &&, and restore the state if unsuccessful.  See
458 // ParseEncoding() as an example of this convention.  We follow the
459 // convention throughout the code.
460 //
461 // Originally we tried to do demangling without following the full ABI
462 // syntax but it turned out we needed to follow the full syntax to
463 // parse complicated cases like nested template arguments.  Note that
464 // implementing a full-fledged demangler isn't trivial (libiberty's
465 // cp-demangle.c has +4300 lines).
466 //
467 // Note that (foo) in <(foo) ...> is a modifier to be ignored.
468 //
469 // Reference:
470 // - Itanium C++ ABI
471 //   <http://www.codesourcery.com/cxx-abi/abi.html#mangling>
472 
473 // <mangled-name> ::= _Z <encoding>
ParseMangledName(State * state)474 static bool ParseMangledName(State *state) {
475   return ParseTwoCharToken(state, "_Z") && ParseEncoding(state);
476 }
477 
478 // <encoding> ::= <(function) name> <bare-function-type>
479 //            ::= <(data) name>
480 //            ::= <special-name>
ParseEncoding(State * state)481 static bool ParseEncoding(State *state) {
482   State copy = *state;
483   if (ParseName(state) && ParseBareFunctionType(state)) {
484     return true;
485   }
486   *state = copy;
487 
488   if (ParseName(state) || ParseSpecialName(state)) {
489     return true;
490   }
491   return false;
492 }
493 
494 // <name> ::= <nested-name>
495 //        ::= <unscoped-template-name> <template-args>
496 //        ::= <unscoped-name>
497 //        ::= <local-name>
ParseName(State * state)498 static bool ParseName(State *state) {
499   if (ParseNestedName(state) || ParseLocalName(state)) {
500     return true;
501   }
502 
503   State copy = *state;
504   if (ParseUnscopedTemplateName(state) &&
505       ParseTemplateArgs(state)) {
506     return true;
507   }
508   *state = copy;
509 
510   // Less greedy than <unscoped-template-name> <template-args>.
511   if (ParseUnscopedName(state)) {
512     return true;
513   }
514   return false;
515 }
516 
517 // <unscoped-name> ::= <unqualified-name>
518 //                 ::= St <unqualified-name>
ParseUnscopedName(State * state)519 static bool ParseUnscopedName(State *state) {
520   if (ParseUnqualifiedName(state)) {
521     return true;
522   }
523 
524   State copy = *state;
525   if (ParseTwoCharToken(state, "St") &&
526       MaybeAppend(state, "std::") &&
527       ParseUnqualifiedName(state)) {
528     return true;
529   }
530   *state = copy;
531   return false;
532 }
533 
534 // <unscoped-template-name> ::= <unscoped-name>
535 //                          ::= <substitution>
ParseUnscopedTemplateName(State * state)536 static bool ParseUnscopedTemplateName(State *state) {
537   return ParseUnscopedName(state) || ParseSubstitution(state);
538 }
539 
540 // <nested-name> ::= N [<CV-qualifiers>] <prefix> <unqualified-name> E
541 //               ::= N [<CV-qualifiers>] <template-prefix> <template-args> E
ParseNestedName(State * state)542 static bool ParseNestedName(State *state) {
543   State copy = *state;
544   if (ParseOneCharToken(state, 'N') &&
545       EnterNestedName(state) &&
546       Optional(ParseCVQualifiers(state)) &&
547       ParsePrefix(state) &&
548       LeaveNestedName(state, copy.nest_level) &&
549       ParseOneCharToken(state, 'E')) {
550     return true;
551   }
552   *state = copy;
553   return false;
554 }
555 
556 // This part is tricky.  If we literally translate them to code, we'll
557 // end up infinite loop.  Hence we merge them to avoid the case.
558 //
559 // <prefix> ::= <prefix> <unqualified-name>
560 //          ::= <template-prefix> <template-args>
561 //          ::= <template-param>
562 //          ::= <substitution>
563 //          ::= # empty
564 // <template-prefix> ::= <prefix> <(template) unqualified-name>
565 //                   ::= <template-param>
566 //                   ::= <substitution>
ParsePrefix(State * state)567 static bool ParsePrefix(State *state) {
568   bool has_something = false;
569   while (true) {
570     MaybeAppendSeparator(state);
571     if (ParseTemplateParam(state) ||
572         ParseSubstitution(state) ||
573         ParseUnscopedName(state)) {
574       has_something = true;
575       MaybeIncreaseNestLevel(state);
576       continue;
577     }
578     MaybeCancelLastSeparator(state);
579     if (has_something && ParseTemplateArgs(state)) {
580       return ParsePrefix(state);
581     } else {
582       break;
583     }
584   }
585   return true;
586 }
587 
588 // <unqualified-name> ::= <operator-name>
589 //                    ::= <ctor-dtor-name>
590 //                    ::= <source-name>
591 //                    ::= <local-source-name>
ParseUnqualifiedName(State * state)592 static bool ParseUnqualifiedName(State *state) {
593   return (ParseOperatorName(state) ||
594           ParseCtorDtorName(state) ||
595           ParseSourceName(state) ||
596           ParseLocalSourceName(state));
597 }
598 
599 // <source-name> ::= <positive length number> <identifier>
ParseSourceName(State * state)600 static bool ParseSourceName(State *state) {
601   State copy = *state;
602   int length = -1;
603   if (ParseNumber(state, &length) && ParseIdentifier(state, length)) {
604     return true;
605   }
606   *state = copy;
607   return false;
608 }
609 
610 // <local-source-name> ::= L <source-name> [<discriminator>]
611 //
612 // References:
613 //   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
614 //   http://gcc.gnu.org/viewcvs?view=rev&revision=124467
ParseLocalSourceName(State * state)615 static bool ParseLocalSourceName(State *state) {
616   State copy = *state;
617   if (ParseOneCharToken(state, 'L') && ParseSourceName(state) &&
618       Optional(ParseDiscriminator(state))) {
619     return true;
620   }
621   *state = copy;
622   return false;
623 }
624 
625 // <number> ::= [n] <non-negative decimal integer>
626 // If "number_out" is non-null, then *number_out is set to the value of the
627 // parsed number on success.
ParseNumber(State * state,int * number_out)628 static bool ParseNumber(State *state, int *number_out) {
629   int sign = 1;
630   if (ParseOneCharToken(state, 'n')) {
631     sign = -1;
632   }
633   const char *p = state->mangled_cur;
634   int number = 0;
635   for (;*p != '\0'; ++p) {
636     if (IsDigit(*p)) {
637       number = number * 10 + (*p - '0');
638     } else {
639       break;
640     }
641   }
642   if (p != state->mangled_cur) {  // Conversion succeeded.
643     state->mangled_cur = p;
644     if (number_out != NULL) {
645       *number_out = number * sign;
646     }
647     return true;
648   }
649   return false;
650 }
651 
652 // Floating-point literals are encoded using a fixed-length lowercase
653 // hexadecimal string.
ParseFloatNumber(State * state)654 static bool ParseFloatNumber(State *state) {
655   const char *p = state->mangled_cur;
656   for (;*p != '\0'; ++p) {
657     if (!IsDigit(*p) && !(*p >= 'a' && *p <= 'f')) {
658       break;
659     }
660   }
661   if (p != state->mangled_cur) {  // Conversion succeeded.
662     state->mangled_cur = p;
663     return true;
664   }
665   return false;
666 }
667 
668 // The <seq-id> is a sequence number in base 36,
669 // using digits and upper case letters
ParseSeqId(State * state)670 static bool ParseSeqId(State *state) {
671   const char *p = state->mangled_cur;
672   for (;*p != '\0'; ++p) {
673     if (!IsDigit(*p) && !(*p >= 'A' && *p <= 'Z')) {
674       break;
675     }
676   }
677   if (p != state->mangled_cur) {  // Conversion succeeded.
678     state->mangled_cur = p;
679     return true;
680   }
681   return false;
682 }
683 
684 // <identifier> ::= <unqualified source code identifier> (of given length)
ParseIdentifier(State * state,int length)685 static bool ParseIdentifier(State *state, int length) {
686   if (length == -1 ||
687       !AtLeastNumCharsRemaining(state->mangled_cur, length)) {
688     return false;
689   }
690   if (IdentifierIsAnonymousNamespace(state, length)) {
691     MaybeAppend(state, "(anonymous namespace)");
692   } else {
693     MaybeAppendWithLength(state, state->mangled_cur, length);
694   }
695   state->mangled_cur += length;
696   return true;
697 }
698 
699 // <operator-name> ::= nw, and other two letters cases
700 //                 ::= cv <type>  # (cast)
701 //                 ::= v  <digit> <source-name> # vendor extended operator
ParseOperatorName(State * state)702 static bool ParseOperatorName(State *state) {
703   if (!AtLeastNumCharsRemaining(state->mangled_cur, 2)) {
704     return false;
705   }
706   // First check with "cv" (cast) case.
707   State copy = *state;
708   if (ParseTwoCharToken(state, "cv") &&
709       MaybeAppend(state, "operator ") &&
710       EnterNestedName(state) &&
711       ParseType(state) &&
712       LeaveNestedName(state, copy.nest_level)) {
713     return true;
714   }
715   *state = copy;
716 
717   // Then vendor extended operators.
718   if (ParseOneCharToken(state, 'v') && ParseCharClass(state, "0123456789") &&
719       ParseSourceName(state)) {
720     return true;
721   }
722   *state = copy;
723 
724   // Other operator names should start with a lower alphabet followed
725   // by a lower/upper alphabet.
726   if (!(IsLower(state->mangled_cur[0]) &&
727         IsAlpha(state->mangled_cur[1]))) {
728     return false;
729   }
730   // We may want to perform a binary search if we really need speed.
731   const AbbrevPair *p;
732   for (p = kOperatorList; p->abbrev != NULL; ++p) {
733     if (state->mangled_cur[0] == p->abbrev[0] &&
734         state->mangled_cur[1] == p->abbrev[1]) {
735       MaybeAppend(state, "operator");
736       if (IsLower(*p->real_name)) {  // new, delete, etc.
737         MaybeAppend(state, " ");
738       }
739       MaybeAppend(state, p->real_name);
740       state->mangled_cur += 2;
741       return true;
742     }
743   }
744   return false;
745 }
746 
747 // <special-name> ::= TV <type>
748 //                ::= TT <type>
749 //                ::= TI <type>
750 //                ::= TS <type>
751 //                ::= Tc <call-offset> <call-offset> <(base) encoding>
752 //                ::= GV <(object) name>
753 //                ::= T <call-offset> <(base) encoding>
754 // G++ extensions:
755 //                ::= TC <type> <(offset) number> _ <(base) type>
756 //                ::= TF <type>
757 //                ::= TJ <type>
758 //                ::= GR <name>
759 //                ::= GA <encoding>
760 //                ::= Th <call-offset> <(base) encoding>
761 //                ::= Tv <call-offset> <(base) encoding>
762 //
763 // Note: we don't care much about them since they don't appear in
764 // stack traces.  The are special data.
ParseSpecialName(State * state)765 static bool ParseSpecialName(State *state) {
766   State copy = *state;
767   if (ParseOneCharToken(state, 'T') &&
768       ParseCharClass(state, "VTIS") &&
769       ParseType(state)) {
770     return true;
771   }
772   *state = copy;
773 
774   if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
775       ParseCallOffset(state) && ParseEncoding(state)) {
776     return true;
777   }
778   *state = copy;
779 
780   if (ParseTwoCharToken(state, "GV") &&
781       ParseName(state)) {
782     return true;
783   }
784   *state = copy;
785 
786   if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
787       ParseEncoding(state)) {
788     return true;
789   }
790   *state = copy;
791 
792   // G++ extensions
793   if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
794       ParseNumber(state, NULL) && ParseOneCharToken(state, '_') &&
795       DisableAppend(state) &&
796       ParseType(state)) {
797     RestoreAppend(state, copy.append);
798     return true;
799   }
800   *state = copy;
801 
802   if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
803       ParseType(state)) {
804     return true;
805   }
806   *state = copy;
807 
808   if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
809     return true;
810   }
811   *state = copy;
812 
813   if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
814     return true;
815   }
816   *state = copy;
817 
818   if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
819       ParseCallOffset(state) && ParseEncoding(state)) {
820     return true;
821   }
822   *state = copy;
823   return false;
824 }
825 
826 // <call-offset> ::= h <nv-offset> _
827 //               ::= v <v-offset> _
ParseCallOffset(State * state)828 static bool ParseCallOffset(State *state) {
829   State copy = *state;
830   if (ParseOneCharToken(state, 'h') &&
831       ParseNVOffset(state) && ParseOneCharToken(state, '_')) {
832     return true;
833   }
834   *state = copy;
835 
836   if (ParseOneCharToken(state, 'v') &&
837       ParseVOffset(state) && ParseOneCharToken(state, '_')) {
838     return true;
839   }
840   *state = copy;
841 
842   return false;
843 }
844 
845 // <nv-offset> ::= <(offset) number>
ParseNVOffset(State * state)846 static bool ParseNVOffset(State *state) {
847   return ParseNumber(state, NULL);
848 }
849 
850 // <v-offset>  ::= <(offset) number> _ <(virtual offset) number>
ParseVOffset(State * state)851 static bool ParseVOffset(State *state) {
852   State copy = *state;
853   if (ParseNumber(state, NULL) && ParseOneCharToken(state, '_') &&
854       ParseNumber(state, NULL)) {
855     return true;
856   }
857   *state = copy;
858   return false;
859 }
860 
861 // <ctor-dtor-name> ::= C1 | C2 | C3
862 //                  ::= D0 | D1 | D2
ParseCtorDtorName(State * state)863 static bool ParseCtorDtorName(State *state) {
864   State copy = *state;
865   if (ParseOneCharToken(state, 'C') &&
866       ParseCharClass(state, "123")) {
867     const char * const prev_name = state->prev_name;
868     const int prev_name_length = state->prev_name_length;
869     MaybeAppendWithLength(state, prev_name, prev_name_length);
870     return true;
871   }
872   *state = copy;
873 
874   if (ParseOneCharToken(state, 'D') &&
875       ParseCharClass(state, "012")) {
876     const char * const prev_name = state->prev_name;
877     const int prev_name_length = state->prev_name_length;
878     MaybeAppend(state, "~");
879     MaybeAppendWithLength(state, prev_name, prev_name_length);
880     return true;
881   }
882   *state = copy;
883   return false;
884 }
885 
886 // <type> ::= <CV-qualifiers> <type>
887 //        ::= P <type>   # pointer-to
888 //        ::= R <type>   # reference-to
889 //        ::= O <type>   # rvalue reference-to (C++0x)
890 //        ::= C <type>   # complex pair (C 2000)
891 //        ::= G <type>   # imaginary (C 2000)
892 //        ::= U <source-name> <type>  # vendor extended type qualifier
893 //        ::= <builtin-type>
894 //        ::= <function-type>
895 //        ::= <class-enum-type>
896 //        ::= <array-type>
897 //        ::= <pointer-to-member-type>
898 //        ::= <template-template-param> <template-args>
899 //        ::= <template-param>
900 //        ::= <substitution>
901 //        ::= Dp <type>          # pack expansion of (C++0x)
902 //        ::= Dt <expression> E  # decltype of an id-expression or class
903 //                               # member access (C++0x)
904 //        ::= DT <expression> E  # decltype of an expression (C++0x)
905 //
ParseType(State * state)906 static bool ParseType(State *state) {
907   // We should check CV-qualifers, and PRGC things first.
908   State copy = *state;
909   if (ParseCVQualifiers(state) && ParseType(state)) {
910     return true;
911   }
912   *state = copy;
913 
914   if (ParseCharClass(state, "OPRCG") && ParseType(state)) {
915     return true;
916   }
917   *state = copy;
918 
919   if (ParseTwoCharToken(state, "Dp") && ParseType(state)) {
920     return true;
921   }
922   *state = copy;
923 
924   if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") &&
925       ParseExpression(state) && ParseOneCharToken(state, 'E')) {
926     return true;
927   }
928   *state = copy;
929 
930   if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
931       ParseType(state)) {
932     return true;
933   }
934   *state = copy;
935 
936   if (ParseBuiltinType(state) ||
937       ParseFunctionType(state) ||
938       ParseClassEnumType(state) ||
939       ParseArrayType(state) ||
940       ParsePointerToMemberType(state) ||
941       ParseSubstitution(state)) {
942     return true;
943   }
944 
945   if (ParseTemplateTemplateParam(state) &&
946       ParseTemplateArgs(state)) {
947     return true;
948   }
949   *state = copy;
950 
951   // Less greedy than <template-template-param> <template-args>.
952   if (ParseTemplateParam(state)) {
953     return true;
954   }
955 
956   return false;
957 }
958 
959 // <CV-qualifiers> ::= [r] [V] [K]
960 // We don't allow empty <CV-qualifiers> to avoid infinite loop in
961 // ParseType().
ParseCVQualifiers(State * state)962 static bool ParseCVQualifiers(State *state) {
963   int num_cv_qualifiers = 0;
964   num_cv_qualifiers += ParseOneCharToken(state, 'r');
965   num_cv_qualifiers += ParseOneCharToken(state, 'V');
966   num_cv_qualifiers += ParseOneCharToken(state, 'K');
967   return num_cv_qualifiers > 0;
968 }
969 
970 // <builtin-type> ::= v, etc.
971 //                ::= u <source-name>
ParseBuiltinType(State * state)972 static bool ParseBuiltinType(State *state) {
973   const AbbrevPair *p;
974   for (p = kBuiltinTypeList; p->abbrev != NULL; ++p) {
975     if (state->mangled_cur[0] == p->abbrev[0]) {
976       MaybeAppend(state, p->real_name);
977       ++state->mangled_cur;
978       return true;
979     }
980   }
981 
982   State copy = *state;
983   if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
984     return true;
985   }
986   *state = copy;
987   return false;
988 }
989 
990 // <function-type> ::= F [Y] <bare-function-type> E
ParseFunctionType(State * state)991 static bool ParseFunctionType(State *state) {
992   State copy = *state;
993   if (ParseOneCharToken(state, 'F') &&
994       Optional(ParseOneCharToken(state, 'Y')) &&
995       ParseBareFunctionType(state) && ParseOneCharToken(state, 'E')) {
996     return true;
997   }
998   *state = copy;
999   return false;
1000 }
1001 
1002 // <bare-function-type> ::= <(signature) type>+
ParseBareFunctionType(State * state)1003 static bool ParseBareFunctionType(State *state) {
1004   State copy = *state;
1005   DisableAppend(state);
1006   if (OneOrMore(ParseType, state)) {
1007     RestoreAppend(state, copy.append);
1008     MaybeAppend(state, "()");
1009     return true;
1010   }
1011   *state = copy;
1012   return false;
1013 }
1014 
1015 // <class-enum-type> ::= <name>
ParseClassEnumType(State * state)1016 static bool ParseClassEnumType(State *state) {
1017   return ParseName(state);
1018 }
1019 
1020 // <array-type> ::= A <(positive dimension) number> _ <(element) type>
1021 //              ::= A [<(dimension) expression>] _ <(element) type>
ParseArrayType(State * state)1022 static bool ParseArrayType(State *state) {
1023   State copy = *state;
1024   if (ParseOneCharToken(state, 'A') && ParseNumber(state, NULL) &&
1025       ParseOneCharToken(state, '_') && ParseType(state)) {
1026     return true;
1027   }
1028   *state = copy;
1029 
1030   if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) &&
1031       ParseOneCharToken(state, '_') && ParseType(state)) {
1032     return true;
1033   }
1034   *state = copy;
1035   return false;
1036 }
1037 
1038 // <pointer-to-member-type> ::= M <(class) type> <(member) type>
ParsePointerToMemberType(State * state)1039 static bool ParsePointerToMemberType(State *state) {
1040   State copy = *state;
1041   if (ParseOneCharToken(state, 'M') && ParseType(state) &&
1042       ParseType(state)) {
1043     return true;
1044   }
1045   *state = copy;
1046   return false;
1047 }
1048 
1049 // <template-param> ::= T_
1050 //                  ::= T <parameter-2 non-negative number> _
ParseTemplateParam(State * state)1051 static bool ParseTemplateParam(State *state) {
1052   if (ParseTwoCharToken(state, "T_")) {
1053     MaybeAppend(state, "?");  // We don't support template substitutions.
1054     return true;
1055   }
1056 
1057   State copy = *state;
1058   if (ParseOneCharToken(state, 'T') && ParseNumber(state, NULL) &&
1059       ParseOneCharToken(state, '_')) {
1060     MaybeAppend(state, "?");  // We don't support template substitutions.
1061     return true;
1062   }
1063   *state = copy;
1064   return false;
1065 }
1066 
1067 
1068 // <template-template-param> ::= <template-param>
1069 //                           ::= <substitution>
ParseTemplateTemplateParam(State * state)1070 static bool ParseTemplateTemplateParam(State *state) {
1071   return (ParseTemplateParam(state) ||
1072           ParseSubstitution(state));
1073 }
1074 
1075 // <template-args> ::= I <template-arg>+ E
ParseTemplateArgs(State * state)1076 static bool ParseTemplateArgs(State *state) {
1077   State copy = *state;
1078   DisableAppend(state);
1079   if (ParseOneCharToken(state, 'I') &&
1080       OneOrMore(ParseTemplateArg, state) &&
1081       ParseOneCharToken(state, 'E')) {
1082     RestoreAppend(state, copy.append);
1083     MaybeAppend(state, "<>");
1084     return true;
1085   }
1086   *state = copy;
1087   return false;
1088 }
1089 
1090 // <template-arg>  ::= <type>
1091 //                 ::= <expr-primary>
1092 //                 ::= I <template-arg>* E        # argument pack
1093 //                 ::= X <expression> E
ParseTemplateArg(State * state)1094 static bool ParseTemplateArg(State *state) {
1095   State copy = *state;
1096   if (ParseOneCharToken(state, 'I') &&
1097       ZeroOrMore(ParseTemplateArg, state) &&
1098       ParseOneCharToken(state, 'E')) {
1099     return true;
1100   }
1101   *state = copy;
1102 
1103   if (ParseType(state) ||
1104       ParseExprPrimary(state)) {
1105     return true;
1106   }
1107   *state = copy;
1108 
1109   if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
1110       ParseOneCharToken(state, 'E')) {
1111     return true;
1112   }
1113   *state = copy;
1114   return false;
1115 }
1116 
1117 // <expression> ::= <template-param>
1118 //              ::= <expr-primary>
1119 //              ::= <unary operator-name> <expression>
1120 //              ::= <binary operator-name> <expression> <expression>
1121 //              ::= <trinary operator-name> <expression> <expression>
1122 //                  <expression>
1123 //              ::= st <type>
1124 //              ::= sr <type> <unqualified-name> <template-args>
1125 //              ::= sr <type> <unqualified-name>
ParseExpression(State * state)1126 static bool ParseExpression(State *state) {
1127   if (ParseTemplateParam(state) || ParseExprPrimary(state)) {
1128     return true;
1129   }
1130 
1131   State copy = *state;
1132   if (ParseOperatorName(state) &&
1133       ParseExpression(state) &&
1134       ParseExpression(state) &&
1135       ParseExpression(state)) {
1136     return true;
1137   }
1138   *state = copy;
1139 
1140   if (ParseOperatorName(state) &&
1141       ParseExpression(state) &&
1142       ParseExpression(state)) {
1143     return true;
1144   }
1145   *state = copy;
1146 
1147   if (ParseOperatorName(state) &&
1148       ParseExpression(state)) {
1149     return true;
1150   }
1151   *state = copy;
1152 
1153   if (ParseTwoCharToken(state, "st") && ParseType(state)) {
1154     return true;
1155   }
1156   *state = copy;
1157 
1158   if (ParseTwoCharToken(state, "sr") && ParseType(state) &&
1159       ParseUnqualifiedName(state) &&
1160       ParseTemplateArgs(state)) {
1161     return true;
1162   }
1163   *state = copy;
1164 
1165   if (ParseTwoCharToken(state, "sr") && ParseType(state) &&
1166       ParseUnqualifiedName(state)) {
1167     return true;
1168   }
1169   *state = copy;
1170   return false;
1171 }
1172 
1173 // <expr-primary> ::= L <type> <(value) number> E
1174 //                ::= L <type> <(value) float> E
1175 //                ::= L <mangled-name> E
1176 //                // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
1177 //                ::= LZ <encoding> E
ParseExprPrimary(State * state)1178 static bool ParseExprPrimary(State *state) {
1179   State copy = *state;
1180   if (ParseOneCharToken(state, 'L') && ParseType(state) &&
1181       ParseNumber(state, NULL) &&
1182       ParseOneCharToken(state, 'E')) {
1183     return true;
1184   }
1185   *state = copy;
1186 
1187   if (ParseOneCharToken(state, 'L') && ParseType(state) &&
1188       ParseFloatNumber(state) &&
1189       ParseOneCharToken(state, 'E')) {
1190     return true;
1191   }
1192   *state = copy;
1193 
1194   if (ParseOneCharToken(state, 'L') && ParseMangledName(state) &&
1195       ParseOneCharToken(state, 'E')) {
1196     return true;
1197   }
1198   *state = copy;
1199 
1200   if (ParseTwoCharToken(state, "LZ") && ParseEncoding(state) &&
1201       ParseOneCharToken(state, 'E')) {
1202     return true;
1203   }
1204   *state = copy;
1205 
1206   return false;
1207 }
1208 
1209 // <local-name> := Z <(function) encoding> E <(entity) name>
1210 //                 [<discriminator>]
1211 //              := Z <(function) encoding> E s [<discriminator>]
ParseLocalName(State * state)1212 static bool ParseLocalName(State *state) {
1213   State copy = *state;
1214   if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
1215       ParseOneCharToken(state, 'E') && MaybeAppend(state, "::") &&
1216       ParseName(state) && Optional(ParseDiscriminator(state))) {
1217     return true;
1218   }
1219   *state = copy;
1220 
1221   if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
1222       ParseTwoCharToken(state, "Es") && Optional(ParseDiscriminator(state))) {
1223     return true;
1224   }
1225   *state = copy;
1226   return false;
1227 }
1228 
1229 // <discriminator> := _ <(non-negative) number>
ParseDiscriminator(State * state)1230 static bool ParseDiscriminator(State *state) {
1231   State copy = *state;
1232   if (ParseOneCharToken(state, '_') && ParseNumber(state, NULL)) {
1233     return true;
1234   }
1235   *state = copy;
1236   return false;
1237 }
1238 
1239 // <substitution> ::= S_
1240 //                ::= S <seq-id> _
1241 //                ::= St, etc.
ParseSubstitution(State * state)1242 static bool ParseSubstitution(State *state) {
1243   if (ParseTwoCharToken(state, "S_")) {
1244     MaybeAppend(state, "?");  // We don't support substitutions.
1245     return true;
1246   }
1247 
1248   State copy = *state;
1249   if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
1250       ParseOneCharToken(state, '_')) {
1251     MaybeAppend(state, "?");  // We don't support substitutions.
1252     return true;
1253   }
1254   *state = copy;
1255 
1256   // Expand abbreviations like "St" => "std".
1257   if (ParseOneCharToken(state, 'S')) {
1258     const AbbrevPair *p;
1259     for (p = kSubstitutionList; p->abbrev != NULL; ++p) {
1260       if (state->mangled_cur[0] == p->abbrev[1]) {
1261         MaybeAppend(state, "std");
1262         if (p->real_name[0] != '\0') {
1263           MaybeAppend(state, "::");
1264           MaybeAppend(state, p->real_name);
1265         }
1266         ++state->mangled_cur;
1267         return true;
1268       }
1269     }
1270   }
1271   *state = copy;
1272   return false;
1273 }
1274 
1275 // Parse <mangled-name>, optionally followed by either a function-clone suffix
1276 // or version suffix.  Returns true only if all of "mangled_cur" was consumed.
ParseTopLevelMangledName(State * state)1277 static bool ParseTopLevelMangledName(State *state) {
1278   if (ParseMangledName(state)) {
1279     if (state->mangled_cur[0] != '\0') {
1280       // Drop trailing function clone suffix, if any.
1281       if (IsFunctionCloneSuffix(state->mangled_cur)) {
1282         return true;
1283       }
1284       // Append trailing version suffix if any.
1285       // ex. _Z3foo@@GLIBCXX_3.4
1286       if (state->mangled_cur[0] == '@') {
1287         MaybeAppend(state, state->mangled_cur);
1288         return true;
1289       }
1290       return false;  // Unconsumed suffix.
1291     }
1292     return true;
1293   }
1294   return false;
1295 }
1296 
1297 // The demangler entry point.
Demangle(const char * mangled,char * out,int out_size)1298 bool Demangle(const char *mangled, char *out, int out_size) {
1299   State state;
1300   InitState(&state, mangled, out, out_size);
1301   return ParseTopLevelMangledName(&state) && !state.overflowed;
1302 }
1303 
1304 _END_GOOGLE_NAMESPACE_
1305