1 /* Shared functions related to mangling names for the GNU compiler
2    for the Java(TM) language.
3    Copyright (C) 2001-2013 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.
20 
21 Java and all Java-based marks are trademarks or registered trademarks
22 of Sun Microsystems, Inc. in the United States and other countries.
23 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
24 
25 /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
26 
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "jcf.h"
31 #include "tree.h"
32 #include "java-tree.h"
33 #include "obstack.h"
34 #include "diagnostic-core.h"
35 
36 static void append_unicode_mangled_name (const char *, int);
37 #ifndef HAVE_AS_UTF8
38 static int  unicode_mangling_length (const char *, int);
39 #endif
40 
41 extern struct obstack *mangle_obstack;
42 
/* Compare the Utf8 string (STR, LENGTH) against the NUL-terminated
   string NAME.  One value is fetched from STR with UTF8_GET for each
   byte of NAME.  Return the difference at the first position where
   the two disagree.  If all of NAME matched, return 0 when STR was
   consumed exactly and 1 when some of STR is left over.  */

static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; expected++)
    {
      int ch = UTF8_GET (str, limit);
      int delta = ch - *expected;

      if (delta != 0)
	return delta;
    }

  if (str != limit)
    return 1;
  return 0;
}
58 
/* Every C++ keyword, in strictly ascending byte order.  cxx_keyword_p
   binary-searches this table, so a new entry must be inserted at its
   sorted position.  If you change this, be sure also to change the
   list in libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java.  */
static const char *const cxx_keywords[] =
{
  /* Spellings that begin with an underscore.  */
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__", "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__", "__imag", "__imag__", "__inline", "__inline__",
  "__label__", "__null", "__real", "__real__",
  "__restrict", "__restrict__", "__signed", "__signed__",
  "__typeof", "__typeof__", "__volatile", "__volatile__",

  /* Everything else, one row (or two) per initial letter.  */
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl", "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast", "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
169 
170 /* Return true if NAME is a C++ keyword.  */
171 int
cxx_keyword_p(const char * name,int length)172 cxx_keyword_p (const char *name, int length)
173 {
174   int last = ARRAY_SIZE (cxx_keywords);
175   int first = 0;
176   int mid = (last + first) / 2;
177   int old = -1;
178 
179   for (mid = (last + first) / 2;
180        mid != old;
181        old = mid, mid = (last + first) / 2)
182     {
183       int kwl = strlen (cxx_keywords[mid]);
184       int min_length = kwl > length ? length : kwl;
185       int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
186 
187       if (r == 0)
188 	{
189 	  int i;
190 	  /* We've found a match if all the remaining characters are `$'.  */
191 	  for (i = min_length; i < length && name[i] == '$'; ++i)
192 	    ;
193 	  if (i == length)
194 	    return 1;
195 	  r = 1;
196 	}
197 
198       if (r < 0)
199 	last = mid;
200       else
201 	first = mid;
202     }
203   return 0;
204 }
205 
/* If (NAME, LEN) is recognized by cxx_keyword_p, rebind NAME to a copy
   with one `$' appended and increment LEN.  NAME and LEN must be
   modifiable lvalues; both are evaluated more than once.  The copy is
   obtained from alloca, so it lives in the frame of the function that
   expands this macro, and it is not NUL-terminated.  */
#define MANGLE_CXX_KEYWORDS(NAME, LEN)				\
do								\
  {								\
    if (cxx_keyword_p ((NAME), (LEN)))				\
      {								\
	char *escaped_kw = (char *) alloca ((LEN) + 1);		\
	memcpy (escaped_kw, (NAME), (LEN));			\
	escaped_kw[(LEN)] = '$';				\
	(NAME) = escaped_kw;					\
	(LEN)++;						\
      }								\
  }								\
while (0)
220 
221 
222 /* If the assembler doesn't support UTF8 in symbol names, some
223    characters might need to be escaped.  */
224 
225 #ifndef HAVE_AS_UTF8
226 
227 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
228    appropriately mangled (with Unicode escapes if needed) to
229    MANGLE_OBSTACK.  Note that `java', `lang' and `Object' are used so
230    frequently that they could be cached.  */
231 
232 void
append_gpp_mangled_name(const char * name,int len)233 append_gpp_mangled_name (const char *name, int len)
234 {
235   int encoded_len, needs_escapes;
236   char buf[6];
237 
238   MANGLE_CXX_KEYWORDS (name, len);
239 
240   encoded_len = unicode_mangling_length (name, len);
241   needs_escapes = encoded_len > 0;
242 
243   sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
244   obstack_grow (mangle_obstack, buf, strlen (buf));
245 
246   if (needs_escapes)
247     append_unicode_mangled_name (name, len);
248   else
249     obstack_grow (mangle_obstack, name, len);
250 }
251 
252 /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
253    appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
254    Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in
255    which case `__U' will be mangled `__U_'.  */
256 
257 static void
append_unicode_mangled_name(const char * name,int len)258 append_unicode_mangled_name (const char *name, int len)
259 {
260   const unsigned char *ptr;
261   const unsigned char *limit = (const unsigned char *)name + len;
262   int uuU = 0;
263   for (ptr = (const unsigned char *) name;  ptr < limit;  )
264     {
265       int ch = UTF8_GET(ptr, limit);
266 
267       if ((ISALNUM (ch) && ch != 'U') || ch == '$')
268         {
269 	  obstack_1grow (mangle_obstack, ch);
270           uuU = 0;
271         }
272       /* Everything else needs encoding */
273       else
274 	{
275 	  char buf [9];
276 	  if (ch == '_' || ch == 'U')
277 	    {
278 	      /* Prepare to recognize __U */
279 	      if (ch == '_' && (uuU < 3))
280 		{
281 		  uuU++;
282 		  obstack_1grow (mangle_obstack, ch);
283 		}
284 	      /* We recognize __U that we wish to encode
285                  __U_. Finish the encoding. */
286 	      else if (ch == 'U' && (uuU == 2))
287 		{
288 		  uuU = 0;
289 		  obstack_grow (mangle_obstack, "U_", 2);
290 		}
291 	      /* Otherwise, just reset uuU and emit the character we
292                  have. */
293 	      else
294 		{
295 		  uuU = 0;
296 		  obstack_1grow (mangle_obstack, ch);
297 		}
298 	      continue;
299 	    }
300 	  sprintf (buf, "__U%x_", ch);
301 	  obstack_grow (mangle_obstack, buf, strlen (buf));
302 	  uuU = 0;
303 	}
304     }
305 }
306 
/* Assuming (NAME, LEN) is a Utf8-encoded string, compute how many
   characters its mangled form (a la g++, Unicode escapes included)
   occupies.  Return that count when at least one escape is required
   and 0 when the name can be emitted unchanged.  An error is reported
   for each value UTF8_GET returns as negative; counting then carries
   on.  */

static int
unicode_mangling_length (const char *name, int len)
{
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *limit = cursor + len;
  int escaped = 0;		/* Set once any escape is required.  */
  int total = 0;		/* Characters in the mangled name.  */
  int underscores = 0;		/* Underscores seen in a row, to spot `__U'.  */

  while (cursor < limit)
    {
      int ch = UTF8_GET (cursor, limit);
      int hex_digits;

      if (ch < 0)
	error ("internal error - invalid Utf8 name");

      /* Characters copied verbatim count for one.
	 NOTE(review): ISALNUM is handed a full code point here; confirm
	 it cannot report true for values above 0x7f.  */
      if ((ISALNUM (ch) && ch != 'U') || ch == '$')
	{
	  total++;
	  underscores = 0;
	  continue;
	}

      if (ch == '_')
	{
	  total++;
	  /* Track up to three underscores in a row, then start over.  */
	  if (underscores < 3)
	    underscores++;
	  else
	    underscores = 0;
	  continue;
	}

      if (ch == 'U')
	{
	  total++;
	  /* A `U' after exactly two underscores is written `U_', which
	     costs one more character and counts as an escape.  */
	  if (underscores == 2)
	    {
	      total++;
	      escaped = 1;
	    }
	  underscores = 0;
	  continue;
	}

      /* Everything else becomes `__U', two to four hex digits, `_'.  */
      hex_digits = 2;
      if (ch > 0xff)
	hex_digits++;
      if (ch > 0xfff)
	hex_digits++;

      total += 4 + hex_digits;
      escaped = 1;
      underscores = 0;
    }

  return escaped ? total : 0;
}
374 
375 #else
376 
377 /* The assembler supports UTF8, we don't use escapes. Mangling is
378    simply <N>NAME. <N> is the number of UTF8 encoded characters that
379    are found in NAME. Note that `java', `lang' and `Object' are used
380    so frequently that they could be cached.  */
381 
382 void
append_gpp_mangled_name(const char * name,int len)383 append_gpp_mangled_name (const char *name, int len)
384 {
385   const unsigned char *ptr;
386   const unsigned char *limit;
387   int encoded_len;
388   char buf [6];
389 
390   MANGLE_CXX_KEYWORDS (name, len);
391 
392   limit = (const unsigned char *)name + len;
393 
394   /* Compute the length of the string we wish to mangle. */
395   for (encoded_len =  0, ptr = (const unsigned char *) name;
396        ptr < limit; encoded_len++)
397     {
398       int ch = UTF8_GET(ptr, limit);
399 
400       if (ch < 0)
401 	error ("internal error - invalid Utf8 name");
402     }
403 
404   sprintf (buf, "%d", encoded_len);
405   obstack_grow (mangle_obstack, buf, strlen (buf));
406   obstack_grow (mangle_obstack, name, len);
407 }
408 
409 #endif /* HAVE_AS_UTF8 */
410