1 /*
2  * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package com.sun.tools.javac.util;
27 
28 /** Utility class for static conversion methods between numbers
29  *  and strings in various formats.
30  *
31  *  <p>Note regarding UTF-8.
32  *  The JVMS defines its own version of the UTF-8 format so that it
33  *  contains no zero bytes (modified UTF-8). This is not actually the same
34  *  as Charset.forName("UTF-8").
35  *
36  *  <p>
37  *  See also:
38  *  <ul>
39  *  <li><a href="http://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.4.7">
40  *    JVMS 4.4.7 </a></li>
41  *  <li><a href="http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8">
 *    java.io.DataInput: Modified UTF-8 </a></li>
 *  <li><a href="https://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8">
 *    Modified UTF-8 (Wikipedia) </a></li>
45  *  </ul>
46  *
47  *  The methods here support modified UTF-8.
48  *
49  *  <p><b>This is NOT part of any supported API.
50  *  If you write code that depends on this, you do so at your own risk.
51  *  This code and its internal interfaces are subject to change or
52  *  deletion without notice.</b>
53  */
54 public class Convert {
55 
56     /** Convert string to integer.
57      */
string2int(String s, int radix)58     public static int string2int(String s, int radix)
59         throws NumberFormatException {
60         if (radix == 10) {
61             return Integer.parseInt(s, radix);
62         } else {
63             char[] cs = s.toCharArray();
64             int limit = Integer.MAX_VALUE / (radix/2);
65             int n = 0;
66             for (char c : cs) {
67                 int d = Character.digit(c, radix);
68                 if (n < 0 ||
69                     n > limit ||
70                     n * radix > Integer.MAX_VALUE - d)
71                     throw new NumberFormatException();
72                 n = n * radix + d;
73             }
74             return n;
75         }
76     }
77 
78     /** Convert string to long integer.
79      */
string2long(String s, int radix)80     public static long string2long(String s, int radix)
81         throws NumberFormatException {
82         if (radix == 10) {
83             return Long.parseLong(s, radix);
84         } else {
85             char[] cs = s.toCharArray();
86             long limit = Long.MAX_VALUE / (radix/2);
87             long n = 0;
88             for (char c : cs) {
89                 int d = Character.digit(c, radix);
90                 if (n < 0 ||
91                     n > limit ||
92                     n * radix > Long.MAX_VALUE - d)
93                     throw new NumberFormatException();
94                 n = n * radix + d;
95             }
96             return n;
97         }
98     }
99 
100 /* Conversion routines between names, strings, and byte arrays in Utf8 format
101  */
102 
103     /** Convert `len' bytes from utf8 to characters.
104      *  Parameters are as in System.arraycopy
105      *  Return first index in `dst' past the last copied char.
106      *  @param src        The array holding the bytes to convert.
107      *  @param sindex     The start index from which bytes are converted.
108      *  @param dst        The array holding the converted characters..
109      *  @param dindex     The start index from which converted characters
110      *                    are written.
111      *  @param len        The maximum number of bytes to convert.
112      */
utf2chars(byte[] src, int sindex, char[] dst, int dindex, int len)113     public static int utf2chars(byte[] src, int sindex,
114                                 char[] dst, int dindex,
115                                 int len) {
116         int i = sindex;
117         int j = dindex;
118         int limit = sindex + len;
119         while (i < limit) {
120             int b = src[i++] & 0xFF;
121             if (b >= 0xE0) {
122                 b = (b & 0x0F) << 12;
123                 b = b | (src[i++] & 0x3F) << 6;
124                 b = b | (src[i++] & 0x3F);
125             } else if (b >= 0xC0) {
126                 b = (b & 0x1F) << 6;
127                 b = b | (src[i++] & 0x3F);
128             }
129             dst[j++] = (char)b;
130         }
131         return j;
132     }
133 
134     /** Return bytes in Utf8 representation as an array of characters.
135      *  @param src        The array holding the bytes.
136      *  @param sindex     The start index from which bytes are converted.
137      *  @param len        The maximum number of bytes to convert.
138      */
utf2chars(byte[] src, int sindex, int len)139     public static char[] utf2chars(byte[] src, int sindex, int len) {
140         char[] dst = new char[len];
141         int len1 = utf2chars(src, sindex, dst, 0, len);
142         char[] result = new char[len1];
143         System.arraycopy(dst, 0, result, 0, len1);
144         return result;
145     }
146 
147     /** Return all bytes of a given array in Utf8 representation
148      *  as an array of characters.
149      *  @param src        The array holding the bytes.
150      */
utf2chars(byte[] src)151     public static char[] utf2chars(byte[] src) {
152         return utf2chars(src, 0, src.length);
153     }
154 
155     /** Return bytes in Utf8 representation as a string.
156      *  @param src        The array holding the bytes.
157      *  @param sindex     The start index from which bytes are converted.
158      *  @param len        The maximum number of bytes to convert.
159      */
utf2string(byte[] src, int sindex, int len)160     public static String utf2string(byte[] src, int sindex, int len) {
161         char dst[] = new char[len];
162         int len1 = utf2chars(src, sindex, dst, 0, len);
163         return new String(dst, 0, len1);
164     }
165 
166     /** Return all bytes of a given array in Utf8 representation
167      *  as a string.
168      *  @param src        The array holding the bytes.
169      */
utf2string(byte[] src)170     public static String utf2string(byte[] src) {
171         return utf2string(src, 0, src.length);
172     }
173 
174     /** Copy characters in source array to bytes in target array,
175      *  converting them to Utf8 representation.
176      *  The target array must be large enough to hold the result.
177      *  returns first index in `dst' past the last copied byte.
178      *  @param src        The array holding the characters to convert.
179      *  @param sindex     The start index from which characters are converted.
180      *  @param dst        The array holding the converted characters..
181      *  @param dindex     The start index from which converted bytes
182      *                    are written.
183      *  @param len        The maximum number of characters to convert.
184      */
chars2utf(char[] src, int sindex, byte[] dst, int dindex, int len)185     public static int chars2utf(char[] src, int sindex,
186                                 byte[] dst, int dindex,
187                                 int len) {
188         int j = dindex;
189         int limit = sindex + len;
190         for (int i = sindex; i < limit; i++) {
191             char ch = src[i];
192             if (1 <= ch && ch <= 0x7F) {
193                 dst[j++] = (byte)ch;
194             } else if (ch <= 0x7FF) {
195                 dst[j++] = (byte)(0xC0 | (ch >> 6));
196                 dst[j++] = (byte)(0x80 | (ch & 0x3F));
197             } else {
198                 dst[j++] = (byte)(0xE0 | (ch >> 12));
199                 dst[j++] = (byte)(0x80 | ((ch >> 6) & 0x3F));
200                 dst[j++] = (byte)(0x80 | (ch & 0x3F));
201             }
202         }
203         return j;
204     }
205 
206     /** Return characters as an array of bytes in Utf8 representation.
207      *  @param src        The array holding the characters.
208      *  @param sindex     The start index from which characters are converted.
209      *  @param len        The maximum number of characters to convert.
210      */
chars2utf(char[] src, int sindex, int len)211     public static byte[] chars2utf(char[] src, int sindex, int len) {
212         byte[] dst = new byte[len * 3];
213         int len1 = chars2utf(src, sindex, dst, 0, len);
214         byte[] result = new byte[len1];
215         System.arraycopy(dst, 0, result, 0, len1);
216         return result;
217     }
218 
219     /** Return all characters in given array as an array of bytes
220      *  in Utf8 representation.
221      *  @param src        The array holding the characters.
222      */
chars2utf(char[] src)223     public static byte[] chars2utf(char[] src) {
224         return chars2utf(src, 0, src.length);
225     }
226 
227     /** Return string as an array of bytes in in Utf8 representation.
228      */
string2utf(String s)229     public static byte[] string2utf(String s) {
230         return chars2utf(s.toCharArray());
231     }
232 
233     /**
234      * Escapes each character in a string that has an escape sequence or
235      * is non-printable ASCII.  Leaves non-ASCII characters alone.
236      */
quote(String s)237     public static String quote(String s) {
238         StringBuilder buf = new StringBuilder();
239         for (int i = 0; i < s.length(); i++) {
240             buf.append(quote(s.charAt(i)));
241         }
242         return buf.toString();
243     }
244 
245     /**
246      * Escapes a character if it has an escape sequence or is
247      * non-printable ASCII.  Leaves non-ASCII characters alone.
248      */
quote(char ch)249     public static String quote(char ch) {
250         switch (ch) {
251         case '\b':  return "\\b";
252         case '\f':  return "\\f";
253         case '\n':  return "\\n";
254         case '\r':  return "\\r";
255         case '\t':  return "\\t";
256         case '\'':  return "\\'";
257         case '\"':  return "\\\"";
258         case '\\':  return "\\\\";
259         default:
260             return (isPrintableAscii(ch))
261                 ? String.valueOf(ch)
262                 : String.format("\\u%04x", (int) ch);
263         }
264     }
265 
266     /**
267      * Is a character printable ASCII?
268      */
isPrintableAscii(char ch)269     private static boolean isPrintableAscii(char ch) {
270         return ch >= ' ' && ch <= '~';
271     }
272 
273     /** Escape all unicode characters in string.
274      */
escapeUnicode(String s)275     public static String escapeUnicode(String s) {
276         int len = s.length();
277         int i = 0;
278         while (i < len) {
279             char ch = s.charAt(i);
280             if (ch > 255) {
281                 StringBuilder buf = new StringBuilder();
282                 buf.append(s.substring(0, i));
283                 while (i < len) {
284                     ch = s.charAt(i);
285                     if (ch > 255) {
286                         buf.append("\\u");
287                         buf.append(Character.forDigit((ch >> 12) % 16, 16));
288                         buf.append(Character.forDigit((ch >>  8) % 16, 16));
289                         buf.append(Character.forDigit((ch >>  4) % 16, 16));
290                         buf.append(Character.forDigit((ch      ) % 16, 16));
291                     } else {
292                         buf.append(ch);
293                     }
294                     i++;
295                 }
296                 s = buf.toString();
297             } else {
298                 i++;
299             }
300         }
301         return s;
302     }
303 
304 /* Conversion routines for qualified name splitting
305  */
306     /** Return the last part of a qualified name.
307      *  @param name the qualified name
308      *  @return the last part of the qualified name
309      */
shortName(Name name)310     public static Name shortName(Name name) {
311         int start = name.lastIndexOf((byte)'.') + 1;
312         int end = name.getByteLength();
313         if (start == 0 && end == name.length()) {
314             return name;
315         }
316         return name.subName(
317             name.lastIndexOf((byte)'.') + 1, name.getByteLength());
318     }
319 
320     /** Return the last part of a qualified name from its string representation
321      *  @param name the string representation of the qualified name
322      *  @return the last part of the qualified name
323      */
shortName(String name)324     public static String shortName(String name) {
325         return name.substring(name.lastIndexOf('.') + 1);
326     }
327 
328     /** Return the package name of a class name, excluding the trailing '.',
329      *  "" if not existent.
330      */
packagePart(Name classname)331     public static Name packagePart(Name classname) {
332         return classname.subName(0, classname.lastIndexOf((byte)'.'));
333     }
334 
packagePart(String classname)335     public static String packagePart(String classname) {
336         int lastDot = classname.lastIndexOf('.');
337         return (lastDot < 0 ? "" : classname.substring(0, lastDot));
338     }
339 
enclosingCandidates(Name name)340     public static List<Name> enclosingCandidates(Name name) {
341         List<Name> names = List.nil();
342         int index;
343         while ((index = name.lastIndexOf((byte)'$')) > 0) {
344             name = name.subName(0, index);
345             names = names.prepend(name);
346         }
347         return names;
348     }
349 
classCandidates(Name name)350     public static List<Name> classCandidates(Name name) {
351         List<Name> names = List.nil();
352         String nameStr = name.toString();
353         int index = -1;
354         while ((index = nameStr.indexOf('.', index + 1)) > 0) {
355             String pack = nameStr.substring(0, index + 1);
356             String clz = nameStr.substring(index + 1).replace('.', '$');
357             names = names.prepend(name.table.names.fromString(pack + clz));
358         }
359         return names.reverse();
360     }
361 }
362