1 /*
2  * reserved comment block
3  * DO NOT REMOVE OR ALTER!
4  */
5 /*
6  * Copyright 1999-2005 The Apache Software Foundation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xml.internal.utils;
22 
23 import java.util.Arrays;
24 
25 
26 /**
 * THIS IS A COPY OF THE XERCES-2J CLASS com.sun.org.apache.xerces.internal.util.XML11Char
28  *
29  * This class defines the basic properties of characters in XML 1.1. The data
30  * in this class can be used to verify that a character is a valid
31  * XML 1.1 character or if the character is a space, name start, or name
32  * character.
33  * <p>
34  * A series of convenience methods are supplied to ease the burden
35  * of the developer.  Using the character as an index into the <code>XML11CHARS</code>
36  * array and applying the appropriate mask flag (e.g.
37  * <code>MASK_VALID</code>), yields the same results as calling the
38  * convenience methods. There is one exception: check the comments
39  * for the <code>isValid</code> method for details.
40  *
41  */
public class XML11Char {

    //
    // Constants
    //

    /**
     * Character flags for XML 1.1, indexed by UTF-16 code unit
     * (0x0000-0xFFFF). Each entry is a bitwise OR of the
     * <code>MASK_XML11_*</code> flags; entries that the static initializer
     * never assigns keep the default value 0 (no flags set).
     */
    private static final byte[] XML11CHARS = new byte[1 << 16];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars) */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;

    /** First code point that is not covered by the flag table. */
    private static final int SUPPLEMENTARY_MIN = 0x10000;

    /** Last code point accepted by the validity/content checks. */
    private static final int SUPPLEMENTARY_MAX = 0x10FFFF;

    /** Exclusive upper bound of the supplementary range accepted in names. */
    private static final int SUPPLEMENTARY_NAME_LIMIT = 0xF0000;

    //
    // Static initialization
    //

    static {

        // Initializing the Character Flag Array
        // Code generated by: XML11CharGenerator.
        // Ranges are [from, to) as in Arrays.fill; indices that are skipped
        // (0, 55296-57343, 65534, 65535) are left at 0.

        Arrays.fill(XML11CHARS, 1, 9, (byte) 17);
        XML11CHARS[9] = 35;
        XML11CHARS[10] = 3;
        Arrays.fill(XML11CHARS, 11, 13, (byte) 17);
        XML11CHARS[13] = 3;
        Arrays.fill(XML11CHARS, 14, 32, (byte) 17);
        XML11CHARS[32] = 35;
        Arrays.fill(XML11CHARS, 33, 38, (byte) 33);
        XML11CHARS[38] = 1;
        Arrays.fill(XML11CHARS, 39, 45, (byte) 33);
        Arrays.fill(XML11CHARS, 45, 47, (byte) -87);
        XML11CHARS[47] = 33;
        Arrays.fill(XML11CHARS, 48, 58, (byte) -87);
        XML11CHARS[58] = 45;
        XML11CHARS[59] = 33;
        XML11CHARS[60] = 1;
        Arrays.fill(XML11CHARS, 61, 65, (byte) 33);
        Arrays.fill(XML11CHARS, 65, 91, (byte) -19);
        Arrays.fill(XML11CHARS, 91, 93, (byte) 33);
        XML11CHARS[93] = 1;
        XML11CHARS[94] = 33;
        XML11CHARS[95] = -19;
        XML11CHARS[96] = 33;
        Arrays.fill(XML11CHARS, 97, 123, (byte) -19);
        Arrays.fill(XML11CHARS, 123, 127, (byte) 33);
        Arrays.fill(XML11CHARS, 127, 133, (byte) 17);
        XML11CHARS[133] = 35;
        Arrays.fill(XML11CHARS, 134, 160, (byte) 17);
        Arrays.fill(XML11CHARS, 160, 183, (byte) 33);
        XML11CHARS[183] = -87;
        Arrays.fill(XML11CHARS, 184, 192, (byte) 33);
        Arrays.fill(XML11CHARS, 192, 215, (byte) -19);
        XML11CHARS[215] = 33;
        Arrays.fill(XML11CHARS, 216, 247, (byte) -19);
        XML11CHARS[247] = 33;
        Arrays.fill(XML11CHARS, 248, 768, (byte) -19);
        Arrays.fill(XML11CHARS, 768, 880, (byte) -87);
        Arrays.fill(XML11CHARS, 880, 894, (byte) -19);
        XML11CHARS[894] = 33;
        Arrays.fill(XML11CHARS, 895, 8192, (byte) -19);
        Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33);
        Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19);
        Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33);
        XML11CHARS[8232] = 35;
        Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33);
        Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87);
        Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33);
        Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19);
        Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33);
        Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19);
        Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33);
        Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19);
        Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33);
        Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19);
        Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33);
        Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19);

    } // <clinit>()

    //
    // Private helpers
    //

    /**
     * Returns true if <code>c</code> lies inside the flag table and at least
     * one bit of <code>mask</code> is set for it. Values outside
     * 0x0000-0xFFFF (including negative values) yield false rather than an
     * out-of-bounds array access.
     */
    private static boolean hasFlag(int c, int mask) {
        return 0 <= c && c < SUPPLEMENTARY_MIN && (XML11CHARS[c] & mask) != 0;
    }

    /** Returns true if <code>c</code> is in the range 0x10000-0x10FFFF. */
    private static boolean isSupplementary(int c) {
        return SUPPLEMENTARY_MIN <= c && c <= SUPPLEMENTARY_MAX;
    }

    /** Returns true if <code>c</code> is in the range 0x10000-0xEFFFF. */
    private static boolean isSupplementaryNameChar(int c) {
        return SUPPLEMENTARY_MIN <= c && c < SUPPLEMENTARY_NAME_LIMIT;
    }

    /**
     * Shared scanner behind the Name, NCName and Nmtoken checks. The first
     * character must carry <code>firstMask</code> and every later character
     * <code>restMask</code>; a character that fails its mask is still
     * accepted when it is a name high surrogate followed by a low surrogate
     * that together encode a code point in 0x10000-0xEFFFF.
     *
     * @param s         string to scan
     * @param firstMask flag required of the first character
     * @param restMask  flag required of every subsequent character
     * @return true if <code>s</code> is non-empty and every character matches
     */
    private static boolean matchesChars(String s, int firstMask, int restMask) {
        final int length = s.length();
        if (length == 0) {
            return false;
        }
        int mask = firstMask;
        int i = 0;
        while (i < length) {
            final char ch = s.charAt(i++);
            if (!hasFlag(ch, mask)) {
                // Not a BMP match: only a complete surrogate pair can save it.
                if (i == length || !isXML11NameHighSurrogate(ch)) {
                    return false;
                }
                final char low = s.charAt(i++);
                if (!Character.isLowSurrogate(low)
                        || !isSupplementaryNameChar(Character.toCodePoint(ch, low))) {
                    return false;
                }
            }
            mask = restMask;
        }
        return true;
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification.
     *
     * @param c The character to check.
     * @return true if the space flag is set for <code>c</code>; false for any
     *         value outside 0x0000-0xFFFF.
     */
    public static boolean isXML11Space(int c) {
        return hasFlag(c, MASK_XML11_SPACE);
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is valid. This method
     * also accepts the supplementary range from 0x10000 to 0x10FFFF.
     * <p>
     * If the program chooses to apply the mask directly to the
     * <code>XML11CHARS</code> array, then they are responsible for checking
     * the supplementary character range.
     *
     * @param c The character to check.
     * @return true if <code>c</code> is valid; false otherwise, including for
     *         negative values and values above 0x10FFFF.
     */
    public static boolean isXML11Valid(int c) {
        return hasFlag(c, MASK_XML11_VALID) || isSupplementary(c);
    } // isXML11Valid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     * @return the negation of {@link #isXML11Valid(int)}.
     */
    public static boolean isXML11Invalid(int c) {
        return !isXML11Valid(c);
    } // isXML11Invalid(int):boolean

    /**
     * Returns true if the specified character is valid and permitted outside
     * of a character reference.
     * That is, this method will return false for the same set as
     * isXML11Valid, except it also reports false for "control characters".
     *
     * @param c The character to check.
     * @return true if <code>c</code> is valid and does not carry the control
     *         flag, or lies in 0x10000-0x10FFFF.
     */
    public static boolean isXML11ValidLiteral(int c) {
        return (hasFlag(c, MASK_XML11_VALID) && !hasFlag(c, MASK_XML11_CONTROL))
            || isSupplementary(c);
    } // isXML11ValidLiteral(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an external parsed entity.
     *
     * @param c The character to check.
     * @return true if the content flag is set for <code>c</code> or
     *         <code>c</code> lies in 0x10000-0x10FFFF.
     */
    public static boolean isXML11Content(int c) {
        return hasFlag(c, MASK_XML11_CONTENT) || isSupplementary(c);
    } // isXML11Content(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an internal parsed entity.
     *
     * @param c The character to check.
     * @return true if the content flag or the control flag is set for
     *         <code>c</code>, or <code>c</code> lies in 0x10000-0x10FFFF.
     */
    public static boolean isXML11InternalEntityContent(int c) {
        return hasFlag(c, MASK_XML11_CONTENT_INTERNAL) || isSupplementary(c);
    } // isXML11InternalEntityContent(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [4] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     * @return true if the name-start flag is set for <code>c</code> or
     *         <code>c</code> lies in 0x10000-0xEFFFF.
     */
    public static boolean isXML11NameStart(int c) {
        return hasFlag(c, MASK_XML11_NAME_START) || isSupplementaryNameChar(c);
    } // isXML11NameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4a] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     * @return true if the name flag is set for <code>c</code> or
     *         <code>c</code> lies in 0x10000-0xEFFFF.
     */
    public static boolean isXML11Name(int c) {
        return hasFlag(c, MASK_XML11_NAME) || isSupplementaryNameChar(c);
    } // isXML11Name(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     * @return true if the NCName-start flag is set for <code>c</code> or
     *         <code>c</code> lies in 0x10000-0xEFFFF.
     */
    public static boolean isXML11NCNameStart(int c) {
        return hasFlag(c, MASK_XML11_NCNAME_START) || isSupplementaryNameChar(c);
    } // isXML11NCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     * @return true if the NCName flag is set for <code>c</code> or
     *         <code>c</code> lies in 0x10000-0xEFFFF.
     */
    public static boolean isXML11NCName(int c) {
        return hasFlag(c, MASK_XML11_NCNAME) || isSupplementaryNameChar(c);
    } // isXML11NCName(int):boolean

    /**
     * Returns whether the given character is a valid
     * high surrogate for a name character. This includes
     * all high surrogates for characters [0x10000-0xEFFFF].
     * In other words everything excluding planes 15 and 16.
     *
     * @param c The character to check.
     * @return true if <code>c</code> lies in 0xD800-0xDB7F.
     */
    public static boolean isXML11NameHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDB7F);
    } // isXML11NameHighSurrogate(int):boolean

    /*
     * [5] Name ::= NameStartChar NameChar*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.1 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false for the empty string
     * @throws NullPointerException if <code>name</code> is null
     */
    public static boolean isXML11ValidName(String name) {
        return matchesChars(name, MASK_XML11_NAME_START, MASK_XML11_NAME);
    } // isXML11ValidName(String):boolean


    /*
     * from the namespace 1.1 rec
     * [4] NCName ::= NCNameStartChar NCNameChar*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.1 Recommendation
     *
     * @param ncName string to check
     * @return true if name is a valid NCName; false for the empty string
     * @throws NullPointerException if <code>ncName</code> is null
     */
    public static boolean isXML11ValidNCName(String ncName) {
        return matchesChars(ncName, MASK_XML11_NCNAME_START, MASK_XML11_NCNAME);
    } // isXML11ValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.1 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false for the empty string
     * @throws NullPointerException if <code>nmtoken</code> is null
     */
    public static boolean isXML11ValidNmtoken(String nmtoken) {
        return matchesChars(nmtoken, MASK_XML11_NAME, MASK_XML11_NAME);
    } // isXML11ValidNmtoken(String):boolean

    /**
     * Simple check to determine if qname is legal. If it returns false
     * then <code>str</code> is illegal; if it returns true then
     * <code>str</code> is legal.
     * <p>
     * The string is split at its first colon: a leading or trailing colon is
     * rejected, and otherwise the prefix and local part (or the whole string
     * when there is no colon) must each be a valid NCName.
     *
     * @param str string to check
     * @return true if <code>str</code> is a legal QName
     * @throws NullPointerException if <code>str</code> is null
     */
    public static boolean isXML11ValidQName(String str) {

        final int colon = str.indexOf(':');

        // Also rejects the empty string: colon == -1 == str.length() - 1.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon > 0) {
            final String prefix = str.substring(0, colon);
            final String localPart = str.substring(colon + 1);
            return isXML11ValidNCName(prefix) && isXML11ValidNCName(localPart);
        }
        return isXML11ValidNCName(str);
    } // isXML11ValidQName(String):boolean

} // class XML11Char
433