1 /*
2  * reserved comment block
3  * DO NOT REMOVE OR ALTER!
4  */
5 /*
6  * Copyright 1999-2004 The Apache Software Foundation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xerces.internal.util;
22 
23 import java.util.Arrays;
24 
/**
 * This class defines the basic properties of characters in XML 1.1. The data
 * in this class can be used to verify that a character is a valid
 * XML 1.1 character or if the character is a space, name start, or name
 * character.
 * <p>
 * Character properties for the Basic Multilingual Plane are held in a
 * private lookup table with one flag byte per UTF-16 code unit; the public
 * <code>MASK_XML11_*</code> constants name the individual bits of that byte.
 * The table itself is not accessible from outside this class, so callers
 * use the static convenience methods, which additionally handle the
 * supplementary range (0x10000 and above) that the table does not cover.
 * <p>
 * All <code>int</code>-taking predicates return <code>false</code> for
 * values outside the Unicode range, including negative values.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud  Le Hors, IBM
 * @author Neil Graham, IBM
 * @author Michael Glavassevich, IBM
 *
 * @version $Id: XML11Char.java,v 1.7 2010-11-01 04:40:15 joehw Exp $
 */
public class XML11Char {

    //
    // Constants
    //

    /** Character flags for XML 1.1, one byte per code unit 0x0000-0xFFFF. */
    private static final byte XML11CHARS [] = new byte [1 << 16];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars) */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;

    //
    // Static initialization
    //

    static {

        // Initializing the Character Flag Array
        // Code generated by: XML11CharGenerator.
        //
        // Entries that are never assigned (0x0000, 0xD800-0xDFFF, 0xFFFE and
        // 0xFFFF) keep the default value 0, i.e. no flag is set for them.

        Arrays.fill(XML11CHARS, 1, 9, (byte) 17 ); // Fill 8 of value (byte) 17
        XML11CHARS[9] = 35;
        XML11CHARS[10] = 3;
        Arrays.fill(XML11CHARS, 11, 13, (byte) 17 ); // Fill 2 of value (byte) 17
        XML11CHARS[13] = 3;
        Arrays.fill(XML11CHARS, 14, 32, (byte) 17 ); // Fill 18 of value (byte) 17
        XML11CHARS[32] = 35;
        Arrays.fill(XML11CHARS, 33, 38, (byte) 33 ); // Fill 5 of value (byte) 33
        XML11CHARS[38] = 1;
        Arrays.fill(XML11CHARS, 39, 45, (byte) 33 ); // Fill 6 of value (byte) 33
        Arrays.fill(XML11CHARS, 45, 47, (byte) -87 ); // Fill 2 of value (byte) -87
        XML11CHARS[47] = 33;
        Arrays.fill(XML11CHARS, 48, 58, (byte) -87 ); // Fill 10 of value (byte) -87
        XML11CHARS[58] = 45;
        XML11CHARS[59] = 33;
        XML11CHARS[60] = 1;
        Arrays.fill(XML11CHARS, 61, 65, (byte) 33 ); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 65, 91, (byte) -19 ); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 91, 93, (byte) 33 ); // Fill 2 of value (byte) 33
        XML11CHARS[93] = 1;
        XML11CHARS[94] = 33;
        XML11CHARS[95] = -19;
        XML11CHARS[96] = 33;
        Arrays.fill(XML11CHARS, 97, 123, (byte) -19 ); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 123, 127, (byte) 33 ); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 127, 133, (byte) 17 ); // Fill 6 of value (byte) 17
        XML11CHARS[133] = 35;
        Arrays.fill(XML11CHARS, 134, 160, (byte) 17 ); // Fill 26 of value (byte) 17
        Arrays.fill(XML11CHARS, 160, 183, (byte) 33 ); // Fill 23 of value (byte) 33
        XML11CHARS[183] = -87;
        Arrays.fill(XML11CHARS, 184, 192, (byte) 33 ); // Fill 8 of value (byte) 33
        Arrays.fill(XML11CHARS, 192, 215, (byte) -19 ); // Fill 23 of value (byte) -19
        XML11CHARS[215] = 33;
        Arrays.fill(XML11CHARS, 216, 247, (byte) -19 ); // Fill 31 of value (byte) -19
        XML11CHARS[247] = 33;
        Arrays.fill(XML11CHARS, 248, 768, (byte) -19 ); // Fill 520 of value (byte) -19
        Arrays.fill(XML11CHARS, 768, 880, (byte) -87 ); // Fill 112 of value (byte) -87
        Arrays.fill(XML11CHARS, 880, 894, (byte) -19 ); // Fill 14 of value (byte) -19
        XML11CHARS[894] = 33;
        Arrays.fill(XML11CHARS, 895, 8192, (byte) -19 ); // Fill 7297 of value (byte) -19
        Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33 ); // Fill 12 of value (byte) 33
        Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19 ); // Fill 2 of value (byte) -19
        Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33 ); // Fill 26 of value (byte) 33
        XML11CHARS[8232] = 35;
        Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33 ); // Fill 22 of value (byte) 33
        Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87 ); // Fill 2 of value (byte) -87
        Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33 ); // Fill 47 of value (byte) 33
        Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19 ); // Fill 288 of value (byte) -19
        Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33 ); // Fill 2672 of value (byte) 33
        Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19 ); // Fill 1008 of value (byte) -19
        Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33 ); // Fill 17 of value (byte) 33
        Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19 ); // Fill 43007 of value (byte) -19
        Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33 ); // Fill 6400 of value (byte) 33
        Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19 ); // Fill 1232 of value (byte) -19
        Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33 ); // Fill 32 of value (byte) 33
        Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19 ); // Fill 526 of value (byte) -19

    } // <clinit>()

    //
    // Private helpers
    //

    /**
     * Returns true if <code>c</code> is a legal index into the flag table.
     * The lower bound matters: a negative value would otherwise pass a bare
     * <code>c &lt; 0x10000</code> test and cause an
     * <code>ArrayIndexOutOfBoundsException</code> on lookup.
     */
    private static boolean isInTable(int c) {
        return 0 <= c && c < 0x10000;
    }

    /** Returns true if <code>c</code> is in the table and has any bit of <code>mask</code> set. */
    private static boolean hasFlag(int c, int mask) {
        return isInTable(c) && (XML11CHARS[c] & mask) != 0;
    }

    /** Returns true if <code>c</code> lies in the supplementary range 0x10000-0x10FFFF. */
    private static boolean isSupplementary(int c) {
        return 0x10000 <= c && c <= 0x10FFFF;
    }

    /**
     * Returns true if <code>c</code> lies in 0x10000-0xEFFFF, the
     * supplementary range accepted by all four name predicates.
     */
    private static boolean isSupplementaryNameChar(int c) {
        return 0x10000 <= c && c < 0xF0000;
    }

    /**
     * Returns true if <code>c</code> belongs to the name class selected by
     * <code>mask</code>: either flagged in the table or in the
     * supplementary name range.
     */
    private static boolean isNameClass(int c, int mask) {
        return hasFlag(c, mask) || isSupplementaryNameChar(c);
    }

    /**
     * Shared scanner behind the Name, NCName and Nmtoken validators.
     * Walks the string one character at a time; a code unit that is not in
     * the required class on its own is accepted only as the high half of a
     * well-formed surrogate pair whose combined code point is in the class.
     *
     * @param s         string to check; must not be null
     * @param firstMask flag mask the first character must satisfy
     * @param restMask  flag mask every subsequent character must satisfy
     * @return true if <code>s</code> is non-empty and every character matches
     */
    private static boolean isValidNameSequence(String s, int firstMask, int restMask) {
        final int length = s.length();
        if (length == 0) {
            return false;
        }
        int mask = firstMask;
        int i = 0;
        while (i < length) {
            final char ch = s.charAt(i++);
            if (!isNameClass(ch, mask)) {
                // Not acceptable alone: it must start a surrogate pair.
                if (i >= length || !isXML11NameHighSurrogate(ch)) {
                    return false;
                }
                final char low = s.charAt(i++);
                if (!Character.isLowSurrogate(low) ||
                    !isNameClass(Character.toCodePoint(ch, low), mask)) {
                    return false;
                }
            }
            mask = restMask;
        }
        return true;
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Space(int c) {
        return hasFlag(c, MASK_XML11_SPACE);
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is valid. This method
     * also checks the supplementary character range from 0x10000 to
     * 0x10FFFF, which the flag table does not cover.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Valid(int c) {
        return hasFlag(c, MASK_XML11_VALID) || isSupplementary(c);
    } // isXML11Valid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Invalid(int c) {
        return !isXML11Valid(c);
    } // isXML11Invalid(int):boolean

    /**
     * Returns true if the specified character is valid and permitted outside
     * of a character reference.
     * That is, this method will return false for the same set as
     * isXML11Valid, except it also reports false for "control characters".
     *
     * @param c The character to check.
     */
    public static boolean isXML11ValidLiteral(int c) {
        if (isInTable(c)) {
            final int flags = XML11CHARS[c];
            return (flags & MASK_XML11_VALID) != 0
                && (flags & MASK_XML11_CONTROL) == 0;
        }
        return isSupplementary(c);
    } // isXML11ValidLiteral(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an external parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Content(int c) {
        return hasFlag(c, MASK_XML11_CONTENT) || isSupplementary(c);
    } // isXML11Content(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an internal parsed entity. A character qualifies if it
     * carries either the content flag or the control flag.
     *
     * @param c The character to check.
     */
    public static boolean isXML11InternalEntityContent(int c) {
        return hasFlag(c, MASK_XML11_CONTENT_INTERNAL) || isSupplementary(c);
    } // isXML11InternalEntityContent(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [4] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameStart(int c) {
        return isNameClass(c, MASK_XML11_NAME_START);
    } // isXML11NameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4a] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Name(int c) {
        return isNameClass(c, MASK_XML11_NAME);
    } // isXML11Name(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCNameStart(int c) {
        return isNameClass(c, MASK_XML11_NCNAME_START);
    } // isXML11NCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCName(int c) {
        return isNameClass(c, MASK_XML11_NCNAME);
    } // isXML11NCName(int):boolean

    /**
     * Returns whether the given character is a valid
     * high surrogate for a name character. This includes
     * all high surrogates for characters [0x10000-0xEFFFF].
     * In other words everything excluding planes 15 and 16.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDB7F);
    } // isXML11NameHighSurrogate(int):boolean

    /*
     * [5] Name ::= NameStartChar NameChar*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.1 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name
     */
    public static boolean isXML11ValidName(String name) {
        return isValidNameSequence(name, MASK_XML11_NAME_START, MASK_XML11_NAME);
    } // isXML11ValidName(String):boolean

    /*
     * from the namespace 1.1 rec
     * [4] NCName ::= NCNameStartChar NCNameChar*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.1 Recommendation
     *
     * @param ncName string to check
     * @return true if name is a valid NCName
     */
    public static boolean isXML11ValidNCName(String ncName) {
        return isValidNameSequence(ncName, MASK_XML11_NCNAME_START, MASK_XML11_NCNAME);
    } // isXML11ValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.1 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken
     */
    public static boolean isXML11ValidNmtoken(String nmtoken) {
        return isValidNameSequence(nmtoken, MASK_XML11_NAME, MASK_XML11_NAME);
    } // isXML11ValidNmtoken(String):boolean

} // class XML11Char
416