1 /*
2  * reserved comment block
3  * DO NOT REMOVE OR ALTER!
4  */
5 /*
6  * Licensed to the Apache Software Foundation (ASF) under one or more
7  * contributor license agreements.  See the NOTICE file distributed with
8  * this work for additional information regarding copyright ownership.
9  * The ASF licenses this file to You under the Apache License, Version 2.0
10  * (the "License"); you may not use this file except in compliance with
11  * the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  */
21 
22 package com.sun.org.apache.xml.internal.utils;
23 
24 import java.util.Arrays;
25 
26 
/**
 * THIS IS A COPY OF THE XERCES-2J CLASS com.sun.org.apache.xerces.internal.util.XMLChar
 *
 * This class defines the basic properties of characters in XML 1.1. The data
 * in this class can be used to verify that a character is a valid
 * XML 1.1 character or if the character is a space, name start, or name
 * character.
 * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. Each method looks the character up in an internal
 * flag table and applies the appropriate mask (e.g.
 * <code>MASK_XML11_VALID</code>). Characters outside the Basic Multilingual
 * Plane are not in the table and are classified by range instead; see the
 * comments for the <code>isXML11Valid</code> method for details.
 * <p>
 * All character predicates accept any <code>int</code>; values that are not
 * Unicode code points (negative, or above 0x10FFFF) simply do not match.
 */
public class XML11Char {

    //
    // Constants
    //

    /** Character flags for XML 1.1, indexed by BMP code point (0x0000-0xFFFF). */
    private static final byte[] XML11CHARS = new byte[1 << 16];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars) */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;

    //
    // Static initialization
    //

    static {

        // Initializing the Character Flag Array
        // Code generated by: XML11CharGenerator.
        //
        // Legend for the byte values used below (combinations of the masks above):
        //     1 = VALID
        //     3 = VALID | SPACE
        //    17 = VALID | CONTROL
        //    33 = VALID | CONTENT
        //    35 = VALID | SPACE | CONTENT
        //    45 = VALID | NAME_START | NAME | CONTENT            (the colon)
        //   -87 = 0xA9 = VALID | NAME | CONTENT | NCNAME
        //   -19 = 0xED = VALID | NAME_START | NAME | CONTENT | NCNAME_START | NCNAME
        // Entries never assigned (0, the surrogate range 55296-57343, and
        // 65534-65535) keep the default value 0, i.e. no flag set.

        Arrays.fill(XML11CHARS, 1, 9, (byte) 17);            // Fill 8 of value (byte) 17
        XML11CHARS[9] = 35;
        XML11CHARS[10] = 3;
        Arrays.fill(XML11CHARS, 11, 13, (byte) 17);          // Fill 2 of value (byte) 17
        XML11CHARS[13] = 3;
        Arrays.fill(XML11CHARS, 14, 32, (byte) 17);          // Fill 18 of value (byte) 17
        XML11CHARS[32] = 35;
        Arrays.fill(XML11CHARS, 33, 38, (byte) 33);          // Fill 5 of value (byte) 33
        XML11CHARS[38] = 1;
        Arrays.fill(XML11CHARS, 39, 45, (byte) 33);          // Fill 6 of value (byte) 33
        Arrays.fill(XML11CHARS, 45, 47, (byte) -87);         // Fill 2 of value (byte) -87
        XML11CHARS[47] = 33;
        Arrays.fill(XML11CHARS, 48, 58, (byte) -87);         // Fill 10 of value (byte) -87
        XML11CHARS[58] = 45;
        XML11CHARS[59] = 33;
        XML11CHARS[60] = 1;
        Arrays.fill(XML11CHARS, 61, 65, (byte) 33);          // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 65, 91, (byte) -19);         // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 91, 93, (byte) 33);          // Fill 2 of value (byte) 33
        XML11CHARS[93] = 1;
        XML11CHARS[94] = 33;
        XML11CHARS[95] = -19;
        XML11CHARS[96] = 33;
        Arrays.fill(XML11CHARS, 97, 123, (byte) -19);        // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 123, 127, (byte) 33);        // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 127, 133, (byte) 17);        // Fill 6 of value (byte) 17
        XML11CHARS[133] = 35;
        Arrays.fill(XML11CHARS, 134, 160, (byte) 17);        // Fill 26 of value (byte) 17
        Arrays.fill(XML11CHARS, 160, 183, (byte) 33);        // Fill 23 of value (byte) 33
        XML11CHARS[183] = -87;
        Arrays.fill(XML11CHARS, 184, 192, (byte) 33);        // Fill 8 of value (byte) 33
        Arrays.fill(XML11CHARS, 192, 215, (byte) -19);       // Fill 23 of value (byte) -19
        XML11CHARS[215] = 33;
        Arrays.fill(XML11CHARS, 216, 247, (byte) -19);       // Fill 31 of value (byte) -19
        XML11CHARS[247] = 33;
        Arrays.fill(XML11CHARS, 248, 768, (byte) -19);       // Fill 520 of value (byte) -19
        Arrays.fill(XML11CHARS, 768, 880, (byte) -87);       // Fill 112 of value (byte) -87
        Arrays.fill(XML11CHARS, 880, 894, (byte) -19);       // Fill 14 of value (byte) -19
        XML11CHARS[894] = 33;
        Arrays.fill(XML11CHARS, 895, 8192, (byte) -19);      // Fill 7297 of value (byte) -19
        Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33);      // Fill 12 of value (byte) 33
        Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19);     // Fill 2 of value (byte) -19
        Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33);      // Fill 26 of value (byte) 33
        XML11CHARS[8232] = 35;
        Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33);      // Fill 22 of value (byte) 33
        Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87);     // Fill 2 of value (byte) -87
        Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33);      // Fill 47 of value (byte) 33
        Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19);     // Fill 288 of value (byte) -19
        Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33);     // Fill 2672 of value (byte) 33
        Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19);   // Fill 1008 of value (byte) -19
        Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33);    // Fill 17 of value (byte) 33
        Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19);   // Fill 43007 of value (byte) -19
        Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33);    // Fill 6400 of value (byte) 33
        Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19);   // Fill 1232 of value (byte) -19
        Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33);    // Fill 32 of value (byte) 33
        Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19);   // Fill 526 of value (byte) -19

    } // <clinit>()

    //
    // Private helpers
    //

    /**
     * Returns true if {@code c} lies in the table range (0x0000-0xFFFF) and
     * its table entry has at least one bit of {@code mask} set. The explicit
     * lower bound keeps negative values from indexing outside the table.
     */
    private static boolean hasFlag(int c, int mask) {
        return 0 <= c && c < 0x10000 && (XML11CHARS[c] & mask) != 0;
    }

    /** Returns true if {@code c} is in the supplementary range 0x10000-0x10FFFF. */
    private static boolean isSupplementary(int c) {
        return 0x10000 <= c && c <= 0x10FFFF;
    }

    /**
     * Returns true if {@code c} is in 0x10000-0xEFFFF, the supplementary range
     * this class accepts for every kind of name character.
     */
    private static boolean isSupplementaryNameChar(int c) {
        return 0x10000 <= c && c < 0xF0000;
    }

    /**
     * Returns how many UTF-16 units starting at {@code index} form one
     * character accepted by {@code mask}: 1 for a matching BMP character,
     * 2 for a well-formed surrogate pair encoding a code point in
     * 0x10000-0xEFFFF, or 0 if neither applies.
     */
    private static int nameCharLength(String s, int index, int mask) {
        final char unit = s.charAt(index);
        if (hasFlag(unit, mask)) {
            return 1;
        }
        if (isXML11NameHighSurrogate(unit) && index + 1 < s.length()) {
            final char low = s.charAt(index + 1);
            if (Character.isLowSurrogate(low)
                    && isSupplementaryNameChar(Character.toCodePoint(unit, low))) {
                return 2;
            }
        }
        return 0;
    }

    /**
     * Returns true if {@code s} is non-empty, its first character is accepted
     * by {@code startMask} and every following character by {@code restMask}.
     */
    private static boolean matchesProduction(String s, int startMask, int restMask) {
        final int length = s.length();
        if (length == 0) {
            return false;
        }
        int mask = startMask;
        int pos = 0;
        while (pos < length) {
            final int consumed = nameCharLength(s, pos, mask);
            if (consumed == 0) {
                return false;
            }
            pos += consumed;
            mask = restMask;
        }
        return true;
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Space(int c) {
        return hasFlag(c, MASK_XML11_SPACE);
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is valid. This method
     * also checks the surrogate character range from 0x10000 to 0x10FFFF,
     * which is not covered by the flag table and is accepted in full.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Valid(int c) {
        return hasFlag(c, MASK_XML11_VALID) || isSupplementary(c);
    } // isXML11Valid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Invalid(int c) {
        return !isXML11Valid(c);
    } // isXML11Invalid(int):boolean

    /**
     * Returns true if the specified character is valid and permitted outside
     * of a character reference.
     * That is, this method returns true for the same set as
     * isXML11Valid, except that it reports false for "control characters".
     *
     * @param c The character to check.
     */
    public static boolean isXML11ValidLiteral(int c) {
        return (hasFlag(c, MASK_XML11_VALID) && !hasFlag(c, MASK_XML11_CONTROL))
                || isSupplementary(c);
    } // isXML11ValidLiteral(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an external parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Content(int c) {
        return hasFlag(c, MASK_XML11_CONTENT) || isSupplementary(c);
    } // isXML11Content(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an internal parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11InternalEntityContent(int c) {
        return hasFlag(c, MASK_XML11_CONTENT_INTERNAL) || isSupplementary(c);
    } // isXML11InternalEntityContent(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [4] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameStart(int c) {
        return hasFlag(c, MASK_XML11_NAME_START) || isSupplementaryNameChar(c);
    } // isXML11NameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4a] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Name(int c) {
        return hasFlag(c, MASK_XML11_NAME) || isSupplementaryNameChar(c);
    } // isXML11Name(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCNameStart(int c) {
        return hasFlag(c, MASK_XML11_NCNAME_START) || isSupplementaryNameChar(c);
    } // isXML11NCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCName(int c) {
        return hasFlag(c, MASK_XML11_NCNAME) || isSupplementaryNameChar(c);
    } // isXML11NCName(int):boolean

    /**
     * Returns whether the given character is a valid
     * high surrogate for a name character. This includes
     * all high surrogates for characters [0x10000-0xEFFFF].
     * In other words everything excluding planes 15 and 16.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDB7F);
    } // isXML11NameHighSurrogate(int):boolean

    /*
     * [5] Name ::= NameStartChar NameChar*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.1 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false for the empty string
     * @throws NullPointerException if name is null
     */
    public static boolean isXML11ValidName(String name) {
        return matchesProduction(name, MASK_XML11_NAME_START, MASK_XML11_NAME);
    } // isXML11ValidName(String):boolean


    /*
     * from the namespace 1.1 rec
     * [4] NCName ::= NCNameStartChar NCNameChar*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.1 Recommendation
     *
     * @param ncName string to check
     * @return true if name is a valid NCName; false for the empty string
     * @throws NullPointerException if ncName is null
     */
    public static boolean isXML11ValidNCName(String ncName) {
        return matchesProduction(ncName, MASK_XML11_NCNAME_START, MASK_XML11_NCNAME);
    } // isXML11ValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.1 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false for the empty string
     * @throws NullPointerException if nmtoken is null
     */
    public static boolean isXML11ValidNmtoken(String nmtoken) {
        // An Nmtoken has no distinguished first character: every position
        // uses the NameChar mask.
        return matchesProduction(nmtoken, MASK_XML11_NAME, MASK_XML11_NAME);
    } // isXML11ValidNmtoken(String):boolean

    /**
     * Simple check to determine if qname is legal. If it returns false
     * then {@code str} is illegal; if it returns true then
     * {@code str} is legal.
     * <p>
     * The string is split at its first colon, if any; the prefix and the
     * local part (or the whole string when there is no colon) must each be
     * a valid NCName.
     *
     * @param str string to check
     * @return true if str is a legal qname
     * @throws NullPointerException if str is null
     */
    public static boolean isXML11ValidQName(String str) {

        final int colon = str.indexOf(':');

        if (colon < 0) {
            return isXML11ValidNCName(str);
        }

        // A leading or trailing colon leaves an empty prefix or local part.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        final String prefix = str.substring(0, colon);
        final String localPart = str.substring(colon + 1);
        return isXML11ValidNCName(prefix) && isXML11ValidNCName(localPart);
    } // isXML11ValidQName(String):boolean

} // class XML11Char
434