1 /*
2  * reserved comment block
3  * DO NOT REMOVE OR ALTER!
4  */
5 /*
6  * Licensed to the Apache Software Foundation (ASF) under one or more
7  * contributor license agreements.  See the NOTICE file distributed with
8  * this work for additional information regarding copyright ownership.
9  * The ASF licenses this file to You under the Apache License, Version 2.0
10  * (the "License"); you may not use this file except in compliance with
11  * the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  */
21 
22 package com.sun.org.apache.xml.internal.utils;
23 
/**
 * This class defines the basic XML character properties. The data
 * in this class can be used to verify that a character is a valid
 * XML character or if the character is a space, name start, or name
 * character.
 * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. The mask constants (e.g. <code>MASK_VALID</code>)
 * are public, but the table of character properties itself is private
 * to this class, so all lookups go through the convenience methods.
 * Those methods accept any <code>int</code>: values outside the range
 * covered by the table (negative values, or values above 0xFFFF) are
 * never looked up in the table.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud  Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 */
public class XMLChar {

    //
    // Constants
    //

    /** Character flags, indexed by UTF-16 code unit (0x0000 - 0xFFFF). */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The various newline characters are considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //

        int[] charRange = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        int[] spaceChar = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        int[] pubidChar = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        int[] pubidRange = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        int[] specialChar = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        // Flag combinations shared by several tables below.
        final int nameStartFlags = MASK_NAME_START | MASK_NAME |
                                   MASK_NCNAME_START | MASK_NCNAME;
        final int nameFlags = MASK_NAME | MASK_NCNAME;

        // set valid characters (every valid character starts out as content)
        addFlagsToRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters from the content set; must run after
        // the valid/content flags have been set above
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] = (byte)(CHARS[specialChar[i]] & ~MASK_CONTENT);
        }

        // set space characters
        addFlagsToChars(spaceChar, MASK_SPACE);

        // set name start characters (a name start character is also a
        // name character)
        addFlagsToChars(nameStartChar, nameStartFlags);
        addFlagsToRanges(letterRange, nameStartFlags);
        addFlagsToChars(letterChar, nameStartFlags);

        // set name characters
        addFlagsToChars(nameChar, nameFlags);
        addFlagsToRanges(digitRange, nameFlags);
        addFlagsToRanges(combiningCharRange, nameFlags);
        addFlagsToChars(combiningCharChar, nameFlags);
        addFlagsToRanges(extenderRange, nameFlags);
        addFlagsToChars(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars;
        // must run after the name tables above, which set those flags on ':'
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        addFlagsToChars(pubidChar, MASK_PUBID);
        addFlagsToRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private static methods
    //

    /**
     * ORs the given flags into the table entry of every listed character.
     *
     * @param chars The individual characters to flag.
     * @param flags The mask bits to set.
     */
    private static void addFlagsToChars(int[] chars, int flags) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= flags;
        }
    }

    /**
     * ORs the given flags into the table entry of every character in the
     * listed ranges.
     *
     * @param ranges Consecutive (first, last) pairs; both ends inclusive.
     * @param flags  The mask bits to set.
     */
    private static void addFlagsToRanges(int[] ranges, int flags) {
        for (int i = 0; i < ranges.length; i += 2) {
            for (int j = ranges[i]; j <= ranges[i + 1]; j++) {
                CHARS[j] |= flags;
            }
        }
    }

    /**
     * Returns true if the character is covered by the table and has at
     * least one of the given mask bits set. Values outside 0x0000-0xFFFF
     * (including negative values) are never used as an index and yield
     * false.
     *
     * @param c    The character to check.
     * @param mask The mask bits to test.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    }

    /**
     * Returns true if the character is allowed in an encoding name as
     * checked by this class: an ASCII letter, an ASCII digit, '.', '_'
     * or '-'.
     *
     * @param c The character to check.
     */
    private static boolean isEncodingNameChar(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
               (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character.
     *
     * @param c The character to check.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     *
     * @param c The character to check.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     *
     * @param c The character to check.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }


    /**
     * Returns true if the specified character is valid. This method
     * also accepts the supplemental character range from 0x10000 to
     * 0x10FFFF, which is not covered by the character table. Negative
     * values and values above 0x10FFFF are not valid.
     *
     * @param c The character to check.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Supplemental characters (0x10000 to 0x10FFFF) are always content.
     *
     * @param c The character to check.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false otherwise, including
     *         when name is null or empty
     */
    public static boolean isValidName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        if (!isNameStart(name.charAt(0))) {
            return false;
        }
        final int length = name.length();
        for (int i = 1; i < length; i++) {
            if (!isName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidName(String):boolean


    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check
     * @return true if ncName is a valid NCName; false otherwise,
     *         including when ncName is null or empty
     */
    public static boolean isValidNCName(String ncName) {
        if (ncName == null || ncName.length() == 0) {
            return false;
        }
        if (!isNCNameStart(ncName.charAt(0))) {
            return false;
        }
        final int length = ncName.length();
        for (int i = 1; i < length; i++) {
            if (!isNCName(ncName.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false otherwise,
     *         including when nmtoken is null or empty
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken == null || nmtoken.length() == 0) {
            return false;
        }
        final int length = nmtoken.length();
        for (int i = 0; i < length; i++) {
            if (!isName(nmtoken.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNmtoken(String):boolean

    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: the first character must be an ASCII letter
     * and every following character an ASCII letter, an ASCII digit,
     * '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name.
     * @return false if the name is null, empty or contains a character
     *         that is not allowed at its position
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        final char first = ianaEncoding.charAt(0);
        if (!((first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z'))) {
            return false;
        }
        final int length = ianaEncoding.length();
        for (int i = 1; i < length; i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: every character, including the first, must
     * be an ASCII letter, an ASCII digit, '.', '_' or '-'.
     *
     * @param javaEncoding The Java encoding name.
     * @return false if the name is null, empty or contains a character
     *         that is not allowed
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        final int length = javaEncoding.length();
        // Start at index 0: unlike an IANA name, a Java encoding name has
        // no special rule for its first character, but that character
        // must still be checked.
        for (int i = 0; i < length; i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Simple check to determine if qname is legal. A QName is accepted
     * when it is either a single valid NCName, or a valid NCName prefix
     * and a valid NCName local part separated by one ':'.
     *
     * @param str string to check
     * @return true if {@code str} is legal; false if it is illegal,
     *         including when it is null or empty
     */
    public static boolean isValidQName(String str) {
        if (str == null) {
            return false;
        }

        final int colon = str.indexOf(':');

        // A leading or trailing ':' leaves an empty prefix or local part.
        // For the empty string both sides of the second comparison are -1,
        // so it is rejected here as well.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon > 0) {
            final String prefix = str.substring(0, colon);
            final String localPart = str.substring(colon + 1);
            // A second ':' ends up in localPart and fails the NCName check.
            return isValidNCName(prefix) && isValidNCName(localPart);
        }
        return isValidNCName(str);
    } // isValidQName(String):boolean

} // class XMLChar
668