1 /*
2  * $Id: XMLChecker.java,v 1.11 2005/09/12 08:40:02 znerd Exp $
3  */
4 package org.znerd.xmlenc;
5 
6 /**
7  * Utility class that provides XML checking functionality.
8  *
9  * @version $Revision: 1.11 $ $Date: 2005/09/12 08:40:02 $
10  * @author Ernst de Haan (<a href="mailto:wfe.dehaan@gmail.com">wfe.dehaan@gmail.com</a>)
11  *
12  * @since xmlenc 0.41
13  */
14 public final class XMLChecker extends Object {
15 
16    //-------------------------------------------------------------------------
17    // Class functions
18    //-------------------------------------------------------------------------
19 
20    /**
21     * Checks if the specified string matches the <em>S</em> (white space)
22     * production.
23     *
24     * <p>See:
25     * <a href="http://www.w3.org/TR/REC-xml#NT-S">Definition of S</a>.
26     *
27     * @param s
28     *    the character string to check, cannot be <code>null</code>.
29     *
30     * @throws NullPointerException
31     *    if <code>s == null</code>.
32     *
33     * @throws InvalidXMLException
34     *    if the specified character string does not match the <em>S</em>
35     *    production.
36     */
checkS(String s)37    public static final void checkS(String s)
38    throws NullPointerException {
39       checkS(s.toCharArray(), 0, s.length());
40    }
41 
42    /**
43     * Checks if the specified part of a character array matches the <em>S</em>
44     * (white space) production.
45     *
46     * <p>See:
47     * <a href="http://www.w3.org/TR/REC-xml#NT-S">Definition of S</a>.
48     *
49     * @param ch
50     *    the character array that contains the characters to be checked,
51     *    cannot be <code>null</code>.
52     *
53     * @param start
54     *    the start index into <code>ch</code>, must be &gt;= 0.
55     *
56     * @param length
57     *    the number of characters to take from <code>ch</code>, starting at
58     *    the <code>start</code> index.
59     *
60     * @throws NullPointerException
61     *    if <code>ch == null</code>.
62     *
63     * @throws IndexOutOfBoundsException
64     *    if <code>start &lt; 0
65     *          || start + length &gt; ch.length</code>.
66     *
67     * @throws InvalidXMLException
68     *    if the specified character string does not match the <em>S</em>
69     *    production.
70     */
checkS(char[] ch, int start, int length)71    public static final void checkS(char[] ch, int start, int length)
72    throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException {
73 
74       // Loop through the array and check each character
75       for (int i = start; i < length; i++) {
76          int c = (int) ch[i];
77 
78          if (c != 0x20 && c != 0x9 && c != 0xD && c != 0xA) {
79             throw new InvalidXMLException("The character 0x" + Integer.toHexString(c) + " is not valid for the 'S' production (white space).");
80          }
81       }
82    }
83 
84    /**
85     * Determines if the specified string matches the <em>Name</em> production.
86     *
87     * <p>See:
88     * <a href="http://www.w3.org/TR/REC-xml#NT-Name">Definition of Name</a>.
89     *
90     * @param s
91     *    the character string to check, cannot be <code>null</code>.
92     *
93     * @throws NullPointerException
94     *    if <code>s == null</code>.
95     *
96     * @return
97     *    <code>true</code> if the {@link String} matches the production, or
98     *    <code>false</code> otherwise.
99     */
isName(String s)100    public static final boolean isName(String s)
101    throws NullPointerException {
102       try {
103          checkName(s);
104          return true;
105       } catch (InvalidXMLException exception) {
106          return false;
107       }
108    }
109 
110    /**
111     * Checks if the specified string matches the <em>Name</em> production.
112     *
113     * <p>See:
114     * <a href="http://www.w3.org/TR/REC-xml#NT-Name">Definition of Name</a>.
115     *
116     * @param s
117     *    the character string to check, cannot be <code>null</code>.
118     *
119     * @throws NullPointerException
120     *    if <code>s == null</code>.
121     *
122     * @throws InvalidXMLException
123     *    if the specified character string does not match the <em>Name</em>
124     *    production.
125     */
checkName(String s)126    public static final void checkName(String s)
127    throws NullPointerException, InvalidXMLException {
128       checkName(s.toCharArray(), 0, s.length());
129    }
130 
131    /**
132     * Checks if the specified part of a character array matches the
133     * <em>Name</em> production.
134     *
135     * <p>See:
136     * <a href="http://www.w3.org/TR/REC-xml#NT-Name">Definition of Name</a>.
137     *
138     * @param ch
139     *    the character array that contains the characters to be checked,
140     *    cannot be <code>null</code>.
141     *
142     * @param start
143     *    the start index into <code>ch</code>, must be &gt;= 0.
144     *
145     * @param length
146     *    the number of characters to take from <code>ch</code>, starting at
147     *    the <code>start</code> index.
148     *
149     * @throws NullPointerException
150     *    if <code>ch == null</code>.
151     *
152     * @throws IndexOutOfBoundsException
153     *    if <code>start &lt; 0
154     *          || start + length &gt; ch.length</code>.
155     *
156     * @throws InvalidXMLException
157     *    if the specified character string does not match the <em>Name</em>
158     *    production.
159     */
checkName(char[] ch, int start, int length)160    public static final void checkName(char[] ch, int start, int length)
161    throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException {
162 
163       // Minimum length is 1
164       if (length < 1) {
165          throw new InvalidXMLException("An empty string does not match the 'Name' production.");
166       }
167 
168       // First char must match: (Letter | '_' | ':')
169       int i = start;
170       char c = ch[i];
171       if (c != '_' && c != ':' && !isLetter(c)) {
172          throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is invalid as a starting character in the 'Name' production.");
173       }
174 
175       // Loop through the array and check each character
176       for (i++; i < length; i++) {
177          c = ch[i];
178 
179          if (!isNameChar(c)) {
180             throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid for the 'Name' production.");
181          }
182       }
183    }
184 
185    /**
186     * Determines if the specified string matches the <em>SystemLiteral</em>
187     * production.
188     *
189     * <p>See:
190     * <a href="http://www.w3.org/TR/REC-xml#NT-SystemLiteral">Definition of SystemLiteral</a>.
191     *
192     * @param s
193     *    the character string to check, cannot be <code>null</code>.
194     *
195     * @throws NullPointerException
196     *    if <code>s == null</code>.
197     *
198     * @return
199     *    <code>true</code> if the {@link String} matches the production, or
200     *    <code>false</code> otherwise.
201     */
isSystemLiteral(String s)202    public static final boolean isSystemLiteral(String s)
203    throws NullPointerException {
204       try {
205          checkSystemLiteral(s);
206          return true;
207       } catch (InvalidXMLException exception) {
208          return false;
209       }
210    }
211 
212    /**
213     * Checks if the specified string matches the <em>SystemLiteral</em>
214     * production.
215     *
216     * <p>See:
217     * <a href="http://www.w3.org/TR/REC-xml#NT-SystemLiteral">Definition of SystemLiteral</a>.
218     *
219     * @param s
220     *    the character string to check, cannot be <code>null</code>.
221     *
222     * @throws NullPointerException
223     *    if <code>s == null</code>.
224     *
225     * @throws InvalidXMLException
226     *    if the specified character string does not match the
227     *    <em>PubidLiteral</em> production.
228     */
checkSystemLiteral(String s)229    public static final void checkSystemLiteral(String s)
230    throws NullPointerException, InvalidXMLException {
231       checkSystemLiteral(s.toCharArray(), 0, s.length());
232    }
233 
234    /**
235     * Checks if the specified part of a character array matches the
236     * <em>SystemLiteral</em> production.
237     *
238     * <p>See:
239     * <a href="http://www.w3.org/TR/REC-xml#NT-SystemLiteral">Definition of SystemLiteral</a>.
240     *
241     * @param ch
242     *    the character array that contains the characters to be checked,
243     *    cannot be <code>null</code>.
244     *
245     * @param start
246     *    the start index into <code>ch</code>, must be &gt;= 0.
247     *
248     * @param length
249     *    the number of characters to take from <code>ch</code>, starting at
250     *    the <code>start</code> index.
251     *
252     * @throws NullPointerException
253     *    if <code>ch == null</code>.
254     *
255     * @throws IndexOutOfBoundsException
256     *    if <code>start &lt; 0
257     *          || start + length &gt; ch.length</code>.
258     *
259     * @throws InvalidXMLException
260     *    if the specified character string does not match the
261     *    <em>SystemLiteral</em> production.
262     */
checkSystemLiteral(char[] ch, int start, int length)263    public static final void checkSystemLiteral(char[] ch,
264                                                int    start,
265                                                int    length)
266    throws NullPointerException,
267           IndexOutOfBoundsException,
268           InvalidXMLException {
269 
270       // Minimum length is 3
271       if (length < 3) {
272          throw new InvalidXMLException("Minimum length for the 'SystemLiteral' production is 3 characters.");
273       }
274 
275       int  lastIndex = start + length - 1;
276       char firstChar = ch[0];
277       char lastChar  = ch[lastIndex];
278 
279       // First and last char: single qoute (apostrophe)
280       String otherAllowedChars;
281       if (firstChar == '\'') {
282          if (lastChar != '\'') {
283             throw new InvalidXMLException("First character is '\\'', but the "
284                                      + "last character is 0x"
285                                      + Integer.toHexString((int) lastChar)
286                                      + '.');
287          }
288          otherAllowedChars = "-()+,./:=?;!*#@$_%";
289 
290       // First and last char: double qoute character
291       } else if (firstChar == '"') {
292          if (lastChar != '"') {
293             throw new InvalidXMLException("First character is '\"', but the "
294                                      + "last character is 0x"
295                                      + Integer.toHexString((int) lastChar)
296                                      + '.');
297          }
298          otherAllowedChars = "-'()+,./:=?;!*#@$_%";
299 
300       // First character is invalid
301       } else {
302          throw new InvalidXMLException("First char must either be '\\'' or "
303                                      + "'\"' instead of 0x"
304                                      + Integer.toHexString((int) firstChar)
305                                      + '.');
306       }
307 
308 
309       // Check each character
310       for (int i = 1; i < (length - 1); i++) {
311          char c = ch[i];
312 
313          if (c == firstChar) {
314             if (firstChar == '\'') {
315                throw new InvalidXMLException("Found '\\'' at position " + i + '.');
316             } else {
317                throw new InvalidXMLException("Found '\"' at position " + i + '.');
318             }
319          }
320       }
321    }
322 
323    /**
324     * Determines if the specified string matches the <em>PubidLiteral</em>
325     * production.
326     *
327     * <p>See:
328     * <a href="http://www.w3.org/TR/REC-xml#NT-PubidLiteral">Definition of PubidLiteral</a>.
329     *
330     * @param s
331     *    the character string to check, cannot be <code>null</code>.
332     *
333     * @throws NullPointerException
334     *    if <code>s == null</code>.
335     *
336     * @return
337     *    <code>true</code> if the {@link String} matches the production, or
338     *    <code>false</code> otherwise.
339     */
isPubidLiteral(String s)340    public static final boolean isPubidLiteral(String s)
341    throws NullPointerException {
342       try {
343          checkPubidLiteral(s);
344          return true;
345       } catch (InvalidXMLException exception) {
346          return false;
347       }
348    }
349 
350    /**
351     * Checks if the specified string matches the <em>PubidLiteral</em>
352     * production.
353     *
354     * <p>See:
355     * <a href="http://www.w3.org/TR/REC-xml#NT-PubidLiteral">Definition of PubidLiteral</a>.
356     *
357     * @param s
358     *    the character string to check, cannot be <code>null</code>.
359     *
360     * @throws NullPointerException
361     *    if <code>s == null</code>.
362     *
363     * @throws InvalidXMLException
364     *    if the specified character string does not match the
365     *    <em>PubidLiteral</em> production.
366     */
checkPubidLiteral(String s)367    public static final void checkPubidLiteral(String s)
368    throws NullPointerException, InvalidXMLException {
369       checkPubidLiteral(s.toCharArray(), 0, s.length());
370    }
371 
372    /**
373     * Checks if the specified part of a character array matches the
374     * <em>PubidLiteral</em> production.
375     *
376     * <p>See:
377     * <a href="http://www.w3.org/TR/REC-xml#NT-PubidLiteral">Definition of PubidLiteral</a>.
378     *
379     * @param ch
380     *    the character array that contains the characters to be checked,
381     *    cannot be <code>null</code>.
382     *
383     * @param start
384     *    the start index into <code>ch</code>, must be &gt;= 0.
385     *
386     * @param length
387     *    the number of characters to take from <code>ch</code>, starting at
388     *    the <code>start</code> index.
389     *
390     * @throws NullPointerException
391     *    if <code>ch == null</code>.
392     *
393     * @throws IndexOutOfBoundsException
394     *    if <code>start &lt; 0
395     *          || start + length &gt; ch.length</code>.
396     *
397     * @throws InvalidXMLException
398     *    if the specified character string does not match the
399     *    <em>PubidLiteral</em> production.
400     */
checkPubidLiteral(char[] ch, int start, int length)401    public static final void checkPubidLiteral(char[] ch,
402                                               int    start,
403                                               int    length)
404    throws NullPointerException,
405           IndexOutOfBoundsException,
406           InvalidXMLException {
407 
408       // Minimum length is 3
409       if (length < 3) {
410          throw new InvalidXMLException("Minimum length for the 'PubidLiteral' production is 3 characters.");
411       }
412 
413       int  lastIndex = start + length - 1;
414       char firstChar = ch[0];
415       char lastChar  = ch[lastIndex];
416 
417       // First and last char: single qoute (apostrophe)
418       String otherAllowedChars;
419       if (firstChar == '\'') {
420          if (lastChar != '\'') {
421             throw new InvalidXMLException("First character is '\\'', but the "
422                                      + "last character is 0x"
423                                      + Integer.toHexString((int) lastChar)
424                                      + '.');
425          }
426          otherAllowedChars = "-()+,./:=?;!*#@$_%";
427 
428       // First and last char: double qoute character
429       } else if (firstChar == '"') {
430          if (lastChar != '"') {
431             throw new InvalidXMLException("First character is '\"', but the "
432                                      + "last character is 0x"
433                                      + Integer.toHexString((int) lastChar)
434                                      + '.');
435          }
436          otherAllowedChars = "-'()+,./:=?;!*#@$_%";
437 
438       // First character is invalid
439       } else {
440          throw new InvalidXMLException("First char must either be '\\'' or "
441                                      + "'\"' instead of 0x"
442                                      + Integer.toHexString((int) firstChar)
443                                      + '.');
444       }
445 
446 
447       // Check each character
448       for (int i = 1; i < (length - 1); i++) {
449          char c = ch[i];
450 
451          if (c != 0x20 && c != 0x0D && c != 0x0A && !isLetter(c) && !isDigit(c)
452              && otherAllowedChars.indexOf(c) < 0) {
453             // TODO: Quote character properly, even if it is an apostrophe
454             throw new InvalidXMLException("The character '"
455                                         + c
456                                         + "' (0x"
457                                         + Integer.toHexString((int) c)
458                                         + ") is not valid for the "
459                                         + "'PubidLiteral' production.");
460          }
461       }
462    }
463 
464    /**
465     * Determines if the specified character matches the <em>NameChar</em>
466     * production.
467     *
468     * <p>See:
469     * <a href="http://www.w3.org/TR/REC-xml#NT-NameChar">Definition of NameChar</a>.
470     *
471     * @param c
472     *    the character to check.
473     *
474     * @return
475     *    <code>true</code> if the character matches the production, or
476     *    <code>false</code> if it does not.
477     */
isNameChar(char c)478    private static final boolean isNameChar(char c) {
479       return c == '.'
480           || c == '-'
481           || c == '_'
482           || c == ':'
483           || isDigit(c)
484           || isLetter(c)
485           || isCombiningChar(c)
486           || isExtender(c);
487    }
488 
489    /**
490     * Determines if the specified character matches the <em>Letter</em>
491     * production.
492     *
493     * <p>See:
494     * <a href="http://www.w3.org/TR/REC-xml#NT-Letter">Definition of Letter</a>.
495     *
496     * @param c
497     *    the character to check.
498     *
499     * @return
500     *    <code>true</code> if the character matches the production, or
501     *    <code>false</code> if it does not.
502     */
isLetter(char c)503    private static final boolean isLetter(char c) {
504       return isBaseChar(c) || isIdeographic(c);
505    }
506 
507    /**
508     * Determines if the specified character matches the <em>BaseChar</em>
509     * production.
510     *
511     * <p>See:
512     * <a href="http://www.w3.org/TR/REC-xml#NT-BaseChar">Definition of BaseChar</a>.
513     *
514     * @param c
515     *    the character to check.
516     *
517     * @return
518     *    <code>true</code> if the character matches the production, or
519     *    <code>false</code> if it does not.
520     */
isBaseChar(char c)521    private static final boolean isBaseChar(char c) {
522       int n = (int) c;
523       return (n >= 0x0041 && n <= 0x005A)
524           || (n >= 0x0061 && n <= 0x007A)
525           || (n >= 0x00C0 && n <= 0x00D6)
526           || (n >= 0x00D8 && n <= 0x00F6)
527           || (n >= 0x00F8 && n <= 0x00FF)
528           || (n >= 0x0100 && n <= 0x0131)
529           || (n >= 0x0134 && n <= 0x013E)
530           || (n >= 0x0141 && n <= 0x0148)
531           || (n >= 0x014A && n <= 0x017E)
532           || (n >= 0x0180 && n <= 0x01C3)
533           || (n >= 0x01CD && n <= 0x01F0)
534           || (n >= 0x01F4 && n <= 0x01F5)
535           || (n >= 0x01FA && n <= 0x0217)
536           || (n >= 0x0250 && n <= 0x02A8)
537           || (n >= 0x02BB && n <= 0x02C1)
538           || (n == 0x0386)
539           || (n >= 0x0388 && n <= 0x038A)
540           || (n == 0x038C)
541           || (n >= 0x038E && n <= 0x03A1)
542           || (n >= 0x03A3 && n <= 0x03CE)
543           || (n >= 0x03D0 && n <= 0x03D6)
544           || (n == 0x03DA)
545           || (n == 0x03DC)
546           || (n == 0x03DE)
547           || (n == 0x03E0)
548           || (n >= 0x03E2 && n <= 0x03F3)
549           || (n >= 0x0401 && n <= 0x040C)
550           || (n >= 0x040E && n <= 0x044F)
551           || (n >= 0x0451 && n <= 0x045C)
552           || (n >= 0x045E && n <= 0x0481)
553           || (n >= 0x0490 && n <= 0x04C4)
554           || (n >= 0x04C7 && n <= 0x04C8)
555           || (n >= 0x04CB && n <= 0x04CC)
556           || (n >= 0x04D0 && n <= 0x04EB)
557           || (n >= 0x04EE && n <= 0x04F5)
558           || (n >= 0x04F8 && n <= 0x04F9)
559           || (n >= 0x0531 && n <= 0x0556)
560           || (n == 0x0559)
561           || (n >= 0x0561 && n <= 0x0586)
562           || (n >= 0x05D0 && n <= 0x05EA)
563           || (n >= 0x05F0 && n <= 0x05F2)
564           || (n >= 0x0621 && n <= 0x063A)
565           || (n >= 0x0641 && n <= 0x064A)
566           || (n >= 0x0671 && n <= 0x06B7)
567           || (n >= 0x06BA && n <= 0x06BE)
568           || (n >= 0x06C0 && n <= 0x06CE)
569           || (n >= 0x06D0 && n <= 0x06D3)
570           || (n == 0x06D5)
571           || (n >= 0x06E5 && n <= 0x06E6)
572           || (n >= 0x0905 && n <= 0x0939)
573           || (n == 0x093D)
574           || (n >= 0x0958 && n <= 0x0961)
575           || (n >= 0x0985 && n <= 0x098C)
576           || (n >= 0x098F && n <= 0x0990)
577           || (n >= 0x0993 && n <= 0x09A8)
578           || (n >= 0x09AA && n <= 0x09B0)
579           || (n == 0x09B2)
580           || (n >= 0x09B6 && n <= 0x09B9)
581           || (n >= 0x09DC && n <= 0x09DD)
582           || (n >= 0x09DF && n <= 0x09E1)
583           || (n >= 0x09F0 && n <= 0x09F1)
584           || (n >= 0x0A05 && n <= 0x0A0A)
585           || (n >= 0x0A0F && n <= 0x0A10)
586           || (n >= 0x0A13 && n <= 0x0A28)
587           || (n >= 0x0A2A && n <= 0x0A30)
588           || (n >= 0x0A32 && n <= 0x0A33)
589           || (n >= 0x0A35 && n <= 0x0A36)
590           || (n >= 0x0A38 && n <= 0x0A39)
591           || (n >= 0x0A59 && n <= 0x0A5C)
592           || (n == 0x0A5E)
593           || (n >= 0x0A72 && n <= 0x0A74)
594           || (n >= 0x0A85 && n <= 0x0A8B)
595           || (n == 0x0A8D)
596           || (n >= 0x0A8F && n <= 0x0A91)
597           || (n >= 0x0A93 && n <= 0x0AA8)
598           || (n >= 0x0AAA && n <= 0x0AB0)
599           || (n >= 0x0AB2 && n <= 0x0AB3)
600           || (n >= 0x0AB5 && n <= 0x0AB9)
601           || (n == 0x0ABD)
602           || (n == 0x0AE0)
603           || (n >= 0x0B05 && n <= 0x0B0C)
604           || (n >= 0x0B0F && n <= 0x0B10)
605           || (n >= 0x0B13 && n <= 0x0B28)
606           || (n >= 0x0B2A && n <= 0x0B30)
607           || (n >= 0x0B32 && n <= 0x0B33)
608           || (n >= 0x0B36 && n <= 0x0B39)
609           || (n == 0x0B3D)
610           || (n >= 0x0B5C && n <= 0x0B5D)
611           || (n >= 0x0B5F && n <= 0x0B61)
612           || (n >= 0x0B85 && n <= 0x0B8A)
613           || (n >= 0x0B8E && n <= 0x0B90)
614           || (n >= 0x0B92 && n <= 0x0B95)
615           || (n >= 0x0B99 && n <= 0x0B9A)
616           || (n == 0x0B9C)
617           || (n >= 0x0B9E && n <= 0x0B9F)
618           || (n >= 0x0BA3 && n <= 0x0BA4)
619           || (n >= 0x0BA8 && n <= 0x0BAA)
620           || (n >= 0x0BAE && n <= 0x0BB5)
621           || (n >= 0x0BB7 && n <= 0x0BB9)
622           || (n >= 0x0C05 && n <= 0x0C0C)
623           || (n >= 0x0C0E && n <= 0x0C10)
624           || (n >= 0x0C12 && n <= 0x0C28)
625           || (n >= 0x0C2A && n <= 0x0C33)
626           || (n >= 0x0C35 && n <= 0x0C39)
627           || (n >= 0x0C60 && n <= 0x0C61)
628           || (n >= 0x0C85 && n <= 0x0C8C)
629           || (n >= 0x0C8E && n <= 0x0C90)
630           || (n >= 0x0C92 && n <= 0x0CA8)
631           || (n >= 0x0CAA && n <= 0x0CB3)
632           || (n >= 0x0CB5 && n <= 0x0CB9)
633           || (n == 0x0CDE)
634           || (n >= 0x0CE0 && n <= 0x0CE1)
635           || (n >= 0x0D05 && n <= 0x0D0C)
636           || (n >= 0x0D0E && n <= 0x0D10)
637           || (n >= 0x0D12 && n <= 0x0D28)
638           || (n >= 0x0D2A && n <= 0x0D39)
639           || (n >= 0x0D60 && n <= 0x0D61)
640           || (n >= 0x0E01 && n <= 0x0E2E)
641           || (n == 0x0E30)
642           || (n >= 0x0E32 && n <= 0x0E33)
643           || (n >= 0x0E40 && n <= 0x0E45)
644           || (n >= 0x0E81 && n <= 0x0E82)
645           || (n == 0x0E84)
646           || (n >= 0x0E87 && n <= 0x0E88)
647           || (n == 0x0E8A)
648           || (n == 0x0E8D)
649           || (n >= 0x0E94 && n <= 0x0E97)
650           || (n >= 0x0E99 && n <= 0x0E9F)
651           || (n >= 0x0EA1 && n <= 0x0EA3)
652           || (n == 0x0EA5)
653           || (n == 0x0EA7)
654           || (n >= 0x0EAA && n <= 0x0EAB)
655           || (n >= 0x0EAD && n <= 0x0EAE)
656           || (n == 0x0EB0)
657           || (n >= 0x0EB2 && n <= 0x0EB3)
658           || (n == 0x0EBD)
659           || (n >= 0x0EC0 && n <= 0x0EC4)
660           || (n >= 0x0F40 && n <= 0x0F47)
661           || (n >= 0x0F49 && n <= 0x0F69)
662           || (n >= 0x10A0 && n <= 0x10C5)
663           || (n >= 0x10D0 && n <= 0x10F6)
664           || (n == 0x1100)
665           || (n >= 0x1102 && n <= 0x1103)
666           || (n >= 0x1105 && n <= 0x1107)
667           || (n == 0x1109)
668           || (n >= 0x110B && n <= 0x110C)
669           || (n >= 0x110E && n <= 0x1112)
670           || (n == 0x113C)
671           || (n == 0x113E)
672           || (n == 0x1140)
673           || (n == 0x114C)
674           || (n == 0x114E)
675           || (n == 0x1150)
676           || (n >= 0x1154 && n <= 0x1155)
677           || (n == 0x1159)
678           || (n >= 0x115F && n <= 0x1161)
679           || (n == 0x1163)
680           || (n == 0x1165)
681           || (n == 0x1167)
682           || (n == 0x1169)
683           || (n >= 0x116D && n <= 0x116E)
684           || (n >= 0x1172 && n <= 0x1173)
685           || (n == 0x1175)
686           || (n == 0x119E)
687           || (n == 0x11A8)
688           || (n == 0x11AB)
689           || (n >= 0x11AE && n <= 0x11AF)
690           || (n >= 0x11B7 && n <= 0x11B8)
691           || (n == 0x11BA)
692           || (n >= 0x11BC && n <= 0x11C2)
693           || (n == 0x11EB)
694           || (n == 0x11F0)
695           || (n == 0x11F9)
696           || (n >= 0x1E00 && n <= 0x1E9B)
697           || (n >= 0x1EA0 && n <= 0x1EF9)
698           || (n >= 0x1F00 && n <= 0x1F15)
699           || (n >= 0x1F18 && n <= 0x1F1D)
700           || (n >= 0x1F20 && n <= 0x1F45)
701           || (n >= 0x1F48 && n <= 0x1F4D)
702           || (n >= 0x1F50 && n <= 0x1F57)
703           || (n == 0x1F59)
704           || (n == 0x1F5B)
705           || (n == 0x1F5D)
706           || (n >= 0x1F5F && n <= 0x1F7D)
707           || (n >= 0x1F80 && n <= 0x1FB4)
708           || (n >= 0x1FB6 && n <= 0x1FBC)
709           || (n == 0x1FBE)
710           || (n >= 0x1FC2 && n <= 0x1FC4)
711           || (n >= 0x1FC6 && n <= 0x1FCC)
712           || (n >= 0x1FD0 && n <= 0x1FD3)
713           || (n >= 0x1FD6 && n <= 0x1FDB)
714           || (n >= 0x1FE0 && n <= 0x1FEC)
715           || (n >= 0x1FF2 && n <= 0x1FF4)
716           || (n >= 0x1FF6 && n <= 0x1FFC)
717           || (n == 0x2126)
718           || (n >= 0x212A && n <= 0x212B)
719           || (n == 0x212E)
720           || (n >= 0x2180 && n <= 0x2182)
721           || (n >= 0x3041 && n <= 0x3094)
722           || (n >= 0x30A1 && n <= 0x30FA)
723           || (n >= 0x3105 && n <= 0x312C)
724           || (n >= 0xAC00 && n <= 0xD7A3);
725    }
726 
727    /**
728     * Determines if the specified character matches the <em>Ideographic</em>
729     * production.
730     *
731     * <p>See:
732     * <a href="http://www.w3.org/TR/REC-xml#NT-Ideographic">Definition of Ideographic</a>.
733     *
734     * @param c
735     *    the character to check.
736     *
737     * @return
738     *    <code>true</code> if the character matches the production, or
739     *    <code>false</code> if it does not.
740     */
isIdeographic(char c)741    private static final boolean isIdeographic(char c) {
742       int n = (int) c;
743       return (n >= 0x4E00 && n <= 0x9FA5)
744           || (n == 0x3007)
745           || (n >= 0x3021 && n <= 0x3029);
746    }
747 
748    /**
749     * Determines if the specified character matches the <em>CombiningChar</em>
750     * production.
751     *
752     * <p>See:
753     * <a href="http://www.w3.org/TR/REC-xml#NT-CombiningChar">Definition of CombiningChar</a>.
754     *
755     * @param c
756     *    the character to check.
757     *
758     * @return
759     *    <code>true</code> if the character matches the production, or
760     *    <code>false</code> if it does not.
761     */
isCombiningChar(char c)762    private static final boolean isCombiningChar(char c) {
763       int n = (int) c;
764       return (n >= 0x0300 && n <= 0x0345)
765           || (n >= 0x0360 && n <= 0x0361)
766           || (n >= 0x0483 && n <= 0x0486)
767           || (n >= 0x0591 && n <= 0x05A1)
768           || (n >= 0x05A3 && n <= 0x05B9)
769           || (n >= 0x05BB && n <= 0x05BD)
770           || (n == 0x05BF)
771           || (n >= 0x05C1 && n <= 0x05C2)
772           || (n == 0x05C4)
773           || (n >= 0x064B && n <= 0x0652)
774           || (n == 0x0670)
775           || (n >= 0x06D6 && n <= 0x06DC)
776           || (n >= 0x06DD && n <= 0x06DF)
777           || (n >= 0x06E0 && n <= 0x06E4)
778           || (n >= 0x06E7 && n <= 0x06E8)
779           || (n >= 0x06EA && n <= 0x06ED)
780           || (n >= 0x0901 && n <= 0x0903)
781           || (n == 0x093C)
782           || (n >= 0x093E && n <= 0x094C)
783           || (n == 0x094D)
784           || (n >= 0x0951 && n <= 0x0954)
785           || (n >= 0x0962 && n <= 0x0963)
786           || (n >= 0x0981 && n <= 0x0983)
787           || (n == 0x09BC)
788           || (n == 0x09BE)
789           || (n == 0x09BF)
790           || (n >= 0x09C0 && n <= 0x09C4)
791           || (n >= 0x09C7 && n <= 0x09C8)
792           || (n >= 0x09CB && n <= 0x09CD)
793           || (n == 0x09D7)
794           || (n >= 0x09E2 && n <= 0x09E3)
795           || (n == 0x0A02)
796           || (n == 0x0A3C)
797           || (n == 0x0A3E)
798           || (n == 0x0A3F)
799           || (n >= 0x0A40 && n <= 0x0A42)
800           || (n >= 0x0A47 && n <= 0x0A48)
801           || (n >= 0x0A4B && n <= 0x0A4D)
802           || (n >= 0x0A70 && n <= 0x0A71)
803           || (n >= 0x0A81 && n <= 0x0A83)
804           || (n == 0x0ABC)
805           || (n >= 0x0ABE && n <= 0x0AC5)
806           || (n >= 0x0AC7 && n <= 0x0AC9)
807           || (n >= 0x0ACB && n <= 0x0ACD)
808           || (n >= 0x0B01 && n <= 0x0B03)
809           || (n == 0x0B3C)
810           || (n >= 0x0B3E && n <= 0x0B43)
811           || (n >= 0x0B47 && n <= 0x0B48)
812           || (n >= 0x0B4B && n <= 0x0B4D)
813           || (n >= 0x0B56 && n <= 0x0B57)
814           || (n >= 0x0B82 && n <= 0x0B83)
815           || (n >= 0x0BBE && n <= 0x0BC2)
816           || (n >= 0x0BC6 && n <= 0x0BC8)
817           || (n >= 0x0BCA && n <= 0x0BCD)
818           || (n == 0x0BD7)
819           || (n >= 0x0C01 && n <= 0x0C03)
820           || (n >= 0x0C3E && n <= 0x0C44)
821           || (n >= 0x0C46 && n <= 0x0C48)
822           || (n >= 0x0C4A && n <= 0x0C4D)
823           || (n >= 0x0C55 && n <= 0x0C56)
824           || (n >= 0x0C82 && n <= 0x0C83)
825           || (n >= 0x0CBE && n <= 0x0CC4)
826           || (n >= 0x0CC6 && n <= 0x0CC8)
827           || (n >= 0x0CCA && n <= 0x0CCD)
828           || (n >= 0x0CD5 && n <= 0x0CD6)
829           || (n >= 0x0D02 && n <= 0x0D03)
830           || (n >= 0x0D3E && n <= 0x0D43)
831           || (n >= 0x0D46 && n <= 0x0D48)
832           || (n >= 0x0D4A && n <= 0x0D4D)
833           || (n == 0x0D57)
834           || (n == 0x0E31)
835           || (n >= 0x0E34 && n <= 0x0E3A)
836           || (n >= 0x0E47 && n <= 0x0E4E)
837           || (n == 0x0EB1)
838           || (n >= 0x0EB4 && n <= 0x0EB9)
839           || (n >= 0x0EBB && n <= 0x0EBC)
840           || (n >= 0x0EC8 && n <= 0x0ECD)
841           || (n >= 0x0F18 && n <= 0x0F19)
842           || (n == 0x0F35)
843           || (n == 0x0F37)
844           || (n == 0x0F39)
845           || (n == 0x0F3E)
846           || (n == 0x0F3F)
847           || (n >= 0x0F71 && n <= 0x0F84)
848           || (n >= 0x0F86 && n <= 0x0F8B)
849           || (n >= 0x0F90 && n <= 0x0F95)
850           || (n == 0x0F97)
851           || (n >= 0x0F99 && n <= 0x0FAD)
852           || (n >= 0x0FB1 && n <= 0x0FB7)
853           || (n == 0x0FB9)
854           || (n >= 0x20D0 && n <= 0x20DC)
855           || (n == 0x20E1)
856           || (n >= 0x302A && n <= 0x302F)
857           || (n == 0x3099)
858           || (n == 0x309A);
859    }
860 
861    /**
862     * Determines if the specified character matches the <em>Digit</em>
863     * production.
864     *
865     * <p>See:
866     * <a href="http://www.w3.org/TR/REC-xml#NT-Digit">Definition of Digit</a>.
867     *
868     * @param c
869     *    the character to check.
870     *
871     * @return
872     *    <code>true</code> if the character matches the production, or
873     *    <code>false</code> if it does not.
874     */
isDigit(char c)875    private static final boolean isDigit(char c) {
876       int n = (int) c;
877       return (n >= 0x0030 && n <= 0x0039)
878           || (n >= 0x0660 && n <= 0x0669)
879           || (n >= 0x06F0 && n <= 0x06F9)
880           || (n >= 0x0966 && n <= 0x096F)
881           || (n >= 0x09E6 && n <= 0x09EF)
882           || (n >= 0x0A66 && n <= 0x0A6F)
883           || (n >= 0x0AE6 && n <= 0x0AEF)
884           || (n >= 0x0B66 && n <= 0x0B6F)
885           || (n >= 0x0BE7 && n <= 0x0BEF)
886           || (n >= 0x0C66 && n <= 0x0C6F)
887           || (n >= 0x0CE6 && n <= 0x0CEF)
888           || (n >= 0x0D66 && n <= 0x0D6F)
889           || (n >= 0x0E50 && n <= 0x0E59)
890           || (n >= 0x0ED0 && n <= 0x0ED9)
891           || (n >= 0x0F20 && n <= 0x0F29);
892    }
893 
894    /**
895     * Determines if the specified character matches the <em>Extender</em>
896     * production.
897     *
898     * <p>See:
899     * <a href="http://www.w3.org/TR/REC-xml#NT-Extender">Definition of Extender</a>.
900     *
901     * @param c
902     *    the character to check.
903     *
904     * @return
905     *    <code>true</code> if the character matches the production, or
906     *    <code>false</code> if it does not.
907     */
isExtender(char c)908    private static final boolean isExtender(char c) {
909       int n = (int) c;
910       return (n == 0x00B7)
911           || (n == 0x02D0)
912           || (n == 0x02D1)
913           || (n == 0x0387)
914           || (n == 0x0640)
915           || (n == 0x0E46)
916           || (n == 0x0EC6)
917           || (n == 0x3005)
918           || (n >= 0x3031 && n <= 0x3035)
919           || (n >= 0x309D && n <= 0x309E)
920           || (n >= 0x30FC && n <= 0x30FE);
921    }
922 
923 
924    //-------------------------------------------------------------------------
925    // Class fields
926    //-------------------------------------------------------------------------
927 
928    //-------------------------------------------------------------------------
929    // Constructor
930    //-------------------------------------------------------------------------
931 
932    /**
933     * Constructs a new <code>XMLChecker</code> object. This constructor is
934     * private since no objects of this class should be created.
935     */
XMLChecker()936    private XMLChecker() {
937       // empty
938    }
939 
940 
941    //-------------------------------------------------------------------------
942    // Fields
943    //-------------------------------------------------------------------------
944 
945    //-------------------------------------------------------------------------
946    // Methods
947    //-------------------------------------------------------------------------
948 }
949