/*
 * reserved comment block
 * DO NOT REMOVE OR ALTER!
 */
/*
 * Copyright 1999-2005 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Arrays;


/**
 * THIS IS A COPY OF THE XERCES-2J CLASS com.sun.org.apache.xerces.internal.util.XML11Char
 *
 * This class defines the basic properties of characters in XML 1.1. The data
 * in this class can be used to verify that a character is a valid
 * XML 1.1 character or if the character is a space, name start, or name
 * character.
 * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. Looking a character up in the internal flag table and
 * applying the appropriate mask flag (e.g. <code>MASK_XML11_VALID</code>)
 * is what the convenience methods do for characters below 0x10000.
 * Characters from 0x10000 upwards are not in the table and are handled by
 * explicit range checks; see the individual methods for details.
 * <p>
 * All <code>int</code>-taking predicates return <code>false</code> for
 * negative arguments.
 */
public class XML11Char {

    //
    // Constants
    //

    /** Character flags for XML 1.1, indexed by UTF-16 code unit. */
    private static final byte XML11CHARS [] = new byte [1 << 16];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars) */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;

    //
    // Static initialization
    //

    static {

        // Initializing the Character Flag Array
        // Code generated by: XML11CharGenerator.
        // Entries that are never assigned (0, the surrogate range
        // 55296-57343, and 65534-65535) keep the default value 0.

        Arrays.fill(XML11CHARS, 1, 9, (byte) 17 ); // Fill 8 of value (byte) 17
        XML11CHARS[9] = 35;
        XML11CHARS[10] = 3;
        Arrays.fill(XML11CHARS, 11, 13, (byte) 17 ); // Fill 2 of value (byte) 17
        XML11CHARS[13] = 3;
        Arrays.fill(XML11CHARS, 14, 32, (byte) 17 ); // Fill 18 of value (byte) 17
        XML11CHARS[32] = 35;
        Arrays.fill(XML11CHARS, 33, 38, (byte) 33 ); // Fill 5 of value (byte) 33
        XML11CHARS[38] = 1;
        Arrays.fill(XML11CHARS, 39, 45, (byte) 33 ); // Fill 6 of value (byte) 33
        Arrays.fill(XML11CHARS, 45, 47, (byte) -87 ); // Fill 2 of value (byte) -87
        XML11CHARS[47] = 33;
        Arrays.fill(XML11CHARS, 48, 58, (byte) -87 ); // Fill 10 of value (byte) -87
        XML11CHARS[58] = 45;
        XML11CHARS[59] = 33;
        XML11CHARS[60] = 1;
        Arrays.fill(XML11CHARS, 61, 65, (byte) 33 ); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 65, 91, (byte) -19 ); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 91, 93, (byte) 33 ); // Fill 2 of value (byte) 33
        XML11CHARS[93] = 1;
        XML11CHARS[94] = 33;
        XML11CHARS[95] = -19;
        XML11CHARS[96] = 33;
        Arrays.fill(XML11CHARS, 97, 123, (byte) -19 ); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 123, 127, (byte) 33 ); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 127, 133, (byte) 17 ); // Fill 6 of value (byte) 17
        XML11CHARS[133] = 35;
        Arrays.fill(XML11CHARS, 134, 160, (byte) 17 ); // Fill 26 of value (byte) 17
        Arrays.fill(XML11CHARS, 160, 183, (byte) 33 ); // Fill 23 of value (byte) 33
        XML11CHARS[183] = -87;
        Arrays.fill(XML11CHARS, 184, 192, (byte) 33 ); // Fill 8 of value (byte) 33
        Arrays.fill(XML11CHARS, 192, 215, (byte) -19 ); // Fill 23 of value (byte) -19
        XML11CHARS[215] = 33;
        Arrays.fill(XML11CHARS, 216, 247, (byte) -19 ); // Fill 31 of value (byte) -19
        XML11CHARS[247] = 33;
        Arrays.fill(XML11CHARS, 248, 768, (byte) -19 ); // Fill 520 of value (byte) -19
        Arrays.fill(XML11CHARS, 768, 880, (byte) -87 ); // Fill 112 of value (byte) -87
        Arrays.fill(XML11CHARS, 880, 894, (byte) -19 ); // Fill 14 of value (byte) -19
        XML11CHARS[894] = 33;
        Arrays.fill(XML11CHARS, 895, 8192, (byte) -19 ); // Fill 7297 of value (byte) -19
        Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33 ); // Fill 12 of value (byte) 33
        Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19 ); // Fill 2 of value (byte) -19
        Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33 ); // Fill 26 of value (byte) 33
        XML11CHARS[8232] = 35;
        Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33 ); // Fill 22 of value (byte) 33
        Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87 ); // Fill 2 of value (byte) -87
        Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33 ); // Fill 47 of value (byte) 33
        Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19 ); // Fill 288 of value (byte) -19
        Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33 ); // Fill 2672 of value (byte) 33
        Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19 ); // Fill 1008 of value (byte) -19
        Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33 ); // Fill 17 of value (byte) 33
        Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19 ); // Fill 43007 of value (byte) -19
        Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33 ); // Fill 6400 of value (byte) 33
        Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19 ); // Fill 1232 of value (byte) -19
        Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33 ); // Fill 32 of value (byte) 33
        Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19 ); // Fill 526 of value (byte) -19

    } // <clinit>()

    //
    // Private helpers
    //

    /**
     * Returns true if <code>c</code> lies inside the flag table
     * (0 &lt;= c &lt; 0x10000) and at least one bit of <code>mask</code>
     * is set for it. Values outside the table, including negative
     * values, yield false rather than an array index exception.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (XML11CHARS[c] & mask) != 0;
    }

    /** Returns true if <code>c</code> is in the range 0x10000 to 0x10FFFF inclusive. */
    private static boolean isSupplementary(int c) {
        return 0x10000 <= c && c <= 0x10FFFF;
    }

    /**
     * Returns true if <code>c</code> is in the range 0x10000 to 0xEFFFF
     * inclusive, the range above the table that the name predicates accept.
     */
    private static boolean isNameSupplementary(int c) {
        return 0x10000 <= c && c < 0xF0000;
    }

    /**
     * Shared scanner behind the Name, NCName and Nmtoken checks.
     * Walks <code>s</code> one code point at a time; the first code point
     * must satisfy <code>firstMask</code> and every later one
     * <code>restMask</code>, or else fall in the 0x10000-0xEFFFF range.
     * An unpaired surrogate is returned as-is by
     * <code>String.codePointAt</code>; surrogates carry no flags in the
     * table, so such input is rejected.
     *
     * @return true if <code>s</code> is non-empty and every code point passes
     */
    private static boolean isValidToken(String s, int firstMask, int restMask) {
        final int length = s.length();
        if (length == 0) {
            return false;
        }
        int mask = firstMask;
        int i = 0;
        while (i < length) {
            final int cp = s.codePointAt(i);
            if (!hasFlag(cp, mask) && !isNameSupplementary(cp)) {
                return false;
            }
            i += Character.charCount(cp);
            mask = restMask;
        }
        return true;
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Space(int c) {
        return hasFlag(c, MASK_XML11_SPACE);
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is valid. This method
     * also checks the surrogate character range from 0x10000 to 0x10FFFF.
     * <p>
     * If the program chooses to apply the mask directly to the
     * <code>XML11CHARS</code> array, then they are responsible for checking
     * the surrogate character range.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Valid(int c) {
        return hasFlag(c, MASK_XML11_VALID) || isSupplementary(c);
    } // isXML11Valid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Invalid(int c) {
        return !isXML11Valid(c);
    } // isXML11Invalid(int):boolean

    /**
     * Returns true if the specified character is valid and permitted outside
     * of a character reference.
     * That is, this method will return false for the same set as
     * isXML11Valid, except it also reports false for "control characters".
     *
     * @param c The character to check.
     */
    public static boolean isXML11ValidLiteral(int c) {
        return (hasFlag(c, MASK_XML11_VALID) && !hasFlag(c, MASK_XML11_CONTROL))
            || isSupplementary(c);
    } // isXML11ValidLiteral(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an external parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Content(int c) {
        return hasFlag(c, MASK_XML11_CONTENT) || isSupplementary(c);
    } // isXML11Content(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an internal parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11InternalEntityContent(int c) {
        return hasFlag(c, MASK_XML11_CONTENT_INTERNAL) || isSupplementary(c);
    } // isXML11InternalEntityContent(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [4] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameStart(int c) {
        return hasFlag(c, MASK_XML11_NAME_START) || isNameSupplementary(c);
    } // isXML11NameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4a] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Name(int c) {
        return hasFlag(c, MASK_XML11_NAME) || isNameSupplementary(c);
    } // isXML11Name(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCNameStart(int c) {
        return hasFlag(c, MASK_XML11_NCNAME_START) || isNameSupplementary(c);
    } // isXML11NCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCName(int c) {
        return hasFlag(c, MASK_XML11_NCNAME) || isNameSupplementary(c);
    } // isXML11NCName(int):boolean

    /**
     * Returns whether the given character is a valid
     * high surrogate for a name character. This includes
     * all high surrogates for characters [0x10000-0xEFFFF].
     * In other words everything excluding planes 15 and 16.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDB7F);
    } // isXML11NameHighSurrogate(int):boolean

    /*
     * [5] Name ::= NameStartChar NameChar*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.1 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name
     */
    public static boolean isXML11ValidName(String name) {
        return isValidToken(name, MASK_XML11_NAME_START, MASK_XML11_NAME);
    } // isXML11ValidName(String):boolean


    /*
     * from the namespace 1.1 rec
     * [4] NCName ::= NCNameStartChar NCNameChar*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.1 Recommendation
     *
     * @param ncName string to check
     * @return true if name is a valid NCName
     */
    public static boolean isXML11ValidNCName(String ncName) {
        return isValidToken(ncName, MASK_XML11_NCNAME_START, MASK_XML11_NCNAME);
    } // isXML11ValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.1 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken
     */
    public static boolean isXML11ValidNmtoken(String nmtoken) {
        return isValidToken(nmtoken, MASK_XML11_NAME, MASK_XML11_NAME);
    } // isXML11ValidNmtoken(String):boolean

    /**
     * Simple check to determine if qname is legal. If it returns false
     * then <code>str</code> is illegal; if it returns true then
     * <code>str</code> is legal.
     * <p>
     * A string without a colon must be a valid NCName. Otherwise the text
     * before the first colon and the text after it must each be a valid
     * NCName, so a leading colon, a trailing colon, or a second colon all
     * make the string illegal.
     *
     * @param str string to check
     * @return true if str is a legal qname
     */
    public static boolean isXML11ValidQName(String str) {

        final int colon = str.indexOf(':');

        // For the empty string indexOf yields -1, which equals
        // length() - 1, so the empty string is rejected here as well.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon > 0) {
            final String prefix = str.substring(0, colon);
            final String localPart = str.substring(colon + 1);
            return isXML11ValidNCName(prefix) && isXML11ValidNCName(localPart);
        }
        return isXML11ValidNCName(str);
    } // isXML11ValidQName(String):boolean

} // class XML11Char