1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xml.internal.utils; 23 24 import java.util.Arrays; 25 26 27 /** 28 * THIS IS A COPY OF THE XERCES-2J CLASS com.sun.org.apache.xerces.internal.utls.XMLChar 29 * 30 * This class defines the basic properties of characters in XML 1.1. The data 31 * in this class can be used to verify that a character is a valid 32 * XML 1.1 character or if the character is a space, name start, or name 33 * character. 34 * <p> 35 * A series of convenience methods are supplied to ease the burden 36 * of the developer. Using the character as an index into the <code>XML11CHARS</code> 37 * array and applying the appropriate mask flag (e.g. 38 * <code>MASK_VALID</code>), yields the same results as calling the 39 * convenience methods. There is one exception: check the comments 40 * for the <code>isValid</code> method for details. 41 * 42 */ 43 public class XML11Char { 44 45 // 46 // Constants 47 // 48 49 /** Character flags for XML 1.1. */ 50 private static final byte XML11CHARS [] = new byte [1 << 16]; 51 52 /** XML 1.1 Valid character mask. */ 53 public static final int MASK_XML11_VALID = 0x01; 54 55 /** XML 1.1 Space character mask. */ 56 public static final int MASK_XML11_SPACE = 0x02; 57 58 /** XML 1.1 Name start character mask. */ 59 public static final int MASK_XML11_NAME_START = 0x04; 60 61 /** XML 1.1 Name character mask. */ 62 public static final int MASK_XML11_NAME = 0x08; 63 64 /** XML 1.1 control character mask */ 65 public static final int MASK_XML11_CONTROL = 0x10; 66 67 /** XML 1.1 content for external entities (valid - "special" chars - control chars) */ 68 public static final int MASK_XML11_CONTENT = 0x20; 69 70 /** XML namespaces 1.1 NCNameStart */ 71 public static final int MASK_XML11_NCNAME_START = 0x40; 72 73 /** XML namespaces 1.1 NCName */ 74 public static final int MASK_XML11_NCNAME = 0x80; 75 76 /** XML 1.1 content for internal entities (valid - "special" chars) */ 77 public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT; 78 79 // 80 // Static initialization 81 // 82 83 static { 84 85 // Initializing the Character Flag Array 86 // Code generated by: XML11CharGenerator. 87 Arrays.fill(XML11CHARS, 1, 9, (byte) 17 )88 Arrays.fill(XML11CHARS, 1, 9, (byte) 17 ); // Fill 8 of value (byte) 17 89 XML11CHARS[9] = 35; 90 XML11CHARS[10] = 3; Arrays.fill(XML11CHARS, 11, 13, (byte) 17 )91 Arrays.fill(XML11CHARS, 11, 13, (byte) 17 ); // Fill 2 of value (byte) 17 92 XML11CHARS[13] = 3; Arrays.fill(XML11CHARS, 14, 32, (byte) 17 )93 Arrays.fill(XML11CHARS, 14, 32, (byte) 17 ); // Fill 18 of value (byte) 17 94 XML11CHARS[32] = 35; Arrays.fill(XML11CHARS, 33, 38, (byte) 33 )95 Arrays.fill(XML11CHARS, 33, 38, (byte) 33 ); // Fill 5 of value (byte) 33 96 XML11CHARS[38] = 1; Arrays.fill(XML11CHARS, 39, 45, (byte) 33 )97 Arrays.fill(XML11CHARS, 39, 45, (byte) 33 ); // Fill 6 of value (byte) 33 Arrays.fill(XML11CHARS, 45, 47, (byte) -87 )98 Arrays.fill(XML11CHARS, 45, 47, (byte) -87 ); // Fill 2 of value (byte) -87 99 XML11CHARS[47] = 33; Arrays.fill(XML11CHARS, 48, 58, (byte) -87 )100 Arrays.fill(XML11CHARS, 48, 58, (byte) -87 ); // Fill 10 of value (byte) -87 101 XML11CHARS[58] = 45; 102 XML11CHARS[59] = 33; 103 XML11CHARS[60] = 1; Arrays.fill(XML11CHARS, 61, 65, (byte) 33 )104 Arrays.fill(XML11CHARS, 61, 65, (byte) 33 ); // Fill 4 of value (byte) 33 Arrays.fill(XML11CHARS, 65, 91, (byte) -19 )105 Arrays.fill(XML11CHARS, 65, 91, (byte) -19 ); // Fill 26 of value (byte) -19 Arrays.fill(XML11CHARS, 91, 93, (byte) 33 )106 Arrays.fill(XML11CHARS, 91, 93, (byte) 33 ); // Fill 2 of value (byte) 33 107 XML11CHARS[93] = 1; 108 XML11CHARS[94] = 33; 109 XML11CHARS[95] = -19; 110 XML11CHARS[96] = 33; Arrays.fill(XML11CHARS, 97, 123, (byte) -19 )111 Arrays.fill(XML11CHARS, 97, 123, (byte) -19 ); // Fill 26 of value (byte) -19 Arrays.fill(XML11CHARS, 123, 127, (byte) 33 )112 Arrays.fill(XML11CHARS, 123, 127, (byte) 33 ); // Fill 4 of value (byte) 33 Arrays.fill(XML11CHARS, 127, 133, (byte) 17 )113 Arrays.fill(XML11CHARS, 127, 133, (byte) 17 ); // Fill 6 of value (byte) 17 114 XML11CHARS[133] = 35; Arrays.fill(XML11CHARS, 134, 160, (byte) 17 )115 Arrays.fill(XML11CHARS, 134, 160, (byte) 17 ); // Fill 26 of value (byte) 17 Arrays.fill(XML11CHARS, 160, 183, (byte) 33 )116 Arrays.fill(XML11CHARS, 160, 183, (byte) 33 ); // Fill 23 of value (byte) 33 117 XML11CHARS[183] = -87; Arrays.fill(XML11CHARS, 184, 192, (byte) 33 )118 Arrays.fill(XML11CHARS, 184, 192, (byte) 33 ); // Fill 8 of value (byte) 33 Arrays.fill(XML11CHARS, 192, 215, (byte) -19 )119 Arrays.fill(XML11CHARS, 192, 215, (byte) -19 ); // Fill 23 of value (byte) -19 120 XML11CHARS[215] = 33; Arrays.fill(XML11CHARS, 216, 247, (byte) -19 )121 Arrays.fill(XML11CHARS, 216, 247, (byte) -19 ); // Fill 31 of value (byte) -19 122 XML11CHARS[247] = 33; Arrays.fill(XML11CHARS, 248, 768, (byte) -19 )123 Arrays.fill(XML11CHARS, 248, 768, (byte) -19 ); // Fill 520 of value (byte) -19 Arrays.fill(XML11CHARS, 768, 880, (byte) -87 )124 Arrays.fill(XML11CHARS, 768, 880, (byte) -87 ); // Fill 112 of value (byte) -87 Arrays.fill(XML11CHARS, 880, 894, (byte) -19 )125 Arrays.fill(XML11CHARS, 880, 894, (byte) -19 ); // Fill 14 of value (byte) -19 126 XML11CHARS[894] = 33; Arrays.fill(XML11CHARS, 895, 8192, (byte) -19 )127 Arrays.fill(XML11CHARS, 895, 8192, (byte) -19 ); // Fill 7297 of value (byte) -19 Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33 )128 Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33 ); // Fill 12 of value (byte) 33 Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19 )129 Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19 ); // Fill 2 of value (byte) -19 Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33 )130 Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33 ); // Fill 26 of value (byte) 33 131 XML11CHARS[8232] = 35; Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33 )132 Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33 ); // Fill 22 of value (byte) 33 Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87 )133 Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87 ); // Fill 2 of value (byte) -87 Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33 )134 Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33 ); // Fill 47 of value (byte) 33 Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19 )135 Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19 ); // Fill 288 of value (byte) -19 Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33 )136 Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33 ); // Fill 2672 of value (byte) 33 Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19 )137 Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19 ); // Fill 1008 of value (byte) -19 Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33 )138 Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33 ); // Fill 17 of value (byte) 33 Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19 )139 Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19 ); // Fill 43007 of value (byte) -19 Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33 )140 Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33 ); // Fill 6400 of value (byte) 33 Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19 )141 Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19 ); // Fill 1232 of value (byte) -19 Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33 )142 Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33 ); // Fill 32 of value (byte) 33 Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19 )143 Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19 ); // Fill 526 of value (byte) -19 144 145 } // <clinit>() 146 147 // 148 // Public static methods 149 // 150 151 /** 152 * Returns true if the specified character is a space character 153 * as amdended in the XML 1.1 specification. 154 * 155 * @param c The character to check. 156 */ isXML11Space(int c)157 public static boolean isXML11Space(int c) { 158 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_SPACE) != 0); 159 } // isXML11Space(int):boolean 160 161 /** 162 * Returns true if the specified character is valid. This method 163 * also checks the surrogate character range from 0x10000 to 0x10FFFF. 164 * <p> 165 * If the program chooses to apply the mask directly to the 166 * <code>XML11CHARS</code> array, then they are responsible for checking 167 * the surrogate character range. 168 * 169 * @param c The character to check. 170 */ isXML11Valid(int c)171 public static boolean isXML11Valid(int c) { 172 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_VALID) != 0) 173 || (0x10000 <= c && c <= 0x10FFFF); 174 } // isXML11Valid(int):boolean 175 176 /** 177 * Returns true if the specified character is invalid. 178 * 179 * @param c The character to check. 180 */ isXML11Invalid(int c)181 public static boolean isXML11Invalid(int c) { 182 return !isXML11Valid(c); 183 } // isXML11Invalid(int):boolean 184 185 /** 186 * Returns true if the specified character is valid and permitted outside 187 * of a character reference. 188 * That is, this method will return false for the same set as 189 * isXML11Valid, except it also reports false for "control characters". 190 * 191 * @param c The character to check. 192 */ isXML11ValidLiteral(int c)193 public static boolean isXML11ValidLiteral(int c) { 194 return ((c < 0x10000 && ((XML11CHARS[c] & MASK_XML11_VALID) != 0 && (XML11CHARS[c] & MASK_XML11_CONTROL) == 0)) 195 || (0x10000 <= c && c <= 0x10FFFF)); 196 } // isXML11ValidLiteral(int):boolean 197 198 /** 199 * Returns true if the specified character can be considered 200 * content in an external parsed entity. 201 * 202 * @param c The character to check. 203 */ isXML11Content(int c)204 public static boolean isXML11Content(int c) { 205 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT) != 0) || 206 (0x10000 <= c && c <= 0x10FFFF); 207 } // isXML11Content(int):boolean 208 209 /** 210 * Returns true if the specified character can be considered 211 * content in an internal parsed entity. 212 * 213 * @param c The character to check. 214 */ isXML11InternalEntityContent(int c)215 public static boolean isXML11InternalEntityContent(int c) { 216 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT_INTERNAL) != 0) || 217 (0x10000 <= c && c <= 0x10FFFF); 218 } // isXML11InternalEntityContent(int):boolean 219 220 /** 221 * Returns true if the specified character is a valid name start 222 * character as defined by production [4] in the XML 1.1 223 * specification. 224 * 225 * @param c The character to check. 226 */ isXML11NameStart(int c)227 public static boolean isXML11NameStart(int c) { 228 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NAME_START) != 0) 229 || (0x10000 <= c && c < 0xF0000); 230 } // isXML11NameStart(int):boolean 231 232 /** 233 * Returns true if the specified character is a valid name 234 * character as defined by production [4a] in the XML 1.1 235 * specification. 236 * 237 * @param c The character to check. 238 */ isXML11Name(int c)239 public static boolean isXML11Name(int c) { 240 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NAME) != 0) 241 || (c >= 0x10000 && c < 0xF0000); 242 } // isXML11Name(int):boolean 243 244 /** 245 * Returns true if the specified character is a valid NCName start 246 * character as defined by production [4] in Namespaces in XML 247 * 1.1 recommendation. 248 * 249 * @param c The character to check. 250 */ isXML11NCNameStart(int c)251 public static boolean isXML11NCNameStart(int c) { 252 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NCNAME_START) != 0) 253 || (0x10000 <= c && c < 0xF0000); 254 } // isXML11NCNameStart(int):boolean 255 256 /** 257 * Returns true if the specified character is a valid NCName 258 * character as defined by production [5] in Namespaces in XML 259 * 1.1 recommendation. 260 * 261 * @param c The character to check. 262 */ isXML11NCName(int c)263 public static boolean isXML11NCName(int c) { 264 return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NCNAME) != 0) 265 || (0x10000 <= c && c < 0xF0000); 266 } // isXML11NCName(int):boolean 267 268 /** 269 * Returns whether the given character is a valid 270 * high surrogate for a name character. This includes 271 * all high surrogates for characters [0x10000-0xEFFFF]. 272 * In other words everything excluding planes 15 and 16. 273 * 274 * @param c The character to check. 275 */ isXML11NameHighSurrogate(int c)276 public static boolean isXML11NameHighSurrogate(int c) { 277 return (0xD800 <= c && c <= 0xDB7F); 278 } 279 280 /* 281 * [5] Name ::= NameStartChar NameChar* 282 */ 283 /** 284 * Check to see if a string is a valid Name according to [5] 285 * in the XML 1.1 Recommendation 286 * 287 * @param name string to check 288 * @return true if name is a valid Name 289 */ isXML11ValidName(String name)290 public static boolean isXML11ValidName(String name) { 291 int length = name.length(); 292 if (length == 0) 293 return false; 294 int i = 1; 295 char ch = name.charAt(0); 296 if( !isXML11NameStart(ch) ) { 297 if ( length > 1 && isXML11NameHighSurrogate(ch) ) { 298 char ch2 = name.charAt(1); 299 if ( !XMLChar.isLowSurrogate(ch2) || 300 !isXML11NameStart(XMLChar.supplemental(ch, ch2)) ) { 301 return false; 302 } 303 i = 2; 304 } 305 else { 306 return false; 307 } 308 } 309 while (i < length) { 310 ch = name.charAt(i); 311 if ( !isXML11Name(ch) ) { 312 if ( ++i < length && isXML11NameHighSurrogate(ch) ) { 313 char ch2 = name.charAt(i); 314 if ( !XMLChar.isLowSurrogate(ch2) || 315 !isXML11Name(XMLChar.supplemental(ch, ch2)) ) { 316 return false; 317 } 318 } 319 else { 320 return false; 321 } 322 } 323 ++i; 324 } 325 return true; 326 } // isXML11ValidName(String):boolean 327 328 329 /* 330 * from the namespace 1.1 rec 331 * [4] NCName ::= NCNameStartChar NCNameChar* 332 */ 333 /** 334 * Check to see if a string is a valid NCName according to [4] 335 * from the XML Namespaces 1.1 Recommendation 336 * 337 * @param ncName string to check 338 * @return true if name is a valid NCName 339 */ isXML11ValidNCName(String ncName)340 public static boolean isXML11ValidNCName(String ncName) { 341 int length = ncName.length(); 342 if (length == 0) 343 return false; 344 int i = 1; 345 char ch = ncName.charAt(0); 346 if( !isXML11NCNameStart(ch) ) { 347 if ( length > 1 && isXML11NameHighSurrogate(ch) ) { 348 char ch2 = ncName.charAt(1); 349 if ( !XMLChar.isLowSurrogate(ch2) || 350 !isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) { 351 return false; 352 } 353 i = 2; 354 } 355 else { 356 return false; 357 } 358 } 359 while (i < length) { 360 ch = ncName.charAt(i); 361 if ( !isXML11NCName(ch) ) { 362 if ( ++i < length && isXML11NameHighSurrogate(ch) ) { 363 char ch2 = ncName.charAt(i); 364 if ( !XMLChar.isLowSurrogate(ch2) || 365 !isXML11NCName(XMLChar.supplemental(ch, ch2)) ) { 366 return false; 367 } 368 } 369 else { 370 return false; 371 } 372 } 373 ++i; 374 } 375 return true; 376 } // isXML11ValidNCName(String):boolean 377 378 /* 379 * [7] Nmtoken ::= (NameChar)+ 380 */ 381 /** 382 * Check to see if a string is a valid Nmtoken according to [7] 383 * in the XML 1.1 Recommendation 384 * 385 * @param nmtoken string to check 386 * @return true if nmtoken is a valid Nmtoken 387 */ isXML11ValidNmtoken(String nmtoken)388 public static boolean isXML11ValidNmtoken(String nmtoken) { 389 int length = nmtoken.length(); 390 if (length == 0) 391 return false; 392 for (int i = 0; i < length; ++i ) { 393 char ch = nmtoken.charAt(i); 394 if( !isXML11Name(ch) ) { 395 if ( ++i < length && isXML11NameHighSurrogate(ch) ) { 396 char ch2 = nmtoken.charAt(i); 397 if ( !XMLChar.isLowSurrogate(ch2) || 398 !isXML11Name(XMLChar.supplemental(ch, ch2)) ) { 399 return false; 400 } 401 } 402 else { 403 return false; 404 } 405 } 406 } 407 return true; 408 } // isXML11ValidName(String):boolean 409 410 /** 411 * Simple check to determine if qname is legal. If it returns false 412 * then <param>str</param> is illegal; if it returns true then 413 * <param>str</param> is legal. 414 */ isXML11ValidQName(String str)415 public static boolean isXML11ValidQName(String str) { 416 417 final int colon = str.indexOf(':'); 418 419 if (colon == 0 || colon == str.length() - 1) { 420 return false; 421 } 422 423 if (colon > 0) { 424 final String prefix = str.substring(0,colon); 425 final String localPart = str.substring(colon+1); 426 return isXML11ValidNCName(prefix) && isXML11ValidNCName(localPart); 427 } 428 else { 429 return isXML11ValidNCName(str); 430 } 431 } 432 433 } // class XML11Char 434