1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xml.internal.utils; 23 24 /** 25 * This class defines the basic XML character properties. The data 26 * in this class can be used to verify that a character is a valid 27 * XML character or if the character is a space, name start, or name 28 * character. 29 * <p> 30 * A series of convenience methods are supplied to ease the burden 31 * of the developer. Because inlining the checks can improve per 32 * character performance, the tables of character properties are 33 * public. Using the character as an index into the <code>CHARS</code> 34 * array and applying the appropriate mask flag (e.g. 35 * <code>MASK_VALID</code>), yields the same results as calling the 36 * convenience methods. There is one exception: check the comments 37 * for the <code>isValid</code> method for details. 38 * 39 * @author Glenn Marcy, IBM 40 * @author Andy Clark, IBM 41 * @author Eric Ye, IBM 42 * @author Arnaud Le Hors, IBM 43 * @author Rahul Srivastava, Sun Microsystems Inc. 44 * 45 */ 46 public class XMLChar { 47 48 // 49 // Constants 50 // 51 52 /** Character flags. */ 53 private static final byte[] CHARS = new byte[1 << 16]; 54 55 /** Valid character mask. */ 56 public static final int MASK_VALID = 0x01; 57 58 /** Space character mask. */ 59 public static final int MASK_SPACE = 0x02; 60 61 /** Name start character mask. */ 62 public static final int MASK_NAME_START = 0x04; 63 64 /** Name character mask. */ 65 public static final int MASK_NAME = 0x08; 66 67 /** Pubid character mask. */ 68 public static final int MASK_PUBID = 0x10; 69 70 /** 71 * Content character mask. Special characters are those that can 72 * be considered the start of markup, such as '<' and '&'. 73 * The various newline characters are considered special as well. 74 * All other valid XML characters can be considered content. 75 * <p> 76 * This is an optimization for the inner loop of character scanning. 77 */ 78 public static final int MASK_CONTENT = 0x20; 79 80 /** NCName start character mask. */ 81 public static final int MASK_NCNAME_START = 0x40; 82 83 /** NCName character mask. */ 84 public static final int MASK_NCNAME = 0x80; 85 86 // 87 // Static initialization 88 // 89 90 static { 91 92 // 93 // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | 94 // [#xE000-#xFFFD] | [#x10000-#x10FFFF] 95 // 96 97 int charRange[] = { 98 0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD, 99 }; 100 101 // 102 // [3] S ::= (#x20 | #x9 | #xD | #xA)+ 103 // 104 105 int spaceChar[] = { 106 0x0020, 0x0009, 0x000D, 0x000A, 107 }; 108 109 // 110 // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 111 // CombiningChar | Extender 112 // 113 114 int nameChar[] = { 115 0x002D, 0x002E, // '-' and '.' 116 }; 117 118 // 119 // [5] Name ::= (Letter | '_' | ':') (NameChar)* 120 // 121 122 int nameStartChar[] = { 123 0x003A, 0x005F, // ':' and '_' 124 }; 125 126 // 127 // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 128 // 129 130 int pubidChar[] = { 131 0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D, 132 0x005F 133 }; 134 135 int pubidRange[] = { 136 0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A 137 }; 138 139 // 140 // [84] Letter ::= BaseChar | Ideographic 141 // 142 143 int letterRange[] = { 144 // BaseChar 145 0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6, 146 0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E, 147 0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217, 148 0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1, 149 0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C, 150 0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4, 151 0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5, 152 0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA, 153 0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7, 154 0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6, 155 0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990, 156 0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD, 157 0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10, 158 0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36, 159 0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B, 160 0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3, 161 0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28, 162 0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D, 163 0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95, 164 0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA, 165 0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10, 166 0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61, 167 0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3, 168 0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10, 169 0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E, 170 0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88, 171 0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB, 172 0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47, 173 0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103, 174 0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155, 175 0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF, 176 0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9, 177 0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D, 178 0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC, 179 0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB, 180 0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B, 181 0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C, 182 0xAC00, 0xD7A3, 183 // Ideographic 184 0x3021, 0x3029, 0x4E00, 0x9FA5, 185 }; 186 int letterChar[] = { 187 // BaseChar 188 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5, 189 0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C, 190 0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0, 191 0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E, 192 0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E, 193 0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B, 194 0x1F5D, 0x1FBE, 0x2126, 0x212E, 195 // Ideographic 196 0x3007, 197 }; 198 199 // 200 // [87] CombiningChar ::= ... 201 // 202 203 int combiningCharRange[] = { 204 0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1, 205 0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652, 206 0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8, 207 0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954, 208 0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8, 209 0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48, 210 0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5, 211 0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43, 212 0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83, 213 0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03, 214 0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56, 215 0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD, 216 0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48, 217 0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9, 218 0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84, 219 0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7, 220 0x20D0, 0x20DC, 0x302A, 0x302F, 221 }; 222 223 int combiningCharChar[] = { 224 0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF, 225 0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7, 226 0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F, 227 0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A, 228 }; 229 230 // 231 // [88] Digit ::= ... 232 // 233 234 int digitRange[] = { 235 0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F, 236 0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F, 237 0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F, 238 0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 239 }; 240 241 // 242 // [89] Extender ::= ... 243 // 244 245 int extenderRange[] = { 246 0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE, 247 }; 248 249 int extenderChar[] = { 250 0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005, 251 }; 252 253 // 254 // SpecialChar ::= '<', '&', '\n', '\r', ']' 255 // 256 257 int specialChar[] = { 258 '<', '&', '\n', '\r', ']', 259 }; 260 261 // 262 // Initialize 263 // 264 265 // set valid characters 266 for (int i = 0; i < charRange.length; i += 2) { 267 for (int j = charRange[i]; j <= charRange[i + 1]; j++) { 268 CHARS[j] |= MASK_VALID | MASK_CONTENT; 269 } 270 } 271 272 // remove special characters 273 for (int i = 0; i < specialChar.length; i++) { 274 CHARS[specialChar[i]] = (byte)(CHARS[specialChar[i]] & ~MASK_CONTENT); 275 } 276 277 // set space characters 278 for (int i = 0; i < spaceChar.length; i++) { 279 CHARS[spaceChar[i]] |= MASK_SPACE; 280 } 281 282 // set name start characters 283 for (int i = 0; i < nameStartChar.length; i++) { 284 CHARS[nameStartChar[i]] |= MASK_NAME_START | MASK_NAME | 285 MASK_NCNAME_START | MASK_NCNAME; 286 } 287 for (int i = 0; i < letterRange.length; i += 2) { 288 for (int j = letterRange[i]; j <= letterRange[i + 1]; j++) { 289 CHARS[j] |= MASK_NAME_START | MASK_NAME | 290 MASK_NCNAME_START | MASK_NCNAME; 291 } 292 } 293 for (int i = 0; i < letterChar.length; i++) { 294 CHARS[letterChar[i]] |= MASK_NAME_START | MASK_NAME | 295 MASK_NCNAME_START | MASK_NCNAME; 296 } 297 298 // set name characters 299 for (int i = 0; i < nameChar.length; i++) { 300 CHARS[nameChar[i]] |= MASK_NAME | MASK_NCNAME; 301 } 302 for (int i = 0; i < digitRange.length; i += 2) { 303 for (int j = digitRange[i]; j <= digitRange[i + 1]; j++) { 304 CHARS[j] |= MASK_NAME | MASK_NCNAME; 305 } 306 } 307 for (int i = 0; i < combiningCharRange.length; i += 2) { 308 for (int j = combiningCharRange[i]; j <= combiningCharRange[i + 1]; j++) { 309 CHARS[j] |= MASK_NAME | MASK_NCNAME; 310 } 311 } 312 for (int i = 0; i < combiningCharChar.length; i++) { 313 CHARS[combiningCharChar[i]] |= MASK_NAME | MASK_NCNAME; 314 } 315 for (int i = 0; i < extenderRange.length; i += 2) { 316 for (int j = extenderRange[i]; j <= extenderRange[i + 1]; j++) { 317 CHARS[j] |= MASK_NAME | MASK_NCNAME; 318 } 319 } 320 for (int i = 0; i < extenderChar.length; i++) { 321 CHARS[extenderChar[i]] |= MASK_NAME | MASK_NCNAME; 322 } 323 324 // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars 325 CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME); 326 327 // set Pubid characters 328 for (int i = 0; i < pubidChar.length; i++) { 329 CHARS[pubidChar[i]] |= MASK_PUBID; 330 } 331 for (int i = 0; i < pubidRange.length; i += 2) { 332 for (int j = pubidRange[i]; j <= pubidRange[i + 1]; j++) { 333 CHARS[j] |= MASK_PUBID; 334 } 335 } 336 337 } // <clinit>() 338 339 // 340 // Public static methods 341 // 342 343 /** 344 * Returns true if the specified character is a supplemental character. 345 * 346 * @param c The character to check. 347 */ isSupplemental(int c)348 public static boolean isSupplemental(int c) { 349 return (c >= 0x10000 && c <= 0x10FFFF); 350 } 351 352 /** 353 * Returns true the supplemental character corresponding to the given 354 * surrogates. 355 * 356 * @param h The high surrogate. 357 * @param l The low surrogate. 358 */ supplemental(char h, char l)359 public static int supplemental(char h, char l) { 360 return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000; 361 } 362 363 /** 364 * Returns the high surrogate of a supplemental character 365 * 366 * @param c The supplemental character to "split". 367 */ highSurrogate(int c)368 public static char highSurrogate(int c) { 369 return (char) (((c - 0x00010000) >> 10) + 0xD800); 370 } 371 372 /** 373 * Returns the low surrogate of a supplemental character 374 * 375 * @param c The supplemental character to "split". 376 */ lowSurrogate(int c)377 public static char lowSurrogate(int c) { 378 return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00); 379 } 380 381 /** 382 * Returns whether the given character is a high surrogate 383 * 384 * @param c The character to check. 385 */ isHighSurrogate(int c)386 public static boolean isHighSurrogate(int c) { 387 return (0xD800 <= c && c <= 0xDBFF); 388 } 389 390 /** 391 * Returns whether the given character is a low surrogate 392 * 393 * @param c The character to check. 394 */ isLowSurrogate(int c)395 public static boolean isLowSurrogate(int c) { 396 return (0xDC00 <= c && c <= 0xDFFF); 397 } 398 399 400 /** 401 * Returns true if the specified character is valid. This method 402 * also checks the surrogate character range from 0x10000 to 0x10FFFF. 403 * <p> 404 * If the program chooses to apply the mask directly to the 405 * <code>CHARS</code> array, then they are responsible for checking 406 * the surrogate character range. 407 * 408 * @param c The character to check. 409 */ isValid(int c)410 public static boolean isValid(int c) { 411 return (c < 0x10000 && (CHARS[c] & MASK_VALID) != 0) || 412 (0x10000 <= c && c <= 0x10FFFF); 413 } // isValid(int):boolean 414 415 /** 416 * Returns true if the specified character is invalid. 417 * 418 * @param c The character to check. 419 */ isInvalid(int c)420 public static boolean isInvalid(int c) { 421 return !isValid(c); 422 } // isInvalid(int):boolean 423 424 /** 425 * Returns true if the specified character can be considered content. 426 * 427 * @param c The character to check. 428 */ isContent(int c)429 public static boolean isContent(int c) { 430 return (c < 0x10000 && (CHARS[c] & MASK_CONTENT) != 0) || 431 (0x10000 <= c && c <= 0x10FFFF); 432 } // isContent(int):boolean 433 434 /** 435 * Returns true if the specified character can be considered markup. 436 * Markup characters include '<', '&', and '%'. 437 * 438 * @param c The character to check. 439 */ isMarkup(int c)440 public static boolean isMarkup(int c) { 441 return c == '<' || c == '&' || c == '%'; 442 } // isMarkup(int):boolean 443 444 /** 445 * Returns true if the specified character is a space character 446 * as defined by production [3] in the XML 1.0 specification. 447 * 448 * @param c The character to check. 449 */ isSpace(int c)450 public static boolean isSpace(int c) { 451 return c < 0x10000 && (CHARS[c] & MASK_SPACE) != 0; 452 } // isSpace(int):boolean 453 454 /** 455 * Returns true if the specified character is a valid name start 456 * character as defined by production [5] in the XML 1.0 457 * specification. 458 * 459 * @param c The character to check. 460 */ isNameStart(int c)461 public static boolean isNameStart(int c) { 462 return c < 0x10000 && (CHARS[c] & MASK_NAME_START) != 0; 463 } // isNameStart(int):boolean 464 465 /** 466 * Returns true if the specified character is a valid name 467 * character as defined by production [4] in the XML 1.0 468 * specification. 469 * 470 * @param c The character to check. 471 */ isName(int c)472 public static boolean isName(int c) { 473 return c < 0x10000 && (CHARS[c] & MASK_NAME) != 0; 474 } // isName(int):boolean 475 476 /** 477 * Returns true if the specified character is a valid NCName start 478 * character as defined by production [4] in Namespaces in XML 479 * recommendation. 480 * 481 * @param c The character to check. 482 */ isNCNameStart(int c)483 public static boolean isNCNameStart(int c) { 484 return c < 0x10000 && (CHARS[c] & MASK_NCNAME_START) != 0; 485 } // isNCNameStart(int):boolean 486 487 /** 488 * Returns true if the specified character is a valid NCName 489 * character as defined by production [5] in Namespaces in XML 490 * recommendation. 491 * 492 * @param c The character to check. 493 */ isNCName(int c)494 public static boolean isNCName(int c) { 495 return c < 0x10000 && (CHARS[c] & MASK_NCNAME) != 0; 496 } // isNCName(int):boolean 497 498 /** 499 * Returns true if the specified character is a valid Pubid 500 * character as defined by production [13] in the XML 1.0 501 * specification. 502 * 503 * @param c The character to check. 504 */ isPubid(int c)505 public static boolean isPubid(int c) { 506 return c < 0x10000 && (CHARS[c] & MASK_PUBID) != 0; 507 } // isPubid(int):boolean 508 509 /* 510 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 511 */ 512 /** 513 * Check to see if a string is a valid Name according to [5] 514 * in the XML 1.0 Recommendation 515 * 516 * @param name string to check 517 * @return true if name is a valid Name 518 */ isValidName(String name)519 public static boolean isValidName(String name) { 520 if (name.length() == 0) 521 return false; 522 char ch = name.charAt(0); 523 if( isNameStart(ch) == false) 524 return false; 525 for (int i = 1; i < name.length(); i++ ) { 526 ch = name.charAt(i); 527 if( isName( ch ) == false ){ 528 return false; 529 } 530 } 531 return true; 532 } // isValidName(String):boolean 533 534 535 /* 536 * from the namespace rec 537 * [4] NCName ::= (Letter | '_') (NCNameChar)* 538 */ 539 /** 540 * Check to see if a string is a valid NCName according to [4] 541 * from the XML Namespaces 1.0 Recommendation 542 * 543 * @param ncName string to check 544 * @return true if name is a valid NCName 545 */ isValidNCName(String ncName)546 public static boolean isValidNCName(String ncName) { 547 if (ncName.length() == 0) 548 return false; 549 char ch = ncName.charAt(0); 550 if( isNCNameStart(ch) == false) 551 return false; 552 for (int i = 1; i < ncName.length(); i++ ) { 553 ch = ncName.charAt(i); 554 if( isNCName( ch ) == false ){ 555 return false; 556 } 557 } 558 return true; 559 } // isValidNCName(String):boolean 560 561 /* 562 * [7] Nmtoken ::= (NameChar)+ 563 */ 564 /** 565 * Check to see if a string is a valid Nmtoken according to [7] 566 * in the XML 1.0 Recommendation 567 * 568 * @param nmtoken string to check 569 * @return true if nmtoken is a valid Nmtoken 570 */ isValidNmtoken(String nmtoken)571 public static boolean isValidNmtoken(String nmtoken) { 572 if (nmtoken.length() == 0) 573 return false; 574 for (int i = 0; i < nmtoken.length(); i++ ) { 575 char ch = nmtoken.charAt(i); 576 if( ! isName( ch ) ){ 577 return false; 578 } 579 } 580 return true; 581 } // isValidName(String):boolean 582 583 584 585 586 587 // encodings 588 589 /** 590 * Returns true if the encoding name is a valid IANA encoding. 591 * This method does not verify that there is a decoder available 592 * for this encoding, only that the characters are valid for an 593 * IANA encoding name. 594 * 595 * @param ianaEncoding The IANA encoding name. 596 */ isValidIANAEncoding(String ianaEncoding)597 public static boolean isValidIANAEncoding(String ianaEncoding) { 598 if (ianaEncoding != null) { 599 int length = ianaEncoding.length(); 600 if (length > 0) { 601 char c = ianaEncoding.charAt(0); 602 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 603 for (int i = 1; i < length; i++) { 604 c = ianaEncoding.charAt(i); 605 if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') && 606 (c < '0' || c > '9') && c != '.' && c != '_' && 607 c != '-') { 608 return false; 609 } 610 } 611 return true; 612 } 613 } 614 } 615 return false; 616 } // isValidIANAEncoding(String):boolean 617 618 /** 619 * Returns true if the encoding name is a valid Java encoding. 620 * This method does not verify that there is a decoder available 621 * for this encoding, only that the characters are valid for an 622 * Java encoding name. 623 * 624 * @param javaEncoding The Java encoding name. 625 */ isValidJavaEncoding(String javaEncoding)626 public static boolean isValidJavaEncoding(String javaEncoding) { 627 if (javaEncoding != null) { 628 int length = javaEncoding.length(); 629 if (length > 0) { 630 for (int i = 1; i < length; i++) { 631 char c = javaEncoding.charAt(i); 632 if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') && 633 (c < '0' || c > '9') && c != '.' && c != '_' && 634 c != '-') { 635 return false; 636 } 637 } 638 return true; 639 } 640 } 641 return false; 642 } // isValidIANAEncoding(String):boolean 643 644 /** 645 * Simple check to determine if qname is legal. If it returns false 646 * then <param>str</param> is illegal; if it returns true then 647 * <param>str</param> is legal. 648 */ isValidQName(String str)649 public static boolean isValidQName(String str) { 650 651 final int colon = str.indexOf(':'); 652 653 if (colon == 0 || colon == str.length() - 1) { 654 return false; 655 } 656 657 if (colon > 0) { 658 final String prefix = str.substring(0,colon); 659 final String localPart = str.substring(colon+1); 660 return isValidNCName(prefix) && isValidNCName(localPart); 661 } 662 else { 663 return isValidNCName(str); 664 } 665 } 666 667 } // class XMLChar 668