1 package com.jclark.xml.tok; 2 3 /** 4 * Parses the prolog of an XML document. 5 * A <code>PrologParser</code> object represents the state of a parse 6 * of the prolog. 7 * It operates on the tokens returned 8 * by <code>Encoding.tokenizeProlog</code>. 9 * It does not build any data structures to represent the information 10 * in the prolog; instead it tells the caller the action needed 11 * for each token. 12 * The state of the parse can be saved by using the <code>clone</code> 13 * method. 14 * @version $Revision: 1.8 $ $Date: 1998/10/30 02:25:20 $ 15 */ 16 public class PrologParser implements Cloneable { 17 public static final int ACTION_NONE = 0; 18 public static final int ACTION_XML_DECL = ACTION_NONE + 1; 19 public static final int ACTION_TEXT_DECL = ACTION_XML_DECL + 1; 20 public static final int ACTION_PI = ACTION_TEXT_DECL + 1; 21 public static final int ACTION_COMMENT = ACTION_PI + 1; 22 public static final int ACTION_DOCTYPE_NAME = ACTION_COMMENT + 1; 23 public static final int ACTION_DOCTYPE_SYSTEM_ID = ACTION_DOCTYPE_NAME + 1; 24 public static final int ACTION_DOCTYPE_PUBLIC_ID = ACTION_DOCTYPE_SYSTEM_ID + 1; 25 public static final int ACTION_DOCTYPE_SUBSET = ACTION_DOCTYPE_PUBLIC_ID + 1; 26 public static final int ACTION_DOCTYPE_CLOSE = ACTION_DOCTYPE_SUBSET + 1; 27 public static final int ACTION_GENERAL_ENTITY_NAME = ACTION_DOCTYPE_CLOSE + 1; 28 public static final int ACTION_PARAM_ENTITY_NAME = ACTION_GENERAL_ENTITY_NAME + 1; 29 public static final int ACTION_ENTITY_VALUE_WITH_PEREFS = ACTION_PARAM_ENTITY_NAME + 1; 30 public static final int ACTION_ENTITY_VALUE_NO_PEREFS = ACTION_ENTITY_VALUE_WITH_PEREFS + 1; 31 public static final int ACTION_ENTITY_SYSTEM_ID = ACTION_ENTITY_VALUE_NO_PEREFS + 1; 32 public static final int ACTION_ENTITY_PUBLIC_ID = ACTION_ENTITY_SYSTEM_ID + 1; 33 public static final int ACTION_ENTITY_NOTATION_NAME = ACTION_ENTITY_PUBLIC_ID + 1; 34 public static final int ACTION_NOTATION_NAME = ACTION_ENTITY_NOTATION_NAME + 1; 35 public static final int ACTION_NOTATION_SYSTEM_ID = ACTION_NOTATION_NAME + 1; 36 public static final int ACTION_NOTATION_PUBLIC_ID = ACTION_NOTATION_SYSTEM_ID + 1; 37 public static final int ACTION_ATTRIBUTE_NAME = ACTION_NOTATION_PUBLIC_ID + 1; 38 public static final int ACTION_ATTRIBUTE_TYPE_CDATA = ACTION_ATTRIBUTE_NAME + 1; 39 public static final int ACTION_ATTRIBUTE_TYPE_ID = ACTION_ATTRIBUTE_TYPE_CDATA + 1; 40 public static final int ACTION_ATTRIBUTE_TYPE_IDREF = ACTION_ATTRIBUTE_TYPE_ID + 1; 41 public static final int ACTION_ATTRIBUTE_TYPE_IDREFS = ACTION_ATTRIBUTE_TYPE_IDREF + 1; 42 public static final int ACTION_ATTRIBUTE_TYPE_ENTITY = ACTION_ATTRIBUTE_TYPE_IDREFS + 1; 43 public static final int ACTION_ATTRIBUTE_TYPE_ENTITIES = ACTION_ATTRIBUTE_TYPE_ENTITY + 1; 44 public static final int ACTION_ATTRIBUTE_TYPE_NMTOKEN = ACTION_ATTRIBUTE_TYPE_ENTITIES + 1; 45 public static final int ACTION_ATTRIBUTE_TYPE_NMTOKENS = ACTION_ATTRIBUTE_TYPE_NMTOKEN + 1; 46 public static final int ACTION_ATTRIBUTE_ENUM_VALUE = ACTION_ATTRIBUTE_TYPE_NMTOKENS + 1; 47 public static final int ACTION_ATTRIBUTE_NOTATION_VALUE = ACTION_ATTRIBUTE_ENUM_VALUE + 1; 48 public static final int ACTION_ATTLIST_ELEMENT_NAME = ACTION_ATTRIBUTE_NOTATION_VALUE + 1; 49 public static final int ACTION_IMPLIED_ATTRIBUTE_VALUE = ACTION_ATTLIST_ELEMENT_NAME + 1; 50 public static final int ACTION_REQUIRED_ATTRIBUTE_VALUE = ACTION_IMPLIED_ATTRIBUTE_VALUE + 1; 51 public static final int ACTION_DEFAULT_ATTRIBUTE_VALUE = ACTION_REQUIRED_ATTRIBUTE_VALUE + 1; 52 public static final int ACTION_FIXED_ATTRIBUTE_VALUE = ACTION_DEFAULT_ATTRIBUTE_VALUE + 1; 53 public static final int ACTION_ELEMENT_NAME = ACTION_FIXED_ATTRIBUTE_VALUE + 1; 54 public static final int ACTION_CONTENT_ANY = ACTION_ELEMENT_NAME + 1; 55 public static final int ACTION_CONTENT_EMPTY = ACTION_CONTENT_ANY + 1; 56 public static final int ACTION_CONTENT_PCDATA = ACTION_CONTENT_EMPTY + 1; 57 public static final int ACTION_GROUP_OPEN = ACTION_CONTENT_PCDATA + 1; 58 public static final int ACTION_GROUP_CLOSE = ACTION_GROUP_OPEN + 1; 59 public static final int ACTION_GROUP_CLOSE_REP = ACTION_GROUP_CLOSE + 1; 60 public static final int ACTION_GROUP_CLOSE_OPT = ACTION_GROUP_CLOSE_REP + 1; 61 public static final int ACTION_GROUP_CLOSE_PLUS = ACTION_GROUP_CLOSE_OPT + 1; 62 public static final int ACTION_GROUP_CHOICE = ACTION_GROUP_CLOSE_PLUS + 1; 63 public static final int ACTION_GROUP_SEQUENCE = ACTION_GROUP_CHOICE + 1; 64 public static final int ACTION_CONTENT_ELEMENT = ACTION_GROUP_SEQUENCE + 1; 65 public static final int ACTION_CONTENT_ELEMENT_REP = ACTION_CONTENT_ELEMENT + 1; 66 public static final int ACTION_CONTENT_ELEMENT_OPT = ACTION_CONTENT_ELEMENT_REP + 1; 67 public static final int ACTION_CONTENT_ELEMENT_PLUS = ACTION_CONTENT_ELEMENT_OPT + 1; 68 public static final int ACTION_OUTER_PARAM_ENTITY_REF = ACTION_CONTENT_ELEMENT_PLUS + 1; 69 public static final int ACTION_INNER_PARAM_ENTITY_REF = ACTION_OUTER_PARAM_ENTITY_REF + 1; 70 public static final int ACTION_IGNORE_SECT = ACTION_INNER_PARAM_ENTITY_REF + 1; 71 public static final int ACTION_DECL_CLOSE = ACTION_IGNORE_SECT + 1; 72 73 private static final byte prolog0 = 0; 74 private static final byte prolog1 = prolog0 + 1; 75 private static final byte prolog2 = prolog1 + 1; 76 private static final byte doctype0 = prolog2 + 1; 77 private static final byte doctype1 = doctype0 + 1; 78 private static final byte doctype2 = doctype1 + 1; 79 private static final byte doctype3 = doctype2 + 1; 80 private static final byte doctype4 = doctype3 + 1; 81 private static final byte doctype5 = doctype4 + 1; 82 private static final byte internalSubset = doctype5 + 1; 83 private static final byte entity0 = internalSubset + 1; 84 private static final byte entity1 = entity0 + 1; 85 private static final byte entity2 = entity1 + 1; 86 private static final byte entity3 = entity2 + 1; 87 private static final byte entity4 = entity3 + 1; 88 private static final byte entity5 = entity4 + 1; 89 private static final byte entity6 = entity5 + 1; 90 private static final byte entity7 = entity6 + 1; 91 private static final byte entity8 = entity7 + 1; 92 private static final byte entity9 = entity8 + 1; 93 private static final byte notation0 = entity9 + 1; 94 private static final byte notation1 = notation0 + 1; 95 private static final byte notation2 = notation1 + 1; 96 private static final byte notation3 = notation2 + 1; 97 private static final byte notation4 = notation3 + 1; 98 private static final byte attlist0 = notation4 + 1; 99 private static final byte attlist1 = attlist0 + 1; 100 private static final byte attlist2 = attlist1 + 1; 101 private static final byte attlist3 = attlist2 + 1; 102 private static final byte attlist4 = attlist3 + 1; 103 private static final byte attlist5 = attlist4 + 1; 104 private static final byte attlist6 = attlist5 + 1; 105 private static final byte attlist7 = attlist6 + 1; 106 private static final byte attlist8 = attlist7 + 1; 107 private static final byte attlist9 = attlist8 + 1; 108 private static final byte element0 = attlist9 + 1; 109 private static final byte element1 = element0 + 1; 110 private static final byte element2 = element1 + 1; 111 private static final byte element3 = element2 + 1; 112 private static final byte element4 = element3 + 1; 113 private static final byte element5 = element4 + 1; 114 private static final byte element6 = element5 + 1; 115 private static final byte element7 = element6 + 1; 116 private static final byte declClose = element7 + 1; 117 private static final byte externalSubset0 = declClose + 1; 118 private static final byte externalSubset1 = externalSubset0 + 1; 119 private static final byte condSect0 = externalSubset1 + 1; 120 private static final byte condSect1 = condSect0 + 1; 121 private static final byte condSect2 = condSect1 + 1; 122 123 private byte state; 124 private int groupLevel; 125 private int includeLevel; 126 private byte connector[] = new byte[2]; 127 private boolean documentEntity; 128 129 public static final byte PROLOG = 0; 130 public static final byte EXTERNAL_ENTITY = 1; 131 public static final byte INTERNAL_ENTITY = 2; 132 PrologParser(byte type)133 public PrologParser(byte type) { 134 switch (type) { 135 case PROLOG: 136 documentEntity = true; 137 state = prolog0; 138 break; 139 case EXTERNAL_ENTITY: 140 documentEntity = false; 141 state = externalSubset0; 142 break; 143 case INTERNAL_ENTITY: 144 documentEntity = false; 145 state = externalSubset1; 146 break; 147 default: 148 throw new IllegalArgumentException(); 149 } 150 } 151 end()152 public final void end() throws PrologSyntaxException { 153 switch (state) { 154 case prolog0: 155 case prolog1: 156 case prolog2: 157 break; 158 case externalSubset0: 159 case externalSubset1: 160 if (includeLevel == 0) 161 break; 162 /* fall through */ 163 default: 164 throw new PrologSyntaxException(); 165 } 166 } 167 action(int tok, byte[] buf, int start, int end, Encoding enc)168 public int action(int tok, byte[] buf, int start, int end, Encoding enc) throws PrologSyntaxException { 169 switch (state) { 170 case prolog0: 171 state = prolog1; 172 if (tok == Encoding.TOK_XML_DECL) 173 return ACTION_XML_DECL; 174 /* fall through */ 175 case prolog1: 176 if (tok == Encoding.TOK_DECL_OPEN 177 && enc.matchesXMLString(buf, 178 start + 2 * enc.getMinBytesPerChar(), 179 end, 180 "DOCTYPE")) { 181 state = doctype0; 182 return ACTION_NONE; 183 } 184 /* fall through */ 185 case prolog2: 186 switch (tok) { 187 case Encoding.TOK_PI: 188 return ACTION_PI; 189 case Encoding.TOK_COMMENT: 190 return ACTION_COMMENT; 191 } 192 break; 193 case doctype0: 194 if (tok == Encoding.TOK_NAME) { 195 state = doctype1; 196 return ACTION_DOCTYPE_NAME; 197 } 198 break; 199 case doctype1: 200 switch (tok) { 201 case Encoding.TOK_OPEN_BRACKET: 202 state = internalSubset; 203 return ACTION_DOCTYPE_SUBSET; 204 case Encoding.TOK_DECL_CLOSE: 205 state = prolog2; 206 return ACTION_DOCTYPE_CLOSE; 207 case Encoding.TOK_NAME: 208 if (enc.matchesXMLString(buf, start, end, "SYSTEM")) { 209 state = doctype3; 210 return ACTION_NONE; 211 } 212 if (enc.matchesXMLString(buf, start, end, "PUBLIC")) { 213 state = doctype2; 214 return ACTION_NONE; 215 } 216 break; 217 } 218 break; 219 case doctype2: 220 if (tok == Encoding.TOK_LITERAL) { 221 state = doctype3; 222 return ACTION_DOCTYPE_PUBLIC_ID; 223 } 224 break; 225 case doctype3: 226 if (tok == Encoding.TOK_LITERAL) { 227 state = doctype4; 228 return ACTION_DOCTYPE_SYSTEM_ID; 229 } 230 break; 231 case doctype4: 232 switch (tok) { 233 case Encoding.TOK_OPEN_BRACKET: 234 state = internalSubset; 235 return ACTION_DOCTYPE_SUBSET; 236 case Encoding.TOK_DECL_CLOSE: 237 state = prolog2; 238 return ACTION_DOCTYPE_CLOSE; 239 } 240 break; 241 case doctype5: 242 if (tok == Encoding.TOK_DECL_CLOSE) { 243 state = prolog2; 244 return ACTION_DOCTYPE_CLOSE; 245 } 246 break; 247 case externalSubset0: 248 state = externalSubset1; 249 if (tok == Encoding.TOK_XML_DECL) 250 return ACTION_TEXT_DECL; 251 /* fall through */ 252 case externalSubset1: 253 switch (tok) { 254 case Encoding.TOK_COND_SECT_OPEN: 255 state = condSect0; 256 return ACTION_NONE; 257 case Encoding.TOK_COND_SECT_CLOSE: 258 if (includeLevel == 0) 259 break; 260 --includeLevel; 261 return ACTION_NONE; 262 case Encoding.TOK_CLOSE_BRACKET: 263 throw new PrologSyntaxException(); 264 } 265 /* fall through */ 266 case internalSubset: 267 switch (tok) { 268 case Encoding.TOK_DECL_OPEN: 269 if (enc.matchesXMLString(buf, 270 start + 2 * enc.getMinBytesPerChar(), 271 end, 272 "ENTITY")) { 273 state = entity0; 274 return ACTION_NONE; 275 } 276 if (enc.matchesXMLString(buf, 277 start + 2 * enc.getMinBytesPerChar(), 278 end, 279 "ATTLIST")) { 280 state = attlist0; 281 return ACTION_NONE; 282 } 283 if (enc.matchesXMLString(buf, 284 start + 2 * enc.getMinBytesPerChar(), 285 end, 286 "ELEMENT")) { 287 state = element0; 288 return ACTION_NONE; 289 } 290 if (enc.matchesXMLString(buf, 291 start + 2 * enc.getMinBytesPerChar(), 292 end, 293 "NOTATION")) { 294 state = notation0; 295 return ACTION_NONE; 296 } 297 break; 298 case Encoding.TOK_PI: 299 return ACTION_PI; 300 case Encoding.TOK_COMMENT: 301 return ACTION_COMMENT; 302 case Encoding.TOK_PARAM_ENTITY_REF: 303 return ACTION_OUTER_PARAM_ENTITY_REF; 304 case Encoding.TOK_CLOSE_BRACKET: 305 state = doctype5; 306 return ACTION_NONE; 307 } 308 break; 309 case entity0: 310 switch (tok) { 311 case Encoding.TOK_PERCENT: 312 state = entity1; 313 return ACTION_NONE; 314 case Encoding.TOK_NAME: 315 state = entity2; 316 return ACTION_GENERAL_ENTITY_NAME; 317 } 318 break; 319 case entity1: 320 if (tok == Encoding.TOK_NAME) { 321 state = entity7; 322 return ACTION_PARAM_ENTITY_NAME; 323 } 324 break; 325 case entity2: 326 switch (tok) { 327 case Encoding.TOK_NAME: 328 if (enc.matchesXMLString(buf, start, end, "SYSTEM")) { 329 state = entity4; 330 return ACTION_NONE; 331 } 332 if (enc.matchesXMLString(buf, start, end, "PUBLIC")) { 333 state = entity3; 334 return ACTION_NONE; 335 } 336 break; 337 case Encoding.TOK_LITERAL: 338 state = declClose; 339 return documentEntity ? ACTION_ENTITY_VALUE_NO_PEREFS : ACTION_ENTITY_VALUE_WITH_PEREFS; 340 } 341 break; 342 case entity3: 343 if (tok == Encoding.TOK_LITERAL) { 344 state = entity4; 345 return ACTION_ENTITY_PUBLIC_ID; 346 } 347 break; 348 case entity4: 349 if (tok == Encoding.TOK_LITERAL) { 350 state = entity5; 351 return ACTION_ENTITY_SYSTEM_ID; 352 } 353 break; 354 case entity5: 355 switch (tok) { 356 case Encoding.TOK_DECL_CLOSE: 357 state = documentEntity ? internalSubset : externalSubset1; 358 return ACTION_DECL_CLOSE; 359 case Encoding.TOK_NAME: 360 if (enc.matchesXMLString(buf, start, end, "NDATA")) { 361 state = entity6; 362 return ACTION_NONE; 363 } 364 break; 365 } 366 break; 367 case entity6: 368 switch (tok) { 369 case Encoding.TOK_NAME: 370 state = declClose; 371 return ACTION_ENTITY_NOTATION_NAME; 372 } 373 break; 374 case entity7: 375 switch (tok) { 376 case Encoding.TOK_NAME: 377 if (enc.matchesXMLString(buf, start, end, "SYSTEM")) { 378 state = entity9; 379 return ACTION_NONE; 380 } 381 if (enc.matchesXMLString(buf, start, end, "PUBLIC")) { 382 state = entity8; 383 return ACTION_NONE; 384 } 385 break; 386 case Encoding.TOK_LITERAL: 387 state = declClose; 388 return documentEntity ? ACTION_ENTITY_VALUE_NO_PEREFS : ACTION_ENTITY_VALUE_WITH_PEREFS; 389 } 390 break; 391 case entity8: 392 if (tok == Encoding.TOK_LITERAL) { 393 state = entity9; 394 return ACTION_ENTITY_PUBLIC_ID; 395 } 396 break; 397 case entity9: 398 if (tok == Encoding.TOK_LITERAL) { 399 state = declClose; 400 return ACTION_ENTITY_SYSTEM_ID; 401 } 402 break; 403 case notation0: 404 if (tok == Encoding.TOK_NAME) { 405 state = notation1; 406 return ACTION_NOTATION_NAME; 407 } 408 break; 409 case notation1: 410 switch (tok) { 411 case Encoding.TOK_NAME: 412 if (enc.matchesXMLString(buf, start, end, "SYSTEM")) { 413 state = notation3; 414 return ACTION_NONE; 415 } 416 if (enc.matchesXMLString(buf, start, end, "PUBLIC")) { 417 state = notation2; 418 return ACTION_NONE; 419 } 420 break; 421 } 422 break; 423 case notation2: 424 if (tok == Encoding.TOK_LITERAL) { 425 state = notation4; 426 return ACTION_NOTATION_PUBLIC_ID; 427 } 428 break; 429 case notation3: 430 if (tok == Encoding.TOK_LITERAL) { 431 state = declClose; 432 return ACTION_NOTATION_SYSTEM_ID; 433 } 434 break; 435 case notation4: 436 switch (tok) { 437 case Encoding.TOK_LITERAL: 438 state = declClose; 439 return ACTION_NOTATION_SYSTEM_ID; 440 case Encoding.TOK_DECL_CLOSE: 441 state = documentEntity ? internalSubset : externalSubset1; 442 return ACTION_DECL_CLOSE; 443 } 444 break; 445 case attlist0: 446 if (tok == Encoding.TOK_NAME) { 447 state = attlist1; 448 return ACTION_ATTLIST_ELEMENT_NAME; 449 } 450 break; 451 case attlist1: 452 switch (tok) { 453 case Encoding.TOK_DECL_CLOSE: 454 state = documentEntity ? internalSubset : externalSubset1; 455 return ACTION_NONE; 456 case Encoding.TOK_NAME: 457 state = attlist2; 458 return ACTION_ATTRIBUTE_NAME; 459 } 460 break; 461 case attlist2: 462 switch (tok) { 463 case Encoding.TOK_NAME: 464 for (int i = 0; i < attributeTypes.length; i++) 465 if (enc.matchesXMLString(buf, start, end, attributeTypes[i])) { 466 state = attlist8; 467 return ACTION_ATTRIBUTE_TYPE_CDATA + i; 468 } 469 if (enc.matchesXMLString(buf, start, end, "NOTATION")) { 470 state = attlist5; 471 return ACTION_NONE; 472 } 473 break; 474 case Encoding.TOK_OPEN_PAREN: 475 groupLevel = 1; 476 state = attlist3; 477 return ACTION_NONE; 478 } 479 break; 480 case attlist3: 481 switch (tok) { 482 case Encoding.TOK_NMTOKEN: 483 case Encoding.TOK_NAME: 484 state = attlist4; 485 return ACTION_ATTRIBUTE_ENUM_VALUE; 486 } 487 break; 488 case attlist4: 489 switch (tok) { 490 case Encoding.TOK_CLOSE_PAREN: 491 state = attlist8; 492 groupLevel = 0; 493 return ACTION_NONE; 494 case Encoding.TOK_OR: 495 state = attlist3; 496 return ACTION_NONE; 497 } 498 break; 499 case attlist5: 500 if (tok == Encoding.TOK_OPEN_PAREN) { 501 state = attlist6; 502 groupLevel = 1; 503 return ACTION_NONE; 504 } 505 break; 506 case attlist6: 507 if (tok == Encoding.TOK_NAME) { 508 state = attlist7; 509 return ACTION_ATTRIBUTE_NOTATION_VALUE; 510 } 511 break; 512 case attlist7: 513 switch (tok) { 514 case Encoding.TOK_CLOSE_PAREN: 515 groupLevel = 0; 516 state = attlist8; 517 return ACTION_NONE; 518 case Encoding.TOK_OR: 519 state = attlist6; 520 return ACTION_NONE; 521 } 522 break; 523 /* default value */ 524 case attlist8: 525 switch (tok) { 526 case Encoding.TOK_POUND_NAME: 527 if (enc.matchesXMLString(buf, 528 start + enc.getMinBytesPerChar(), 529 end, 530 "IMPLIED")) { 531 state = attlist1; 532 return ACTION_IMPLIED_ATTRIBUTE_VALUE; 533 } 534 if (enc.matchesXMLString(buf, 535 start + enc.getMinBytesPerChar(), 536 end, 537 "REQUIRED")) { 538 state = attlist1; 539 return ACTION_REQUIRED_ATTRIBUTE_VALUE; 540 } 541 if (enc.matchesXMLString(buf, 542 start + enc.getMinBytesPerChar(), 543 end, 544 "FIXED")) { 545 state = attlist9; 546 return ACTION_NONE; 547 } 548 break; 549 case Encoding.TOK_LITERAL: 550 state = attlist1; 551 return ACTION_DEFAULT_ATTRIBUTE_VALUE; 552 } 553 break; 554 case attlist9: 555 if (tok == Encoding.TOK_LITERAL) { 556 state = attlist1; 557 return ACTION_FIXED_ATTRIBUTE_VALUE; 558 } 559 break; 560 case element0: 561 if (tok == Encoding.TOK_NAME) { 562 state = element1; 563 return ACTION_ELEMENT_NAME; 564 } 565 break; 566 case element1: 567 switch (tok) { 568 case Encoding.TOK_NAME: 569 if (enc.matchesXMLString(buf, start, end, "EMPTY")) { 570 state = declClose; 571 return ACTION_CONTENT_EMPTY; 572 } 573 if (enc.matchesXMLString(buf, start, end, "ANY")) { 574 state = declClose; 575 return ACTION_CONTENT_ANY; 576 } 577 break; 578 case Encoding.TOK_OPEN_PAREN: 579 state = element2; 580 groupLevel = 1; 581 connector[0] = (byte)0; 582 return ACTION_GROUP_OPEN; 583 } 584 break; 585 case element2: 586 switch (tok) { 587 case Encoding.TOK_POUND_NAME: 588 if (enc.matchesXMLString(buf, 589 start + enc.getMinBytesPerChar(), 590 end, 591 "PCDATA")) { 592 state = element3; 593 return ACTION_CONTENT_PCDATA; 594 } 595 break; 596 case Encoding.TOK_OPEN_PAREN: 597 groupLevel = 2; 598 connector[1] = (byte)0; 599 state = element6; 600 return ACTION_GROUP_OPEN; 601 case Encoding.TOK_NAME: 602 state = element7; 603 return ACTION_CONTENT_ELEMENT; 604 case Encoding.TOK_NAME_QUESTION: 605 state = element7; 606 return ACTION_CONTENT_ELEMENT_OPT; 607 case Encoding.TOK_NAME_ASTERISK: 608 state = element7; 609 return ACTION_CONTENT_ELEMENT_REP; 610 case Encoding.TOK_NAME_PLUS: 611 state = element7; 612 return ACTION_CONTENT_ELEMENT_PLUS; 613 } 614 break; 615 case element3: 616 switch (tok) { 617 case Encoding.TOK_CLOSE_PAREN: 618 case Encoding.TOK_CLOSE_PAREN_ASTERISK: 619 groupLevel = 0; 620 state = declClose; 621 return ACTION_GROUP_CLOSE_REP; 622 case Encoding.TOK_OR: 623 state = element4; 624 return ACTION_GROUP_CHOICE; 625 } 626 break; 627 case element4: 628 if (tok == Encoding.TOK_NAME) { 629 state = element5; 630 return ACTION_CONTENT_ELEMENT; 631 } 632 break; 633 case element5: 634 switch (tok) { 635 case Encoding.TOK_CLOSE_PAREN_ASTERISK: 636 groupLevel = 0; 637 state = declClose; 638 return ACTION_GROUP_CLOSE_REP; 639 case Encoding.TOK_OR: 640 state = element4; 641 return ACTION_GROUP_CHOICE; 642 } 643 break; 644 case element6: 645 switch (tok) { 646 case Encoding.TOK_OPEN_PAREN: 647 if (groupLevel >= connector.length) { 648 byte[] tem = new byte[connector.length << 1]; 649 System.arraycopy(connector, 0, tem, 0, connector.length); 650 connector = tem; 651 } 652 connector[groupLevel] = (byte)0; 653 groupLevel += 1; 654 return ACTION_GROUP_OPEN; 655 case Encoding.TOK_NAME: 656 state = element7; 657 return ACTION_CONTENT_ELEMENT; 658 case Encoding.TOK_NAME_QUESTION: 659 state = element7; 660 return ACTION_CONTENT_ELEMENT_OPT; 661 case Encoding.TOK_NAME_ASTERISK: 662 state = element7; 663 return ACTION_CONTENT_ELEMENT_REP; 664 case Encoding.TOK_NAME_PLUS: 665 state = element7; 666 return ACTION_CONTENT_ELEMENT_PLUS; 667 } 668 break; 669 case element7: 670 switch (tok) { 671 case Encoding.TOK_CLOSE_PAREN: 672 groupLevel -= 1; 673 if (groupLevel == 0) 674 state = declClose; 675 return ACTION_GROUP_CLOSE; 676 case Encoding.TOK_CLOSE_PAREN_ASTERISK: 677 groupLevel -= 1; 678 if (groupLevel == 0) 679 state = declClose; 680 return ACTION_GROUP_CLOSE_REP; 681 case Encoding.TOK_CLOSE_PAREN_QUESTION: 682 groupLevel -= 1; 683 if (groupLevel == 0) 684 state = declClose; 685 return ACTION_GROUP_CLOSE_OPT; 686 case Encoding.TOK_CLOSE_PAREN_PLUS: 687 groupLevel -= 1; 688 if (groupLevel == 0) 689 state = declClose; 690 return ACTION_GROUP_CLOSE_PLUS; 691 case Encoding.TOK_COMMA: 692 state = element6; 693 if (connector[groupLevel - 1] == (byte)'|') 694 break; 695 connector[groupLevel - 1] = (byte)','; 696 return ACTION_GROUP_SEQUENCE; 697 case Encoding.TOK_OR: 698 state = element6; 699 if (connector[groupLevel - 1] == (byte)',') 700 break; 701 connector[groupLevel - 1] = (byte)'|'; 702 return ACTION_GROUP_CHOICE; 703 } 704 break; 705 case declClose: 706 if (tok == Encoding.TOK_DECL_CLOSE) { 707 state = documentEntity ? internalSubset : externalSubset1; 708 return ACTION_DECL_CLOSE; 709 } 710 break; 711 case condSect0: 712 if (tok == Encoding.TOK_NAME) { 713 if (enc.matchesXMLString(buf, start, end, "INCLUDE")) { 714 state = condSect1; 715 return ACTION_NONE; 716 } 717 if (enc.matchesXMLString(buf, start, end, "IGNORE")) { 718 state = condSect2; 719 return ACTION_NONE; 720 } 721 } 722 break; 723 case condSect1: 724 if (tok == Encoding.TOK_OPEN_BRACKET) { 725 state = externalSubset1; 726 includeLevel++; 727 return ACTION_NONE; 728 } 729 break; 730 case condSect2: 731 if (tok == Encoding.TOK_OPEN_BRACKET) { 732 state = externalSubset1; 733 return ACTION_IGNORE_SECT; 734 } 735 break; 736 } 737 if (tok == Encoding.TOK_PROLOG_S) 738 return ACTION_NONE; 739 if (tok == Encoding.TOK_PARAM_ENTITY_REF && !documentEntity) 740 return ACTION_INNER_PARAM_ENTITY_REF; 741 throw new PrologSyntaxException(); 742 } 743 clone()744 public Object clone() { 745 try { 746 PrologParser copy = (PrologParser)super.clone(); 747 copy.connector = new byte[connector.length]; 748 System.arraycopy(connector, 0, copy.connector, 0, groupLevel); 749 return copy; 750 } 751 catch (CloneNotSupportedException e) { 752 throw new InternalError(); 753 } 754 } 755 getGroupLevel()756 public final int getGroupLevel() { 757 return groupLevel; 758 } 759 760 private static final String[] attributeTypes = { 761 "CDATA", 762 "ID", 763 "IDREF", 764 "IDREFS", 765 "ENTITY", 766 "ENTITIES", 767 "NMTOKEN", 768 "NMTOKENS", 769 }; 770 } 771