1 package com.jclark.xml.tok;
2 
3 /**
4  * Parses the prolog of an XML document.
5  * A <code>PrologParser</code> object represents the state of a parse
6  * of the prolog.
7  * It operates on the tokens returned
8  * by <code>Encoding.tokenizeProlog</code>.
9  * It does not build any data structures to represent the information
10  * in the prolog; instead it tells the caller the action needed
11  * for each token.
12  * The state of the parse can be saved by using the <code>clone</code>
13  * method.
14  * @version $Revision: 1.8 $ $Date: 1998/10/30 02:25:20 $
15  */
16 public class PrologParser implements Cloneable {
17   public static final int ACTION_NONE = 0;
18   public static final int ACTION_XML_DECL = ACTION_NONE + 1;
19   public static final int ACTION_TEXT_DECL = ACTION_XML_DECL + 1;
20   public static final int ACTION_PI = ACTION_TEXT_DECL + 1;
21   public static final int ACTION_COMMENT = ACTION_PI + 1;
22   public static final int ACTION_DOCTYPE_NAME = ACTION_COMMENT + 1;
23   public static final int ACTION_DOCTYPE_SYSTEM_ID = ACTION_DOCTYPE_NAME + 1;
24   public static final int ACTION_DOCTYPE_PUBLIC_ID = ACTION_DOCTYPE_SYSTEM_ID + 1;
25   public static final int ACTION_DOCTYPE_SUBSET = ACTION_DOCTYPE_PUBLIC_ID + 1;
26   public static final int ACTION_DOCTYPE_CLOSE = ACTION_DOCTYPE_SUBSET + 1;
27   public static final int ACTION_GENERAL_ENTITY_NAME = ACTION_DOCTYPE_CLOSE + 1;
28   public static final int ACTION_PARAM_ENTITY_NAME = ACTION_GENERAL_ENTITY_NAME + 1;
29   public static final int ACTION_ENTITY_VALUE_WITH_PEREFS = ACTION_PARAM_ENTITY_NAME + 1;
30   public static final int ACTION_ENTITY_VALUE_NO_PEREFS = ACTION_ENTITY_VALUE_WITH_PEREFS + 1;
31   public static final int ACTION_ENTITY_SYSTEM_ID = ACTION_ENTITY_VALUE_NO_PEREFS + 1;
32   public static final int ACTION_ENTITY_PUBLIC_ID = ACTION_ENTITY_SYSTEM_ID + 1;
33   public static final int ACTION_ENTITY_NOTATION_NAME = ACTION_ENTITY_PUBLIC_ID + 1;
34   public static final int ACTION_NOTATION_NAME = ACTION_ENTITY_NOTATION_NAME + 1;
35   public static final int ACTION_NOTATION_SYSTEM_ID = ACTION_NOTATION_NAME + 1;
36   public static final int ACTION_NOTATION_PUBLIC_ID = ACTION_NOTATION_SYSTEM_ID + 1;
37   public static final int ACTION_ATTRIBUTE_NAME = ACTION_NOTATION_PUBLIC_ID + 1;
38   public static final int ACTION_ATTRIBUTE_TYPE_CDATA = ACTION_ATTRIBUTE_NAME + 1;
39   public static final int ACTION_ATTRIBUTE_TYPE_ID = ACTION_ATTRIBUTE_TYPE_CDATA + 1;
40   public static final int ACTION_ATTRIBUTE_TYPE_IDREF = ACTION_ATTRIBUTE_TYPE_ID + 1;
41   public static final int ACTION_ATTRIBUTE_TYPE_IDREFS = ACTION_ATTRIBUTE_TYPE_IDREF + 1;
42   public static final int ACTION_ATTRIBUTE_TYPE_ENTITY = ACTION_ATTRIBUTE_TYPE_IDREFS + 1;
43   public static final int ACTION_ATTRIBUTE_TYPE_ENTITIES = ACTION_ATTRIBUTE_TYPE_ENTITY + 1;
44   public static final int ACTION_ATTRIBUTE_TYPE_NMTOKEN = ACTION_ATTRIBUTE_TYPE_ENTITIES + 1;
45   public static final int ACTION_ATTRIBUTE_TYPE_NMTOKENS = ACTION_ATTRIBUTE_TYPE_NMTOKEN + 1;
46   public static final int ACTION_ATTRIBUTE_ENUM_VALUE = ACTION_ATTRIBUTE_TYPE_NMTOKENS + 1;
47   public static final int ACTION_ATTRIBUTE_NOTATION_VALUE = ACTION_ATTRIBUTE_ENUM_VALUE + 1;
48   public static final int ACTION_ATTLIST_ELEMENT_NAME = ACTION_ATTRIBUTE_NOTATION_VALUE + 1;
49   public static final int ACTION_IMPLIED_ATTRIBUTE_VALUE = ACTION_ATTLIST_ELEMENT_NAME + 1;
50   public static final int ACTION_REQUIRED_ATTRIBUTE_VALUE = ACTION_IMPLIED_ATTRIBUTE_VALUE + 1;
51   public static final int ACTION_DEFAULT_ATTRIBUTE_VALUE = ACTION_REQUIRED_ATTRIBUTE_VALUE + 1;
52   public static final int ACTION_FIXED_ATTRIBUTE_VALUE = ACTION_DEFAULT_ATTRIBUTE_VALUE + 1;
53   public static final int ACTION_ELEMENT_NAME = ACTION_FIXED_ATTRIBUTE_VALUE + 1;
54   public static final int ACTION_CONTENT_ANY = ACTION_ELEMENT_NAME + 1;
55   public static final int ACTION_CONTENT_EMPTY = ACTION_CONTENT_ANY + 1;
56   public static final int ACTION_CONTENT_PCDATA = ACTION_CONTENT_EMPTY + 1;
57   public static final int ACTION_GROUP_OPEN = ACTION_CONTENT_PCDATA + 1;
58   public static final int ACTION_GROUP_CLOSE = ACTION_GROUP_OPEN + 1;
59   public static final int ACTION_GROUP_CLOSE_REP = ACTION_GROUP_CLOSE + 1;
60   public static final int ACTION_GROUP_CLOSE_OPT = ACTION_GROUP_CLOSE_REP + 1;
61   public static final int ACTION_GROUP_CLOSE_PLUS = ACTION_GROUP_CLOSE_OPT + 1;
62   public static final int ACTION_GROUP_CHOICE = ACTION_GROUP_CLOSE_PLUS + 1;
63   public static final int ACTION_GROUP_SEQUENCE = ACTION_GROUP_CHOICE + 1;
64   public static final int ACTION_CONTENT_ELEMENT = ACTION_GROUP_SEQUENCE + 1;
65   public static final int ACTION_CONTENT_ELEMENT_REP = ACTION_CONTENT_ELEMENT + 1;
66   public static final int ACTION_CONTENT_ELEMENT_OPT = ACTION_CONTENT_ELEMENT_REP + 1;
67   public static final int ACTION_CONTENT_ELEMENT_PLUS = ACTION_CONTENT_ELEMENT_OPT + 1;
68   public static final int ACTION_OUTER_PARAM_ENTITY_REF = ACTION_CONTENT_ELEMENT_PLUS + 1;
69   public static final int ACTION_INNER_PARAM_ENTITY_REF = ACTION_OUTER_PARAM_ENTITY_REF + 1;
70   public static final int ACTION_IGNORE_SECT = ACTION_INNER_PARAM_ENTITY_REF + 1;
71   public static final int ACTION_DECL_CLOSE = ACTION_IGNORE_SECT + 1;
72 
73   private static final byte prolog0 = 0;
74   private static final byte prolog1 = prolog0 + 1;
75   private static final byte prolog2 = prolog1 + 1;
76   private static final byte doctype0 = prolog2 + 1;
77   private static final byte doctype1 = doctype0 + 1;
78   private static final byte doctype2 = doctype1 + 1;
79   private static final byte doctype3 = doctype2 + 1;
80   private static final byte doctype4 = doctype3 + 1;
81   private static final byte doctype5 = doctype4 + 1;
82   private static final byte internalSubset = doctype5 + 1;
83   private static final byte entity0 = internalSubset + 1;
84   private static final byte entity1 = entity0 + 1;
85   private static final byte entity2 = entity1 + 1;
86   private static final byte entity3 = entity2 + 1;
87   private static final byte entity4 = entity3 + 1;
88   private static final byte entity5 = entity4 + 1;
89   private static final byte entity6 = entity5 + 1;
90   private static final byte entity7 = entity6 + 1;
91   private static final byte entity8 = entity7 + 1;
92   private static final byte entity9 = entity8 + 1;
93   private static final byte notation0 = entity9 + 1;
94   private static final byte notation1 = notation0 + 1;
95   private static final byte notation2 = notation1 + 1;
96   private static final byte notation3 = notation2 + 1;
97   private static final byte notation4 = notation3 + 1;
98   private static final byte attlist0 = notation4 + 1;
99   private static final byte attlist1 = attlist0 + 1;
100   private static final byte attlist2 = attlist1 + 1;
101   private static final byte attlist3 = attlist2 + 1;
102   private static final byte attlist4 = attlist3 + 1;
103   private static final byte attlist5 = attlist4 + 1;
104   private static final byte attlist6 = attlist5 + 1;
105   private static final byte attlist7 = attlist6 + 1;
106   private static final byte attlist8 = attlist7 + 1;
107   private static final byte attlist9 = attlist8 + 1;
108   private static final byte element0 = attlist9 + 1;
109   private static final byte element1 = element0 + 1;
110   private static final byte element2 = element1 + 1;
111   private static final byte element3 = element2 + 1;
112   private static final byte element4 = element3 + 1;
113   private static final byte element5 = element4 + 1;
114   private static final byte element6 = element5 + 1;
115   private static final byte element7 = element6 + 1;
116   private static final byte declClose = element7 + 1;
117   private static final byte externalSubset0 = declClose + 1;
118   private static final byte externalSubset1 = externalSubset0 + 1;
119   private static final byte condSect0 = externalSubset1 + 1;
120   private static final byte condSect1 = condSect0 + 1;
121   private static final byte condSect2 = condSect1 + 1;
122 
123   private byte state;
124   private int groupLevel;
125   private int includeLevel;
126   private byte connector[] = new byte[2];
127   private boolean documentEntity;
128 
129   public static final byte PROLOG = 0;
130   public static final byte EXTERNAL_ENTITY = 1;
131   public static final byte INTERNAL_ENTITY = 2;
132 
PrologParser(byte type)133   public PrologParser(byte type) {
134     switch (type) {
135     case PROLOG:
136       documentEntity = true;
137       state = prolog0;
138       break;
139     case EXTERNAL_ENTITY:
140       documentEntity = false;
141       state = externalSubset0;
142       break;
143     case INTERNAL_ENTITY:
144       documentEntity = false;
145       state = externalSubset1;
146       break;
147     default:
148       throw new IllegalArgumentException();
149     }
150   }
151 
end()152   public final void end() throws PrologSyntaxException {
153     switch (state) {
154     case prolog0:
155     case prolog1:
156     case prolog2:
157       break;
158     case externalSubset0:
159     case externalSubset1:
160       if (includeLevel == 0)
161 	break;
162       /* fall through */
163     default:
164       throw new PrologSyntaxException();
165     }
166   }
167 
action(int tok, byte[] buf, int start, int end, Encoding enc)168   public int action(int tok, byte[] buf, int start, int end, Encoding enc) throws PrologSyntaxException {
169     switch (state) {
170     case prolog0:
171       state = prolog1;
172       if (tok == Encoding.TOK_XML_DECL)
173 	return ACTION_XML_DECL;
174       /* fall through */
175     case prolog1:
176       if (tok == Encoding.TOK_DECL_OPEN
177 	  && enc.matchesXMLString(buf,
178 				  start + 2 * enc.getMinBytesPerChar(),
179 				  end,
180 				  "DOCTYPE")) {
181 	state = doctype0;
182 	return ACTION_NONE;
183       }
184       /* fall through */
185     case prolog2:
186       switch (tok) {
187       case Encoding.TOK_PI:
188 	return ACTION_PI;
189       case Encoding.TOK_COMMENT:
190 	return ACTION_COMMENT;
191       }
192       break;
193     case doctype0:
194       if (tok == Encoding.TOK_NAME) {
195 	state = doctype1;
196 	return ACTION_DOCTYPE_NAME;
197       }
198       break;
199     case doctype1:
200       switch (tok) {
201       case Encoding.TOK_OPEN_BRACKET:
202 	state = internalSubset;
203 	return ACTION_DOCTYPE_SUBSET;
204       case Encoding.TOK_DECL_CLOSE:
205 	state = prolog2;
206 	return ACTION_DOCTYPE_CLOSE;
207       case Encoding.TOK_NAME:
208 	if (enc.matchesXMLString(buf, start, end, "SYSTEM")) {
209 	  state = doctype3;
210 	  return ACTION_NONE;
211 	}
212 	if (enc.matchesXMLString(buf, start, end, "PUBLIC")) {
213 	  state = doctype2;
214 	  return ACTION_NONE;
215 	}
216 	break;
217       }
218       break;
219     case doctype2:
220       if (tok == Encoding.TOK_LITERAL) {
221 	state = doctype3;
222 	return ACTION_DOCTYPE_PUBLIC_ID;
223       }
224       break;
225     case doctype3:
226       if (tok == Encoding.TOK_LITERAL) {
227 	state = doctype4;
228 	return ACTION_DOCTYPE_SYSTEM_ID;
229       }
230       break;
231     case doctype4:
232       switch (tok) {
233       case Encoding.TOK_OPEN_BRACKET:
234 	state = internalSubset;
235 	return ACTION_DOCTYPE_SUBSET;
236       case Encoding.TOK_DECL_CLOSE:
237 	state = prolog2;
238 	return ACTION_DOCTYPE_CLOSE;
239       }
240       break;
241     case doctype5:
242       if (tok == Encoding.TOK_DECL_CLOSE) {
243 	state = prolog2;
244 	return ACTION_DOCTYPE_CLOSE;
245       }
246       break;
247     case externalSubset0:
248       state = externalSubset1;
249       if (tok == Encoding.TOK_XML_DECL)
250 	return ACTION_TEXT_DECL;
251       /* fall through */
252     case externalSubset1:
253       switch (tok) {
254       case Encoding.TOK_COND_SECT_OPEN:
255 	state = condSect0;
256 	return ACTION_NONE;
257       case Encoding.TOK_COND_SECT_CLOSE:
258 	if (includeLevel == 0)
259 	  break;
260 	--includeLevel;
261 	return ACTION_NONE;
262       case Encoding.TOK_CLOSE_BRACKET:
263 	throw new PrologSyntaxException();
264       }
265       /* fall through */
266     case internalSubset:
267       switch (tok) {
268       case Encoding.TOK_DECL_OPEN:
269 	if (enc.matchesXMLString(buf,
270 				 start + 2 * enc.getMinBytesPerChar(),
271 				 end,
272 				 "ENTITY")) {
273 	  state = entity0;
274 	  return ACTION_NONE;
275 	}
276 	if (enc.matchesXMLString(buf,
277 				 start + 2 * enc.getMinBytesPerChar(),
278 				 end,
279 				 "ATTLIST")) {
280 	  state = attlist0;
281 	  return ACTION_NONE;
282 	}
283 	if (enc.matchesXMLString(buf,
284 				 start + 2 * enc.getMinBytesPerChar(),
285 				 end,
286 				 "ELEMENT")) {
287 	  state = element0;
288 	  return ACTION_NONE;
289 	}
290 	if (enc.matchesXMLString(buf,
291 				 start + 2 * enc.getMinBytesPerChar(),
292 				 end,
293 				 "NOTATION")) {
294 	  state = notation0;
295 	  return ACTION_NONE;
296 	}
297 	break;
298       case Encoding.TOK_PI:
299 	return ACTION_PI;
300       case Encoding.TOK_COMMENT:
301 	return ACTION_COMMENT;
302       case Encoding.TOK_PARAM_ENTITY_REF:
303 	return ACTION_OUTER_PARAM_ENTITY_REF;
304       case Encoding.TOK_CLOSE_BRACKET:
305 	state = doctype5;
306 	return ACTION_NONE;
307       }
308       break;
309     case entity0:
310       switch (tok) {
311       case Encoding.TOK_PERCENT:
312 	state = entity1;
313 	return ACTION_NONE;
314       case Encoding.TOK_NAME:
315 	state = entity2;
316 	return ACTION_GENERAL_ENTITY_NAME;
317       }
318       break;
319     case entity1:
320       if (tok == Encoding.TOK_NAME) {
321 	state = entity7;
322 	return ACTION_PARAM_ENTITY_NAME;
323       }
324       break;
325     case entity2:
326       switch (tok) {
327       case Encoding.TOK_NAME:
328 	if (enc.matchesXMLString(buf, start, end, "SYSTEM")) {
329 	  state = entity4;
330 	  return ACTION_NONE;
331 	}
332 	if (enc.matchesXMLString(buf, start, end, "PUBLIC")) {
333 	  state = entity3;
334 	  return ACTION_NONE;
335 	}
336 	break;
337       case Encoding.TOK_LITERAL:
338 	state = declClose;
339 	return documentEntity ? ACTION_ENTITY_VALUE_NO_PEREFS : ACTION_ENTITY_VALUE_WITH_PEREFS;
340       }
341       break;
342     case entity3:
343       if (tok == Encoding.TOK_LITERAL) {
344 	state = entity4;
345 	return ACTION_ENTITY_PUBLIC_ID;
346       }
347       break;
348     case entity4:
349       if (tok == Encoding.TOK_LITERAL) {
350 	state = entity5;
351 	return ACTION_ENTITY_SYSTEM_ID;
352       }
353       break;
354     case entity5:
355       switch (tok) {
356       case Encoding.TOK_DECL_CLOSE:
357 	state = documentEntity ? internalSubset : externalSubset1;
358 	return ACTION_DECL_CLOSE;
359       case Encoding.TOK_NAME:
360 	if (enc.matchesXMLString(buf, start, end, "NDATA")) {
361 	  state = entity6;
362 	  return ACTION_NONE;
363 	}
364 	break;
365       }
366       break;
367     case entity6:
368       switch (tok) {
369       case Encoding.TOK_NAME:
370 	state = declClose;
371 	return ACTION_ENTITY_NOTATION_NAME;
372       }
373       break;
374     case entity7:
375       switch (tok) {
376       case Encoding.TOK_NAME:
377 	if (enc.matchesXMLString(buf, start, end, "SYSTEM")) {
378 	  state = entity9;
379 	  return ACTION_NONE;
380 	}
381 	if (enc.matchesXMLString(buf, start, end, "PUBLIC")) {
382 	  state = entity8;
383 	  return ACTION_NONE;
384 	}
385 	break;
386       case Encoding.TOK_LITERAL:
387 	state = declClose;
388 	return documentEntity ? ACTION_ENTITY_VALUE_NO_PEREFS : ACTION_ENTITY_VALUE_WITH_PEREFS;
389       }
390       break;
391     case entity8:
392       if (tok == Encoding.TOK_LITERAL) {
393 	state = entity9;
394 	return ACTION_ENTITY_PUBLIC_ID;
395       }
396       break;
397     case entity9:
398       if (tok == Encoding.TOK_LITERAL) {
399 	state = declClose;
400 	return ACTION_ENTITY_SYSTEM_ID;
401       }
402       break;
403     case notation0:
404       if (tok == Encoding.TOK_NAME) {
405 	state = notation1;
406 	return ACTION_NOTATION_NAME;
407       }
408       break;
409     case notation1:
410       switch (tok) {
411       case Encoding.TOK_NAME:
412 	if (enc.matchesXMLString(buf, start, end, "SYSTEM")) {
413 	  state = notation3;
414 	  return ACTION_NONE;
415 	}
416 	if (enc.matchesXMLString(buf, start, end, "PUBLIC")) {
417 	  state = notation2;
418 	  return ACTION_NONE;
419 	}
420 	break;
421       }
422       break;
423     case notation2:
424       if (tok == Encoding.TOK_LITERAL) {
425 	state = notation4;
426 	return ACTION_NOTATION_PUBLIC_ID;
427       }
428       break;
429     case notation3:
430       if (tok == Encoding.TOK_LITERAL) {
431 	state = declClose;
432 	return ACTION_NOTATION_SYSTEM_ID;
433       }
434       break;
435     case notation4:
436       switch (tok) {
437       case Encoding.TOK_LITERAL:
438 	state = declClose;
439 	return ACTION_NOTATION_SYSTEM_ID;
440       case Encoding.TOK_DECL_CLOSE:
441 	state = documentEntity ? internalSubset : externalSubset1;
442 	return ACTION_DECL_CLOSE;
443       }
444       break;
445     case attlist0:
446       if (tok == Encoding.TOK_NAME) {
447 	state = attlist1;
448 	return ACTION_ATTLIST_ELEMENT_NAME;
449       }
450       break;
451     case attlist1:
452       switch (tok) {
453       case Encoding.TOK_DECL_CLOSE:
454 	state = documentEntity ? internalSubset : externalSubset1;
455 	return ACTION_NONE;
456       case Encoding.TOK_NAME:
457 	state = attlist2;
458 	return ACTION_ATTRIBUTE_NAME;
459       }
460       break;
461     case attlist2:
462       switch (tok) {
463       case Encoding.TOK_NAME:
464 	for (int i = 0; i < attributeTypes.length; i++)
465 	  if (enc.matchesXMLString(buf, start, end, attributeTypes[i])) {
466 	    state = attlist8;
467 	    return ACTION_ATTRIBUTE_TYPE_CDATA + i;
468 	  }
469 	if (enc.matchesXMLString(buf, start, end, "NOTATION")) {
470 	  state = attlist5;
471 	  return ACTION_NONE;
472 	}
473 	break;
474       case Encoding.TOK_OPEN_PAREN:
475 	groupLevel = 1;
476 	state = attlist3;
477 	return ACTION_NONE;
478       }
479       break;
480     case attlist3:
481       switch (tok) {
482       case Encoding.TOK_NMTOKEN:
483       case Encoding.TOK_NAME:
484 	state = attlist4;
485 	return ACTION_ATTRIBUTE_ENUM_VALUE;
486       }
487       break;
488     case attlist4:
489       switch (tok) {
490       case Encoding.TOK_CLOSE_PAREN:
491 	state = attlist8;
492 	groupLevel = 0;
493 	return ACTION_NONE;
494       case Encoding.TOK_OR:
495 	state = attlist3;
496 	return ACTION_NONE;
497       }
498       break;
499     case attlist5:
500       if (tok == Encoding.TOK_OPEN_PAREN) {
501 	state = attlist6;
502 	groupLevel = 1;
503 	return ACTION_NONE;
504       }
505       break;
506     case attlist6:
507       if (tok == Encoding.TOK_NAME) {
508 	state = attlist7;
509 	return ACTION_ATTRIBUTE_NOTATION_VALUE;
510       }
511       break;
512     case attlist7:
513       switch (tok) {
514       case Encoding.TOK_CLOSE_PAREN:
515 	groupLevel = 0;
516 	state = attlist8;
517 	return ACTION_NONE;
518       case Encoding.TOK_OR:
519 	state = attlist6;
520 	return ACTION_NONE;
521       }
522       break;
523       /* default value */
524     case attlist8:
525       switch (tok) {
526       case Encoding.TOK_POUND_NAME:
527 	if (enc.matchesXMLString(buf,
528 				 start + enc.getMinBytesPerChar(),
529 				 end,
530 				 "IMPLIED")) {
531 	  state = attlist1;
532 	  return ACTION_IMPLIED_ATTRIBUTE_VALUE;
533 	}
534 	if (enc.matchesXMLString(buf,
535 				 start + enc.getMinBytesPerChar(),
536 				 end,
537 				 "REQUIRED")) {
538 	  state = attlist1;
539 	  return ACTION_REQUIRED_ATTRIBUTE_VALUE;
540 	}
541 	if (enc.matchesXMLString(buf,
542 				 start + enc.getMinBytesPerChar(),
543 				 end,
544 				 "FIXED")) {
545 	  state = attlist9;
546 	  return ACTION_NONE;
547 	}
548 	break;
549       case Encoding.TOK_LITERAL:
550 	state = attlist1;
551 	return ACTION_DEFAULT_ATTRIBUTE_VALUE;
552       }
553       break;
554     case attlist9:
555       if (tok == Encoding.TOK_LITERAL) {
556 	state = attlist1;
557 	return ACTION_FIXED_ATTRIBUTE_VALUE;
558       }
559       break;
560     case element0:
561       if (tok == Encoding.TOK_NAME) {
562 	state = element1;
563 	return ACTION_ELEMENT_NAME;
564       }
565       break;
566     case element1:
567       switch (tok) {
568       case Encoding.TOK_NAME:
569 	if (enc.matchesXMLString(buf, start, end, "EMPTY")) {
570 	  state = declClose;
571 	  return ACTION_CONTENT_EMPTY;
572 	}
573 	if (enc.matchesXMLString(buf, start, end, "ANY")) {
574 	  state = declClose;
575 	  return ACTION_CONTENT_ANY;
576 	}
577 	break;
578       case Encoding.TOK_OPEN_PAREN:
579 	state = element2;
580 	groupLevel = 1;
581 	connector[0] = (byte)0;
582 	return ACTION_GROUP_OPEN;
583       }
584       break;
585     case element2:
586       switch (tok) {
587       case Encoding.TOK_POUND_NAME:
588 	if (enc.matchesXMLString(buf,
589 				 start + enc.getMinBytesPerChar(),
590 				 end,
591 				 "PCDATA")) {
592 	  state = element3;
593 	  return ACTION_CONTENT_PCDATA;
594 	}
595 	break;
596       case Encoding.TOK_OPEN_PAREN:
597 	groupLevel = 2;
598 	connector[1] = (byte)0;
599 	state = element6;
600 	return ACTION_GROUP_OPEN;
601       case Encoding.TOK_NAME:
602 	state = element7;
603 	return ACTION_CONTENT_ELEMENT;
604       case Encoding.TOK_NAME_QUESTION:
605 	state = element7;
606 	return ACTION_CONTENT_ELEMENT_OPT;
607       case Encoding.TOK_NAME_ASTERISK:
608 	state = element7;
609 	return ACTION_CONTENT_ELEMENT_REP;
610       case Encoding.TOK_NAME_PLUS:
611 	state = element7;
612 	return ACTION_CONTENT_ELEMENT_PLUS;
613       }
614       break;
615     case element3:
616       switch (tok) {
617       case Encoding.TOK_CLOSE_PAREN:
618       case Encoding.TOK_CLOSE_PAREN_ASTERISK:
619 	groupLevel = 0;
620 	state = declClose;
621 	return ACTION_GROUP_CLOSE_REP;
622       case Encoding.TOK_OR:
623 	state = element4;
624 	return ACTION_GROUP_CHOICE;
625       }
626       break;
627     case element4:
628       if (tok == Encoding.TOK_NAME) {
629 	state = element5;
630 	return ACTION_CONTENT_ELEMENT;
631       }
632       break;
633     case element5:
634       switch (tok) {
635       case Encoding.TOK_CLOSE_PAREN_ASTERISK:
636 	groupLevel = 0;
637 	state = declClose;
638 	return ACTION_GROUP_CLOSE_REP;
639       case Encoding.TOK_OR:
640 	state = element4;
641 	return ACTION_GROUP_CHOICE;
642       }
643       break;
644     case element6:
645       switch (tok) {
646       case Encoding.TOK_OPEN_PAREN:
647 	if (groupLevel >= connector.length) {
648 	  byte[] tem = new byte[connector.length << 1];
649 	  System.arraycopy(connector, 0, tem, 0, connector.length);
650 	  connector = tem;
651 	}
652 	connector[groupLevel] = (byte)0;
653 	groupLevel += 1;
654 	return ACTION_GROUP_OPEN;
655       case Encoding.TOK_NAME:
656 	state = element7;
657 	return ACTION_CONTENT_ELEMENT;
658       case Encoding.TOK_NAME_QUESTION:
659 	state = element7;
660 	return ACTION_CONTENT_ELEMENT_OPT;
661       case Encoding.TOK_NAME_ASTERISK:
662 	state = element7;
663 	return ACTION_CONTENT_ELEMENT_REP;
664       case Encoding.TOK_NAME_PLUS:
665 	state = element7;
666 	return ACTION_CONTENT_ELEMENT_PLUS;
667       }
668       break;
669     case element7:
670       switch (tok) {
671       case Encoding.TOK_CLOSE_PAREN:
672 	groupLevel -= 1;
673 	if (groupLevel == 0)
674 	  state = declClose;
675 	return ACTION_GROUP_CLOSE;
676       case Encoding.TOK_CLOSE_PAREN_ASTERISK:
677 	groupLevel -= 1;
678 	if (groupLevel == 0)
679 	  state = declClose;
680 	return ACTION_GROUP_CLOSE_REP;
681       case Encoding.TOK_CLOSE_PAREN_QUESTION:
682 	groupLevel -= 1;
683 	if (groupLevel == 0)
684 	  state = declClose;
685 	return ACTION_GROUP_CLOSE_OPT;
686       case Encoding.TOK_CLOSE_PAREN_PLUS:
687 	groupLevel -= 1;
688 	if (groupLevel == 0)
689 	  state = declClose;
690 	return ACTION_GROUP_CLOSE_PLUS;
691       case Encoding.TOK_COMMA:
692 	state = element6;
693 	if (connector[groupLevel - 1] == (byte)'|')
694 	  break;
695 	connector[groupLevel - 1] = (byte)',';
696 	return ACTION_GROUP_SEQUENCE;
697       case Encoding.TOK_OR:
698 	state = element6;
699 	if (connector[groupLevel - 1] == (byte)',')
700 	  break;
701 	connector[groupLevel - 1] = (byte)'|';
702 	return ACTION_GROUP_CHOICE;
703       }
704       break;
705     case declClose:
706       if (tok == Encoding.TOK_DECL_CLOSE) {
707 	state = documentEntity ? internalSubset : externalSubset1;
708 	return ACTION_DECL_CLOSE;
709       }
710       break;
711     case condSect0:
712       if (tok == Encoding.TOK_NAME) {
713 	if (enc.matchesXMLString(buf, start, end, "INCLUDE")) {
714 	  state = condSect1;
715 	  return ACTION_NONE;
716 	}
717 	if (enc.matchesXMLString(buf, start, end, "IGNORE")) {
718 	  state = condSect2;
719 	  return ACTION_NONE;
720 	}
721       }
722       break;
723     case condSect1:
724       if (tok == Encoding.TOK_OPEN_BRACKET) {
725 	state = externalSubset1;
726 	includeLevel++;
727 	return ACTION_NONE;
728       }
729       break;
730     case condSect2:
731       if (tok == Encoding.TOK_OPEN_BRACKET) {
732 	state = externalSubset1;
733 	return ACTION_IGNORE_SECT;
734       }
735       break;
736     }
737     if (tok == Encoding.TOK_PROLOG_S)
738       return ACTION_NONE;
739     if (tok == Encoding.TOK_PARAM_ENTITY_REF && !documentEntity)
740       return ACTION_INNER_PARAM_ENTITY_REF;
741     throw new PrologSyntaxException();
742   }
743 
clone()744   public Object clone() {
745     try {
746       PrologParser copy = (PrologParser)super.clone();
747       copy.connector = new byte[connector.length];
748       System.arraycopy(connector, 0, copy.connector, 0, groupLevel);
749       return copy;
750     }
751     catch (CloneNotSupportedException e) {
752       throw new InternalError();
753     }
754   }
755 
getGroupLevel()756   public final int getGroupLevel() {
757     return groupLevel;
758   }
759 
760   private static final String[] attributeTypes = {
761     "CDATA",
762     "ID",
763     "IDREF",
764     "IDREFS",
765     "ENTITY",
766     "ENTITIES",
767     "NMTOKEN",
768     "NMTOKENS",
769   };
770 }
771