1 /*
2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3 See the file COPYING for copying permission.
4 */
5 
6 
7 #ifdef COMPILED_FROM_DSP
8 #  include "winconfig.h"
9 #else
10 #  include <config.h>
11 #endif /* ndef COMPILED_FROM_DSP */
12 
13 #include "xmlrole.h"
14 #include "ascii.h"
15 
16 /* Doesn't check:
17 
18  that ,| are not mixed in a model group
19  content of literals
20 
21 */
22 
23 static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' };
24 static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
25 static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
26 static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
27 static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
28 static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
29 static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
30 static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
31 static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
32 static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' };
33 static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
34 static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
35 static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
36 static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
37 static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
38 static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
39 static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
40 static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
41 static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' };
42 static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
43 static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
44 static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' };
45 static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
46 
/* Width, in bytes, of the narrowest character of an encoding, read from the
   encoding's minBytesPerChar member.  Only defined here when no earlier
   definition exists. */
#if !defined(MIN_BYTES_PER_CHAR)
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
50 
/* setTopLevel(state): reinstall the declaration-level handler once a markup
   declaration has been closed.  Without XML_DTD that handler is always
   internalSubset; with XML_DTD it is internalSubset when
   state->documentEntity is non-zero and externalSubset1 otherwise.  The
   macro is an assignment expression, so it can be used as a statement. */
#ifndef XML_DTD
#define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#define setTopLevel(state) \
  ((state)->handler = \
     ((state)->documentEntity ? internalSubset : externalSubset1))
#endif /* XML_DTD */
59 
60 typedef int PROLOG_HANDLER(PROLOG_STATE *state,
61 			   int tok,
62 			   const char *ptr,
63 			   const char *end,
64 			   const ENCODING *enc);
65 
66 static PROLOG_HANDLER
67   prolog0, prolog1, prolog2,
68   doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
69   internalSubset,
70   entity0, entity1, entity2, entity3, entity4, entity5, entity6,
71   entity7, entity8, entity9,
72   notation0, notation1, notation2, notation3, notation4,
73   attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
74   attlist7, attlist8, attlist9,
75   element0, element1, element2, element3, element4, element5, element6,
76   element7,
77 #ifdef XML_DTD
78   externalSubset0, externalSubset1,
79   condSect0, condSect1, condSect2,
80 #endif /* XML_DTD */
81   declClose,
82   error;
83 
84 static
85 int common(PROLOG_STATE *state, int tok);
86 
87 static
prolog0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)88 int prolog0(PROLOG_STATE *state,
89 	    int tok,
90 	    const char *ptr,
91 	    const char *end,
92 	    const ENCODING *enc)
93 {
94   switch (tok) {
95   case XML_TOK_PROLOG_S:
96     state->handler = prolog1;
97     return XML_ROLE_NONE;
98   case XML_TOK_XML_DECL:
99     state->handler = prolog1;
100     return XML_ROLE_XML_DECL;
101   case XML_TOK_PI:
102     state->handler = prolog1;
103     return XML_ROLE_NONE;
104   case XML_TOK_COMMENT:
105     state->handler = prolog1;
106   case XML_TOK_BOM:
107     return XML_ROLE_NONE;
108   case XML_TOK_DECL_OPEN:
109     if (!XmlNameMatchesAscii(enc,
110 			     ptr + 2 * MIN_BYTES_PER_CHAR(enc),
111 			     end,
112 			     KW_DOCTYPE))
113       break;
114     state->handler = doctype0;
115     return XML_ROLE_NONE;
116   case XML_TOK_INSTANCE_START:
117     state->handler = error;
118     return XML_ROLE_INSTANCE_START;
119   }
120   return common(state, tok);
121 }
122 
123 static
prolog1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)124 int prolog1(PROLOG_STATE *state,
125 	    int tok,
126 	    const char *ptr,
127 	    const char *end,
128 	    const ENCODING *enc)
129 {
130   switch (tok) {
131   case XML_TOK_PROLOG_S:
132     return XML_ROLE_NONE;
133   case XML_TOK_PI:
134   case XML_TOK_COMMENT:
135   case XML_TOK_BOM:
136     return XML_ROLE_NONE;
137   case XML_TOK_DECL_OPEN:
138     if (!XmlNameMatchesAscii(enc,
139 			     ptr + 2 * MIN_BYTES_PER_CHAR(enc),
140 			     end,
141 			     KW_DOCTYPE))
142       break;
143     state->handler = doctype0;
144     return XML_ROLE_NONE;
145   case XML_TOK_INSTANCE_START:
146     state->handler = error;
147     return XML_ROLE_INSTANCE_START;
148   }
149   return common(state, tok);
150 }
151 
152 static
prolog2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)153 int prolog2(PROLOG_STATE *state,
154 	    int tok,
155 	    const char *ptr,
156 	    const char *end,
157 	    const ENCODING *enc)
158 {
159   switch (tok) {
160   case XML_TOK_PROLOG_S:
161     return XML_ROLE_NONE;
162   case XML_TOK_PI:
163   case XML_TOK_COMMENT:
164     return XML_ROLE_NONE;
165   case XML_TOK_INSTANCE_START:
166     state->handler = error;
167     return XML_ROLE_INSTANCE_START;
168   }
169   return common(state, tok);
170 }
171 
172 static
doctype0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)173 int doctype0(PROLOG_STATE *state,
174 	     int tok,
175 	     const char *ptr,
176 	     const char *end,
177 	     const ENCODING *enc)
178 {
179   switch (tok) {
180   case XML_TOK_PROLOG_S:
181     return XML_ROLE_NONE;
182   case XML_TOK_NAME:
183   case XML_TOK_PREFIXED_NAME:
184     state->handler = doctype1;
185     return XML_ROLE_DOCTYPE_NAME;
186   }
187   return common(state, tok);
188 }
189 
190 static
doctype1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)191 int doctype1(PROLOG_STATE *state,
192 	     int tok,
193 	     const char *ptr,
194 	     const char *end,
195 	     const ENCODING *enc)
196 {
197   switch (tok) {
198   case XML_TOK_PROLOG_S:
199     return XML_ROLE_NONE;
200   case XML_TOK_OPEN_BRACKET:
201     state->handler = internalSubset;
202     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
203   case XML_TOK_DECL_CLOSE:
204     state->handler = prolog2;
205     return XML_ROLE_DOCTYPE_CLOSE;
206   case XML_TOK_NAME:
207     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
208       state->handler = doctype3;
209       return XML_ROLE_NONE;
210     }
211     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
212       state->handler = doctype2;
213       return XML_ROLE_NONE;
214     }
215     break;
216   }
217   return common(state, tok);
218 }
219 
220 static
doctype2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)221 int doctype2(PROLOG_STATE *state,
222 	     int tok,
223 	     const char *ptr,
224 	     const char *end,
225 	     const ENCODING *enc)
226 {
227   switch (tok) {
228   case XML_TOK_PROLOG_S:
229     return XML_ROLE_NONE;
230   case XML_TOK_LITERAL:
231     state->handler = doctype3;
232     return XML_ROLE_DOCTYPE_PUBLIC_ID;
233   }
234   return common(state, tok);
235 }
236 
237 static
doctype3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)238 int doctype3(PROLOG_STATE *state,
239 	     int tok,
240 	     const char *ptr,
241 	     const char *end,
242 	     const ENCODING *enc)
243 {
244   switch (tok) {
245   case XML_TOK_PROLOG_S:
246     return XML_ROLE_NONE;
247   case XML_TOK_LITERAL:
248     state->handler = doctype4;
249     return XML_ROLE_DOCTYPE_SYSTEM_ID;
250   }
251   return common(state, tok);
252 }
253 
254 static
doctype4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)255 int doctype4(PROLOG_STATE *state,
256 	     int tok,
257 	     const char *ptr,
258 	     const char *end,
259 	     const ENCODING *enc)
260 {
261   switch (tok) {
262   case XML_TOK_PROLOG_S:
263     return XML_ROLE_NONE;
264   case XML_TOK_OPEN_BRACKET:
265     state->handler = internalSubset;
266     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
267   case XML_TOK_DECL_CLOSE:
268     state->handler = prolog2;
269     return XML_ROLE_DOCTYPE_CLOSE;
270   }
271   return common(state, tok);
272 }
273 
274 static
doctype5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)275 int doctype5(PROLOG_STATE *state,
276 	     int tok,
277 	     const char *ptr,
278 	     const char *end,
279 	     const ENCODING *enc)
280 {
281   switch (tok) {
282   case XML_TOK_PROLOG_S:
283     return XML_ROLE_NONE;
284   case XML_TOK_DECL_CLOSE:
285     state->handler = prolog2;
286     return XML_ROLE_DOCTYPE_CLOSE;
287   }
288   return common(state, tok);
289 }
290 
291 static
internalSubset(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)292 int internalSubset(PROLOG_STATE *state,
293 		   int tok,
294 		   const char *ptr,
295 		   const char *end,
296 		   const ENCODING *enc)
297 {
298   switch (tok) {
299   case XML_TOK_PROLOG_S:
300     return XML_ROLE_NONE;
301   case XML_TOK_DECL_OPEN:
302     if (XmlNameMatchesAscii(enc,
303 			    ptr + 2 * MIN_BYTES_PER_CHAR(enc),
304 			    end,
305 			    KW_ENTITY)) {
306       state->handler = entity0;
307       return XML_ROLE_NONE;
308     }
309     if (XmlNameMatchesAscii(enc,
310 			    ptr + 2 * MIN_BYTES_PER_CHAR(enc),
311 			    end,
312 			    KW_ATTLIST)) {
313       state->handler = attlist0;
314       return XML_ROLE_NONE;
315     }
316     if (XmlNameMatchesAscii(enc,
317 			    ptr + 2 * MIN_BYTES_PER_CHAR(enc),
318 			    end,
319 			    KW_ELEMENT)) {
320       state->handler = element0;
321       return XML_ROLE_NONE;
322     }
323     if (XmlNameMatchesAscii(enc,
324 			    ptr + 2 * MIN_BYTES_PER_CHAR(enc),
325 			    end,
326 			    KW_NOTATION)) {
327       state->handler = notation0;
328       return XML_ROLE_NONE;
329     }
330     break;
331   case XML_TOK_PI:
332   case XML_TOK_COMMENT:
333     return XML_ROLE_NONE;
334   case XML_TOK_PARAM_ENTITY_REF:
335     return XML_ROLE_PARAM_ENTITY_REF;
336   case XML_TOK_CLOSE_BRACKET:
337     state->handler = doctype5;
338     return XML_ROLE_NONE;
339   }
340   return common(state, tok);
341 }
342 
343 #ifdef XML_DTD
344 
345 static
externalSubset0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)346 int externalSubset0(PROLOG_STATE *state,
347 		    int tok,
348 		    const char *ptr,
349 		    const char *end,
350 		    const ENCODING *enc)
351 {
352   state->handler = externalSubset1;
353   if (tok == XML_TOK_XML_DECL)
354     return XML_ROLE_TEXT_DECL;
355   return externalSubset1(state, tok, ptr, end, enc);
356 }
357 
358 static
externalSubset1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)359 int externalSubset1(PROLOG_STATE *state,
360 		    int tok,
361 		    const char *ptr,
362 		    const char *end,
363 		    const ENCODING *enc)
364 {
365   switch (tok) {
366   case XML_TOK_COND_SECT_OPEN:
367     state->handler = condSect0;
368     return XML_ROLE_NONE;
369   case XML_TOK_COND_SECT_CLOSE:
370     if (state->includeLevel == 0)
371       break;
372     state->includeLevel -= 1;
373     return XML_ROLE_NONE;
374   case XML_TOK_PROLOG_S:
375     return XML_ROLE_NONE;
376   case XML_TOK_CLOSE_BRACKET:
377     break;
378   case XML_TOK_NONE:
379     if (state->includeLevel)
380       break;
381     return XML_ROLE_NONE;
382   default:
383     return internalSubset(state, tok, ptr, end, enc);
384   }
385   return common(state, tok);
386 }
387 
388 #endif /* XML_DTD */
389 
390 static
entity0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)391 int entity0(PROLOG_STATE *state,
392 	    int tok,
393 	    const char *ptr,
394 	    const char *end,
395 	    const ENCODING *enc)
396 {
397   switch (tok) {
398   case XML_TOK_PROLOG_S:
399     return XML_ROLE_NONE;
400   case XML_TOK_PERCENT:
401     state->handler = entity1;
402     return XML_ROLE_NONE;
403   case XML_TOK_NAME:
404     state->handler = entity2;
405     return XML_ROLE_GENERAL_ENTITY_NAME;
406   }
407   return common(state, tok);
408 }
409 
410 static
entity1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)411 int entity1(PROLOG_STATE *state,
412 	    int tok,
413 	    const char *ptr,
414 	    const char *end,
415 	    const ENCODING *enc)
416 {
417   switch (tok) {
418   case XML_TOK_PROLOG_S:
419     return XML_ROLE_NONE;
420   case XML_TOK_NAME:
421     state->handler = entity7;
422     return XML_ROLE_PARAM_ENTITY_NAME;
423   }
424   return common(state, tok);
425 }
426 
427 static
entity2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)428 int entity2(PROLOG_STATE *state,
429 	    int tok,
430 	    const char *ptr,
431 	    const char *end,
432 	    const ENCODING *enc)
433 {
434   switch (tok) {
435   case XML_TOK_PROLOG_S:
436     return XML_ROLE_NONE;
437   case XML_TOK_NAME:
438     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
439       state->handler = entity4;
440       return XML_ROLE_NONE;
441     }
442     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
443       state->handler = entity3;
444       return XML_ROLE_NONE;
445     }
446     break;
447   case XML_TOK_LITERAL:
448     state->handler = declClose;
449     return XML_ROLE_ENTITY_VALUE;
450   }
451   return common(state, tok);
452 }
453 
454 static
entity3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)455 int entity3(PROLOG_STATE *state,
456 	    int tok,
457 	    const char *ptr,
458 	    const char *end,
459 	    const ENCODING *enc)
460 {
461   switch (tok) {
462   case XML_TOK_PROLOG_S:
463     return XML_ROLE_NONE;
464   case XML_TOK_LITERAL:
465     state->handler = entity4;
466     return XML_ROLE_ENTITY_PUBLIC_ID;
467   }
468   return common(state, tok);
469 }
470 
471 
472 static
entity4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)473 int entity4(PROLOG_STATE *state,
474 	    int tok,
475 	    const char *ptr,
476 	    const char *end,
477 	    const ENCODING *enc)
478 {
479   switch (tok) {
480   case XML_TOK_PROLOG_S:
481     return XML_ROLE_NONE;
482   case XML_TOK_LITERAL:
483     state->handler = entity5;
484     return XML_ROLE_ENTITY_SYSTEM_ID;
485   }
486   return common(state, tok);
487 }
488 
489 static
entity5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)490 int entity5(PROLOG_STATE *state,
491 	    int tok,
492 	    const char *ptr,
493 	    const char *end,
494 	    const ENCODING *enc)
495 {
496   switch (tok) {
497   case XML_TOK_PROLOG_S:
498     return XML_ROLE_NONE;
499   case XML_TOK_DECL_CLOSE:
500     setTopLevel(state);
501     return XML_ROLE_ENTITY_COMPLETE;
502   case XML_TOK_NAME:
503     if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
504       state->handler = entity6;
505       return XML_ROLE_NONE;
506     }
507     break;
508   }
509   return common(state, tok);
510 }
511 
512 static
entity6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)513 int entity6(PROLOG_STATE *state,
514 	    int tok,
515 	    const char *ptr,
516 	    const char *end,
517 	    const ENCODING *enc)
518 {
519   switch (tok) {
520   case XML_TOK_PROLOG_S:
521     return XML_ROLE_NONE;
522   case XML_TOK_NAME:
523     state->handler = declClose;
524     return XML_ROLE_ENTITY_NOTATION_NAME;
525   }
526   return common(state, tok);
527 }
528 
529 static
entity7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)530 int entity7(PROLOG_STATE *state,
531 	    int tok,
532 	    const char *ptr,
533 	    const char *end,
534 	    const ENCODING *enc)
535 {
536   switch (tok) {
537   case XML_TOK_PROLOG_S:
538     return XML_ROLE_NONE;
539   case XML_TOK_NAME:
540     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
541       state->handler = entity9;
542       return XML_ROLE_NONE;
543     }
544     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
545       state->handler = entity8;
546       return XML_ROLE_NONE;
547     }
548     break;
549   case XML_TOK_LITERAL:
550     state->handler = declClose;
551     return XML_ROLE_ENTITY_VALUE;
552   }
553   return common(state, tok);
554 }
555 
556 static
entity8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)557 int entity8(PROLOG_STATE *state,
558 	    int tok,
559 	    const char *ptr,
560 	    const char *end,
561 	    const ENCODING *enc)
562 {
563   switch (tok) {
564   case XML_TOK_PROLOG_S:
565     return XML_ROLE_NONE;
566   case XML_TOK_LITERAL:
567     state->handler = entity9;
568     return XML_ROLE_ENTITY_PUBLIC_ID;
569   }
570   return common(state, tok);
571 }
572 
573 static
entity9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)574 int entity9(PROLOG_STATE *state,
575 	    int tok,
576 	    const char *ptr,
577 	    const char *end,
578 	    const ENCODING *enc)
579 {
580   switch (tok) {
581   case XML_TOK_PROLOG_S:
582     return XML_ROLE_NONE;
583   case XML_TOK_LITERAL:
584     state->handler = declClose;
585     return XML_ROLE_ENTITY_SYSTEM_ID;
586   }
587   return common(state, tok);
588 }
589 
590 static
notation0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)591 int notation0(PROLOG_STATE *state,
592 	      int tok,
593 	      const char *ptr,
594 	      const char *end,
595 	      const ENCODING *enc)
596 {
597   switch (tok) {
598   case XML_TOK_PROLOG_S:
599     return XML_ROLE_NONE;
600   case XML_TOK_NAME:
601     state->handler = notation1;
602     return XML_ROLE_NOTATION_NAME;
603   }
604   return common(state, tok);
605 }
606 
607 static
notation1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)608 int notation1(PROLOG_STATE *state,
609 	      int tok,
610 	      const char *ptr,
611 	      const char *end,
612 	      const ENCODING *enc)
613 {
614   switch (tok) {
615   case XML_TOK_PROLOG_S:
616     return XML_ROLE_NONE;
617   case XML_TOK_NAME:
618     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
619       state->handler = notation3;
620       return XML_ROLE_NONE;
621     }
622     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
623       state->handler = notation2;
624       return XML_ROLE_NONE;
625     }
626     break;
627   }
628   return common(state, tok);
629 }
630 
631 static
notation2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)632 int notation2(PROLOG_STATE *state,
633 	      int tok,
634 	      const char *ptr,
635 	      const char *end,
636 	      const ENCODING *enc)
637 {
638   switch (tok) {
639   case XML_TOK_PROLOG_S:
640     return XML_ROLE_NONE;
641   case XML_TOK_LITERAL:
642     state->handler = notation4;
643     return XML_ROLE_NOTATION_PUBLIC_ID;
644   }
645   return common(state, tok);
646 }
647 
648 static
notation3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)649 int notation3(PROLOG_STATE *state,
650 	      int tok,
651 	      const char *ptr,
652 	      const char *end,
653 	      const ENCODING *enc)
654 {
655   switch (tok) {
656   case XML_TOK_PROLOG_S:
657     return XML_ROLE_NONE;
658   case XML_TOK_LITERAL:
659     state->handler = declClose;
660     return XML_ROLE_NOTATION_SYSTEM_ID;
661   }
662   return common(state, tok);
663 }
664 
665 static
notation4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)666 int notation4(PROLOG_STATE *state,
667 	      int tok,
668 	      const char *ptr,
669 	      const char *end,
670 	      const ENCODING *enc)
671 {
672   switch (tok) {
673   case XML_TOK_PROLOG_S:
674     return XML_ROLE_NONE;
675   case XML_TOK_LITERAL:
676     state->handler = declClose;
677     return XML_ROLE_NOTATION_SYSTEM_ID;
678   case XML_TOK_DECL_CLOSE:
679     setTopLevel(state);
680     return XML_ROLE_NOTATION_NO_SYSTEM_ID;
681   }
682   return common(state, tok);
683 }
684 
685 static
attlist0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)686 int attlist0(PROLOG_STATE *state,
687 	     int tok,
688 	     const char *ptr,
689 	     const char *end,
690 	     const ENCODING *enc)
691 {
692   switch (tok) {
693   case XML_TOK_PROLOG_S:
694     return XML_ROLE_NONE;
695   case XML_TOK_NAME:
696   case XML_TOK_PREFIXED_NAME:
697     state->handler = attlist1;
698     return XML_ROLE_ATTLIST_ELEMENT_NAME;
699   }
700   return common(state, tok);
701 }
702 
703 static
attlist1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)704 int attlist1(PROLOG_STATE *state,
705 	     int tok,
706 	     const char *ptr,
707 	     const char *end,
708 	     const ENCODING *enc)
709 {
710   switch (tok) {
711   case XML_TOK_PROLOG_S:
712     return XML_ROLE_NONE;
713   case XML_TOK_DECL_CLOSE:
714     setTopLevel(state);
715     return XML_ROLE_NONE;
716   case XML_TOK_NAME:
717   case XML_TOK_PREFIXED_NAME:
718     state->handler = attlist2;
719     return XML_ROLE_ATTRIBUTE_NAME;
720   }
721   return common(state, tok);
722 }
723 
724 static
attlist2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)725 int attlist2(PROLOG_STATE *state,
726 	     int tok,
727 	     const char *ptr,
728 	     const char *end,
729 	     const ENCODING *enc)
730 {
731   switch (tok) {
732   case XML_TOK_PROLOG_S:
733     return XML_ROLE_NONE;
734   case XML_TOK_NAME:
735     {
736       static const char *types[] = {
737 	KW_CDATA,
738         KW_ID,
739         KW_IDREF,
740         KW_IDREFS,
741         KW_ENTITY,
742         KW_ENTITIES,
743         KW_NMTOKEN,
744         KW_NMTOKENS,
745       };
746       int i;
747       for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
748 	if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
749 	  state->handler = attlist8;
750 	  return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
751 	}
752     }
753     if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
754       state->handler = attlist5;
755       return XML_ROLE_NONE;
756     }
757     break;
758   case XML_TOK_OPEN_PAREN:
759     state->handler = attlist3;
760     return XML_ROLE_NONE;
761   }
762   return common(state, tok);
763 }
764 
765 static
attlist3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)766 int attlist3(PROLOG_STATE *state,
767 	     int tok,
768 	     const char *ptr,
769 	     const char *end,
770 	     const ENCODING *enc)
771 {
772   switch (tok) {
773   case XML_TOK_PROLOG_S:
774     return XML_ROLE_NONE;
775   case XML_TOK_NMTOKEN:
776   case XML_TOK_NAME:
777   case XML_TOK_PREFIXED_NAME:
778     state->handler = attlist4;
779     return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
780   }
781   return common(state, tok);
782 }
783 
784 static
attlist4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)785 int attlist4(PROLOG_STATE *state,
786 	     int tok,
787 	     const char *ptr,
788 	     const char *end,
789 	     const ENCODING *enc)
790 {
791   switch (tok) {
792   case XML_TOK_PROLOG_S:
793     return XML_ROLE_NONE;
794   case XML_TOK_CLOSE_PAREN:
795     state->handler = attlist8;
796     return XML_ROLE_NONE;
797   case XML_TOK_OR:
798     state->handler = attlist3;
799     return XML_ROLE_NONE;
800   }
801   return common(state, tok);
802 }
803 
804 static
attlist5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)805 int attlist5(PROLOG_STATE *state,
806 	     int tok,
807 	     const char *ptr,
808 	     const char *end,
809 	     const ENCODING *enc)
810 {
811   switch (tok) {
812   case XML_TOK_PROLOG_S:
813     return XML_ROLE_NONE;
814   case XML_TOK_OPEN_PAREN:
815     state->handler = attlist6;
816     return XML_ROLE_NONE;
817   }
818   return common(state, tok);
819 }
820 
821 
822 static
attlist6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)823 int attlist6(PROLOG_STATE *state,
824 	     int tok,
825 	     const char *ptr,
826 	     const char *end,
827 	     const ENCODING *enc)
828 {
829   switch (tok) {
830   case XML_TOK_PROLOG_S:
831     return XML_ROLE_NONE;
832   case XML_TOK_NAME:
833     state->handler = attlist7;
834     return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
835   }
836   return common(state, tok);
837 }
838 
839 static
attlist7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)840 int attlist7(PROLOG_STATE *state,
841 	     int tok,
842 	     const char *ptr,
843 	     const char *end,
844 	     const ENCODING *enc)
845 {
846   switch (tok) {
847   case XML_TOK_PROLOG_S:
848     return XML_ROLE_NONE;
849   case XML_TOK_CLOSE_PAREN:
850     state->handler = attlist8;
851     return XML_ROLE_NONE;
852   case XML_TOK_OR:
853     state->handler = attlist6;
854     return XML_ROLE_NONE;
855   }
856   return common(state, tok);
857 }
858 
859 /* default value */
860 static
attlist8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)861 int attlist8(PROLOG_STATE *state,
862 	     int tok,
863 	     const char *ptr,
864 	     const char *end,
865 	     const ENCODING *enc)
866 {
867   switch (tok) {
868   case XML_TOK_PROLOG_S:
869     return XML_ROLE_NONE;
870   case XML_TOK_POUND_NAME:
871     if (XmlNameMatchesAscii(enc,
872 			    ptr + MIN_BYTES_PER_CHAR(enc),
873 			    end,
874 			    KW_IMPLIED)) {
875       state->handler = attlist1;
876       return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
877     }
878     if (XmlNameMatchesAscii(enc,
879 			    ptr + MIN_BYTES_PER_CHAR(enc),
880 			    end,
881 			    KW_REQUIRED)) {
882       state->handler = attlist1;
883       return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
884     }
885     if (XmlNameMatchesAscii(enc,
886 			    ptr + MIN_BYTES_PER_CHAR(enc),
887 			    end,
888 			    KW_FIXED)) {
889       state->handler = attlist9;
890       return XML_ROLE_NONE;
891     }
892     break;
893   case XML_TOK_LITERAL:
894     state->handler = attlist1;
895     return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
896   }
897   return common(state, tok);
898 }
899 
900 static
attlist9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)901 int attlist9(PROLOG_STATE *state,
902 	     int tok,
903 	     const char *ptr,
904 	     const char *end,
905 	     const ENCODING *enc)
906 {
907   switch (tok) {
908   case XML_TOK_PROLOG_S:
909     return XML_ROLE_NONE;
910   case XML_TOK_LITERAL:
911     state->handler = attlist1;
912     return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
913   }
914   return common(state, tok);
915 }
916 
917 static
element0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)918 int element0(PROLOG_STATE *state,
919 	     int tok,
920 	     const char *ptr,
921 	     const char *end,
922 	     const ENCODING *enc)
923 {
924   switch (tok) {
925   case XML_TOK_PROLOG_S:
926     return XML_ROLE_NONE;
927   case XML_TOK_NAME:
928   case XML_TOK_PREFIXED_NAME:
929     state->handler = element1;
930     return XML_ROLE_ELEMENT_NAME;
931   }
932   return common(state, tok);
933 }
934 
935 static
element1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)936 int element1(PROLOG_STATE *state,
937 	     int tok,
938 	     const char *ptr,
939 	     const char *end,
940 	     const ENCODING *enc)
941 {
942   switch (tok) {
943   case XML_TOK_PROLOG_S:
944     return XML_ROLE_NONE;
945   case XML_TOK_NAME:
946     if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
947       state->handler = declClose;
948       return XML_ROLE_CONTENT_EMPTY;
949     }
950     if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
951       state->handler = declClose;
952       return XML_ROLE_CONTENT_ANY;
953     }
954     break;
955   case XML_TOK_OPEN_PAREN:
956     state->handler = element2;
957     state->level = 1;
958     return XML_ROLE_GROUP_OPEN;
959   }
960   return common(state, tok);
961 }
962 
963 static
element2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)964 int element2(PROLOG_STATE *state,
965 	     int tok,
966 	     const char *ptr,
967 	     const char *end,
968 	     const ENCODING *enc)
969 {
970   switch (tok) {
971   case XML_TOK_PROLOG_S:
972     return XML_ROLE_NONE;
973   case XML_TOK_POUND_NAME:
974     if (XmlNameMatchesAscii(enc,
975 			    ptr + MIN_BYTES_PER_CHAR(enc),
976 			    end,
977 			    KW_PCDATA)) {
978       state->handler = element3;
979       return XML_ROLE_CONTENT_PCDATA;
980     }
981     break;
982   case XML_TOK_OPEN_PAREN:
983     state->level = 2;
984     state->handler = element6;
985     return XML_ROLE_GROUP_OPEN;
986   case XML_TOK_NAME:
987   case XML_TOK_PREFIXED_NAME:
988     state->handler = element7;
989     return XML_ROLE_CONTENT_ELEMENT;
990   case XML_TOK_NAME_QUESTION:
991     state->handler = element7;
992     return XML_ROLE_CONTENT_ELEMENT_OPT;
993   case XML_TOK_NAME_ASTERISK:
994     state->handler = element7;
995     return XML_ROLE_CONTENT_ELEMENT_REP;
996   case XML_TOK_NAME_PLUS:
997     state->handler = element7;
998     return XML_ROLE_CONTENT_ELEMENT_PLUS;
999   }
1000   return common(state, tok);
1001 }
1002 
1003 static
element3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1004 int element3(PROLOG_STATE *state,
1005 	     int tok,
1006 	     const char *ptr,
1007 	     const char *end,
1008 	     const ENCODING *enc)
1009 {
1010   switch (tok) {
1011   case XML_TOK_PROLOG_S:
1012     return XML_ROLE_NONE;
1013   case XML_TOK_CLOSE_PAREN:
1014     state->handler = declClose;
1015     return XML_ROLE_GROUP_CLOSE;
1016   case XML_TOK_CLOSE_PAREN_ASTERISK:
1017     state->handler = declClose;
1018     return XML_ROLE_GROUP_CLOSE_REP;
1019   case XML_TOK_OR:
1020     state->handler = element4;
1021     return XML_ROLE_NONE;
1022   }
1023   return common(state, tok);
1024 }
1025 
1026 static
element4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1027 int element4(PROLOG_STATE *state,
1028 	     int tok,
1029 	     const char *ptr,
1030 	     const char *end,
1031 	     const ENCODING *enc)
1032 {
1033   switch (tok) {
1034   case XML_TOK_PROLOG_S:
1035     return XML_ROLE_NONE;
1036   case XML_TOK_NAME:
1037   case XML_TOK_PREFIXED_NAME:
1038     state->handler = element5;
1039     return XML_ROLE_CONTENT_ELEMENT;
1040   }
1041   return common(state, tok);
1042 }
1043 
1044 static
element5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1045 int element5(PROLOG_STATE *state,
1046 	     int tok,
1047 	     const char *ptr,
1048 	     const char *end,
1049 	     const ENCODING *enc)
1050 {
1051   switch (tok) {
1052   case XML_TOK_PROLOG_S:
1053     return XML_ROLE_NONE;
1054   case XML_TOK_CLOSE_PAREN_ASTERISK:
1055     state->handler = declClose;
1056     return XML_ROLE_GROUP_CLOSE_REP;
1057   case XML_TOK_OR:
1058     state->handler = element4;
1059     return XML_ROLE_NONE;
1060   }
1061   return common(state, tok);
1062 }
1063 
1064 static
element6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1065 int element6(PROLOG_STATE *state,
1066 	     int tok,
1067 	     const char *ptr,
1068 	     const char *end,
1069 	     const ENCODING *enc)
1070 {
1071   switch (tok) {
1072   case XML_TOK_PROLOG_S:
1073     return XML_ROLE_NONE;
1074   case XML_TOK_OPEN_PAREN:
1075     state->level += 1;
1076     return XML_ROLE_GROUP_OPEN;
1077   case XML_TOK_NAME:
1078   case XML_TOK_PREFIXED_NAME:
1079     state->handler = element7;
1080     return XML_ROLE_CONTENT_ELEMENT;
1081   case XML_TOK_NAME_QUESTION:
1082     state->handler = element7;
1083     return XML_ROLE_CONTENT_ELEMENT_OPT;
1084   case XML_TOK_NAME_ASTERISK:
1085     state->handler = element7;
1086     return XML_ROLE_CONTENT_ELEMENT_REP;
1087   case XML_TOK_NAME_PLUS:
1088     state->handler = element7;
1089     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1090   }
1091   return common(state, tok);
1092 }
1093 
1094 static
element7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1095 int element7(PROLOG_STATE *state,
1096 	     int tok,
1097 	     const char *ptr,
1098 	     const char *end,
1099 	     const ENCODING *enc)
1100 {
1101   switch (tok) {
1102   case XML_TOK_PROLOG_S:
1103     return XML_ROLE_NONE;
1104   case XML_TOK_CLOSE_PAREN:
1105     state->level -= 1;
1106     if (state->level == 0)
1107       state->handler = declClose;
1108     return XML_ROLE_GROUP_CLOSE;
1109   case XML_TOK_CLOSE_PAREN_ASTERISK:
1110     state->level -= 1;
1111     if (state->level == 0)
1112       state->handler = declClose;
1113     return XML_ROLE_GROUP_CLOSE_REP;
1114   case XML_TOK_CLOSE_PAREN_QUESTION:
1115     state->level -= 1;
1116     if (state->level == 0)
1117       state->handler = declClose;
1118     return XML_ROLE_GROUP_CLOSE_OPT;
1119   case XML_TOK_CLOSE_PAREN_PLUS:
1120     state->level -= 1;
1121     if (state->level == 0)
1122       state->handler = declClose;
1123     return XML_ROLE_GROUP_CLOSE_PLUS;
1124   case XML_TOK_COMMA:
1125     state->handler = element6;
1126     return XML_ROLE_GROUP_SEQUENCE;
1127   case XML_TOK_OR:
1128     state->handler = element6;
1129     return XML_ROLE_GROUP_CHOICE;
1130   }
1131   return common(state, tok);
1132 }
1133 
1134 #ifdef XML_DTD
1135 
1136 static
condSect0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1137 int condSect0(PROLOG_STATE *state,
1138 	      int tok,
1139 	      const char *ptr,
1140 	      const char *end,
1141 	      const ENCODING *enc)
1142 {
1143   switch (tok) {
1144   case XML_TOK_PROLOG_S:
1145     return XML_ROLE_NONE;
1146   case XML_TOK_NAME:
1147     if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1148       state->handler = condSect1;
1149       return XML_ROLE_NONE;
1150     }
1151     if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1152       state->handler = condSect2;
1153       return XML_ROLE_NONE;
1154     }
1155     break;
1156   }
1157   return common(state, tok);
1158 }
1159 
1160 static
condSect1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1161 int condSect1(PROLOG_STATE *state,
1162 	      int tok,
1163 	      const char *ptr,
1164 	      const char *end,
1165 	      const ENCODING *enc)
1166 {
1167   switch (tok) {
1168   case XML_TOK_PROLOG_S:
1169     return XML_ROLE_NONE;
1170   case XML_TOK_OPEN_BRACKET:
1171     state->handler = externalSubset1;
1172     state->includeLevel += 1;
1173     return XML_ROLE_NONE;
1174   }
1175   return common(state, tok);
1176 }
1177 
1178 static
condSect2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1179 int condSect2(PROLOG_STATE *state,
1180 	      int tok,
1181 	      const char *ptr,
1182 	      const char *end,
1183 	      const ENCODING *enc)
1184 {
1185   switch (tok) {
1186   case XML_TOK_PROLOG_S:
1187     return XML_ROLE_NONE;
1188   case XML_TOK_OPEN_BRACKET:
1189     state->handler = externalSubset1;
1190     return XML_ROLE_IGNORE_SECT;
1191   }
1192   return common(state, tok);
1193 }
1194 
1195 #endif /* XML_DTD */
1196 
1197 static
declClose(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1198 int declClose(PROLOG_STATE *state,
1199 	      int tok,
1200 	      const char *ptr,
1201 	      const char *end,
1202 	      const ENCODING *enc)
1203 {
1204   switch (tok) {
1205   case XML_TOK_PROLOG_S:
1206     return XML_ROLE_NONE;
1207   case XML_TOK_DECL_CLOSE:
1208     setTopLevel(state);
1209     return XML_ROLE_NONE;
1210   }
1211   return common(state, tok);
1212 }
1213 
1214 #if 0
1215 
/* Disabled handler: this definition sits inside an "#if 0" region, so it is
   never compiled.

   As written, XML_TOK_DECL_CLOSE switches the handler to internalSubset and
   returns the literal 0; every other token returns XML_ROLE_NONE and leaves
   the handler unchanged. */
static
int ignore(PROLOG_STATE *state,
	   int tok,
	   const char *ptr,
	   const char *end,
	   const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_DECL_CLOSE:
    state->handler = internalSubset;
    return 0;
  default:
    return XML_ROLE_NONE;
  }
  /* Unreachable: both branches of the switch above return. */
  return common(state, tok);
}
1232 #endif
1233 
1234 static
error(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1235 int error(PROLOG_STATE *state,
1236 	  int tok,
1237 	  const char *ptr,
1238 	  const char *end,
1239 	  const ENCODING *enc)
1240 {
1241   return XML_ROLE_NONE;
1242 }
1243 
1244 static
common(PROLOG_STATE * state,int tok)1245 int common(PROLOG_STATE *state, int tok)
1246 {
1247 #ifdef XML_DTD
1248   if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1249     return XML_ROLE_INNER_PARAM_ENTITY_REF;
1250 #endif
1251   state->handler = error;
1252   return XML_ROLE_ERROR;
1253 }
1254 
/*
 * Initialise *state with prolog0 as its handler.
 *
 * When XML_DTD is defined, the state is additionally flagged as belonging
 * to the document entity (documentEntity = 1) and includeLevel is reset
 * to 0.
 */
void XmlPrologStateInit(PROLOG_STATE *state)
{
#ifdef XML_DTD
  state->includeLevel = 0;
  state->documentEntity = 1;
#endif /* XML_DTD */
  state->handler = prolog0;
}
1263 
1264 #ifdef XML_DTD
1265 
/*
 * Initialise *state with externalSubset0 as its handler (only built when
 * XML_DTD is defined).
 *
 * documentEntity is cleared, which is the condition common() checks before
 * accepting XML_TOK_PARAM_ENTITY_REF, and includeLevel is reset to 0.
 */
void XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
{
  state->includeLevel = 0;
  state->documentEntity = 0;
  state->handler = externalSubset0;
}
1272 
1273 #endif /* XML_DTD */
1274