1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000-2017 Expat development team
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32 
33 #include <stddef.h>
34 
35 #ifdef _WIN32
36 #include "winconfig.h"
37 #else
38 #ifdef HAVE_EXPAT_CONFIG_H
39 #include <expat_config.h>
40 #endif
41 #endif /* ndef _WIN32 */
42 
43 #include "expat_external.h"
44 #include "internal.h"
45 #include "xmlrole.h"
46 #include "ascii.h"
47 
/* This module doesn't check:

 that ',' and '|' are not mixed within a single model group
 the content of literals

*/
54 
55 static const char KW_ANY[] = {
56     ASCII_A, ASCII_N, ASCII_Y, '\0' };
57 static const char KW_ATTLIST[] = {
58     ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
59 static const char KW_CDATA[] = {
60     ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
61 static const char KW_DOCTYPE[] = {
62     ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
63 static const char KW_ELEMENT[] = {
64     ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
65 static const char KW_EMPTY[] = {
66     ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
67 static const char KW_ENTITIES[] = {
68     ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
69     '\0' };
70 static const char KW_ENTITY[] = {
71     ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
72 static const char KW_FIXED[] = {
73     ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
74 static const char KW_ID[] = {
75     ASCII_I, ASCII_D, '\0' };
76 static const char KW_IDREF[] = {
77     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
78 static const char KW_IDREFS[] = {
79     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
80 #ifdef XML_DTD
81 static const char KW_IGNORE[] = {
82     ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
83 #endif
84 static const char KW_IMPLIED[] = {
85     ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
86 #ifdef XML_DTD
87 static const char KW_INCLUDE[] = {
88     ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
89 #endif
90 static const char KW_NDATA[] = {
91     ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
92 static const char KW_NMTOKEN[] = {
93     ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
94 static const char KW_NMTOKENS[] = {
95     ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
96     '\0' };
97 static const char KW_NOTATION[] =
98     { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
99       '\0' };
100 static const char KW_PCDATA[] = {
101     ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
102 static const char KW_PUBLIC[] = {
103     ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
104 static const char KW_REQUIRED[] = {
105     ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
106     '\0' };
107 static const char KW_SYSTEM[] = {
108     ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
109 
/* Minimum number of bytes one character occupies in the given encoding.
 * Only defined here when no definition has been supplied beforehand.
 */
#if !defined(MIN_BYTES_PER_CHAR)
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif /* !defined(MIN_BYTES_PER_CHAR) */
113 
/* Return the state machine to its top-level handler once a declaration is
 * complete.  Without XML_DTD that is always internalSubset; with XML_DTD
 * it is internalSubset when state->documentEntity is non-zero and
 * externalSubset1 otherwise.
 */
#ifndef XML_DTD
#define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#define setTopLevel(state) \
  ((state)->handler = ((state)->documentEntity \
                       ? internalSubset \
                       : externalSubset1))
#endif /* XML_DTD */
122 
123 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
124                                    int tok,
125                                    const char *ptr,
126                                    const char *end,
127                                    const ENCODING *enc);
128 
129 static PROLOG_HANDLER
130   prolog0, prolog1, prolog2,
131   doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
132   internalSubset,
133   entity0, entity1, entity2, entity3, entity4, entity5, entity6,
134   entity7, entity8, entity9, entity10,
135   notation0, notation1, notation2, notation3, notation4,
136   attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
137   attlist7, attlist8, attlist9,
138   element0, element1, element2, element3, element4, element5, element6,
139   element7,
140 #ifdef XML_DTD
141   externalSubset0, externalSubset1,
142   condSect0, condSect1, condSect2,
143 #endif /* XML_DTD */
144   declClose,
145   error;
146 
147 static int FASTCALL common(PROLOG_STATE *state, int tok);
148 
149 static int PTRCALL
prolog0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)150 prolog0(PROLOG_STATE *state,
151         int tok,
152         const char *ptr,
153         const char *end,
154         const ENCODING *enc)
155 {
156   switch (tok) {
157   case XML_TOK_PROLOG_S:
158     state->handler = prolog1;
159     return XML_ROLE_NONE;
160   case XML_TOK_XML_DECL:
161     state->handler = prolog1;
162     return XML_ROLE_XML_DECL;
163   case XML_TOK_PI:
164     state->handler = prolog1;
165     return XML_ROLE_PI;
166   case XML_TOK_COMMENT:
167     state->handler = prolog1;
168     return XML_ROLE_COMMENT;
169   case XML_TOK_BOM:
170     return XML_ROLE_NONE;
171   case XML_TOK_DECL_OPEN:
172     if (!XmlNameMatchesAscii(enc,
173                              ptr + 2 * MIN_BYTES_PER_CHAR(enc),
174                              end,
175                              KW_DOCTYPE))
176       break;
177     state->handler = doctype0;
178     return XML_ROLE_DOCTYPE_NONE;
179   case XML_TOK_INSTANCE_START:
180     state->handler = error;
181     return XML_ROLE_INSTANCE_START;
182   }
183   return common(state, tok);
184 }
185 
186 static int PTRCALL
prolog1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)187 prolog1(PROLOG_STATE *state,
188         int tok,
189         const char *ptr,
190         const char *end,
191         const ENCODING *enc)
192 {
193   switch (tok) {
194   case XML_TOK_PROLOG_S:
195     return XML_ROLE_NONE;
196   case XML_TOK_PI:
197     return XML_ROLE_PI;
198   case XML_TOK_COMMENT:
199     return XML_ROLE_COMMENT;
200   case XML_TOK_BOM:
201     /* This case can never arise.  To reach this role function, the
202      * parse must have passed through prolog0 and therefore have had
203      * some form of input, even if only a space.  At that point, a
204      * byte order mark is no longer a valid character (though
205      * technically it should be interpreted as a non-breaking space),
206      * so will be rejected by the tokenizing stages.
207      */
208     return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
209   case XML_TOK_DECL_OPEN:
210     if (!XmlNameMatchesAscii(enc,
211                              ptr + 2 * MIN_BYTES_PER_CHAR(enc),
212                              end,
213                              KW_DOCTYPE))
214       break;
215     state->handler = doctype0;
216     return XML_ROLE_DOCTYPE_NONE;
217   case XML_TOK_INSTANCE_START:
218     state->handler = error;
219     return XML_ROLE_INSTANCE_START;
220   }
221   return common(state, tok);
222 }
223 
224 static int PTRCALL
prolog2(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))225 prolog2(PROLOG_STATE *state,
226         int tok,
227         const char *UNUSED_P(ptr),
228         const char *UNUSED_P(end),
229         const ENCODING *UNUSED_P(enc))
230 {
231   switch (tok) {
232   case XML_TOK_PROLOG_S:
233     return XML_ROLE_NONE;
234   case XML_TOK_PI:
235     return XML_ROLE_PI;
236   case XML_TOK_COMMENT:
237     return XML_ROLE_COMMENT;
238   case XML_TOK_INSTANCE_START:
239     state->handler = error;
240     return XML_ROLE_INSTANCE_START;
241   }
242   return common(state, tok);
243 }
244 
245 static int PTRCALL
doctype0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))246 doctype0(PROLOG_STATE *state,
247          int tok,
248          const char *UNUSED_P(ptr),
249          const char *UNUSED_P(end),
250          const ENCODING *UNUSED_P(enc))
251 {
252   switch (tok) {
253   case XML_TOK_PROLOG_S:
254     return XML_ROLE_DOCTYPE_NONE;
255   case XML_TOK_NAME:
256   case XML_TOK_PREFIXED_NAME:
257     state->handler = doctype1;
258     return XML_ROLE_DOCTYPE_NAME;
259   }
260   return common(state, tok);
261 }
262 
263 static int PTRCALL
doctype1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)264 doctype1(PROLOG_STATE *state,
265          int tok,
266          const char *ptr,
267          const char *end,
268          const ENCODING *enc)
269 {
270   switch (tok) {
271   case XML_TOK_PROLOG_S:
272     return XML_ROLE_DOCTYPE_NONE;
273   case XML_TOK_OPEN_BRACKET:
274     state->handler = internalSubset;
275     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
276   case XML_TOK_DECL_CLOSE:
277     state->handler = prolog2;
278     return XML_ROLE_DOCTYPE_CLOSE;
279   case XML_TOK_NAME:
280     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
281       state->handler = doctype3;
282       return XML_ROLE_DOCTYPE_NONE;
283     }
284     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
285       state->handler = doctype2;
286       return XML_ROLE_DOCTYPE_NONE;
287     }
288     break;
289   }
290   return common(state, tok);
291 }
292 
293 static int PTRCALL
doctype2(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))294 doctype2(PROLOG_STATE *state,
295          int tok,
296          const char *UNUSED_P(ptr),
297          const char *UNUSED_P(end),
298          const ENCODING *UNUSED_P(enc))
299 {
300   switch (tok) {
301   case XML_TOK_PROLOG_S:
302     return XML_ROLE_DOCTYPE_NONE;
303   case XML_TOK_LITERAL:
304     state->handler = doctype3;
305     return XML_ROLE_DOCTYPE_PUBLIC_ID;
306   }
307   return common(state, tok);
308 }
309 
310 static int PTRCALL
doctype3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))311 doctype3(PROLOG_STATE *state,
312          int tok,
313          const char *UNUSED_P(ptr),
314          const char *UNUSED_P(end),
315          const ENCODING *UNUSED_P(enc))
316 {
317   switch (tok) {
318   case XML_TOK_PROLOG_S:
319     return XML_ROLE_DOCTYPE_NONE;
320   case XML_TOK_LITERAL:
321     state->handler = doctype4;
322     return XML_ROLE_DOCTYPE_SYSTEM_ID;
323   }
324   return common(state, tok);
325 }
326 
327 static int PTRCALL
doctype4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))328 doctype4(PROLOG_STATE *state,
329          int tok,
330          const char *UNUSED_P(ptr),
331          const char *UNUSED_P(end),
332          const ENCODING *UNUSED_P(enc))
333 {
334   switch (tok) {
335   case XML_TOK_PROLOG_S:
336     return XML_ROLE_DOCTYPE_NONE;
337   case XML_TOK_OPEN_BRACKET:
338     state->handler = internalSubset;
339     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
340   case XML_TOK_DECL_CLOSE:
341     state->handler = prolog2;
342     return XML_ROLE_DOCTYPE_CLOSE;
343   }
344   return common(state, tok);
345 }
346 
347 static int PTRCALL
doctype5(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))348 doctype5(PROLOG_STATE *state,
349          int tok,
350          const char *UNUSED_P(ptr),
351          const char *UNUSED_P(end),
352          const ENCODING *UNUSED_P(enc))
353 {
354   switch (tok) {
355   case XML_TOK_PROLOG_S:
356     return XML_ROLE_DOCTYPE_NONE;
357   case XML_TOK_DECL_CLOSE:
358     state->handler = prolog2;
359     return XML_ROLE_DOCTYPE_CLOSE;
360   }
361   return common(state, tok);
362 }
363 
364 static int PTRCALL
internalSubset(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)365 internalSubset(PROLOG_STATE *state,
366                int tok,
367                const char *ptr,
368                const char *end,
369                const ENCODING *enc)
370 {
371   switch (tok) {
372   case XML_TOK_PROLOG_S:
373     return XML_ROLE_NONE;
374   case XML_TOK_DECL_OPEN:
375     if (XmlNameMatchesAscii(enc,
376                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
377                             end,
378                             KW_ENTITY)) {
379       state->handler = entity0;
380       return XML_ROLE_ENTITY_NONE;
381     }
382     if (XmlNameMatchesAscii(enc,
383                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
384                             end,
385                             KW_ATTLIST)) {
386       state->handler = attlist0;
387       return XML_ROLE_ATTLIST_NONE;
388     }
389     if (XmlNameMatchesAscii(enc,
390                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
391                             end,
392                             KW_ELEMENT)) {
393       state->handler = element0;
394       return XML_ROLE_ELEMENT_NONE;
395     }
396     if (XmlNameMatchesAscii(enc,
397                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
398                             end,
399                             KW_NOTATION)) {
400       state->handler = notation0;
401       return XML_ROLE_NOTATION_NONE;
402     }
403     break;
404   case XML_TOK_PI:
405     return XML_ROLE_PI;
406   case XML_TOK_COMMENT:
407     return XML_ROLE_COMMENT;
408   case XML_TOK_PARAM_ENTITY_REF:
409     return XML_ROLE_PARAM_ENTITY_REF;
410   case XML_TOK_CLOSE_BRACKET:
411     state->handler = doctype5;
412     return XML_ROLE_DOCTYPE_NONE;
413   case XML_TOK_NONE:
414     return XML_ROLE_NONE;
415   }
416   return common(state, tok);
417 }
418 
419 #ifdef XML_DTD
420 
421 static int PTRCALL
externalSubset0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)422 externalSubset0(PROLOG_STATE *state,
423                 int tok,
424                 const char *ptr,
425                 const char *end,
426                 const ENCODING *enc)
427 {
428   state->handler = externalSubset1;
429   if (tok == XML_TOK_XML_DECL)
430     return XML_ROLE_TEXT_DECL;
431   return externalSubset1(state, tok, ptr, end, enc);
432 }
433 
434 static int PTRCALL
externalSubset1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)435 externalSubset1(PROLOG_STATE *state,
436                 int tok,
437                 const char *ptr,
438                 const char *end,
439                 const ENCODING *enc)
440 {
441   switch (tok) {
442   case XML_TOK_COND_SECT_OPEN:
443     state->handler = condSect0;
444     return XML_ROLE_NONE;
445   case XML_TOK_COND_SECT_CLOSE:
446     if (state->includeLevel == 0)
447       break;
448     state->includeLevel -= 1;
449     return XML_ROLE_NONE;
450   case XML_TOK_PROLOG_S:
451     return XML_ROLE_NONE;
452   case XML_TOK_CLOSE_BRACKET:
453     break;
454   case XML_TOK_NONE:
455     if (state->includeLevel)
456       break;
457     return XML_ROLE_NONE;
458   default:
459     return internalSubset(state, tok, ptr, end, enc);
460   }
461   return common(state, tok);
462 }
463 
464 #endif /* XML_DTD */
465 
466 static int PTRCALL
entity0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))467 entity0(PROLOG_STATE *state,
468         int tok,
469         const char *UNUSED_P(ptr),
470         const char *UNUSED_P(end),
471         const ENCODING *UNUSED_P(enc))
472 {
473   switch (tok) {
474   case XML_TOK_PROLOG_S:
475     return XML_ROLE_ENTITY_NONE;
476   case XML_TOK_PERCENT:
477     state->handler = entity1;
478     return XML_ROLE_ENTITY_NONE;
479   case XML_TOK_NAME:
480     state->handler = entity2;
481     return XML_ROLE_GENERAL_ENTITY_NAME;
482   }
483   return common(state, tok);
484 }
485 
486 static int PTRCALL
entity1(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))487 entity1(PROLOG_STATE *state,
488         int tok,
489         const char *UNUSED_P(ptr),
490         const char *UNUSED_P(end),
491         const ENCODING *UNUSED_P(enc))
492 {
493   switch (tok) {
494   case XML_TOK_PROLOG_S:
495     return XML_ROLE_ENTITY_NONE;
496   case XML_TOK_NAME:
497     state->handler = entity7;
498     return XML_ROLE_PARAM_ENTITY_NAME;
499   }
500   return common(state, tok);
501 }
502 
503 static int PTRCALL
entity2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)504 entity2(PROLOG_STATE *state,
505         int tok,
506         const char *ptr,
507         const char *end,
508         const ENCODING *enc)
509 {
510   switch (tok) {
511   case XML_TOK_PROLOG_S:
512     return XML_ROLE_ENTITY_NONE;
513   case XML_TOK_NAME:
514     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
515       state->handler = entity4;
516       return XML_ROLE_ENTITY_NONE;
517     }
518     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
519       state->handler = entity3;
520       return XML_ROLE_ENTITY_NONE;
521     }
522     break;
523   case XML_TOK_LITERAL:
524     state->handler = declClose;
525     state->role_none = XML_ROLE_ENTITY_NONE;
526     return XML_ROLE_ENTITY_VALUE;
527   }
528   return common(state, tok);
529 }
530 
531 static int PTRCALL
entity3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))532 entity3(PROLOG_STATE *state,
533         int tok,
534         const char *UNUSED_P(ptr),
535         const char *UNUSED_P(end),
536         const ENCODING *UNUSED_P(enc))
537 {
538   switch (tok) {
539   case XML_TOK_PROLOG_S:
540     return XML_ROLE_ENTITY_NONE;
541   case XML_TOK_LITERAL:
542     state->handler = entity4;
543     return XML_ROLE_ENTITY_PUBLIC_ID;
544   }
545   return common(state, tok);
546 }
547 
548 static int PTRCALL
entity4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))549 entity4(PROLOG_STATE *state,
550         int tok,
551         const char *UNUSED_P(ptr),
552         const char *UNUSED_P(end),
553         const ENCODING *UNUSED_P(enc))
554 {
555   switch (tok) {
556   case XML_TOK_PROLOG_S:
557     return XML_ROLE_ENTITY_NONE;
558   case XML_TOK_LITERAL:
559     state->handler = entity5;
560     return XML_ROLE_ENTITY_SYSTEM_ID;
561   }
562   return common(state, tok);
563 }
564 
565 static int PTRCALL
entity5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)566 entity5(PROLOG_STATE *state,
567         int tok,
568         const char *ptr,
569         const char *end,
570         const ENCODING *enc)
571 {
572   switch (tok) {
573   case XML_TOK_PROLOG_S:
574     return XML_ROLE_ENTITY_NONE;
575   case XML_TOK_DECL_CLOSE:
576     setTopLevel(state);
577     return XML_ROLE_ENTITY_COMPLETE;
578   case XML_TOK_NAME:
579     if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
580       state->handler = entity6;
581       return XML_ROLE_ENTITY_NONE;
582     }
583     break;
584   }
585   return common(state, tok);
586 }
587 
588 static int PTRCALL
entity6(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))589 entity6(PROLOG_STATE *state,
590         int tok,
591         const char *UNUSED_P(ptr),
592         const char *UNUSED_P(end),
593         const ENCODING *UNUSED_P(enc))
594 {
595   switch (tok) {
596   case XML_TOK_PROLOG_S:
597     return XML_ROLE_ENTITY_NONE;
598   case XML_TOK_NAME:
599     state->handler = declClose;
600     state->role_none = XML_ROLE_ENTITY_NONE;
601     return XML_ROLE_ENTITY_NOTATION_NAME;
602   }
603   return common(state, tok);
604 }
605 
606 static int PTRCALL
entity7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)607 entity7(PROLOG_STATE *state,
608         int tok,
609         const char *ptr,
610         const char *end,
611         const ENCODING *enc)
612 {
613   switch (tok) {
614   case XML_TOK_PROLOG_S:
615     return XML_ROLE_ENTITY_NONE;
616   case XML_TOK_NAME:
617     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
618       state->handler = entity9;
619       return XML_ROLE_ENTITY_NONE;
620     }
621     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
622       state->handler = entity8;
623       return XML_ROLE_ENTITY_NONE;
624     }
625     break;
626   case XML_TOK_LITERAL:
627     state->handler = declClose;
628     state->role_none = XML_ROLE_ENTITY_NONE;
629     return XML_ROLE_ENTITY_VALUE;
630   }
631   return common(state, tok);
632 }
633 
634 static int PTRCALL
entity8(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))635 entity8(PROLOG_STATE *state,
636         int tok,
637         const char *UNUSED_P(ptr),
638         const char *UNUSED_P(end),
639         const ENCODING *UNUSED_P(enc))
640 {
641   switch (tok) {
642   case XML_TOK_PROLOG_S:
643     return XML_ROLE_ENTITY_NONE;
644   case XML_TOK_LITERAL:
645     state->handler = entity9;
646     return XML_ROLE_ENTITY_PUBLIC_ID;
647   }
648   return common(state, tok);
649 }
650 
651 static int PTRCALL
entity9(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))652 entity9(PROLOG_STATE *state,
653         int tok,
654         const char *UNUSED_P(ptr),
655         const char *UNUSED_P(end),
656         const ENCODING *UNUSED_P(enc))
657 {
658   switch (tok) {
659   case XML_TOK_PROLOG_S:
660     return XML_ROLE_ENTITY_NONE;
661   case XML_TOK_LITERAL:
662     state->handler = entity10;
663     return XML_ROLE_ENTITY_SYSTEM_ID;
664   }
665   return common(state, tok);
666 }
667 
668 static int PTRCALL
entity10(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))669 entity10(PROLOG_STATE *state,
670          int tok,
671          const char *UNUSED_P(ptr),
672          const char *UNUSED_P(end),
673          const ENCODING *UNUSED_P(enc))
674 {
675   switch (tok) {
676   case XML_TOK_PROLOG_S:
677     return XML_ROLE_ENTITY_NONE;
678   case XML_TOK_DECL_CLOSE:
679     setTopLevel(state);
680     return XML_ROLE_ENTITY_COMPLETE;
681   }
682   return common(state, tok);
683 }
684 
685 static int PTRCALL
notation0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))686 notation0(PROLOG_STATE *state,
687           int tok,
688           const char *UNUSED_P(ptr),
689           const char *UNUSED_P(end),
690           const ENCODING *UNUSED_P(enc))
691 {
692   switch (tok) {
693   case XML_TOK_PROLOG_S:
694     return XML_ROLE_NOTATION_NONE;
695   case XML_TOK_NAME:
696     state->handler = notation1;
697     return XML_ROLE_NOTATION_NAME;
698   }
699   return common(state, tok);
700 }
701 
702 static int PTRCALL
notation1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)703 notation1(PROLOG_STATE *state,
704           int tok,
705           const char *ptr,
706           const char *end,
707           const ENCODING *enc)
708 {
709   switch (tok) {
710   case XML_TOK_PROLOG_S:
711     return XML_ROLE_NOTATION_NONE;
712   case XML_TOK_NAME:
713     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
714       state->handler = notation3;
715       return XML_ROLE_NOTATION_NONE;
716     }
717     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
718       state->handler = notation2;
719       return XML_ROLE_NOTATION_NONE;
720     }
721     break;
722   }
723   return common(state, tok);
724 }
725 
726 static int PTRCALL
notation2(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))727 notation2(PROLOG_STATE *state,
728           int tok,
729           const char *UNUSED_P(ptr),
730           const char *UNUSED_P(end),
731           const ENCODING *UNUSED_P(enc))
732 {
733   switch (tok) {
734   case XML_TOK_PROLOG_S:
735     return XML_ROLE_NOTATION_NONE;
736   case XML_TOK_LITERAL:
737     state->handler = notation4;
738     return XML_ROLE_NOTATION_PUBLIC_ID;
739   }
740   return common(state, tok);
741 }
742 
743 static int PTRCALL
notation3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))744 notation3(PROLOG_STATE *state,
745           int tok,
746           const char *UNUSED_P(ptr),
747           const char *UNUSED_P(end),
748           const ENCODING *UNUSED_P(enc))
749 {
750   switch (tok) {
751   case XML_TOK_PROLOG_S:
752     return XML_ROLE_NOTATION_NONE;
753   case XML_TOK_LITERAL:
754     state->handler = declClose;
755     state->role_none = XML_ROLE_NOTATION_NONE;
756     return XML_ROLE_NOTATION_SYSTEM_ID;
757   }
758   return common(state, tok);
759 }
760 
761 static int PTRCALL
notation4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))762 notation4(PROLOG_STATE *state,
763           int tok,
764           const char *UNUSED_P(ptr),
765           const char *UNUSED_P(end),
766           const ENCODING *UNUSED_P(enc))
767 {
768   switch (tok) {
769   case XML_TOK_PROLOG_S:
770     return XML_ROLE_NOTATION_NONE;
771   case XML_TOK_LITERAL:
772     state->handler = declClose;
773     state->role_none = XML_ROLE_NOTATION_NONE;
774     return XML_ROLE_NOTATION_SYSTEM_ID;
775   case XML_TOK_DECL_CLOSE:
776     setTopLevel(state);
777     return XML_ROLE_NOTATION_NO_SYSTEM_ID;
778   }
779   return common(state, tok);
780 }
781 
782 static int PTRCALL
attlist0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))783 attlist0(PROLOG_STATE *state,
784          int tok,
785          const char *UNUSED_P(ptr),
786          const char *UNUSED_P(end),
787          const ENCODING *UNUSED_P(enc))
788 {
789   switch (tok) {
790   case XML_TOK_PROLOG_S:
791     return XML_ROLE_ATTLIST_NONE;
792   case XML_TOK_NAME:
793   case XML_TOK_PREFIXED_NAME:
794     state->handler = attlist1;
795     return XML_ROLE_ATTLIST_ELEMENT_NAME;
796   }
797   return common(state, tok);
798 }
799 
800 static int PTRCALL
attlist1(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))801 attlist1(PROLOG_STATE *state,
802          int tok,
803          const char *UNUSED_P(ptr),
804          const char *UNUSED_P(end),
805          const ENCODING *UNUSED_P(enc))
806 {
807   switch (tok) {
808   case XML_TOK_PROLOG_S:
809     return XML_ROLE_ATTLIST_NONE;
810   case XML_TOK_DECL_CLOSE:
811     setTopLevel(state);
812     return XML_ROLE_ATTLIST_NONE;
813   case XML_TOK_NAME:
814   case XML_TOK_PREFIXED_NAME:
815     state->handler = attlist2;
816     return XML_ROLE_ATTRIBUTE_NAME;
817   }
818   return common(state, tok);
819 }
820 
821 static int PTRCALL
attlist2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)822 attlist2(PROLOG_STATE *state,
823          int tok,
824          const char *ptr,
825          const char *end,
826          const ENCODING *enc)
827 {
828   switch (tok) {
829   case XML_TOK_PROLOG_S:
830     return XML_ROLE_ATTLIST_NONE;
831   case XML_TOK_NAME:
832     {
833       static const char * const types[] = {
834         KW_CDATA,
835         KW_ID,
836         KW_IDREF,
837         KW_IDREFS,
838         KW_ENTITY,
839         KW_ENTITIES,
840         KW_NMTOKEN,
841         KW_NMTOKENS,
842       };
843       int i;
844       for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
845         if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
846           state->handler = attlist8;
847           return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
848         }
849     }
850     if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
851       state->handler = attlist5;
852       return XML_ROLE_ATTLIST_NONE;
853     }
854     break;
855   case XML_TOK_OPEN_PAREN:
856     state->handler = attlist3;
857     return XML_ROLE_ATTLIST_NONE;
858   }
859   return common(state, tok);
860 }
861 
862 static int PTRCALL
attlist3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))863 attlist3(PROLOG_STATE *state,
864          int tok,
865          const char *UNUSED_P(ptr),
866          const char *UNUSED_P(end),
867          const ENCODING *UNUSED_P(enc))
868 {
869   switch (tok) {
870   case XML_TOK_PROLOG_S:
871     return XML_ROLE_ATTLIST_NONE;
872   case XML_TOK_NMTOKEN:
873   case XML_TOK_NAME:
874   case XML_TOK_PREFIXED_NAME:
875     state->handler = attlist4;
876     return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
877   }
878   return common(state, tok);
879 }
880 
881 static int PTRCALL
attlist4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))882 attlist4(PROLOG_STATE *state,
883          int tok,
884          const char *UNUSED_P(ptr),
885          const char *UNUSED_P(end),
886          const ENCODING *UNUSED_P(enc))
887 {
888   switch (tok) {
889   case XML_TOK_PROLOG_S:
890     return XML_ROLE_ATTLIST_NONE;
891   case XML_TOK_CLOSE_PAREN:
892     state->handler = attlist8;
893     return XML_ROLE_ATTLIST_NONE;
894   case XML_TOK_OR:
895     state->handler = attlist3;
896     return XML_ROLE_ATTLIST_NONE;
897   }
898   return common(state, tok);
899 }
900 
901 static int PTRCALL
attlist5(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))902 attlist5(PROLOG_STATE *state,
903          int tok,
904          const char *UNUSED_P(ptr),
905          const char *UNUSED_P(end),
906          const ENCODING *UNUSED_P(enc))
907 {
908   switch (tok) {
909   case XML_TOK_PROLOG_S:
910     return XML_ROLE_ATTLIST_NONE;
911   case XML_TOK_OPEN_PAREN:
912     state->handler = attlist6;
913     return XML_ROLE_ATTLIST_NONE;
914   }
915   return common(state, tok);
916 }
917 
918 static int PTRCALL
attlist6(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))919 attlist6(PROLOG_STATE *state,
920          int tok,
921          const char *UNUSED_P(ptr),
922          const char *UNUSED_P(end),
923          const ENCODING *UNUSED_P(enc))
924 {
925   switch (tok) {
926   case XML_TOK_PROLOG_S:
927     return XML_ROLE_ATTLIST_NONE;
928   case XML_TOK_NAME:
929     state->handler = attlist7;
930     return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
931   }
932   return common(state, tok);
933 }
934 
935 static int PTRCALL
attlist7(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))936 attlist7(PROLOG_STATE *state,
937          int tok,
938          const char *UNUSED_P(ptr),
939          const char *UNUSED_P(end),
940          const ENCODING *UNUSED_P(enc))
941 {
942   switch (tok) {
943   case XML_TOK_PROLOG_S:
944     return XML_ROLE_ATTLIST_NONE;
945   case XML_TOK_CLOSE_PAREN:
946     state->handler = attlist8;
947     return XML_ROLE_ATTLIST_NONE;
948   case XML_TOK_OR:
949     state->handler = attlist6;
950     return XML_ROLE_ATTLIST_NONE;
951   }
952   return common(state, tok);
953 }
954 
955 /* default value */
956 static int PTRCALL
attlist8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)957 attlist8(PROLOG_STATE *state,
958          int tok,
959          const char *ptr,
960          const char *end,
961          const ENCODING *enc)
962 {
963   switch (tok) {
964   case XML_TOK_PROLOG_S:
965     return XML_ROLE_ATTLIST_NONE;
966   case XML_TOK_POUND_NAME:
967     if (XmlNameMatchesAscii(enc,
968                             ptr + MIN_BYTES_PER_CHAR(enc),
969                             end,
970                             KW_IMPLIED)) {
971       state->handler = attlist1;
972       return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
973     }
974     if (XmlNameMatchesAscii(enc,
975                             ptr + MIN_BYTES_PER_CHAR(enc),
976                             end,
977                             KW_REQUIRED)) {
978       state->handler = attlist1;
979       return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
980     }
981     if (XmlNameMatchesAscii(enc,
982                             ptr + MIN_BYTES_PER_CHAR(enc),
983                             end,
984                             KW_FIXED)) {
985       state->handler = attlist9;
986       return XML_ROLE_ATTLIST_NONE;
987     }
988     break;
989   case XML_TOK_LITERAL:
990     state->handler = attlist1;
991     return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
992   }
993   return common(state, tok);
994 }
995 
996 static int PTRCALL
attlist9(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))997 attlist9(PROLOG_STATE *state,
998          int tok,
999          const char *UNUSED_P(ptr),
1000          const char *UNUSED_P(end),
1001          const ENCODING *UNUSED_P(enc))
1002 {
1003   switch (tok) {
1004   case XML_TOK_PROLOG_S:
1005     return XML_ROLE_ATTLIST_NONE;
1006   case XML_TOK_LITERAL:
1007     state->handler = attlist1;
1008     return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
1009   }
1010   return common(state, tok);
1011 }
1012 
1013 static int PTRCALL
element0(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1014 element0(PROLOG_STATE *state,
1015          int tok,
1016          const char *UNUSED_P(ptr),
1017          const char *UNUSED_P(end),
1018          const ENCODING *UNUSED_P(enc))
1019 {
1020   switch (tok) {
1021   case XML_TOK_PROLOG_S:
1022     return XML_ROLE_ELEMENT_NONE;
1023   case XML_TOK_NAME:
1024   case XML_TOK_PREFIXED_NAME:
1025     state->handler = element1;
1026     return XML_ROLE_ELEMENT_NAME;
1027   }
1028   return common(state, tok);
1029 }
1030 
1031 static int PTRCALL
element1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1032 element1(PROLOG_STATE *state,
1033          int tok,
1034          const char *ptr,
1035          const char *end,
1036          const ENCODING *enc)
1037 {
1038   switch (tok) {
1039   case XML_TOK_PROLOG_S:
1040     return XML_ROLE_ELEMENT_NONE;
1041   case XML_TOK_NAME:
1042     if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
1043       state->handler = declClose;
1044       state->role_none = XML_ROLE_ELEMENT_NONE;
1045       return XML_ROLE_CONTENT_EMPTY;
1046     }
1047     if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1048       state->handler = declClose;
1049       state->role_none = XML_ROLE_ELEMENT_NONE;
1050       return XML_ROLE_CONTENT_ANY;
1051     }
1052     break;
1053   case XML_TOK_OPEN_PAREN:
1054     state->handler = element2;
1055     state->level = 1;
1056     return XML_ROLE_GROUP_OPEN;
1057   }
1058   return common(state, tok);
1059 }
1060 
1061 static int PTRCALL
element2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1062 element2(PROLOG_STATE *state,
1063          int tok,
1064          const char *ptr,
1065          const char *end,
1066          const ENCODING *enc)
1067 {
1068   switch (tok) {
1069   case XML_TOK_PROLOG_S:
1070     return XML_ROLE_ELEMENT_NONE;
1071   case XML_TOK_POUND_NAME:
1072     if (XmlNameMatchesAscii(enc,
1073                             ptr + MIN_BYTES_PER_CHAR(enc),
1074                             end,
1075                             KW_PCDATA)) {
1076       state->handler = element3;
1077       return XML_ROLE_CONTENT_PCDATA;
1078     }
1079     break;
1080   case XML_TOK_OPEN_PAREN:
1081     state->level = 2;
1082     state->handler = element6;
1083     return XML_ROLE_GROUP_OPEN;
1084   case XML_TOK_NAME:
1085   case XML_TOK_PREFIXED_NAME:
1086     state->handler = element7;
1087     return XML_ROLE_CONTENT_ELEMENT;
1088   case XML_TOK_NAME_QUESTION:
1089     state->handler = element7;
1090     return XML_ROLE_CONTENT_ELEMENT_OPT;
1091   case XML_TOK_NAME_ASTERISK:
1092     state->handler = element7;
1093     return XML_ROLE_CONTENT_ELEMENT_REP;
1094   case XML_TOK_NAME_PLUS:
1095     state->handler = element7;
1096     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1097   }
1098   return common(state, tok);
1099 }
1100 
1101 static int PTRCALL
element3(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1102 element3(PROLOG_STATE *state,
1103          int tok,
1104          const char *UNUSED_P(ptr),
1105          const char *UNUSED_P(end),
1106          const ENCODING *UNUSED_P(enc))
1107 {
1108   switch (tok) {
1109   case XML_TOK_PROLOG_S:
1110     return XML_ROLE_ELEMENT_NONE;
1111   case XML_TOK_CLOSE_PAREN:
1112     state->handler = declClose;
1113     state->role_none = XML_ROLE_ELEMENT_NONE;
1114     return XML_ROLE_GROUP_CLOSE;
1115   case XML_TOK_CLOSE_PAREN_ASTERISK:
1116     state->handler = declClose;
1117     state->role_none = XML_ROLE_ELEMENT_NONE;
1118     return XML_ROLE_GROUP_CLOSE_REP;
1119   case XML_TOK_OR:
1120     state->handler = element4;
1121     return XML_ROLE_ELEMENT_NONE;
1122   }
1123   return common(state, tok);
1124 }
1125 
1126 static int PTRCALL
element4(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1127 element4(PROLOG_STATE *state,
1128          int tok,
1129          const char *UNUSED_P(ptr),
1130          const char *UNUSED_P(end),
1131          const ENCODING *UNUSED_P(enc))
1132 {
1133   switch (tok) {
1134   case XML_TOK_PROLOG_S:
1135     return XML_ROLE_ELEMENT_NONE;
1136   case XML_TOK_NAME:
1137   case XML_TOK_PREFIXED_NAME:
1138     state->handler = element5;
1139     return XML_ROLE_CONTENT_ELEMENT;
1140   }
1141   return common(state, tok);
1142 }
1143 
1144 static int PTRCALL
element5(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1145 element5(PROLOG_STATE *state,
1146          int tok,
1147          const char *UNUSED_P(ptr),
1148          const char *UNUSED_P(end),
1149          const ENCODING *UNUSED_P(enc))
1150 {
1151   switch (tok) {
1152   case XML_TOK_PROLOG_S:
1153     return XML_ROLE_ELEMENT_NONE;
1154   case XML_TOK_CLOSE_PAREN_ASTERISK:
1155     state->handler = declClose;
1156     state->role_none = XML_ROLE_ELEMENT_NONE;
1157     return XML_ROLE_GROUP_CLOSE_REP;
1158   case XML_TOK_OR:
1159     state->handler = element4;
1160     return XML_ROLE_ELEMENT_NONE;
1161   }
1162   return common(state, tok);
1163 }
1164 
1165 static int PTRCALL
element6(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1166 element6(PROLOG_STATE *state,
1167          int tok,
1168          const char *UNUSED_P(ptr),
1169          const char *UNUSED_P(end),
1170          const ENCODING *UNUSED_P(enc))
1171 {
1172   switch (tok) {
1173   case XML_TOK_PROLOG_S:
1174     return XML_ROLE_ELEMENT_NONE;
1175   case XML_TOK_OPEN_PAREN:
1176     state->level += 1;
1177     return XML_ROLE_GROUP_OPEN;
1178   case XML_TOK_NAME:
1179   case XML_TOK_PREFIXED_NAME:
1180     state->handler = element7;
1181     return XML_ROLE_CONTENT_ELEMENT;
1182   case XML_TOK_NAME_QUESTION:
1183     state->handler = element7;
1184     return XML_ROLE_CONTENT_ELEMENT_OPT;
1185   case XML_TOK_NAME_ASTERISK:
1186     state->handler = element7;
1187     return XML_ROLE_CONTENT_ELEMENT_REP;
1188   case XML_TOK_NAME_PLUS:
1189     state->handler = element7;
1190     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1191   }
1192   return common(state, tok);
1193 }
1194 
1195 static int PTRCALL
element7(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1196 element7(PROLOG_STATE *state,
1197          int tok,
1198          const char *UNUSED_P(ptr),
1199          const char *UNUSED_P(end),
1200          const ENCODING *UNUSED_P(enc))
1201 {
1202   switch (tok) {
1203   case XML_TOK_PROLOG_S:
1204     return XML_ROLE_ELEMENT_NONE;
1205   case XML_TOK_CLOSE_PAREN:
1206     state->level -= 1;
1207     if (state->level == 0) {
1208       state->handler = declClose;
1209       state->role_none = XML_ROLE_ELEMENT_NONE;
1210     }
1211     return XML_ROLE_GROUP_CLOSE;
1212   case XML_TOK_CLOSE_PAREN_ASTERISK:
1213     state->level -= 1;
1214     if (state->level == 0) {
1215       state->handler = declClose;
1216       state->role_none = XML_ROLE_ELEMENT_NONE;
1217     }
1218     return XML_ROLE_GROUP_CLOSE_REP;
1219   case XML_TOK_CLOSE_PAREN_QUESTION:
1220     state->level -= 1;
1221     if (state->level == 0) {
1222       state->handler = declClose;
1223       state->role_none = XML_ROLE_ELEMENT_NONE;
1224     }
1225     return XML_ROLE_GROUP_CLOSE_OPT;
1226   case XML_TOK_CLOSE_PAREN_PLUS:
1227     state->level -= 1;
1228     if (state->level == 0) {
1229       state->handler = declClose;
1230       state->role_none = XML_ROLE_ELEMENT_NONE;
1231     }
1232     return XML_ROLE_GROUP_CLOSE_PLUS;
1233   case XML_TOK_COMMA:
1234     state->handler = element6;
1235     return XML_ROLE_GROUP_SEQUENCE;
1236   case XML_TOK_OR:
1237     state->handler = element6;
1238     return XML_ROLE_GROUP_CHOICE;
1239   }
1240   return common(state, tok);
1241 }
1242 
1243 #ifdef XML_DTD
1244 
1245 static int PTRCALL
condSect0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1246 condSect0(PROLOG_STATE *state,
1247           int tok,
1248           const char *ptr,
1249           const char *end,
1250           const ENCODING *enc)
1251 {
1252   switch (tok) {
1253   case XML_TOK_PROLOG_S:
1254     return XML_ROLE_NONE;
1255   case XML_TOK_NAME:
1256     if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1257       state->handler = condSect1;
1258       return XML_ROLE_NONE;
1259     }
1260     if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1261       state->handler = condSect2;
1262       return XML_ROLE_NONE;
1263     }
1264     break;
1265   }
1266   return common(state, tok);
1267 }
1268 
1269 static int PTRCALL
condSect1(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1270 condSect1(PROLOG_STATE *state,
1271           int tok,
1272           const char *UNUSED_P(ptr),
1273           const char *UNUSED_P(end),
1274           const ENCODING *UNUSED_P(enc))
1275 {
1276   switch (tok) {
1277   case XML_TOK_PROLOG_S:
1278     return XML_ROLE_NONE;
1279   case XML_TOK_OPEN_BRACKET:
1280     state->handler = externalSubset1;
1281     state->includeLevel += 1;
1282     return XML_ROLE_NONE;
1283   }
1284   return common(state, tok);
1285 }
1286 
1287 static int PTRCALL
condSect2(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1288 condSect2(PROLOG_STATE *state,
1289           int tok,
1290           const char *UNUSED_P(ptr),
1291           const char *UNUSED_P(end),
1292           const ENCODING *UNUSED_P(enc))
1293 {
1294   switch (tok) {
1295   case XML_TOK_PROLOG_S:
1296     return XML_ROLE_NONE;
1297   case XML_TOK_OPEN_BRACKET:
1298     state->handler = externalSubset1;
1299     return XML_ROLE_IGNORE_SECT;
1300   }
1301   return common(state, tok);
1302 }
1303 
1304 #endif /* XML_DTD */
1305 
1306 static int PTRCALL
declClose(PROLOG_STATE * state,int tok,const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1307 declClose(PROLOG_STATE *state,
1308           int tok,
1309           const char *UNUSED_P(ptr),
1310           const char *UNUSED_P(end),
1311           const ENCODING *UNUSED_P(enc))
1312 {
1313   switch (tok) {
1314   case XML_TOK_PROLOG_S:
1315     return state->role_none;
1316   case XML_TOK_DECL_CLOSE:
1317     setTopLevel(state);
1318     return state->role_none;
1319   }
1320   return common(state, tok);
1321 }
1322 
1323 /* This function will only be invoked if the internal logic of the
1324  * parser has broken down.  It is used in two cases:
1325  *
1326  * 1: When the XML prolog has been finished.  At this point the
1327  * processor (the parser level above these role handlers) should
1328  * switch from prologProcessor to contentProcessor and reinitialise
1329  * the handler function.
1330  *
1331  * 2: When an error has been detected (via common() below).  At this
1332  * point again the processor should be switched to errorProcessor,
1333  * which will never call a handler.
1334  *
1335  * The result of this is that error() can only be called if the
1336  * processor switch failed to happen, which is an internal error and
1337  * therefore we shouldn't be able to provoke it simply by using the
1338  * library.  It is a necessary backstop, however, so we merely exclude
1339  * it from the coverage statistics.
1340  *
1341  * LCOV_EXCL_START
1342  */
1343 static int PTRCALL
error(PROLOG_STATE * UNUSED_P (state),int UNUSED_P (tok),const char * UNUSED_P (ptr),const char * UNUSED_P (end),const ENCODING * UNUSED_P (enc))1344 error(PROLOG_STATE *UNUSED_P(state),
1345       int UNUSED_P(tok),
1346       const char *UNUSED_P(ptr),
1347       const char *UNUSED_P(end),
1348       const ENCODING *UNUSED_P(enc))
1349 {
1350   return XML_ROLE_NONE;
1351 }
1352 /* LCOV_EXCL_STOP */
1353 
1354 static int FASTCALL
common(PROLOG_STATE * state,int tok)1355 common(PROLOG_STATE *state, int tok)
1356 {
1357 #ifdef XML_DTD
1358   if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1359     return XML_ROLE_INNER_PARAM_ENTITY_REF;
1360 #endif
1361   state->handler = error;
1362   return XML_ROLE_ERROR;
1363 }
1364 
1365 void
XmlPrologStateInit(PROLOG_STATE * state)1366 XmlPrologStateInit(PROLOG_STATE *state)
1367 {
1368   state->handler = prolog0;
1369 #ifdef XML_DTD
1370   state->documentEntity = 1;
1371   state->includeLevel = 0;
1372   state->inEntityValue = 0;
1373 #endif /* XML_DTD */
1374 }
1375 
1376 #ifdef XML_DTD
1377 
1378 void
XmlPrologStateInitExternalEntity(PROLOG_STATE * state)1379 XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1380 {
1381   state->handler = externalSubset0;
1382   state->documentEntity = 0;
1383   state->includeLevel = 0;
1384 }
1385 
1386 #endif /* XML_DTD */
1387