1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2002      Greg Stein <gstein@users.sourceforge.net>
12    Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
14    Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
15    Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
16    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
17    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
18    Licensed under the MIT license:
19 
20    Permission is  hereby granted,  free of charge,  to any  person obtaining
21    a  copy  of  this  software   and  associated  documentation  files  (the
22    "Software"),  to  deal in  the  Software  without restriction,  including
23    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
24    distribute, sublicense, and/or sell copies of the Software, and to permit
25    persons  to whom  the Software  is  furnished to  do so,  subject to  the
26    following conditions:
27 
28    The above copyright  notice and this permission notice  shall be included
29    in all copies or substantial portions of the Software.
30 
31    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
32    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
33    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
34    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
35    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
36    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
37    USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39 
40 #include <stddef.h>
41 
42 #ifdef _WIN32
43 #  include "winconfig.h"
44 #endif
45 
46 #include <expat_config.h>
47 
48 #include "expat_external.h"
49 #include "internal.h"
50 #include "xmlrole.h"
51 #include "ascii.h"
52 
53 /* Doesn't check:
54 
55  that ,| are not mixed in a model group
56  content of literals
57 
58 */
59 
60 static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'};
61 static const char KW_ATTLIST[]
62     = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'};
63 static const char KW_CDATA[]
64     = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
65 static const char KW_DOCTYPE[]
66     = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'};
67 static const char KW_ELEMENT[]
68     = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'};
69 static const char KW_EMPTY[]
70     = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'};
71 static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
72                                    ASCII_I, ASCII_E, ASCII_S, '\0'};
73 static const char KW_ENTITY[]
74     = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
75 static const char KW_FIXED[]
76     = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'};
77 static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'};
78 static const char KW_IDREF[]
79     = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
80 static const char KW_IDREFS[]
81     = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
82 #ifdef XML_DTD
83 static const char KW_IGNORE[]
84     = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'};
85 #endif
86 static const char KW_IMPLIED[]
87     = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'};
88 #ifdef XML_DTD
89 static const char KW_INCLUDE[]
90     = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'};
91 #endif
92 static const char KW_NDATA[]
93     = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
94 static const char KW_NMTOKEN[]
95     = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
96 static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
97                                    ASCII_E, ASCII_N, ASCII_S, '\0'};
98 static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
99                                    ASCII_I, ASCII_O, ASCII_N, '\0'};
100 static const char KW_PCDATA[]
101     = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
102 static const char KW_PUBLIC[]
103     = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'};
104 static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I,
105                                    ASCII_R, ASCII_E, ASCII_D, '\0'};
106 static const char KW_SYSTEM[]
107     = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'};
108 
/* Reads the minBytesPerChar field of an encoding, unless an including
 * context has already supplied its own definition.
 */
#if ! defined(MIN_BYTES_PER_CHAR)
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* Reinstalls the subset-level handler once a declaration is finished.
 * Without XML_DTD this is always internalSubset; with XML_DTD the choice
 * depends on state->documentEntity.  Note that the XML_DTD form evaluates
 * its argument twice.
 */
#ifndef XML_DTD
#  define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#  define setTopLevel(state)                                                   \
    ((state)->handler                                                          \
     = ((state)->documentEntity ? internalSubset : externalSubset1))
#endif /* XML_DTD */
120 
121 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok,
122                                    const char *ptr, const char *end,
123                                    const ENCODING *enc);
124 
125 static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2,
126     doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2,
127     entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10,
128     notation0, notation1, notation2, notation3, notation4, attlist0, attlist1,
129     attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8,
130     attlist9, element0, element1, element2, element3, element4, element5,
131     element6, element7,
132 #ifdef XML_DTD
133     externalSubset0, externalSubset1, condSect0, condSect1, condSect2,
134 #endif /* XML_DTD */
135     declClose, error;
136 
137 static int FASTCALL common(PROLOG_STATE *state, int tok);
138 
139 static int PTRCALL
prolog0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)140 prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
141         const ENCODING *enc) {
142   switch (tok) {
143   case XML_TOK_PROLOG_S:
144     state->handler = prolog1;
145     return XML_ROLE_NONE;
146   case XML_TOK_XML_DECL:
147     state->handler = prolog1;
148     return XML_ROLE_XML_DECL;
149   case XML_TOK_PI:
150     state->handler = prolog1;
151     return XML_ROLE_PI;
152   case XML_TOK_COMMENT:
153     state->handler = prolog1;
154     return XML_ROLE_COMMENT;
155   case XML_TOK_BOM:
156     return XML_ROLE_NONE;
157   case XML_TOK_DECL_OPEN:
158     if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
159                               KW_DOCTYPE))
160       break;
161     state->handler = doctype0;
162     return XML_ROLE_DOCTYPE_NONE;
163   case XML_TOK_INSTANCE_START:
164     state->handler = error;
165     return XML_ROLE_INSTANCE_START;
166   }
167   return common(state, tok);
168 }
169 
170 static int PTRCALL
prolog1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)171 prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
172         const ENCODING *enc) {
173   switch (tok) {
174   case XML_TOK_PROLOG_S:
175     return XML_ROLE_NONE;
176   case XML_TOK_PI:
177     return XML_ROLE_PI;
178   case XML_TOK_COMMENT:
179     return XML_ROLE_COMMENT;
180   case XML_TOK_BOM:
181     /* This case can never arise.  To reach this role function, the
182      * parse must have passed through prolog0 and therefore have had
183      * some form of input, even if only a space.  At that point, a
184      * byte order mark is no longer a valid character (though
185      * technically it should be interpreted as a non-breaking space),
186      * so will be rejected by the tokenizing stages.
187      */
188     return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
189   case XML_TOK_DECL_OPEN:
190     if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
191                               KW_DOCTYPE))
192       break;
193     state->handler = doctype0;
194     return XML_ROLE_DOCTYPE_NONE;
195   case XML_TOK_INSTANCE_START:
196     state->handler = error;
197     return XML_ROLE_INSTANCE_START;
198   }
199   return common(state, tok);
200 }
201 
202 static int PTRCALL
prolog2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)203 prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
204         const ENCODING *enc) {
205   UNUSED_P(ptr);
206   UNUSED_P(end);
207   UNUSED_P(enc);
208   switch (tok) {
209   case XML_TOK_PROLOG_S:
210     return XML_ROLE_NONE;
211   case XML_TOK_PI:
212     return XML_ROLE_PI;
213   case XML_TOK_COMMENT:
214     return XML_ROLE_COMMENT;
215   case XML_TOK_INSTANCE_START:
216     state->handler = error;
217     return XML_ROLE_INSTANCE_START;
218   }
219   return common(state, tok);
220 }
221 
222 static int PTRCALL
doctype0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)223 doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
224          const ENCODING *enc) {
225   UNUSED_P(ptr);
226   UNUSED_P(end);
227   UNUSED_P(enc);
228   switch (tok) {
229   case XML_TOK_PROLOG_S:
230     return XML_ROLE_DOCTYPE_NONE;
231   case XML_TOK_NAME:
232   case XML_TOK_PREFIXED_NAME:
233     state->handler = doctype1;
234     return XML_ROLE_DOCTYPE_NAME;
235   }
236   return common(state, tok);
237 }
238 
239 static int PTRCALL
doctype1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)240 doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
241          const ENCODING *enc) {
242   switch (tok) {
243   case XML_TOK_PROLOG_S:
244     return XML_ROLE_DOCTYPE_NONE;
245   case XML_TOK_OPEN_BRACKET:
246     state->handler = internalSubset;
247     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
248   case XML_TOK_DECL_CLOSE:
249     state->handler = prolog2;
250     return XML_ROLE_DOCTYPE_CLOSE;
251   case XML_TOK_NAME:
252     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
253       state->handler = doctype3;
254       return XML_ROLE_DOCTYPE_NONE;
255     }
256     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
257       state->handler = doctype2;
258       return XML_ROLE_DOCTYPE_NONE;
259     }
260     break;
261   }
262   return common(state, tok);
263 }
264 
265 static int PTRCALL
doctype2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)266 doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
267          const ENCODING *enc) {
268   UNUSED_P(ptr);
269   UNUSED_P(end);
270   UNUSED_P(enc);
271   switch (tok) {
272   case XML_TOK_PROLOG_S:
273     return XML_ROLE_DOCTYPE_NONE;
274   case XML_TOK_LITERAL:
275     state->handler = doctype3;
276     return XML_ROLE_DOCTYPE_PUBLIC_ID;
277   }
278   return common(state, tok);
279 }
280 
281 static int PTRCALL
doctype3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)282 doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
283          const ENCODING *enc) {
284   UNUSED_P(ptr);
285   UNUSED_P(end);
286   UNUSED_P(enc);
287   switch (tok) {
288   case XML_TOK_PROLOG_S:
289     return XML_ROLE_DOCTYPE_NONE;
290   case XML_TOK_LITERAL:
291     state->handler = doctype4;
292     return XML_ROLE_DOCTYPE_SYSTEM_ID;
293   }
294   return common(state, tok);
295 }
296 
297 static int PTRCALL
doctype4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)298 doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
299          const ENCODING *enc) {
300   UNUSED_P(ptr);
301   UNUSED_P(end);
302   UNUSED_P(enc);
303   switch (tok) {
304   case XML_TOK_PROLOG_S:
305     return XML_ROLE_DOCTYPE_NONE;
306   case XML_TOK_OPEN_BRACKET:
307     state->handler = internalSubset;
308     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
309   case XML_TOK_DECL_CLOSE:
310     state->handler = prolog2;
311     return XML_ROLE_DOCTYPE_CLOSE;
312   }
313   return common(state, tok);
314 }
315 
316 static int PTRCALL
doctype5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)317 doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
318          const ENCODING *enc) {
319   UNUSED_P(ptr);
320   UNUSED_P(end);
321   UNUSED_P(enc);
322   switch (tok) {
323   case XML_TOK_PROLOG_S:
324     return XML_ROLE_DOCTYPE_NONE;
325   case XML_TOK_DECL_CLOSE:
326     state->handler = prolog2;
327     return XML_ROLE_DOCTYPE_CLOSE;
328   }
329   return common(state, tok);
330 }
331 
332 static int PTRCALL
internalSubset(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)333 internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
334                const ENCODING *enc) {
335   switch (tok) {
336   case XML_TOK_PROLOG_S:
337     return XML_ROLE_NONE;
338   case XML_TOK_DECL_OPEN:
339     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
340                             KW_ENTITY)) {
341       state->handler = entity0;
342       return XML_ROLE_ENTITY_NONE;
343     }
344     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
345                             KW_ATTLIST)) {
346       state->handler = attlist0;
347       return XML_ROLE_ATTLIST_NONE;
348     }
349     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
350                             KW_ELEMENT)) {
351       state->handler = element0;
352       return XML_ROLE_ELEMENT_NONE;
353     }
354     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
355                             KW_NOTATION)) {
356       state->handler = notation0;
357       return XML_ROLE_NOTATION_NONE;
358     }
359     break;
360   case XML_TOK_PI:
361     return XML_ROLE_PI;
362   case XML_TOK_COMMENT:
363     return XML_ROLE_COMMENT;
364   case XML_TOK_PARAM_ENTITY_REF:
365     return XML_ROLE_PARAM_ENTITY_REF;
366   case XML_TOK_CLOSE_BRACKET:
367     state->handler = doctype5;
368     return XML_ROLE_DOCTYPE_NONE;
369   case XML_TOK_NONE:
370     return XML_ROLE_NONE;
371   }
372   return common(state, tok);
373 }
374 
375 #ifdef XML_DTD
376 
377 static int PTRCALL
externalSubset0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)378 externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
379                 const ENCODING *enc) {
380   state->handler = externalSubset1;
381   if (tok == XML_TOK_XML_DECL)
382     return XML_ROLE_TEXT_DECL;
383   return externalSubset1(state, tok, ptr, end, enc);
384 }
385 
386 static int PTRCALL
externalSubset1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)387 externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
388                 const ENCODING *enc) {
389   switch (tok) {
390   case XML_TOK_COND_SECT_OPEN:
391     state->handler = condSect0;
392     return XML_ROLE_NONE;
393   case XML_TOK_COND_SECT_CLOSE:
394     if (state->includeLevel == 0)
395       break;
396     state->includeLevel -= 1;
397     return XML_ROLE_NONE;
398   case XML_TOK_PROLOG_S:
399     return XML_ROLE_NONE;
400   case XML_TOK_CLOSE_BRACKET:
401     break;
402   case XML_TOK_NONE:
403     if (state->includeLevel)
404       break;
405     return XML_ROLE_NONE;
406   default:
407     return internalSubset(state, tok, ptr, end, enc);
408   }
409   return common(state, tok);
410 }
411 
412 #endif /* XML_DTD */
413 
414 static int PTRCALL
entity0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)415 entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
416         const ENCODING *enc) {
417   UNUSED_P(ptr);
418   UNUSED_P(end);
419   UNUSED_P(enc);
420   switch (tok) {
421   case XML_TOK_PROLOG_S:
422     return XML_ROLE_ENTITY_NONE;
423   case XML_TOK_PERCENT:
424     state->handler = entity1;
425     return XML_ROLE_ENTITY_NONE;
426   case XML_TOK_NAME:
427     state->handler = entity2;
428     return XML_ROLE_GENERAL_ENTITY_NAME;
429   }
430   return common(state, tok);
431 }
432 
433 static int PTRCALL
entity1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)434 entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
435         const ENCODING *enc) {
436   UNUSED_P(ptr);
437   UNUSED_P(end);
438   UNUSED_P(enc);
439   switch (tok) {
440   case XML_TOK_PROLOG_S:
441     return XML_ROLE_ENTITY_NONE;
442   case XML_TOK_NAME:
443     state->handler = entity7;
444     return XML_ROLE_PARAM_ENTITY_NAME;
445   }
446   return common(state, tok);
447 }
448 
449 static int PTRCALL
entity2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)450 entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
451         const ENCODING *enc) {
452   switch (tok) {
453   case XML_TOK_PROLOG_S:
454     return XML_ROLE_ENTITY_NONE;
455   case XML_TOK_NAME:
456     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
457       state->handler = entity4;
458       return XML_ROLE_ENTITY_NONE;
459     }
460     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
461       state->handler = entity3;
462       return XML_ROLE_ENTITY_NONE;
463     }
464     break;
465   case XML_TOK_LITERAL:
466     state->handler = declClose;
467     state->role_none = XML_ROLE_ENTITY_NONE;
468     return XML_ROLE_ENTITY_VALUE;
469   }
470   return common(state, tok);
471 }
472 
473 static int PTRCALL
entity3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)474 entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
475         const ENCODING *enc) {
476   UNUSED_P(ptr);
477   UNUSED_P(end);
478   UNUSED_P(enc);
479   switch (tok) {
480   case XML_TOK_PROLOG_S:
481     return XML_ROLE_ENTITY_NONE;
482   case XML_TOK_LITERAL:
483     state->handler = entity4;
484     return XML_ROLE_ENTITY_PUBLIC_ID;
485   }
486   return common(state, tok);
487 }
488 
489 static int PTRCALL
entity4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)490 entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
491         const ENCODING *enc) {
492   UNUSED_P(ptr);
493   UNUSED_P(end);
494   UNUSED_P(enc);
495   switch (tok) {
496   case XML_TOK_PROLOG_S:
497     return XML_ROLE_ENTITY_NONE;
498   case XML_TOK_LITERAL:
499     state->handler = entity5;
500     return XML_ROLE_ENTITY_SYSTEM_ID;
501   }
502   return common(state, tok);
503 }
504 
505 static int PTRCALL
entity5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)506 entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
507         const ENCODING *enc) {
508   switch (tok) {
509   case XML_TOK_PROLOG_S:
510     return XML_ROLE_ENTITY_NONE;
511   case XML_TOK_DECL_CLOSE:
512     setTopLevel(state);
513     return XML_ROLE_ENTITY_COMPLETE;
514   case XML_TOK_NAME:
515     if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
516       state->handler = entity6;
517       return XML_ROLE_ENTITY_NONE;
518     }
519     break;
520   }
521   return common(state, tok);
522 }
523 
524 static int PTRCALL
entity6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)525 entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
526         const ENCODING *enc) {
527   UNUSED_P(ptr);
528   UNUSED_P(end);
529   UNUSED_P(enc);
530   switch (tok) {
531   case XML_TOK_PROLOG_S:
532     return XML_ROLE_ENTITY_NONE;
533   case XML_TOK_NAME:
534     state->handler = declClose;
535     state->role_none = XML_ROLE_ENTITY_NONE;
536     return XML_ROLE_ENTITY_NOTATION_NAME;
537   }
538   return common(state, tok);
539 }
540 
541 static int PTRCALL
entity7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)542 entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
543         const ENCODING *enc) {
544   switch (tok) {
545   case XML_TOK_PROLOG_S:
546     return XML_ROLE_ENTITY_NONE;
547   case XML_TOK_NAME:
548     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
549       state->handler = entity9;
550       return XML_ROLE_ENTITY_NONE;
551     }
552     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
553       state->handler = entity8;
554       return XML_ROLE_ENTITY_NONE;
555     }
556     break;
557   case XML_TOK_LITERAL:
558     state->handler = declClose;
559     state->role_none = XML_ROLE_ENTITY_NONE;
560     return XML_ROLE_ENTITY_VALUE;
561   }
562   return common(state, tok);
563 }
564 
565 static int PTRCALL
entity8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)566 entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
567         const ENCODING *enc) {
568   UNUSED_P(ptr);
569   UNUSED_P(end);
570   UNUSED_P(enc);
571   switch (tok) {
572   case XML_TOK_PROLOG_S:
573     return XML_ROLE_ENTITY_NONE;
574   case XML_TOK_LITERAL:
575     state->handler = entity9;
576     return XML_ROLE_ENTITY_PUBLIC_ID;
577   }
578   return common(state, tok);
579 }
580 
581 static int PTRCALL
entity9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)582 entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
583         const ENCODING *enc) {
584   UNUSED_P(ptr);
585   UNUSED_P(end);
586   UNUSED_P(enc);
587   switch (tok) {
588   case XML_TOK_PROLOG_S:
589     return XML_ROLE_ENTITY_NONE;
590   case XML_TOK_LITERAL:
591     state->handler = entity10;
592     return XML_ROLE_ENTITY_SYSTEM_ID;
593   }
594   return common(state, tok);
595 }
596 
597 static int PTRCALL
entity10(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)598 entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
599          const ENCODING *enc) {
600   UNUSED_P(ptr);
601   UNUSED_P(end);
602   UNUSED_P(enc);
603   switch (tok) {
604   case XML_TOK_PROLOG_S:
605     return XML_ROLE_ENTITY_NONE;
606   case XML_TOK_DECL_CLOSE:
607     setTopLevel(state);
608     return XML_ROLE_ENTITY_COMPLETE;
609   }
610   return common(state, tok);
611 }
612 
613 static int PTRCALL
notation0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)614 notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
615           const ENCODING *enc) {
616   UNUSED_P(ptr);
617   UNUSED_P(end);
618   UNUSED_P(enc);
619   switch (tok) {
620   case XML_TOK_PROLOG_S:
621     return XML_ROLE_NOTATION_NONE;
622   case XML_TOK_NAME:
623     state->handler = notation1;
624     return XML_ROLE_NOTATION_NAME;
625   }
626   return common(state, tok);
627 }
628 
629 static int PTRCALL
notation1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)630 notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
631           const ENCODING *enc) {
632   switch (tok) {
633   case XML_TOK_PROLOG_S:
634     return XML_ROLE_NOTATION_NONE;
635   case XML_TOK_NAME:
636     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
637       state->handler = notation3;
638       return XML_ROLE_NOTATION_NONE;
639     }
640     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
641       state->handler = notation2;
642       return XML_ROLE_NOTATION_NONE;
643     }
644     break;
645   }
646   return common(state, tok);
647 }
648 
649 static int PTRCALL
notation2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)650 notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
651           const ENCODING *enc) {
652   UNUSED_P(ptr);
653   UNUSED_P(end);
654   UNUSED_P(enc);
655   switch (tok) {
656   case XML_TOK_PROLOG_S:
657     return XML_ROLE_NOTATION_NONE;
658   case XML_TOK_LITERAL:
659     state->handler = notation4;
660     return XML_ROLE_NOTATION_PUBLIC_ID;
661   }
662   return common(state, tok);
663 }
664 
665 static int PTRCALL
notation3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)666 notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
667           const ENCODING *enc) {
668   UNUSED_P(ptr);
669   UNUSED_P(end);
670   UNUSED_P(enc);
671   switch (tok) {
672   case XML_TOK_PROLOG_S:
673     return XML_ROLE_NOTATION_NONE;
674   case XML_TOK_LITERAL:
675     state->handler = declClose;
676     state->role_none = XML_ROLE_NOTATION_NONE;
677     return XML_ROLE_NOTATION_SYSTEM_ID;
678   }
679   return common(state, tok);
680 }
681 
682 static int PTRCALL
notation4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)683 notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
684           const ENCODING *enc) {
685   UNUSED_P(ptr);
686   UNUSED_P(end);
687   UNUSED_P(enc);
688   switch (tok) {
689   case XML_TOK_PROLOG_S:
690     return XML_ROLE_NOTATION_NONE;
691   case XML_TOK_LITERAL:
692     state->handler = declClose;
693     state->role_none = XML_ROLE_NOTATION_NONE;
694     return XML_ROLE_NOTATION_SYSTEM_ID;
695   case XML_TOK_DECL_CLOSE:
696     setTopLevel(state);
697     return XML_ROLE_NOTATION_NO_SYSTEM_ID;
698   }
699   return common(state, tok);
700 }
701 
702 static int PTRCALL
attlist0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)703 attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
704          const ENCODING *enc) {
705   UNUSED_P(ptr);
706   UNUSED_P(end);
707   UNUSED_P(enc);
708   switch (tok) {
709   case XML_TOK_PROLOG_S:
710     return XML_ROLE_ATTLIST_NONE;
711   case XML_TOK_NAME:
712   case XML_TOK_PREFIXED_NAME:
713     state->handler = attlist1;
714     return XML_ROLE_ATTLIST_ELEMENT_NAME;
715   }
716   return common(state, tok);
717 }
718 
719 static int PTRCALL
attlist1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)720 attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
721          const ENCODING *enc) {
722   UNUSED_P(ptr);
723   UNUSED_P(end);
724   UNUSED_P(enc);
725   switch (tok) {
726   case XML_TOK_PROLOG_S:
727     return XML_ROLE_ATTLIST_NONE;
728   case XML_TOK_DECL_CLOSE:
729     setTopLevel(state);
730     return XML_ROLE_ATTLIST_NONE;
731   case XML_TOK_NAME:
732   case XML_TOK_PREFIXED_NAME:
733     state->handler = attlist2;
734     return XML_ROLE_ATTRIBUTE_NAME;
735   }
736   return common(state, tok);
737 }
738 
739 static int PTRCALL
attlist2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)740 attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
741          const ENCODING *enc) {
742   switch (tok) {
743   case XML_TOK_PROLOG_S:
744     return XML_ROLE_ATTLIST_NONE;
745   case XML_TOK_NAME: {
746     static const char *const types[] = {
747         KW_CDATA,  KW_ID,       KW_IDREF,   KW_IDREFS,
748         KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS,
749     };
750     int i;
751     for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++)
752       if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
753         state->handler = attlist8;
754         return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
755       }
756   }
757     if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
758       state->handler = attlist5;
759       return XML_ROLE_ATTLIST_NONE;
760     }
761     break;
762   case XML_TOK_OPEN_PAREN:
763     state->handler = attlist3;
764     return XML_ROLE_ATTLIST_NONE;
765   }
766   return common(state, tok);
767 }
768 
769 static int PTRCALL
attlist3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)770 attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
771          const ENCODING *enc) {
772   UNUSED_P(ptr);
773   UNUSED_P(end);
774   UNUSED_P(enc);
775   switch (tok) {
776   case XML_TOK_PROLOG_S:
777     return XML_ROLE_ATTLIST_NONE;
778   case XML_TOK_NMTOKEN:
779   case XML_TOK_NAME:
780   case XML_TOK_PREFIXED_NAME:
781     state->handler = attlist4;
782     return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
783   }
784   return common(state, tok);
785 }
786 
787 static int PTRCALL
attlist4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)788 attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
789          const ENCODING *enc) {
790   UNUSED_P(ptr);
791   UNUSED_P(end);
792   UNUSED_P(enc);
793   switch (tok) {
794   case XML_TOK_PROLOG_S:
795     return XML_ROLE_ATTLIST_NONE;
796   case XML_TOK_CLOSE_PAREN:
797     state->handler = attlist8;
798     return XML_ROLE_ATTLIST_NONE;
799   case XML_TOK_OR:
800     state->handler = attlist3;
801     return XML_ROLE_ATTLIST_NONE;
802   }
803   return common(state, tok);
804 }
805 
806 static int PTRCALL
attlist5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)807 attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
808          const ENCODING *enc) {
809   UNUSED_P(ptr);
810   UNUSED_P(end);
811   UNUSED_P(enc);
812   switch (tok) {
813   case XML_TOK_PROLOG_S:
814     return XML_ROLE_ATTLIST_NONE;
815   case XML_TOK_OPEN_PAREN:
816     state->handler = attlist6;
817     return XML_ROLE_ATTLIST_NONE;
818   }
819   return common(state, tok);
820 }
821 
822 static int PTRCALL
attlist6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)823 attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
824          const ENCODING *enc) {
825   UNUSED_P(ptr);
826   UNUSED_P(end);
827   UNUSED_P(enc);
828   switch (tok) {
829   case XML_TOK_PROLOG_S:
830     return XML_ROLE_ATTLIST_NONE;
831   case XML_TOK_NAME:
832     state->handler = attlist7;
833     return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
834   }
835   return common(state, tok);
836 }
837 
838 static int PTRCALL
attlist7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)839 attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
840          const ENCODING *enc) {
841   UNUSED_P(ptr);
842   UNUSED_P(end);
843   UNUSED_P(enc);
844   switch (tok) {
845   case XML_TOK_PROLOG_S:
846     return XML_ROLE_ATTLIST_NONE;
847   case XML_TOK_CLOSE_PAREN:
848     state->handler = attlist8;
849     return XML_ROLE_ATTLIST_NONE;
850   case XML_TOK_OR:
851     state->handler = attlist6;
852     return XML_ROLE_ATTLIST_NONE;
853   }
854   return common(state, tok);
855 }
856 
857 /* default value */
858 static int PTRCALL
attlist8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)859 attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
860          const ENCODING *enc) {
861   switch (tok) {
862   case XML_TOK_PROLOG_S:
863     return XML_ROLE_ATTLIST_NONE;
864   case XML_TOK_POUND_NAME:
865     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
866                             KW_IMPLIED)) {
867       state->handler = attlist1;
868       return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
869     }
870     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
871                             KW_REQUIRED)) {
872       state->handler = attlist1;
873       return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
874     }
875     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
876                             KW_FIXED)) {
877       state->handler = attlist9;
878       return XML_ROLE_ATTLIST_NONE;
879     }
880     break;
881   case XML_TOK_LITERAL:
882     state->handler = attlist1;
883     return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
884   }
885   return common(state, tok);
886 }
887 
888 static int PTRCALL
attlist9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)889 attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
890          const ENCODING *enc) {
891   UNUSED_P(ptr);
892   UNUSED_P(end);
893   UNUSED_P(enc);
894   switch (tok) {
895   case XML_TOK_PROLOG_S:
896     return XML_ROLE_ATTLIST_NONE;
897   case XML_TOK_LITERAL:
898     state->handler = attlist1;
899     return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
900   }
901   return common(state, tok);
902 }
903 
904 static int PTRCALL
element0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)905 element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
906          const ENCODING *enc) {
907   UNUSED_P(ptr);
908   UNUSED_P(end);
909   UNUSED_P(enc);
910   switch (tok) {
911   case XML_TOK_PROLOG_S:
912     return XML_ROLE_ELEMENT_NONE;
913   case XML_TOK_NAME:
914   case XML_TOK_PREFIXED_NAME:
915     state->handler = element1;
916     return XML_ROLE_ELEMENT_NAME;
917   }
918   return common(state, tok);
919 }
920 
921 static int PTRCALL
element1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)922 element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
923          const ENCODING *enc) {
924   switch (tok) {
925   case XML_TOK_PROLOG_S:
926     return XML_ROLE_ELEMENT_NONE;
927   case XML_TOK_NAME:
928     if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
929       state->handler = declClose;
930       state->role_none = XML_ROLE_ELEMENT_NONE;
931       return XML_ROLE_CONTENT_EMPTY;
932     }
933     if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
934       state->handler = declClose;
935       state->role_none = XML_ROLE_ELEMENT_NONE;
936       return XML_ROLE_CONTENT_ANY;
937     }
938     break;
939   case XML_TOK_OPEN_PAREN:
940     state->handler = element2;
941     state->level = 1;
942     return XML_ROLE_GROUP_OPEN;
943   }
944   return common(state, tok);
945 }
946 
947 static int PTRCALL
element2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)948 element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
949          const ENCODING *enc) {
950   switch (tok) {
951   case XML_TOK_PROLOG_S:
952     return XML_ROLE_ELEMENT_NONE;
953   case XML_TOK_POUND_NAME:
954     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
955                             KW_PCDATA)) {
956       state->handler = element3;
957       return XML_ROLE_CONTENT_PCDATA;
958     }
959     break;
960   case XML_TOK_OPEN_PAREN:
961     state->level = 2;
962     state->handler = element6;
963     return XML_ROLE_GROUP_OPEN;
964   case XML_TOK_NAME:
965   case XML_TOK_PREFIXED_NAME:
966     state->handler = element7;
967     return XML_ROLE_CONTENT_ELEMENT;
968   case XML_TOK_NAME_QUESTION:
969     state->handler = element7;
970     return XML_ROLE_CONTENT_ELEMENT_OPT;
971   case XML_TOK_NAME_ASTERISK:
972     state->handler = element7;
973     return XML_ROLE_CONTENT_ELEMENT_REP;
974   case XML_TOK_NAME_PLUS:
975     state->handler = element7;
976     return XML_ROLE_CONTENT_ELEMENT_PLUS;
977   }
978   return common(state, tok);
979 }
980 
981 static int PTRCALL
element3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)982 element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
983          const ENCODING *enc) {
984   UNUSED_P(ptr);
985   UNUSED_P(end);
986   UNUSED_P(enc);
987   switch (tok) {
988   case XML_TOK_PROLOG_S:
989     return XML_ROLE_ELEMENT_NONE;
990   case XML_TOK_CLOSE_PAREN:
991     state->handler = declClose;
992     state->role_none = XML_ROLE_ELEMENT_NONE;
993     return XML_ROLE_GROUP_CLOSE;
994   case XML_TOK_CLOSE_PAREN_ASTERISK:
995     state->handler = declClose;
996     state->role_none = XML_ROLE_ELEMENT_NONE;
997     return XML_ROLE_GROUP_CLOSE_REP;
998   case XML_TOK_OR:
999     state->handler = element4;
1000     return XML_ROLE_ELEMENT_NONE;
1001   }
1002   return common(state, tok);
1003 }
1004 
1005 static int PTRCALL
element4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1006 element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1007          const ENCODING *enc) {
1008   UNUSED_P(ptr);
1009   UNUSED_P(end);
1010   UNUSED_P(enc);
1011   switch (tok) {
1012   case XML_TOK_PROLOG_S:
1013     return XML_ROLE_ELEMENT_NONE;
1014   case XML_TOK_NAME:
1015   case XML_TOK_PREFIXED_NAME:
1016     state->handler = element5;
1017     return XML_ROLE_CONTENT_ELEMENT;
1018   }
1019   return common(state, tok);
1020 }
1021 
1022 static int PTRCALL
element5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1023 element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1024          const ENCODING *enc) {
1025   UNUSED_P(ptr);
1026   UNUSED_P(end);
1027   UNUSED_P(enc);
1028   switch (tok) {
1029   case XML_TOK_PROLOG_S:
1030     return XML_ROLE_ELEMENT_NONE;
1031   case XML_TOK_CLOSE_PAREN_ASTERISK:
1032     state->handler = declClose;
1033     state->role_none = XML_ROLE_ELEMENT_NONE;
1034     return XML_ROLE_GROUP_CLOSE_REP;
1035   case XML_TOK_OR:
1036     state->handler = element4;
1037     return XML_ROLE_ELEMENT_NONE;
1038   }
1039   return common(state, tok);
1040 }
1041 
1042 static int PTRCALL
element6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1043 element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1044          const ENCODING *enc) {
1045   UNUSED_P(ptr);
1046   UNUSED_P(end);
1047   UNUSED_P(enc);
1048   switch (tok) {
1049   case XML_TOK_PROLOG_S:
1050     return XML_ROLE_ELEMENT_NONE;
1051   case XML_TOK_OPEN_PAREN:
1052     state->level += 1;
1053     return XML_ROLE_GROUP_OPEN;
1054   case XML_TOK_NAME:
1055   case XML_TOK_PREFIXED_NAME:
1056     state->handler = element7;
1057     return XML_ROLE_CONTENT_ELEMENT;
1058   case XML_TOK_NAME_QUESTION:
1059     state->handler = element7;
1060     return XML_ROLE_CONTENT_ELEMENT_OPT;
1061   case XML_TOK_NAME_ASTERISK:
1062     state->handler = element7;
1063     return XML_ROLE_CONTENT_ELEMENT_REP;
1064   case XML_TOK_NAME_PLUS:
1065     state->handler = element7;
1066     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1067   }
1068   return common(state, tok);
1069 }
1070 
1071 static int PTRCALL
element7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1072 element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1073          const ENCODING *enc) {
1074   UNUSED_P(ptr);
1075   UNUSED_P(end);
1076   UNUSED_P(enc);
1077   switch (tok) {
1078   case XML_TOK_PROLOG_S:
1079     return XML_ROLE_ELEMENT_NONE;
1080   case XML_TOK_CLOSE_PAREN:
1081     state->level -= 1;
1082     if (state->level == 0) {
1083       state->handler = declClose;
1084       state->role_none = XML_ROLE_ELEMENT_NONE;
1085     }
1086     return XML_ROLE_GROUP_CLOSE;
1087   case XML_TOK_CLOSE_PAREN_ASTERISK:
1088     state->level -= 1;
1089     if (state->level == 0) {
1090       state->handler = declClose;
1091       state->role_none = XML_ROLE_ELEMENT_NONE;
1092     }
1093     return XML_ROLE_GROUP_CLOSE_REP;
1094   case XML_TOK_CLOSE_PAREN_QUESTION:
1095     state->level -= 1;
1096     if (state->level == 0) {
1097       state->handler = declClose;
1098       state->role_none = XML_ROLE_ELEMENT_NONE;
1099     }
1100     return XML_ROLE_GROUP_CLOSE_OPT;
1101   case XML_TOK_CLOSE_PAREN_PLUS:
1102     state->level -= 1;
1103     if (state->level == 0) {
1104       state->handler = declClose;
1105       state->role_none = XML_ROLE_ELEMENT_NONE;
1106     }
1107     return XML_ROLE_GROUP_CLOSE_PLUS;
1108   case XML_TOK_COMMA:
1109     state->handler = element6;
1110     return XML_ROLE_GROUP_SEQUENCE;
1111   case XML_TOK_OR:
1112     state->handler = element6;
1113     return XML_ROLE_GROUP_CHOICE;
1114   }
1115   return common(state, tok);
1116 }
1117 
1118 #ifdef XML_DTD
1119 
1120 static int PTRCALL
condSect0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1121 condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1122           const ENCODING *enc) {
1123   switch (tok) {
1124   case XML_TOK_PROLOG_S:
1125     return XML_ROLE_NONE;
1126   case XML_TOK_NAME:
1127     if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1128       state->handler = condSect1;
1129       return XML_ROLE_NONE;
1130     }
1131     if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1132       state->handler = condSect2;
1133       return XML_ROLE_NONE;
1134     }
1135     break;
1136   }
1137   return common(state, tok);
1138 }
1139 
1140 static int PTRCALL
condSect1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1141 condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1142           const ENCODING *enc) {
1143   UNUSED_P(ptr);
1144   UNUSED_P(end);
1145   UNUSED_P(enc);
1146   switch (tok) {
1147   case XML_TOK_PROLOG_S:
1148     return XML_ROLE_NONE;
1149   case XML_TOK_OPEN_BRACKET:
1150     state->handler = externalSubset1;
1151     state->includeLevel += 1;
1152     return XML_ROLE_NONE;
1153   }
1154   return common(state, tok);
1155 }
1156 
1157 static int PTRCALL
condSect2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1158 condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1159           const ENCODING *enc) {
1160   UNUSED_P(ptr);
1161   UNUSED_P(end);
1162   UNUSED_P(enc);
1163   switch (tok) {
1164   case XML_TOK_PROLOG_S:
1165     return XML_ROLE_NONE;
1166   case XML_TOK_OPEN_BRACKET:
1167     state->handler = externalSubset1;
1168     return XML_ROLE_IGNORE_SECT;
1169   }
1170   return common(state, tok);
1171 }
1172 
1173 #endif /* XML_DTD */
1174 
1175 static int PTRCALL
declClose(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1176 declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1177           const ENCODING *enc) {
1178   UNUSED_P(ptr);
1179   UNUSED_P(end);
1180   UNUSED_P(enc);
1181   switch (tok) {
1182   case XML_TOK_PROLOG_S:
1183     return state->role_none;
1184   case XML_TOK_DECL_CLOSE:
1185     setTopLevel(state);
1186     return state->role_none;
1187   }
1188   return common(state, tok);
1189 }
1190 
1191 /* This function will only be invoked if the internal logic of the
1192  * parser has broken down.  It is used in two cases:
1193  *
1194  * 1: When the XML prolog has been finished.  At this point the
1195  * processor (the parser level above these role handlers) should
1196  * switch from prologProcessor to contentProcessor and reinitialise
1197  * the handler function.
1198  *
1199  * 2: When an error has been detected (via common() below).  At this
1200  * point again the processor should be switched to errorProcessor,
1201  * which will never call a handler.
1202  *
1203  * The result of this is that error() can only be called if the
1204  * processor switch failed to happen, which is an internal error and
1205  * therefore we shouldn't be able to provoke it simply by using the
1206  * library.  It is a necessary backstop, however, so we merely exclude
1207  * it from the coverage statistics.
1208  *
1209  * LCOV_EXCL_START
1210  */
1211 static int PTRCALL
error(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1212 error(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1213       const ENCODING *enc) {
1214   UNUSED_P(state);
1215   UNUSED_P(tok);
1216   UNUSED_P(ptr);
1217   UNUSED_P(end);
1218   UNUSED_P(enc);
1219   return XML_ROLE_NONE;
1220 }
1221 /* LCOV_EXCL_STOP */
1222 
1223 static int FASTCALL
common(PROLOG_STATE * state,int tok)1224 common(PROLOG_STATE *state, int tok) {
1225 #ifdef XML_DTD
1226   if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1227     return XML_ROLE_INNER_PARAM_ENTITY_REF;
1228 #else
1229   UNUSED_P(tok);
1230 #endif
1231   state->handler = error;
1232   return XML_ROLE_ERROR;
1233 }
1234 
1235 void
XmlPrologStateInit(PROLOG_STATE * state)1236 XmlPrologStateInit(PROLOG_STATE *state) {
1237   state->handler = prolog0;
1238 #ifdef XML_DTD
1239   state->documentEntity = 1;
1240   state->includeLevel = 0;
1241   state->inEntityValue = 0;
1242 #endif /* XML_DTD */
1243 }
1244 
1245 #ifdef XML_DTD
1246 
1247 void
XmlPrologStateInitExternalEntity(PROLOG_STATE * state)1248 XmlPrologStateInitExternalEntity(PROLOG_STATE *state) {
1249   state->handler = externalSubset0;
1250   state->documentEntity = 0;
1251   state->includeLevel = 0;
1252 }
1253 
1254 #endif /* XML_DTD */
1255