1 /*
2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3 See the file COPYING for copying permission.
4 */
5
6
7 #ifdef COMPILED_FROM_DSP
8 # include "winconfig.h"
9 #else
10 # include <config.h>
11 #endif /* ndef COMPILED_FROM_DSP */
12
13 #include "xmlrole.h"
14 #include "ascii.h"
15
/* Not checked by this state machine:

   - that ',' and '|' are not mixed within a single model group
   - the content of literals

*/
22
23 static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' };
24 static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
25 static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
26 static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
27 static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
28 static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
29 static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
30 static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
31 static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
32 static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' };
33 static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
34 static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
35 static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
36 static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
37 static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
38 static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
39 static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
40 static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
41 static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' };
42 static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
43 static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
44 static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' };
45 static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
46
/* Reads the minBytesPerChar field of an encoding.  Only defined here when
   no earlier definition of the macro is in effect. */
#if !defined(MIN_BYTES_PER_CHAR)
# define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
50
/* Installs the handler used between declarations once a declaration has
   been closed.  With XML_DTD the choice depends on state->documentEntity:
   non-zero selects internalSubset, zero selects externalSubset1.  Without
   XML_DTD it is always internalSubset. */
#ifdef XML_DTD
# define setTopLevel(state) \
  ((state)->handler = \
     ((state)->documentEntity ? internalSubset : externalSubset1))
#else /* not XML_DTD */
# define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
59
60 typedef int PROLOG_HANDLER(PROLOG_STATE *state,
61 int tok,
62 const char *ptr,
63 const char *end,
64 const ENCODING *enc);
65
66 static PROLOG_HANDLER
67 prolog0, prolog1, prolog2,
68 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
69 internalSubset,
70 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
71 entity7, entity8, entity9,
72 notation0, notation1, notation2, notation3, notation4,
73 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
74 attlist7, attlist8, attlist9,
75 element0, element1, element2, element3, element4, element5, element6,
76 element7,
77 #ifdef XML_DTD
78 externalSubset0, externalSubset1,
79 condSect0, condSect1, condSect2,
80 #endif /* XML_DTD */
81 declClose,
82 error;
83
84 static
85 int common(PROLOG_STATE *state, int tok);
86
87 static
prolog0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)88 int prolog0(PROLOG_STATE *state,
89 int tok,
90 const char *ptr,
91 const char *end,
92 const ENCODING *enc)
93 {
94 switch (tok) {
95 case XML_TOK_PROLOG_S:
96 state->handler = prolog1;
97 return XML_ROLE_NONE;
98 case XML_TOK_XML_DECL:
99 state->handler = prolog1;
100 return XML_ROLE_XML_DECL;
101 case XML_TOK_PI:
102 state->handler = prolog1;
103 return XML_ROLE_NONE;
104 case XML_TOK_COMMENT:
105 state->handler = prolog1;
106 case XML_TOK_BOM:
107 return XML_ROLE_NONE;
108 case XML_TOK_DECL_OPEN:
109 if (!XmlNameMatchesAscii(enc,
110 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
111 end,
112 KW_DOCTYPE))
113 break;
114 state->handler = doctype0;
115 return XML_ROLE_NONE;
116 case XML_TOK_INSTANCE_START:
117 state->handler = error;
118 return XML_ROLE_INSTANCE_START;
119 }
120 return common(state, tok);
121 }
122
123 static
prolog1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)124 int prolog1(PROLOG_STATE *state,
125 int tok,
126 const char *ptr,
127 const char *end,
128 const ENCODING *enc)
129 {
130 switch (tok) {
131 case XML_TOK_PROLOG_S:
132 return XML_ROLE_NONE;
133 case XML_TOK_PI:
134 case XML_TOK_COMMENT:
135 case XML_TOK_BOM:
136 return XML_ROLE_NONE;
137 case XML_TOK_DECL_OPEN:
138 if (!XmlNameMatchesAscii(enc,
139 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
140 end,
141 KW_DOCTYPE))
142 break;
143 state->handler = doctype0;
144 return XML_ROLE_NONE;
145 case XML_TOK_INSTANCE_START:
146 state->handler = error;
147 return XML_ROLE_INSTANCE_START;
148 }
149 return common(state, tok);
150 }
151
152 static
prolog2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)153 int prolog2(PROLOG_STATE *state,
154 int tok,
155 const char *ptr,
156 const char *end,
157 const ENCODING *enc)
158 {
159 switch (tok) {
160 case XML_TOK_PROLOG_S:
161 return XML_ROLE_NONE;
162 case XML_TOK_PI:
163 case XML_TOK_COMMENT:
164 return XML_ROLE_NONE;
165 case XML_TOK_INSTANCE_START:
166 state->handler = error;
167 return XML_ROLE_INSTANCE_START;
168 }
169 return common(state, tok);
170 }
171
172 static
doctype0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)173 int doctype0(PROLOG_STATE *state,
174 int tok,
175 const char *ptr,
176 const char *end,
177 const ENCODING *enc)
178 {
179 switch (tok) {
180 case XML_TOK_PROLOG_S:
181 return XML_ROLE_NONE;
182 case XML_TOK_NAME:
183 case XML_TOK_PREFIXED_NAME:
184 state->handler = doctype1;
185 return XML_ROLE_DOCTYPE_NAME;
186 }
187 return common(state, tok);
188 }
189
190 static
doctype1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)191 int doctype1(PROLOG_STATE *state,
192 int tok,
193 const char *ptr,
194 const char *end,
195 const ENCODING *enc)
196 {
197 switch (tok) {
198 case XML_TOK_PROLOG_S:
199 return XML_ROLE_NONE;
200 case XML_TOK_OPEN_BRACKET:
201 state->handler = internalSubset;
202 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
203 case XML_TOK_DECL_CLOSE:
204 state->handler = prolog2;
205 return XML_ROLE_DOCTYPE_CLOSE;
206 case XML_TOK_NAME:
207 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
208 state->handler = doctype3;
209 return XML_ROLE_NONE;
210 }
211 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
212 state->handler = doctype2;
213 return XML_ROLE_NONE;
214 }
215 break;
216 }
217 return common(state, tok);
218 }
219
220 static
doctype2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)221 int doctype2(PROLOG_STATE *state,
222 int tok,
223 const char *ptr,
224 const char *end,
225 const ENCODING *enc)
226 {
227 switch (tok) {
228 case XML_TOK_PROLOG_S:
229 return XML_ROLE_NONE;
230 case XML_TOK_LITERAL:
231 state->handler = doctype3;
232 return XML_ROLE_DOCTYPE_PUBLIC_ID;
233 }
234 return common(state, tok);
235 }
236
237 static
doctype3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)238 int doctype3(PROLOG_STATE *state,
239 int tok,
240 const char *ptr,
241 const char *end,
242 const ENCODING *enc)
243 {
244 switch (tok) {
245 case XML_TOK_PROLOG_S:
246 return XML_ROLE_NONE;
247 case XML_TOK_LITERAL:
248 state->handler = doctype4;
249 return XML_ROLE_DOCTYPE_SYSTEM_ID;
250 }
251 return common(state, tok);
252 }
253
254 static
doctype4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)255 int doctype4(PROLOG_STATE *state,
256 int tok,
257 const char *ptr,
258 const char *end,
259 const ENCODING *enc)
260 {
261 switch (tok) {
262 case XML_TOK_PROLOG_S:
263 return XML_ROLE_NONE;
264 case XML_TOK_OPEN_BRACKET:
265 state->handler = internalSubset;
266 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
267 case XML_TOK_DECL_CLOSE:
268 state->handler = prolog2;
269 return XML_ROLE_DOCTYPE_CLOSE;
270 }
271 return common(state, tok);
272 }
273
274 static
doctype5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)275 int doctype5(PROLOG_STATE *state,
276 int tok,
277 const char *ptr,
278 const char *end,
279 const ENCODING *enc)
280 {
281 switch (tok) {
282 case XML_TOK_PROLOG_S:
283 return XML_ROLE_NONE;
284 case XML_TOK_DECL_CLOSE:
285 state->handler = prolog2;
286 return XML_ROLE_DOCTYPE_CLOSE;
287 }
288 return common(state, tok);
289 }
290
291 static
internalSubset(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)292 int internalSubset(PROLOG_STATE *state,
293 int tok,
294 const char *ptr,
295 const char *end,
296 const ENCODING *enc)
297 {
298 switch (tok) {
299 case XML_TOK_PROLOG_S:
300 return XML_ROLE_NONE;
301 case XML_TOK_DECL_OPEN:
302 if (XmlNameMatchesAscii(enc,
303 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
304 end,
305 KW_ENTITY)) {
306 state->handler = entity0;
307 return XML_ROLE_NONE;
308 }
309 if (XmlNameMatchesAscii(enc,
310 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
311 end,
312 KW_ATTLIST)) {
313 state->handler = attlist0;
314 return XML_ROLE_NONE;
315 }
316 if (XmlNameMatchesAscii(enc,
317 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
318 end,
319 KW_ELEMENT)) {
320 state->handler = element0;
321 return XML_ROLE_NONE;
322 }
323 if (XmlNameMatchesAscii(enc,
324 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
325 end,
326 KW_NOTATION)) {
327 state->handler = notation0;
328 return XML_ROLE_NONE;
329 }
330 break;
331 case XML_TOK_PI:
332 case XML_TOK_COMMENT:
333 return XML_ROLE_NONE;
334 case XML_TOK_PARAM_ENTITY_REF:
335 return XML_ROLE_PARAM_ENTITY_REF;
336 case XML_TOK_CLOSE_BRACKET:
337 state->handler = doctype5;
338 return XML_ROLE_NONE;
339 }
340 return common(state, tok);
341 }
342
343 #ifdef XML_DTD
344
345 static
externalSubset0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)346 int externalSubset0(PROLOG_STATE *state,
347 int tok,
348 const char *ptr,
349 const char *end,
350 const ENCODING *enc)
351 {
352 state->handler = externalSubset1;
353 if (tok == XML_TOK_XML_DECL)
354 return XML_ROLE_TEXT_DECL;
355 return externalSubset1(state, tok, ptr, end, enc);
356 }
357
358 static
externalSubset1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)359 int externalSubset1(PROLOG_STATE *state,
360 int tok,
361 const char *ptr,
362 const char *end,
363 const ENCODING *enc)
364 {
365 switch (tok) {
366 case XML_TOK_COND_SECT_OPEN:
367 state->handler = condSect0;
368 return XML_ROLE_NONE;
369 case XML_TOK_COND_SECT_CLOSE:
370 if (state->includeLevel == 0)
371 break;
372 state->includeLevel -= 1;
373 return XML_ROLE_NONE;
374 case XML_TOK_PROLOG_S:
375 return XML_ROLE_NONE;
376 case XML_TOK_CLOSE_BRACKET:
377 break;
378 case XML_TOK_NONE:
379 if (state->includeLevel)
380 break;
381 return XML_ROLE_NONE;
382 default:
383 return internalSubset(state, tok, ptr, end, enc);
384 }
385 return common(state, tok);
386 }
387
388 #endif /* XML_DTD */
389
390 static
entity0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)391 int entity0(PROLOG_STATE *state,
392 int tok,
393 const char *ptr,
394 const char *end,
395 const ENCODING *enc)
396 {
397 switch (tok) {
398 case XML_TOK_PROLOG_S:
399 return XML_ROLE_NONE;
400 case XML_TOK_PERCENT:
401 state->handler = entity1;
402 return XML_ROLE_NONE;
403 case XML_TOK_NAME:
404 state->handler = entity2;
405 return XML_ROLE_GENERAL_ENTITY_NAME;
406 }
407 return common(state, tok);
408 }
409
410 static
entity1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)411 int entity1(PROLOG_STATE *state,
412 int tok,
413 const char *ptr,
414 const char *end,
415 const ENCODING *enc)
416 {
417 switch (tok) {
418 case XML_TOK_PROLOG_S:
419 return XML_ROLE_NONE;
420 case XML_TOK_NAME:
421 state->handler = entity7;
422 return XML_ROLE_PARAM_ENTITY_NAME;
423 }
424 return common(state, tok);
425 }
426
427 static
entity2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)428 int entity2(PROLOG_STATE *state,
429 int tok,
430 const char *ptr,
431 const char *end,
432 const ENCODING *enc)
433 {
434 switch (tok) {
435 case XML_TOK_PROLOG_S:
436 return XML_ROLE_NONE;
437 case XML_TOK_NAME:
438 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
439 state->handler = entity4;
440 return XML_ROLE_NONE;
441 }
442 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
443 state->handler = entity3;
444 return XML_ROLE_NONE;
445 }
446 break;
447 case XML_TOK_LITERAL:
448 state->handler = declClose;
449 return XML_ROLE_ENTITY_VALUE;
450 }
451 return common(state, tok);
452 }
453
454 static
entity3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)455 int entity3(PROLOG_STATE *state,
456 int tok,
457 const char *ptr,
458 const char *end,
459 const ENCODING *enc)
460 {
461 switch (tok) {
462 case XML_TOK_PROLOG_S:
463 return XML_ROLE_NONE;
464 case XML_TOK_LITERAL:
465 state->handler = entity4;
466 return XML_ROLE_ENTITY_PUBLIC_ID;
467 }
468 return common(state, tok);
469 }
470
471
472 static
entity4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)473 int entity4(PROLOG_STATE *state,
474 int tok,
475 const char *ptr,
476 const char *end,
477 const ENCODING *enc)
478 {
479 switch (tok) {
480 case XML_TOK_PROLOG_S:
481 return XML_ROLE_NONE;
482 case XML_TOK_LITERAL:
483 state->handler = entity5;
484 return XML_ROLE_ENTITY_SYSTEM_ID;
485 }
486 return common(state, tok);
487 }
488
489 static
entity5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)490 int entity5(PROLOG_STATE *state,
491 int tok,
492 const char *ptr,
493 const char *end,
494 const ENCODING *enc)
495 {
496 switch (tok) {
497 case XML_TOK_PROLOG_S:
498 return XML_ROLE_NONE;
499 case XML_TOK_DECL_CLOSE:
500 setTopLevel(state);
501 return XML_ROLE_ENTITY_COMPLETE;
502 case XML_TOK_NAME:
503 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
504 state->handler = entity6;
505 return XML_ROLE_NONE;
506 }
507 break;
508 }
509 return common(state, tok);
510 }
511
512 static
entity6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)513 int entity6(PROLOG_STATE *state,
514 int tok,
515 const char *ptr,
516 const char *end,
517 const ENCODING *enc)
518 {
519 switch (tok) {
520 case XML_TOK_PROLOG_S:
521 return XML_ROLE_NONE;
522 case XML_TOK_NAME:
523 state->handler = declClose;
524 return XML_ROLE_ENTITY_NOTATION_NAME;
525 }
526 return common(state, tok);
527 }
528
529 static
entity7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)530 int entity7(PROLOG_STATE *state,
531 int tok,
532 const char *ptr,
533 const char *end,
534 const ENCODING *enc)
535 {
536 switch (tok) {
537 case XML_TOK_PROLOG_S:
538 return XML_ROLE_NONE;
539 case XML_TOK_NAME:
540 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
541 state->handler = entity9;
542 return XML_ROLE_NONE;
543 }
544 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
545 state->handler = entity8;
546 return XML_ROLE_NONE;
547 }
548 break;
549 case XML_TOK_LITERAL:
550 state->handler = declClose;
551 return XML_ROLE_ENTITY_VALUE;
552 }
553 return common(state, tok);
554 }
555
556 static
entity8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)557 int entity8(PROLOG_STATE *state,
558 int tok,
559 const char *ptr,
560 const char *end,
561 const ENCODING *enc)
562 {
563 switch (tok) {
564 case XML_TOK_PROLOG_S:
565 return XML_ROLE_NONE;
566 case XML_TOK_LITERAL:
567 state->handler = entity9;
568 return XML_ROLE_ENTITY_PUBLIC_ID;
569 }
570 return common(state, tok);
571 }
572
573 static
entity9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)574 int entity9(PROLOG_STATE *state,
575 int tok,
576 const char *ptr,
577 const char *end,
578 const ENCODING *enc)
579 {
580 switch (tok) {
581 case XML_TOK_PROLOG_S:
582 return XML_ROLE_NONE;
583 case XML_TOK_LITERAL:
584 state->handler = declClose;
585 return XML_ROLE_ENTITY_SYSTEM_ID;
586 }
587 return common(state, tok);
588 }
589
590 static
notation0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)591 int notation0(PROLOG_STATE *state,
592 int tok,
593 const char *ptr,
594 const char *end,
595 const ENCODING *enc)
596 {
597 switch (tok) {
598 case XML_TOK_PROLOG_S:
599 return XML_ROLE_NONE;
600 case XML_TOK_NAME:
601 state->handler = notation1;
602 return XML_ROLE_NOTATION_NAME;
603 }
604 return common(state, tok);
605 }
606
607 static
notation1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)608 int notation1(PROLOG_STATE *state,
609 int tok,
610 const char *ptr,
611 const char *end,
612 const ENCODING *enc)
613 {
614 switch (tok) {
615 case XML_TOK_PROLOG_S:
616 return XML_ROLE_NONE;
617 case XML_TOK_NAME:
618 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
619 state->handler = notation3;
620 return XML_ROLE_NONE;
621 }
622 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
623 state->handler = notation2;
624 return XML_ROLE_NONE;
625 }
626 break;
627 }
628 return common(state, tok);
629 }
630
631 static
notation2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)632 int notation2(PROLOG_STATE *state,
633 int tok,
634 const char *ptr,
635 const char *end,
636 const ENCODING *enc)
637 {
638 switch (tok) {
639 case XML_TOK_PROLOG_S:
640 return XML_ROLE_NONE;
641 case XML_TOK_LITERAL:
642 state->handler = notation4;
643 return XML_ROLE_NOTATION_PUBLIC_ID;
644 }
645 return common(state, tok);
646 }
647
648 static
notation3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)649 int notation3(PROLOG_STATE *state,
650 int tok,
651 const char *ptr,
652 const char *end,
653 const ENCODING *enc)
654 {
655 switch (tok) {
656 case XML_TOK_PROLOG_S:
657 return XML_ROLE_NONE;
658 case XML_TOK_LITERAL:
659 state->handler = declClose;
660 return XML_ROLE_NOTATION_SYSTEM_ID;
661 }
662 return common(state, tok);
663 }
664
665 static
notation4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)666 int notation4(PROLOG_STATE *state,
667 int tok,
668 const char *ptr,
669 const char *end,
670 const ENCODING *enc)
671 {
672 switch (tok) {
673 case XML_TOK_PROLOG_S:
674 return XML_ROLE_NONE;
675 case XML_TOK_LITERAL:
676 state->handler = declClose;
677 return XML_ROLE_NOTATION_SYSTEM_ID;
678 case XML_TOK_DECL_CLOSE:
679 setTopLevel(state);
680 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
681 }
682 return common(state, tok);
683 }
684
685 static
attlist0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)686 int attlist0(PROLOG_STATE *state,
687 int tok,
688 const char *ptr,
689 const char *end,
690 const ENCODING *enc)
691 {
692 switch (tok) {
693 case XML_TOK_PROLOG_S:
694 return XML_ROLE_NONE;
695 case XML_TOK_NAME:
696 case XML_TOK_PREFIXED_NAME:
697 state->handler = attlist1;
698 return XML_ROLE_ATTLIST_ELEMENT_NAME;
699 }
700 return common(state, tok);
701 }
702
703 static
attlist1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)704 int attlist1(PROLOG_STATE *state,
705 int tok,
706 const char *ptr,
707 const char *end,
708 const ENCODING *enc)
709 {
710 switch (tok) {
711 case XML_TOK_PROLOG_S:
712 return XML_ROLE_NONE;
713 case XML_TOK_DECL_CLOSE:
714 setTopLevel(state);
715 return XML_ROLE_NONE;
716 case XML_TOK_NAME:
717 case XML_TOK_PREFIXED_NAME:
718 state->handler = attlist2;
719 return XML_ROLE_ATTRIBUTE_NAME;
720 }
721 return common(state, tok);
722 }
723
724 static
attlist2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)725 int attlist2(PROLOG_STATE *state,
726 int tok,
727 const char *ptr,
728 const char *end,
729 const ENCODING *enc)
730 {
731 switch (tok) {
732 case XML_TOK_PROLOG_S:
733 return XML_ROLE_NONE;
734 case XML_TOK_NAME:
735 {
736 static const char *types[] = {
737 KW_CDATA,
738 KW_ID,
739 KW_IDREF,
740 KW_IDREFS,
741 KW_ENTITY,
742 KW_ENTITIES,
743 KW_NMTOKEN,
744 KW_NMTOKENS,
745 };
746 int i;
747 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
748 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
749 state->handler = attlist8;
750 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
751 }
752 }
753 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
754 state->handler = attlist5;
755 return XML_ROLE_NONE;
756 }
757 break;
758 case XML_TOK_OPEN_PAREN:
759 state->handler = attlist3;
760 return XML_ROLE_NONE;
761 }
762 return common(state, tok);
763 }
764
765 static
attlist3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)766 int attlist3(PROLOG_STATE *state,
767 int tok,
768 const char *ptr,
769 const char *end,
770 const ENCODING *enc)
771 {
772 switch (tok) {
773 case XML_TOK_PROLOG_S:
774 return XML_ROLE_NONE;
775 case XML_TOK_NMTOKEN:
776 case XML_TOK_NAME:
777 case XML_TOK_PREFIXED_NAME:
778 state->handler = attlist4;
779 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
780 }
781 return common(state, tok);
782 }
783
784 static
attlist4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)785 int attlist4(PROLOG_STATE *state,
786 int tok,
787 const char *ptr,
788 const char *end,
789 const ENCODING *enc)
790 {
791 switch (tok) {
792 case XML_TOK_PROLOG_S:
793 return XML_ROLE_NONE;
794 case XML_TOK_CLOSE_PAREN:
795 state->handler = attlist8;
796 return XML_ROLE_NONE;
797 case XML_TOK_OR:
798 state->handler = attlist3;
799 return XML_ROLE_NONE;
800 }
801 return common(state, tok);
802 }
803
804 static
attlist5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)805 int attlist5(PROLOG_STATE *state,
806 int tok,
807 const char *ptr,
808 const char *end,
809 const ENCODING *enc)
810 {
811 switch (tok) {
812 case XML_TOK_PROLOG_S:
813 return XML_ROLE_NONE;
814 case XML_TOK_OPEN_PAREN:
815 state->handler = attlist6;
816 return XML_ROLE_NONE;
817 }
818 return common(state, tok);
819 }
820
821
822 static
attlist6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)823 int attlist6(PROLOG_STATE *state,
824 int tok,
825 const char *ptr,
826 const char *end,
827 const ENCODING *enc)
828 {
829 switch (tok) {
830 case XML_TOK_PROLOG_S:
831 return XML_ROLE_NONE;
832 case XML_TOK_NAME:
833 state->handler = attlist7;
834 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
835 }
836 return common(state, tok);
837 }
838
839 static
attlist7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)840 int attlist7(PROLOG_STATE *state,
841 int tok,
842 const char *ptr,
843 const char *end,
844 const ENCODING *enc)
845 {
846 switch (tok) {
847 case XML_TOK_PROLOG_S:
848 return XML_ROLE_NONE;
849 case XML_TOK_CLOSE_PAREN:
850 state->handler = attlist8;
851 return XML_ROLE_NONE;
852 case XML_TOK_OR:
853 state->handler = attlist6;
854 return XML_ROLE_NONE;
855 }
856 return common(state, tok);
857 }
858
859 /* default value */
860 static
attlist8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)861 int attlist8(PROLOG_STATE *state,
862 int tok,
863 const char *ptr,
864 const char *end,
865 const ENCODING *enc)
866 {
867 switch (tok) {
868 case XML_TOK_PROLOG_S:
869 return XML_ROLE_NONE;
870 case XML_TOK_POUND_NAME:
871 if (XmlNameMatchesAscii(enc,
872 ptr + MIN_BYTES_PER_CHAR(enc),
873 end,
874 KW_IMPLIED)) {
875 state->handler = attlist1;
876 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
877 }
878 if (XmlNameMatchesAscii(enc,
879 ptr + MIN_BYTES_PER_CHAR(enc),
880 end,
881 KW_REQUIRED)) {
882 state->handler = attlist1;
883 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
884 }
885 if (XmlNameMatchesAscii(enc,
886 ptr + MIN_BYTES_PER_CHAR(enc),
887 end,
888 KW_FIXED)) {
889 state->handler = attlist9;
890 return XML_ROLE_NONE;
891 }
892 break;
893 case XML_TOK_LITERAL:
894 state->handler = attlist1;
895 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
896 }
897 return common(state, tok);
898 }
899
900 static
attlist9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)901 int attlist9(PROLOG_STATE *state,
902 int tok,
903 const char *ptr,
904 const char *end,
905 const ENCODING *enc)
906 {
907 switch (tok) {
908 case XML_TOK_PROLOG_S:
909 return XML_ROLE_NONE;
910 case XML_TOK_LITERAL:
911 state->handler = attlist1;
912 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
913 }
914 return common(state, tok);
915 }
916
917 static
element0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)918 int element0(PROLOG_STATE *state,
919 int tok,
920 const char *ptr,
921 const char *end,
922 const ENCODING *enc)
923 {
924 switch (tok) {
925 case XML_TOK_PROLOG_S:
926 return XML_ROLE_NONE;
927 case XML_TOK_NAME:
928 case XML_TOK_PREFIXED_NAME:
929 state->handler = element1;
930 return XML_ROLE_ELEMENT_NAME;
931 }
932 return common(state, tok);
933 }
934
935 static
element1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)936 int element1(PROLOG_STATE *state,
937 int tok,
938 const char *ptr,
939 const char *end,
940 const ENCODING *enc)
941 {
942 switch (tok) {
943 case XML_TOK_PROLOG_S:
944 return XML_ROLE_NONE;
945 case XML_TOK_NAME:
946 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
947 state->handler = declClose;
948 return XML_ROLE_CONTENT_EMPTY;
949 }
950 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
951 state->handler = declClose;
952 return XML_ROLE_CONTENT_ANY;
953 }
954 break;
955 case XML_TOK_OPEN_PAREN:
956 state->handler = element2;
957 state->level = 1;
958 return XML_ROLE_GROUP_OPEN;
959 }
960 return common(state, tok);
961 }
962
963 static
element2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)964 int element2(PROLOG_STATE *state,
965 int tok,
966 const char *ptr,
967 const char *end,
968 const ENCODING *enc)
969 {
970 switch (tok) {
971 case XML_TOK_PROLOG_S:
972 return XML_ROLE_NONE;
973 case XML_TOK_POUND_NAME:
974 if (XmlNameMatchesAscii(enc,
975 ptr + MIN_BYTES_PER_CHAR(enc),
976 end,
977 KW_PCDATA)) {
978 state->handler = element3;
979 return XML_ROLE_CONTENT_PCDATA;
980 }
981 break;
982 case XML_TOK_OPEN_PAREN:
983 state->level = 2;
984 state->handler = element6;
985 return XML_ROLE_GROUP_OPEN;
986 case XML_TOK_NAME:
987 case XML_TOK_PREFIXED_NAME:
988 state->handler = element7;
989 return XML_ROLE_CONTENT_ELEMENT;
990 case XML_TOK_NAME_QUESTION:
991 state->handler = element7;
992 return XML_ROLE_CONTENT_ELEMENT_OPT;
993 case XML_TOK_NAME_ASTERISK:
994 state->handler = element7;
995 return XML_ROLE_CONTENT_ELEMENT_REP;
996 case XML_TOK_NAME_PLUS:
997 state->handler = element7;
998 return XML_ROLE_CONTENT_ELEMENT_PLUS;
999 }
1000 return common(state, tok);
1001 }
1002
1003 static
element3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1004 int element3(PROLOG_STATE *state,
1005 int tok,
1006 const char *ptr,
1007 const char *end,
1008 const ENCODING *enc)
1009 {
1010 switch (tok) {
1011 case XML_TOK_PROLOG_S:
1012 return XML_ROLE_NONE;
1013 case XML_TOK_CLOSE_PAREN:
1014 state->handler = declClose;
1015 return XML_ROLE_GROUP_CLOSE;
1016 case XML_TOK_CLOSE_PAREN_ASTERISK:
1017 state->handler = declClose;
1018 return XML_ROLE_GROUP_CLOSE_REP;
1019 case XML_TOK_OR:
1020 state->handler = element4;
1021 return XML_ROLE_NONE;
1022 }
1023 return common(state, tok);
1024 }
1025
1026 static
element4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1027 int element4(PROLOG_STATE *state,
1028 int tok,
1029 const char *ptr,
1030 const char *end,
1031 const ENCODING *enc)
1032 {
1033 switch (tok) {
1034 case XML_TOK_PROLOG_S:
1035 return XML_ROLE_NONE;
1036 case XML_TOK_NAME:
1037 case XML_TOK_PREFIXED_NAME:
1038 state->handler = element5;
1039 return XML_ROLE_CONTENT_ELEMENT;
1040 }
1041 return common(state, tok);
1042 }
1043
1044 static
element5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1045 int element5(PROLOG_STATE *state,
1046 int tok,
1047 const char *ptr,
1048 const char *end,
1049 const ENCODING *enc)
1050 {
1051 switch (tok) {
1052 case XML_TOK_PROLOG_S:
1053 return XML_ROLE_NONE;
1054 case XML_TOK_CLOSE_PAREN_ASTERISK:
1055 state->handler = declClose;
1056 return XML_ROLE_GROUP_CLOSE_REP;
1057 case XML_TOK_OR:
1058 state->handler = element4;
1059 return XML_ROLE_NONE;
1060 }
1061 return common(state, tok);
1062 }
1063
1064 static
element6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1065 int element6(PROLOG_STATE *state,
1066 int tok,
1067 const char *ptr,
1068 const char *end,
1069 const ENCODING *enc)
1070 {
1071 switch (tok) {
1072 case XML_TOK_PROLOG_S:
1073 return XML_ROLE_NONE;
1074 case XML_TOK_OPEN_PAREN:
1075 state->level += 1;
1076 return XML_ROLE_GROUP_OPEN;
1077 case XML_TOK_NAME:
1078 case XML_TOK_PREFIXED_NAME:
1079 state->handler = element7;
1080 return XML_ROLE_CONTENT_ELEMENT;
1081 case XML_TOK_NAME_QUESTION:
1082 state->handler = element7;
1083 return XML_ROLE_CONTENT_ELEMENT_OPT;
1084 case XML_TOK_NAME_ASTERISK:
1085 state->handler = element7;
1086 return XML_ROLE_CONTENT_ELEMENT_REP;
1087 case XML_TOK_NAME_PLUS:
1088 state->handler = element7;
1089 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1090 }
1091 return common(state, tok);
1092 }
1093
1094 static
element7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1095 int element7(PROLOG_STATE *state,
1096 int tok,
1097 const char *ptr,
1098 const char *end,
1099 const ENCODING *enc)
1100 {
1101 switch (tok) {
1102 case XML_TOK_PROLOG_S:
1103 return XML_ROLE_NONE;
1104 case XML_TOK_CLOSE_PAREN:
1105 state->level -= 1;
1106 if (state->level == 0)
1107 state->handler = declClose;
1108 return XML_ROLE_GROUP_CLOSE;
1109 case XML_TOK_CLOSE_PAREN_ASTERISK:
1110 state->level -= 1;
1111 if (state->level == 0)
1112 state->handler = declClose;
1113 return XML_ROLE_GROUP_CLOSE_REP;
1114 case XML_TOK_CLOSE_PAREN_QUESTION:
1115 state->level -= 1;
1116 if (state->level == 0)
1117 state->handler = declClose;
1118 return XML_ROLE_GROUP_CLOSE_OPT;
1119 case XML_TOK_CLOSE_PAREN_PLUS:
1120 state->level -= 1;
1121 if (state->level == 0)
1122 state->handler = declClose;
1123 return XML_ROLE_GROUP_CLOSE_PLUS;
1124 case XML_TOK_COMMA:
1125 state->handler = element6;
1126 return XML_ROLE_GROUP_SEQUENCE;
1127 case XML_TOK_OR:
1128 state->handler = element6;
1129 return XML_ROLE_GROUP_CHOICE;
1130 }
1131 return common(state, tok);
1132 }
1133
1134 #ifdef XML_DTD
1135
1136 static
condSect0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1137 int condSect0(PROLOG_STATE *state,
1138 int tok,
1139 const char *ptr,
1140 const char *end,
1141 const ENCODING *enc)
1142 {
1143 switch (tok) {
1144 case XML_TOK_PROLOG_S:
1145 return XML_ROLE_NONE;
1146 case XML_TOK_NAME:
1147 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1148 state->handler = condSect1;
1149 return XML_ROLE_NONE;
1150 }
1151 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1152 state->handler = condSect2;
1153 return XML_ROLE_NONE;
1154 }
1155 break;
1156 }
1157 return common(state, tok);
1158 }
1159
1160 static
condSect1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1161 int condSect1(PROLOG_STATE *state,
1162 int tok,
1163 const char *ptr,
1164 const char *end,
1165 const ENCODING *enc)
1166 {
1167 switch (tok) {
1168 case XML_TOK_PROLOG_S:
1169 return XML_ROLE_NONE;
1170 case XML_TOK_OPEN_BRACKET:
1171 state->handler = externalSubset1;
1172 state->includeLevel += 1;
1173 return XML_ROLE_NONE;
1174 }
1175 return common(state, tok);
1176 }
1177
1178 static
condSect2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1179 int condSect2(PROLOG_STATE *state,
1180 int tok,
1181 const char *ptr,
1182 const char *end,
1183 const ENCODING *enc)
1184 {
1185 switch (tok) {
1186 case XML_TOK_PROLOG_S:
1187 return XML_ROLE_NONE;
1188 case XML_TOK_OPEN_BRACKET:
1189 state->handler = externalSubset1;
1190 return XML_ROLE_IGNORE_SECT;
1191 }
1192 return common(state, tok);
1193 }
1194
1195 #endif /* XML_DTD */
1196
1197 static
declClose(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1198 int declClose(PROLOG_STATE *state,
1199 int tok,
1200 const char *ptr,
1201 const char *end,
1202 const ENCODING *enc)
1203 {
1204 switch (tok) {
1205 case XML_TOK_PROLOG_S:
1206 return XML_ROLE_NONE;
1207 case XML_TOK_DECL_CLOSE:
1208 setTopLevel(state);
1209 return XML_ROLE_NONE;
1210 }
1211 return common(state, tok);
1212 }
1213
1214 #if 0
1215
1216 static
1217 int ignore(PROLOG_STATE *state,
1218 int tok,
1219 const char *ptr,
1220 const char *end,
1221 const ENCODING *enc)
1222 {
1223 switch (tok) {
1224 case XML_TOK_DECL_CLOSE:
1225 state->handler = internalSubset;
1226 return 0;
1227 default:
1228 return XML_ROLE_NONE;
1229 }
1230 return common(state, tok);
1231 }
1232 #endif
1233
1234 static
error(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1235 int error(PROLOG_STATE *state,
1236 int tok,
1237 const char *ptr,
1238 const char *end,
1239 const ENCODING *enc)
1240 {
1241 return XML_ROLE_NONE;
1242 }
1243
1244 static
common(PROLOG_STATE * state,int tok)1245 int common(PROLOG_STATE *state, int tok)
1246 {
1247 #ifdef XML_DTD
1248 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1249 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1250 #endif
1251 state->handler = error;
1252 return XML_ROLE_ERROR;
1253 }
1254
/* Prepares `state` for the document entity: processing starts in prolog0.
   With XML_DTD the state is also flagged as belonging to the document
   entity and the conditional-section nesting count is reset to zero. */
void
XmlPrologStateInit(PROLOG_STATE *state)
{
#ifdef XML_DTD
  state->includeLevel = 0;
  state->documentEntity = 1;
#endif /* XML_DTD */
  state->handler = prolog0;
}
1263
1264 #ifdef XML_DTD
1265
/* Prepares `state` for an external entity: processing starts in
   externalSubset0, the state is flagged as not belonging to the document
   entity, and the conditional-section nesting count is reset to zero. */
void
XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
{
  state->includeLevel = 0;
  state->documentEntity = 0;
  state->handler = externalSubset0;
}
1272
1273 #endif /* XML_DTD */
1274