1 /*
2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3 See the file COPYING for copying permission.
4 */
5
/* Default validity test for an n-byte character: unless IS_INVALID_CHAR has
   already been defined before this point, no character is ever reported as
   invalid. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
9
/* Expands to the `case BT_LEAD<n>:` arm of a byte-type switch, handling the
   lead byte of an n-byte character.  `enc` and `end` are taken from the scope
   in which the macro is expanded.
     - fewer than n bytes remain before end: returns XML_TOK_PARTIAL_CHAR;
     - IS_INVALID_CHAR(enc, ptr, n) is true: stores ptr in *nextTokPtr and
       returns XML_TOK_INVALID;
     - otherwise: advances ptr by n bytes and executes `break`.
   Arguments are parenthesized wherever they appear inside an expression. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;
20
/* Expands to the switch arms shared by every scanner that must reject bad
   input: the 2-, 3- and 4-byte lead-byte arms (see INVALID_LEAD_CASE),
   followed by the byte types BT_TRAIL, BT_MALFORM and BT_NONXML, for which
   ptr is stored in *nextTokPtr and XML_TOK_INVALID is returned. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_TRAIL: case BT_MALFORM: case BT_NONXML: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
30
/* Expands to the `case BT_LEAD<n>:` arm used while scanning the body of a
   name, for an n-byte character.
     - fewer than n bytes remain before end: returns XML_TOK_PARTIAL_CHAR;
     - IS_NAME_CHAR(enc, ptr, n) is false: stores ptr in *nextTokPtr and
       returns XML_TOK_INVALID;
     - otherwise: advances ptr by n bytes and executes `break`.
   Arguments are parenthesized wherever they appear inside an expression,
   matching INVALID_LEAD_CASE. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NAME_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;
41
/* Expands to all switch arms that accept one name character at ptr:
     - BT_NONASCII: rejected (ptr stored in *nextTokPtr, XML_TOK_INVALID
       returned) unless IS_NAME_CHAR_MINBPC accepts it; otherwise handled
       like the single-unit name characters below;
     - BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME, BT_MINUS: ptr advances by
       MINBPC(enc) and `break` is executed;
     - BT_LEAD2/3/4: handled by CHECK_NAME_CASE.
   Arguments are parenthesized wherever they appear inside an expression. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
58
/* Expands to the `case BT_LEAD<n>:` arm used for the first character of a
   name, for an n-byte character.
     - fewer than n bytes remain before end: returns XML_TOK_PARTIAL_CHAR;
     - IS_NMSTRT_CHAR(enc, ptr, n) is false: stores ptr in *nextTokPtr and
       returns XML_TOK_INVALID;
     - otherwise: advances ptr by n bytes and executes `break`.
   Arguments are parenthesized wherever they appear inside an expression,
   matching INVALID_LEAD_CASE. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;
69
/* Expands to all switch arms that accept one name-start character at ptr:
     - BT_NONASCII: rejected (ptr stored in *nextTokPtr, XML_TOK_INVALID
       returned) unless IS_NMSTRT_CHAR_MINBPC accepts it; otherwise handled
       like the single-unit name-start characters below;
     - BT_NMSTRT, BT_HEX: ptr advances by MINBPC(enc) and `break` is executed;
     - BT_LEAD2/3/4: handled by CHECK_NMSTRT_CASE.
   Arguments are parenthesized wherever they appear inside an expression. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
83
/* Name-decoration hook for the functions below: when PREFIX has not been
   defined before this point, identifiers are used unchanged. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
87
88 /* ptr points to character following "<!-" */
89
90 static
PREFIX(scanComment)91 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
92 const char **nextTokPtr)
93 {
94 if (ptr != end) {
95 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
96 *nextTokPtr = ptr;
97 return XML_TOK_INVALID;
98 }
99 ptr += MINBPC(enc);
100 while (ptr != end) {
101 switch (BYTE_TYPE(enc, ptr)) {
102 INVALID_CASES(ptr, nextTokPtr)
103 case BT_MINUS:
104 if ((ptr += MINBPC(enc)) == end)
105 return XML_TOK_PARTIAL;
106 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
107 if ((ptr += MINBPC(enc)) == end)
108 return XML_TOK_PARTIAL;
109 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
110 *nextTokPtr = ptr;
111 return XML_TOK_INVALID;
112 }
113 *nextTokPtr = ptr + MINBPC(enc);
114 return XML_TOK_COMMENT;
115 }
116 break;
117 default:
118 ptr += MINBPC(enc);
119 break;
120 }
121 }
122 }
123 return XML_TOK_PARTIAL;
124 }
125
126 /* ptr points to character following "<!" */
127
128 static
PREFIX(scanDecl)129 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
130 const char **nextTokPtr)
131 {
132 if (ptr == end)
133 return XML_TOK_PARTIAL;
134 switch (BYTE_TYPE(enc, ptr)) {
135 case BT_MINUS:
136 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
137 case BT_LSQB:
138 *nextTokPtr = ptr + MINBPC(enc);
139 return XML_TOK_COND_SECT_OPEN;
140 case BT_NMSTRT:
141 case BT_HEX:
142 ptr += MINBPC(enc);
143 break;
144 default:
145 *nextTokPtr = ptr;
146 return XML_TOK_INVALID;
147 }
148 while (ptr != end) {
149 switch (BYTE_TYPE(enc, ptr)) {
150 case BT_PERCNT:
151 if (ptr + MINBPC(enc) == end)
152 return XML_TOK_PARTIAL;
153 /* don't allow <!ENTITY% foo "whatever"> */
154 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
155 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
156 *nextTokPtr = ptr;
157 return XML_TOK_INVALID;
158 }
159 /* fall through */
160 case BT_S: case BT_CR: case BT_LF:
161 *nextTokPtr = ptr;
162 return XML_TOK_DECL_OPEN;
163 case BT_NMSTRT:
164 case BT_HEX:
165 ptr += MINBPC(enc);
166 break;
167 default:
168 *nextTokPtr = ptr;
169 return XML_TOK_INVALID;
170 }
171 }
172 return XML_TOK_PARTIAL;
173 }
174
175 static
PREFIX(checkPiTarget)176 int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
177 {
178 int upper = 0;
179 vtkExpatUnused(enc);
180 *tokPtr = XML_TOK_PI;
181 if (end - ptr != MINBPC(enc)*3)
182 return 1;
183 switch (BYTE_TO_ASCII(enc, ptr)) {
184 case ASCII_x:
185 break;
186 case ASCII_X:
187 upper = 1;
188 break;
189 default:
190 return 1;
191 }
192 ptr += MINBPC(enc);
193 switch (BYTE_TO_ASCII(enc, ptr)) {
194 case ASCII_m:
195 break;
196 case ASCII_M:
197 upper = 1;
198 break;
199 default:
200 return 1;
201 }
202 ptr += MINBPC(enc);
203 switch (BYTE_TO_ASCII(enc, ptr)) {
204 case ASCII_l:
205 break;
206 case ASCII_L:
207 upper = 1;
208 break;
209 default:
210 return 1;
211 }
212 if (upper)
213 return 0;
214 *tokPtr = XML_TOK_XML_DECL;
215 return 1;
216 }
217
218 /* ptr points to character following "<?" */
219
220 static
PREFIX(scanPi)221 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
222 const char **nextTokPtr)
223 {
224 int tok;
225 const char *target = ptr;
226 if (ptr == end)
227 return XML_TOK_PARTIAL;
228 switch (BYTE_TYPE(enc, ptr)) {
229 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
230 default:
231 *nextTokPtr = ptr;
232 return XML_TOK_INVALID;
233 }
234 while (ptr != end) {
235 switch (BYTE_TYPE(enc, ptr)) {
236 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
237 case BT_S: case BT_CR: case BT_LF:
238 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
239 *nextTokPtr = ptr;
240 return XML_TOK_INVALID;
241 }
242 ptr += MINBPC(enc);
243 while (ptr != end) {
244 switch (BYTE_TYPE(enc, ptr)) {
245 INVALID_CASES(ptr, nextTokPtr)
246 case BT_QUEST:
247 ptr += MINBPC(enc);
248 if (ptr == end)
249 return XML_TOK_PARTIAL;
250 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
251 *nextTokPtr = ptr + MINBPC(enc);
252 return tok;
253 }
254 break;
255 default:
256 ptr += MINBPC(enc);
257 break;
258 }
259 }
260 return XML_TOK_PARTIAL;
261 case BT_QUEST:
262 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
263 *nextTokPtr = ptr;
264 return XML_TOK_INVALID;
265 }
266 ptr += MINBPC(enc);
267 if (ptr == end)
268 return XML_TOK_PARTIAL;
269 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
270 *nextTokPtr = ptr + MINBPC(enc);
271 return tok;
272 }
273 /* fall through */
274 default:
275 *nextTokPtr = ptr;
276 return XML_TOK_INVALID;
277 }
278 }
279 return XML_TOK_PARTIAL;
280 }
281
282
283 static
PREFIX(scanCdataSection)284 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
285 const char **nextTokPtr)
286 {
287 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
288 int i;
289 vtkExpatUnused(enc);
290 /* CDATA[ */
291 if (end - ptr < 6 * MINBPC(enc))
292 return XML_TOK_PARTIAL;
293 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
294 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
295 *nextTokPtr = ptr;
296 return XML_TOK_INVALID;
297 }
298 }
299 *nextTokPtr = ptr;
300 return XML_TOK_CDATA_SECT_OPEN;
301 }
302
303 static
PREFIX(cdataSectionTok)304 int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
305 const char **nextTokPtr)
306 {
307 if (ptr == end)
308 return XML_TOK_NONE;
309 if (MINBPC(enc) > 1) {
310 size_t n = end - ptr;
311 if (n & (MINBPC(enc) - 1)) {
312 n &= ~(MINBPC(enc) - 1);
313 if (n == 0)
314 return XML_TOK_PARTIAL;
315 end = ptr + n;
316 }
317 }
318 switch (BYTE_TYPE(enc, ptr)) {
319 case BT_RSQB:
320 ptr += MINBPC(enc);
321 if (ptr == end)
322 return XML_TOK_PARTIAL;
323 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
324 break;
325 ptr += MINBPC(enc);
326 if (ptr == end)
327 return XML_TOK_PARTIAL;
328 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
329 ptr -= MINBPC(enc);
330 break;
331 }
332 *nextTokPtr = ptr + MINBPC(enc);
333 return XML_TOK_CDATA_SECT_CLOSE;
334 case BT_CR:
335 ptr += MINBPC(enc);
336 if (ptr == end)
337 return XML_TOK_PARTIAL;
338 if (BYTE_TYPE(enc, ptr) == BT_LF)
339 ptr += MINBPC(enc);
340 *nextTokPtr = ptr;
341 return XML_TOK_DATA_NEWLINE;
342 case BT_LF:
343 *nextTokPtr = ptr + MINBPC(enc);
344 return XML_TOK_DATA_NEWLINE;
345 INVALID_CASES(ptr, nextTokPtr)
346 default:
347 ptr += MINBPC(enc);
348 break;
349 }
350 while (ptr != end) {
351 switch (BYTE_TYPE(enc, ptr)) {
352 #define LEAD_CASE(n) \
353 case BT_LEAD ## n: \
354 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
355 *nextTokPtr = ptr; \
356 return XML_TOK_DATA_CHARS; \
357 } \
358 ptr += n; \
359 break;
360 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
361 #undef LEAD_CASE
362 case BT_NONXML:
363 case BT_MALFORM:
364 case BT_TRAIL:
365 case BT_CR:
366 case BT_LF:
367 case BT_RSQB:
368 *nextTokPtr = ptr;
369 return XML_TOK_DATA_CHARS;
370 default:
371 ptr += MINBPC(enc);
372 break;
373 }
374 }
375 *nextTokPtr = ptr;
376 return XML_TOK_DATA_CHARS;
377 }
378
379 /* ptr points to character following "</" */
380
381 static
PREFIX(scanEndTag)382 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
383 const char **nextTokPtr)
384 {
385 if (ptr == end)
386 return XML_TOK_PARTIAL;
387 switch (BYTE_TYPE(enc, ptr)) {
388 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
389 default:
390 *nextTokPtr = ptr;
391 return XML_TOK_INVALID;
392 }
393 while (ptr != end) {
394 switch (BYTE_TYPE(enc, ptr)) {
395 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
396 case BT_S: case BT_CR: case BT_LF:
397 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
398 switch (BYTE_TYPE(enc, ptr)) {
399 case BT_S: case BT_CR: case BT_LF:
400 break;
401 case BT_GT:
402 *nextTokPtr = ptr + MINBPC(enc);
403 return XML_TOK_END_TAG;
404 default:
405 *nextTokPtr = ptr;
406 return XML_TOK_INVALID;
407 }
408 }
409 return XML_TOK_PARTIAL;
410 #ifdef XML_NS
411 case BT_COLON:
412 /* no need to check qname syntax here, since end-tag must match exactly */
413 ptr += MINBPC(enc);
414 break;
415 #endif
416 case BT_GT:
417 *nextTokPtr = ptr + MINBPC(enc);
418 return XML_TOK_END_TAG;
419 default:
420 *nextTokPtr = ptr;
421 return XML_TOK_INVALID;
422 }
423 }
424 return XML_TOK_PARTIAL;
425 }
426
427 /* ptr points to character following "&#X" */
428
429 static
PREFIX(scanHexCharRef)430 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
431 const char **nextTokPtr)
432 {
433 if (ptr != end) {
434 switch (BYTE_TYPE(enc, ptr)) {
435 case BT_DIGIT:
436 case BT_HEX:
437 break;
438 default:
439 *nextTokPtr = ptr;
440 return XML_TOK_INVALID;
441 }
442 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
443 switch (BYTE_TYPE(enc, ptr)) {
444 case BT_DIGIT:
445 case BT_HEX:
446 break;
447 case BT_SEMI:
448 *nextTokPtr = ptr + MINBPC(enc);
449 return XML_TOK_CHAR_REF;
450 default:
451 *nextTokPtr = ptr;
452 return XML_TOK_INVALID;
453 }
454 }
455 }
456 return XML_TOK_PARTIAL;
457 }
458
459 /* ptr points to character following "&#" */
460
461 static
PREFIX(scanCharRef)462 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
463 const char **nextTokPtr)
464 {
465 if (ptr != end) {
466 if (CHAR_MATCHES(enc, ptr, ASCII_x))
467 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
468 switch (BYTE_TYPE(enc, ptr)) {
469 case BT_DIGIT:
470 break;
471 default:
472 *nextTokPtr = ptr;
473 return XML_TOK_INVALID;
474 }
475 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
476 switch (BYTE_TYPE(enc, ptr)) {
477 case BT_DIGIT:
478 break;
479 case BT_SEMI:
480 *nextTokPtr = ptr + MINBPC(enc);
481 return XML_TOK_CHAR_REF;
482 default:
483 *nextTokPtr = ptr;
484 return XML_TOK_INVALID;
485 }
486 }
487 }
488 return XML_TOK_PARTIAL;
489 }
490
491 /* ptr points to character following "&" */
492
493 static
PREFIX(scanRef)494 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
495 const char **nextTokPtr)
496 {
497 if (ptr == end)
498 return XML_TOK_PARTIAL;
499 switch (BYTE_TYPE(enc, ptr)) {
500 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
501 case BT_NUM:
502 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
503 default:
504 *nextTokPtr = ptr;
505 return XML_TOK_INVALID;
506 }
507 while (ptr != end) {
508 switch (BYTE_TYPE(enc, ptr)) {
509 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
510 case BT_SEMI:
511 *nextTokPtr = ptr + MINBPC(enc);
512 return XML_TOK_ENTITY_REF;
513 default:
514 *nextTokPtr = ptr;
515 return XML_TOK_INVALID;
516 }
517 }
518 return XML_TOK_PARTIAL;
519 }
520
521 /* ptr points to character following first character of attribute name */
522
523 static
PREFIX(scanAtts)524 int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
525 const char **nextTokPtr)
526 {
527 #ifdef XML_NS
528 int hadColon = 0;
529 #endif
530 while (ptr != end) {
531 switch (BYTE_TYPE(enc, ptr)) {
532 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
533 #ifdef XML_NS
534 case BT_COLON:
535 if (hadColon) {
536 *nextTokPtr = ptr;
537 return XML_TOK_INVALID;
538 }
539 hadColon = 1;
540 ptr += MINBPC(enc);
541 if (ptr == end)
542 return XML_TOK_PARTIAL;
543 switch (BYTE_TYPE(enc, ptr)) {
544 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
545 default:
546 *nextTokPtr = ptr;
547 return XML_TOK_INVALID;
548 }
549 break;
550 #endif
551 case BT_S: case BT_CR: case BT_LF:
552 for (;;) {
553 int t;
554
555 ptr += MINBPC(enc);
556 if (ptr == end)
557 return XML_TOK_PARTIAL;
558 t = BYTE_TYPE(enc, ptr);
559 if (t == BT_EQUALS)
560 break;
561 switch (t) {
562 case BT_S:
563 case BT_LF:
564 case BT_CR:
565 break;
566 default:
567 *nextTokPtr = ptr;
568 return XML_TOK_INVALID;
569 }
570 }
571 /* fall through */
572 case BT_EQUALS:
573 {
574 int open;
575 #ifdef XML_NS
576 hadColon = 0;
577 #endif
578 for (;;) {
579
580 ptr += MINBPC(enc);
581 if (ptr == end)
582 return XML_TOK_PARTIAL;
583 open = BYTE_TYPE(enc, ptr);
584 if (open == BT_QUOT || open == BT_APOS)
585 break;
586 switch (open) {
587 case BT_S:
588 case BT_LF:
589 case BT_CR:
590 break;
591 default:
592 *nextTokPtr = ptr;
593 return XML_TOK_INVALID;
594 }
595 }
596 ptr += MINBPC(enc);
597 /* in attribute value */
598 for (;;) {
599 int t;
600 if (ptr == end)
601 return XML_TOK_PARTIAL;
602 t = BYTE_TYPE(enc, ptr);
603 if (t == open)
604 break;
605 switch (t) {
606 INVALID_CASES(ptr, nextTokPtr)
607 case BT_AMP:
608 {
609 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
610 if (tok <= 0) {
611 if (tok == XML_TOK_INVALID)
612 *nextTokPtr = ptr;
613 return tok;
614 }
615 break;
616 }
617 case BT_LT:
618 *nextTokPtr = ptr;
619 return XML_TOK_INVALID;
620 default:
621 ptr += MINBPC(enc);
622 break;
623 }
624 }
625 ptr += MINBPC(enc);
626 if (ptr == end)
627 return XML_TOK_PARTIAL;
628 switch (BYTE_TYPE(enc, ptr)) {
629 case BT_S:
630 case BT_CR:
631 case BT_LF:
632 break;
633 case BT_SOL:
634 goto sol;
635 case BT_GT:
636 goto gt;
637 default:
638 *nextTokPtr = ptr;
639 return XML_TOK_INVALID;
640 }
641 /* ptr points to closing quote */
642 for (;;) {
643 ptr += MINBPC(enc);
644 if (ptr == end)
645 return XML_TOK_PARTIAL;
646 switch (BYTE_TYPE(enc, ptr)) {
647 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
648 case BT_S: case BT_CR: case BT_LF:
649 continue;
650 case BT_GT:
651 gt:
652 *nextTokPtr = ptr + MINBPC(enc);
653 return XML_TOK_START_TAG_WITH_ATTS;
654 case BT_SOL:
655 sol:
656 ptr += MINBPC(enc);
657 if (ptr == end)
658 return XML_TOK_PARTIAL;
659 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
660 *nextTokPtr = ptr;
661 return XML_TOK_INVALID;
662 }
663 *nextTokPtr = ptr + MINBPC(enc);
664 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
665 default:
666 *nextTokPtr = ptr;
667 return XML_TOK_INVALID;
668 }
669 break;
670 }
671 break;
672 }
673 default:
674 *nextTokPtr = ptr;
675 return XML_TOK_INVALID;
676 }
677 }
678 return XML_TOK_PARTIAL;
679 }
680
681 /* ptr points to character following "<" */
682
683 static
PREFIX(scanLt)684 int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
685 const char **nextTokPtr)
686 {
687 #ifdef XML_NS
688 int hadColon;
689 #endif
690 if (ptr == end)
691 return XML_TOK_PARTIAL;
692 switch (BYTE_TYPE(enc, ptr)) {
693 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
694 case BT_EXCL:
695 if ((ptr += MINBPC(enc)) == end)
696 return XML_TOK_PARTIAL;
697 switch (BYTE_TYPE(enc, ptr)) {
698 case BT_MINUS:
699 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
700 case BT_LSQB:
701 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
702 }
703 *nextTokPtr = ptr;
704 return XML_TOK_INVALID;
705 case BT_QUEST:
706 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
707 case BT_SOL:
708 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
709 default:
710 *nextTokPtr = ptr;
711 return XML_TOK_INVALID;
712 }
713 #ifdef XML_NS
714 hadColon = 0;
715 #endif
716 /* we have a start-tag */
717 while (ptr != end) {
718 switch (BYTE_TYPE(enc, ptr)) {
719 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
720 #ifdef XML_NS
721 case BT_COLON:
722 if (hadColon) {
723 *nextTokPtr = ptr;
724 return XML_TOK_INVALID;
725 }
726 hadColon = 1;
727 ptr += MINBPC(enc);
728 if (ptr == end)
729 return XML_TOK_PARTIAL;
730 switch (BYTE_TYPE(enc, ptr)) {
731 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
732 default:
733 *nextTokPtr = ptr;
734 return XML_TOK_INVALID;
735 }
736 break;
737 #endif
738 case BT_S: case BT_CR: case BT_LF:
739 {
740 ptr += MINBPC(enc);
741 while (ptr != end) {
742 switch (BYTE_TYPE(enc, ptr)) {
743 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
744 case BT_GT:
745 goto gt;
746 case BT_SOL:
747 goto sol;
748 case BT_S: case BT_CR: case BT_LF:
749 ptr += MINBPC(enc);
750 continue;
751 default:
752 *nextTokPtr = ptr;
753 return XML_TOK_INVALID;
754 }
755 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
756 }
757 return XML_TOK_PARTIAL;
758 }
759 case BT_GT:
760 gt:
761 *nextTokPtr = ptr + MINBPC(enc);
762 return XML_TOK_START_TAG_NO_ATTS;
763 case BT_SOL:
764 sol:
765 ptr += MINBPC(enc);
766 if (ptr == end)
767 return XML_TOK_PARTIAL;
768 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
769 *nextTokPtr = ptr;
770 return XML_TOK_INVALID;
771 }
772 *nextTokPtr = ptr + MINBPC(enc);
773 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
774 default:
775 *nextTokPtr = ptr;
776 return XML_TOK_INVALID;
777 }
778 }
779 return XML_TOK_PARTIAL;
780 }
781
782 static
PREFIX(contentTok)783 int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
784 const char **nextTokPtr)
785 {
786 if (ptr == end)
787 return XML_TOK_NONE;
788 if (MINBPC(enc) > 1) {
789 size_t n = end - ptr;
790 if (n & (MINBPC(enc) - 1)) {
791 n &= ~(MINBPC(enc) - 1);
792 if (n == 0)
793 return XML_TOK_PARTIAL;
794 end = ptr + n;
795 }
796 }
797 switch (BYTE_TYPE(enc, ptr)) {
798 case BT_LT:
799 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
800 case BT_AMP:
801 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
802 case BT_CR:
803 ptr += MINBPC(enc);
804 if (ptr == end)
805 return XML_TOK_TRAILING_CR;
806 if (BYTE_TYPE(enc, ptr) == BT_LF)
807 ptr += MINBPC(enc);
808 *nextTokPtr = ptr;
809 return XML_TOK_DATA_NEWLINE;
810 case BT_LF:
811 *nextTokPtr = ptr + MINBPC(enc);
812 return XML_TOK_DATA_NEWLINE;
813 case BT_RSQB:
814 ptr += MINBPC(enc);
815 if (ptr == end)
816 return XML_TOK_TRAILING_RSQB;
817 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
818 break;
819 ptr += MINBPC(enc);
820 if (ptr == end)
821 return XML_TOK_TRAILING_RSQB;
822 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
823 ptr -= MINBPC(enc);
824 break;
825 }
826 *nextTokPtr = ptr;
827 return XML_TOK_INVALID;
828 INVALID_CASES(ptr, nextTokPtr)
829 default:
830 ptr += MINBPC(enc);
831 break;
832 }
833 while (ptr != end) {
834 switch (BYTE_TYPE(enc, ptr)) {
835 #define LEAD_CASE(n) \
836 case BT_LEAD ## n: \
837 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
838 *nextTokPtr = ptr; \
839 return XML_TOK_DATA_CHARS; \
840 } \
841 ptr += n; \
842 break;
843 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
844 #undef LEAD_CASE
845 case BT_RSQB:
846 if (ptr + MINBPC(enc) != end) {
847 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
848 ptr += MINBPC(enc);
849 break;
850 }
851 if (ptr + 2*MINBPC(enc) != end) {
852 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
853 ptr += MINBPC(enc);
854 break;
855 }
856 *nextTokPtr = ptr + 2*MINBPC(enc);
857 return XML_TOK_INVALID;
858 }
859 }
860 /* fall through */
861 case BT_AMP:
862 case BT_LT:
863 case BT_NONXML:
864 case BT_MALFORM:
865 case BT_TRAIL:
866 case BT_CR:
867 case BT_LF:
868 *nextTokPtr = ptr;
869 return XML_TOK_DATA_CHARS;
870 default:
871 ptr += MINBPC(enc);
872 break;
873 }
874 }
875 *nextTokPtr = ptr;
876 return XML_TOK_DATA_CHARS;
877 }
878
879 /* ptr points to character following "%" */
880
881 static
PREFIX(scanPercent)882 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
883 const char **nextTokPtr)
884 {
885 if (ptr == end)
886 return XML_TOK_PARTIAL;
887 switch (BYTE_TYPE(enc, ptr)) {
888 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
889 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
890 *nextTokPtr = ptr;
891 return XML_TOK_PERCENT;
892 default:
893 *nextTokPtr = ptr;
894 return XML_TOK_INVALID;
895 }
896 while (ptr != end) {
897 switch (BYTE_TYPE(enc, ptr)) {
898 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
899 case BT_SEMI:
900 *nextTokPtr = ptr + MINBPC(enc);
901 return XML_TOK_PARAM_ENTITY_REF;
902 default:
903 *nextTokPtr = ptr;
904 return XML_TOK_INVALID;
905 }
906 }
907 return XML_TOK_PARTIAL;
908 }
909
910 static
PREFIX(scanPoundName)911 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
912 const char **nextTokPtr)
913 {
914 if (ptr == end)
915 return XML_TOK_PARTIAL;
916 switch (BYTE_TYPE(enc, ptr)) {
917 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
918 default:
919 *nextTokPtr = ptr;
920 return XML_TOK_INVALID;
921 }
922 while (ptr != end) {
923 switch (BYTE_TYPE(enc, ptr)) {
924 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
925 case BT_CR: case BT_LF: case BT_S:
926 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
927 *nextTokPtr = ptr;
928 return XML_TOK_POUND_NAME;
929 default:
930 *nextTokPtr = ptr;
931 return XML_TOK_INVALID;
932 }
933 }
934 return -XML_TOK_POUND_NAME;
935 }
936
937 static
PREFIX(scanLit)938 int PREFIX(scanLit)(int open, const ENCODING *enc,
939 const char *ptr, const char *end,
940 const char **nextTokPtr)
941 {
942 while (ptr != end) {
943 int t = BYTE_TYPE(enc, ptr);
944 switch (t) {
945 INVALID_CASES(ptr, nextTokPtr)
946 case BT_QUOT:
947 case BT_APOS:
948 ptr += MINBPC(enc);
949 if (t != open)
950 break;
951 if (ptr == end)
952 return -XML_TOK_LITERAL;
953 *nextTokPtr = ptr;
954 switch (BYTE_TYPE(enc, ptr)) {
955 case BT_S: case BT_CR: case BT_LF:
956 case BT_GT: case BT_PERCNT: case BT_LSQB:
957 return XML_TOK_LITERAL;
958 default:
959 return XML_TOK_INVALID;
960 }
961 default:
962 ptr += MINBPC(enc);
963 break;
964 }
965 }
966 return XML_TOK_PARTIAL;
967 }
968
969 static
PREFIX(prologTok)970 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
971 const char **nextTokPtr)
972 {
973 int tok;
974 if (ptr == end)
975 return XML_TOK_NONE;
976 if (MINBPC(enc) > 1) {
977 size_t n = end - ptr;
978 if (n & (MINBPC(enc) - 1)) {
979 n &= ~(MINBPC(enc) - 1);
980 if (n == 0)
981 return XML_TOK_PARTIAL;
982 end = ptr + n;
983 }
984 }
985 switch (BYTE_TYPE(enc, ptr)) {
986 case BT_QUOT:
987 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
988 case BT_APOS:
989 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
990 case BT_LT:
991 {
992 ptr += MINBPC(enc);
993 if (ptr == end)
994 return XML_TOK_PARTIAL;
995 switch (BYTE_TYPE(enc, ptr)) {
996 case BT_EXCL:
997 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
998 case BT_QUEST:
999 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1000 case BT_NMSTRT:
1001 case BT_HEX:
1002 case BT_NONASCII:
1003 case BT_LEAD2:
1004 case BT_LEAD3:
1005 case BT_LEAD4:
1006 *nextTokPtr = ptr - MINBPC(enc);
1007 return XML_TOK_INSTANCE_START;
1008 }
1009 *nextTokPtr = ptr;
1010 return XML_TOK_INVALID;
1011 }
1012 case BT_CR:
1013 if (ptr + MINBPC(enc) == end)
1014 return -XML_TOK_PROLOG_S;
1015 /* fall through */
1016 case BT_S: case BT_LF:
1017 for (;;) {
1018 ptr += MINBPC(enc);
1019 if (ptr == end)
1020 break;
1021 switch (BYTE_TYPE(enc, ptr)) {
1022 case BT_S: case BT_LF:
1023 break;
1024 case BT_CR:
1025 /* don't split CR/LF pair */
1026 if (ptr + MINBPC(enc) != end)
1027 break;
1028 /* fall through */
1029 default:
1030 *nextTokPtr = ptr;
1031 return XML_TOK_PROLOG_S;
1032 }
1033 }
1034 *nextTokPtr = ptr;
1035 return XML_TOK_PROLOG_S;
1036 case BT_PERCNT:
1037 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1038 case BT_COMMA:
1039 *nextTokPtr = ptr + MINBPC(enc);
1040 return XML_TOK_COMMA;
1041 case BT_LSQB:
1042 *nextTokPtr = ptr + MINBPC(enc);
1043 return XML_TOK_OPEN_BRACKET;
1044 case BT_RSQB:
1045 ptr += MINBPC(enc);
1046 if (ptr == end)
1047 return -XML_TOK_CLOSE_BRACKET;
1048 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1049 if (ptr + MINBPC(enc) == end)
1050 return XML_TOK_PARTIAL;
1051 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1052 *nextTokPtr = ptr + 2*MINBPC(enc);
1053 return XML_TOK_COND_SECT_CLOSE;
1054 }
1055 }
1056 *nextTokPtr = ptr;
1057 return XML_TOK_CLOSE_BRACKET;
1058 case BT_LPAR:
1059 *nextTokPtr = ptr + MINBPC(enc);
1060 return XML_TOK_OPEN_PAREN;
1061 case BT_RPAR:
1062 ptr += MINBPC(enc);
1063 if (ptr == end)
1064 return -XML_TOK_CLOSE_PAREN;
1065 switch (BYTE_TYPE(enc, ptr)) {
1066 case BT_AST:
1067 *nextTokPtr = ptr + MINBPC(enc);
1068 return XML_TOK_CLOSE_PAREN_ASTERISK;
1069 case BT_QUEST:
1070 *nextTokPtr = ptr + MINBPC(enc);
1071 return XML_TOK_CLOSE_PAREN_QUESTION;
1072 case BT_PLUS:
1073 *nextTokPtr = ptr + MINBPC(enc);
1074 return XML_TOK_CLOSE_PAREN_PLUS;
1075 case BT_CR: case BT_LF: case BT_S:
1076 case BT_GT: case BT_COMMA: case BT_VERBAR:
1077 case BT_RPAR:
1078 *nextTokPtr = ptr;
1079 return XML_TOK_CLOSE_PAREN;
1080 }
1081 *nextTokPtr = ptr;
1082 return XML_TOK_INVALID;
1083 case BT_VERBAR:
1084 *nextTokPtr = ptr + MINBPC(enc);
1085 return XML_TOK_OR;
1086 case BT_GT:
1087 *nextTokPtr = ptr + MINBPC(enc);
1088 return XML_TOK_DECL_CLOSE;
1089 case BT_NUM:
1090 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1091 #define LEAD_CASE(n) \
1092 case BT_LEAD ## n: \
1093 if (end - ptr < n) \
1094 return XML_TOK_PARTIAL_CHAR; \
1095 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1096 ptr += n; \
1097 tok = XML_TOK_NAME; \
1098 break; \
1099 } \
1100 if (IS_NAME_CHAR(enc, ptr, n)) { \
1101 ptr += n; \
1102 tok = XML_TOK_NMTOKEN; \
1103 break; \
1104 } \
1105 *nextTokPtr = ptr; \
1106 return XML_TOK_INVALID;
1107 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1108 #undef LEAD_CASE
1109 case BT_NMSTRT:
1110 case BT_HEX:
1111 tok = XML_TOK_NAME;
1112 ptr += MINBPC(enc);
1113 break;
1114 case BT_DIGIT:
1115 case BT_NAME:
1116 case BT_MINUS:
1117 #ifdef XML_NS
1118 case BT_COLON:
1119 #endif
1120 tok = XML_TOK_NMTOKEN;
1121 ptr += MINBPC(enc);
1122 break;
1123 case BT_NONASCII:
1124 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1125 ptr += MINBPC(enc);
1126 tok = XML_TOK_NAME;
1127 break;
1128 }
1129 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1130 ptr += MINBPC(enc);
1131 tok = XML_TOK_NMTOKEN;
1132 break;
1133 }
1134 /* fall through */
1135 default:
1136 *nextTokPtr = ptr;
1137 return XML_TOK_INVALID;
1138 }
1139 while (ptr != end) {
1140 switch (BYTE_TYPE(enc, ptr)) {
1141 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1142 case BT_GT: case BT_RPAR: case BT_COMMA:
1143 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1144 case BT_S: case BT_CR: case BT_LF:
1145 *nextTokPtr = ptr;
1146 return tok;
1147 #ifdef XML_NS
1148 case BT_COLON:
1149 ptr += MINBPC(enc);
1150 switch (tok) {
1151 case XML_TOK_NAME:
1152 if (ptr == end)
1153 return XML_TOK_PARTIAL;
1154 tok = XML_TOK_PREFIXED_NAME;
1155 switch (BYTE_TYPE(enc, ptr)) {
1156 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1157 default:
1158 tok = XML_TOK_NMTOKEN;
1159 break;
1160 }
1161 break;
1162 case XML_TOK_PREFIXED_NAME:
1163 tok = XML_TOK_NMTOKEN;
1164 break;
1165 }
1166 break;
1167 #endif
1168 case BT_PLUS:
1169 if (tok == XML_TOK_NMTOKEN) {
1170 *nextTokPtr = ptr;
1171 return XML_TOK_INVALID;
1172 }
1173 *nextTokPtr = ptr + MINBPC(enc);
1174 return XML_TOK_NAME_PLUS;
1175 case BT_AST:
1176 if (tok == XML_TOK_NMTOKEN) {
1177 *nextTokPtr = ptr;
1178 return XML_TOK_INVALID;
1179 }
1180 *nextTokPtr = ptr + MINBPC(enc);
1181 return XML_TOK_NAME_ASTERISK;
1182 case BT_QUEST:
1183 if (tok == XML_TOK_NMTOKEN) {
1184 *nextTokPtr = ptr;
1185 return XML_TOK_INVALID;
1186 }
1187 *nextTokPtr = ptr + MINBPC(enc);
1188 return XML_TOK_NAME_QUESTION;
1189 default:
1190 *nextTokPtr = ptr;
1191 return XML_TOK_INVALID;
1192 }
1193 }
1194 return -tok;
1195 }
1196
1197 static
PREFIX(attributeValueTok)1198 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1199 const char **nextTokPtr)
1200 {
1201 const char *start;
1202 if (ptr == end)
1203 return XML_TOK_NONE;
1204 start = ptr;
1205 while (ptr != end) {
1206 switch (BYTE_TYPE(enc, ptr)) {
1207 #define LEAD_CASE(n) \
1208 case BT_LEAD ## n: ptr += n; break;
1209 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1210 #undef LEAD_CASE
1211 case BT_AMP:
1212 if (ptr == start)
1213 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1214 *nextTokPtr = ptr;
1215 return XML_TOK_DATA_CHARS;
1216 case BT_LT:
1217 /* this is for inside entity references */
1218 *nextTokPtr = ptr;
1219 return XML_TOK_INVALID;
1220 case BT_LF:
1221 if (ptr == start) {
1222 *nextTokPtr = ptr + MINBPC(enc);
1223 return XML_TOK_DATA_NEWLINE;
1224 }
1225 *nextTokPtr = ptr;
1226 return XML_TOK_DATA_CHARS;
1227 case BT_CR:
1228 if (ptr == start) {
1229 ptr += MINBPC(enc);
1230 if (ptr == end)
1231 return XML_TOK_TRAILING_CR;
1232 if (BYTE_TYPE(enc, ptr) == BT_LF)
1233 ptr += MINBPC(enc);
1234 *nextTokPtr = ptr;
1235 return XML_TOK_DATA_NEWLINE;
1236 }
1237 *nextTokPtr = ptr;
1238 return XML_TOK_DATA_CHARS;
1239 case BT_S:
1240 if (ptr == start) {
1241 *nextTokPtr = ptr + MINBPC(enc);
1242 return XML_TOK_ATTRIBUTE_VALUE_S;
1243 }
1244 *nextTokPtr = ptr;
1245 return XML_TOK_DATA_CHARS;
1246 default:
1247 ptr += MINBPC(enc);
1248 break;
1249 }
1250 }
1251 *nextTokPtr = ptr;
1252 return XML_TOK_DATA_CHARS;
1253 }
1254
1255 static
PREFIX(entityValueTok)1256 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1257 const char **nextTokPtr)
1258 {
1259 const char *start;
1260 if (ptr == end)
1261 return XML_TOK_NONE;
1262 start = ptr;
1263 while (ptr != end) {
1264 switch (BYTE_TYPE(enc, ptr)) {
1265 #define LEAD_CASE(n) \
1266 case BT_LEAD ## n: ptr += n; break;
1267 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1268 #undef LEAD_CASE
1269 case BT_AMP:
1270 if (ptr == start)
1271 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1272 *nextTokPtr = ptr;
1273 return XML_TOK_DATA_CHARS;
1274 case BT_PERCNT:
1275 if (ptr == start) {
1276 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1277 end, nextTokPtr);
1278 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1279 }
1280 *nextTokPtr = ptr;
1281 return XML_TOK_DATA_CHARS;
1282 case BT_LF:
1283 if (ptr == start) {
1284 *nextTokPtr = ptr + MINBPC(enc);
1285 return XML_TOK_DATA_NEWLINE;
1286 }
1287 *nextTokPtr = ptr;
1288 return XML_TOK_DATA_CHARS;
1289 case BT_CR:
1290 if (ptr == start) {
1291 ptr += MINBPC(enc);
1292 if (ptr == end)
1293 return XML_TOK_TRAILING_CR;
1294 if (BYTE_TYPE(enc, ptr) == BT_LF)
1295 ptr += MINBPC(enc);
1296 *nextTokPtr = ptr;
1297 return XML_TOK_DATA_NEWLINE;
1298 }
1299 *nextTokPtr = ptr;
1300 return XML_TOK_DATA_CHARS;
1301 default:
1302 ptr += MINBPC(enc);
1303 break;
1304 }
1305 }
1306 *nextTokPtr = ptr;
1307 return XML_TOK_DATA_CHARS;
1308 }
1309
1310 #ifdef XML_DTD
1311
1312 static
PREFIX(ignoreSectionTok)1313 int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
1314 const char **nextTokPtr)
1315 {
1316 int level = 0;
1317 if (MINBPC(enc) > 1) {
1318 size_t n = end - ptr;
1319 if (n & (MINBPC(enc) - 1)) {
1320 n &= ~(MINBPC(enc) - 1);
1321 end = ptr + n;
1322 }
1323 }
1324 while (ptr != end) {
1325 switch (BYTE_TYPE(enc, ptr)) {
1326 INVALID_CASES(ptr, nextTokPtr)
1327 case BT_LT:
1328 if ((ptr += MINBPC(enc)) == end)
1329 return XML_TOK_PARTIAL;
1330 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1331 if ((ptr += MINBPC(enc)) == end)
1332 return XML_TOK_PARTIAL;
1333 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1334 ++level;
1335 ptr += MINBPC(enc);
1336 }
1337 }
1338 break;
1339 case BT_RSQB:
1340 if ((ptr += MINBPC(enc)) == end)
1341 return XML_TOK_PARTIAL;
1342 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1343 if ((ptr += MINBPC(enc)) == end)
1344 return XML_TOK_PARTIAL;
1345 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1346 ptr += MINBPC(enc);
1347 if (level == 0) {
1348 *nextTokPtr = ptr;
1349 return XML_TOK_IGNORE_SECT;
1350 }
1351 --level;
1352 }
1353 }
1354 break;
1355 default:
1356 ptr += MINBPC(enc);
1357 break;
1358 }
1359 }
1360 return XML_TOK_PARTIAL;
1361 }
1362
1363 #endif /* XML_DTD */
1364
1365 static
PREFIX(isPublicId)1366 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1367 const char **badPtr)
1368 {
1369 ptr += MINBPC(enc);
1370 end -= MINBPC(enc);
1371 for (; ptr != end; ptr += MINBPC(enc)) {
1372 switch (BYTE_TYPE(enc, ptr)) {
1373 case BT_DIGIT:
1374 case BT_HEX:
1375 case BT_MINUS:
1376 case BT_APOS:
1377 case BT_LPAR:
1378 case BT_RPAR:
1379 case BT_PLUS:
1380 case BT_COMMA:
1381 case BT_SOL:
1382 case BT_EQUALS:
1383 case BT_QUEST:
1384 case BT_CR:
1385 case BT_LF:
1386 case BT_SEMI:
1387 case BT_EXCL:
1388 case BT_AST:
1389 case BT_PERCNT:
1390 case BT_NUM:
1391 #ifdef XML_NS
1392 case BT_COLON:
1393 #endif
1394 break;
1395 case BT_S:
1396 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1397 *badPtr = ptr;
1398 return 0;
1399 }
1400 break;
1401 case BT_NAME:
1402 case BT_NMSTRT:
1403 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1404 break;
1405 default:
1406 switch (BYTE_TO_ASCII(enc, ptr)) {
1407 case 0x24: /* $ */
1408 case 0x40: /* @ */
1409 break;
1410 default:
1411 *badPtr = ptr;
1412 return 0;
1413 }
1414 break;
1415 }
1416 }
1417 return 1;
1418 }
1419
1420 /* This must only be called for a well-formed start-tag or empty element tag.
1421 Returns the number of attributes. Pointers to the first attsMax attributes
1422 are stored in atts. */
1423
1424 static
PREFIX(getAtts)1425 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1426 int attsMax, ATTRIBUTE *atts)
1427 {
1428 enum { other, inName, inValue } state = inName;
1429 int nAtts = 0;
1430 int open = 0; /* defined when state == inValue;
1431 initialization just to shut up compilers */
1432
1433 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1434 switch (BYTE_TYPE(enc, ptr)) {
1435 #define START_NAME \
1436 if (state == other) { \
1437 if (nAtts < attsMax) { \
1438 atts[nAtts].name = ptr; \
1439 atts[nAtts].normalized = 1; \
1440 } \
1441 state = inName; \
1442 }
1443 #define LEAD_CASE(n) \
1444 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1445 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1446 #undef LEAD_CASE
1447 case BT_NONASCII:
1448 case BT_NMSTRT:
1449 case BT_HEX:
1450 START_NAME
1451 break;
1452 #undef START_NAME
1453 case BT_QUOT:
1454 if (state != inValue) {
1455 if (nAtts < attsMax)
1456 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1457 state = inValue;
1458 open = BT_QUOT;
1459 }
1460 else if (open == BT_QUOT) {
1461 state = other;
1462 if (nAtts < attsMax)
1463 atts[nAtts].valueEnd = ptr;
1464 nAtts++;
1465 }
1466 break;
1467 case BT_APOS:
1468 if (state != inValue) {
1469 if (nAtts < attsMax)
1470 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1471 state = inValue;
1472 open = BT_APOS;
1473 }
1474 else if (open == BT_APOS) {
1475 state = other;
1476 if (nAtts < attsMax)
1477 atts[nAtts].valueEnd = ptr;
1478 nAtts++;
1479 }
1480 break;
1481 case BT_AMP:
1482 if (nAtts < attsMax)
1483 atts[nAtts].normalized = 0;
1484 break;
1485 case BT_S:
1486 if (state == inName)
1487 state = other;
1488 else if (state == inValue
1489 && nAtts < attsMax
1490 && atts[nAtts].normalized
1491 && (ptr == atts[nAtts].valuePtr
1492 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1493 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1494 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1495 atts[nAtts].normalized = 0;
1496 break;
1497 case BT_CR: case BT_LF:
1498 /* This case ensures that the first attribute name is counted
1499 Apart from that we could just change state on the quote. */
1500 if (state == inName)
1501 state = other;
1502 else if (state == inValue && nAtts < attsMax)
1503 atts[nAtts].normalized = 0;
1504 break;
1505 case BT_GT:
1506 case BT_SOL:
1507 if (state != inValue)
1508 return nAtts;
1509 break;
1510 default:
1511 break;
1512 }
1513 }
1514 /* not reached */
1515 }
1516
1517 static
PREFIX(charRefNumber)1518 int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
1519 {
1520 int result = 0;
1521 vtkExpatUnused(enc);
1522 /* skip &# */
1523 ptr += 2*MINBPC(enc);
1524 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1525 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1526 int c = BYTE_TO_ASCII(enc, ptr);
1527 switch (c) {
1528 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1529 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1530 result <<= 4;
1531 result |= (c - ASCII_0);
1532 break;
1533 case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
1534 result <<= 4;
1535 result += 10 + (c - ASCII_A);
1536 break;
1537 case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
1538 result <<= 4;
1539 result += 10 + (c - ASCII_a);
1540 break;
1541 }
1542 if (result >= 0x110000)
1543 return -1;
1544 }
1545 }
1546 else {
1547 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1548 int c = BYTE_TO_ASCII(enc, ptr);
1549 result *= 10;
1550 result += (c - ASCII_0);
1551 if (result >= 0x110000)
1552 return -1;
1553 }
1554 }
1555 return checkCharRefNumber(result);
1556 }
1557
1558 static
PREFIX(predefinedEntityName)1559 int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
1560 {
1561 vtkExpatUnused(enc);
1562 switch ((end - ptr)/MINBPC(enc)) {
1563 case 2:
1564 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1565 switch (BYTE_TO_ASCII(enc, ptr)) {
1566 case ASCII_l:
1567 return ASCII_LT;
1568 case ASCII_g:
1569 return ASCII_GT;
1570 }
1571 }
1572 break;
1573 case 3:
1574 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1575 ptr += MINBPC(enc);
1576 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1577 ptr += MINBPC(enc);
1578 if (CHAR_MATCHES(enc, ptr, ASCII_p))
1579 return ASCII_AMP;
1580 }
1581 }
1582 break;
1583 case 4:
1584 switch (BYTE_TO_ASCII(enc, ptr)) {
1585 case ASCII_q:
1586 ptr += MINBPC(enc);
1587 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1588 ptr += MINBPC(enc);
1589 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1590 ptr += MINBPC(enc);
1591 if (CHAR_MATCHES(enc, ptr, ASCII_t))
1592 return ASCII_QUOT;
1593 }
1594 }
1595 break;
1596 case ASCII_a:
1597 ptr += MINBPC(enc);
1598 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1599 ptr += MINBPC(enc);
1600 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1601 ptr += MINBPC(enc);
1602 if (CHAR_MATCHES(enc, ptr, ASCII_s))
1603 return ASCII_APOS;
1604 }
1605 }
1606 break;
1607 }
1608 }
1609 return 0;
1610 }
1611
1612 static
PREFIX(sameName)1613 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1614 {
1615 for (;;) {
1616 switch (BYTE_TYPE(enc, ptr1)) {
1617 #define LEAD_CASE(n) \
1618 case BT_LEAD ## n: \
1619 if (*ptr1++ != *ptr2++) \
1620 return 0;
1621 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1622 #undef LEAD_CASE
1623 /* fall through */
1624 if (*ptr1++ != *ptr2++)
1625 return 0;
1626 break;
1627 case BT_NONASCII:
1628 case BT_NMSTRT:
1629 #ifdef XML_NS
1630 case BT_COLON:
1631 #endif
1632 case BT_HEX:
1633 case BT_DIGIT:
1634 case BT_NAME:
1635 case BT_MINUS:
1636 if (*ptr2++ != *ptr1++)
1637 return 0;
1638 if (MINBPC(enc) > 1) {
1639 if (*ptr2++ != *ptr1++)
1640 return 0;
1641 if (MINBPC(enc) > 2) {
1642 if (*ptr2++ != *ptr1++)
1643 return 0;
1644 if (MINBPC(enc) > 3) {
1645 if (*ptr2++ != *ptr1++)
1646 return 0;
1647 }
1648 }
1649 }
1650 break;
1651 default:
1652 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1653 return 1;
1654 switch (BYTE_TYPE(enc, ptr2)) {
1655 case BT_LEAD2:
1656 case BT_LEAD3:
1657 case BT_LEAD4:
1658 case BT_NONASCII:
1659 case BT_NMSTRT:
1660 #ifdef XML_NS
1661 case BT_COLON:
1662 #endif
1663 case BT_HEX:
1664 case BT_DIGIT:
1665 case BT_NAME:
1666 case BT_MINUS:
1667 return 0;
1668 default:
1669 return 1;
1670 }
1671 }
1672 }
1673 /* not reached */
1674 }
1675
1676 static
PREFIX(nameMatchesAscii)1677 int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1678 const char *end1, const char *ptr2)
1679 {
1680 vtkExpatUnused(enc);
1681 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1682 if (ptr1 == end1)
1683 return 0;
1684 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1685 return 0;
1686 }
1687 return ptr1 == end1;
1688 }
1689
1690 static
PREFIX(nameLength)1691 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1692 {
1693 const char *start = ptr;
1694 for (;;) {
1695 switch (BYTE_TYPE(enc, ptr)) {
1696 #define LEAD_CASE(n) \
1697 case BT_LEAD ## n: ptr += n; break;
1698 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1699 #undef LEAD_CASE
1700 case BT_NONASCII:
1701 case BT_NMSTRT:
1702 #ifdef XML_NS
1703 case BT_COLON:
1704 #endif
1705 case BT_HEX:
1706 case BT_DIGIT:
1707 case BT_NAME:
1708 case BT_MINUS:
1709 ptr += MINBPC(enc);
1710 break;
1711 default:
1712 return ptr - start;
1713 }
1714 }
1715 }
1716
1717 static
PREFIX(skipS)1718 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1719 {
1720 for (;;) {
1721 switch (BYTE_TYPE(enc, ptr)) {
1722 case BT_LF:
1723 case BT_CR:
1724 case BT_S:
1725 ptr += MINBPC(enc);
1726 break;
1727 default:
1728 return ptr;
1729 }
1730 }
1731 }
1732
1733 static
PREFIX(updatePosition)1734 void PREFIX(updatePosition)(const ENCODING *enc,
1735 const char *ptr,
1736 const char *end,
1737 POSITION *pos)
1738 {
1739 while (ptr != end) {
1740 switch (BYTE_TYPE(enc, ptr)) {
1741 #define LEAD_CASE(n) \
1742 case BT_LEAD ## n: \
1743 ptr += n; \
1744 break;
1745 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1746 #undef LEAD_CASE
1747 case BT_LF:
1748 pos->columnNumber = (unsigned)-1;
1749 pos->lineNumber++;
1750 ptr += MINBPC(enc);
1751 break;
1752 case BT_CR:
1753 pos->lineNumber++;
1754 ptr += MINBPC(enc);
1755 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1756 ptr += MINBPC(enc);
1757 pos->columnNumber = (unsigned)-1;
1758 break;
1759 default:
1760 ptr += MINBPC(enc);
1761 break;
1762 }
1763 pos->columnNumber++;
1764 }
1765 }
1766
1767 #undef DO_LEAD_CASE
1768 #undef MULTIBYTE_CASES
1769
1770 #undef INVALID_LEAD_CASE
1771 #undef INVALID_CASES
1772 #undef CHECK_NAME_CASE
1773 #undef CHECK_NAME_CASES
1774 #undef CHECK_NMSTRT_CASE
1775 #undef CHECK_NMSTRT_CASES
1776