1 /*
2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3 See the file COPYING for copying permission.
4 */
5 
/* Default validity test for an n-byte sequence starting at ptr.  When the
   including file has not supplied its own IS_INVALID_CHAR, this fallback
   evaluates to 0, i.e. no multi-byte sequence is ever reported as invalid. */
#ifndef IS_INVALID_CHAR
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
9 
/* INVALID_LEAD_CASE(n, ptr, nextTokPtr)
   Expands to the `case BT_LEAD<n>:` arm of a switch on the byte type:
     - fewer than n bytes left before `end`  -> return XML_TOK_PARTIAL_CHAR
     - IS_INVALID_CHAR rejects the sequence  -> store ptr in *nextTokPtr and
                                                return XML_TOK_INVALID
     - otherwise                             -> advance ptr by n and break
   Note: besides its arguments the expansion uses the identifiers `enc` and
   `end`, which must be in scope where the macro is used. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      ptr += n; \
      break;

/* INVALID_CASES(ptr, nextTokPtr)
   Expands to the switch arms shared by every scanner that accepts arbitrary
   character data: the 2-, 3- and 4-byte lead cases above, followed by
   BT_NONXML, BT_MALFORM and BT_TRAIL, all three of which store ptr in
   *nextTokPtr and return XML_TOK_INVALID. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_NONXML: \
  case BT_MALFORM: \
  case BT_TRAIL: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
30 
/* CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr)
   Expands to the `case BT_LEAD<n>:` arm used while scanning a name:
   returns XML_TOK_PARTIAL_CHAR if fewer than n bytes remain, returns
   XML_TOK_INVALID (with *nextTokPtr = ptr) if IS_NAME_CHAR rejects the
   n-byte character, and otherwise advances ptr by n and breaks. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NAME_CHAR(enc, ptr, n)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;

/* CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
   Expands to every switch arm that accepts one name character and steps
   ptr past it.  BT_NONASCII is first validated with IS_NAME_CHAR_MINBPC and,
   when accepted, deliberately falls through into the BT_NMSTRT / BT_HEX /
   BT_DIGIT / BT_NAME / BT_MINUS group, which advances ptr by MINBPC(enc).
   The multi-byte lead cases are handled by CHECK_NAME_CASE.  The last arm
   ends in `break;`, so cases written after this macro do not receive a
   fall-through from it. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
58 
/* CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr)
   Expands to the `case BT_LEAD<n>:` arm used for the first character of a
   name: returns XML_TOK_PARTIAL_CHAR if fewer than n bytes remain, returns
   XML_TOK_INVALID (with *nextTokPtr = ptr) if IS_NMSTRT_CHAR rejects the
   n-byte character, and otherwise advances ptr by n and breaks. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;

/* CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   Expands to every switch arm that accepts one name-start character and
   steps ptr past it.  BT_NONASCII is first validated with
   IS_NMSTRT_CHAR_MINBPC and, when accepted, deliberately falls through into
   the BT_NMSTRT / BT_HEX group, which advances ptr by MINBPC(enc).  The
   multi-byte lead cases are handled by CHECK_NMSTRT_CASE.  The last arm ends
   in `break;`. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
  case BT_NMSTRT: \
  case BT_HEX: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
83 
/* PREFIX(ident) wraps the name of every function defined below.  Unless
   the including file has already defined it, the identifier is used
   unchanged. */
#ifndef PREFIX
#define PREFIX(ident) ident
#endif
87 
88 /* ptr points to character following "<!-" */
89 
90 static
PREFIX(scanComment)91 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
92                         const char **nextTokPtr)
93 {
94   if (ptr != end) {
95     if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
96       *nextTokPtr = ptr;
97       return XML_TOK_INVALID;
98     }
99     ptr += MINBPC(enc);
100     while (ptr != end) {
101       switch (BYTE_TYPE(enc, ptr)) {
102       INVALID_CASES(ptr, nextTokPtr)
103       case BT_MINUS:
104         if ((ptr += MINBPC(enc)) == end)
105           return XML_TOK_PARTIAL;
106         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
107           if ((ptr += MINBPC(enc)) == end)
108             return XML_TOK_PARTIAL;
109           if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
110             *nextTokPtr = ptr;
111             return XML_TOK_INVALID;
112           }
113           *nextTokPtr = ptr + MINBPC(enc);
114           return XML_TOK_COMMENT;
115         }
116         break;
117       default:
118         ptr += MINBPC(enc);
119         break;
120       }
121     }
122   }
123   return XML_TOK_PARTIAL;
124 }
125 
126 /* ptr points to character following "<!" */
127 
128 static
PREFIX(scanDecl)129 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
130                      const char **nextTokPtr)
131 {
132   if (ptr == end)
133     return XML_TOK_PARTIAL;
134   switch (BYTE_TYPE(enc, ptr)) {
135   case BT_MINUS:
136     return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
137   case BT_LSQB:
138     *nextTokPtr = ptr + MINBPC(enc);
139     return XML_TOK_COND_SECT_OPEN;
140   case BT_NMSTRT:
141   case BT_HEX:
142     ptr += MINBPC(enc);
143     break;
144   default:
145     *nextTokPtr = ptr;
146     return XML_TOK_INVALID;
147   }
148   while (ptr != end) {
149     switch (BYTE_TYPE(enc, ptr)) {
150     case BT_PERCNT:
151       if (ptr + MINBPC(enc) == end)
152         return XML_TOK_PARTIAL;
153       /* don't allow <!ENTITY% foo "whatever"> */
154       switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
155       case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
156         *nextTokPtr = ptr;
157         return XML_TOK_INVALID;
158       }
159       /* fall through */
160     case BT_S: case BT_CR: case BT_LF:
161       *nextTokPtr = ptr;
162       return XML_TOK_DECL_OPEN;
163     case BT_NMSTRT:
164     case BT_HEX:
165       ptr += MINBPC(enc);
166       break;
167     default:
168       *nextTokPtr = ptr;
169       return XML_TOK_INVALID;
170     }
171   }
172   return XML_TOK_PARTIAL;
173 }
174 
175 static
PREFIX(checkPiTarget)176 int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
177 {
178   int upper = 0;
179   vtkExpatUnused(enc);
180   *tokPtr = XML_TOK_PI;
181   if (end - ptr != MINBPC(enc)*3)
182     return 1;
183   switch (BYTE_TO_ASCII(enc, ptr)) {
184   case ASCII_x:
185     break;
186   case ASCII_X:
187     upper = 1;
188     break;
189   default:
190     return 1;
191   }
192   ptr += MINBPC(enc);
193   switch (BYTE_TO_ASCII(enc, ptr)) {
194   case ASCII_m:
195     break;
196   case ASCII_M:
197     upper = 1;
198     break;
199   default:
200     return 1;
201   }
202   ptr += MINBPC(enc);
203   switch (BYTE_TO_ASCII(enc, ptr)) {
204   case ASCII_l:
205     break;
206   case ASCII_L:
207     upper = 1;
208     break;
209   default:
210     return 1;
211   }
212   if (upper)
213     return 0;
214   *tokPtr = XML_TOK_XML_DECL;
215   return 1;
216 }
217 
218 /* ptr points to character following "<?" */
219 
220 static
PREFIX(scanPi)221 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
222                    const char **nextTokPtr)
223 {
224   int tok;
225   const char *target = ptr;
226   if (ptr == end)
227     return XML_TOK_PARTIAL;
228   switch (BYTE_TYPE(enc, ptr)) {
229   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
230   default:
231     *nextTokPtr = ptr;
232     return XML_TOK_INVALID;
233   }
234   while (ptr != end) {
235     switch (BYTE_TYPE(enc, ptr)) {
236     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
237     case BT_S: case BT_CR: case BT_LF:
238       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
239         *nextTokPtr = ptr;
240         return XML_TOK_INVALID;
241       }
242       ptr += MINBPC(enc);
243       while (ptr != end) {
244         switch (BYTE_TYPE(enc, ptr)) {
245         INVALID_CASES(ptr, nextTokPtr)
246         case BT_QUEST:
247           ptr += MINBPC(enc);
248           if (ptr == end)
249             return XML_TOK_PARTIAL;
250           if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
251             *nextTokPtr = ptr + MINBPC(enc);
252             return tok;
253           }
254           break;
255         default:
256           ptr += MINBPC(enc);
257           break;
258         }
259       }
260       return XML_TOK_PARTIAL;
261     case BT_QUEST:
262       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
263         *nextTokPtr = ptr;
264         return XML_TOK_INVALID;
265       }
266       ptr += MINBPC(enc);
267       if (ptr == end)
268         return XML_TOK_PARTIAL;
269       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
270         *nextTokPtr = ptr + MINBPC(enc);
271         return tok;
272       }
273       /* fall through */
274     default:
275       *nextTokPtr = ptr;
276       return XML_TOK_INVALID;
277     }
278   }
279   return XML_TOK_PARTIAL;
280 }
281 
282 
283 static
PREFIX(scanCdataSection)284 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
285                              const char **nextTokPtr)
286 {
287   static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
288   int i;
289   vtkExpatUnused(enc);
290   /* CDATA[ */
291   if (end - ptr < 6 * MINBPC(enc))
292     return XML_TOK_PARTIAL;
293   for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
294     if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
295       *nextTokPtr = ptr;
296       return XML_TOK_INVALID;
297     }
298   }
299   *nextTokPtr = ptr;
300   return XML_TOK_CDATA_SECT_OPEN;
301 }
302 
/* Returns the next token inside a CDATA section.

   Returns
     XML_TOK_NONE              ptr == end
     XML_TOK_CDATA_SECT_CLOSE  "]]>" at ptr; *nextTokPtr is just past it
     XML_TOK_DATA_NEWLINE      CR, LF or CR LF at ptr; *nextTokPtr is just
                               past the line break
     XML_TOK_DATA_CHARS        a run of character data; *nextTokPtr is at the
                               first character not included in the run
     XML_TOK_INVALID           *nextTokPtr is at the offending character
     XML_TOK_PARTIAL           more input is needed to decide
     XML_TOK_PARTIAL_CHAR      input ended inside a multi-byte character */
static
int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
                            const char **nextTokPtr)
{
  if (ptr == end)
    return XML_TOK_NONE;
  /* For encodings wider than one byte, ignore trailing bytes that do not
     make up a whole unit.  NOTE(review): the masking presumes MINBPC(enc)
     is a power of two -- confirm against the encoding definitions. */
  if (MINBPC(enc) > 1) {
    size_t n = end - ptr;
    if (n & (MINBPC(enc) - 1)) {
      n &= ~(MINBPC(enc) - 1);
      if (n == 0)
        return XML_TOK_PARTIAL;
      end = ptr + n;
    }
  }
  /* The first character decides whether this is a close marker, a line
     break, or the start of a run of data. */
  switch (BYTE_TYPE(enc, ptr)) {
  case BT_RSQB:
    ptr += MINBPC(enc);
    if (ptr == end)
      return XML_TOK_PARTIAL;
    /* A lone ']' is data; continue the run after it. */
    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
      break;
    ptr += MINBPC(enc);
    if (ptr == end)
      return XML_TOK_PARTIAL;
    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
      /* "]]" without '>': step back so that only the first ']' is taken
         as data and the second one is examined again by the loop below. */
      ptr -= MINBPC(enc);
      break;
    }
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_CDATA_SECT_CLOSE;
  case BT_CR:
    ptr += MINBPC(enc);
    /* Cannot tell yet whether an LF follows. */
    if (ptr == end)
      return XML_TOK_PARTIAL;
    if (BYTE_TYPE(enc, ptr) == BT_LF)
      ptr += MINBPC(enc);
    *nextTokPtr = ptr;
    return XML_TOK_DATA_NEWLINE;
  case BT_LF:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_DATA_NEWLINE;
  INVALID_CASES(ptr, nextTokPtr)
  default:
    ptr += MINBPC(enc);
    break;
  }
  /* Extend the run of data up to the first character that has to start a
     token of its own.  Problems found here are not reported as errors: the
     run simply ends and the next call looks at the character again. */
  while (ptr != end) {
    switch (BYTE_TYPE(enc, ptr)) {
/* Multi-byte character inside the run: stop the run in front of it when it
   is incomplete or invalid, otherwise step over all n bytes. */
#define LEAD_CASE(n) \
    case BT_LEAD ## n: \
      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_DATA_CHARS; \
      } \
      ptr += n; \
      break;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
    case BT_NONXML:
    case BT_MALFORM:
    case BT_TRAIL:
    case BT_CR:
    case BT_LF:
    case BT_RSQB:
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  *nextTokPtr = ptr;
  return XML_TOK_DATA_CHARS;
}
378 
379 /* ptr points to character following "</" */
380 
381 static
PREFIX(scanEndTag)382 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
383                        const char **nextTokPtr)
384 {
385   if (ptr == end)
386     return XML_TOK_PARTIAL;
387   switch (BYTE_TYPE(enc, ptr)) {
388   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
389   default:
390     *nextTokPtr = ptr;
391     return XML_TOK_INVALID;
392   }
393   while (ptr != end) {
394     switch (BYTE_TYPE(enc, ptr)) {
395     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
396     case BT_S: case BT_CR: case BT_LF:
397       for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
398         switch (BYTE_TYPE(enc, ptr)) {
399         case BT_S: case BT_CR: case BT_LF:
400           break;
401         case BT_GT:
402           *nextTokPtr = ptr + MINBPC(enc);
403           return XML_TOK_END_TAG;
404         default:
405           *nextTokPtr = ptr;
406           return XML_TOK_INVALID;
407         }
408       }
409       return XML_TOK_PARTIAL;
410 #ifdef XML_NS
411     case BT_COLON:
412       /* no need to check qname syntax here, since end-tag must match exactly */
413       ptr += MINBPC(enc);
414       break;
415 #endif
416     case BT_GT:
417       *nextTokPtr = ptr + MINBPC(enc);
418       return XML_TOK_END_TAG;
419     default:
420       *nextTokPtr = ptr;
421       return XML_TOK_INVALID;
422     }
423   }
424   return XML_TOK_PARTIAL;
425 }
426 
427 /* ptr points to character following "&#X" */
428 
429 static
PREFIX(scanHexCharRef)430 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
431                            const char **nextTokPtr)
432 {
433   if (ptr != end) {
434     switch (BYTE_TYPE(enc, ptr)) {
435     case BT_DIGIT:
436     case BT_HEX:
437       break;
438     default:
439       *nextTokPtr = ptr;
440       return XML_TOK_INVALID;
441     }
442     for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
443       switch (BYTE_TYPE(enc, ptr)) {
444       case BT_DIGIT:
445       case BT_HEX:
446         break;
447       case BT_SEMI:
448         *nextTokPtr = ptr + MINBPC(enc);
449         return XML_TOK_CHAR_REF;
450       default:
451         *nextTokPtr = ptr;
452         return XML_TOK_INVALID;
453       }
454     }
455   }
456   return XML_TOK_PARTIAL;
457 }
458 
459 /* ptr points to character following "&#" */
460 
461 static
PREFIX(scanCharRef)462 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
463                         const char **nextTokPtr)
464 {
465   if (ptr != end) {
466     if (CHAR_MATCHES(enc, ptr, ASCII_x))
467       return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
468     switch (BYTE_TYPE(enc, ptr)) {
469     case BT_DIGIT:
470       break;
471     default:
472       *nextTokPtr = ptr;
473       return XML_TOK_INVALID;
474     }
475     for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
476       switch (BYTE_TYPE(enc, ptr)) {
477       case BT_DIGIT:
478         break;
479       case BT_SEMI:
480         *nextTokPtr = ptr + MINBPC(enc);
481         return XML_TOK_CHAR_REF;
482       default:
483         *nextTokPtr = ptr;
484         return XML_TOK_INVALID;
485       }
486     }
487   }
488   return XML_TOK_PARTIAL;
489 }
490 
491 /* ptr points to character following "&" */
492 
493 static
PREFIX(scanRef)494 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
495                     const char **nextTokPtr)
496 {
497   if (ptr == end)
498     return XML_TOK_PARTIAL;
499   switch (BYTE_TYPE(enc, ptr)) {
500   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
501   case BT_NUM:
502     return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
503   default:
504     *nextTokPtr = ptr;
505     return XML_TOK_INVALID;
506   }
507   while (ptr != end) {
508     switch (BYTE_TYPE(enc, ptr)) {
509     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
510     case BT_SEMI:
511       *nextTokPtr = ptr + MINBPC(enc);
512       return XML_TOK_ENTITY_REF;
513     default:
514       *nextTokPtr = ptr;
515       return XML_TOK_INVALID;
516     }
517   }
518   return XML_TOK_PARTIAL;
519 }
520 
/* Scans the attributes of a start tag.  On entry ptr points to the
   character following the first character of the first attribute name.

   Each pass of the outer loop consumes name characters; once whitespace or
   '=' ends the name, the BT_EQUALS arm consumes '=', the quoted value and
   whatever follows it, and either returns or goes back to the outer loop
   for the next attribute.

   Returns
     XML_TOK_START_TAG_WITH_ATTS      tag closed by '>'; *nextTokPtr is just
                                      past it
     XML_TOK_EMPTY_ELEMENT_WITH_ATTS  tag closed by "/>"; *nextTokPtr is just
                                      past it
     XML_TOK_INVALID                  *nextTokPtr is at the offending
                                      character
     XML_TOK_PARTIAL                  input ended inside the tag
     XML_TOK_PARTIAL_CHAR             input ended inside a multi-byte
                                      character
   A non-positive result from scanRef on a reference inside an attribute
   value is returned unchanged. */

static
int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
                     const char **nextTokPtr)
{
#ifdef XML_NS
  /* Set once the current attribute name has contained a ':'. */
  int hadColon = 0;
#endif
  while (ptr != end) {
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
    case BT_COLON:
      /* At most one ':' per name, and it must be followed by a name-start
         character. */
      if (hadColon) {
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      hadColon = 1;
      ptr += MINBPC(enc);
      if (ptr == end)
        return XML_TOK_PARTIAL;
      switch (BYTE_TYPE(enc, ptr)) {
      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
      default:
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      break;
#endif
    case BT_S: case BT_CR: case BT_LF:
      /* Whitespace after the name: skip it; the next non-space character
         must be '='.  The loop exits with ptr on the '='. */
      for (;;) {
        int t;

        ptr += MINBPC(enc);
        if (ptr == end)
          return XML_TOK_PARTIAL;
        t = BYTE_TYPE(enc, ptr);
        if (t == BT_EQUALS)
          break;
        switch (t) {
        case BT_S:
        case BT_LF:
        case BT_CR:
          break;
        default:
          *nextTokPtr = ptr;
          return XML_TOK_INVALID;
        }
      }
    /* fall through */
    case BT_EQUALS:
      {
        /* Byte type of the opening quote (BT_QUOT or BT_APOS); the value
           ends at the next character of the same type. */
        int open;
#ifdef XML_NS
        hadColon = 0;
#endif
        /* Skip whitespace between '=' and the opening quote. */
        for (;;) {

          ptr += MINBPC(enc);
          if (ptr == end)
            return XML_TOK_PARTIAL;
          open = BYTE_TYPE(enc, ptr);
          if (open == BT_QUOT || open == BT_APOS)
            break;
          switch (open) {
          case BT_S:
          case BT_LF:
          case BT_CR:
            break;
          default:
            *nextTokPtr = ptr;
            return XML_TOK_INVALID;
          }
        }
        ptr += MINBPC(enc);
        /* in attribute value */
        for (;;) {
          int t;
          if (ptr == end)
            return XML_TOK_PARTIAL;
          t = BYTE_TYPE(enc, ptr);
          if (t == open)
            break;
          switch (t) {
          INVALID_CASES(ptr, nextTokPtr)
          case BT_AMP:
            {
              /* scanRef reports its end position through ptr itself: on
                 success ptr ends up just past the reference. */
              int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
              if (tok <= 0) {
                if (tok == XML_TOK_INVALID)
                  *nextTokPtr = ptr;
                return tok;
              }
              break;
            }
          case BT_LT:
            /* '<' is not allowed inside an attribute value. */
            *nextTokPtr = ptr;
            return XML_TOK_INVALID;
          default:
            ptr += MINBPC(enc);
            break;
          }
        }
        /* Step over the closing quote and look at what follows it. */
        ptr += MINBPC(enc);
        if (ptr == end)
          return XML_TOK_PARTIAL;
        switch (BYTE_TYPE(enc, ptr)) {
        case BT_S:
        case BT_CR:
        case BT_LF:
          break;
        case BT_SOL:
          goto sol;
        case BT_GT:
          goto gt;
        default:
          *nextTokPtr = ptr;
          return XML_TOK_INVALID;
        }
        /* ptr points to the whitespace character that follows the closing
           quote */
        for (;;) {
          ptr += MINBPC(enc);
          if (ptr == end)
            return XML_TOK_PARTIAL;
          switch (BYTE_TYPE(enc, ptr)) {
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
          case BT_S: case BT_CR: case BT_LF:
            continue;
          case BT_GT:
          gt:
            *nextTokPtr = ptr + MINBPC(enc);
            return XML_TOK_START_TAG_WITH_ATTS;
          case BT_SOL:
          sol:
            ptr += MINBPC(enc);
            if (ptr == end)
              return XML_TOK_PARTIAL;
            if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
              *nextTokPtr = ptr;
              return XML_TOK_INVALID;
            }
            *nextTokPtr = ptr + MINBPC(enc);
            return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
          default:
            *nextTokPtr = ptr;
            return XML_TOK_INVALID;
          }
          /* Reached only when CHECK_NMSTRT_CASES accepted the first
             character of another attribute name: leave this loop ... */
          break;
        }
        /* ... and the enclosing switch, so the outer loop scans the rest
           of that name. */
        break;
      }
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return XML_TOK_PARTIAL;
}
680 
/* Scans markup in content.  On entry ptr points to the character following
   "<".

   "<!-" is passed on to scanComment, "<![" to scanCdataSection, "<?" to
   scanPi and "</" to scanEndTag; their results are returned unchanged.
   A name-start character begins a start tag, which is scanned here up to
   its first attribute (handed to scanAtts) or its end.

   Returns (for a start tag)
     XML_TOK_START_TAG_NO_ATTS      tag closed by '>'; *nextTokPtr is just
                                    past it
     XML_TOK_EMPTY_ELEMENT_NO_ATTS  tag closed by "/>"; *nextTokPtr is just
                                    past it
     the result of scanAtts         when an attribute name follows the
                                    element name
     XML_TOK_INVALID                *nextTokPtr is at the offending
                                    character
     XML_TOK_PARTIAL                input ended inside the tag
     XML_TOK_PARTIAL_CHAR           input ended inside a multi-byte
                                    character */

static
int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
                   const char **nextTokPtr)
{
#ifdef XML_NS
  /* Set once the element name has contained a ':'. */
  int hadColon;
#endif
  if (ptr == end)
    return XML_TOK_PARTIAL;
  switch (BYTE_TYPE(enc, ptr)) {
  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  case BT_EXCL:
    if ((ptr += MINBPC(enc)) == end)
      return XML_TOK_PARTIAL;
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_MINUS:
      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    case BT_LSQB:
      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    }
    /* "<!" followed by anything else is rejected here. */
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  case BT_QUEST:
    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_SOL:
    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
#ifdef XML_NS
  hadColon = 0;
#endif
  /* we have a start-tag */
  while (ptr != end) {
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
    case BT_COLON:
      /* At most one ':' per name, and it must be followed by a name-start
         character. */
      if (hadColon) {
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      hadColon = 1;
      ptr += MINBPC(enc);
      if (ptr == end)
        return XML_TOK_PARTIAL;
      switch (BYTE_TYPE(enc, ptr)) {
      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
      default:
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      break;
#endif
    case BT_S: case BT_CR: case BT_LF:
      {
        /* Whitespace after the element name: skip it, then expect '>',
           "/>" or the first character of an attribute name. */
        ptr += MINBPC(enc);
        while (ptr != end) {
          switch (BYTE_TYPE(enc, ptr)) {
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
          case BT_GT:
            goto gt;
          case BT_SOL:
            goto sol;
          case BT_S: case BT_CR: case BT_LF:
            ptr += MINBPC(enc);
            continue;
          default:
            *nextTokPtr = ptr;
            return XML_TOK_INVALID;
          }
          /* Reached only when CHECK_NMSTRT_CASES accepted a name-start
             character and stepped ptr past it, which is the position
             scanAtts expects. */
          return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
        }
        return XML_TOK_PARTIAL;
      }
    case BT_GT:
    gt:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_START_TAG_NO_ATTS;
    case BT_SOL:
    sol:
      /* '/' must be followed immediately by '>'. */
      ptr += MINBPC(enc);
      if (ptr == end)
        return XML_TOK_PARTIAL;
      if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return XML_TOK_PARTIAL;
}
781 
/* Returns the next token in element content.

   Returns
     XML_TOK_NONE           ptr == end
     the result of scanLt   when the token starts with '<'
     the result of scanRef  when the token starts with '&'
     XML_TOK_DATA_NEWLINE   CR, LF or CR LF at ptr; *nextTokPtr is just past
                            the line break
     XML_TOK_TRAILING_CR    a CR is the last character of the input
     XML_TOK_TRAILING_RSQB  the input ends after "]" or "]]" at ptr
     XML_TOK_DATA_CHARS     a run of character data; *nextTokPtr is at the
                            first character not included in the run
     XML_TOK_INVALID        *nextTokPtr identifies the offending input; this
                            includes the sequence "]]>"
     XML_TOK_PARTIAL        less than one whole unit of input is available
     XML_TOK_PARTIAL_CHAR   input ended inside a multi-byte character */
static
int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
                       const char **nextTokPtr)
{
  if (ptr == end)
    return XML_TOK_NONE;
  /* For encodings wider than one byte, ignore trailing bytes that do not
     make up a whole unit.  NOTE(review): the masking presumes MINBPC(enc)
     is a power of two -- confirm against the encoding definitions. */
  if (MINBPC(enc) > 1) {
    size_t n = end - ptr;
    if (n & (MINBPC(enc) - 1)) {
      n &= ~(MINBPC(enc) - 1);
      if (n == 0)
        return XML_TOK_PARTIAL;
      end = ptr + n;
    }
  }
  /* The first character decides which kind of token this is. */
  switch (BYTE_TYPE(enc, ptr)) {
  case BT_LT:
    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_AMP:
    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_CR:
    ptr += MINBPC(enc);
    /* Cannot tell yet whether an LF follows. */
    if (ptr == end)
      return XML_TOK_TRAILING_CR;
    if (BYTE_TYPE(enc, ptr) == BT_LF)
      ptr += MINBPC(enc);
    *nextTokPtr = ptr;
    return XML_TOK_DATA_NEWLINE;
  case BT_LF:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_DATA_NEWLINE;
  case BT_RSQB:
    ptr += MINBPC(enc);
    if (ptr == end)
      return XML_TOK_TRAILING_RSQB;
    /* A lone ']' is data; continue the run after it. */
    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
      break;
    ptr += MINBPC(enc);
    if (ptr == end)
      return XML_TOK_TRAILING_RSQB;
    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
      /* "]]" without '>': step back so that only the first ']' is taken
         as data and the second one is examined again by the loop below. */
      ptr -= MINBPC(enc);
      break;
    }
    /* "]]>" is rejected in content; ptr is at the '>'. */
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  INVALID_CASES(ptr, nextTokPtr)
  default:
    ptr += MINBPC(enc);
    break;
  }
  /* Extend the run of data up to the first character that has to start a
     token of its own. */
  while (ptr != end) {
    switch (BYTE_TYPE(enc, ptr)) {
/* Multi-byte character inside the run: stop the run in front of it when it
   is incomplete or invalid, otherwise step over all n bytes. */
#define LEAD_CASE(n) \
    case BT_LEAD ## n: \
      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_DATA_CHARS; \
      } \
      ptr += n; \
      break;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
    case BT_RSQB:
      /* Look ahead for "]]>".  If the input ends before that can be
         decided, fall through and end the run in front of this ']'. */
      if (ptr + MINBPC(enc) != end) {
         if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
           ptr += MINBPC(enc);
           break;
         }
         if (ptr + 2*MINBPC(enc) != end) {
           if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
             ptr += MINBPC(enc);
             break;
           }
           /* "]]>" inside the run: report it at the '>'. */
           *nextTokPtr = ptr + 2*MINBPC(enc);
           return XML_TOK_INVALID;
         }
      }
      /* fall through */
    case BT_AMP:
    case BT_LT:
    case BT_NONXML:
    case BT_MALFORM:
    case BT_TRAIL:
    case BT_CR:
    case BT_LF:
      *nextTokPtr = ptr;
      return XML_TOK_DATA_CHARS;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  *nextTokPtr = ptr;
  return XML_TOK_DATA_CHARS;
}
878 
879 /* ptr points to character following "%" */
880 
881 static
PREFIX(scanPercent)882 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
883                         const char **nextTokPtr)
884 {
885   if (ptr == end)
886     return XML_TOK_PARTIAL;
887   switch (BYTE_TYPE(enc, ptr)) {
888   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
889   case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
890     *nextTokPtr = ptr;
891     return XML_TOK_PERCENT;
892   default:
893     *nextTokPtr = ptr;
894     return XML_TOK_INVALID;
895   }
896   while (ptr != end) {
897     switch (BYTE_TYPE(enc, ptr)) {
898     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
899     case BT_SEMI:
900       *nextTokPtr = ptr + MINBPC(enc);
901       return XML_TOK_PARAM_ENTITY_REF;
902     default:
903       *nextTokPtr = ptr;
904       return XML_TOK_INVALID;
905     }
906   }
907   return XML_TOK_PARTIAL;
908 }
909 
910 static
PREFIX(scanPoundName)911 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
912                           const char **nextTokPtr)
913 {
914   if (ptr == end)
915     return XML_TOK_PARTIAL;
916   switch (BYTE_TYPE(enc, ptr)) {
917   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
918   default:
919     *nextTokPtr = ptr;
920     return XML_TOK_INVALID;
921   }
922   while (ptr != end) {
923     switch (BYTE_TYPE(enc, ptr)) {
924     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
925     case BT_CR: case BT_LF: case BT_S:
926     case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
927       *nextTokPtr = ptr;
928       return XML_TOK_POUND_NAME;
929     default:
930       *nextTokPtr = ptr;
931       return XML_TOK_INVALID;
932     }
933   }
934   return -XML_TOK_POUND_NAME;
935 }
936 
937 static
PREFIX(scanLit)938 int PREFIX(scanLit)(int open, const ENCODING *enc,
939                     const char *ptr, const char *end,
940                     const char **nextTokPtr)
941 {
942   while (ptr != end) {
943     int t = BYTE_TYPE(enc, ptr);
944     switch (t) {
945     INVALID_CASES(ptr, nextTokPtr)
946     case BT_QUOT:
947     case BT_APOS:
948       ptr += MINBPC(enc);
949       if (t != open)
950         break;
951       if (ptr == end)
952         return -XML_TOK_LITERAL;
953       *nextTokPtr = ptr;
954       switch (BYTE_TYPE(enc, ptr)) {
955       case BT_S: case BT_CR: case BT_LF:
956       case BT_GT: case BT_PERCNT: case BT_LSQB:
957         return XML_TOK_LITERAL;
958       default:
959         return XML_TOK_INVALID;
960       }
961     default:
962       ptr += MINBPC(enc);
963       break;
964     }
965   }
966   return XML_TOK_PARTIAL;
967 }
968 
969 static
PREFIX(prologTok)970 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
971                       const char **nextTokPtr)
972 {
973   int tok;
974   if (ptr == end)
975     return XML_TOK_NONE;
976   if (MINBPC(enc) > 1) {
977     size_t n = end - ptr;
978     if (n & (MINBPC(enc) - 1)) {
979       n &= ~(MINBPC(enc) - 1);
980       if (n == 0)
981         return XML_TOK_PARTIAL;
982       end = ptr + n;
983     }
984   }
985   switch (BYTE_TYPE(enc, ptr)) {
986   case BT_QUOT:
987     return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
988   case BT_APOS:
989     return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
990   case BT_LT:
991     {
992       ptr += MINBPC(enc);
993       if (ptr == end)
994         return XML_TOK_PARTIAL;
995       switch (BYTE_TYPE(enc, ptr)) {
996       case BT_EXCL:
997         return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
998       case BT_QUEST:
999         return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1000       case BT_NMSTRT:
1001       case BT_HEX:
1002       case BT_NONASCII:
1003       case BT_LEAD2:
1004       case BT_LEAD3:
1005       case BT_LEAD4:
1006         *nextTokPtr = ptr - MINBPC(enc);
1007         return XML_TOK_INSTANCE_START;
1008       }
1009       *nextTokPtr = ptr;
1010       return XML_TOK_INVALID;
1011     }
1012   case BT_CR:
1013     if (ptr + MINBPC(enc) == end)
1014       return -XML_TOK_PROLOG_S;
1015     /* fall through */
1016   case BT_S: case BT_LF:
1017     for (;;) {
1018       ptr += MINBPC(enc);
1019       if (ptr == end)
1020         break;
1021       switch (BYTE_TYPE(enc, ptr)) {
1022       case BT_S: case BT_LF:
1023         break;
1024       case BT_CR:
1025         /* don't split CR/LF pair */
1026         if (ptr + MINBPC(enc) != end)
1027           break;
1028         /* fall through */
1029       default:
1030         *nextTokPtr = ptr;
1031         return XML_TOK_PROLOG_S;
1032       }
1033     }
1034     *nextTokPtr = ptr;
1035     return XML_TOK_PROLOG_S;
1036   case BT_PERCNT:
1037     return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1038   case BT_COMMA:
1039     *nextTokPtr = ptr + MINBPC(enc);
1040     return XML_TOK_COMMA;
1041   case BT_LSQB:
1042     *nextTokPtr = ptr + MINBPC(enc);
1043     return XML_TOK_OPEN_BRACKET;
1044   case BT_RSQB:
1045     ptr += MINBPC(enc);
1046     if (ptr == end)
1047       return -XML_TOK_CLOSE_BRACKET;
1048     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1049       if (ptr + MINBPC(enc) == end)
1050         return XML_TOK_PARTIAL;
1051       if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1052         *nextTokPtr = ptr + 2*MINBPC(enc);
1053         return XML_TOK_COND_SECT_CLOSE;
1054       }
1055     }
1056     *nextTokPtr = ptr;
1057     return XML_TOK_CLOSE_BRACKET;
1058   case BT_LPAR:
1059     *nextTokPtr = ptr + MINBPC(enc);
1060     return XML_TOK_OPEN_PAREN;
1061   case BT_RPAR:
1062     ptr += MINBPC(enc);
1063     if (ptr == end)
1064       return -XML_TOK_CLOSE_PAREN;
1065     switch (BYTE_TYPE(enc, ptr)) {
1066     case BT_AST:
1067       *nextTokPtr = ptr + MINBPC(enc);
1068       return XML_TOK_CLOSE_PAREN_ASTERISK;
1069     case BT_QUEST:
1070       *nextTokPtr = ptr + MINBPC(enc);
1071       return XML_TOK_CLOSE_PAREN_QUESTION;
1072     case BT_PLUS:
1073       *nextTokPtr = ptr + MINBPC(enc);
1074       return XML_TOK_CLOSE_PAREN_PLUS;
1075     case BT_CR: case BT_LF: case BT_S:
1076     case BT_GT: case BT_COMMA: case BT_VERBAR:
1077     case BT_RPAR:
1078       *nextTokPtr = ptr;
1079       return XML_TOK_CLOSE_PAREN;
1080     }
1081     *nextTokPtr = ptr;
1082     return XML_TOK_INVALID;
1083   case BT_VERBAR:
1084     *nextTokPtr = ptr + MINBPC(enc);
1085     return XML_TOK_OR;
1086   case BT_GT:
1087     *nextTokPtr = ptr + MINBPC(enc);
1088     return XML_TOK_DECL_CLOSE;
1089   case BT_NUM:
1090     return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1091 #define LEAD_CASE(n) \
1092   case BT_LEAD ## n: \
1093     if (end - ptr < n) \
1094       return XML_TOK_PARTIAL_CHAR; \
1095     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1096       ptr += n; \
1097       tok = XML_TOK_NAME; \
1098       break; \
1099     } \
1100     if (IS_NAME_CHAR(enc, ptr, n)) { \
1101       ptr += n; \
1102       tok = XML_TOK_NMTOKEN; \
1103       break; \
1104     } \
1105     *nextTokPtr = ptr; \
1106     return XML_TOK_INVALID;
1107     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1108 #undef LEAD_CASE
1109   case BT_NMSTRT:
1110   case BT_HEX:
1111     tok = XML_TOK_NAME;
1112     ptr += MINBPC(enc);
1113     break;
1114   case BT_DIGIT:
1115   case BT_NAME:
1116   case BT_MINUS:
1117 #ifdef XML_NS
1118   case BT_COLON:
1119 #endif
1120     tok = XML_TOK_NMTOKEN;
1121     ptr += MINBPC(enc);
1122     break;
1123   case BT_NONASCII:
1124     if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1125       ptr += MINBPC(enc);
1126       tok = XML_TOK_NAME;
1127       break;
1128     }
1129     if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1130       ptr += MINBPC(enc);
1131       tok = XML_TOK_NMTOKEN;
1132       break;
1133     }
1134     /* fall through */
1135   default:
1136     *nextTokPtr = ptr;
1137     return XML_TOK_INVALID;
1138   }
1139   while (ptr != end) {
1140     switch (BYTE_TYPE(enc, ptr)) {
1141     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1142     case BT_GT: case BT_RPAR: case BT_COMMA:
1143     case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1144     case BT_S: case BT_CR: case BT_LF:
1145       *nextTokPtr = ptr;
1146       return tok;
1147 #ifdef XML_NS
1148     case BT_COLON:
1149       ptr += MINBPC(enc);
1150       switch (tok) {
1151       case XML_TOK_NAME:
1152         if (ptr == end)
1153           return XML_TOK_PARTIAL;
1154         tok = XML_TOK_PREFIXED_NAME;
1155         switch (BYTE_TYPE(enc, ptr)) {
1156         CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1157         default:
1158           tok = XML_TOK_NMTOKEN;
1159           break;
1160         }
1161         break;
1162       case XML_TOK_PREFIXED_NAME:
1163         tok = XML_TOK_NMTOKEN;
1164         break;
1165       }
1166       break;
1167 #endif
1168     case BT_PLUS:
1169       if (tok == XML_TOK_NMTOKEN)  {
1170         *nextTokPtr = ptr;
1171         return XML_TOK_INVALID;
1172       }
1173       *nextTokPtr = ptr + MINBPC(enc);
1174       return XML_TOK_NAME_PLUS;
1175     case BT_AST:
1176       if (tok == XML_TOK_NMTOKEN)  {
1177         *nextTokPtr = ptr;
1178         return XML_TOK_INVALID;
1179       }
1180       *nextTokPtr = ptr + MINBPC(enc);
1181       return XML_TOK_NAME_ASTERISK;
1182     case BT_QUEST:
1183       if (tok == XML_TOK_NMTOKEN)  {
1184         *nextTokPtr = ptr;
1185         return XML_TOK_INVALID;
1186       }
1187       *nextTokPtr = ptr + MINBPC(enc);
1188       return XML_TOK_NAME_QUESTION;
1189     default:
1190       *nextTokPtr = ptr;
1191       return XML_TOK_INVALID;
1192     }
1193   }
1194   return -tok;
1195 }
1196 
1197 static
PREFIX(attributeValueTok)1198 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1199                               const char **nextTokPtr)
1200 {
1201   const char *start;
1202   if (ptr == end)
1203     return XML_TOK_NONE;
1204   start = ptr;
1205   while (ptr != end) {
1206     switch (BYTE_TYPE(enc, ptr)) {
1207 #define LEAD_CASE(n) \
1208     case BT_LEAD ## n: ptr += n; break;
1209     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1210 #undef LEAD_CASE
1211     case BT_AMP:
1212       if (ptr == start)
1213         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1214       *nextTokPtr = ptr;
1215       return XML_TOK_DATA_CHARS;
1216     case BT_LT:
1217       /* this is for inside entity references */
1218       *nextTokPtr = ptr;
1219       return XML_TOK_INVALID;
1220     case BT_LF:
1221       if (ptr == start) {
1222         *nextTokPtr = ptr + MINBPC(enc);
1223         return XML_TOK_DATA_NEWLINE;
1224       }
1225       *nextTokPtr = ptr;
1226       return XML_TOK_DATA_CHARS;
1227     case BT_CR:
1228       if (ptr == start) {
1229         ptr += MINBPC(enc);
1230         if (ptr == end)
1231           return XML_TOK_TRAILING_CR;
1232         if (BYTE_TYPE(enc, ptr) == BT_LF)
1233           ptr += MINBPC(enc);
1234         *nextTokPtr = ptr;
1235         return XML_TOK_DATA_NEWLINE;
1236       }
1237       *nextTokPtr = ptr;
1238       return XML_TOK_DATA_CHARS;
1239     case BT_S:
1240       if (ptr == start) {
1241         *nextTokPtr = ptr + MINBPC(enc);
1242         return XML_TOK_ATTRIBUTE_VALUE_S;
1243       }
1244       *nextTokPtr = ptr;
1245       return XML_TOK_DATA_CHARS;
1246     default:
1247       ptr += MINBPC(enc);
1248       break;
1249     }
1250   }
1251   *nextTokPtr = ptr;
1252   return XML_TOK_DATA_CHARS;
1253 }
1254 
1255 static
PREFIX(entityValueTok)1256 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1257                            const char **nextTokPtr)
1258 {
1259   const char *start;
1260   if (ptr == end)
1261     return XML_TOK_NONE;
1262   start = ptr;
1263   while (ptr != end) {
1264     switch (BYTE_TYPE(enc, ptr)) {
1265 #define LEAD_CASE(n) \
1266     case BT_LEAD ## n: ptr += n; break;
1267     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1268 #undef LEAD_CASE
1269     case BT_AMP:
1270       if (ptr == start)
1271         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1272       *nextTokPtr = ptr;
1273       return XML_TOK_DATA_CHARS;
1274     case BT_PERCNT:
1275       if (ptr == start) {
1276         int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1277                                        end, nextTokPtr);
1278         return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1279       }
1280       *nextTokPtr = ptr;
1281       return XML_TOK_DATA_CHARS;
1282     case BT_LF:
1283       if (ptr == start) {
1284         *nextTokPtr = ptr + MINBPC(enc);
1285         return XML_TOK_DATA_NEWLINE;
1286       }
1287       *nextTokPtr = ptr;
1288       return XML_TOK_DATA_CHARS;
1289     case BT_CR:
1290       if (ptr == start) {
1291         ptr += MINBPC(enc);
1292         if (ptr == end)
1293           return XML_TOK_TRAILING_CR;
1294         if (BYTE_TYPE(enc, ptr) == BT_LF)
1295           ptr += MINBPC(enc);
1296         *nextTokPtr = ptr;
1297         return XML_TOK_DATA_NEWLINE;
1298       }
1299       *nextTokPtr = ptr;
1300       return XML_TOK_DATA_CHARS;
1301     default:
1302       ptr += MINBPC(enc);
1303       break;
1304     }
1305   }
1306   *nextTokPtr = ptr;
1307   return XML_TOK_DATA_CHARS;
1308 }
1309 
1310 #ifdef XML_DTD
1311 
1312 static
PREFIX(ignoreSectionTok)1313 int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
1314                              const char **nextTokPtr)
1315 {
1316   int level = 0;
1317   if (MINBPC(enc) > 1) {
1318     size_t n = end - ptr;
1319     if (n & (MINBPC(enc) - 1)) {
1320       n &= ~(MINBPC(enc) - 1);
1321       end = ptr + n;
1322     }
1323   }
1324   while (ptr != end) {
1325     switch (BYTE_TYPE(enc, ptr)) {
1326     INVALID_CASES(ptr, nextTokPtr)
1327     case BT_LT:
1328       if ((ptr += MINBPC(enc)) == end)
1329         return XML_TOK_PARTIAL;
1330       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1331         if ((ptr += MINBPC(enc)) == end)
1332           return XML_TOK_PARTIAL;
1333         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1334           ++level;
1335           ptr += MINBPC(enc);
1336         }
1337       }
1338       break;
1339     case BT_RSQB:
1340       if ((ptr += MINBPC(enc)) == end)
1341         return XML_TOK_PARTIAL;
1342       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1343         if ((ptr += MINBPC(enc)) == end)
1344           return XML_TOK_PARTIAL;
1345         if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1346           ptr += MINBPC(enc);
1347           if (level == 0) {
1348             *nextTokPtr = ptr;
1349             return XML_TOK_IGNORE_SECT;
1350           }
1351           --level;
1352         }
1353       }
1354       break;
1355     default:
1356       ptr += MINBPC(enc);
1357       break;
1358     }
1359   }
1360   return XML_TOK_PARTIAL;
1361 }
1362 
1363 #endif /* XML_DTD */
1364 
1365 static
PREFIX(isPublicId)1366 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1367                        const char **badPtr)
1368 {
1369   ptr += MINBPC(enc);
1370   end -= MINBPC(enc);
1371   for (; ptr != end; ptr += MINBPC(enc)) {
1372     switch (BYTE_TYPE(enc, ptr)) {
1373     case BT_DIGIT:
1374     case BT_HEX:
1375     case BT_MINUS:
1376     case BT_APOS:
1377     case BT_LPAR:
1378     case BT_RPAR:
1379     case BT_PLUS:
1380     case BT_COMMA:
1381     case BT_SOL:
1382     case BT_EQUALS:
1383     case BT_QUEST:
1384     case BT_CR:
1385     case BT_LF:
1386     case BT_SEMI:
1387     case BT_EXCL:
1388     case BT_AST:
1389     case BT_PERCNT:
1390     case BT_NUM:
1391 #ifdef XML_NS
1392     case BT_COLON:
1393 #endif
1394       break;
1395     case BT_S:
1396       if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1397         *badPtr = ptr;
1398         return 0;
1399       }
1400       break;
1401     case BT_NAME:
1402     case BT_NMSTRT:
1403       if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1404         break;
1405     default:
1406       switch (BYTE_TO_ASCII(enc, ptr)) {
1407       case 0x24: /* $ */
1408       case 0x40: /* @ */
1409         break;
1410       default:
1411         *badPtr = ptr;
1412         return 0;
1413       }
1414       break;
1415     }
1416   }
1417   return 1;
1418 }
1419 
1420 /* This must only be called for a well-formed start-tag or empty element tag.
1421 Returns the number of attributes.  Pointers to the first attsMax attributes
1422 are stored in atts. */
1423 
1424 static
PREFIX(getAtts)1425 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1426                     int attsMax, ATTRIBUTE *atts)
1427 {
1428   enum { other, inName, inValue } state = inName;
1429   int nAtts = 0;
1430   int open = 0; /* defined when state == inValue;
1431                    initialization just to shut up compilers */
1432 
1433   for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1434     switch (BYTE_TYPE(enc, ptr)) {
1435 #define START_NAME \
1436       if (state == other) { \
1437         if (nAtts < attsMax) { \
1438           atts[nAtts].name = ptr; \
1439           atts[nAtts].normalized = 1; \
1440         } \
1441         state = inName; \
1442       }
1443 #define LEAD_CASE(n) \
1444     case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1445     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1446 #undef LEAD_CASE
1447     case BT_NONASCII:
1448     case BT_NMSTRT:
1449     case BT_HEX:
1450       START_NAME
1451       break;
1452 #undef START_NAME
1453     case BT_QUOT:
1454       if (state != inValue) {
1455         if (nAtts < attsMax)
1456           atts[nAtts].valuePtr = ptr + MINBPC(enc);
1457         state = inValue;
1458         open = BT_QUOT;
1459       }
1460       else if (open == BT_QUOT) {
1461         state = other;
1462         if (nAtts < attsMax)
1463           atts[nAtts].valueEnd = ptr;
1464         nAtts++;
1465       }
1466       break;
1467     case BT_APOS:
1468       if (state != inValue) {
1469         if (nAtts < attsMax)
1470           atts[nAtts].valuePtr = ptr + MINBPC(enc);
1471         state = inValue;
1472         open = BT_APOS;
1473       }
1474       else if (open == BT_APOS) {
1475         state = other;
1476         if (nAtts < attsMax)
1477           atts[nAtts].valueEnd = ptr;
1478         nAtts++;
1479       }
1480       break;
1481     case BT_AMP:
1482       if (nAtts < attsMax)
1483         atts[nAtts].normalized = 0;
1484       break;
1485     case BT_S:
1486       if (state == inName)
1487         state = other;
1488       else if (state == inValue
1489                && nAtts < attsMax
1490                && atts[nAtts].normalized
1491                && (ptr == atts[nAtts].valuePtr
1492                    || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1493                    || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1494                    || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1495         atts[nAtts].normalized = 0;
1496       break;
1497     case BT_CR: case BT_LF:
1498       /* This case ensures that the first attribute name is counted
1499          Apart from that we could just change state on the quote. */
1500       if (state == inName)
1501         state = other;
1502       else if (state == inValue && nAtts < attsMax)
1503         atts[nAtts].normalized = 0;
1504       break;
1505     case BT_GT:
1506     case BT_SOL:
1507       if (state != inValue)
1508         return nAtts;
1509       break;
1510     default:
1511       break;
1512     }
1513   }
1514   /* not reached */
1515 }
1516 
1517 static
PREFIX(charRefNumber)1518 int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
1519 {
1520   int result = 0;
1521   vtkExpatUnused(enc);
1522   /* skip &# */
1523   ptr += 2*MINBPC(enc);
1524   if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1525     for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1526       int c = BYTE_TO_ASCII(enc, ptr);
1527       switch (c) {
1528       case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1529       case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1530         result <<= 4;
1531         result |= (c - ASCII_0);
1532         break;
1533       case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
1534         result <<= 4;
1535         result += 10 + (c - ASCII_A);
1536         break;
1537       case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
1538         result <<= 4;
1539         result += 10 + (c - ASCII_a);
1540         break;
1541       }
1542       if (result >= 0x110000)
1543         return -1;
1544     }
1545   }
1546   else {
1547     for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1548       int c = BYTE_TO_ASCII(enc, ptr);
1549       result *= 10;
1550       result += (c - ASCII_0);
1551       if (result >= 0x110000)
1552         return -1;
1553     }
1554   }
1555   return checkCharRefNumber(result);
1556 }
1557 
1558 static
PREFIX(predefinedEntityName)1559 int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
1560 {
1561   vtkExpatUnused(enc);
1562   switch ((end - ptr)/MINBPC(enc)) {
1563   case 2:
1564     if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1565       switch (BYTE_TO_ASCII(enc, ptr)) {
1566       case ASCII_l:
1567         return ASCII_LT;
1568       case ASCII_g:
1569         return ASCII_GT;
1570       }
1571     }
1572     break;
1573   case 3:
1574     if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1575       ptr += MINBPC(enc);
1576       if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1577         ptr += MINBPC(enc);
1578         if (CHAR_MATCHES(enc, ptr, ASCII_p))
1579           return ASCII_AMP;
1580       }
1581     }
1582     break;
1583   case 4:
1584     switch (BYTE_TO_ASCII(enc, ptr)) {
1585     case ASCII_q:
1586       ptr += MINBPC(enc);
1587       if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1588         ptr += MINBPC(enc);
1589         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1590           ptr += MINBPC(enc);
1591           if (CHAR_MATCHES(enc, ptr, ASCII_t))
1592             return ASCII_QUOT;
1593         }
1594       }
1595       break;
1596     case ASCII_a:
1597       ptr += MINBPC(enc);
1598       if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1599         ptr += MINBPC(enc);
1600         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1601           ptr += MINBPC(enc);
1602           if (CHAR_MATCHES(enc, ptr, ASCII_s))
1603             return ASCII_APOS;
1604         }
1605       }
1606       break;
1607     }
1608   }
1609   return 0;
1610 }
1611 
1612 static
PREFIX(sameName)1613 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1614 {
1615   for (;;) {
1616     switch (BYTE_TYPE(enc, ptr1)) {
1617 #define LEAD_CASE(n) \
1618     case BT_LEAD ## n: \
1619       if (*ptr1++ != *ptr2++) \
1620         return 0;
1621     LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1622 #undef LEAD_CASE
1623       /* fall through */
1624       if (*ptr1++ != *ptr2++)
1625         return 0;
1626       break;
1627     case BT_NONASCII:
1628     case BT_NMSTRT:
1629 #ifdef XML_NS
1630     case BT_COLON:
1631 #endif
1632     case BT_HEX:
1633     case BT_DIGIT:
1634     case BT_NAME:
1635     case BT_MINUS:
1636       if (*ptr2++ != *ptr1++)
1637         return 0;
1638       if (MINBPC(enc) > 1) {
1639         if (*ptr2++ != *ptr1++)
1640           return 0;
1641         if (MINBPC(enc) > 2) {
1642           if (*ptr2++ != *ptr1++)
1643             return 0;
1644           if (MINBPC(enc) > 3) {
1645             if (*ptr2++ != *ptr1++)
1646               return 0;
1647           }
1648         }
1649       }
1650       break;
1651     default:
1652       if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1653         return 1;
1654       switch (BYTE_TYPE(enc, ptr2)) {
1655       case BT_LEAD2:
1656       case BT_LEAD3:
1657       case BT_LEAD4:
1658       case BT_NONASCII:
1659       case BT_NMSTRT:
1660 #ifdef XML_NS
1661       case BT_COLON:
1662 #endif
1663       case BT_HEX:
1664       case BT_DIGIT:
1665       case BT_NAME:
1666       case BT_MINUS:
1667         return 0;
1668       default:
1669         return 1;
1670       }
1671     }
1672   }
1673   /* not reached */
1674 }
1675 
1676 static
PREFIX(nameMatchesAscii)1677 int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1678                              const char *end1, const char *ptr2)
1679 {
1680   vtkExpatUnused(enc);
1681   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1682     if (ptr1 == end1)
1683       return 0;
1684     if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1685       return 0;
1686   }
1687   return ptr1 == end1;
1688 }
1689 
1690 static
PREFIX(nameLength)1691 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1692 {
1693   const char *start = ptr;
1694   for (;;) {
1695     switch (BYTE_TYPE(enc, ptr)) {
1696 #define LEAD_CASE(n) \
1697     case BT_LEAD ## n: ptr += n; break;
1698     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1699 #undef LEAD_CASE
1700     case BT_NONASCII:
1701     case BT_NMSTRT:
1702 #ifdef XML_NS
1703     case BT_COLON:
1704 #endif
1705     case BT_HEX:
1706     case BT_DIGIT:
1707     case BT_NAME:
1708     case BT_MINUS:
1709       ptr += MINBPC(enc);
1710       break;
1711     default:
1712       return ptr - start;
1713     }
1714   }
1715 }
1716 
1717 static
PREFIX(skipS)1718 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1719 {
1720   for (;;) {
1721     switch (BYTE_TYPE(enc, ptr)) {
1722     case BT_LF:
1723     case BT_CR:
1724     case BT_S:
1725       ptr += MINBPC(enc);
1726       break;
1727     default:
1728       return ptr;
1729     }
1730   }
1731 }
1732 
1733 static
PREFIX(updatePosition)1734 void PREFIX(updatePosition)(const ENCODING *enc,
1735                             const char *ptr,
1736                             const char *end,
1737                             POSITION *pos)
1738 {
1739   while (ptr != end) {
1740     switch (BYTE_TYPE(enc, ptr)) {
1741 #define LEAD_CASE(n) \
1742     case BT_LEAD ## n: \
1743       ptr += n; \
1744       break;
1745     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1746 #undef LEAD_CASE
1747     case BT_LF:
1748       pos->columnNumber = (unsigned)-1;
1749       pos->lineNumber++;
1750       ptr += MINBPC(enc);
1751       break;
1752     case BT_CR:
1753       pos->lineNumber++;
1754       ptr += MINBPC(enc);
1755       if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1756         ptr += MINBPC(enc);
1757       pos->columnNumber = (unsigned)-1;
1758       break;
1759     default:
1760       ptr += MINBPC(enc);
1761       break;
1762     }
1763     pos->columnNumber++;
1764   }
1765 }
1766 
1767 #undef DO_LEAD_CASE
1768 #undef MULTIBYTE_CASES
1769 
1770 #undef INVALID_LEAD_CASE
1771 #undef INVALID_CASES
1772 #undef CHECK_NAME_CASE
1773 #undef CHECK_NAME_CASES
1774 #undef CHECK_NMSTRT_CASE
1775 #undef CHECK_NMSTRT_CASES
1776