/* xref: /dragonfly/contrib/expat/lib/xmltok_impl.c (revision 0c65ac1d) */
/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)!
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2002      Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
   Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Copyright (c) 2020      Boris Kolpackov <boris@codesynthesis.com>
   Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

42fb9a9224SMatthew Dillon #ifdef XML_TOK_IMPL_C
43fb9a9224SMatthew Dillon 
44*0c65ac1dSAntonio Huete Jimenez #  ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined
45fb9a9224SMatthew Dillon #    define IS_INVALID_CHAR(enc, ptr, n) (0)
46fb9a9224SMatthew Dillon #  endif
47fb9a9224SMatthew Dillon 
/* Switch-case fragment for an n-byte lead byte (BT_LEAD<n>).
   Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain, returns
   XML_TOK_INVALID (storing ptr in *nextTokPtr) when IS_INVALID_CHAR rejects
   the sequence, and otherwise advances ptr by n bytes.
   NOTE: `enc` and `end` are not macro parameters; they are taken from the
   scope in which the macro is expanded. */
#  define INVALID_LEAD_CASE(n, ptr, nextTokPtr)                                \
  case BT_LEAD##n:                                                             \
    if (end - (ptr) < (n))                                                     \
      return XML_TOK_PARTIAL_CHAR;                                             \
    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
      *(nextTokPtr) = (ptr);                                                   \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    ptr += n;                                                                  \
    break;

/* Switch-case fragment covering every byte type that can make the input
   invalid: the 2/3/4-byte lead cases above, plus BT_NONXML, BT_MALFORM and
   BT_TRAIL, which unconditionally return XML_TOK_INVALID with *nextTokPtr
   set to ptr. */
#  define INVALID_CASES(ptr, nextTokPtr)                                       \
    INVALID_LEAD_CASE(2, ptr, nextTokPtr)                                      \
    INVALID_LEAD_CASE(3, ptr, nextTokPtr)                                      \
    INVALID_LEAD_CASE(4, ptr, nextTokPtr)                                      \
  case BT_NONXML:                                                              \
  case BT_MALFORM:                                                             \
  case BT_TRAIL:                                                               \
    *(nextTokPtr) = (ptr);                                                     \
    return XML_TOK_INVALID;

/* Switch-case fragment for an n-byte character (BT_LEAD<n>) inside a name.
   Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain, returns
   XML_TOK_INVALID (storing ptr in *nextTokPtr) when the sequence is invalid
   or is not a name character, and otherwise advances ptr by n bytes. */
#  define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr)                        \
  case BT_LEAD##n:                                                             \
    if ((end) - (ptr) < (n))                                                   \
      return XML_TOK_PARTIAL_CHAR;                                             \
    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
      *nextTokPtr = ptr;                                                       \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    ptr += n;                                                                  \
    break;

/* Switch-case fragment accepting one name character of any width.
   BT_NONASCII is first validated with IS_NAME_CHAR_MINBPC and then shares
   the single-unit advance with BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME and
   BT_MINUS; 2/3/4-byte characters go through CHECK_NAME_CASE. */
#  define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)                          \
  case BT_NONASCII:                                                            \
    if (! IS_NAME_CHAR_MINBPC(enc, ptr)) {                                     \
      *nextTokPtr = ptr;                                                       \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    /* fall through */                                                         \
  case BT_NMSTRT:                                                              \
  case BT_HEX:                                                                 \
  case BT_DIGIT:                                                               \
  case BT_NAME:                                                                \
  case BT_MINUS:                                                               \
    ptr += MINBPC(enc);                                                        \
    break;                                                                     \
    CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr)                              \
    CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr)                              \
    CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)

/* Switch-case fragment for an n-byte character (BT_LEAD<n>) at the start of
   a name.  Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain,
   returns XML_TOK_INVALID (storing ptr in *nextTokPtr) when the sequence is
   invalid or is not a name-start character, and otherwise advances ptr by
   n bytes. */
#  define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr)                      \
  case BT_LEAD##n:                                                             \
    if ((end) - (ptr) < (n))                                                   \
      return XML_TOK_PARTIAL_CHAR;                                             \
    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
      *nextTokPtr = ptr;                                                       \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    ptr += n;                                                                  \
    break;

/* Switch-case fragment accepting one name-start character of any width.
   BT_NONASCII is first validated with IS_NMSTRT_CHAR_MINBPC and then shares
   the single-unit advance with BT_NMSTRT and BT_HEX; 2/3/4-byte characters
   go through CHECK_NMSTRT_CASE. */
#  define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)                        \
  case BT_NONASCII:                                                            \
    if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {                                   \
      *nextTokPtr = ptr;                                                       \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    /* fall through */                                                         \
  case BT_NMSTRT:                                                              \
  case BT_HEX:                                                                 \
    ptr += MINBPC(enc);                                                        \
    break;                                                                     \
    CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr)                            \
    CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr)                            \
    CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)

/* Name-decoration hook for the functions defined below.  When the including
   file has not defined PREFIX, identifiers are used unchanged. */
#  ifndef PREFIX
#    define PREFIX(ident) ident
#  endif

/* True when at least `count` minimum-width characters (count * MINBPC(enc)
   bytes) lie between ptr and end. */
#  define HAS_CHARS(enc, ptr, end, count)                                      \
    ((end) - (ptr) >= ((count)*MINBPC(enc)))

/* True when at least one minimum-width character lies between ptr and end. */
#  define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)

/* Makes the enclosing function return XML_TOK_PARTIAL unless `count`
   characters are available.  The expansion is a plain brace block (not a
   do/while(0) statement), so do not use it as the unbraced body of an `if`
   that is followed by `else`. */
#  define REQUIRE_CHARS(enc, ptr, end, count)                                  \
    {                                                                          \
      if (! HAS_CHARS(enc, ptr, end, count)) {                                 \
        return XML_TOK_PARTIAL;                                                \
      }                                                                        \
    }

/* Single-character form of REQUIRE_CHARS. */
#  define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1)

142fb9a9224SMatthew Dillon /* ptr points to character following "<!-" */
143fb9a9224SMatthew Dillon 
144fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanComment)145*0c65ac1dSAntonio Huete Jimenez PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
146*0c65ac1dSAntonio Huete Jimenez                     const char **nextTokPtr) {
147*0c65ac1dSAntonio Huete Jimenez   if (HAS_CHAR(enc, ptr, end)) {
148fb9a9224SMatthew Dillon     if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
149fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
150fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
151fb9a9224SMatthew Dillon     }
152fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
153*0c65ac1dSAntonio Huete Jimenez     while (HAS_CHAR(enc, ptr, end)) {
154fb9a9224SMatthew Dillon       switch (BYTE_TYPE(enc, ptr)) {
155fb9a9224SMatthew Dillon         INVALID_CASES(ptr, nextTokPtr)
156fb9a9224SMatthew Dillon       case BT_MINUS:
157*0c65ac1dSAntonio Huete Jimenez         ptr += MINBPC(enc);
158*0c65ac1dSAntonio Huete Jimenez         REQUIRE_CHAR(enc, ptr, end);
159fb9a9224SMatthew Dillon         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
160*0c65ac1dSAntonio Huete Jimenez           ptr += MINBPC(enc);
161*0c65ac1dSAntonio Huete Jimenez           REQUIRE_CHAR(enc, ptr, end);
162fb9a9224SMatthew Dillon           if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
163fb9a9224SMatthew Dillon             *nextTokPtr = ptr;
164fb9a9224SMatthew Dillon             return XML_TOK_INVALID;
165fb9a9224SMatthew Dillon           }
166fb9a9224SMatthew Dillon           *nextTokPtr = ptr + MINBPC(enc);
167fb9a9224SMatthew Dillon           return XML_TOK_COMMENT;
168fb9a9224SMatthew Dillon         }
169fb9a9224SMatthew Dillon         break;
170fb9a9224SMatthew Dillon       default:
171fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
172fb9a9224SMatthew Dillon         break;
173fb9a9224SMatthew Dillon       }
174fb9a9224SMatthew Dillon     }
175fb9a9224SMatthew Dillon   }
176fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
177fb9a9224SMatthew Dillon }
178fb9a9224SMatthew Dillon 
179fb9a9224SMatthew Dillon /* ptr points to character following "<!" */
180fb9a9224SMatthew Dillon 
181fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanDecl)182*0c65ac1dSAntonio Huete Jimenez PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
183*0c65ac1dSAntonio Huete Jimenez                  const char **nextTokPtr) {
184*0c65ac1dSAntonio Huete Jimenez   REQUIRE_CHAR(enc, ptr, end);
185fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
186fb9a9224SMatthew Dillon   case BT_MINUS:
187fb9a9224SMatthew Dillon     return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
188fb9a9224SMatthew Dillon   case BT_LSQB:
189fb9a9224SMatthew Dillon     *nextTokPtr = ptr + MINBPC(enc);
190fb9a9224SMatthew Dillon     return XML_TOK_COND_SECT_OPEN;
191fb9a9224SMatthew Dillon   case BT_NMSTRT:
192fb9a9224SMatthew Dillon   case BT_HEX:
193fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
194fb9a9224SMatthew Dillon     break;
195fb9a9224SMatthew Dillon   default:
196fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
197fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
198fb9a9224SMatthew Dillon   }
199*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
200fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
201fb9a9224SMatthew Dillon     case BT_PERCNT:
202*0c65ac1dSAntonio Huete Jimenez       REQUIRE_CHARS(enc, ptr, end, 2);
203fb9a9224SMatthew Dillon       /* don't allow <!ENTITY% foo "whatever"> */
204fb9a9224SMatthew Dillon       switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
205*0c65ac1dSAntonio Huete Jimenez       case BT_S:
206*0c65ac1dSAntonio Huete Jimenez       case BT_CR:
207*0c65ac1dSAntonio Huete Jimenez       case BT_LF:
208*0c65ac1dSAntonio Huete Jimenez       case BT_PERCNT:
209fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
210fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
211fb9a9224SMatthew Dillon       }
212fb9a9224SMatthew Dillon       /* fall through */
213*0c65ac1dSAntonio Huete Jimenez     case BT_S:
214*0c65ac1dSAntonio Huete Jimenez     case BT_CR:
215*0c65ac1dSAntonio Huete Jimenez     case BT_LF:
216fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
217fb9a9224SMatthew Dillon       return XML_TOK_DECL_OPEN;
218fb9a9224SMatthew Dillon     case BT_NMSTRT:
219fb9a9224SMatthew Dillon     case BT_HEX:
220fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
221fb9a9224SMatthew Dillon       break;
222fb9a9224SMatthew Dillon     default:
223fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
224fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
225fb9a9224SMatthew Dillon     }
226fb9a9224SMatthew Dillon   }
227fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
228fb9a9224SMatthew Dillon }
229fb9a9224SMatthew Dillon 
230fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(checkPiTarget)231*0c65ac1dSAntonio Huete Jimenez PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
232*0c65ac1dSAntonio Huete Jimenez                       int *tokPtr) {
233fb9a9224SMatthew Dillon   int upper = 0;
234*0c65ac1dSAntonio Huete Jimenez   UNUSED_P(enc);
235fb9a9224SMatthew Dillon   *tokPtr = XML_TOK_PI;
236fb9a9224SMatthew Dillon   if (end - ptr != MINBPC(enc) * 3)
237fb9a9224SMatthew Dillon     return 1;
238fb9a9224SMatthew Dillon   switch (BYTE_TO_ASCII(enc, ptr)) {
239fb9a9224SMatthew Dillon   case ASCII_x:
240fb9a9224SMatthew Dillon     break;
241fb9a9224SMatthew Dillon   case ASCII_X:
242fb9a9224SMatthew Dillon     upper = 1;
243fb9a9224SMatthew Dillon     break;
244fb9a9224SMatthew Dillon   default:
245fb9a9224SMatthew Dillon     return 1;
246fb9a9224SMatthew Dillon   }
247fb9a9224SMatthew Dillon   ptr += MINBPC(enc);
248fb9a9224SMatthew Dillon   switch (BYTE_TO_ASCII(enc, ptr)) {
249fb9a9224SMatthew Dillon   case ASCII_m:
250fb9a9224SMatthew Dillon     break;
251fb9a9224SMatthew Dillon   case ASCII_M:
252fb9a9224SMatthew Dillon     upper = 1;
253fb9a9224SMatthew Dillon     break;
254fb9a9224SMatthew Dillon   default:
255fb9a9224SMatthew Dillon     return 1;
256fb9a9224SMatthew Dillon   }
257fb9a9224SMatthew Dillon   ptr += MINBPC(enc);
258fb9a9224SMatthew Dillon   switch (BYTE_TO_ASCII(enc, ptr)) {
259fb9a9224SMatthew Dillon   case ASCII_l:
260fb9a9224SMatthew Dillon     break;
261fb9a9224SMatthew Dillon   case ASCII_L:
262fb9a9224SMatthew Dillon     upper = 1;
263fb9a9224SMatthew Dillon     break;
264fb9a9224SMatthew Dillon   default:
265fb9a9224SMatthew Dillon     return 1;
266fb9a9224SMatthew Dillon   }
267fb9a9224SMatthew Dillon   if (upper)
268fb9a9224SMatthew Dillon     return 0;
269fb9a9224SMatthew Dillon   *tokPtr = XML_TOK_XML_DECL;
270fb9a9224SMatthew Dillon   return 1;
271fb9a9224SMatthew Dillon }
272fb9a9224SMatthew Dillon 
273fb9a9224SMatthew Dillon /* ptr points to character following "<?" */
274fb9a9224SMatthew Dillon 
275fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanPi)276*0c65ac1dSAntonio Huete Jimenez PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
277*0c65ac1dSAntonio Huete Jimenez                const char **nextTokPtr) {
278fb9a9224SMatthew Dillon   int tok;
279fb9a9224SMatthew Dillon   const char *target = ptr;
280*0c65ac1dSAntonio Huete Jimenez   REQUIRE_CHAR(enc, ptr, end);
281fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
282fb9a9224SMatthew Dillon     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
283fb9a9224SMatthew Dillon   default:
284fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
285fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
286fb9a9224SMatthew Dillon   }
287*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
288fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
289fb9a9224SMatthew Dillon       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
290*0c65ac1dSAntonio Huete Jimenez     case BT_S:
291*0c65ac1dSAntonio Huete Jimenez     case BT_CR:
292*0c65ac1dSAntonio Huete Jimenez     case BT_LF:
293fb9a9224SMatthew Dillon       if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
294fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
295fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
296fb9a9224SMatthew Dillon       }
297fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
298*0c65ac1dSAntonio Huete Jimenez       while (HAS_CHAR(enc, ptr, end)) {
299fb9a9224SMatthew Dillon         switch (BYTE_TYPE(enc, ptr)) {
300fb9a9224SMatthew Dillon           INVALID_CASES(ptr, nextTokPtr)
301fb9a9224SMatthew Dillon         case BT_QUEST:
302fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
303*0c65ac1dSAntonio Huete Jimenez           REQUIRE_CHAR(enc, ptr, end);
304fb9a9224SMatthew Dillon           if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
305fb9a9224SMatthew Dillon             *nextTokPtr = ptr + MINBPC(enc);
306fb9a9224SMatthew Dillon             return tok;
307fb9a9224SMatthew Dillon           }
308fb9a9224SMatthew Dillon           break;
309fb9a9224SMatthew Dillon         default:
310fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
311fb9a9224SMatthew Dillon           break;
312fb9a9224SMatthew Dillon         }
313fb9a9224SMatthew Dillon       }
314fb9a9224SMatthew Dillon       return XML_TOK_PARTIAL;
315fb9a9224SMatthew Dillon     case BT_QUEST:
316fb9a9224SMatthew Dillon       if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
317fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
318fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
319fb9a9224SMatthew Dillon       }
320fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
321*0c65ac1dSAntonio Huete Jimenez       REQUIRE_CHAR(enc, ptr, end);
322fb9a9224SMatthew Dillon       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
323fb9a9224SMatthew Dillon         *nextTokPtr = ptr + MINBPC(enc);
324fb9a9224SMatthew Dillon         return tok;
325fb9a9224SMatthew Dillon       }
326fb9a9224SMatthew Dillon       /* fall through */
327fb9a9224SMatthew Dillon     default:
328fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
329fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
330fb9a9224SMatthew Dillon     }
331fb9a9224SMatthew Dillon   }
332fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
333fb9a9224SMatthew Dillon }
334fb9a9224SMatthew Dillon 
335fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanCdataSection)336*0c65ac1dSAntonio Huete Jimenez PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
337*0c65ac1dSAntonio Huete Jimenez                          const char **nextTokPtr) {
338*0c65ac1dSAntonio Huete Jimenez   static const char CDATA_LSQB[]
339*0c65ac1dSAntonio Huete Jimenez       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
340fb9a9224SMatthew Dillon   int i;
341*0c65ac1dSAntonio Huete Jimenez   UNUSED_P(enc);
342fb9a9224SMatthew Dillon   /* CDATA[ */
343*0c65ac1dSAntonio Huete Jimenez   REQUIRE_CHARS(enc, ptr, end, 6);
344fb9a9224SMatthew Dillon   for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
345fb9a9224SMatthew Dillon     if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
346fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
347fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
348fb9a9224SMatthew Dillon     }
349fb9a9224SMatthew Dillon   }
350fb9a9224SMatthew Dillon   *nextTokPtr = ptr;
351fb9a9224SMatthew Dillon   return XML_TOK_CDATA_SECT_OPEN;
352fb9a9224SMatthew Dillon }
353fb9a9224SMatthew Dillon 
354fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(cdataSectionTok)355*0c65ac1dSAntonio Huete Jimenez PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
356*0c65ac1dSAntonio Huete Jimenez                         const char **nextTokPtr) {
357*0c65ac1dSAntonio Huete Jimenez   if (ptr >= end)
358fb9a9224SMatthew Dillon     return XML_TOK_NONE;
359fb9a9224SMatthew Dillon   if (MINBPC(enc) > 1) {
360fb9a9224SMatthew Dillon     size_t n = end - ptr;
361fb9a9224SMatthew Dillon     if (n & (MINBPC(enc) - 1)) {
362fb9a9224SMatthew Dillon       n &= ~(MINBPC(enc) - 1);
363fb9a9224SMatthew Dillon       if (n == 0)
364fb9a9224SMatthew Dillon         return XML_TOK_PARTIAL;
365fb9a9224SMatthew Dillon       end = ptr + n;
366fb9a9224SMatthew Dillon     }
367fb9a9224SMatthew Dillon   }
368fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
369fb9a9224SMatthew Dillon   case BT_RSQB:
370fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
371*0c65ac1dSAntonio Huete Jimenez     REQUIRE_CHAR(enc, ptr, end);
372fb9a9224SMatthew Dillon     if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
373fb9a9224SMatthew Dillon       break;
374fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
375*0c65ac1dSAntonio Huete Jimenez     REQUIRE_CHAR(enc, ptr, end);
376fb9a9224SMatthew Dillon     if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
377fb9a9224SMatthew Dillon       ptr -= MINBPC(enc);
378fb9a9224SMatthew Dillon       break;
379fb9a9224SMatthew Dillon     }
380fb9a9224SMatthew Dillon     *nextTokPtr = ptr + MINBPC(enc);
381fb9a9224SMatthew Dillon     return XML_TOK_CDATA_SECT_CLOSE;
382fb9a9224SMatthew Dillon   case BT_CR:
383fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
384*0c65ac1dSAntonio Huete Jimenez     REQUIRE_CHAR(enc, ptr, end);
385fb9a9224SMatthew Dillon     if (BYTE_TYPE(enc, ptr) == BT_LF)
386fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
387fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
388fb9a9224SMatthew Dillon     return XML_TOK_DATA_NEWLINE;
389fb9a9224SMatthew Dillon   case BT_LF:
390fb9a9224SMatthew Dillon     *nextTokPtr = ptr + MINBPC(enc);
391fb9a9224SMatthew Dillon     return XML_TOK_DATA_NEWLINE;
392fb9a9224SMatthew Dillon     INVALID_CASES(ptr, nextTokPtr)
393fb9a9224SMatthew Dillon   default:
394fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
395fb9a9224SMatthew Dillon     break;
396fb9a9224SMatthew Dillon   }
397*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
398fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
399fb9a9224SMatthew Dillon #  define LEAD_CASE(n)                                                         \
400fb9a9224SMatthew Dillon   case BT_LEAD##n:                                                             \
401fb9a9224SMatthew Dillon     if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
402fb9a9224SMatthew Dillon       *nextTokPtr = ptr;                                                       \
403fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;                                               \
404fb9a9224SMatthew Dillon     }                                                                          \
405fb9a9224SMatthew Dillon     ptr += n;                                                                  \
406fb9a9224SMatthew Dillon     break;
407*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(2)
408*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(3)
409*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(4)
410fb9a9224SMatthew Dillon #  undef LEAD_CASE
411fb9a9224SMatthew Dillon     case BT_NONXML:
412fb9a9224SMatthew Dillon     case BT_MALFORM:
413fb9a9224SMatthew Dillon     case BT_TRAIL:
414fb9a9224SMatthew Dillon     case BT_CR:
415fb9a9224SMatthew Dillon     case BT_LF:
416fb9a9224SMatthew Dillon     case BT_RSQB:
417fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
418fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
419fb9a9224SMatthew Dillon     default:
420fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
421fb9a9224SMatthew Dillon       break;
422fb9a9224SMatthew Dillon     }
423fb9a9224SMatthew Dillon   }
424fb9a9224SMatthew Dillon   *nextTokPtr = ptr;
425fb9a9224SMatthew Dillon   return XML_TOK_DATA_CHARS;
426fb9a9224SMatthew Dillon }
427fb9a9224SMatthew Dillon 
428fb9a9224SMatthew Dillon /* ptr points to character following "</" */
429fb9a9224SMatthew Dillon 
430fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanEndTag)431*0c65ac1dSAntonio Huete Jimenez PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
432*0c65ac1dSAntonio Huete Jimenez                    const char **nextTokPtr) {
433*0c65ac1dSAntonio Huete Jimenez   REQUIRE_CHAR(enc, ptr, end);
434fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
435fb9a9224SMatthew Dillon     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
436fb9a9224SMatthew Dillon   default:
437fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
438fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
439fb9a9224SMatthew Dillon   }
440*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
441fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
442fb9a9224SMatthew Dillon       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
443*0c65ac1dSAntonio Huete Jimenez     case BT_S:
444*0c65ac1dSAntonio Huete Jimenez     case BT_CR:
445*0c65ac1dSAntonio Huete Jimenez     case BT_LF:
446*0c65ac1dSAntonio Huete Jimenez       for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
447fb9a9224SMatthew Dillon         switch (BYTE_TYPE(enc, ptr)) {
448*0c65ac1dSAntonio Huete Jimenez         case BT_S:
449*0c65ac1dSAntonio Huete Jimenez         case BT_CR:
450*0c65ac1dSAntonio Huete Jimenez         case BT_LF:
451fb9a9224SMatthew Dillon           break;
452fb9a9224SMatthew Dillon         case BT_GT:
453fb9a9224SMatthew Dillon           *nextTokPtr = ptr + MINBPC(enc);
454fb9a9224SMatthew Dillon           return XML_TOK_END_TAG;
455fb9a9224SMatthew Dillon         default:
456fb9a9224SMatthew Dillon           *nextTokPtr = ptr;
457fb9a9224SMatthew Dillon           return XML_TOK_INVALID;
458fb9a9224SMatthew Dillon         }
459fb9a9224SMatthew Dillon       }
460fb9a9224SMatthew Dillon       return XML_TOK_PARTIAL;
461fb9a9224SMatthew Dillon #  ifdef XML_NS
462fb9a9224SMatthew Dillon     case BT_COLON:
463fb9a9224SMatthew Dillon       /* no need to check qname syntax here,
464fb9a9224SMatthew Dillon          since end-tag must match exactly */
465fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
466fb9a9224SMatthew Dillon       break;
467fb9a9224SMatthew Dillon #  endif
468fb9a9224SMatthew Dillon     case BT_GT:
469fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
470fb9a9224SMatthew Dillon       return XML_TOK_END_TAG;
471fb9a9224SMatthew Dillon     default:
472fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
473fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
474fb9a9224SMatthew Dillon     }
475fb9a9224SMatthew Dillon   }
476fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
477fb9a9224SMatthew Dillon }
478fb9a9224SMatthew Dillon 
479fb9a9224SMatthew Dillon /* ptr points to character following "&#X" */
480fb9a9224SMatthew Dillon 
481fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanHexCharRef)482*0c65ac1dSAntonio Huete Jimenez PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
483*0c65ac1dSAntonio Huete Jimenez                        const char **nextTokPtr) {
484*0c65ac1dSAntonio Huete Jimenez   if (HAS_CHAR(enc, ptr, end)) {
485fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
486fb9a9224SMatthew Dillon     case BT_DIGIT:
487fb9a9224SMatthew Dillon     case BT_HEX:
488fb9a9224SMatthew Dillon       break;
489fb9a9224SMatthew Dillon     default:
490fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
491fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
492fb9a9224SMatthew Dillon     }
493*0c65ac1dSAntonio Huete Jimenez     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
494fb9a9224SMatthew Dillon       switch (BYTE_TYPE(enc, ptr)) {
495fb9a9224SMatthew Dillon       case BT_DIGIT:
496fb9a9224SMatthew Dillon       case BT_HEX:
497fb9a9224SMatthew Dillon         break;
498fb9a9224SMatthew Dillon       case BT_SEMI:
499fb9a9224SMatthew Dillon         *nextTokPtr = ptr + MINBPC(enc);
500fb9a9224SMatthew Dillon         return XML_TOK_CHAR_REF;
501fb9a9224SMatthew Dillon       default:
502fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
503fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
504fb9a9224SMatthew Dillon       }
505fb9a9224SMatthew Dillon     }
506fb9a9224SMatthew Dillon   }
507fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
508fb9a9224SMatthew Dillon }
509fb9a9224SMatthew Dillon 
510fb9a9224SMatthew Dillon /* ptr points to character following "&#" */
511fb9a9224SMatthew Dillon 
512fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanCharRef)513*0c65ac1dSAntonio Huete Jimenez PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
514*0c65ac1dSAntonio Huete Jimenez                     const char **nextTokPtr) {
515*0c65ac1dSAntonio Huete Jimenez   if (HAS_CHAR(enc, ptr, end)) {
516fb9a9224SMatthew Dillon     if (CHAR_MATCHES(enc, ptr, ASCII_x))
517fb9a9224SMatthew Dillon       return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
518fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
519fb9a9224SMatthew Dillon     case BT_DIGIT:
520fb9a9224SMatthew Dillon       break;
521fb9a9224SMatthew Dillon     default:
522fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
523fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
524fb9a9224SMatthew Dillon     }
525*0c65ac1dSAntonio Huete Jimenez     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
526fb9a9224SMatthew Dillon       switch (BYTE_TYPE(enc, ptr)) {
527fb9a9224SMatthew Dillon       case BT_DIGIT:
528fb9a9224SMatthew Dillon         break;
529fb9a9224SMatthew Dillon       case BT_SEMI:
530fb9a9224SMatthew Dillon         *nextTokPtr = ptr + MINBPC(enc);
531fb9a9224SMatthew Dillon         return XML_TOK_CHAR_REF;
532fb9a9224SMatthew Dillon       default:
533fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
534fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
535fb9a9224SMatthew Dillon       }
536fb9a9224SMatthew Dillon     }
537fb9a9224SMatthew Dillon   }
538fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
539fb9a9224SMatthew Dillon }
540fb9a9224SMatthew Dillon 
541fb9a9224SMatthew Dillon /* ptr points to character following "&" */
542fb9a9224SMatthew Dillon 
543fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanRef)544fb9a9224SMatthew Dillon PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
545*0c65ac1dSAntonio Huete Jimenez                 const char **nextTokPtr) {
546*0c65ac1dSAntonio Huete Jimenez   REQUIRE_CHAR(enc, ptr, end);
547fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
548fb9a9224SMatthew Dillon     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
549fb9a9224SMatthew Dillon   case BT_NUM:
550fb9a9224SMatthew Dillon     return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
551fb9a9224SMatthew Dillon   default:
552fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
553fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
554fb9a9224SMatthew Dillon   }
555*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
556fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
557fb9a9224SMatthew Dillon       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
558fb9a9224SMatthew Dillon     case BT_SEMI:
559fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
560fb9a9224SMatthew Dillon       return XML_TOK_ENTITY_REF;
561fb9a9224SMatthew Dillon     default:
562fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
563fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
564fb9a9224SMatthew Dillon     }
565fb9a9224SMatthew Dillon   }
566fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
567fb9a9224SMatthew Dillon }
568fb9a9224SMatthew Dillon 
569fb9a9224SMatthew Dillon /* ptr points to character following first character of attribute name */
570fb9a9224SMatthew Dillon 
571fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanAtts)572fb9a9224SMatthew Dillon PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
573*0c65ac1dSAntonio Huete Jimenez                  const char **nextTokPtr) {
574fb9a9224SMatthew Dillon #  ifdef XML_NS
575fb9a9224SMatthew Dillon   int hadColon = 0;
576fb9a9224SMatthew Dillon #  endif
577*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
578fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
579fb9a9224SMatthew Dillon       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
580fb9a9224SMatthew Dillon #  ifdef XML_NS
581fb9a9224SMatthew Dillon     case BT_COLON:
582fb9a9224SMatthew Dillon       if (hadColon) {
583fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
584fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
585fb9a9224SMatthew Dillon       }
586fb9a9224SMatthew Dillon       hadColon = 1;
587fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
588*0c65ac1dSAntonio Huete Jimenez       REQUIRE_CHAR(enc, ptr, end);
589fb9a9224SMatthew Dillon       switch (BYTE_TYPE(enc, ptr)) {
590fb9a9224SMatthew Dillon         CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
591fb9a9224SMatthew Dillon       default:
592fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
593fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
594fb9a9224SMatthew Dillon       }
595fb9a9224SMatthew Dillon       break;
596fb9a9224SMatthew Dillon #  endif
597*0c65ac1dSAntonio Huete Jimenez     case BT_S:
598*0c65ac1dSAntonio Huete Jimenez     case BT_CR:
599*0c65ac1dSAntonio Huete Jimenez     case BT_LF:
600fb9a9224SMatthew Dillon       for (;;) {
601fb9a9224SMatthew Dillon         int t;
602fb9a9224SMatthew Dillon 
603fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
604*0c65ac1dSAntonio Huete Jimenez         REQUIRE_CHAR(enc, ptr, end);
605fb9a9224SMatthew Dillon         t = BYTE_TYPE(enc, ptr);
606fb9a9224SMatthew Dillon         if (t == BT_EQUALS)
607fb9a9224SMatthew Dillon           break;
608fb9a9224SMatthew Dillon         switch (t) {
609fb9a9224SMatthew Dillon         case BT_S:
610fb9a9224SMatthew Dillon         case BT_LF:
611fb9a9224SMatthew Dillon         case BT_CR:
612fb9a9224SMatthew Dillon           break;
613fb9a9224SMatthew Dillon         default:
614fb9a9224SMatthew Dillon           *nextTokPtr = ptr;
615fb9a9224SMatthew Dillon           return XML_TOK_INVALID;
616fb9a9224SMatthew Dillon         }
617fb9a9224SMatthew Dillon       }
618fb9a9224SMatthew Dillon       /* fall through */
619*0c65ac1dSAntonio Huete Jimenez     case BT_EQUALS: {
620fb9a9224SMatthew Dillon       int open;
621fb9a9224SMatthew Dillon #  ifdef XML_NS
622fb9a9224SMatthew Dillon       hadColon = 0;
623fb9a9224SMatthew Dillon #  endif
624fb9a9224SMatthew Dillon       for (;;) {
625fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
626*0c65ac1dSAntonio Huete Jimenez         REQUIRE_CHAR(enc, ptr, end);
627fb9a9224SMatthew Dillon         open = BYTE_TYPE(enc, ptr);
628fb9a9224SMatthew Dillon         if (open == BT_QUOT || open == BT_APOS)
629fb9a9224SMatthew Dillon           break;
630fb9a9224SMatthew Dillon         switch (open) {
631fb9a9224SMatthew Dillon         case BT_S:
632fb9a9224SMatthew Dillon         case BT_LF:
633fb9a9224SMatthew Dillon         case BT_CR:
634fb9a9224SMatthew Dillon           break;
635fb9a9224SMatthew Dillon         default:
636fb9a9224SMatthew Dillon           *nextTokPtr = ptr;
637fb9a9224SMatthew Dillon           return XML_TOK_INVALID;
638fb9a9224SMatthew Dillon         }
639fb9a9224SMatthew Dillon       }
640fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
641fb9a9224SMatthew Dillon       /* in attribute value */
642fb9a9224SMatthew Dillon       for (;;) {
643fb9a9224SMatthew Dillon         int t;
644*0c65ac1dSAntonio Huete Jimenez         REQUIRE_CHAR(enc, ptr, end);
645fb9a9224SMatthew Dillon         t = BYTE_TYPE(enc, ptr);
646fb9a9224SMatthew Dillon         if (t == open)
647fb9a9224SMatthew Dillon           break;
648fb9a9224SMatthew Dillon         switch (t) {
649fb9a9224SMatthew Dillon           INVALID_CASES(ptr, nextTokPtr)
650*0c65ac1dSAntonio Huete Jimenez         case BT_AMP: {
651fb9a9224SMatthew Dillon           int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
652fb9a9224SMatthew Dillon           if (tok <= 0) {
653fb9a9224SMatthew Dillon             if (tok == XML_TOK_INVALID)
654fb9a9224SMatthew Dillon               *nextTokPtr = ptr;
655fb9a9224SMatthew Dillon             return tok;
656fb9a9224SMatthew Dillon           }
657fb9a9224SMatthew Dillon           break;
658fb9a9224SMatthew Dillon         }
659fb9a9224SMatthew Dillon         case BT_LT:
660fb9a9224SMatthew Dillon           *nextTokPtr = ptr;
661fb9a9224SMatthew Dillon           return XML_TOK_INVALID;
662fb9a9224SMatthew Dillon         default:
663fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
664fb9a9224SMatthew Dillon           break;
665fb9a9224SMatthew Dillon         }
666fb9a9224SMatthew Dillon       }
667fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
668*0c65ac1dSAntonio Huete Jimenez       REQUIRE_CHAR(enc, ptr, end);
669fb9a9224SMatthew Dillon       switch (BYTE_TYPE(enc, ptr)) {
670fb9a9224SMatthew Dillon       case BT_S:
671fb9a9224SMatthew Dillon       case BT_CR:
672fb9a9224SMatthew Dillon       case BT_LF:
673fb9a9224SMatthew Dillon         break;
674fb9a9224SMatthew Dillon       case BT_SOL:
675fb9a9224SMatthew Dillon         goto sol;
676fb9a9224SMatthew Dillon       case BT_GT:
677fb9a9224SMatthew Dillon         goto gt;
678fb9a9224SMatthew Dillon       default:
679fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
680fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
681fb9a9224SMatthew Dillon       }
682fb9a9224SMatthew Dillon       /* ptr points to closing quote */
683fb9a9224SMatthew Dillon       for (;;) {
684fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
685*0c65ac1dSAntonio Huete Jimenez         REQUIRE_CHAR(enc, ptr, end);
686fb9a9224SMatthew Dillon         switch (BYTE_TYPE(enc, ptr)) {
687fb9a9224SMatthew Dillon           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
688*0c65ac1dSAntonio Huete Jimenez         case BT_S:
689*0c65ac1dSAntonio Huete Jimenez         case BT_CR:
690*0c65ac1dSAntonio Huete Jimenez         case BT_LF:
691fb9a9224SMatthew Dillon           continue;
692fb9a9224SMatthew Dillon         case BT_GT:
693fb9a9224SMatthew Dillon         gt:
694fb9a9224SMatthew Dillon           *nextTokPtr = ptr + MINBPC(enc);
695fb9a9224SMatthew Dillon           return XML_TOK_START_TAG_WITH_ATTS;
696fb9a9224SMatthew Dillon         case BT_SOL:
697fb9a9224SMatthew Dillon         sol:
698fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
699*0c65ac1dSAntonio Huete Jimenez           REQUIRE_CHAR(enc, ptr, end);
700fb9a9224SMatthew Dillon           if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
701fb9a9224SMatthew Dillon             *nextTokPtr = ptr;
702fb9a9224SMatthew Dillon             return XML_TOK_INVALID;
703fb9a9224SMatthew Dillon           }
704fb9a9224SMatthew Dillon           *nextTokPtr = ptr + MINBPC(enc);
705fb9a9224SMatthew Dillon           return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
706fb9a9224SMatthew Dillon         default:
707fb9a9224SMatthew Dillon           *nextTokPtr = ptr;
708fb9a9224SMatthew Dillon           return XML_TOK_INVALID;
709fb9a9224SMatthew Dillon         }
710fb9a9224SMatthew Dillon         break;
711fb9a9224SMatthew Dillon       }
712fb9a9224SMatthew Dillon       break;
713fb9a9224SMatthew Dillon     }
714fb9a9224SMatthew Dillon     default:
715fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
716fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
717fb9a9224SMatthew Dillon     }
718fb9a9224SMatthew Dillon   }
719fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
720fb9a9224SMatthew Dillon }
721fb9a9224SMatthew Dillon 
722fb9a9224SMatthew Dillon /* ptr points to character following "<" */
723fb9a9224SMatthew Dillon 
724fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanLt)725fb9a9224SMatthew Dillon PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
726*0c65ac1dSAntonio Huete Jimenez                const char **nextTokPtr) {
727fb9a9224SMatthew Dillon #  ifdef XML_NS
728fb9a9224SMatthew Dillon   int hadColon;
729fb9a9224SMatthew Dillon #  endif
730*0c65ac1dSAntonio Huete Jimenez   REQUIRE_CHAR(enc, ptr, end);
731fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
732fb9a9224SMatthew Dillon     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
733fb9a9224SMatthew Dillon   case BT_EXCL:
734*0c65ac1dSAntonio Huete Jimenez     ptr += MINBPC(enc);
735*0c65ac1dSAntonio Huete Jimenez     REQUIRE_CHAR(enc, ptr, end);
736fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
737fb9a9224SMatthew Dillon     case BT_MINUS:
738fb9a9224SMatthew Dillon       return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
739fb9a9224SMatthew Dillon     case BT_LSQB:
740*0c65ac1dSAntonio Huete Jimenez       return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
741fb9a9224SMatthew Dillon     }
742fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
743fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
744fb9a9224SMatthew Dillon   case BT_QUEST:
745fb9a9224SMatthew Dillon     return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
746fb9a9224SMatthew Dillon   case BT_SOL:
747fb9a9224SMatthew Dillon     return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
748fb9a9224SMatthew Dillon   default:
749fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
750fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
751fb9a9224SMatthew Dillon   }
752fb9a9224SMatthew Dillon #  ifdef XML_NS
753fb9a9224SMatthew Dillon   hadColon = 0;
754fb9a9224SMatthew Dillon #  endif
755fb9a9224SMatthew Dillon   /* we have a start-tag */
756*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
757fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
758fb9a9224SMatthew Dillon       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
759fb9a9224SMatthew Dillon #  ifdef XML_NS
760fb9a9224SMatthew Dillon     case BT_COLON:
761fb9a9224SMatthew Dillon       if (hadColon) {
762fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
763fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
764fb9a9224SMatthew Dillon       }
765fb9a9224SMatthew Dillon       hadColon = 1;
766fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
767*0c65ac1dSAntonio Huete Jimenez       REQUIRE_CHAR(enc, ptr, end);
768fb9a9224SMatthew Dillon       switch (BYTE_TYPE(enc, ptr)) {
769fb9a9224SMatthew Dillon         CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
770fb9a9224SMatthew Dillon       default:
771fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
772fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
773fb9a9224SMatthew Dillon       }
774fb9a9224SMatthew Dillon       break;
775fb9a9224SMatthew Dillon #  endif
776*0c65ac1dSAntonio Huete Jimenez     case BT_S:
777*0c65ac1dSAntonio Huete Jimenez     case BT_CR:
778*0c65ac1dSAntonio Huete Jimenez     case BT_LF: {
779fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
780*0c65ac1dSAntonio Huete Jimenez       while (HAS_CHAR(enc, ptr, end)) {
781fb9a9224SMatthew Dillon         switch (BYTE_TYPE(enc, ptr)) {
782fb9a9224SMatthew Dillon           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
783fb9a9224SMatthew Dillon         case BT_GT:
784fb9a9224SMatthew Dillon           goto gt;
785fb9a9224SMatthew Dillon         case BT_SOL:
786fb9a9224SMatthew Dillon           goto sol;
787*0c65ac1dSAntonio Huete Jimenez         case BT_S:
788*0c65ac1dSAntonio Huete Jimenez         case BT_CR:
789*0c65ac1dSAntonio Huete Jimenez         case BT_LF:
790fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
791fb9a9224SMatthew Dillon           continue;
792fb9a9224SMatthew Dillon         default:
793fb9a9224SMatthew Dillon           *nextTokPtr = ptr;
794fb9a9224SMatthew Dillon           return XML_TOK_INVALID;
795fb9a9224SMatthew Dillon         }
796fb9a9224SMatthew Dillon         return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
797fb9a9224SMatthew Dillon       }
798fb9a9224SMatthew Dillon       return XML_TOK_PARTIAL;
799fb9a9224SMatthew Dillon     }
800fb9a9224SMatthew Dillon     case BT_GT:
801fb9a9224SMatthew Dillon     gt:
802fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
803fb9a9224SMatthew Dillon       return XML_TOK_START_TAG_NO_ATTS;
804fb9a9224SMatthew Dillon     case BT_SOL:
805fb9a9224SMatthew Dillon     sol:
806fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
807*0c65ac1dSAntonio Huete Jimenez       REQUIRE_CHAR(enc, ptr, end);
808fb9a9224SMatthew Dillon       if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
809fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
810fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
811fb9a9224SMatthew Dillon       }
812fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
813fb9a9224SMatthew Dillon       return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
814fb9a9224SMatthew Dillon     default:
815fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
816fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
817fb9a9224SMatthew Dillon     }
818fb9a9224SMatthew Dillon   }
819fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
820fb9a9224SMatthew Dillon }
821fb9a9224SMatthew Dillon 
822fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(contentTok)823fb9a9224SMatthew Dillon PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
824*0c65ac1dSAntonio Huete Jimenez                    const char **nextTokPtr) {
825*0c65ac1dSAntonio Huete Jimenez   if (ptr >= end)
826fb9a9224SMatthew Dillon     return XML_TOK_NONE;
827fb9a9224SMatthew Dillon   if (MINBPC(enc) > 1) {
828fb9a9224SMatthew Dillon     size_t n = end - ptr;
829fb9a9224SMatthew Dillon     if (n & (MINBPC(enc) - 1)) {
830fb9a9224SMatthew Dillon       n &= ~(MINBPC(enc) - 1);
831fb9a9224SMatthew Dillon       if (n == 0)
832fb9a9224SMatthew Dillon         return XML_TOK_PARTIAL;
833fb9a9224SMatthew Dillon       end = ptr + n;
834fb9a9224SMatthew Dillon     }
835fb9a9224SMatthew Dillon   }
836fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
837fb9a9224SMatthew Dillon   case BT_LT:
838fb9a9224SMatthew Dillon     return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
839fb9a9224SMatthew Dillon   case BT_AMP:
840fb9a9224SMatthew Dillon     return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
841fb9a9224SMatthew Dillon   case BT_CR:
842fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
843*0c65ac1dSAntonio Huete Jimenez     if (! HAS_CHAR(enc, ptr, end))
844fb9a9224SMatthew Dillon       return XML_TOK_TRAILING_CR;
845fb9a9224SMatthew Dillon     if (BYTE_TYPE(enc, ptr) == BT_LF)
846fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
847fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
848fb9a9224SMatthew Dillon     return XML_TOK_DATA_NEWLINE;
849fb9a9224SMatthew Dillon   case BT_LF:
850fb9a9224SMatthew Dillon     *nextTokPtr = ptr + MINBPC(enc);
851fb9a9224SMatthew Dillon     return XML_TOK_DATA_NEWLINE;
852fb9a9224SMatthew Dillon   case BT_RSQB:
853fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
854*0c65ac1dSAntonio Huete Jimenez     if (! HAS_CHAR(enc, ptr, end))
855fb9a9224SMatthew Dillon       return XML_TOK_TRAILING_RSQB;
856fb9a9224SMatthew Dillon     if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
857fb9a9224SMatthew Dillon       break;
858fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
859*0c65ac1dSAntonio Huete Jimenez     if (! HAS_CHAR(enc, ptr, end))
860fb9a9224SMatthew Dillon       return XML_TOK_TRAILING_RSQB;
861fb9a9224SMatthew Dillon     if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
862fb9a9224SMatthew Dillon       ptr -= MINBPC(enc);
863fb9a9224SMatthew Dillon       break;
864fb9a9224SMatthew Dillon     }
865fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
866fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
867fb9a9224SMatthew Dillon     INVALID_CASES(ptr, nextTokPtr)
868fb9a9224SMatthew Dillon   default:
869fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
870fb9a9224SMatthew Dillon     break;
871fb9a9224SMatthew Dillon   }
872*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
873fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
874fb9a9224SMatthew Dillon #  define LEAD_CASE(n)                                                         \
875fb9a9224SMatthew Dillon   case BT_LEAD##n:                                                             \
876fb9a9224SMatthew Dillon     if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
877fb9a9224SMatthew Dillon       *nextTokPtr = ptr;                                                       \
878fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;                                               \
879fb9a9224SMatthew Dillon     }                                                                          \
880fb9a9224SMatthew Dillon     ptr += n;                                                                  \
881fb9a9224SMatthew Dillon     break;
882*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(2)
883*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(3)
884*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(4)
885fb9a9224SMatthew Dillon #  undef LEAD_CASE
886fb9a9224SMatthew Dillon     case BT_RSQB:
887*0c65ac1dSAntonio Huete Jimenez       if (HAS_CHARS(enc, ptr, end, 2)) {
888fb9a9224SMatthew Dillon         if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
889fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
890fb9a9224SMatthew Dillon           break;
891fb9a9224SMatthew Dillon         }
892*0c65ac1dSAntonio Huete Jimenez         if (HAS_CHARS(enc, ptr, end, 3)) {
893fb9a9224SMatthew Dillon           if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
894fb9a9224SMatthew Dillon             ptr += MINBPC(enc);
895fb9a9224SMatthew Dillon             break;
896fb9a9224SMatthew Dillon           }
897fb9a9224SMatthew Dillon           *nextTokPtr = ptr + 2 * MINBPC(enc);
898fb9a9224SMatthew Dillon           return XML_TOK_INVALID;
899fb9a9224SMatthew Dillon         }
900fb9a9224SMatthew Dillon       }
901fb9a9224SMatthew Dillon       /* fall through */
902fb9a9224SMatthew Dillon     case BT_AMP:
903fb9a9224SMatthew Dillon     case BT_LT:
904fb9a9224SMatthew Dillon     case BT_NONXML:
905fb9a9224SMatthew Dillon     case BT_MALFORM:
906fb9a9224SMatthew Dillon     case BT_TRAIL:
907fb9a9224SMatthew Dillon     case BT_CR:
908fb9a9224SMatthew Dillon     case BT_LF:
909fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
910fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
911fb9a9224SMatthew Dillon     default:
912fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
913fb9a9224SMatthew Dillon       break;
914fb9a9224SMatthew Dillon     }
915fb9a9224SMatthew Dillon   }
916fb9a9224SMatthew Dillon   *nextTokPtr = ptr;
917fb9a9224SMatthew Dillon   return XML_TOK_DATA_CHARS;
918fb9a9224SMatthew Dillon }
919fb9a9224SMatthew Dillon 
920fb9a9224SMatthew Dillon /* ptr points to character following "%" */
921fb9a9224SMatthew Dillon 
922fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanPercent)923fb9a9224SMatthew Dillon PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
924*0c65ac1dSAntonio Huete Jimenez                     const char **nextTokPtr) {
925*0c65ac1dSAntonio Huete Jimenez   REQUIRE_CHAR(enc, ptr, end);
926fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
927fb9a9224SMatthew Dillon     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
928*0c65ac1dSAntonio Huete Jimenez   case BT_S:
929*0c65ac1dSAntonio Huete Jimenez   case BT_LF:
930*0c65ac1dSAntonio Huete Jimenez   case BT_CR:
931*0c65ac1dSAntonio Huete Jimenez   case BT_PERCNT:
932fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
933fb9a9224SMatthew Dillon     return XML_TOK_PERCENT;
934fb9a9224SMatthew Dillon   default:
935fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
936fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
937fb9a9224SMatthew Dillon   }
938*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
939fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
940fb9a9224SMatthew Dillon       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
941fb9a9224SMatthew Dillon     case BT_SEMI:
942fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
943fb9a9224SMatthew Dillon       return XML_TOK_PARAM_ENTITY_REF;
944fb9a9224SMatthew Dillon     default:
945fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
946fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
947fb9a9224SMatthew Dillon     }
948fb9a9224SMatthew Dillon   }
949fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
950fb9a9224SMatthew Dillon }
951fb9a9224SMatthew Dillon 
952fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanPoundName)953fb9a9224SMatthew Dillon PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
954*0c65ac1dSAntonio Huete Jimenez                       const char **nextTokPtr) {
955*0c65ac1dSAntonio Huete Jimenez   REQUIRE_CHAR(enc, ptr, end);
956fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
957fb9a9224SMatthew Dillon     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
958fb9a9224SMatthew Dillon   default:
959fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
960fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
961fb9a9224SMatthew Dillon   }
962*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
963fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
964fb9a9224SMatthew Dillon       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
965*0c65ac1dSAntonio Huete Jimenez     case BT_CR:
966*0c65ac1dSAntonio Huete Jimenez     case BT_LF:
967*0c65ac1dSAntonio Huete Jimenez     case BT_S:
968*0c65ac1dSAntonio Huete Jimenez     case BT_RPAR:
969*0c65ac1dSAntonio Huete Jimenez     case BT_GT:
970*0c65ac1dSAntonio Huete Jimenez     case BT_PERCNT:
971*0c65ac1dSAntonio Huete Jimenez     case BT_VERBAR:
972fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
973fb9a9224SMatthew Dillon       return XML_TOK_POUND_NAME;
974fb9a9224SMatthew Dillon     default:
975fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
976fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
977fb9a9224SMatthew Dillon     }
978fb9a9224SMatthew Dillon   }
979fb9a9224SMatthew Dillon   return -XML_TOK_POUND_NAME;
980fb9a9224SMatthew Dillon }
981fb9a9224SMatthew Dillon 
982fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(scanLit)983*0c65ac1dSAntonio Huete Jimenez PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end,
984*0c65ac1dSAntonio Huete Jimenez                 const char **nextTokPtr) {
985*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
986fb9a9224SMatthew Dillon     int t = BYTE_TYPE(enc, ptr);
987fb9a9224SMatthew Dillon     switch (t) {
988fb9a9224SMatthew Dillon       INVALID_CASES(ptr, nextTokPtr)
989fb9a9224SMatthew Dillon     case BT_QUOT:
990fb9a9224SMatthew Dillon     case BT_APOS:
991fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
992fb9a9224SMatthew Dillon       if (t != open)
993fb9a9224SMatthew Dillon         break;
994*0c65ac1dSAntonio Huete Jimenez       if (! HAS_CHAR(enc, ptr, end))
995fb9a9224SMatthew Dillon         return -XML_TOK_LITERAL;
996fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
997fb9a9224SMatthew Dillon       switch (BYTE_TYPE(enc, ptr)) {
998*0c65ac1dSAntonio Huete Jimenez       case BT_S:
999*0c65ac1dSAntonio Huete Jimenez       case BT_CR:
1000*0c65ac1dSAntonio Huete Jimenez       case BT_LF:
1001*0c65ac1dSAntonio Huete Jimenez       case BT_GT:
1002*0c65ac1dSAntonio Huete Jimenez       case BT_PERCNT:
1003*0c65ac1dSAntonio Huete Jimenez       case BT_LSQB:
1004fb9a9224SMatthew Dillon         return XML_TOK_LITERAL;
1005fb9a9224SMatthew Dillon       default:
1006fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
1007fb9a9224SMatthew Dillon       }
1008fb9a9224SMatthew Dillon     default:
1009fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1010fb9a9224SMatthew Dillon       break;
1011fb9a9224SMatthew Dillon     }
1012fb9a9224SMatthew Dillon   }
1013fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
1014fb9a9224SMatthew Dillon }
1015fb9a9224SMatthew Dillon 
1016fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(prologTok)1017fb9a9224SMatthew Dillon PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
1018*0c65ac1dSAntonio Huete Jimenez                   const char **nextTokPtr) {
1019fb9a9224SMatthew Dillon   int tok;
1020*0c65ac1dSAntonio Huete Jimenez   if (ptr >= end)
1021fb9a9224SMatthew Dillon     return XML_TOK_NONE;
1022fb9a9224SMatthew Dillon   if (MINBPC(enc) > 1) {
1023fb9a9224SMatthew Dillon     size_t n = end - ptr;
1024fb9a9224SMatthew Dillon     if (n & (MINBPC(enc) - 1)) {
1025fb9a9224SMatthew Dillon       n &= ~(MINBPC(enc) - 1);
1026fb9a9224SMatthew Dillon       if (n == 0)
1027fb9a9224SMatthew Dillon         return XML_TOK_PARTIAL;
1028fb9a9224SMatthew Dillon       end = ptr + n;
1029fb9a9224SMatthew Dillon     }
1030fb9a9224SMatthew Dillon   }
1031fb9a9224SMatthew Dillon   switch (BYTE_TYPE(enc, ptr)) {
1032fb9a9224SMatthew Dillon   case BT_QUOT:
1033fb9a9224SMatthew Dillon     return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1034fb9a9224SMatthew Dillon   case BT_APOS:
1035fb9a9224SMatthew Dillon     return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1036*0c65ac1dSAntonio Huete Jimenez   case BT_LT: {
1037fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
1038*0c65ac1dSAntonio Huete Jimenez     REQUIRE_CHAR(enc, ptr, end);
1039fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1040fb9a9224SMatthew Dillon     case BT_EXCL:
1041fb9a9224SMatthew Dillon       return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1042fb9a9224SMatthew Dillon     case BT_QUEST:
1043fb9a9224SMatthew Dillon       return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1044fb9a9224SMatthew Dillon     case BT_NMSTRT:
1045fb9a9224SMatthew Dillon     case BT_HEX:
1046fb9a9224SMatthew Dillon     case BT_NONASCII:
1047fb9a9224SMatthew Dillon     case BT_LEAD2:
1048fb9a9224SMatthew Dillon     case BT_LEAD3:
1049fb9a9224SMatthew Dillon     case BT_LEAD4:
1050fb9a9224SMatthew Dillon       *nextTokPtr = ptr - MINBPC(enc);
1051fb9a9224SMatthew Dillon       return XML_TOK_INSTANCE_START;
1052fb9a9224SMatthew Dillon     }
1053fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
1054fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
1055fb9a9224SMatthew Dillon   }
1056fb9a9224SMatthew Dillon   case BT_CR:
1057fb9a9224SMatthew Dillon     if (ptr + MINBPC(enc) == end) {
1058fb9a9224SMatthew Dillon       *nextTokPtr = end;
1059fb9a9224SMatthew Dillon       /* indicate that this might be part of a CR/LF pair */
1060fb9a9224SMatthew Dillon       return -XML_TOK_PROLOG_S;
1061fb9a9224SMatthew Dillon     }
1062fb9a9224SMatthew Dillon     /* fall through */
1063*0c65ac1dSAntonio Huete Jimenez   case BT_S:
1064*0c65ac1dSAntonio Huete Jimenez   case BT_LF:
1065fb9a9224SMatthew Dillon     for (;;) {
1066fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1067*0c65ac1dSAntonio Huete Jimenez       if (! HAS_CHAR(enc, ptr, end))
1068fb9a9224SMatthew Dillon         break;
1069fb9a9224SMatthew Dillon       switch (BYTE_TYPE(enc, ptr)) {
1070*0c65ac1dSAntonio Huete Jimenez       case BT_S:
1071*0c65ac1dSAntonio Huete Jimenez       case BT_LF:
1072fb9a9224SMatthew Dillon         break;
1073fb9a9224SMatthew Dillon       case BT_CR:
1074fb9a9224SMatthew Dillon         /* don't split CR/LF pair */
1075fb9a9224SMatthew Dillon         if (ptr + MINBPC(enc) != end)
1076fb9a9224SMatthew Dillon           break;
1077fb9a9224SMatthew Dillon         /* fall through */
1078fb9a9224SMatthew Dillon       default:
1079fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
1080fb9a9224SMatthew Dillon         return XML_TOK_PROLOG_S;
1081fb9a9224SMatthew Dillon       }
1082fb9a9224SMatthew Dillon     }
1083fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
1084fb9a9224SMatthew Dillon     return XML_TOK_PROLOG_S;
1085fb9a9224SMatthew Dillon   case BT_PERCNT:
1086fb9a9224SMatthew Dillon     return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1087fb9a9224SMatthew Dillon   case BT_COMMA:
1088fb9a9224SMatthew Dillon     *nextTokPtr = ptr + MINBPC(enc);
1089fb9a9224SMatthew Dillon     return XML_TOK_COMMA;
1090fb9a9224SMatthew Dillon   case BT_LSQB:
1091fb9a9224SMatthew Dillon     *nextTokPtr = ptr + MINBPC(enc);
1092fb9a9224SMatthew Dillon     return XML_TOK_OPEN_BRACKET;
1093fb9a9224SMatthew Dillon   case BT_RSQB:
1094fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
1095*0c65ac1dSAntonio Huete Jimenez     if (! HAS_CHAR(enc, ptr, end))
1096fb9a9224SMatthew Dillon       return -XML_TOK_CLOSE_BRACKET;
1097fb9a9224SMatthew Dillon     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1098*0c65ac1dSAntonio Huete Jimenez       REQUIRE_CHARS(enc, ptr, end, 2);
1099fb9a9224SMatthew Dillon       if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1100fb9a9224SMatthew Dillon         *nextTokPtr = ptr + 2 * MINBPC(enc);
1101fb9a9224SMatthew Dillon         return XML_TOK_COND_SECT_CLOSE;
1102fb9a9224SMatthew Dillon       }
1103fb9a9224SMatthew Dillon     }
1104fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
1105fb9a9224SMatthew Dillon     return XML_TOK_CLOSE_BRACKET;
1106fb9a9224SMatthew Dillon   case BT_LPAR:
1107fb9a9224SMatthew Dillon     *nextTokPtr = ptr + MINBPC(enc);
1108fb9a9224SMatthew Dillon     return XML_TOK_OPEN_PAREN;
1109fb9a9224SMatthew Dillon   case BT_RPAR:
1110fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
1111*0c65ac1dSAntonio Huete Jimenez     if (! HAS_CHAR(enc, ptr, end))
1112fb9a9224SMatthew Dillon       return -XML_TOK_CLOSE_PAREN;
1113fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1114fb9a9224SMatthew Dillon     case BT_AST:
1115fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
1116fb9a9224SMatthew Dillon       return XML_TOK_CLOSE_PAREN_ASTERISK;
1117fb9a9224SMatthew Dillon     case BT_QUEST:
1118fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
1119fb9a9224SMatthew Dillon       return XML_TOK_CLOSE_PAREN_QUESTION;
1120fb9a9224SMatthew Dillon     case BT_PLUS:
1121fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
1122fb9a9224SMatthew Dillon       return XML_TOK_CLOSE_PAREN_PLUS;
1123*0c65ac1dSAntonio Huete Jimenez     case BT_CR:
1124*0c65ac1dSAntonio Huete Jimenez     case BT_LF:
1125*0c65ac1dSAntonio Huete Jimenez     case BT_S:
1126*0c65ac1dSAntonio Huete Jimenez     case BT_GT:
1127*0c65ac1dSAntonio Huete Jimenez     case BT_COMMA:
1128*0c65ac1dSAntonio Huete Jimenez     case BT_VERBAR:
1129fb9a9224SMatthew Dillon     case BT_RPAR:
1130fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1131fb9a9224SMatthew Dillon       return XML_TOK_CLOSE_PAREN;
1132fb9a9224SMatthew Dillon     }
1133fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
1134fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
1135fb9a9224SMatthew Dillon   case BT_VERBAR:
1136fb9a9224SMatthew Dillon     *nextTokPtr = ptr + MINBPC(enc);
1137fb9a9224SMatthew Dillon     return XML_TOK_OR;
1138fb9a9224SMatthew Dillon   case BT_GT:
1139fb9a9224SMatthew Dillon     *nextTokPtr = ptr + MINBPC(enc);
1140fb9a9224SMatthew Dillon     return XML_TOK_DECL_CLOSE;
1141fb9a9224SMatthew Dillon   case BT_NUM:
1142fb9a9224SMatthew Dillon     return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1143fb9a9224SMatthew Dillon #  define LEAD_CASE(n)                                                         \
1144fb9a9224SMatthew Dillon   case BT_LEAD##n:                                                             \
1145fb9a9224SMatthew Dillon     if (end - ptr < n)                                                         \
1146fb9a9224SMatthew Dillon       return XML_TOK_PARTIAL_CHAR;                                             \
1147*0c65ac1dSAntonio Huete Jimenez     if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
1148*0c65ac1dSAntonio Huete Jimenez       *nextTokPtr = ptr;                                                       \
1149*0c65ac1dSAntonio Huete Jimenez       return XML_TOK_INVALID;                                                  \
1150*0c65ac1dSAntonio Huete Jimenez     }                                                                          \
1151fb9a9224SMatthew Dillon     if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
1152fb9a9224SMatthew Dillon       ptr += n;                                                                \
1153fb9a9224SMatthew Dillon       tok = XML_TOK_NAME;                                                      \
1154fb9a9224SMatthew Dillon       break;                                                                   \
1155fb9a9224SMatthew Dillon     }                                                                          \
1156fb9a9224SMatthew Dillon     if (IS_NAME_CHAR(enc, ptr, n)) {                                           \
1157fb9a9224SMatthew Dillon       ptr += n;                                                                \
1158fb9a9224SMatthew Dillon       tok = XML_TOK_NMTOKEN;                                                   \
1159fb9a9224SMatthew Dillon       break;                                                                   \
1160fb9a9224SMatthew Dillon     }                                                                          \
1161fb9a9224SMatthew Dillon     *nextTokPtr = ptr;                                                         \
1162fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
1163*0c65ac1dSAntonio Huete Jimenez     LEAD_CASE(2)
1164*0c65ac1dSAntonio Huete Jimenez     LEAD_CASE(3)
1165*0c65ac1dSAntonio Huete Jimenez     LEAD_CASE(4)
1166fb9a9224SMatthew Dillon #  undef LEAD_CASE
1167fb9a9224SMatthew Dillon   case BT_NMSTRT:
1168fb9a9224SMatthew Dillon   case BT_HEX:
1169fb9a9224SMatthew Dillon     tok = XML_TOK_NAME;
1170fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
1171fb9a9224SMatthew Dillon     break;
1172fb9a9224SMatthew Dillon   case BT_DIGIT:
1173fb9a9224SMatthew Dillon   case BT_NAME:
1174fb9a9224SMatthew Dillon   case BT_MINUS:
1175fb9a9224SMatthew Dillon #  ifdef XML_NS
1176fb9a9224SMatthew Dillon   case BT_COLON:
1177fb9a9224SMatthew Dillon #  endif
1178fb9a9224SMatthew Dillon     tok = XML_TOK_NMTOKEN;
1179fb9a9224SMatthew Dillon     ptr += MINBPC(enc);
1180fb9a9224SMatthew Dillon     break;
1181fb9a9224SMatthew Dillon   case BT_NONASCII:
1182fb9a9224SMatthew Dillon     if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1183fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1184fb9a9224SMatthew Dillon       tok = XML_TOK_NAME;
1185fb9a9224SMatthew Dillon       break;
1186fb9a9224SMatthew Dillon     }
1187fb9a9224SMatthew Dillon     if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1188fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1189fb9a9224SMatthew Dillon       tok = XML_TOK_NMTOKEN;
1190fb9a9224SMatthew Dillon       break;
1191fb9a9224SMatthew Dillon     }
1192fb9a9224SMatthew Dillon     /* fall through */
1193fb9a9224SMatthew Dillon   default:
1194fb9a9224SMatthew Dillon     *nextTokPtr = ptr;
1195fb9a9224SMatthew Dillon     return XML_TOK_INVALID;
1196fb9a9224SMatthew Dillon   }
1197*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
1198fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1199fb9a9224SMatthew Dillon       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1200*0c65ac1dSAntonio Huete Jimenez     case BT_GT:
1201*0c65ac1dSAntonio Huete Jimenez     case BT_RPAR:
1202*0c65ac1dSAntonio Huete Jimenez     case BT_COMMA:
1203*0c65ac1dSAntonio Huete Jimenez     case BT_VERBAR:
1204*0c65ac1dSAntonio Huete Jimenez     case BT_LSQB:
1205*0c65ac1dSAntonio Huete Jimenez     case BT_PERCNT:
1206*0c65ac1dSAntonio Huete Jimenez     case BT_S:
1207*0c65ac1dSAntonio Huete Jimenez     case BT_CR:
1208*0c65ac1dSAntonio Huete Jimenez     case BT_LF:
1209fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1210fb9a9224SMatthew Dillon       return tok;
1211fb9a9224SMatthew Dillon #  ifdef XML_NS
1212fb9a9224SMatthew Dillon     case BT_COLON:
1213fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1214fb9a9224SMatthew Dillon       switch (tok) {
1215fb9a9224SMatthew Dillon       case XML_TOK_NAME:
1216*0c65ac1dSAntonio Huete Jimenez         REQUIRE_CHAR(enc, ptr, end);
1217fb9a9224SMatthew Dillon         tok = XML_TOK_PREFIXED_NAME;
1218fb9a9224SMatthew Dillon         switch (BYTE_TYPE(enc, ptr)) {
1219fb9a9224SMatthew Dillon           CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1220fb9a9224SMatthew Dillon         default:
1221fb9a9224SMatthew Dillon           tok = XML_TOK_NMTOKEN;
1222fb9a9224SMatthew Dillon           break;
1223fb9a9224SMatthew Dillon         }
1224fb9a9224SMatthew Dillon         break;
1225fb9a9224SMatthew Dillon       case XML_TOK_PREFIXED_NAME:
1226fb9a9224SMatthew Dillon         tok = XML_TOK_NMTOKEN;
1227fb9a9224SMatthew Dillon         break;
1228fb9a9224SMatthew Dillon       }
1229fb9a9224SMatthew Dillon       break;
1230fb9a9224SMatthew Dillon #  endif
1231fb9a9224SMatthew Dillon     case BT_PLUS:
1232fb9a9224SMatthew Dillon       if (tok == XML_TOK_NMTOKEN) {
1233fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
1234fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
1235fb9a9224SMatthew Dillon       }
1236fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
1237fb9a9224SMatthew Dillon       return XML_TOK_NAME_PLUS;
1238fb9a9224SMatthew Dillon     case BT_AST:
1239fb9a9224SMatthew Dillon       if (tok == XML_TOK_NMTOKEN) {
1240fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
1241fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
1242fb9a9224SMatthew Dillon       }
1243fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
1244fb9a9224SMatthew Dillon       return XML_TOK_NAME_ASTERISK;
1245fb9a9224SMatthew Dillon     case BT_QUEST:
1246fb9a9224SMatthew Dillon       if (tok == XML_TOK_NMTOKEN) {
1247fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
1248fb9a9224SMatthew Dillon         return XML_TOK_INVALID;
1249fb9a9224SMatthew Dillon       }
1250fb9a9224SMatthew Dillon       *nextTokPtr = ptr + MINBPC(enc);
1251fb9a9224SMatthew Dillon       return XML_TOK_NAME_QUESTION;
1252fb9a9224SMatthew Dillon     default:
1253fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1254fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
1255fb9a9224SMatthew Dillon     }
1256fb9a9224SMatthew Dillon   }
1257fb9a9224SMatthew Dillon   return -tok;
1258fb9a9224SMatthew Dillon }
1259fb9a9224SMatthew Dillon 
1260fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(attributeValueTok)1261*0c65ac1dSAntonio Huete Jimenez PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1262*0c65ac1dSAntonio Huete Jimenez                           const char **nextTokPtr) {
1263fb9a9224SMatthew Dillon   const char *start;
1264*0c65ac1dSAntonio Huete Jimenez   if (ptr >= end)
1265fb9a9224SMatthew Dillon     return XML_TOK_NONE;
1266*0c65ac1dSAntonio Huete Jimenez   else if (! HAS_CHAR(enc, ptr, end)) {
1267*0c65ac1dSAntonio Huete Jimenez     /* This line cannot be executed.  The incoming data has already
1268*0c65ac1dSAntonio Huete Jimenez      * been tokenized once, so incomplete characters like this have
1269*0c65ac1dSAntonio Huete Jimenez      * already been eliminated from the input.  Retaining the paranoia
1270*0c65ac1dSAntonio Huete Jimenez      * check is still valuable, however.
1271*0c65ac1dSAntonio Huete Jimenez      */
1272*0c65ac1dSAntonio Huete Jimenez     return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1273*0c65ac1dSAntonio Huete Jimenez   }
1274fb9a9224SMatthew Dillon   start = ptr;
1275*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
1276fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1277fb9a9224SMatthew Dillon #  define LEAD_CASE(n)                                                         \
1278*0c65ac1dSAntonio Huete Jimenez   case BT_LEAD##n:                                                             \
1279*0c65ac1dSAntonio Huete Jimenez     ptr += n; /* NOTE: The encoding has already been validated. */             \
1280*0c65ac1dSAntonio Huete Jimenez     break;
1281*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(2)
1282*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(3)
1283*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(4)
1284fb9a9224SMatthew Dillon #  undef LEAD_CASE
1285fb9a9224SMatthew Dillon     case BT_AMP:
1286fb9a9224SMatthew Dillon       if (ptr == start)
1287fb9a9224SMatthew Dillon         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1288fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1289fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
1290fb9a9224SMatthew Dillon     case BT_LT:
1291fb9a9224SMatthew Dillon       /* this is for inside entity references */
1292fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1293fb9a9224SMatthew Dillon       return XML_TOK_INVALID;
1294fb9a9224SMatthew Dillon     case BT_LF:
1295fb9a9224SMatthew Dillon       if (ptr == start) {
1296fb9a9224SMatthew Dillon         *nextTokPtr = ptr + MINBPC(enc);
1297fb9a9224SMatthew Dillon         return XML_TOK_DATA_NEWLINE;
1298fb9a9224SMatthew Dillon       }
1299fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1300fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
1301fb9a9224SMatthew Dillon     case BT_CR:
1302fb9a9224SMatthew Dillon       if (ptr == start) {
1303fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
1304*0c65ac1dSAntonio Huete Jimenez         if (! HAS_CHAR(enc, ptr, end))
1305fb9a9224SMatthew Dillon           return XML_TOK_TRAILING_CR;
1306fb9a9224SMatthew Dillon         if (BYTE_TYPE(enc, ptr) == BT_LF)
1307fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
1308fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
1309fb9a9224SMatthew Dillon         return XML_TOK_DATA_NEWLINE;
1310fb9a9224SMatthew Dillon       }
1311fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1312fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
1313fb9a9224SMatthew Dillon     case BT_S:
1314fb9a9224SMatthew Dillon       if (ptr == start) {
1315fb9a9224SMatthew Dillon         *nextTokPtr = ptr + MINBPC(enc);
1316fb9a9224SMatthew Dillon         return XML_TOK_ATTRIBUTE_VALUE_S;
1317fb9a9224SMatthew Dillon       }
1318fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1319fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
1320fb9a9224SMatthew Dillon     default:
1321fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1322fb9a9224SMatthew Dillon       break;
1323fb9a9224SMatthew Dillon     }
1324fb9a9224SMatthew Dillon   }
1325fb9a9224SMatthew Dillon   *nextTokPtr = ptr;
1326fb9a9224SMatthew Dillon   return XML_TOK_DATA_CHARS;
1327fb9a9224SMatthew Dillon }
1328fb9a9224SMatthew Dillon 
1329fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(entityValueTok)1330*0c65ac1dSAntonio Huete Jimenez PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1331*0c65ac1dSAntonio Huete Jimenez                        const char **nextTokPtr) {
1332fb9a9224SMatthew Dillon   const char *start;
1333*0c65ac1dSAntonio Huete Jimenez   if (ptr >= end)
1334fb9a9224SMatthew Dillon     return XML_TOK_NONE;
1335*0c65ac1dSAntonio Huete Jimenez   else if (! HAS_CHAR(enc, ptr, end)) {
1336*0c65ac1dSAntonio Huete Jimenez     /* This line cannot be executed.  The incoming data has already
1337*0c65ac1dSAntonio Huete Jimenez      * been tokenized once, so incomplete characters like this have
1338*0c65ac1dSAntonio Huete Jimenez      * already been eliminated from the input.  Retaining the paranoia
1339*0c65ac1dSAntonio Huete Jimenez      * check is still valuable, however.
1340*0c65ac1dSAntonio Huete Jimenez      */
1341*0c65ac1dSAntonio Huete Jimenez     return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1342*0c65ac1dSAntonio Huete Jimenez   }
1343fb9a9224SMatthew Dillon   start = ptr;
1344*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
1345fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1346fb9a9224SMatthew Dillon #  define LEAD_CASE(n)                                                         \
1347*0c65ac1dSAntonio Huete Jimenez   case BT_LEAD##n:                                                             \
1348*0c65ac1dSAntonio Huete Jimenez     ptr += n; /* NOTE: The encoding has already been validated. */             \
1349*0c65ac1dSAntonio Huete Jimenez     break;
1350*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(2)
1351*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(3)
1352*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(4)
1353fb9a9224SMatthew Dillon #  undef LEAD_CASE
1354fb9a9224SMatthew Dillon     case BT_AMP:
1355fb9a9224SMatthew Dillon       if (ptr == start)
1356fb9a9224SMatthew Dillon         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1357fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1358fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
1359fb9a9224SMatthew Dillon     case BT_PERCNT:
1360fb9a9224SMatthew Dillon       if (ptr == start) {
1361*0c65ac1dSAntonio Huete Jimenez         int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1362fb9a9224SMatthew Dillon         return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1363fb9a9224SMatthew Dillon       }
1364fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1365fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
1366fb9a9224SMatthew Dillon     case BT_LF:
1367fb9a9224SMatthew Dillon       if (ptr == start) {
1368fb9a9224SMatthew Dillon         *nextTokPtr = ptr + MINBPC(enc);
1369fb9a9224SMatthew Dillon         return XML_TOK_DATA_NEWLINE;
1370fb9a9224SMatthew Dillon       }
1371fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1372fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
1373fb9a9224SMatthew Dillon     case BT_CR:
1374fb9a9224SMatthew Dillon       if (ptr == start) {
1375fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
1376*0c65ac1dSAntonio Huete Jimenez         if (! HAS_CHAR(enc, ptr, end))
1377fb9a9224SMatthew Dillon           return XML_TOK_TRAILING_CR;
1378fb9a9224SMatthew Dillon         if (BYTE_TYPE(enc, ptr) == BT_LF)
1379fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
1380fb9a9224SMatthew Dillon         *nextTokPtr = ptr;
1381fb9a9224SMatthew Dillon         return XML_TOK_DATA_NEWLINE;
1382fb9a9224SMatthew Dillon       }
1383fb9a9224SMatthew Dillon       *nextTokPtr = ptr;
1384fb9a9224SMatthew Dillon       return XML_TOK_DATA_CHARS;
1385fb9a9224SMatthew Dillon     default:
1386fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1387fb9a9224SMatthew Dillon       break;
1388fb9a9224SMatthew Dillon     }
1389fb9a9224SMatthew Dillon   }
1390fb9a9224SMatthew Dillon   *nextTokPtr = ptr;
1391fb9a9224SMatthew Dillon   return XML_TOK_DATA_CHARS;
1392fb9a9224SMatthew Dillon }
1393fb9a9224SMatthew Dillon 
1394fb9a9224SMatthew Dillon #  ifdef XML_DTD
1395fb9a9224SMatthew Dillon 
1396fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(ignoreSectionTok)1397*0c65ac1dSAntonio Huete Jimenez PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
1398*0c65ac1dSAntonio Huete Jimenez                          const char **nextTokPtr) {
1399fb9a9224SMatthew Dillon   int level = 0;
1400fb9a9224SMatthew Dillon   if (MINBPC(enc) > 1) {
1401fb9a9224SMatthew Dillon     size_t n = end - ptr;
1402fb9a9224SMatthew Dillon     if (n & (MINBPC(enc) - 1)) {
1403fb9a9224SMatthew Dillon       n &= ~(MINBPC(enc) - 1);
1404fb9a9224SMatthew Dillon       end = ptr + n;
1405fb9a9224SMatthew Dillon     }
1406fb9a9224SMatthew Dillon   }
1407*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
1408fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1409fb9a9224SMatthew Dillon       INVALID_CASES(ptr, nextTokPtr)
1410fb9a9224SMatthew Dillon     case BT_LT:
1411*0c65ac1dSAntonio Huete Jimenez       ptr += MINBPC(enc);
1412*0c65ac1dSAntonio Huete Jimenez       REQUIRE_CHAR(enc, ptr, end);
1413fb9a9224SMatthew Dillon       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1414*0c65ac1dSAntonio Huete Jimenez         ptr += MINBPC(enc);
1415*0c65ac1dSAntonio Huete Jimenez         REQUIRE_CHAR(enc, ptr, end);
1416fb9a9224SMatthew Dillon         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1417fb9a9224SMatthew Dillon           ++level;
1418fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
1419fb9a9224SMatthew Dillon         }
1420fb9a9224SMatthew Dillon       }
1421fb9a9224SMatthew Dillon       break;
1422fb9a9224SMatthew Dillon     case BT_RSQB:
1423*0c65ac1dSAntonio Huete Jimenez       ptr += MINBPC(enc);
1424*0c65ac1dSAntonio Huete Jimenez       REQUIRE_CHAR(enc, ptr, end);
1425fb9a9224SMatthew Dillon       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1426*0c65ac1dSAntonio Huete Jimenez         ptr += MINBPC(enc);
1427*0c65ac1dSAntonio Huete Jimenez         REQUIRE_CHAR(enc, ptr, end);
1428fb9a9224SMatthew Dillon         if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1429fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
1430fb9a9224SMatthew Dillon           if (level == 0) {
1431fb9a9224SMatthew Dillon             *nextTokPtr = ptr;
1432fb9a9224SMatthew Dillon             return XML_TOK_IGNORE_SECT;
1433fb9a9224SMatthew Dillon           }
1434fb9a9224SMatthew Dillon           --level;
1435fb9a9224SMatthew Dillon         }
1436fb9a9224SMatthew Dillon       }
1437fb9a9224SMatthew Dillon       break;
1438fb9a9224SMatthew Dillon     default:
1439fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1440fb9a9224SMatthew Dillon       break;
1441fb9a9224SMatthew Dillon     }
1442fb9a9224SMatthew Dillon   }
1443fb9a9224SMatthew Dillon   return XML_TOK_PARTIAL;
1444fb9a9224SMatthew Dillon }
1445fb9a9224SMatthew Dillon 
1446fb9a9224SMatthew Dillon #  endif /* XML_DTD */
1447fb9a9224SMatthew Dillon 
1448fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(isPublicId)1449fb9a9224SMatthew Dillon PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1450*0c65ac1dSAntonio Huete Jimenez                    const char **badPtr) {
1451fb9a9224SMatthew Dillon   ptr += MINBPC(enc);
1452fb9a9224SMatthew Dillon   end -= MINBPC(enc);
1453*0c65ac1dSAntonio Huete Jimenez   for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1454fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1455fb9a9224SMatthew Dillon     case BT_DIGIT:
1456fb9a9224SMatthew Dillon     case BT_HEX:
1457fb9a9224SMatthew Dillon     case BT_MINUS:
1458fb9a9224SMatthew Dillon     case BT_APOS:
1459fb9a9224SMatthew Dillon     case BT_LPAR:
1460fb9a9224SMatthew Dillon     case BT_RPAR:
1461fb9a9224SMatthew Dillon     case BT_PLUS:
1462fb9a9224SMatthew Dillon     case BT_COMMA:
1463fb9a9224SMatthew Dillon     case BT_SOL:
1464fb9a9224SMatthew Dillon     case BT_EQUALS:
1465fb9a9224SMatthew Dillon     case BT_QUEST:
1466fb9a9224SMatthew Dillon     case BT_CR:
1467fb9a9224SMatthew Dillon     case BT_LF:
1468fb9a9224SMatthew Dillon     case BT_SEMI:
1469fb9a9224SMatthew Dillon     case BT_EXCL:
1470fb9a9224SMatthew Dillon     case BT_AST:
1471fb9a9224SMatthew Dillon     case BT_PERCNT:
1472fb9a9224SMatthew Dillon     case BT_NUM:
1473fb9a9224SMatthew Dillon #  ifdef XML_NS
1474fb9a9224SMatthew Dillon     case BT_COLON:
1475fb9a9224SMatthew Dillon #  endif
1476fb9a9224SMatthew Dillon       break;
1477fb9a9224SMatthew Dillon     case BT_S:
1478fb9a9224SMatthew Dillon       if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1479fb9a9224SMatthew Dillon         *badPtr = ptr;
1480fb9a9224SMatthew Dillon         return 0;
1481fb9a9224SMatthew Dillon       }
1482fb9a9224SMatthew Dillon       break;
1483fb9a9224SMatthew Dillon     case BT_NAME:
1484fb9a9224SMatthew Dillon     case BT_NMSTRT:
1485fb9a9224SMatthew Dillon       if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1486fb9a9224SMatthew Dillon         break;
1487*0c65ac1dSAntonio Huete Jimenez       /* fall through */
1488fb9a9224SMatthew Dillon     default:
1489fb9a9224SMatthew Dillon       switch (BYTE_TO_ASCII(enc, ptr)) {
1490fb9a9224SMatthew Dillon       case 0x24: /* $ */
1491fb9a9224SMatthew Dillon       case 0x40: /* @ */
1492fb9a9224SMatthew Dillon         break;
1493fb9a9224SMatthew Dillon       default:
1494fb9a9224SMatthew Dillon         *badPtr = ptr;
1495fb9a9224SMatthew Dillon         return 0;
1496fb9a9224SMatthew Dillon       }
1497fb9a9224SMatthew Dillon       break;
1498fb9a9224SMatthew Dillon     }
1499fb9a9224SMatthew Dillon   }
1500fb9a9224SMatthew Dillon   return 1;
1501fb9a9224SMatthew Dillon }
1502fb9a9224SMatthew Dillon 
1503fb9a9224SMatthew Dillon /* This must only be called for a well-formed start-tag or empty
1504fb9a9224SMatthew Dillon    element tag.  Returns the number of attributes.  Pointers to the
1505fb9a9224SMatthew Dillon    first attsMax attributes are stored in atts.
1506fb9a9224SMatthew Dillon */
1507fb9a9224SMatthew Dillon 
1508fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(getAtts)1509*0c65ac1dSAntonio Huete Jimenez PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
1510*0c65ac1dSAntonio Huete Jimenez                 ATTRIBUTE *atts) {
1511fb9a9224SMatthew Dillon   enum { other, inName, inValue } state = inName;
1512fb9a9224SMatthew Dillon   int nAtts = 0;
1513fb9a9224SMatthew Dillon   int open = 0; /* defined when state == inValue;
1514fb9a9224SMatthew Dillon                    initialization just to shut up compilers */
1515fb9a9224SMatthew Dillon 
1516fb9a9224SMatthew Dillon   for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1517fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1518fb9a9224SMatthew Dillon #  define START_NAME                                                           \
1519fb9a9224SMatthew Dillon     if (state == other) {                                                      \
1520fb9a9224SMatthew Dillon       if (nAtts < attsMax) {                                                   \
1521fb9a9224SMatthew Dillon         atts[nAtts].name = ptr;                                                \
1522fb9a9224SMatthew Dillon         atts[nAtts].normalized = 1;                                            \
1523fb9a9224SMatthew Dillon       }                                                                        \
1524fb9a9224SMatthew Dillon       state = inName;                                                          \
1525fb9a9224SMatthew Dillon     }
1526fb9a9224SMatthew Dillon #  define LEAD_CASE(n)                                                         \
1527*0c65ac1dSAntonio Huete Jimenez   case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
1528*0c65ac1dSAntonio Huete Jimenez     START_NAME ptr += (n - MINBPC(enc));                                       \
1529*0c65ac1dSAntonio Huete Jimenez     break;
1530*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(2)
1531*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(3)
1532*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(4)
1533fb9a9224SMatthew Dillon #  undef LEAD_CASE
1534fb9a9224SMatthew Dillon     case BT_NONASCII:
1535fb9a9224SMatthew Dillon     case BT_NMSTRT:
1536fb9a9224SMatthew Dillon     case BT_HEX:
1537fb9a9224SMatthew Dillon       START_NAME
1538fb9a9224SMatthew Dillon       break;
1539fb9a9224SMatthew Dillon #  undef START_NAME
1540fb9a9224SMatthew Dillon     case BT_QUOT:
1541fb9a9224SMatthew Dillon       if (state != inValue) {
1542fb9a9224SMatthew Dillon         if (nAtts < attsMax)
1543fb9a9224SMatthew Dillon           atts[nAtts].valuePtr = ptr + MINBPC(enc);
1544fb9a9224SMatthew Dillon         state = inValue;
1545fb9a9224SMatthew Dillon         open = BT_QUOT;
1546*0c65ac1dSAntonio Huete Jimenez       } else if (open == BT_QUOT) {
1547fb9a9224SMatthew Dillon         state = other;
1548fb9a9224SMatthew Dillon         if (nAtts < attsMax)
1549fb9a9224SMatthew Dillon           atts[nAtts].valueEnd = ptr;
1550fb9a9224SMatthew Dillon         nAtts++;
1551fb9a9224SMatthew Dillon       }
1552fb9a9224SMatthew Dillon       break;
1553fb9a9224SMatthew Dillon     case BT_APOS:
1554fb9a9224SMatthew Dillon       if (state != inValue) {
1555fb9a9224SMatthew Dillon         if (nAtts < attsMax)
1556fb9a9224SMatthew Dillon           atts[nAtts].valuePtr = ptr + MINBPC(enc);
1557fb9a9224SMatthew Dillon         state = inValue;
1558fb9a9224SMatthew Dillon         open = BT_APOS;
1559*0c65ac1dSAntonio Huete Jimenez       } else if (open == BT_APOS) {
1560fb9a9224SMatthew Dillon         state = other;
1561fb9a9224SMatthew Dillon         if (nAtts < attsMax)
1562fb9a9224SMatthew Dillon           atts[nAtts].valueEnd = ptr;
1563fb9a9224SMatthew Dillon         nAtts++;
1564fb9a9224SMatthew Dillon       }
1565fb9a9224SMatthew Dillon       break;
1566fb9a9224SMatthew Dillon     case BT_AMP:
1567fb9a9224SMatthew Dillon       if (nAtts < attsMax)
1568fb9a9224SMatthew Dillon         atts[nAtts].normalized = 0;
1569fb9a9224SMatthew Dillon       break;
1570fb9a9224SMatthew Dillon     case BT_S:
1571fb9a9224SMatthew Dillon       if (state == inName)
1572fb9a9224SMatthew Dillon         state = other;
1573*0c65ac1dSAntonio Huete Jimenez       else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
1574fb9a9224SMatthew Dillon                && (ptr == atts[nAtts].valuePtr
1575fb9a9224SMatthew Dillon                    || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1576fb9a9224SMatthew Dillon                    || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1577fb9a9224SMatthew Dillon                    || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1578fb9a9224SMatthew Dillon         atts[nAtts].normalized = 0;
1579fb9a9224SMatthew Dillon       break;
1580*0c65ac1dSAntonio Huete Jimenez     case BT_CR:
1581*0c65ac1dSAntonio Huete Jimenez     case BT_LF:
1582fb9a9224SMatthew Dillon       /* This case ensures that the first attribute name is counted
1583fb9a9224SMatthew Dillon          Apart from that we could just change state on the quote. */
1584fb9a9224SMatthew Dillon       if (state == inName)
1585fb9a9224SMatthew Dillon         state = other;
1586fb9a9224SMatthew Dillon       else if (state == inValue && nAtts < attsMax)
1587fb9a9224SMatthew Dillon         atts[nAtts].normalized = 0;
1588fb9a9224SMatthew Dillon       break;
1589fb9a9224SMatthew Dillon     case BT_GT:
1590fb9a9224SMatthew Dillon     case BT_SOL:
1591fb9a9224SMatthew Dillon       if (state != inValue)
1592fb9a9224SMatthew Dillon         return nAtts;
1593fb9a9224SMatthew Dillon       break;
1594fb9a9224SMatthew Dillon     default:
1595fb9a9224SMatthew Dillon       break;
1596fb9a9224SMatthew Dillon     }
1597fb9a9224SMatthew Dillon   }
1598fb9a9224SMatthew Dillon   /* not reached */
1599fb9a9224SMatthew Dillon }
1600fb9a9224SMatthew Dillon 
1601fb9a9224SMatthew Dillon static int PTRFASTCALL
PREFIX(charRefNumber)1602*0c65ac1dSAntonio Huete Jimenez PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
1603fb9a9224SMatthew Dillon   int result = 0;
1604fb9a9224SMatthew Dillon   /* skip &# */
1605*0c65ac1dSAntonio Huete Jimenez   UNUSED_P(enc);
1606fb9a9224SMatthew Dillon   ptr += 2 * MINBPC(enc);
1607fb9a9224SMatthew Dillon   if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1608*0c65ac1dSAntonio Huete Jimenez     for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1609fb9a9224SMatthew Dillon          ptr += MINBPC(enc)) {
1610fb9a9224SMatthew Dillon       int c = BYTE_TO_ASCII(enc, ptr);
1611fb9a9224SMatthew Dillon       switch (c) {
1612*0c65ac1dSAntonio Huete Jimenez       case ASCII_0:
1613*0c65ac1dSAntonio Huete Jimenez       case ASCII_1:
1614*0c65ac1dSAntonio Huete Jimenez       case ASCII_2:
1615*0c65ac1dSAntonio Huete Jimenez       case ASCII_3:
1616*0c65ac1dSAntonio Huete Jimenez       case ASCII_4:
1617*0c65ac1dSAntonio Huete Jimenez       case ASCII_5:
1618*0c65ac1dSAntonio Huete Jimenez       case ASCII_6:
1619*0c65ac1dSAntonio Huete Jimenez       case ASCII_7:
1620*0c65ac1dSAntonio Huete Jimenez       case ASCII_8:
1621*0c65ac1dSAntonio Huete Jimenez       case ASCII_9:
1622fb9a9224SMatthew Dillon         result <<= 4;
1623fb9a9224SMatthew Dillon         result |= (c - ASCII_0);
1624fb9a9224SMatthew Dillon         break;
1625*0c65ac1dSAntonio Huete Jimenez       case ASCII_A:
1626*0c65ac1dSAntonio Huete Jimenez       case ASCII_B:
1627*0c65ac1dSAntonio Huete Jimenez       case ASCII_C:
1628*0c65ac1dSAntonio Huete Jimenez       case ASCII_D:
1629*0c65ac1dSAntonio Huete Jimenez       case ASCII_E:
1630*0c65ac1dSAntonio Huete Jimenez       case ASCII_F:
1631fb9a9224SMatthew Dillon         result <<= 4;
1632fb9a9224SMatthew Dillon         result += 10 + (c - ASCII_A);
1633fb9a9224SMatthew Dillon         break;
1634*0c65ac1dSAntonio Huete Jimenez       case ASCII_a:
1635*0c65ac1dSAntonio Huete Jimenez       case ASCII_b:
1636*0c65ac1dSAntonio Huete Jimenez       case ASCII_c:
1637*0c65ac1dSAntonio Huete Jimenez       case ASCII_d:
1638*0c65ac1dSAntonio Huete Jimenez       case ASCII_e:
1639*0c65ac1dSAntonio Huete Jimenez       case ASCII_f:
1640fb9a9224SMatthew Dillon         result <<= 4;
1641fb9a9224SMatthew Dillon         result += 10 + (c - ASCII_a);
1642fb9a9224SMatthew Dillon         break;
1643fb9a9224SMatthew Dillon       }
1644fb9a9224SMatthew Dillon       if (result >= 0x110000)
1645fb9a9224SMatthew Dillon         return -1;
1646fb9a9224SMatthew Dillon     }
1647*0c65ac1dSAntonio Huete Jimenez   } else {
1648fb9a9224SMatthew Dillon     for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1649fb9a9224SMatthew Dillon       int c = BYTE_TO_ASCII(enc, ptr);
1650fb9a9224SMatthew Dillon       result *= 10;
1651fb9a9224SMatthew Dillon       result += (c - ASCII_0);
1652fb9a9224SMatthew Dillon       if (result >= 0x110000)
1653fb9a9224SMatthew Dillon         return -1;
1654fb9a9224SMatthew Dillon     }
1655fb9a9224SMatthew Dillon   }
1656fb9a9224SMatthew Dillon   return checkCharRefNumber(result);
1657fb9a9224SMatthew Dillon }
1658fb9a9224SMatthew Dillon 
1659fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(predefinedEntityName)1660fb9a9224SMatthew Dillon PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
1661*0c65ac1dSAntonio Huete Jimenez                              const char *end) {
1662*0c65ac1dSAntonio Huete Jimenez   UNUSED_P(enc);
1663fb9a9224SMatthew Dillon   switch ((end - ptr) / MINBPC(enc)) {
1664fb9a9224SMatthew Dillon   case 2:
1665fb9a9224SMatthew Dillon     if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1666fb9a9224SMatthew Dillon       switch (BYTE_TO_ASCII(enc, ptr)) {
1667fb9a9224SMatthew Dillon       case ASCII_l:
1668fb9a9224SMatthew Dillon         return ASCII_LT;
1669fb9a9224SMatthew Dillon       case ASCII_g:
1670fb9a9224SMatthew Dillon         return ASCII_GT;
1671fb9a9224SMatthew Dillon       }
1672fb9a9224SMatthew Dillon     }
1673fb9a9224SMatthew Dillon     break;
1674fb9a9224SMatthew Dillon   case 3:
1675fb9a9224SMatthew Dillon     if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1676fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1677fb9a9224SMatthew Dillon       if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1678fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
1679fb9a9224SMatthew Dillon         if (CHAR_MATCHES(enc, ptr, ASCII_p))
1680fb9a9224SMatthew Dillon           return ASCII_AMP;
1681fb9a9224SMatthew Dillon       }
1682fb9a9224SMatthew Dillon     }
1683fb9a9224SMatthew Dillon     break;
1684fb9a9224SMatthew Dillon   case 4:
1685fb9a9224SMatthew Dillon     switch (BYTE_TO_ASCII(enc, ptr)) {
1686fb9a9224SMatthew Dillon     case ASCII_q:
1687fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1688fb9a9224SMatthew Dillon       if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1689fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
1690fb9a9224SMatthew Dillon         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1691fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
1692fb9a9224SMatthew Dillon           if (CHAR_MATCHES(enc, ptr, ASCII_t))
1693fb9a9224SMatthew Dillon             return ASCII_QUOT;
1694fb9a9224SMatthew Dillon         }
1695fb9a9224SMatthew Dillon       }
1696fb9a9224SMatthew Dillon       break;
1697fb9a9224SMatthew Dillon     case ASCII_a:
1698fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1699fb9a9224SMatthew Dillon       if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1700fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
1701fb9a9224SMatthew Dillon         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1702fb9a9224SMatthew Dillon           ptr += MINBPC(enc);
1703fb9a9224SMatthew Dillon           if (CHAR_MATCHES(enc, ptr, ASCII_s))
1704fb9a9224SMatthew Dillon             return ASCII_APOS;
1705fb9a9224SMatthew Dillon         }
1706fb9a9224SMatthew Dillon       }
1707fb9a9224SMatthew Dillon       break;
1708fb9a9224SMatthew Dillon     }
1709fb9a9224SMatthew Dillon   }
1710fb9a9224SMatthew Dillon   return 0;
1711fb9a9224SMatthew Dillon }
1712fb9a9224SMatthew Dillon 
1713fb9a9224SMatthew Dillon static int PTRCALL
PREFIX(nameMatchesAscii)1714fb9a9224SMatthew Dillon PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1715*0c65ac1dSAntonio Huete Jimenez                          const char *end1, const char *ptr2) {
1716*0c65ac1dSAntonio Huete Jimenez   UNUSED_P(enc);
1717fb9a9224SMatthew Dillon   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1718*0c65ac1dSAntonio Huete Jimenez     if (end1 - ptr1 < MINBPC(enc)) {
1719*0c65ac1dSAntonio Huete Jimenez       /* This line cannot be executed.  The incoming data has already
1720*0c65ac1dSAntonio Huete Jimenez        * been tokenized once, so incomplete characters like this have
1721*0c65ac1dSAntonio Huete Jimenez        * already been eliminated from the input.  Retaining the
1722*0c65ac1dSAntonio Huete Jimenez        * paranoia check is still valuable, however.
1723*0c65ac1dSAntonio Huete Jimenez        */
1724*0c65ac1dSAntonio Huete Jimenez       return 0; /* LCOV_EXCL_LINE */
1725*0c65ac1dSAntonio Huete Jimenez     }
1726fb9a9224SMatthew Dillon     if (! CHAR_MATCHES(enc, ptr1, *ptr2))
1727fb9a9224SMatthew Dillon       return 0;
1728fb9a9224SMatthew Dillon   }
1729fb9a9224SMatthew Dillon   return ptr1 == end1;
1730fb9a9224SMatthew Dillon }
1731fb9a9224SMatthew Dillon 
1732fb9a9224SMatthew Dillon static int PTRFASTCALL
PREFIX(nameLength)1733*0c65ac1dSAntonio Huete Jimenez PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
1734fb9a9224SMatthew Dillon   const char *start = ptr;
1735fb9a9224SMatthew Dillon   for (;;) {
1736fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1737fb9a9224SMatthew Dillon #  define LEAD_CASE(n)                                                         \
1738*0c65ac1dSAntonio Huete Jimenez   case BT_LEAD##n:                                                             \
1739*0c65ac1dSAntonio Huete Jimenez     ptr += n; /* NOTE: The encoding has already been validated. */             \
1740*0c65ac1dSAntonio Huete Jimenez     break;
1741*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(2)
1742*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(3)
1743*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(4)
1744fb9a9224SMatthew Dillon #  undef LEAD_CASE
1745fb9a9224SMatthew Dillon     case BT_NONASCII:
1746fb9a9224SMatthew Dillon     case BT_NMSTRT:
1747fb9a9224SMatthew Dillon #  ifdef XML_NS
1748fb9a9224SMatthew Dillon     case BT_COLON:
1749fb9a9224SMatthew Dillon #  endif
1750fb9a9224SMatthew Dillon     case BT_HEX:
1751fb9a9224SMatthew Dillon     case BT_DIGIT:
1752fb9a9224SMatthew Dillon     case BT_NAME:
1753fb9a9224SMatthew Dillon     case BT_MINUS:
1754fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1755fb9a9224SMatthew Dillon       break;
1756fb9a9224SMatthew Dillon     default:
1757fb9a9224SMatthew Dillon       return (int)(ptr - start);
1758fb9a9224SMatthew Dillon     }
1759fb9a9224SMatthew Dillon   }
1760fb9a9224SMatthew Dillon }
1761fb9a9224SMatthew Dillon 
1762fb9a9224SMatthew Dillon static const char *PTRFASTCALL
PREFIX(skipS)1763*0c65ac1dSAntonio Huete Jimenez PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
1764fb9a9224SMatthew Dillon   for (;;) {
1765fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1766fb9a9224SMatthew Dillon     case BT_LF:
1767fb9a9224SMatthew Dillon     case BT_CR:
1768fb9a9224SMatthew Dillon     case BT_S:
1769fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1770fb9a9224SMatthew Dillon       break;
1771fb9a9224SMatthew Dillon     default:
1772fb9a9224SMatthew Dillon       return ptr;
1773fb9a9224SMatthew Dillon     }
1774fb9a9224SMatthew Dillon   }
1775fb9a9224SMatthew Dillon }
1776fb9a9224SMatthew Dillon 
1777fb9a9224SMatthew Dillon static void PTRCALL
PREFIX(updatePosition)1778*0c65ac1dSAntonio Huete Jimenez PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
1779*0c65ac1dSAntonio Huete Jimenez                        POSITION *pos) {
1780*0c65ac1dSAntonio Huete Jimenez   while (HAS_CHAR(enc, ptr, end)) {
1781fb9a9224SMatthew Dillon     switch (BYTE_TYPE(enc, ptr)) {
1782fb9a9224SMatthew Dillon #  define LEAD_CASE(n)                                                         \
1783fb9a9224SMatthew Dillon   case BT_LEAD##n:                                                             \
1784*0c65ac1dSAntonio Huete Jimenez     ptr += n; /* NOTE: The encoding has already been validated. */             \
1785*0c65ac1dSAntonio Huete Jimenez     pos->columnNumber++;                                                       \
1786fb9a9224SMatthew Dillon     break;
1787*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(2)
1788*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(3)
1789*0c65ac1dSAntonio Huete Jimenez       LEAD_CASE(4)
1790fb9a9224SMatthew Dillon #  undef LEAD_CASE
1791fb9a9224SMatthew Dillon     case BT_LF:
1792*0c65ac1dSAntonio Huete Jimenez       pos->columnNumber = 0;
1793fb9a9224SMatthew Dillon       pos->lineNumber++;
1794fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1795fb9a9224SMatthew Dillon       break;
1796fb9a9224SMatthew Dillon     case BT_CR:
1797fb9a9224SMatthew Dillon       pos->lineNumber++;
1798fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1799*0c65ac1dSAntonio Huete Jimenez       if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1800fb9a9224SMatthew Dillon         ptr += MINBPC(enc);
1801*0c65ac1dSAntonio Huete Jimenez       pos->columnNumber = 0;
1802fb9a9224SMatthew Dillon       break;
1803fb9a9224SMatthew Dillon     default:
1804fb9a9224SMatthew Dillon       ptr += MINBPC(enc);
1805*0c65ac1dSAntonio Huete Jimenez       pos->columnNumber++;
1806fb9a9224SMatthew Dillon       break;
1807fb9a9224SMatthew Dillon     }
1808fb9a9224SMatthew Dillon   }
1809fb9a9224SMatthew Dillon }
1810fb9a9224SMatthew Dillon 
1811fb9a9224SMatthew Dillon #  undef DO_LEAD_CASE
1812fb9a9224SMatthew Dillon #  undef MULTIBYTE_CASES
1813fb9a9224SMatthew Dillon #  undef INVALID_CASES
1814fb9a9224SMatthew Dillon #  undef CHECK_NAME_CASE
1815fb9a9224SMatthew Dillon #  undef CHECK_NAME_CASES
1816fb9a9224SMatthew Dillon #  undef CHECK_NMSTRT_CASE
1817fb9a9224SMatthew Dillon #  undef CHECK_NMSTRT_CASES
1818fb9a9224SMatthew Dillon 
1819fb9a9224SMatthew Dillon #endif /* XML_TOK_IMPL_C */
1820