1 /*
2 The contents of this file are subject to the Mozilla Public License
3 Version 1.0 (the "License"); you may not use this file except in
4 compliance with the License. You may obtain a copy of the License at
5 http://www.mozilla.org/MPL/
6 
7 Software distributed under the License is distributed on an "AS IS"
8 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9 License for the specific language governing rights and limitations
10 under the License.
11 
12 The Original Code is expat.
13 
14 The Initial Developer of the Original Code is James Clark.
15 Portions created by James Clark are Copyright (C) 1998
16 James Clark. All Rights Reserved.
17 
18 Contributor(s):
19 $Id: xmltok.h,v 1.2 2001/09/15 10:14:27 torcs Exp $
20 */
21 
22 #ifndef XmlTok_INCLUDED
23 #define XmlTok_INCLUDED 1
24 
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28 
/* XMLTOKAPI appears in the declaration of every function exported by this
   header.  A build may define it before including this file; if it has
   not done so, the macro expands to nothing. */
#if !defined(XMLTOKAPI)
#define XMLTOKAPI
#endif
32 
/* Negative and zero token codes.  The negative values are parenthesized so
   that each macro expands to a single primary expression wherever it is
   used. */

/* The following token may be returned by XmlContentTok */

/* ] or ]] at the end of the scan; might be start of illegal ]]> sequence */
#define XML_TOK_TRAILING_RSQB (-5)

/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */

/* The string to be scanned is empty */
#define XML_TOK_NONE (-4)
/* A CR at the end of the scan; might be part of CRLF sequence */
#define XML_TOK_TRAILING_CR (-3)
/* only part of a multibyte sequence */
#define XML_TOK_PARTIAL_CHAR (-2)
/* only part of a token */
#define XML_TOK_PARTIAL (-1)
#define XML_TOK_INVALID 0
43 
/* Token codes 1..10.
   The following tokens are returned by XmlContentTok; some are also
   returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */

#define XML_TOK_START_TAG_WITH_ATTS      1
#define XML_TOK_START_TAG_NO_ATTS        2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS  3  /* empty element tag <e/> */
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS    4
#define XML_TOK_END_TAG                  5
#define XML_TOK_DATA_CHARS               6
#define XML_TOK_DATA_NEWLINE             7
#define XML_TOK_CDATA_SECT_OPEN          8
#define XML_TOK_ENTITY_REF               9
#define XML_TOK_CHAR_REF                10  /* numeric character reference */
57 
/* Token codes 11..14.
   The following tokens may be returned by both XmlPrologTok and
   XmlContentTok */
#define XML_TOK_PI        11  /* processing instruction */
#define XML_TOK_XML_DECL  12  /* XML decl or text decl */
#define XML_TOK_COMMENT   13
#define XML_TOK_BOM       14  /* Byte order mark */
63 
/* Token codes 15..29.
   The following tokens are returned only by XmlPrologTok */
#define XML_TOK_PROLOG_S          15
#define XML_TOK_DECL_OPEN         16  /* <!foo */
#define XML_TOK_DECL_CLOSE        17  /* > */
#define XML_TOK_NAME              18
#define XML_TOK_NMTOKEN           19
#define XML_TOK_POUND_NAME        20  /* #name */
#define XML_TOK_OR                21  /* | */
#define XML_TOK_PERCENT           22
#define XML_TOK_OPEN_PAREN        23
#define XML_TOK_CLOSE_PAREN       24
#define XML_TOK_OPEN_BRACKET      25
#define XML_TOK_CLOSE_BRACKET     26
#define XML_TOK_LITERAL           27
#define XML_TOK_PARAM_ENTITY_REF  28
#define XML_TOK_INSTANCE_START    29
80 
/* Token codes 30..38.
   The following occur only in element type declarations */
#define XML_TOK_NAME_QUESTION         30  /* name? */
#define XML_TOK_NAME_ASTERISK         31  /* name* */
#define XML_TOK_NAME_PLUS             32  /* name+ */
#define XML_TOK_COND_SECT_OPEN        33  /* <![ */
#define XML_TOK_COND_SECT_CLOSE       34  /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION  35  /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK  36  /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS      37  /* )+ */
#define XML_TOK_COMMA                 38
91 
/* Token codes 39 and 40, each specific to a single tokenizer. */

/* The following token is returned only by XmlAttributeValueTok */
#define XML_TOK_ATTRIBUTE_VALUE_S  39

/* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE   40
97 
/* Scanner states.  XML_N_STATES is the length of the `scanners` array in
   struct encoding; the three state values are the indices that XmlTok and
   its wrappers use to select an entry of that array. */
#define XML_PROLOG_STATE         0
#define XML_CONTENT_STATE        1
#define XML_CDATA_SECTION_STATE  2
#define XML_N_STATES             3
102 
/* Literal types.  XML_N_LITERAL_TYPES is the length of the
   `literalScanners` array in struct encoding; the two type values are the
   indices that XmlLiteralTok and its wrappers use to select an entry. */
#define XML_ATTRIBUTE_VALUE_LITERAL  0
#define XML_ENTITY_VALUE_LITERAL     1
#define XML_N_LITERAL_TYPES          2
106 
/* Minimum sizes, in elements of the respective buffer type, of the output
   buffers handed to the encode functions declared in this header. */

/* XmlUtf8Encode: the char buffer must hold at least this many elements. */
#define XML_UTF8_ENCODE_MAX   4
/* XmlUtf16Encode: the unsigned short buffer must hold at least this many
   elements. */
#define XML_UTF16_ENCODE_MAX  2
111 
/* A line/column location, as maintained by the updatePosition member of
   struct encoding.  Both counters are zero-based: the first line and the
   first column are 0, not 1. */
struct position {
  unsigned long lineNumber;
  unsigned long columnNumber;
};
typedef struct position POSITION;
117 
/* One attribute record.  The getAtts member of struct encoding receives an
   array of these (`ATTRIBUTE *atts`) together with its capacity.
   NOTE(review): the field meanings below are inferred from their names;
   confirm against the tokenizer implementation. */
typedef struct {
  const char *name;      /* presumably points at the attribute name */
  const char *valuePtr;  /* presumably points at the start of the value */
  const char *valueEnd;  /* presumably points just past the value */
  char normalized;       /* flag; exact meaning not visible in this header */
} ATTRIBUTE;
124 
125 struct encoding;
126 typedef struct encoding ENCODING;
127 
128 struct encoding {
129   int (*scanners[XML_N_STATES])(const ENCODING *,
130 			        const char *,
131 			        const char *,
132 			        const char **);
133   int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *,
134 					      const char *,
135 					      const char *,
136 					      const char **);
137   int (*sameName)(const ENCODING *,
138 	          const char *, const char *);
139   int (*nameMatchesAscii)(const ENCODING *,
140 			  const char *, const char *);
141   int (*nameLength)(const ENCODING *, const char *);
142   const char *(*skipS)(const ENCODING *, const char *);
143   int (*getAtts)(const ENCODING *enc, const char *ptr,
144 	         int attsMax, ATTRIBUTE *atts);
145   int (*charRefNumber)(const ENCODING *enc, const char *ptr);
146   int (*predefinedEntityName)(const ENCODING *, const char *, const char *);
147   void (*updatePosition)(const ENCODING *,
148 			 const char *ptr,
149 			 const char *end,
150 			 POSITION *);
151   int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
152 		    const char **badPtr);
153   void (*utf8Convert)(const ENCODING *enc,
154 		      const char **fromP,
155 		      const char *fromLim,
156 		      char **toP,
157 		      const char *toLim);
158   void (*utf16Convert)(const ENCODING *enc,
159 		       const char **fromP,
160 		       const char *fromLim,
161 		       unsigned short **toP,
162 		       const unsigned short *toLim);
163   int minBytesPerChar;
164   char isUtf8;
165   char isUtf16;
166 };
167 
168 /*
Scan the string starting at ptr until the end of the next complete token,
but do not scan past end.  Return an integer giving the type of token.

Return XML_TOK_NONE when ptr == end; nextTokPtr will not be set.
173 
174 Return XML_TOK_PARTIAL when the string does not contain a complete token;
175 nextTokPtr will not be set.
176 
177 Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr
178 will be set to point to the character which made the token invalid.
179 
180 Otherwise the string starts with a valid token; nextTokPtr will be set to point
181 to the character following the end of that token.
182 
183 Each data character counts as a single token, but adjacent data characters
184 may be returned together.  Similarly for characters in the prolog outside
185 literals, comments and processing instructions.
186 */
187 
188 
/* Run the scanner that `enc` holds for `state` on [ptr, end) and yield its
   int result.  `enc` appears twice in the expansion, so it must be an
   expression without side effects. */
#define XmlTok(enc, state, ptr, end, nextTokPtr) \
  ((*(enc)->scanners[state])((enc), (ptr), (end), (nextTokPtr)))

/* XmlTok with the state fixed to XML_PROLOG_STATE. */
#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
  XmlTok((enc), XML_PROLOG_STATE, (ptr), (end), (nextTokPtr))

/* XmlTok with the state fixed to XML_CONTENT_STATE. */
#define XmlContentTok(enc, ptr, end, nextTokPtr) \
  XmlTok((enc), XML_CONTENT_STATE, (ptr), (end), (nextTokPtr))

/* XmlTok with the state fixed to XML_CDATA_SECTION_STATE. */
#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
  XmlTok((enc), XML_CDATA_SECTION_STATE, (ptr), (end), (nextTokPtr))
200 
/* Second-level tokenization: applied to the content of a literal that
   XmlTok has already returned.  Runs the literal scanner that `enc` holds
   for `literalType`.  `enc` appears twice in the expansion, so it must be
   an expression without side effects. */
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
  ((*(enc)->literalScanners[literalType])((enc), (ptr), (end), (nextTokPtr)))

/* XmlLiteralTok with the type fixed to XML_ATTRIBUTE_VALUE_LITERAL. */
#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
  XmlLiteralTok((enc), XML_ATTRIBUTE_VALUE_LITERAL, (ptr), (end), (nextTokPtr))

/* XmlLiteralTok with the type fixed to XML_ENTITY_VALUE_LITERAL. */
#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
  XmlLiteralTok((enc), XML_ENTITY_VALUE_LITERAL, (ptr), (end), (nextTokPtr))
212 
/* Convenience wrappers around the function-pointer members of
   struct encoding.  Each one calls the like-named member of `enc`, passing
   `enc` as the first argument followed by the remaining macro arguments,
   and yields whatever that member returns.  `enc` appears twice in every
   expansion, so it must be an expression without side effects. */

#define XmlSameName(enc, ptr1, ptr2) \
  ((enc)->sameName((enc), (ptr1), (ptr2)))

#define XmlNameMatchesAscii(enc, ptr1, ptr2) \
  ((enc)->nameMatchesAscii((enc), (ptr1), (ptr2)))

#define XmlNameLength(enc, ptr) \
  ((enc)->nameLength((enc), (ptr)))

#define XmlSkipS(enc, ptr) \
  ((enc)->skipS((enc), (ptr)))

#define XmlGetAttributes(enc, ptr, attsMax, atts) \
  ((enc)->getAtts((enc), (ptr), (attsMax), (atts)))

#define XmlCharRefNumber(enc, ptr) \
  ((enc)->charRefNumber((enc), (ptr)))

#define XmlPredefinedEntityName(enc, ptr, end) \
  ((enc)->predefinedEntityName((enc), (ptr), (end)))

#define XmlUpdatePosition(enc, ptr, end, pos) \
  ((enc)->updatePosition((enc), (ptr), (end), (pos)))

#define XmlIsPublicId(enc, ptr, end, badPtr) \
  ((enc)->isPublicId((enc), (ptr), (end), (badPtr)))

#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
  ((enc)->utf8Convert((enc), (fromP), (fromLim), (toP), (toLim)))

#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
  ((enc)->utf16Convert((enc), (fromP), (fromLim), (toP), (toLim)))
244 
245 typedef struct {
246   ENCODING initEnc;
247   const ENCODING **encPtr;
248 } INIT_ENCODING;
249 
250 int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity,
251 			      const ENCODING *enc,
252 			      const char *ptr,
253 	  		      const char *end,
254 			      const char **badPtr,
255 			      const char **versionPtr,
256 			      const char **encodingNamePtr,
257 			      const ENCODING **namedEncodingPtr,
258 			      int *standalonePtr);
259 
260 int XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
261 const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncoding(void);
262 const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncoding(void);
263 int XMLTOKAPI XmlUtf8Encode(int charNumber, char *buf);
264 int XMLTOKAPI XmlUtf16Encode(int charNumber, unsigned short *buf);
265 
266 int XMLTOKAPI XmlSizeOfUnknownEncoding(void);
267 ENCODING XMLTOKAPI *
268 XmlInitUnknownEncoding(void *mem,
269 		       int *table,
270 		       int (*convert)(void *userData, const char *p),
271 		       void *userData);
272 
273 #ifdef __cplusplus
274 }
275 #endif
276 
277 #endif /* not XmlTok_INCLUDED */
278