1 /*
 * Last update:
3  *
4  * - $Id: xmltok.h,v 1.4 2002/12/14 19:36:12 mati Exp $
5  *
6  */
7 
8 #ifndef XmlTok_INCLUDED
9 #define XmlTok_INCLUDED 1
10 
11 #ifdef __cplusplus
12 extern "C"
13 {
14 #endif
15 
/* Decoration placed on every tokenizer API declaration below.  It expands
 * to nothing unless it was already defined before this point. */
#if !defined(XMLTOKAPI)
#define XMLTOKAPI
#endif
19 
/* Returned only by XmlContentTok: the scan ended on "]" or "]]", which
 * might be the start of an illegal "]]>" sequence. */
#define XML_TOK_TRAILING_RSQB  (-5)

/* Returned by both XmlPrologTok and XmlContentTok. */
#define XML_TOK_NONE           (-4)  /* the string to be scanned is empty */
#define XML_TOK_TRAILING_CR    (-3)  /* the scan ended on a CR, which might
                                      * be part of a CRLF sequence */
#define XML_TOK_PARTIAL_CHAR   (-2)  /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL        (-1)  /* only part of a token */
#define XML_TOK_INVALID        0
30 
/* Tokens returned by XmlContentTok; some of them are also returned by
 * XmlAttributeValueTok, XmlEntityTok and XmlCdataSectionTok.
 * NOTE(review): no XmlEntityTok is defined in this header;
 * XmlEntityValueTok may be what is meant -- confirm. */
#define XML_TOK_START_TAG_WITH_ATTS      1
#define XML_TOK_START_TAG_NO_ATTS        2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS  3   /* empty element tag <e/> */
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS    4
#define XML_TOK_END_TAG                  5
#define XML_TOK_DATA_CHARS               6
#define XML_TOK_DATA_NEWLINE             7
#define XML_TOK_CDATA_SECT_OPEN          8
#define XML_TOK_ENTITY_REF               9
#define XML_TOK_CHAR_REF                 10  /* numeric character reference */
44 
/* Tokens that both XmlPrologTok and XmlContentTok may return. */
#define XML_TOK_PI        11  /* processing instruction */
#define XML_TOK_XML_DECL  12  /* XML declaration or text declaration */
#define XML_TOK_COMMENT   13
#define XML_TOK_BOM       14  /* byte order mark */
50 
/* Tokens returned only by XmlPrologTok. */
#define XML_TOK_PROLOG_S          15
#define XML_TOK_DECL_OPEN         16  /* <!foo */
#define XML_TOK_DECL_CLOSE        17  /* > */
#define XML_TOK_NAME              18
#define XML_TOK_NMTOKEN           19
#define XML_TOK_POUND_NAME        20  /* #name */
#define XML_TOK_OR                21  /* | */
#define XML_TOK_PERCENT           22
#define XML_TOK_OPEN_PAREN        23
#define XML_TOK_CLOSE_PAREN       24
#define XML_TOK_OPEN_BRACKET      25
#define XML_TOK_CLOSE_BRACKET     26
#define XML_TOK_LITERAL           27
#define XML_TOK_PARAM_ENTITY_REF  28
#define XML_TOK_INSTANCE_START    29
67 
/* Tokens that occur only in element type declarations. */
#define XML_TOK_NAME_QUESTION         30  /* name? */
#define XML_TOK_NAME_ASTERISK         31  /* name* */
#define XML_TOK_NAME_PLUS             32  /* name+ */
#define XML_TOK_COND_SECT_OPEN        33  /* <![ */
#define XML_TOK_COND_SECT_CLOSE       34  /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION  35  /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK  36  /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS      37  /* )+ */
#define XML_TOK_COMMA                 38
78 
/* Returned only by XmlAttributeValueTok. */
#define XML_TOK_ATTRIBUTE_VALUE_S  39

/* Returned only by XmlCdataSectionTok. */
#define XML_TOK_CDATA_SECT_CLOSE   40

/* Returned by XmlPrologTok for a name containing a colon when namespace
 * processing is in effect. */
#define XML_TOK_PREFIXED_NAME      41
88 
/* Scanner states.  XmlTok uses the state as an index into the scanners[]
 * table of an encoding; XML_N_STATES is the size of that table. */
#define XML_PROLOG_STATE             0
#define XML_CONTENT_STATE            1
#define XML_CDATA_SECTION_STATE      2
#define XML_N_STATES                 3

/* Literal kinds.  XmlLiteralTok uses the kind as an index into the
 * literalScanners[] table; XML_N_LITERAL_TYPES is the size of that table. */
#define XML_ATTRIBUTE_VALUE_LITERAL  0
#define XML_ENTITY_VALUE_LITERAL     1
#define XML_N_LITERAL_TYPES          2
97 
/* Minimum sizes of the output buffers handed to the encode functions:
 * XmlUtf8Encode needs a buffer of at least XML_UTF8_ENCODE_MAX and
 * XmlUtf16Encode a buffer of at least XML_UTF16_ENCODE_MAX. */
#define XML_UTF8_ENCODE_MAX   4
#define XML_UTF16_ENCODE_MAX  2
102 
/* A line/column location.  Both counters are zero-based: the first line
 * is line 0 and the first column is column 0. */
struct position {
	unsigned long lineNumber;
	unsigned long columnNumber;
};
typedef struct position POSITION;
110 
/* Attribute record; a pointer to these is what the getAtts member of an
 * encoding (see XmlGetAttributes) receives as its `atts` argument. */
typedef struct {
	const char *name;	/* NOTE(review): presumably the attribute name -- confirm */
	const char *valuePtr;	/* NOTE(review): presumably start of the value -- confirm */
	const char *valueEnd;	/* NOTE(review): presumably end of the value -- confirm */
	char normalized;	/* NOTE(review): looks like a boolean flag -- confirm */
} ATTRIBUTE;
119 
120 	struct encoding;
121 	typedef struct encoding ENCODING;
122 
123 	struct encoding
124 	{
125 		int (*scanners[XML_N_STATES]) (const ENCODING *,
126 					       const char *,
127 					       const char *, const char **);
128 		int (*literalScanners[XML_N_LITERAL_TYPES]) (const ENCODING *,
129 							     const char *,
130 							     const char *,
131 							     const char **);
132 		int (*sameName) (const ENCODING *,
133 				 const char *, const char *);
134 		int (*nameMatchesAscii) (const ENCODING *,
135 					 const char *, const char *);
136 		int (*nameLength) (const ENCODING *, const char *);
137 		const char *(*skipS) (const ENCODING *, const char *);
138 		int (*getAtts) (const ENCODING * enc, const char *ptr,
139 				int attsMax, ATTRIBUTE * atts);
140 		int (*charRefNumber) (const ENCODING * enc, const char *ptr);
141 		int (*predefinedEntityName) (const ENCODING *, const char *,
142 					     const char *);
143 		void (*updatePosition) (const ENCODING *, const char *ptr,
144 					const char *end, POSITION *);
145 		int (*isPublicId) (const ENCODING * enc, const char *ptr,
146 				   const char *end, const char **badPtr);
147 		void (*utf8Convert) (const ENCODING * enc, const char **fromP,
148 				     const char *fromLim, char **toP,
149 				     const char *toLim);
150 		void (*utf16Convert) (const ENCODING * enc,
151 				      const char **fromP, const char *fromLim,
152 				      unsigned short **toP,
153 				      const unsigned short *toLim);
154 		int minBytesPerChar;
155 		char isUtf8;
156 		char isUtf16;
157 	};
158 
159 /*
Scan the string starting at ptr until the end of the next complete token,
but do not scan past end.  Return an integer giving the type of token.

Return XML_TOK_NONE when ptr == end; nextTokPtr will not be set.
164 
165 Return XML_TOK_PARTIAL when the string does not contain a complete token;
166 nextTokPtr will not be set.
167 
168 Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr
169 will be set to point to the character which made the token invalid.
170 
171 Otherwise the string starts with a valid token; nextTokPtr will be set to point
172 to the character following the end of that token.
173 
174 Each data character counts as a single token, but adjacent data characters
175 may be returned together.  Similarly for characters in the prolog outside
176 literals, comments and processing instructions.
177 */
178 
179 
/* Call the scanner that the encoding registers for `state`.  Note that
 * `enc` is evaluated twice. */
#define XmlTok(enc, state, ptr, end, nextTokPtr) \
  ((enc)->scanners[(state)]((enc), (ptr), (end), (nextTokPtr)))

/* XmlTok with the scanner state fixed to prolog, content or CDATA section. */
#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
  XmlTok((enc), XML_PROLOG_STATE, (ptr), (end), (nextTokPtr))

#define XmlContentTok(enc, ptr, end, nextTokPtr) \
  XmlTok((enc), XML_CONTENT_STATE, (ptr), (end), (nextTokPtr))

#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
  XmlTok((enc), XML_CDATA_SECTION_STATE, (ptr), (end), (nextTokPtr))
191 
/* Second-level tokenization: applied to the content of a literal that
 * XmlTok has already returned.  Calls the literal scanner that the
 * encoding registers for `literalType`; `enc` is evaluated twice. */
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
  ((enc)->literalScanners[(literalType)]((enc), (ptr), (end), (nextTokPtr)))

/* XmlLiteralTok with the literal type fixed. */
#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
  XmlLiteralTok((enc), XML_ATTRIBUTE_VALUE_LITERAL, (ptr), (end), (nextTokPtr))

#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
  XmlLiteralTok((enc), XML_ENTITY_VALUE_LITERAL, (ptr), (end), (nextTokPtr))
203 
/* Name helpers.  Each macro forwards to the member of the same purpose in
 * the encoding and passes the encoding as first argument, so `enc` is
 * evaluated twice. */
#define XmlSameName(enc, ptr1, ptr2) \
  ((enc)->sameName((enc), (ptr1), (ptr2)))

#define XmlNameMatchesAscii(enc, ptr1, ptr2) \
  ((enc)->nameMatchesAscii((enc), (ptr1), (ptr2)))

#define XmlNameLength(enc, ptr) \
  ((enc)->nameLength((enc), (ptr)))

#define XmlSkipS(enc, ptr) \
  ((enc)->skipS((enc), (ptr)))
214 
/* Attribute, reference, position and conversion helpers.  Each macro
 * forwards to the corresponding member of the encoding and passes the
 * encoding as first argument, so `enc` is evaluated twice. */
#define XmlGetAttributes(enc, ptr, attsMax, atts) \
  ((enc)->getAtts((enc), (ptr), (attsMax), (atts)))

#define XmlCharRefNumber(enc, ptr) \
  ((enc)->charRefNumber((enc), (ptr)))

#define XmlPredefinedEntityName(enc, ptr, end) \
  ((enc)->predefinedEntityName((enc), (ptr), (end)))

#define XmlUpdatePosition(enc, ptr, end, pos) \
  ((enc)->updatePosition((enc), (ptr), (end), (pos)))

#define XmlIsPublicId(enc, ptr, end, badPtr) \
  ((enc)->isPublicId((enc), (ptr), (end), (badPtr)))

#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
  ((enc)->utf8Convert((enc), (fromP), (fromLim), (toP), (toLim)))

#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
  ((enc)->utf16Convert((enc), (fromP), (fromLim), (toP), (toLim)))
235 
236 	typedef struct
237 	{
238 		ENCODING initEnc;
239 		const ENCODING **encPtr;
240 	}
241 	INIT_ENCODING;
242 
243 	int XMLTOKAPI XmlParseXmlDecl (int isGeneralTextEntity,
244 				       const ENCODING * enc,
245 				       const char *ptr,
246 				       const char *end,
247 				       const char **badPtr,
248 				       const char **versionPtr,
249 				       const char **encodingNamePtr,
250 				       const ENCODING ** namedEncodingPtr,
251 				       int *standalonePtr);
252 
253 	int XMLTOKAPI XmlInitEncoding (INIT_ENCODING *, const ENCODING **,
254 				       const char *name);
255 	const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncoding ();
256 	const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncoding ();
257 	int XMLTOKAPI XmlUtf8Encode (int charNumber, char *buf);
258 	int XMLTOKAPI XmlUtf16Encode (int charNumber, unsigned short *buf);
259 
260 	int XMLTOKAPI XmlSizeOfUnknownEncoding ();
261 	ENCODING XMLTOKAPI *XmlInitUnknownEncoding (void *mem,
262 						    int *table,
263 						    int (*conv) (void
264 								 *userData,
265 								 const char
266 								 *p),
267 						    void *userData);
268 
269 	int XMLTOKAPI XmlParseXmlDeclNS (int isGeneralTextEntity,
270 					 const ENCODING * enc,
271 					 const char *ptr,
272 					 const char *end,
273 					 const char **badPtr,
274 					 const char **versionPtr,
275 					 const char **encodingNamePtr,
276 					 const ENCODING ** namedEncodingPtr,
277 					 int *standalonePtr);
278 	int XMLTOKAPI XmlInitEncodingNS (INIT_ENCODING *, const ENCODING **,
279 					 const char *name);
280 	const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncodingNS ();
281 	const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncodingNS ();
282 	ENCODING XMLTOKAPI *XmlInitUnknownEncodingNS (void *mem,
283 						      int *table,
284 						      int (*conv) (void
285 								   *userData,
286 								   const char
287 								   *p),
288 						      void *userData);
289 #ifdef __cplusplus
290 }
291 #endif
292 
293 #endif				/* not XmlTok_INCLUDED */
294