/*
The contents of this file are subject to the Mozilla Public License
Version 1.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/

Software distributed under the License is distributed on an "AS IS"
basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific language governing rights and limitations
under the License.

The Original Code is expat.

The Initial Developer of the Original Code is James Clark.
Portions created by James Clark are Copyright (C) 1998
James Clark. All Rights Reserved.

Contributor(s):
$Id: xmltok.h,v 1.2 2001/09/15 10:14:27 torcs Exp $
*/

/*
Tokenizer interface: token type codes, the ENCODING dispatch table and
the convenience macros that call through it.
*/

#ifndef XmlTok_INCLUDED
#define XmlTok_INCLUDED 1

#ifdef __cplusplus
extern "C" {
#endif

/* Linkage/export decoration for the functions declared at the end of this
   header.  A build may predefine it; by default it expands to nothing. */
#ifndef XMLTOKAPI
#define XMLTOKAPI /* as nothing */
#endif

/* Negative token values are parenthesized so that each macro always expands
   to a single primary expression, whatever the surrounding context. */

/* The following token may be returned by XmlContentTok */
/* ] or ]] at the end of the scan; might be start of illegal ]]> sequence */
#define XML_TOK_TRAILING_RSQB (-5)

/* The following tokens may be returned by both XmlPrologTok and
   XmlContentTok */
#define XML_TOK_NONE (-4)          /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR (-3)   /* A CR at the end of the scan;
                                      might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR (-2)  /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL (-1)       /* only part of a token */
#define XML_TOK_INVALID 0

/* The following tokens are returned by XmlContentTok; some are also
   returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */

#define XML_TOK_START_TAG_WITH_ATTS 1
#define XML_TOK_START_TAG_NO_ATTS 2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4
#define XML_TOK_END_TAG 5
#define XML_TOK_DATA_CHARS 6
#define XML_TOK_DATA_NEWLINE 7
#define XML_TOK_CDATA_SECT_OPEN 8
#define XML_TOK_ENTITY_REF 9
#define XML_TOK_CHAR_REF 10 /* numeric character reference */

/* The following tokens may be returned by both XmlPrologTok and
   XmlContentTok */
#define XML_TOK_PI 11       /* processing instruction */
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */
#define XML_TOK_COMMENT 13
#define XML_TOK_BOM 14      /* Byte order mark */

/* The following tokens are returned only by XmlPrologTok */
#define XML_TOK_PROLOG_S 15
#define XML_TOK_DECL_OPEN 16  /* <!foo */
#define XML_TOK_DECL_CLOSE 17 /* > */
#define XML_TOK_NAME 18
#define XML_TOK_NMTOKEN 19
#define XML_TOK_POUND_NAME 20 /* #name */
#define XML_TOK_OR 21         /* | */
#define XML_TOK_PERCENT 22
#define XML_TOK_OPEN_PAREN 23
#define XML_TOK_CLOSE_PAREN 24
#define XML_TOK_OPEN_BRACKET 25
#define XML_TOK_CLOSE_BRACKET 26
#define XML_TOK_LITERAL 27
#define XML_TOK_PARAM_ENTITY_REF 28
#define XML_TOK_INSTANCE_START 29

/* The following occur only in element type declarations */
#define XML_TOK_NAME_QUESTION 30        /* name? */
#define XML_TOK_NAME_ASTERISK 31        /* name* */
#define XML_TOK_NAME_PLUS 32            /* name+ */
#define XML_TOK_COND_SECT_OPEN 33       /* <![ */
#define XML_TOK_COND_SECT_CLOSE 34      /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37     /* )+ */
#define XML_TOK_COMMA 38

/* The following token is returned only by XmlAttributeValueTok */
#define XML_TOK_ATTRIBUTE_VALUE_S 39

/* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40

/* Tokenizer states.  Each state value is an index into ENCODING.scanners,
   and XML_N_STATES is the length of that array. */
#define XML_N_STATES 3
#define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1
#define XML_CDATA_SECTION_STATE 2

/* Literal types.  Each value is an index into ENCODING.literalScanners,
   and XML_N_LITERAL_TYPES is the length of that array. */
#define XML_N_LITERAL_TYPES 2
#define XML_ATTRIBUTE_VALUE_LITERAL 0
#define XML_ENTITY_VALUE_LITERAL 1

/* The size of the buffer passed to XmlUtf8Encode must be at least this. */
#define XML_UTF8_ENCODE_MAX 4
/* The size of the buffer passed to XmlUtf16Encode must be at least this. */
#define XML_UTF16_ENCODE_MAX 2

/* A line/column pair; this is the type updated through XmlUpdatePosition. */
typedef struct position {
  /* first line and first column are 0 not 1 */
  unsigned long lineNumber;
  unsigned long columnNumber;
} POSITION;

/* One attribute record; XmlGetAttributes receives an array of these. */
typedef struct {
  const char *name;
  const char *valuePtr;
  const char *valueEnd;
  char normalized; /* NOTE(review): looks like a boolean flag - confirm in
                      the implementation */
} ATTRIBUTE;

struct encoding;
typedef struct encoding ENCODING;

/* Table of function pointers implementing the tokenizer for one character
   encoding.  Callers normally go through the Xml* macros defined below
   rather than invoking the members directly. */
struct encoding {
  /* Token scanners, indexed by XML_PROLOG_STATE, XML_CONTENT_STATE or
     XML_CDATA_SECTION_STATE; called through XmlTok. */
  int (*scanners[XML_N_STATES])(const ENCODING *,
                                const char *,
                                const char *,
                                const char **);
  /* Second-level scanners, indexed by XML_ATTRIBUTE_VALUE_LITERAL or
     XML_ENTITY_VALUE_LITERAL; called through XmlLiteralTok. */
  int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *,
                                              const char *,
                                              const char *,
                                              const char **);
  /* Called through XmlSameName. */
  int (*sameName)(const ENCODING *,
                  const char *, const char *);
  /* Called through XmlNameMatchesAscii. */
  int (*nameMatchesAscii)(const ENCODING *,
                          const char *, const char *);
  /* Called through XmlNameLength. */
  int (*nameLength)(const ENCODING *, const char *);
  /* Called through XmlSkipS. */
  const char *(*skipS)(const ENCODING *, const char *);
  /* Called through XmlGetAttributes. */
  int (*getAtts)(const ENCODING *enc, const char *ptr,
                 int attsMax, ATTRIBUTE *atts);
  /* Called through XmlCharRefNumber. */
  int (*charRefNumber)(const ENCODING *enc, const char *ptr);
  /* Called through XmlPredefinedEntityName. */
  int (*predefinedEntityName)(const ENCODING *, const char *, const char *);
  /* Called through XmlUpdatePosition. */
  void (*updatePosition)(const ENCODING *,
                         const char *ptr,
                         const char *end,
                         POSITION *);
  /* Called through XmlIsPublicId. */
  int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
                    const char **badPtr);
  /* Called through XmlUtf8Convert. */
  void (*utf8Convert)(const ENCODING *enc,
                      const char **fromP,
                      const char *fromLim,
                      char **toP,
                      const char *toLim);
  /* Called through XmlUtf16Convert. */
  void (*utf16Convert)(const ENCODING *enc,
                       const char **fromP,
                       const char *fromLim,
                       unsigned short **toP,
                       const unsigned short *toLim);
  int minBytesPerChar;
  char isUtf8;  /* NOTE(review): presumably a boolean flag - confirm */
  char isUtf16; /* NOTE(review): presumably a boolean flag - confirm */
};

/*
Scan the string starting at ptr until the end of the next complete token,
but do not scan past eptr (the macros below name this argument "end").
Return an integer giving the type of token.

Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.

Return XML_TOK_PARTIAL when the string does not contain a complete token;
nextTokPtr will not be set.

Return XML_TOK_INVALID when the string does not start a valid token;
nextTokPtr will be set to point to the character which made the token
invalid.

Otherwise the string starts with a valid token; nextTokPtr will be set to
point to the character following the end of that token.

Each data character counts as a single token, but adjacent data characters
may be returned together. Similarly for characters in the prolog outside
literals, comments and processing instructions.
*/

/* Every dispatch macro below expands `enc` twice: once to fetch the function
   pointer and once as the first call argument.  Do not pass an expression
   with side effects as `enc`. */

#define XmlTok(enc, state, ptr, end, nextTokPtr) \
  (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))

#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
  XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)

#define XmlContentTok(enc, ptr, end, nextTokPtr) \
  XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)

#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
  XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)

/* This is used for performing a 2nd-level tokenization on
   the content of a literal that has already been returned by XmlTok. */

#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
  (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))

#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
  XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)

#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
  XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)

/* Thin wrappers: each forwards its arguments to the ENCODING member named
   in the expansion. */

#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))

#define XmlNameMatchesAscii(enc, ptr1, ptr2) \
  (((enc)->nameMatchesAscii)(enc, ptr1, ptr2))

#define XmlNameLength(enc, ptr) \
  (((enc)->nameLength)(enc, ptr))

#define XmlSkipS(enc, ptr) \
  (((enc)->skipS)(enc, ptr))

#define XmlGetAttributes(enc, ptr, attsMax, atts) \
  (((enc)->getAtts)(enc, ptr, attsMax, atts))

#define XmlCharRefNumber(enc, ptr) \
  (((enc)->charRefNumber)(enc, ptr))

#define XmlPredefinedEntityName(enc, ptr, end) \
  (((enc)->predefinedEntityName)(enc, ptr, end))

#define XmlUpdatePosition(enc, ptr, end, pos) \
  (((enc)->updatePosition)(enc, ptr, end, pos))

#define XmlIsPublicId(enc, ptr, end, badPtr) \
  (((enc)->isPublicId)(enc, ptr, end, badPtr))

#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
  (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))

#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
  (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))

/* Storage handed to XmlInitEncoding.
   NOTE(review): how initEnc and encPtr are used is defined by the
   implementation, which is not visible from this header. */
typedef struct {
  ENCODING initEnc;
  const ENCODING **encPtr;
} INIT_ENCODING;

/* NOTE(review): the contracts of the functions below (return values and
   which out-parameters are written) live in the implementation file;
   consult it before relying on any behaviour not stated here. */

int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity,
                              const ENCODING *enc,
                              const char *ptr,
                              const char *end,
                              const char **badPtr,
                              const char **versionPtr,
                              const char **encodingNamePtr,
                              const ENCODING **namedEncodingPtr,
                              int *standalonePtr);

int XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncoding(void);
const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncoding(void);

/* The size of buf must be at least XML_UTF8_ENCODE_MAX. */
int XMLTOKAPI XmlUtf8Encode(int charNumber, char *buf);
/* The size of buf must be at least XML_UTF16_ENCODE_MAX. */
int XMLTOKAPI XmlUtf16Encode(int charNumber, unsigned short *buf);

/* NOTE(review): presumably mem must provide at least
   XmlSizeOfUnknownEncoding() bytes - confirm against the implementation. */
int XMLTOKAPI XmlSizeOfUnknownEncoding(void);
ENCODING XMLTOKAPI *
XmlInitUnknownEncoding(void *mem,
                       int *table,
                       int (*convert)(void *userData, const char *p),
                       void *userData);

#ifdef __cplusplus
}
#endif

#endif /* not XmlTok_INCLUDED */