/*
 * Last update:
 *
 * - $Id: xmltok.h,v 1.4 2002/12/14 19:36:12 mati Exp $
 *
 * Tokenizer interface: token codes, the ENCODING dispatch table, and
 * convenience macros that call through that table.
 */

#ifndef XmlTok_INCLUDED
#define XmlTok_INCLUDED 1

#ifdef __cplusplus
extern "C"
{
#endif

#ifndef XMLTOKAPI
#define XMLTOKAPI               /* as nothing */
#endif

/* The following token may be returned by XmlContentTok */
#define XML_TOK_TRAILING_RSQB -5        /* ] or ]] at the end of the scan; might be
                                         * start of illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
#define XML_TOK_NONE -4                 /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR -3          /* A CR at the end of the scan;
                                         * might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR -2         /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL -1              /* only part of a token */
#define XML_TOK_INVALID 0

/* The following tokens are returned by XmlContentTok; some are also
   returned by XmlAttributeValueTok, XmlEntityValueTok, XmlCdataSectionTok */

#define XML_TOK_START_TAG_WITH_ATTS 1
#define XML_TOK_START_TAG_NO_ATTS 2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3       /* empty element tag <e/> */
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4
#define XML_TOK_END_TAG 5
#define XML_TOK_DATA_CHARS 6
#define XML_TOK_DATA_NEWLINE 7
#define XML_TOK_CDATA_SECT_OPEN 8
#define XML_TOK_ENTITY_REF 9
#define XML_TOK_CHAR_REF 10             /* numeric character reference */

/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
#define XML_TOK_PI 11                   /* processing instruction */
#define XML_TOK_XML_DECL 12             /* XML decl or text decl */
#define XML_TOK_COMMENT 13
#define XML_TOK_BOM 14                  /* Byte order mark */

/* The following tokens are returned only by XmlPrologTok */
#define XML_TOK_PROLOG_S 15
#define XML_TOK_DECL_OPEN 16            /* <!foo */
#define XML_TOK_DECL_CLOSE 17           /* > */
#define XML_TOK_NAME 18
#define XML_TOK_NMTOKEN 19
#define XML_TOK_POUND_NAME 20           /* #name */
#define XML_TOK_OR 21                   /* | */
#define XML_TOK_PERCENT 22
#define XML_TOK_OPEN_PAREN 23
#define XML_TOK_CLOSE_PAREN 24
#define XML_TOK_OPEN_BRACKET 25
#define XML_TOK_CLOSE_BRACKET 26
#define XML_TOK_LITERAL 27
#define XML_TOK_PARAM_ENTITY_REF 28
#define XML_TOK_INSTANCE_START 29

/* The following occur only in element type declarations */
#define XML_TOK_NAME_QUESTION 30        /* name? */
#define XML_TOK_NAME_ASTERISK 31        /* name* */
#define XML_TOK_NAME_PLUS 32            /* name+ */
#define XML_TOK_COND_SECT_OPEN 33       /* <![ */
#define XML_TOK_COND_SECT_CLOSE 34      /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37     /* )+ */
#define XML_TOK_COMMA 38

/* The following token is returned only by XmlAttributeValueTok */
#define XML_TOK_ATTRIBUTE_VALUE_S 39

/* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40

/* With namespace processing this is returned by XmlPrologTok
   for a name with a colon. */
#define XML_TOK_PREFIXED_NAME 41

/* Indices into ENCODING.scanners; XML_N_STATES is the array length. */
#define XML_N_STATES 3
#define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1
#define XML_CDATA_SECTION_STATE 2

/* Indices into ENCODING.literalScanners; XML_N_LITERAL_TYPES is the
   array length. */
#define XML_N_LITERAL_TYPES 2
#define XML_ATTRIBUTE_VALUE_LITERAL 0
#define XML_ENTITY_VALUE_LITERAL 1

/* The size of the buffer passed to XmlUtf8Encode must be at least this. */
#define XML_UTF8_ENCODE_MAX 4
/* The size of the buffer passed to XmlUtf16Encode must be at least this. */
#define XML_UTF16_ENCODE_MAX 2

/* A line/column location, filled in through XmlUpdatePosition. */
typedef struct position
{
  /* first line and first column are 0 not 1 */
  unsigned long lineNumber;
  unsigned long columnNumber;
}
POSITION;

/* One attribute as reported through XmlGetAttributes.
   NOTE(review): valuePtr/valueEnd presumably delimit the raw attribute
   value and `normalized` presumably flags that it needs no further
   normalization -- confirm against the tokenizer implementation. */
typedef struct
{
  const char *name;
  const char *valuePtr;
  const char *valueEnd;
  char normalized;
}
ATTRIBUTE;

struct encoding;
typedef struct encoding ENCODING;

/* Per-encoding dispatch table.  Each function-pointer member is reached
   through the like-named Xml* macro defined further down; parameter names
   below mirror the argument names those macros use. */
struct encoding
{
  /* Indexed by XML_PROLOG_STATE, XML_CONTENT_STATE,
     XML_CDATA_SECTION_STATE (see XmlTok). */
  int (*scanners[XML_N_STATES]) (const ENCODING *enc,
                                 const char *ptr,
                                 const char *end,
                                 const char **nextTokPtr);
  /* Indexed by XML_ATTRIBUTE_VALUE_LITERAL, XML_ENTITY_VALUE_LITERAL
     (see XmlLiteralTok). */
  int (*literalScanners[XML_N_LITERAL_TYPES]) (const ENCODING *enc,
                                               const char *ptr,
                                               const char *end,
                                               const char **nextTokPtr);
  int (*sameName) (const ENCODING *enc,
                   const char *ptr1, const char *ptr2);
  int (*nameMatchesAscii) (const ENCODING *enc,
                           const char *ptr1, const char *ptr2);
  int (*nameLength) (const ENCODING *enc, const char *ptr);
  const char *(*skipS) (const ENCODING *enc, const char *ptr);
  int (*getAtts) (const ENCODING *enc, const char *ptr,
                  int attsMax, ATTRIBUTE *atts);
  int (*charRefNumber) (const ENCODING *enc, const char *ptr);
  int (*predefinedEntityName) (const ENCODING *enc, const char *ptr,
                               const char *end);
  void (*updatePosition) (const ENCODING *enc, const char *ptr,
                          const char *end, POSITION *pos);
  int (*isPublicId) (const ENCODING *enc, const char *ptr,
                     const char *end, const char **badPtr);
  void (*utf8Convert) (const ENCODING *enc, const char **fromP,
                       const char *fromLim, char **toP,
                       const char *toLim);
  void (*utf16Convert) (const ENCODING *enc,
                        const char **fromP, const char *fromLim,
                        unsigned short **toP,
                        const unsigned short *toLim);
  /* NOTE(review): presumably the smallest number of bytes one character
     occupies in this encoding -- confirm. */
  int minBytesPerChar;
  /* NOTE(review): presumably nonzero when the encoding is UTF-8 /
     UTF-16 respectively -- confirm. */
  char isUtf8;
  char isUtf16;
};

/*
Scan the string starting at ptr until the end of the next complete token,
but do not scan past eptr.  Return an integer giving the type of token.

Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.

Return XML_TOK_PARTIAL when the string does not contain a complete token;
nextTokPtr will not be set.

Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr
will be set to point to the character which made the token invalid.

Otherwise the string starts with a valid token; nextTokPtr will be set to point
to the character following the end of that token.

Each data character counts as a single token, but adjacent data characters
may be returned together.  Similarly for characters in the prolog outside
literals, comments and processing instructions.
*/

/* Note for every dispatch macro below: `enc` appears twice in the
   expansion (once to select the function pointer, once as the first
   argument), so pass an expression without side effects. */

#define XmlTok(enc, state, ptr, end, nextTokPtr) \
  (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))

#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
   XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)

#define XmlContentTok(enc, ptr, end, nextTokPtr) \
   XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)

#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
   XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)

/* This is used for performing a 2nd-level tokenization on
   the content of a literal that has already been returned by XmlTok. */

#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
  (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))

#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
   XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)

#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
   XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)

#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))

#define XmlNameMatchesAscii(enc, ptr1, ptr2) \
  (((enc)->nameMatchesAscii)(enc, ptr1, ptr2))

#define XmlNameLength(enc, ptr) \
  (((enc)->nameLength)(enc, ptr))

#define XmlSkipS(enc, ptr) \
  (((enc)->skipS)(enc, ptr))

#define XmlGetAttributes(enc, ptr, attsMax, atts) \
  (((enc)->getAtts)(enc, ptr, attsMax, atts))

#define XmlCharRefNumber(enc, ptr) \
  (((enc)->charRefNumber)(enc, ptr))

#define XmlPredefinedEntityName(enc, ptr, end) \
  (((enc)->predefinedEntityName)(enc, ptr, end))

#define XmlUpdatePosition(enc, ptr, end, pos) \
  (((enc)->updatePosition)(enc, ptr, end, pos))

#define XmlIsPublicId(enc, ptr, end, badPtr) \
  (((enc)->isPublicId)(enc, ptr, end, badPtr))

#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
  (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))

#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
  (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))

/* State object handed to XmlInitEncoding / XmlInitEncodingNS. */
typedef struct
{
  ENCODING initEnc;
  const ENCODING **encPtr;
}
INIT_ENCODING;

int XMLTOKAPI XmlParseXmlDecl (int isGeneralTextEntity,
                               const ENCODING *enc,
                               const char *ptr,
                               const char *end,
                               const char **badPtr,
                               const char **versionPtr,
                               const char **encodingNamePtr,
                               const ENCODING **namedEncodingPtr,
                               int *standalonePtr);

int XMLTOKAPI XmlInitEncoding (INIT_ENCODING *, const ENCODING **,
                               const char *name);
/* Functions taking no arguments are declared with (void) so that they are
   real prototypes in C as well as in C++. */
const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncoding (void);
const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncoding (void);
/* buf must hold at least XML_UTF8_ENCODE_MAX elements. */
int XMLTOKAPI XmlUtf8Encode (int charNumber, char *buf);
/* buf must hold at least XML_UTF16_ENCODE_MAX elements. */
int XMLTOKAPI XmlUtf16Encode (int charNumber, unsigned short *buf);

int XMLTOKAPI XmlSizeOfUnknownEncoding (void);
ENCODING XMLTOKAPI *XmlInitUnknownEncoding (void *mem,
                                            int *table,
                                            int (*conv) (void *userData,
                                                         const char *p),
                                            void *userData);

/* Namespace-processing counterparts of the declarations above. */
int XMLTOKAPI XmlParseXmlDeclNS (int isGeneralTextEntity,
                                 const ENCODING *enc,
                                 const char *ptr,
                                 const char *end,
                                 const char **badPtr,
                                 const char **versionPtr,
                                 const char **encodingNamePtr,
                                 const ENCODING **namedEncodingPtr,
                                 int *standalonePtr);
int XMLTOKAPI XmlInitEncodingNS (INIT_ENCODING *, const ENCODING **,
                                 const char *name);
const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncodingNS (void);
const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncodingNS (void);
ENCODING XMLTOKAPI *XmlInitUnknownEncodingNS (void *mem,
                                              int *table,
                                              int (*conv) (void *userData,
                                                           const char *p),
                                              void *userData);
#ifdef __cplusplus
}
#endif

#endif                          /* not XmlTok_INCLUDED */