1 /* 4b74aa710b4ed5ce464b0ce544852cb47bf905c85a49c7bae2749f5885cb966d (2.2.5+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #if !defined(_GNU_SOURCE)
34 # define _GNU_SOURCE 1 /* syscall prototype */
35 #endif
36
37 #include <stddef.h>
38 #include <string.h> /* memset(), memcpy() */
39 #include <assert.h>
40 #include <limits.h> /* UINT_MAX */
41 #include <stdio.h> /* fprintf */
42 #include <stdlib.h> /* getenv */
43
44 #ifdef _WIN32
45 #define getpid GetCurrentProcessId
46 #else
47 #include <sys/time.h> /* gettimeofday() */
48 #include <sys/types.h> /* getpid() */
49 #include <unistd.h> /* getpid() */
50 #include <fcntl.h> /* O_RDONLY */
51 #include <errno.h>
52 #endif
53
54 #define XML_BUILDING_EXPAT 1
55
56 #ifdef _WIN32
57 #include "winconfig.h"
58 #elif defined(HAVE_EXPAT_CONFIG_H)
59 #include <expat_config.h>
60 #endif /* ndef _WIN32 */
61
62 #include "ascii.h"
63 #include "expat.h"
64 #include "siphash.h"
65
66 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
67 # if defined(HAVE_GETRANDOM)
68 # include <sys/random.h> /* getrandom */
69 # else
70 # include <unistd.h> /* syscall */
71 # include <sys/syscall.h> /* SYS_getrandom */
72 # endif
73 # if ! defined(GRND_NONBLOCK)
74 # define GRND_NONBLOCK 0x0001
75 # endif /* defined(GRND_NONBLOCK) */
76 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
77
78 #if defined(HAVE_LIBBSD) \
79 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
80 # include <bsd/stdlib.h>
81 #endif
82
83 #if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
84 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
85 #endif
86
87 #if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
88 && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
89 && !defined(XML_DEV_URANDOM) \
90 && !defined(_WIN32) \
91 && !defined(XML_POOR_ENTROPY)
92 # error \
93 You do not have support for any sources of high quality entropy \
94 enabled. For end user security, that is probably not what you want. \
95 \
96 Your options include: \
97 * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
98 * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
99 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
100 * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
101 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
102 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
103 * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
104 * Windows (RtlGenRandom): _WIN32. \
105 \
    If you insist on not using any of these, bypass this error by defining \
107 XML_POOR_ENTROPY; you have been warned. \
108 \
109 If you have reasons to patch this detection code away or need changes \
110 to the build system, please open a bug. Thank you!
111 #endif
112
113
/* Bind the generic internal-encoding names to either the UTF-16 or the
   UTF-8 flavour, depending on whether XML_UNICODE is defined, and pick
   the matching ICHAR type.

   MUST_CONVERT(enc, s) is non-zero when input at s cannot be used as-is:
   - XML_UNICODE:  enc->isUtf16 is not set, or the address of s is odd;
   - otherwise:    enc->isUtf8 is not set (s is not examined).
*/
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* The address is converted to an integer with a cast.  Subtracting a
   null pointer from s to obtain that integer would be undefined
   behaviour, because the two pointers do not point into the same
   object. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((size_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
132
133
/* When namespace support (XML_NS) is not compiled in, every "NS" entry
   point is simply an alias for its plain counterpart. */
#if !defined(XML_NS)

#  define XmlInitEncodingNS         XmlInitEncoding
#  define XmlInitUnknownEncodingNS  XmlInitUnknownEncoding
   /* drop any earlier definition before re-aliasing */
#  undef  XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS  XmlGetInternalEncoding
#  define XmlParseXmlDeclNS         XmlParseXmlDecl

#endif /* !defined(XML_NS) */
143
/* XML_T(x) spells a character constant, and XML_L(x) a string literal,
   in the type selected by XML_UNICODE / XML_UNICODE_WCHAR_T.

   The casting variants of XML_T parenthesise both the argument and the
   whole expansion, so that an expression argument is cast as a unit and
   the result cannot re-associate with neighbouring operators.
*/
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) ((const wchar_t)(x))
#define XML_L(x) L ## x
#else
#define XML_T(x) ((const unsigned short)(x))
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif
160
/* Smallest multiple of sz that is not less than n.  sz has to be a power
   of 2, so that (sz - 1) is exactly the mask of low bits to clear. */
#define ROUND_UP(n, sz) (~((sz) - 1) & ((sz) - 1 + (n)))
163
164 /* Handle the case where memmove() doesn't exist. */
165 #ifndef HAVE_MEMMOVE
166 #ifdef HAVE_BCOPY
167 #define memmove(d,s,l) bcopy((s),(d),(l))
168 #else
169 #error memmove does not exist on this platform, nor is a substitute available
170 #endif /* HAVE_BCOPY */
171 #endif /* HAVE_MEMMOVE */
172
173 #include "internal.h"
174 #include "xmltok.h"
175 #include "xmlrole.h"
176
177 typedef const XML_Char *KEY;
178
179 typedef struct {
180 KEY name;
181 } NAMED;
182
183 typedef struct {
184 NAMED **v;
185 unsigned char power;
186 size_t size;
187 size_t used;
188 const XML_Memory_Handling_Suite *mem;
189 } HASH_TABLE;
190
191 static size_t
192 keylen(KEY s);
193
194 static void
195 copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key);
196
/* Probing after a collision needs a step size that is relatively prime
   to the table size, which is a power of 2.  Double hashing is used: the
   second hash value comes for free from the bits of the first one that
   were masked out when the index was computed
   (index = hash & mask, with mask = table->size - 1).
   The step is capped at table->size / 4 (mask >> 2) and forced odd,
   because every odd number is relatively prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
  (((mask) >> 2) & (((hash) & ~(mask)) >> ((power) - 1)))
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)(1 | (SECOND_HASH(hash, mask, power))))
209
210 typedef struct {
211 NAMED **p;
212 NAMED **end;
213 } HASH_TABLE_ITER;
214
/* Initial sizes for the parser's growable buffers and tables. */
#define INIT_TAG_BUF_SIZE   32          /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE  1024
#define INIT_ATTS_SIZE      16
#define INIT_ATTS_VERSION   0xFFFFFFFF
#define INIT_BLOCK_SIZE     1024
#define INIT_BUFFER_SIZE    1024

#define EXPAND_SPARE        24
223
224 typedef struct binding {
225 struct prefix *prefix;
226 struct binding *nextTagBinding;
227 struct binding *prevPrefixBinding;
228 const struct attribute_id *attId;
229 XML_Char *uri;
230 int uriLen;
231 int uriAlloc;
232 } BINDING;
233
234 typedef struct prefix {
235 const XML_Char *name;
236 BINDING *binding;
237 } PREFIX;
238
239 typedef struct {
240 const XML_Char *str;
241 const XML_Char *localPart;
242 const XML_Char *prefix;
243 int strLen;
244 int uriLen;
245 int prefixLen;
246 } TAG_NAME;
247
248 /* TAG represents an open element.
249 The name of the element is stored in both the document and API
250 encodings. The memory buffer 'buf' is a separately-allocated
251 memory area which stores the name. During the XML_Parse()/
252 XMLParseBuffer() when the element is open, the memory for the 'raw'
253 version of the name (in the document encoding) is shared with the
254 document buffer. If the element is open across calls to
255 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
256 contain the 'raw' name as well.
257
258 A parser re-uses these structures, maintaining a list of allocated
259 TAG objects in a free list.
260 */
261 typedef struct tag {
262 struct tag *parent; /* parent of this element */
263 const char *rawName; /* tagName in the original encoding */
264 int rawNameLength;
265 TAG_NAME name; /* tagName in the API encoding */
266 char *buf; /* buffer for name components */
267 char *bufEnd; /* end of the buffer */
268 BINDING *bindings;
269 } TAG;
270
271 typedef struct {
272 const XML_Char *name;
273 const XML_Char *textPtr;
274 int textLen; /* length in XML_Chars */
275 int processed; /* # of processed bytes - when suspended */
276 const XML_Char *systemId;
277 const XML_Char *base;
278 const XML_Char *publicId;
279 const XML_Char *notation;
280 XML_Bool open;
281 XML_Bool is_param;
282 XML_Bool is_internal; /* true if declared in internal subset outside PE */
283 } ENTITY;
284
285 typedef struct {
286 enum XML_Content_Type type;
287 enum XML_Content_Quant quant;
288 const XML_Char * name;
289 int firstchild;
290 int lastchild;
291 int childcnt;
292 int nextsib;
293 } CONTENT_SCAFFOLD;
294
295 #define INIT_SCAFFOLD_ELEMENTS 32
296
297 typedef struct block {
298 struct block *next;
299 int size;
300 XML_Char s[1];
301 } BLOCK;
302
303 typedef struct {
304 BLOCK *blocks;
305 BLOCK *freeBlocks;
306 const XML_Char *end;
307 XML_Char *ptr;
308 XML_Char *start;
309 const XML_Memory_Handling_Suite *mem;
310 } STRING_POOL;
311
312 /* The XML_Char before the name is used to determine whether
313 an attribute has been specified. */
314 typedef struct attribute_id {
315 XML_Char *name;
316 PREFIX *prefix;
317 XML_Bool maybeTokenized;
318 XML_Bool xmlns;
319 } ATTRIBUTE_ID;
320
321 typedef struct {
322 const ATTRIBUTE_ID *id;
323 XML_Bool isCdata;
324 const XML_Char *value;
325 } DEFAULT_ATTRIBUTE;
326
327 typedef struct {
328 unsigned long version;
329 unsigned long hash;
330 const XML_Char *uriName;
331 } NS_ATT;
332
333 typedef struct {
334 const XML_Char *name;
335 PREFIX *prefix;
336 const ATTRIBUTE_ID *idAtt;
337 int nDefaultAtts;
338 int allocDefaultAtts;
339 DEFAULT_ATTRIBUTE *defaultAtts;
340 } ELEMENT_TYPE;
341
342 typedef struct {
343 HASH_TABLE generalEntities;
344 HASH_TABLE elementTypes;
345 HASH_TABLE attributeIds;
346 HASH_TABLE prefixes;
347 STRING_POOL pool;
348 STRING_POOL entityValuePool;
349 /* false once a parameter entity reference has been skipped */
350 XML_Bool keepProcessing;
351 /* true once an internal or external PE reference has been encountered;
352 this includes the reference to an external subset */
353 XML_Bool hasParamEntityRefs;
354 XML_Bool standalone;
355 #ifdef XML_DTD
356 /* indicates if external PE has been read */
357 XML_Bool paramEntityRead;
358 HASH_TABLE paramEntities;
359 #endif /* XML_DTD */
360 PREFIX defaultPrefix;
361 /* === scaffolding for building content model === */
362 XML_Bool in_eldecl;
363 CONTENT_SCAFFOLD *scaffold;
364 unsigned contentStringLen;
365 unsigned scaffSize;
366 unsigned scaffCount;
367 int scaffLevel;
368 int *scaffIndex;
369 } DTD;
370
371 typedef struct open_internal_entity {
372 const char *internalEventPtr;
373 const char *internalEventEndPtr;
374 struct open_internal_entity *next;
375 ENTITY *entity;
376 int startTagLevel;
377 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
378 } OPEN_INTERNAL_ENTITY;
379
380 typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
381 const char *start,
382 const char *end,
383 const char **endPtr);
384
385 static Processor prologProcessor;
386 static Processor prologInitProcessor;
387 static Processor contentProcessor;
388 static Processor cdataSectionProcessor;
389 #ifdef XML_DTD
390 static Processor ignoreSectionProcessor;
391 static Processor externalParEntProcessor;
392 static Processor externalParEntInitProcessor;
393 static Processor entityValueProcessor;
394 static Processor entityValueInitProcessor;
395 #endif /* XML_DTD */
396 static Processor epilogProcessor;
397 static Processor errorProcessor;
398 static Processor externalEntityInitProcessor;
399 static Processor externalEntityInitProcessor2;
400 static Processor externalEntityInitProcessor3;
401 static Processor externalEntityContentProcessor;
402 static Processor internalEntityProcessor;
403
404 static enum XML_Error
405 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
406 static enum XML_Error
407 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
408 const char *s, const char *next);
409 static enum XML_Error
410 initializeEncoding(XML_Parser parser);
411 static enum XML_Error
412 doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
413 const char *end, int tok, const char *next, const char **nextPtr,
414 XML_Bool haveMore);
415 static enum XML_Error
416 processInternalEntity(XML_Parser parser, ENTITY *entity,
417 XML_Bool betweenDecl);
418 static enum XML_Error
419 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
420 const char *start, const char *end, const char **endPtr,
421 XML_Bool haveMore);
422 static enum XML_Error
423 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
424 const char *end, const char **nextPtr, XML_Bool haveMore);
425 #ifdef XML_DTD
426 static enum XML_Error
427 doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
428 const char *end, const char **nextPtr, XML_Bool haveMore);
429 #endif /* XML_DTD */
430
431 static void
432 freeBindings(XML_Parser parser, BINDING *bindings);
433 static enum XML_Error
434 storeAtts(XML_Parser parser, const ENCODING *, const char *s,
435 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
436 static enum XML_Error
437 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
438 const XML_Char *uri, BINDING **bindingsPtr);
439 static int
440 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
441 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
442 static enum XML_Error
443 storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
444 const char *, const char *, STRING_POOL *);
445 static enum XML_Error
446 appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
447 const char *, const char *, STRING_POOL *);
448 static ATTRIBUTE_ID *
449 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
450 const char *end);
451 static int
452 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
453 static enum XML_Error
454 storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
455 const char *end);
456 static int
457 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
458 const char *start, const char *end);
459 static int
460 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
461 const char *end);
462 static void
463 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
464 const char *end);
465
466 static const XML_Char * getContext(XML_Parser parser);
467 static XML_Bool
468 setContext(XML_Parser parser, const XML_Char *context);
469
470 static void FASTCALL normalizePublicId(XML_Char *s);
471
472 static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
473 /* do not call if m_parentParser != NULL */
474 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
475 static void
476 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
477 static int
478 dtdCopy(XML_Parser oldParser,
479 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
480 static int
481 copyEntityTable(XML_Parser oldParser,
482 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
483 static NAMED *
484 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
485 static void FASTCALL
486 hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
487 static void FASTCALL hashTableClear(HASH_TABLE *);
488 static void FASTCALL hashTableDestroy(HASH_TABLE *);
489 static void FASTCALL
490 hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
491 static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
492
493 static void FASTCALL
494 poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
495 static void FASTCALL poolClear(STRING_POOL *);
496 static void FASTCALL poolDestroy(STRING_POOL *);
497 static XML_Char *
498 poolAppend(STRING_POOL *pool, const ENCODING *enc,
499 const char *ptr, const char *end);
500 static XML_Char *
501 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
502 const char *ptr, const char *end);
503 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
504 static const XML_Char * FASTCALL
505 poolCopyString(STRING_POOL *pool, const XML_Char *s);
506 static const XML_Char *
507 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
508 static const XML_Char * FASTCALL
509 poolAppendString(STRING_POOL *pool, const XML_Char *s);
510
511 static int FASTCALL nextScaffoldPart(XML_Parser parser);
512 static XML_Content * build_model(XML_Parser parser);
513 static ELEMENT_TYPE *
514 getElementType(XML_Parser parser, const ENCODING *enc,
515 const char *ptr, const char *end);
516
517 static XML_Char *copyString(const XML_Char *s,
518 const XML_Memory_Handling_Suite *memsuite);
519
520 static unsigned long generate_hash_secret_salt(XML_Parser parser);
521 static XML_Bool startParsing(XML_Parser parser);
522
523 static XML_Parser
524 parserCreate(const XML_Char *encodingName,
525 const XML_Memory_Handling_Suite *memsuite,
526 const XML_Char *nameSep,
527 DTD *dtd);
528
529 static void
530 parserInit(XML_Parser parser, const XML_Char *encodingName);
531
/* Accessors for a STRING_POOL.  [start, ptr) is the string currently
   under construction.

   poolStart / poolEnd   - first character / one past the last character
   poolLength            - number of characters in the current string
   poolChop              - drop the last character
   poolLastChar          - the last character (lvalue)
   poolDiscard           - forget the current string (ptr = start)
   poolFinish            - keep the current string and start a new one
                           right after it (start = ptr)
   poolAppendChar        - append c, calling poolGrow() when ptr has
                           reached end; evaluates to 1 on success and to
                           0 when poolGrow() fails

   Every use of the 'pool' and 'c' arguments is parenthesised so that
   expression arguments (e.g. "pools + 1") expand correctly.  'pool' is
   evaluated more than once, so it must be free of side effects.
*/
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
543
544 struct XML_ParserStruct {
545 /* The first member must be m_userData so that the XML_GetUserData
546 macro works. */
547 void *m_userData;
548 void *m_handlerArg;
549 char *m_buffer;
550 const XML_Memory_Handling_Suite m_mem;
551 /* first character to be parsed */
552 const char *m_bufferPtr;
553 /* past last character to be parsed */
554 char *m_bufferEnd;
555 /* allocated end of m_buffer */
556 const char *m_bufferLim;
557 XML_Index m_parseEndByteIndex;
558 const char *m_parseEndPtr;
559 XML_Char *m_dataBuf;
560 XML_Char *m_dataBufEnd;
561 XML_StartElementHandler m_startElementHandler;
562 XML_EndElementHandler m_endElementHandler;
563 XML_CharacterDataHandler m_characterDataHandler;
564 XML_ProcessingInstructionHandler m_processingInstructionHandler;
565 XML_CommentHandler m_commentHandler;
566 XML_StartCdataSectionHandler m_startCdataSectionHandler;
567 XML_EndCdataSectionHandler m_endCdataSectionHandler;
568 XML_DefaultHandler m_defaultHandler;
569 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
570 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
571 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
572 XML_NotationDeclHandler m_notationDeclHandler;
573 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
574 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
575 XML_NotStandaloneHandler m_notStandaloneHandler;
576 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
577 XML_Parser m_externalEntityRefHandlerArg;
578 XML_SkippedEntityHandler m_skippedEntityHandler;
579 XML_UnknownEncodingHandler m_unknownEncodingHandler;
580 XML_ElementDeclHandler m_elementDeclHandler;
581 XML_AttlistDeclHandler m_attlistDeclHandler;
582 XML_EntityDeclHandler m_entityDeclHandler;
583 XML_XmlDeclHandler m_xmlDeclHandler;
584 const ENCODING *m_encoding;
585 INIT_ENCODING m_initEncoding;
586 const ENCODING *m_internalEncoding;
587 const XML_Char *m_protocolEncodingName;
588 XML_Bool m_ns;
589 XML_Bool m_ns_triplets;
590 void *m_unknownEncodingMem;
591 void *m_unknownEncodingData;
592 void *m_unknownEncodingHandlerData;
593 void (XMLCALL *m_unknownEncodingRelease)(void *);
594 PROLOG_STATE m_prologState;
595 Processor *m_processor;
596 enum XML_Error m_errorCode;
597 const char *m_eventPtr;
598 const char *m_eventEndPtr;
599 const char *m_positionPtr;
600 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
601 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
602 XML_Bool m_defaultExpandInternalEntities;
603 int m_tagLevel;
604 ENTITY *m_declEntity;
605 const XML_Char *m_doctypeName;
606 const XML_Char *m_doctypeSysid;
607 const XML_Char *m_doctypePubid;
608 const XML_Char *m_declAttributeType;
609 const XML_Char *m_declNotationName;
610 const XML_Char *m_declNotationPublicId;
611 ELEMENT_TYPE *m_declElementType;
612 ATTRIBUTE_ID *m_declAttributeId;
613 XML_Bool m_declAttributeIsCdata;
614 XML_Bool m_declAttributeIsId;
615 DTD *m_dtd;
616 const XML_Char *m_curBase;
617 TAG *m_tagStack;
618 TAG *m_freeTagList;
619 BINDING *m_inheritedBindings;
620 BINDING *m_freeBindingList;
621 int m_attsSize;
622 int m_nSpecifiedAtts;
623 int m_idAttIndex;
624 ATTRIBUTE *m_atts;
625 NS_ATT *m_nsAtts;
626 unsigned long m_nsAttsVersion;
627 unsigned char m_nsAttsPower;
628 #ifdef XML_ATTR_INFO
629 XML_AttrInfo *m_attInfo;
630 #endif
631 POSITION m_position;
632 STRING_POOL m_tempPool;
633 STRING_POOL m_temp2Pool;
634 char *m_groupConnector;
635 unsigned int m_groupSize;
636 XML_Char m_namespaceSeparator;
637 XML_Parser m_parentParser;
638 XML_ParsingStatus m_parsingStatus;
639 #ifdef XML_DTD
640 XML_Bool m_isParamEntity;
641 XML_Bool m_useForeignDTD;
642 enum XML_ParamEntityParsing m_paramEntityParsing;
643 #endif
644 unsigned long m_hash_secret_salt;
645 };
646
/* Allocation through the parser's own memory suite (m_mem).  The
   'parser' argument is parenthesised so that any pointer expression
   (e.g. "&obj") may be passed, not just a plain identifier. */
#define MALLOC(parser, s)      ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s)  ((parser)->m_mem.realloc_fcn((p),(s)))
#define FREE(parser, p)        ((parser)->m_mem.free_fcn((p)))
650
651
652 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)653 XML_ParserCreate(const XML_Char *encodingName)
654 {
655 return XML_ParserCreate_MM(encodingName, NULL, NULL);
656 }
657
658 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)659 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
660 {
661 XML_Char tmp[2];
662 *tmp = nsSep;
663 return XML_ParserCreate_MM(encodingName, NULL, tmp);
664 }
665
666 static const XML_Char implicitContext[] = {
667 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
668 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
669 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
670 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
671 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
672 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
673 };
674
675
676 /* To avoid warnings about unused functions: */
677 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
678
679 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
680
/* Obtain entropy on Linux 3.17+

   Fills target with count random bytes using getrandom() (or the raw
   SYS_getrandom syscall) with GRND_NONBLOCK.

   Returns 1 once all count bytes have been written, 0 otherwise.

   errno is consulted only directly after a failing call, never after a
   successful one:
   - a short but successful read keeps the loop going for the rest;
   - a failure with EINTR is retried;
   - any other failure, or a call that made no progress, gives up.
*/
static int
writeRandomBytes_getrandom_nonblock(void * target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const int bytesWrittenMore =
#if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1;  /* full count bytes written */
    } else if ((bytesWrittenMore == 0) || (errno != EINTR)) {
      return 0;  /* no progress, or an error other than EINTR */
    }
    /* otherwise: more bytes needed, or interrupted by a signal -- retry */
  }
}
708
709 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
710
711
712 #if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
713
/* Extract entropy from /dev/urandom

   Fills target with count bytes read from /dev/urandom.

   Returns 1 once all count bytes have been read, 0 if the device cannot
   be opened or reading stops early.  The descriptor is closed on every
   path after a successful open.

   errno is consulted only directly after a failing read(), never after
   a successful one:
   - a short but successful read keeps the loop going for the rest;
   - a failure with EINTR is retried;
   - any other failure, or a read() returning 0, gives up.
*/
static int
writeRandomBytes_dev_urandom(void * target, size_t count) {
  int success = 0;  /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
    } else if ((bytesWrittenMore == 0) || (errno != EINTR)) {
      break;  /* end of file, or an error other than EINTR */
    }
    /* otherwise: more bytes needed, or interrupted by a signal -- retry */
  }

  close(fd);
  return success;
}
741
742 #endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
743
744 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
745
746
747 #if defined(HAVE_ARC4RANDOM)
748
/* Fill target with count bytes taken from successive arc4random() values.
   Each 32-bit value supplies up to four bytes, least significant byte
   first; surplus bytes of the last value are dropped. */
static void
writeRandomBytes_arc4random(void * target, size_t count) {
  uint8_t * const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    const size_t remaining = count - filled;
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    size_t k;

    for (k = 0; k < chunk; k++) {
      out[filled + k] = (uint8_t)(word >> (k * 8));
    }
    filled += chunk;
  }
}
764
765 #endif /* defined(HAVE_ARC4RANDOM) */
766
767
768 #ifdef _WIN32
769
770 typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
771 HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */
772
773 /* Obtain entropy on Windows XP / Windows Server 2003 and later.
774 * Hint on RtlGenRandom and the following article from libsodium.
775 *
776 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
777 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
778 */
779 static int
writeRandomBytes_RtlGenRandom(void * target,size_t count)780 writeRandomBytes_RtlGenRandom(void * target, size_t count) {
781 int success = 0; /* full count bytes written? */
782 const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));
783
784 if (advapi32) {
785 const RTLGENRANDOM_FUNC RtlGenRandom
786 = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036");
787 if (RtlGenRandom) {
788 if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) {
789 success = 1;
790 }
791 }
792 FreeLibrary(advapi32);
793 }
794
795 return success;
796 }
797
798 #endif /* _WIN32 */
799
800
801 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
802
/* Low-quality, time-based entropy for the fallback path.
   Windows: XOR of the two halves of the current FILETIME.
   Elsewhere: the microseconds field of gettimeofday(). */
static unsigned long
gather_time_entropy(void)
{
#ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Checked in debug builds only; the cast keeps rc "used" when the
     assertion is compiled out by NDEBUG. */
  assert (rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#endif
}
826
827 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
828
829
/* Pass entropy through unchanged.  If the environment variable
   EXPAT_ENTROPY_DEBUG is exactly "1", also print the value, the label of
   the source it came from and its size to stderr. */
static unsigned long
ENTROPY_DEBUG(const char * label, unsigned long entropy) {
  const char * const debugSetting = getenv("EXPAT_ENTROPY_DEBUG");
  const int enabled = (debugSetting != NULL)
      && (strcmp(debugSetting, "1") == 0);

  if (enabled) {
    const int hexDigits = (int)sizeof(entropy) * 2;  /* two per byte */
    const unsigned long byteCount = (unsigned long)sizeof(entropy);

    fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n",
        label, hexDigits, entropy, byteCount);
  }
  return entropy;
}
841
842 static unsigned long
generate_hash_secret_salt(XML_Parser parser)843 generate_hash_secret_salt(XML_Parser parser)
844 {
845 unsigned long entropy;
846 (void)parser;
847
848 /* "Failproof" high quality providers: */
849 #if defined(HAVE_ARC4RANDOM_BUF)
850 arc4random_buf(&entropy, sizeof(entropy));
851 return ENTROPY_DEBUG("arc4random_buf", entropy);
852 #elif defined(HAVE_ARC4RANDOM)
853 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
854 return ENTROPY_DEBUG("arc4random", entropy);
855 #else
856 /* Try high quality providers first .. */
857 #ifdef _WIN32
858 if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) {
859 return ENTROPY_DEBUG("RtlGenRandom", entropy);
860 }
861 #elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
862 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
863 return ENTROPY_DEBUG("getrandom", entropy);
864 }
865 #endif
866 #if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
867 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
868 return ENTROPY_DEBUG("/dev/urandom", entropy);
869 }
870 #endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
871 /* .. and self-made low quality for backup: */
872
873 /* Process ID is 0 bits entropy if attacker has local access */
874 entropy = gather_time_entropy() ^ getpid();
875
876 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
877 if (sizeof(unsigned long) == 4) {
878 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
879 } else {
880 return ENTROPY_DEBUG("fallback(8)",
881 entropy * (unsigned long)2305843009213693951ULL);
882 }
883 #endif
884 }
885
886 static unsigned long
get_hash_secret_salt(XML_Parser parser)887 get_hash_secret_salt(XML_Parser parser) {
888 if (parser->m_parentParser != NULL)
889 return get_hash_secret_salt(parser->m_parentParser);
890 return parser->m_hash_secret_salt;
891 }
892
893 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)894 startParsing(XML_Parser parser)
895 {
896 /* hash functions must be initialized before setContext() is called */
897 if (parser->m_hash_secret_salt == 0)
898 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
899 if (parser->m_ns) {
900 /* implicit context only set for root parser, since child
901 parsers (i.e. external entity parsers) will inherit it
902 */
903 return setContext(parser, implicitContext);
904 }
905 return XML_TRUE;
906 }
907
908 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)909 XML_ParserCreate_MM(const XML_Char *encodingName,
910 const XML_Memory_Handling_Suite *memsuite,
911 const XML_Char *nameSep)
912 {
913 return parserCreate(encodingName, memsuite, nameSep, NULL);
914 }
915
916 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)917 parserCreate(const XML_Char *encodingName,
918 const XML_Memory_Handling_Suite *memsuite,
919 const XML_Char *nameSep,
920 DTD *dtd)
921 {
922 XML_Parser parser;
923
924 if (memsuite) {
925 XML_Memory_Handling_Suite *mtemp;
926 parser = (XML_Parser)
927 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
928 if (parser != NULL) {
929 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
930 mtemp->malloc_fcn = memsuite->malloc_fcn;
931 mtemp->realloc_fcn = memsuite->realloc_fcn;
932 mtemp->free_fcn = memsuite->free_fcn;
933 }
934 }
935 else {
936 XML_Memory_Handling_Suite *mtemp;
937 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
938 if (parser != NULL) {
939 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
940 mtemp->malloc_fcn = malloc;
941 mtemp->realloc_fcn = realloc;
942 mtemp->free_fcn = free;
943 }
944 }
945
946 if (!parser)
947 return parser;
948
949 parser->m_buffer = NULL;
950 parser->m_bufferLim = NULL;
951
952 parser->m_attsSize = INIT_ATTS_SIZE;
953 parser->m_atts = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
954 if (parser->m_atts == NULL) {
955 FREE(parser, parser);
956 return NULL;
957 }
958 #ifdef XML_ATTR_INFO
959 parser->m_attInfo = (XML_AttrInfo*)MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
960 if (parser->m_attInfo == NULL) {
961 FREE(parser, parser->m_atts);
962 FREE(parser, parser);
963 return NULL;
964 }
965 #endif
966 parser->m_dataBuf = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
967 if (parser->m_dataBuf == NULL) {
968 FREE(parser, parser->m_atts);
969 #ifdef XML_ATTR_INFO
970 FREE(parser, parser->m_attInfo);
971 #endif
972 FREE(parser, parser);
973 return NULL;
974 }
975 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
976
977 if (dtd)
978 parser->m_dtd = dtd;
979 else {
980 parser->m_dtd = dtdCreate(&parser->m_mem);
981 if (parser->m_dtd == NULL) {
982 FREE(parser, parser->m_dataBuf);
983 FREE(parser, parser->m_atts);
984 #ifdef XML_ATTR_INFO
985 FREE(parser, parser->m_attInfo);
986 #endif
987 FREE(parser, parser);
988 return NULL;
989 }
990 }
991
992 parser->m_freeBindingList = NULL;
993 parser->m_freeTagList = NULL;
994 parser->m_freeInternalEntities = NULL;
995
996 parser->m_groupSize = 0;
997 parser->m_groupConnector = NULL;
998
999 parser->m_unknownEncodingHandler = NULL;
1000 parser->m_unknownEncodingHandlerData = NULL;
1001
1002 parser->m_namespaceSeparator = ASCII_EXCL;
1003 parser->m_ns = XML_FALSE;
1004 parser->m_ns_triplets = XML_FALSE;
1005
1006 parser->m_nsAtts = NULL;
1007 parser->m_nsAttsVersion = 0;
1008 parser->m_nsAttsPower = 0;
1009
1010 parser->m_protocolEncodingName = NULL;
1011
1012 poolInit(&parser->m_tempPool, &(parser->m_mem));
1013 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1014 parserInit(parser, encodingName);
1015
1016 if (encodingName && !parser->m_protocolEncodingName) {
1017 XML_ParserFree(parser);
1018 return NULL;
1019 }
1020
1021 if (nameSep) {
1022 parser->m_ns = XML_TRUE;
1023 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1024 parser->m_namespaceSeparator = *nameSep;
1025 }
1026 else {
1027 parser->m_internalEncoding = XmlGetInternalEncoding();
1028 }
1029
1030 return parser;
1031 }
1032
1033 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1034 parserInit(XML_Parser parser, const XML_Char *encodingName)
1035 {
1036 parser->m_processor = prologInitProcessor;
1037 XmlPrologStateInit(&parser->m_prologState);
1038 if (encodingName != NULL) {
1039 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1040 }
1041 parser->m_curBase = NULL;
1042 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1043 parser->m_userData = NULL;
1044 parser->m_handlerArg = NULL;
1045 parser->m_startElementHandler = NULL;
1046 parser->m_endElementHandler = NULL;
1047 parser->m_characterDataHandler = NULL;
1048 parser->m_processingInstructionHandler = NULL;
1049 parser->m_commentHandler = NULL;
1050 parser->m_startCdataSectionHandler = NULL;
1051 parser->m_endCdataSectionHandler = NULL;
1052 parser->m_defaultHandler = NULL;
1053 parser->m_startDoctypeDeclHandler = NULL;
1054 parser->m_endDoctypeDeclHandler = NULL;
1055 parser->m_unparsedEntityDeclHandler = NULL;
1056 parser->m_notationDeclHandler = NULL;
1057 parser->m_startNamespaceDeclHandler = NULL;
1058 parser->m_endNamespaceDeclHandler = NULL;
1059 parser->m_notStandaloneHandler = NULL;
1060 parser->m_externalEntityRefHandler = NULL;
1061 parser->m_externalEntityRefHandlerArg = parser;
1062 parser->m_skippedEntityHandler = NULL;
1063 parser->m_elementDeclHandler = NULL;
1064 parser->m_attlistDeclHandler = NULL;
1065 parser->m_entityDeclHandler = NULL;
1066 parser->m_xmlDeclHandler = NULL;
1067 parser->m_bufferPtr = parser->m_buffer;
1068 parser->m_bufferEnd = parser->m_buffer;
1069 parser->m_parseEndByteIndex = 0;
1070 parser->m_parseEndPtr = NULL;
1071 parser->m_declElementType = NULL;
1072 parser->m_declAttributeId = NULL;
1073 parser->m_declEntity = NULL;
1074 parser->m_doctypeName = NULL;
1075 parser->m_doctypeSysid = NULL;
1076 parser->m_doctypePubid = NULL;
1077 parser->m_declAttributeType = NULL;
1078 parser->m_declNotationName = NULL;
1079 parser->m_declNotationPublicId = NULL;
1080 parser->m_declAttributeIsCdata = XML_FALSE;
1081 parser->m_declAttributeIsId = XML_FALSE;
1082 memset(&parser->m_position, 0, sizeof(POSITION));
1083 parser->m_errorCode = XML_ERROR_NONE;
1084 parser->m_eventPtr = NULL;
1085 parser->m_eventEndPtr = NULL;
1086 parser->m_positionPtr = NULL;
1087 parser->m_openInternalEntities = NULL;
1088 parser->m_defaultExpandInternalEntities = XML_TRUE;
1089 parser->m_tagLevel = 0;
1090 parser->m_tagStack = NULL;
1091 parser->m_inheritedBindings = NULL;
1092 parser->m_nSpecifiedAtts = 0;
1093 parser->m_unknownEncodingMem = NULL;
1094 parser->m_unknownEncodingRelease = NULL;
1095 parser->m_unknownEncodingData = NULL;
1096 parser->m_parentParser = NULL;
1097 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1098 #ifdef XML_DTD
1099 parser->m_isParamEntity = XML_FALSE;
1100 parser->m_useForeignDTD = XML_FALSE;
1101 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1102 #endif
1103 parser->m_hash_secret_salt = 0;
1104 }
1105
1106 /* moves list of bindings to m_freeBindingList */
1107 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1108 moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
1109 {
1110 while (bindings) {
1111 BINDING *b = bindings;
1112 bindings = bindings->nextTagBinding;
1113 b->nextTagBinding = parser->m_freeBindingList;
1114 parser->m_freeBindingList = b;
1115 }
1116 }
1117
1118 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1119 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
1120 {
1121 TAG *tStk;
1122 OPEN_INTERNAL_ENTITY *openEntityList;
1123
1124 if (parser == NULL)
1125 return XML_FALSE;
1126
1127 if (parser->m_parentParser)
1128 return XML_FALSE;
1129 /* move m_tagStack to m_freeTagList */
1130 tStk = parser->m_tagStack;
1131 while (tStk) {
1132 TAG *tag = tStk;
1133 tStk = tStk->parent;
1134 tag->parent = parser->m_freeTagList;
1135 moveToFreeBindingList(parser, tag->bindings);
1136 tag->bindings = NULL;
1137 parser->m_freeTagList = tag;
1138 }
1139 /* move m_openInternalEntities to m_freeInternalEntities */
1140 openEntityList = parser->m_openInternalEntities;
1141 while (openEntityList) {
1142 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1143 openEntityList = openEntity->next;
1144 openEntity->next = parser->m_freeInternalEntities;
1145 parser->m_freeInternalEntities = openEntity;
1146 }
1147 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1148 FREE(parser, parser->m_unknownEncodingMem);
1149 if (parser->m_unknownEncodingRelease)
1150 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1151 poolClear(&parser->m_tempPool);
1152 poolClear(&parser->m_temp2Pool);
1153 FREE(parser, (void *)parser->m_protocolEncodingName);
1154 parser->m_protocolEncodingName = NULL;
1155 parserInit(parser, encodingName);
1156 dtdReset(parser->m_dtd, &parser->m_mem);
1157 return XML_TRUE;
1158 }
1159
1160 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1161 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
1162 {
1163 if (parser == NULL)
1164 return XML_STATUS_ERROR;
1165 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1166 XXX There's no way for the caller to determine which of the
1167 XXX possible error cases caused the XML_STATUS_ERROR return.
1168 */
1169 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1170 return XML_STATUS_ERROR;
1171
1172 /* Get rid of any previous encoding name */
1173 FREE(parser, (void *)parser->m_protocolEncodingName);
1174
1175 if (encodingName == NULL)
1176 /* No new encoding name */
1177 parser->m_protocolEncodingName = NULL;
1178 else {
1179 /* Copy the new encoding name into allocated memory */
1180 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1181 if (!parser->m_protocolEncodingName)
1182 return XML_STATUS_ERROR;
1183 }
1184 return XML_STATUS_OK;
1185 }
1186
1187 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1188 XML_ExternalEntityParserCreate(XML_Parser oldParser,
1189 const XML_Char *context,
1190 const XML_Char *encodingName)
1191 {
1192 XML_Parser parser = oldParser;
1193 DTD *newDtd = NULL;
1194 DTD *oldDtd;
1195 XML_StartElementHandler oldStartElementHandler;
1196 XML_EndElementHandler oldEndElementHandler;
1197 XML_CharacterDataHandler oldCharacterDataHandler;
1198 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1199 XML_CommentHandler oldCommentHandler;
1200 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1201 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1202 XML_DefaultHandler oldDefaultHandler;
1203 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1204 XML_NotationDeclHandler oldNotationDeclHandler;
1205 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1206 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1207 XML_NotStandaloneHandler oldNotStandaloneHandler;
1208 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1209 XML_SkippedEntityHandler oldSkippedEntityHandler;
1210 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1211 XML_ElementDeclHandler oldElementDeclHandler;
1212 XML_AttlistDeclHandler oldAttlistDeclHandler;
1213 XML_EntityDeclHandler oldEntityDeclHandler;
1214 XML_XmlDeclHandler oldXmlDeclHandler;
1215 ELEMENT_TYPE * oldDeclElementType;
1216
1217 void *oldUserData;
1218 void *oldHandlerArg;
1219 XML_Bool oldDefaultExpandInternalEntities;
1220 XML_Parser oldExternalEntityRefHandlerArg;
1221 #ifdef XML_DTD
1222 enum XML_ParamEntityParsing oldParamEntityParsing;
1223 int oldInEntityValue;
1224 #endif
1225 XML_Bool oldns_triplets;
1226 /* Note that the new parser shares the same hash secret as the old
1227 parser, so that dtdCopy and copyEntityTable can lookup values
1228 from hash tables associated with either parser without us having
1229 to worry which hash secrets each table has.
1230 */
1231 unsigned long oldhash_secret_salt;
1232
1233 /* Validate the oldParser parameter before we pull everything out of it */
1234 if (oldParser == NULL)
1235 return NULL;
1236
1237 /* Stash the original parser contents on the stack */
1238 oldDtd = parser->m_dtd;
1239 oldStartElementHandler = parser->m_startElementHandler;
1240 oldEndElementHandler = parser->m_endElementHandler;
1241 oldCharacterDataHandler = parser->m_characterDataHandler;
1242 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1243 oldCommentHandler = parser->m_commentHandler;
1244 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1245 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1246 oldDefaultHandler = parser->m_defaultHandler;
1247 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1248 oldNotationDeclHandler = parser->m_notationDeclHandler;
1249 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1250 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1251 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1252 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1253 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1254 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1255 oldElementDeclHandler = parser->m_elementDeclHandler;
1256 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1257 oldEntityDeclHandler = parser->m_entityDeclHandler;
1258 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1259 oldDeclElementType = parser->m_declElementType;
1260
1261 oldUserData = parser->m_userData;
1262 oldHandlerArg = parser->m_handlerArg;
1263 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1264 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1265 #ifdef XML_DTD
1266 oldParamEntityParsing = parser->m_paramEntityParsing;
1267 oldInEntityValue = parser->m_prologState.inEntityValue;
1268 #endif
1269 oldns_triplets = parser->m_ns_triplets;
1270 /* Note that the new parser shares the same hash secret as the old
1271 parser, so that dtdCopy and copyEntityTable can lookup values
1272 from hash tables associated with either parser without us having
1273 to worry which hash secrets each table has.
1274 */
1275 oldhash_secret_salt = parser->m_hash_secret_salt;
1276
1277 #ifdef XML_DTD
1278 if (!context)
1279 newDtd = oldDtd;
1280 #endif /* XML_DTD */
1281
1282 /* Note that the magical uses of the pre-processor to make field
1283 access look more like C++ require that `parser' be overwritten
1284 here. This makes this function more painful to follow than it
1285 would be otherwise.
1286 */
1287 if (parser->m_ns) {
1288 XML_Char tmp[2];
1289 *tmp = parser->m_namespaceSeparator;
1290 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1291 }
1292 else {
1293 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1294 }
1295
1296 if (!parser)
1297 return NULL;
1298
1299 parser->m_startElementHandler = oldStartElementHandler;
1300 parser->m_endElementHandler = oldEndElementHandler;
1301 parser->m_characterDataHandler = oldCharacterDataHandler;
1302 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1303 parser->m_commentHandler = oldCommentHandler;
1304 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1305 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1306 parser->m_defaultHandler = oldDefaultHandler;
1307 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1308 parser->m_notationDeclHandler = oldNotationDeclHandler;
1309 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1310 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1311 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1312 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1313 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1314 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1315 parser->m_elementDeclHandler = oldElementDeclHandler;
1316 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1317 parser->m_entityDeclHandler = oldEntityDeclHandler;
1318 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1319 parser->m_declElementType = oldDeclElementType;
1320 parser->m_userData = oldUserData;
1321 if (oldUserData == oldHandlerArg)
1322 parser->m_handlerArg = parser->m_userData;
1323 else
1324 parser->m_handlerArg = parser;
1325 if (oldExternalEntityRefHandlerArg != oldParser)
1326 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1327 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1328 parser->m_ns_triplets = oldns_triplets;
1329 parser->m_hash_secret_salt = oldhash_secret_salt;
1330 parser->m_parentParser = oldParser;
1331 #ifdef XML_DTD
1332 parser->m_paramEntityParsing = oldParamEntityParsing;
1333 parser->m_prologState.inEntityValue = oldInEntityValue;
1334 if (context) {
1335 #endif /* XML_DTD */
1336 if (!dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1337 || !setContext(parser, context)) {
1338 XML_ParserFree(parser);
1339 return NULL;
1340 }
1341 parser->m_processor = externalEntityInitProcessor;
1342 #ifdef XML_DTD
1343 }
1344 else {
1345 /* The DTD instance referenced by parser->m_dtd is shared between the document's
1346 root parser and external PE parsers, therefore one does not need to
1347 call setContext. In addition, one also *must* not call setContext,
1348 because this would overwrite existing prefix->binding pointers in
1349 parser->m_dtd with ones that get destroyed with the external PE parser.
1350 This would leave those prefixes with dangling pointers.
1351 */
1352 parser->m_isParamEntity = XML_TRUE;
1353 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1354 parser->m_processor = externalParEntInitProcessor;
1355 }
1356 #endif /* XML_DTD */
1357 return parser;
1358 }
1359
1360 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1361 destroyBindings(BINDING *bindings, XML_Parser parser)
1362 {
1363 for (;;) {
1364 BINDING *b = bindings;
1365 if (!b)
1366 break;
1367 bindings = b->nextTagBinding;
1368 FREE(parser, b->uri);
1369 FREE(parser, b);
1370 }
1371 }
1372
1373 void XMLCALL
XML_ParserFree(XML_Parser parser)1374 XML_ParserFree(XML_Parser parser)
1375 {
1376 TAG *tagList;
1377 OPEN_INTERNAL_ENTITY *entityList;
1378 if (parser == NULL)
1379 return;
1380 /* free m_tagStack and m_freeTagList */
1381 tagList = parser->m_tagStack;
1382 for (;;) {
1383 TAG *p;
1384 if (tagList == NULL) {
1385 if (parser->m_freeTagList == NULL)
1386 break;
1387 tagList = parser->m_freeTagList;
1388 parser->m_freeTagList = NULL;
1389 }
1390 p = tagList;
1391 tagList = tagList->parent;
1392 FREE(parser, p->buf);
1393 destroyBindings(p->bindings, parser);
1394 FREE(parser, p);
1395 }
1396 /* free m_openInternalEntities and m_freeInternalEntities */
1397 entityList = parser->m_openInternalEntities;
1398 for (;;) {
1399 OPEN_INTERNAL_ENTITY *openEntity;
1400 if (entityList == NULL) {
1401 if (parser->m_freeInternalEntities == NULL)
1402 break;
1403 entityList = parser->m_freeInternalEntities;
1404 parser->m_freeInternalEntities = NULL;
1405 }
1406 openEntity = entityList;
1407 entityList = entityList->next;
1408 FREE(parser, openEntity);
1409 }
1410
1411 destroyBindings(parser->m_freeBindingList, parser);
1412 destroyBindings(parser->m_inheritedBindings, parser);
1413 poolDestroy(&parser->m_tempPool);
1414 poolDestroy(&parser->m_temp2Pool);
1415 FREE(parser, (void *)parser->m_protocolEncodingName);
1416 #ifdef XML_DTD
1417 /* external parameter entity parsers share the DTD structure
1418 parser->m_dtd with the root parser, so we must not destroy it
1419 */
1420 if (!parser->m_isParamEntity && parser->m_dtd)
1421 #else
1422 if (parser->m_dtd)
1423 #endif /* XML_DTD */
1424 dtdDestroy(parser->m_dtd, (XML_Bool)!parser->m_parentParser, &parser->m_mem);
1425 FREE(parser, (void *)parser->m_atts);
1426 #ifdef XML_ATTR_INFO
1427 FREE(parser, (void *)parser->m_attInfo);
1428 #endif
1429 FREE(parser, parser->m_groupConnector);
1430 FREE(parser, parser->m_buffer);
1431 FREE(parser, parser->m_dataBuf);
1432 FREE(parser, parser->m_nsAtts);
1433 FREE(parser, parser->m_unknownEncodingMem);
1434 if (parser->m_unknownEncodingRelease)
1435 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1436 FREE(parser, parser);
1437 }
1438
1439 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1440 XML_UseParserAsHandlerArg(XML_Parser parser)
1441 {
1442 if (parser != NULL)
1443 parser->m_handlerArg = parser;
1444 }
1445
1446 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1447 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1448 {
1449 if (parser == NULL)
1450 return XML_ERROR_INVALID_ARGUMENT;
1451 #ifdef XML_DTD
1452 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1453 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1454 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1455 parser->m_useForeignDTD = useDTD;
1456 return XML_ERROR_NONE;
1457 #else
1458 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1459 #endif
1460 }
1461
1462 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1463 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1464 {
1465 if (parser == NULL)
1466 return;
1467 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1468 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1469 return;
1470 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1471 }
1472
1473 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1474 XML_SetUserData(XML_Parser parser, void *p)
1475 {
1476 if (parser == NULL)
1477 return;
1478 if (parser->m_handlerArg == parser->m_userData)
1479 parser->m_handlerArg = parser->m_userData = p;
1480 else
1481 parser->m_userData = p;
1482 }
1483
1484 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1485 XML_SetBase(XML_Parser parser, const XML_Char *p)
1486 {
1487 if (parser == NULL)
1488 return XML_STATUS_ERROR;
1489 if (p) {
1490 p = poolCopyString(&parser->m_dtd->pool, p);
1491 if (!p)
1492 return XML_STATUS_ERROR;
1493 parser->m_curBase = p;
1494 }
1495 else
1496 parser->m_curBase = NULL;
1497 return XML_STATUS_OK;
1498 }
1499
1500 const XML_Char * XMLCALL
XML_GetBase(XML_Parser parser)1501 XML_GetBase(XML_Parser parser)
1502 {
1503 if (parser == NULL)
1504 return NULL;
1505 return parser->m_curBase;
1506 }
1507
1508 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1509 XML_GetSpecifiedAttributeCount(XML_Parser parser)
1510 {
1511 if (parser == NULL)
1512 return -1;
1513 return parser->m_nSpecifiedAtts;
1514 }
1515
1516 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1517 XML_GetIdAttributeIndex(XML_Parser parser)
1518 {
1519 if (parser == NULL)
1520 return -1;
1521 return parser->m_idAttIndex;
1522 }
1523
#ifdef XML_ATTR_INFO
/* Returns the parser's attribute info array (m_attInfo), or NULL for a
   NULL parser.  Only compiled when XML_ATTR_INFO is defined.
*/
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1533
1534 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1535 XML_SetElementHandler(XML_Parser parser,
1536 XML_StartElementHandler start,
1537 XML_EndElementHandler end)
1538 {
1539 if (parser == NULL)
1540 return;
1541 parser->m_startElementHandler = start;
1542 parser->m_endElementHandler = end;
1543 }
1544
1545 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1546 XML_SetStartElementHandler(XML_Parser parser,
1547 XML_StartElementHandler start) {
1548 if (parser != NULL)
1549 parser->m_startElementHandler = start;
1550 }
1551
1552 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1553 XML_SetEndElementHandler(XML_Parser parser,
1554 XML_EndElementHandler end) {
1555 if (parser != NULL)
1556 parser->m_endElementHandler = end;
1557 }
1558
1559 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1560 XML_SetCharacterDataHandler(XML_Parser parser,
1561 XML_CharacterDataHandler handler)
1562 {
1563 if (parser != NULL)
1564 parser->m_characterDataHandler = handler;
1565 }
1566
1567 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1568 XML_SetProcessingInstructionHandler(XML_Parser parser,
1569 XML_ProcessingInstructionHandler handler)
1570 {
1571 if (parser != NULL)
1572 parser->m_processingInstructionHandler = handler;
1573 }
1574
1575 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1576 XML_SetCommentHandler(XML_Parser parser,
1577 XML_CommentHandler handler)
1578 {
1579 if (parser != NULL)
1580 parser->m_commentHandler = handler;
1581 }
1582
1583 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1584 XML_SetCdataSectionHandler(XML_Parser parser,
1585 XML_StartCdataSectionHandler start,
1586 XML_EndCdataSectionHandler end)
1587 {
1588 if (parser == NULL)
1589 return;
1590 parser->m_startCdataSectionHandler = start;
1591 parser->m_endCdataSectionHandler = end;
1592 }
1593
1594 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1595 XML_SetStartCdataSectionHandler(XML_Parser parser,
1596 XML_StartCdataSectionHandler start) {
1597 if (parser != NULL)
1598 parser->m_startCdataSectionHandler = start;
1599 }
1600
1601 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1602 XML_SetEndCdataSectionHandler(XML_Parser parser,
1603 XML_EndCdataSectionHandler end) {
1604 if (parser != NULL)
1605 parser->m_endCdataSectionHandler = end;
1606 }
1607
1608 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1609 XML_SetDefaultHandler(XML_Parser parser,
1610 XML_DefaultHandler handler)
1611 {
1612 if (parser == NULL)
1613 return;
1614 parser->m_defaultHandler = handler;
1615 parser->m_defaultExpandInternalEntities = XML_FALSE;
1616 }
1617
1618 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1619 XML_SetDefaultHandlerExpand(XML_Parser parser,
1620 XML_DefaultHandler handler)
1621 {
1622 if (parser == NULL)
1623 return;
1624 parser->m_defaultHandler = handler;
1625 parser->m_defaultExpandInternalEntities = XML_TRUE;
1626 }
1627
1628 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1629 XML_SetDoctypeDeclHandler(XML_Parser parser,
1630 XML_StartDoctypeDeclHandler start,
1631 XML_EndDoctypeDeclHandler end)
1632 {
1633 if (parser == NULL)
1634 return;
1635 parser->m_startDoctypeDeclHandler = start;
1636 parser->m_endDoctypeDeclHandler = end;
1637 }
1638
1639 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1640 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1641 XML_StartDoctypeDeclHandler start) {
1642 if (parser != NULL)
1643 parser->m_startDoctypeDeclHandler = start;
1644 }
1645
1646 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1647 XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1648 XML_EndDoctypeDeclHandler end) {
1649 if (parser != NULL)
1650 parser->m_endDoctypeDeclHandler = end;
1651 }
1652
1653 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1654 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1655 XML_UnparsedEntityDeclHandler handler)
1656 {
1657 if (parser != NULL)
1658 parser->m_unparsedEntityDeclHandler = handler;
1659 }
1660
1661 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1662 XML_SetNotationDeclHandler(XML_Parser parser,
1663 XML_NotationDeclHandler handler)
1664 {
1665 if (parser != NULL)
1666 parser->m_notationDeclHandler = handler;
1667 }
1668
1669 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1670 XML_SetNamespaceDeclHandler(XML_Parser parser,
1671 XML_StartNamespaceDeclHandler start,
1672 XML_EndNamespaceDeclHandler end)
1673 {
1674 if (parser == NULL)
1675 return;
1676 parser->m_startNamespaceDeclHandler = start;
1677 parser->m_endNamespaceDeclHandler = end;
1678 }
1679
1680 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1681 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1682 XML_StartNamespaceDeclHandler start) {
1683 if (parser != NULL)
1684 parser->m_startNamespaceDeclHandler = start;
1685 }
1686
1687 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1688 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1689 XML_EndNamespaceDeclHandler end) {
1690 if (parser != NULL)
1691 parser->m_endNamespaceDeclHandler = end;
1692 }
1693
1694 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1695 XML_SetNotStandaloneHandler(XML_Parser parser,
1696 XML_NotStandaloneHandler handler)
1697 {
1698 if (parser != NULL)
1699 parser->m_notStandaloneHandler = handler;
1700 }
1701
1702 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1703 XML_SetExternalEntityRefHandler(XML_Parser parser,
1704 XML_ExternalEntityRefHandler handler)
1705 {
1706 if (parser != NULL)
1707 parser->m_externalEntityRefHandler = handler;
1708 }
1709
1710 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1711 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
1712 {
1713 if (parser == NULL)
1714 return;
1715 if (arg)
1716 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1717 else
1718 parser->m_externalEntityRefHandlerArg = parser;
1719 }
1720
1721 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1722 XML_SetSkippedEntityHandler(XML_Parser parser,
1723 XML_SkippedEntityHandler handler)
1724 {
1725 if (parser != NULL)
1726 parser->m_skippedEntityHandler = handler;
1727 }
1728
1729 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1730 XML_SetUnknownEncodingHandler(XML_Parser parser,
1731 XML_UnknownEncodingHandler handler,
1732 void *data)
1733 {
1734 if (parser == NULL)
1735 return;
1736 parser->m_unknownEncodingHandler = handler;
1737 parser->m_unknownEncodingHandlerData = data;
1738 }
1739
1740 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1741 XML_SetElementDeclHandler(XML_Parser parser,
1742 XML_ElementDeclHandler eldecl)
1743 {
1744 if (parser != NULL)
1745 parser->m_elementDeclHandler = eldecl;
1746 }
1747
1748 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1749 XML_SetAttlistDeclHandler(XML_Parser parser,
1750 XML_AttlistDeclHandler attdecl)
1751 {
1752 if (parser != NULL)
1753 parser->m_attlistDeclHandler = attdecl;
1754 }
1755
1756 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1757 XML_SetEntityDeclHandler(XML_Parser parser,
1758 XML_EntityDeclHandler handler)
1759 {
1760 if (parser != NULL)
1761 parser->m_entityDeclHandler = handler;
1762 }
1763
1764 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1765 XML_SetXmlDeclHandler(XML_Parser parser,
1766 XML_XmlDeclHandler handler) {
1767 if (parser != NULL)
1768 parser->m_xmlDeclHandler = handler;
1769 }
1770
1771 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1772 XML_SetParamEntityParsing(XML_Parser parser,
1773 enum XML_ParamEntityParsing peParsing)
1774 {
1775 if (parser == NULL)
1776 return 0;
1777 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1778 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1779 return 0;
1780 #ifdef XML_DTD
1781 parser->m_paramEntityParsing = peParsing;
1782 return 1;
1783 #else
1784 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1785 #endif
1786 }
1787
1788 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1789 XML_SetHashSalt(XML_Parser parser,
1790 unsigned long hash_salt)
1791 {
1792 if (parser == NULL)
1793 return 0;
1794 if (parser->m_parentParser)
1795 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1796 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1797 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1798 return 0;
1799 parser->m_hash_secret_salt = hash_salt;
1800 return 1;
1801 }
1802
1803 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1804 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
1805 {
1806 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1807 if (parser != NULL)
1808 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1809 return XML_STATUS_ERROR;
1810 }
1811 switch (parser->m_parsingStatus.parsing) {
1812 case XML_SUSPENDED:
1813 parser->m_errorCode = XML_ERROR_SUSPENDED;
1814 return XML_STATUS_ERROR;
1815 case XML_FINISHED:
1816 parser->m_errorCode = XML_ERROR_FINISHED;
1817 return XML_STATUS_ERROR;
1818 case XML_INITIALIZED:
1819 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1820 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1821 return XML_STATUS_ERROR;
1822 }
1823 default:
1824 parser->m_parsingStatus.parsing = XML_PARSING;
1825 }
1826
1827 if (len == 0) {
1828 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1829 if (!isFinal)
1830 return XML_STATUS_OK;
1831 parser->m_positionPtr = parser->m_bufferPtr;
1832 parser->m_parseEndPtr = parser->m_bufferEnd;
1833
1834 /* If data are left over from last buffer, and we now know that these
1835 data are the final chunk of input, then we have to check them again
1836 to detect errors based on that fact.
1837 */
1838 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
1839
1840 if (parser->m_errorCode == XML_ERROR_NONE) {
1841 switch (parser->m_parsingStatus.parsing) {
1842 case XML_SUSPENDED:
1843 /* It is hard to be certain, but it seems that this case
1844 * cannot occur. This code is cleaning up a previous parse
1845 * with no new data (since len == 0). Changing the parsing
1846 * state requires getting to execute a handler function, and
1847 * there doesn't seem to be an opportunity for that while in
1848 * this circumstance.
1849 *
1850 * Given the uncertainty, we retain the code but exclude it
1851 * from coverage tests.
1852 *
1853 * LCOV_EXCL_START
1854 */
1855 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
1856 parser->m_positionPtr = parser->m_bufferPtr;
1857 return XML_STATUS_SUSPENDED;
1858 /* LCOV_EXCL_STOP */
1859 case XML_INITIALIZED:
1860 case XML_PARSING:
1861 parser->m_parsingStatus.parsing = XML_FINISHED;
1862 /* fall through */
1863 default:
1864 return XML_STATUS_OK;
1865 }
1866 }
1867 parser->m_eventEndPtr = parser->m_eventPtr;
1868 parser->m_processor = errorProcessor;
1869 return XML_STATUS_ERROR;
1870 }
1871 #ifndef XML_CONTEXT_BYTES
1872 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
1873 const char *end;
1874 int nLeftOver;
1875 enum XML_Status result;
1876 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1877 if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1878 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1879 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1880 parser->m_processor = errorProcessor;
1881 return XML_STATUS_ERROR;
1882 }
1883 parser->m_parseEndByteIndex += len;
1884 parser->m_positionPtr = s;
1885 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1886
1887 parser->m_errorCode = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
1888
1889 if (parser->m_errorCode != XML_ERROR_NONE) {
1890 parser->m_eventEndPtr = parser->m_eventPtr;
1891 parser->m_processor = errorProcessor;
1892 return XML_STATUS_ERROR;
1893 }
1894 else {
1895 switch (parser->m_parsingStatus.parsing) {
1896 case XML_SUSPENDED:
1897 result = XML_STATUS_SUSPENDED;
1898 break;
1899 case XML_INITIALIZED:
1900 case XML_PARSING:
1901 if (isFinal) {
1902 parser->m_parsingStatus.parsing = XML_FINISHED;
1903 return XML_STATUS_OK;
1904 }
1905 /* fall through */
1906 default:
1907 result = XML_STATUS_OK;
1908 }
1909 }
1910
1911 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, &parser->m_position);
1912 nLeftOver = s + len - end;
1913 if (nLeftOver) {
1914 if (parser->m_buffer == NULL || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
1915 /* avoid _signed_ integer overflow */
1916 char *temp = NULL;
1917 const int bytesToAllocate = (int)((unsigned)len * 2U);
1918 if (bytesToAllocate > 0) {
1919 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
1920 }
1921 if (temp == NULL) {
1922 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1923 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1924 parser->m_processor = errorProcessor;
1925 return XML_STATUS_ERROR;
1926 }
1927 parser->m_buffer = temp;
1928 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
1929 }
1930 memcpy(parser->m_buffer, end, nLeftOver);
1931 }
1932 parser->m_bufferPtr = parser->m_buffer;
1933 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1934 parser->m_positionPtr = parser->m_bufferPtr;
1935 parser->m_parseEndPtr = parser->m_bufferEnd;
1936 parser->m_eventPtr = parser->m_bufferPtr;
1937 parser->m_eventEndPtr = parser->m_bufferPtr;
1938 return result;
1939 }
1940 #endif /* not defined XML_CONTEXT_BYTES */
1941 else {
1942 void *buff = XML_GetBuffer(parser, len);
1943 if (buff == NULL)
1944 return XML_STATUS_ERROR;
1945 else {
1946 memcpy(buff, s, len);
1947 return XML_ParseBuffer(parser, len, isFinal);
1948 }
1949 }
1950 }
1951
1952 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)1953 XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
1954 {
1955 const char *start;
1956 enum XML_Status result = XML_STATUS_OK;
1957
1958 if (parser == NULL)
1959 return XML_STATUS_ERROR;
1960 switch (parser->m_parsingStatus.parsing) {
1961 case XML_SUSPENDED:
1962 parser->m_errorCode = XML_ERROR_SUSPENDED;
1963 return XML_STATUS_ERROR;
1964 case XML_FINISHED:
1965 parser->m_errorCode = XML_ERROR_FINISHED;
1966 return XML_STATUS_ERROR;
1967 case XML_INITIALIZED:
1968 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1969 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1970 return XML_STATUS_ERROR;
1971 }
1972 default:
1973 parser->m_parsingStatus.parsing = XML_PARSING;
1974 }
1975
1976 start = parser->m_bufferPtr;
1977 parser->m_positionPtr = start;
1978 parser->m_bufferEnd += len;
1979 parser->m_parseEndPtr = parser->m_bufferEnd;
1980 parser->m_parseEndByteIndex += len;
1981 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1982
1983 parser->m_errorCode = parser->m_processor(parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
1984
1985 if (parser->m_errorCode != XML_ERROR_NONE) {
1986 parser->m_eventEndPtr = parser->m_eventPtr;
1987 parser->m_processor = errorProcessor;
1988 return XML_STATUS_ERROR;
1989 }
1990 else {
1991 switch (parser->m_parsingStatus.parsing) {
1992 case XML_SUSPENDED:
1993 result = XML_STATUS_SUSPENDED;
1994 break;
1995 case XML_INITIALIZED:
1996 case XML_PARSING:
1997 if (isFinal) {
1998 parser->m_parsingStatus.parsing = XML_FINISHED;
1999 return result;
2000 }
2001 default: ; /* should not happen */
2002 }
2003 }
2004
2005 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2006 parser->m_positionPtr = parser->m_bufferPtr;
2007 return result;
2008 }
2009
2010 void * XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2011 XML_GetBuffer(XML_Parser parser, int len)
2012 {
2013 if (parser == NULL)
2014 return NULL;
2015 if (len < 0) {
2016 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2017 return NULL;
2018 }
2019 switch (parser->m_parsingStatus.parsing) {
2020 case XML_SUSPENDED:
2021 parser->m_errorCode = XML_ERROR_SUSPENDED;
2022 return NULL;
2023 case XML_FINISHED:
2024 parser->m_errorCode = XML_ERROR_FINISHED;
2025 return NULL;
2026 default: ;
2027 }
2028
2029 if (len > parser->m_bufferLim - parser->m_bufferEnd) {
2030 #ifdef XML_CONTEXT_BYTES
2031 int keep;
2032 #endif /* defined XML_CONTEXT_BYTES */
2033 /* Do not invoke signed arithmetic overflow: */
2034 int neededSize = (int) ((unsigned)len + (unsigned)(parser->m_bufferEnd - parser->m_bufferPtr));
2035 if (neededSize < 0) {
2036 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2037 return NULL;
2038 }
2039 #ifdef XML_CONTEXT_BYTES
2040 keep = (int)(parser->m_bufferPtr - parser->m_buffer);
2041 if (keep > XML_CONTEXT_BYTES)
2042 keep = XML_CONTEXT_BYTES;
2043 neededSize += keep;
2044 #endif /* defined XML_CONTEXT_BYTES */
2045 if (neededSize <= parser->m_bufferLim - parser->m_buffer) {
2046 #ifdef XML_CONTEXT_BYTES
2047 if (keep < parser->m_bufferPtr - parser->m_buffer) {
2048 int offset = (int)(parser->m_bufferPtr - parser->m_buffer) - keep;
2049 memmove(parser->m_buffer, &parser->m_buffer[offset], parser->m_bufferEnd - parser->m_bufferPtr + keep);
2050 parser->m_bufferEnd -= offset;
2051 parser->m_bufferPtr -= offset;
2052 }
2053 #else
2054 memmove(parser->m_buffer, parser->m_bufferPtr, parser->m_bufferEnd - parser->m_bufferPtr);
2055 parser->m_bufferEnd = parser->m_buffer + (parser->m_bufferEnd - parser->m_bufferPtr);
2056 parser->m_bufferPtr = parser->m_buffer;
2057 #endif /* not defined XML_CONTEXT_BYTES */
2058 }
2059 else {
2060 char *newBuf;
2061 int bufferSize = (int)(parser->m_bufferLim - parser->m_bufferPtr);
2062 if (bufferSize == 0)
2063 bufferSize = INIT_BUFFER_SIZE;
2064 do {
2065 /* Do not invoke signed arithmetic overflow: */
2066 bufferSize = (int) (2U * (unsigned) bufferSize);
2067 } while (bufferSize < neededSize && bufferSize > 0);
2068 if (bufferSize <= 0) {
2069 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2070 return NULL;
2071 }
2072 newBuf = (char *)MALLOC(parser, bufferSize);
2073 if (newBuf == 0) {
2074 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2075 return NULL;
2076 }
2077 parser->m_bufferLim = newBuf + bufferSize;
2078 #ifdef XML_CONTEXT_BYTES
2079 if (parser->m_bufferPtr) {
2080 int keep = (int)(parser->m_bufferPtr - parser->m_buffer);
2081 if (keep > XML_CONTEXT_BYTES)
2082 keep = XML_CONTEXT_BYTES;
2083 memcpy(newBuf, &parser->m_bufferPtr[-keep], parser->m_bufferEnd - parser->m_bufferPtr + keep);
2084 FREE(parser, parser->m_buffer);
2085 parser->m_buffer = newBuf;
2086 parser->m_bufferEnd = parser->m_buffer + (parser->m_bufferEnd - parser->m_bufferPtr) + keep;
2087 parser->m_bufferPtr = parser->m_buffer + keep;
2088 }
2089 else {
2090 parser->m_bufferEnd = newBuf + (parser->m_bufferEnd - parser->m_bufferPtr);
2091 parser->m_bufferPtr = parser->m_buffer = newBuf;
2092 }
2093 #else
2094 if (parser->m_bufferPtr) {
2095 memcpy(newBuf, parser->m_bufferPtr, parser->m_bufferEnd - parser->m_bufferPtr);
2096 FREE(parser, parser->m_buffer);
2097 }
2098 parser->m_bufferEnd = newBuf + (parser->m_bufferEnd - parser->m_bufferPtr);
2099 parser->m_bufferPtr = parser->m_buffer = newBuf;
2100 #endif /* not defined XML_CONTEXT_BYTES */
2101 }
2102 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2103 parser->m_positionPtr = NULL;
2104 }
2105 return parser->m_bufferEnd;
2106 }
2107
2108 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2109 XML_StopParser(XML_Parser parser, XML_Bool resumable)
2110 {
2111 if (parser == NULL)
2112 return XML_STATUS_ERROR;
2113 switch (parser->m_parsingStatus.parsing) {
2114 case XML_SUSPENDED:
2115 if (resumable) {
2116 parser->m_errorCode = XML_ERROR_SUSPENDED;
2117 return XML_STATUS_ERROR;
2118 }
2119 parser->m_parsingStatus.parsing = XML_FINISHED;
2120 break;
2121 case XML_FINISHED:
2122 parser->m_errorCode = XML_ERROR_FINISHED;
2123 return XML_STATUS_ERROR;
2124 default:
2125 if (resumable) {
2126 #ifdef XML_DTD
2127 if (parser->m_isParamEntity) {
2128 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2129 return XML_STATUS_ERROR;
2130 }
2131 #endif
2132 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2133 }
2134 else
2135 parser->m_parsingStatus.parsing = XML_FINISHED;
2136 }
2137 return XML_STATUS_OK;
2138 }
2139
2140 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2141 XML_ResumeParser(XML_Parser parser)
2142 {
2143 enum XML_Status result = XML_STATUS_OK;
2144
2145 if (parser == NULL)
2146 return XML_STATUS_ERROR;
2147 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2148 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2149 return XML_STATUS_ERROR;
2150 }
2151 parser->m_parsingStatus.parsing = XML_PARSING;
2152
2153 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2154
2155 if (parser->m_errorCode != XML_ERROR_NONE) {
2156 parser->m_eventEndPtr = parser->m_eventPtr;
2157 parser->m_processor = errorProcessor;
2158 return XML_STATUS_ERROR;
2159 }
2160 else {
2161 switch (parser->m_parsingStatus.parsing) {
2162 case XML_SUSPENDED:
2163 result = XML_STATUS_SUSPENDED;
2164 break;
2165 case XML_INITIALIZED:
2166 case XML_PARSING:
2167 if (parser->m_parsingStatus.finalBuffer) {
2168 parser->m_parsingStatus.parsing = XML_FINISHED;
2169 return result;
2170 }
2171 default: ;
2172 }
2173 }
2174
2175 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2176 parser->m_positionPtr = parser->m_bufferPtr;
2177 return result;
2178 }
2179
2180 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2181 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
2182 {
2183 if (parser == NULL)
2184 return;
2185 assert(status != NULL);
2186 *status = parser->m_parsingStatus;
2187 }
2188
2189 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2190 XML_GetErrorCode(XML_Parser parser)
2191 {
2192 if (parser == NULL)
2193 return XML_ERROR_INVALID_ARGUMENT;
2194 return parser->m_errorCode;
2195 }
2196
2197 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2198 XML_GetCurrentByteIndex(XML_Parser parser)
2199 {
2200 if (parser == NULL)
2201 return -1;
2202 if (parser->m_eventPtr)
2203 return (XML_Index)(parser->m_parseEndByteIndex - (parser->m_parseEndPtr - parser->m_eventPtr));
2204 return -1;
2205 }
2206
2207 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2208 XML_GetCurrentByteCount(XML_Parser parser)
2209 {
2210 if (parser == NULL)
2211 return 0;
2212 if (parser->m_eventEndPtr && parser->m_eventPtr)
2213 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2214 return 0;
2215 }
2216
2217 const char * XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2218 XML_GetInputContext(XML_Parser parser, int *offset, int *size)
2219 {
2220 #ifdef XML_CONTEXT_BYTES
2221 if (parser == NULL)
2222 return NULL;
2223 if (parser->m_eventPtr && parser->m_buffer) {
2224 if (offset != NULL)
2225 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2226 if (size != NULL)
2227 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2228 return parser->m_buffer;
2229 }
2230 #else
2231 (void)parser;
2232 (void)offset;
2233 (void)size;
2234 #endif /* defined XML_CONTEXT_BYTES */
2235 return (char *) 0;
2236 }
2237
2238 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2239 XML_GetCurrentLineNumber(XML_Parser parser)
2240 {
2241 if (parser == NULL)
2242 return 0;
2243 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2244 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2245 parser->m_positionPtr = parser->m_eventPtr;
2246 }
2247 return parser->m_position.lineNumber + 1;
2248 }
2249
2250 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2251 XML_GetCurrentColumnNumber(XML_Parser parser)
2252 {
2253 if (parser == NULL)
2254 return 0;
2255 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2256 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2257 parser->m_positionPtr = parser->m_eventPtr;
2258 }
2259 return parser->m_position.columnNumber;
2260 }
2261
2262 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2263 XML_FreeContentModel(XML_Parser parser, XML_Content *model)
2264 {
2265 if (parser != NULL)
2266 FREE(parser, model);
2267 }
2268
2269 void * XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2270 XML_MemMalloc(XML_Parser parser, size_t size)
2271 {
2272 if (parser == NULL)
2273 return NULL;
2274 return MALLOC(parser, size);
2275 }
2276
2277 void * XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2278 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
2279 {
2280 if (parser == NULL)
2281 return NULL;
2282 return REALLOC(parser, ptr, size);
2283 }
2284
2285 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2286 XML_MemFree(XML_Parser parser, void *ptr)
2287 {
2288 if (parser != NULL)
2289 FREE(parser, ptr);
2290 }
2291
2292 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2293 XML_DefaultCurrent(XML_Parser parser)
2294 {
2295 if (parser == NULL)
2296 return;
2297 if (parser->m_defaultHandler) {
2298 if (parser->m_openInternalEntities)
2299 reportDefault(parser,
2300 parser->m_internalEncoding,
2301 parser->m_openInternalEntities->internalEventPtr,
2302 parser->m_openInternalEntities->internalEventEndPtr);
2303 else
2304 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, parser->m_eventEndPtr);
2305 }
2306 }
2307
2308 const XML_LChar * XMLCALL
XML_ErrorString(enum XML_Error code)2309 XML_ErrorString(enum XML_Error code)
2310 {
2311 switch (code) {
2312 case XML_ERROR_NONE:
2313 return NULL;
2314 case XML_ERROR_NO_MEMORY:
2315 return XML_L("out of memory");
2316 case XML_ERROR_SYNTAX:
2317 return XML_L("syntax error");
2318 case XML_ERROR_NO_ELEMENTS:
2319 return XML_L("no element found");
2320 case XML_ERROR_INVALID_TOKEN:
2321 return XML_L("not well-formed (invalid token)");
2322 case XML_ERROR_UNCLOSED_TOKEN:
2323 return XML_L("unclosed token");
2324 case XML_ERROR_PARTIAL_CHAR:
2325 return XML_L("partial character");
2326 case XML_ERROR_TAG_MISMATCH:
2327 return XML_L("mismatched tag");
2328 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2329 return XML_L("duplicate attribute");
2330 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2331 return XML_L("junk after document element");
2332 case XML_ERROR_PARAM_ENTITY_REF:
2333 return XML_L("illegal parameter entity reference");
2334 case XML_ERROR_UNDEFINED_ENTITY:
2335 return XML_L("undefined entity");
2336 case XML_ERROR_RECURSIVE_ENTITY_REF:
2337 return XML_L("recursive entity reference");
2338 case XML_ERROR_ASYNC_ENTITY:
2339 return XML_L("asynchronous entity");
2340 case XML_ERROR_BAD_CHAR_REF:
2341 return XML_L("reference to invalid character number");
2342 case XML_ERROR_BINARY_ENTITY_REF:
2343 return XML_L("reference to binary entity");
2344 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2345 return XML_L("reference to external entity in attribute");
2346 case XML_ERROR_MISPLACED_XML_PI:
2347 return XML_L("XML or text declaration not at start of entity");
2348 case XML_ERROR_UNKNOWN_ENCODING:
2349 return XML_L("unknown encoding");
2350 case XML_ERROR_INCORRECT_ENCODING:
2351 return XML_L("encoding specified in XML declaration is incorrect");
2352 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2353 return XML_L("unclosed CDATA section");
2354 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2355 return XML_L("error in processing external entity reference");
2356 case XML_ERROR_NOT_STANDALONE:
2357 return XML_L("document is not standalone");
2358 case XML_ERROR_UNEXPECTED_STATE:
2359 return XML_L("unexpected parser state - please send a bug report");
2360 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2361 return XML_L("entity declared in parameter entity");
2362 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2363 return XML_L("requested feature requires XML_DTD support in Expat");
2364 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2365 return XML_L("cannot change setting once parsing has begun");
2366 /* Added in 1.95.7. */
2367 case XML_ERROR_UNBOUND_PREFIX:
2368 return XML_L("unbound prefix");
2369 /* Added in 1.95.8. */
2370 case XML_ERROR_UNDECLARING_PREFIX:
2371 return XML_L("must not undeclare prefix");
2372 case XML_ERROR_INCOMPLETE_PE:
2373 return XML_L("incomplete markup in parameter entity");
2374 case XML_ERROR_XML_DECL:
2375 return XML_L("XML declaration not well-formed");
2376 case XML_ERROR_TEXT_DECL:
2377 return XML_L("text declaration not well-formed");
2378 case XML_ERROR_PUBLICID:
2379 return XML_L("illegal character(s) in public id");
2380 case XML_ERROR_SUSPENDED:
2381 return XML_L("parser suspended");
2382 case XML_ERROR_NOT_SUSPENDED:
2383 return XML_L("parser not suspended");
2384 case XML_ERROR_ABORTED:
2385 return XML_L("parsing aborted");
2386 case XML_ERROR_FINISHED:
2387 return XML_L("parsing finished");
2388 case XML_ERROR_SUSPEND_PE:
2389 return XML_L("cannot suspend in external parameter entity");
2390 /* Added in 2.0.0. */
2391 case XML_ERROR_RESERVED_PREFIX_XML:
2392 return XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name");
2393 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2394 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2395 case XML_ERROR_RESERVED_NAMESPACE_URI:
2396 return XML_L("prefix must not be bound to one of the reserved namespace names");
2397 /* Added in 2.2.5. */
2398 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2399 return XML_L("invalid argument");
2400 }
2401 return NULL;
2402 }
2403
2404 const XML_LChar * XMLCALL
XML_ExpatVersion(void)2405 XML_ExpatVersion(void) {
2406
2407 /* V1 is used to string-ize the version number. However, it would
2408 string-ize the actual version macro *names* unless we get them
2409 substituted before being passed to V1. CPP is defined to expand
2410 a macro, then rescan for more expansions. Thus, we use V2 to expand
2411 the version macros, then CPP will expand the resulting V1() macro
2412 with the correct numerals. */
2413 /* ### I'm assuming cpp is portable in this respect... */
2414
2415 #define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2416 #define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2417
2418 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2419
2420 #undef V1
2421 #undef V2
2422 }
2423
2424 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2425 XML_ExpatVersionInfo(void)
2426 {
2427 XML_Expat_Version version;
2428
2429 version.major = XML_MAJOR_VERSION;
2430 version.minor = XML_MINOR_VERSION;
2431 version.micro = XML_MICRO_VERSION;
2432
2433 return version;
2434 }
2435
2436 const XML_Feature * XMLCALL
XML_GetFeatureList(void)2437 XML_GetFeatureList(void)
2438 {
2439 static const XML_Feature features[] = {
2440 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2441 sizeof(XML_Char)},
2442 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2443 sizeof(XML_LChar)},
2444 #ifdef XML_UNICODE
2445 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2446 #endif
2447 #ifdef XML_UNICODE_WCHAR_T
2448 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2449 #endif
2450 #ifdef XML_DTD
2451 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2452 #endif
2453 #ifdef XML_CONTEXT_BYTES
2454 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2455 XML_CONTEXT_BYTES},
2456 #endif
2457 #ifdef XML_MIN_SIZE
2458 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2459 #endif
2460 #ifdef XML_NS
2461 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2462 #endif
2463 #ifdef XML_LARGE_SIZE
2464 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2465 #endif
2466 #ifdef XML_ATTR_INFO
2467 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2468 #endif
2469 {XML_FEATURE_END, NULL, 0}
2470 };
2471
2472 return features;
2473 }
2474
2475 /* Initially tag->rawName always points into the parse buffer;
2476 for those TAG instances opened while the current parse buffer was
2477 processed, and not yet closed, we need to store tag->rawName in a more
2478 permanent location, since the parse buffer is about to be discarded.
2479 */
2480 static XML_Bool
storeRawNames(XML_Parser parser)2481 storeRawNames(XML_Parser parser)
2482 {
2483 TAG *tag = parser->m_tagStack;
2484 while (tag) {
2485 int bufSize;
2486 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2487 char *rawNameBuf = tag->buf + nameLen;
2488 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2489 at the first entry that has already been copied; everything
2490 below it in the stack is already been accounted for in a
2491 previous call to this function.
2492 */
2493 if (tag->rawName == rawNameBuf)
2494 break;
2495 /* For re-use purposes we need to ensure that the
2496 size of tag->buf is a multiple of sizeof(XML_Char).
2497 */
2498 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2499 if (bufSize > tag->bufEnd - tag->buf) {
2500 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2501 if (temp == NULL)
2502 return XML_FALSE;
2503 /* if tag->name.str points to tag->buf (only when namespace
2504 processing is off) then we have to update it
2505 */
2506 if (tag->name.str == (XML_Char *)tag->buf)
2507 tag->name.str = (XML_Char *)temp;
2508 /* if tag->name.localPart is set (when namespace processing is on)
2509 then update it as well, since it will always point into tag->buf
2510 */
2511 if (tag->name.localPart)
2512 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2513 (XML_Char *)tag->buf);
2514 tag->buf = temp;
2515 tag->bufEnd = temp + bufSize;
2516 rawNameBuf = temp + nameLen;
2517 }
2518 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2519 tag->rawName = rawNameBuf;
2520 tag = tag->parent;
2521 }
2522 return XML_TRUE;
2523 }
2524
2525 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2526 contentProcessor(XML_Parser parser,
2527 const char *start,
2528 const char *end,
2529 const char **endPtr)
2530 {
2531 enum XML_Error result = doContent(parser, 0, parser->m_encoding, start, end,
2532 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
2533 if (result == XML_ERROR_NONE) {
2534 if (!storeRawNames(parser))
2535 return XML_ERROR_NO_MEMORY;
2536 }
2537 return result;
2538 }
2539
2540 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2541 externalEntityInitProcessor(XML_Parser parser,
2542 const char *start,
2543 const char *end,
2544 const char **endPtr)
2545 {
2546 enum XML_Error result = initializeEncoding(parser);
2547 if (result != XML_ERROR_NONE)
2548 return result;
2549 parser->m_processor = externalEntityInitProcessor2;
2550 return externalEntityInitProcessor2(parser, start, end, endPtr);
2551 }
2552
2553 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2554 externalEntityInitProcessor2(XML_Parser parser,
2555 const char *start,
2556 const char *end,
2557 const char **endPtr)
2558 {
2559 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2560 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2561 switch (tok) {
2562 case XML_TOK_BOM:
2563 /* If we are at the end of the buffer, this would cause the next stage,
2564 i.e. externalEntityInitProcessor3, to pass control directly to
2565 doContent (by detecting XML_TOK_NONE) without processing any xml text
2566 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2567 */
2568 if (next == end && !parser->m_parsingStatus.finalBuffer) {
2569 *endPtr = next;
2570 return XML_ERROR_NONE;
2571 }
2572 start = next;
2573 break;
2574 case XML_TOK_PARTIAL:
2575 if (!parser->m_parsingStatus.finalBuffer) {
2576 *endPtr = start;
2577 return XML_ERROR_NONE;
2578 }
2579 parser->m_eventPtr = start;
2580 return XML_ERROR_UNCLOSED_TOKEN;
2581 case XML_TOK_PARTIAL_CHAR:
2582 if (!parser->m_parsingStatus.finalBuffer) {
2583 *endPtr = start;
2584 return XML_ERROR_NONE;
2585 }
2586 parser->m_eventPtr = start;
2587 return XML_ERROR_PARTIAL_CHAR;
2588 }
2589 parser->m_processor = externalEntityInitProcessor3;
2590 return externalEntityInitProcessor3(parser, start, end, endPtr);
2591 }
2592
2593 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2594 externalEntityInitProcessor3(XML_Parser parser,
2595 const char *start,
2596 const char *end,
2597 const char **endPtr)
2598 {
2599 int tok;
2600 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2601 parser->m_eventPtr = start;
2602 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2603 parser->m_eventEndPtr = next;
2604
2605 switch (tok) {
2606 case XML_TOK_XML_DECL:
2607 {
2608 enum XML_Error result;
2609 result = processXmlDecl(parser, 1, start, next);
2610 if (result != XML_ERROR_NONE)
2611 return result;
2612 switch (parser->m_parsingStatus.parsing) {
2613 case XML_SUSPENDED:
2614 *endPtr = next;
2615 return XML_ERROR_NONE;
2616 case XML_FINISHED:
2617 return XML_ERROR_ABORTED;
2618 default:
2619 start = next;
2620 }
2621 }
2622 break;
2623 case XML_TOK_PARTIAL:
2624 if (!parser->m_parsingStatus.finalBuffer) {
2625 *endPtr = start;
2626 return XML_ERROR_NONE;
2627 }
2628 return XML_ERROR_UNCLOSED_TOKEN;
2629 case XML_TOK_PARTIAL_CHAR:
2630 if (!parser->m_parsingStatus.finalBuffer) {
2631 *endPtr = start;
2632 return XML_ERROR_NONE;
2633 }
2634 return XML_ERROR_PARTIAL_CHAR;
2635 }
2636 parser->m_processor = externalEntityContentProcessor;
2637 parser->m_tagLevel = 1;
2638 return externalEntityContentProcessor(parser, start, end, endPtr);
2639 }
2640
2641 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2642 externalEntityContentProcessor(XML_Parser parser,
2643 const char *start,
2644 const char *end,
2645 const char **endPtr)
2646 {
2647 enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end,
2648 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
2649 if (result == XML_ERROR_NONE) {
2650 if (!storeRawNames(parser))
2651 return XML_ERROR_NO_MEMORY;
2652 }
2653 return result;
2654 }
2655
2656 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore)2657 doContent(XML_Parser parser,
2658 int startTagLevel,
2659 const ENCODING *enc,
2660 const char *s,
2661 const char *end,
2662 const char **nextPtr,
2663 XML_Bool haveMore)
2664 {
2665 /* save one level of indirection */
2666 DTD * const dtd = parser->m_dtd;
2667
2668 const char **eventPP;
2669 const char **eventEndPP;
2670 if (enc == parser->m_encoding) {
2671 eventPP = &parser->m_eventPtr;
2672 eventEndPP = &parser->m_eventEndPtr;
2673 }
2674 else {
2675 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2676 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2677 }
2678 *eventPP = s;
2679
2680 for (;;) {
2681 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2682 int tok = XmlContentTok(enc, s, end, &next);
2683 *eventEndPP = next;
2684 switch (tok) {
2685 case XML_TOK_TRAILING_CR:
2686 if (haveMore) {
2687 *nextPtr = s;
2688 return XML_ERROR_NONE;
2689 }
2690 *eventEndPP = end;
2691 if (parser->m_characterDataHandler) {
2692 XML_Char c = 0xA;
2693 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2694 }
2695 else if (parser->m_defaultHandler)
2696 reportDefault(parser, enc, s, end);
2697 /* We are at the end of the final buffer, should we check for
2698 XML_SUSPENDED, XML_FINISHED?
2699 */
2700 if (startTagLevel == 0)
2701 return XML_ERROR_NO_ELEMENTS;
2702 if (parser->m_tagLevel != startTagLevel)
2703 return XML_ERROR_ASYNC_ENTITY;
2704 *nextPtr = end;
2705 return XML_ERROR_NONE;
2706 case XML_TOK_NONE:
2707 if (haveMore) {
2708 *nextPtr = s;
2709 return XML_ERROR_NONE;
2710 }
2711 if (startTagLevel > 0) {
2712 if (parser->m_tagLevel != startTagLevel)
2713 return XML_ERROR_ASYNC_ENTITY;
2714 *nextPtr = s;
2715 return XML_ERROR_NONE;
2716 }
2717 return XML_ERROR_NO_ELEMENTS;
2718 case XML_TOK_INVALID:
2719 *eventPP = next;
2720 return XML_ERROR_INVALID_TOKEN;
2721 case XML_TOK_PARTIAL:
2722 if (haveMore) {
2723 *nextPtr = s;
2724 return XML_ERROR_NONE;
2725 }
2726 return XML_ERROR_UNCLOSED_TOKEN;
2727 case XML_TOK_PARTIAL_CHAR:
2728 if (haveMore) {
2729 *nextPtr = s;
2730 return XML_ERROR_NONE;
2731 }
2732 return XML_ERROR_PARTIAL_CHAR;
2733 case XML_TOK_ENTITY_REF:
2734 {
2735 const XML_Char *name;
2736 ENTITY *entity;
2737 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2738 s + enc->minBytesPerChar,
2739 next - enc->minBytesPerChar);
2740 if (ch) {
2741 if (parser->m_characterDataHandler)
2742 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2743 else if (parser->m_defaultHandler)
2744 reportDefault(parser, enc, s, next);
2745 break;
2746 }
2747 name = poolStoreString(&dtd->pool, enc,
2748 s + enc->minBytesPerChar,
2749 next - enc->minBytesPerChar);
2750 if (!name)
2751 return XML_ERROR_NO_MEMORY;
2752 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2753 poolDiscard(&dtd->pool);
2754 /* First, determine if a check for an existing declaration is needed;
2755 if yes, check that the entity exists, and that it is internal,
2756 otherwise call the skipped entity or default handler.
2757 */
2758 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2759 if (!entity)
2760 return XML_ERROR_UNDEFINED_ENTITY;
2761 else if (!entity->is_internal)
2762 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2763 }
2764 else if (!entity) {
2765 if (parser->m_skippedEntityHandler)
2766 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2767 else if (parser->m_defaultHandler)
2768 reportDefault(parser, enc, s, next);
2769 break;
2770 }
2771 if (entity->open)
2772 return XML_ERROR_RECURSIVE_ENTITY_REF;
2773 if (entity->notation)
2774 return XML_ERROR_BINARY_ENTITY_REF;
2775 if (entity->textPtr) {
2776 enum XML_Error result;
2777 if (!parser->m_defaultExpandInternalEntities) {
2778 if (parser->m_skippedEntityHandler)
2779 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0);
2780 else if (parser->m_defaultHandler)
2781 reportDefault(parser, enc, s, next);
2782 break;
2783 }
2784 result = processInternalEntity(parser, entity, XML_FALSE);
2785 if (result != XML_ERROR_NONE)
2786 return result;
2787 }
2788 else if (parser->m_externalEntityRefHandler) {
2789 const XML_Char *context;
2790 entity->open = XML_TRUE;
2791 context = getContext(parser);
2792 entity->open = XML_FALSE;
2793 if (!context)
2794 return XML_ERROR_NO_MEMORY;
2795 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
2796 context,
2797 entity->base,
2798 entity->systemId,
2799 entity->publicId))
2800 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2801 poolDiscard(&parser->m_tempPool);
2802 }
2803 else if (parser->m_defaultHandler)
2804 reportDefault(parser, enc, s, next);
2805 break;
2806 }
2807 case XML_TOK_START_TAG_NO_ATTS:
2808 /* fall through */
2809 case XML_TOK_START_TAG_WITH_ATTS:
2810 {
2811 TAG *tag;
2812 enum XML_Error result;
2813 XML_Char *toPtr;
2814 if (parser->m_freeTagList) {
2815 tag = parser->m_freeTagList;
2816 parser->m_freeTagList = parser->m_freeTagList->parent;
2817 }
2818 else {
2819 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2820 if (!tag)
2821 return XML_ERROR_NO_MEMORY;
2822 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2823 if (!tag->buf) {
2824 FREE(parser, tag);
2825 return XML_ERROR_NO_MEMORY;
2826 }
2827 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2828 }
2829 tag->bindings = NULL;
2830 tag->parent = parser->m_tagStack;
2831 parser->m_tagStack = tag;
2832 tag->name.localPart = NULL;
2833 tag->name.prefix = NULL;
2834 tag->rawName = s + enc->minBytesPerChar;
2835 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2836 ++parser->m_tagLevel;
2837 {
2838 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2839 const char *fromPtr = tag->rawName;
2840 toPtr = (XML_Char *)tag->buf;
2841 for (;;) {
2842 int bufSize;
2843 int convLen;
2844 const enum XML_Convert_Result convert_res = XmlConvert(enc,
2845 &fromPtr, rawNameEnd,
2846 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
2847 convLen = (int)(toPtr - (XML_Char *)tag->buf);
2848 if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
2849 tag->name.strLen = convLen;
2850 break;
2851 }
2852 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2853 {
2854 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2855 if (temp == NULL)
2856 return XML_ERROR_NO_MEMORY;
2857 tag->buf = temp;
2858 tag->bufEnd = temp + bufSize;
2859 toPtr = (XML_Char *)temp + convLen;
2860 }
2861 }
2862 }
2863 tag->name.str = (XML_Char *)tag->buf;
2864 *toPtr = XML_T('\0');
2865 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2866 if (result)
2867 return result;
2868 if (parser->m_startElementHandler)
2869 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2870 (const XML_Char **)parser->m_atts);
2871 else if (parser->m_defaultHandler)
2872 reportDefault(parser, enc, s, next);
2873 poolClear(&parser->m_tempPool);
2874 break;
2875 }
2876 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
2877 /* fall through */
2878 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2879 {
2880 const char *rawName = s + enc->minBytesPerChar;
2881 enum XML_Error result;
2882 BINDING *bindings = NULL;
2883 XML_Bool noElmHandlers = XML_TRUE;
2884 TAG_NAME name;
2885 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
2886 rawName + XmlNameLength(enc, rawName));
2887 if (!name.str)
2888 return XML_ERROR_NO_MEMORY;
2889 poolFinish(&parser->m_tempPool);
2890 result = storeAtts(parser, enc, s, &name, &bindings);
2891 if (result != XML_ERROR_NONE) {
2892 freeBindings(parser, bindings);
2893 return result;
2894 }
2895 poolFinish(&parser->m_tempPool);
2896 if (parser->m_startElementHandler) {
2897 parser->m_startElementHandler(parser->m_handlerArg, name.str, (const XML_Char **)parser->m_atts);
2898 noElmHandlers = XML_FALSE;
2899 }
2900 if (parser->m_endElementHandler) {
2901 if (parser->m_startElementHandler)
2902 *eventPP = *eventEndPP;
2903 parser->m_endElementHandler(parser->m_handlerArg, name.str);
2904 noElmHandlers = XML_FALSE;
2905 }
2906 if (noElmHandlers && parser->m_defaultHandler)
2907 reportDefault(parser, enc, s, next);
2908 poolClear(&parser->m_tempPool);
2909 freeBindings(parser, bindings);
2910 }
2911 if ((parser->m_tagLevel == 0) &&
2912 !((parser->m_parsingStatus.parsing == XML_FINISHED) || (parser->m_parsingStatus.parsing == XML_SUSPENDED))) {
2913 return epilogProcessor(parser, next, end, nextPtr);
2914 }
2915 break;
2916 case XML_TOK_END_TAG:
2917 if (parser->m_tagLevel == startTagLevel)
2918 return XML_ERROR_ASYNC_ENTITY;
2919 else {
2920 int len;
2921 const char *rawName;
2922 TAG *tag = parser->m_tagStack;
2923 parser->m_tagStack = tag->parent;
2924 tag->parent = parser->m_freeTagList;
2925 parser->m_freeTagList = tag;
2926 rawName = s + enc->minBytesPerChar*2;
2927 len = XmlNameLength(enc, rawName);
2928 if (len != tag->rawNameLength
2929 || memcmp(tag->rawName, rawName, len) != 0) {
2930 *eventPP = rawName;
2931 return XML_ERROR_TAG_MISMATCH;
2932 }
2933 --parser->m_tagLevel;
2934 if (parser->m_endElementHandler) {
2935 const XML_Char *localPart;
2936 const XML_Char *prefix;
2937 XML_Char *uri;
2938 localPart = tag->name.localPart;
2939 if (parser->m_ns && localPart) {
2940 /* localPart and prefix may have been overwritten in
2941 tag->name.str, since this points to the binding->uri
2942 buffer which gets re-used; so we have to add them again
2943 */
2944 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2945 /* don't need to check for space - already done in storeAtts() */
2946 while (*localPart) *uri++ = *localPart++;
2947 prefix = (XML_Char *)tag->name.prefix;
2948 if (parser->m_ns_triplets && prefix) {
2949 *uri++ = parser->m_namespaceSeparator;
2950 while (*prefix) *uri++ = *prefix++;
2951 }
2952 *uri = XML_T('\0');
2953 }
2954 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
2955 }
2956 else if (parser->m_defaultHandler)
2957 reportDefault(parser, enc, s, next);
2958 while (tag->bindings) {
2959 BINDING *b = tag->bindings;
2960 if (parser->m_endNamespaceDeclHandler)
2961 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
2962 tag->bindings = tag->bindings->nextTagBinding;
2963 b->nextTagBinding = parser->m_freeBindingList;
2964 parser->m_freeBindingList = b;
2965 b->prefix->binding = b->prevPrefixBinding;
2966 }
2967 if (parser->m_tagLevel == 0)
2968 return epilogProcessor(parser, next, end, nextPtr);
2969 }
2970 break;
2971 case XML_TOK_CHAR_REF:
2972 {
2973 int n = XmlCharRefNumber(enc, s);
2974 if (n < 0)
2975 return XML_ERROR_BAD_CHAR_REF;
2976 if (parser->m_characterDataHandler) {
2977 XML_Char buf[XML_ENCODE_MAX];
2978 parser->m_characterDataHandler(parser->m_handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
2979 }
2980 else if (parser->m_defaultHandler)
2981 reportDefault(parser, enc, s, next);
2982 }
2983 break;
2984 case XML_TOK_XML_DECL:
2985 return XML_ERROR_MISPLACED_XML_PI;
2986 case XML_TOK_DATA_NEWLINE:
2987 if (parser->m_characterDataHandler) {
2988 XML_Char c = 0xA;
2989 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2990 }
2991 else if (parser->m_defaultHandler)
2992 reportDefault(parser, enc, s, next);
2993 break;
2994 case XML_TOK_CDATA_SECT_OPEN:
2995 {
2996 enum XML_Error result;
2997 if (parser->m_startCdataSectionHandler)
2998 parser->m_startCdataSectionHandler(parser->m_handlerArg);
2999 #if 0
3000 /* Suppose you doing a transformation on a document that involves
3001 changing only the character data. You set up a defaultHandler
3002 and a characterDataHandler. The defaultHandler simply copies
3003 characters through. The characterDataHandler does the
3004 transformation and writes the characters out escaping them as
3005 necessary. This case will fail to work if we leave out the
3006 following two lines (because & and < inside CDATA sections will
3007 be incorrectly escaped).
3008
3009 However, now we have a start/endCdataSectionHandler, so it seems
3010 easier to let the user deal with this.
3011 */
3012 else if (parser->m_characterDataHandler)
3013 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
3014 #endif
3015 else if (parser->m_defaultHandler)
3016 reportDefault(parser, enc, s, next);
3017 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
3018 if (result != XML_ERROR_NONE)
3019 return result;
3020 else if (!next) {
3021 parser->m_processor = cdataSectionProcessor;
3022 return result;
3023 }
3024 }
3025 break;
3026 case XML_TOK_TRAILING_RSQB:
3027 if (haveMore) {
3028 *nextPtr = s;
3029 return XML_ERROR_NONE;
3030 }
3031 if (parser->m_characterDataHandler) {
3032 if (MUST_CONVERT(enc, s)) {
3033 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3034 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3035 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3036 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3037 }
3038 else
3039 parser->m_characterDataHandler(parser->m_handlerArg,
3040 (XML_Char *)s,
3041 (int)((XML_Char *)end - (XML_Char *)s));
3042 }
3043 else if (parser->m_defaultHandler)
3044 reportDefault(parser, enc, s, end);
3045 /* We are at the end of the final buffer, should we check for
3046 XML_SUSPENDED, XML_FINISHED?
3047 */
3048 if (startTagLevel == 0) {
3049 *eventPP = end;
3050 return XML_ERROR_NO_ELEMENTS;
3051 }
3052 if (parser->m_tagLevel != startTagLevel) {
3053 *eventPP = end;
3054 return XML_ERROR_ASYNC_ENTITY;
3055 }
3056 *nextPtr = end;
3057 return XML_ERROR_NONE;
3058 case XML_TOK_DATA_CHARS:
3059 {
3060 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3061 if (charDataHandler) {
3062 if (MUST_CONVERT(enc, s)) {
3063 for (;;) {
3064 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3065 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3066 *eventEndPP = s;
3067 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3068 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3069 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3070 break;
3071 *eventPP = s;
3072 }
3073 }
3074 else
3075 charDataHandler(parser->m_handlerArg,
3076 (XML_Char *)s,
3077 (int)((XML_Char *)next - (XML_Char *)s));
3078 }
3079 else if (parser->m_defaultHandler)
3080 reportDefault(parser, enc, s, next);
3081 }
3082 break;
3083 case XML_TOK_PI:
3084 if (!reportProcessingInstruction(parser, enc, s, next))
3085 return XML_ERROR_NO_MEMORY;
3086 break;
3087 case XML_TOK_COMMENT:
3088 if (!reportComment(parser, enc, s, next))
3089 return XML_ERROR_NO_MEMORY;
3090 break;
3091 default:
3092 /* All of the tokens produced by XmlContentTok() have their own
3093 * explicit cases, so this default is not strictly necessary.
3094 * However it is a useful safety net, so we retain the code and
3095 * simply exclude it from the coverage tests.
3096 *
3097 * LCOV_EXCL_START
3098 */
3099 if (parser->m_defaultHandler)
3100 reportDefault(parser, enc, s, next);
3101 break;
3102 /* LCOV_EXCL_STOP */
3103 }
3104 *eventPP = s = next;
3105 switch (parser->m_parsingStatus.parsing) {
3106 case XML_SUSPENDED:
3107 *nextPtr = next;
3108 return XML_ERROR_NONE;
3109 case XML_FINISHED:
3110 return XML_ERROR_ABORTED;
3111 default: ;
3112 }
3113 }
3114 /* not reached */
3115 }
3116
3117 /* This function does not call free() on the allocated memory, merely
3118 * moving it to the parser's m_freeBindingList where it can be freed or
3119 * reused as appropriate.
3120 */
3121 static void
freeBindings(XML_Parser parser,BINDING * bindings)3122 freeBindings(XML_Parser parser, BINDING *bindings)
3123 {
3124 while (bindings) {
3125 BINDING *b = bindings;
3126
3127 /* m_startNamespaceDeclHandler will have been called for this
3128 * binding in addBindings(), so call the end handler now.
3129 */
3130 if (parser->m_endNamespaceDeclHandler)
3131 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3132
3133 bindings = bindings->nextTagBinding;
3134 b->nextTagBinding = parser->m_freeBindingList;
3135 parser->m_freeBindingList = b;
3136 b->prefix->binding = b->prevPrefixBinding;
3137 }
3138 }
3139
3140 /* Precondition: all arguments must be non-NULL;
3141 Purpose:
3142 - normalize attributes
3143 - check attributes for well-formedness
3144 - generate namespace aware attribute names (URI, prefix)
3145 - build list of attributes for startElementHandler
3146 - default attributes
3147 - process namespace declarations (check and report them)
3148 - generate namespace aware element name (URI, prefix)
3149 */
3150 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr)3151 storeAtts(XML_Parser parser, const ENCODING *enc,
3152 const char *attStr, TAG_NAME *tagNamePtr,
3153 BINDING **bindingsPtr)
3154 {
3155 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
3156 ELEMENT_TYPE *elementType;
3157 int nDefaultAtts;
3158 const XML_Char **appAtts; /* the attribute list for the application */
3159 int attIndex = 0;
3160 int prefixLen;
3161 int i;
3162 int n;
3163 XML_Char *uri;
3164 int nPrefixes = 0;
3165 BINDING *binding;
3166 const XML_Char *localPart;
3167
3168 /* lookup the element type name */
3169 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
3170 if (!elementType) {
3171 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3172 if (!name)
3173 return XML_ERROR_NO_MEMORY;
3174 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3175 sizeof(ELEMENT_TYPE));
3176 if (!elementType)
3177 return XML_ERROR_NO_MEMORY;
3178 if (parser->m_ns && !setElementTypePrefix(parser, elementType))
3179 return XML_ERROR_NO_MEMORY;
3180 }
3181 nDefaultAtts = elementType->nDefaultAtts;
3182
3183 /* get the attributes from the tokenizer */
3184 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3185 if (n + nDefaultAtts > parser->m_attsSize) {
3186 int oldAttsSize = parser->m_attsSize;
3187 ATTRIBUTE *temp;
3188 #ifdef XML_ATTR_INFO
3189 XML_AttrInfo *temp2;
3190 #endif
3191 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3192 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, parser->m_attsSize * sizeof(ATTRIBUTE));
3193 if (temp == NULL) {
3194 parser->m_attsSize = oldAttsSize;
3195 return XML_ERROR_NO_MEMORY;
3196 }
3197 parser->m_atts = temp;
3198 #ifdef XML_ATTR_INFO
3199 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, parser->m_attsSize * sizeof(XML_AttrInfo));
3200 if (temp2 == NULL) {
3201 parser->m_attsSize = oldAttsSize;
3202 return XML_ERROR_NO_MEMORY;
3203 }
3204 parser->m_attInfo = temp2;
3205 #endif
3206 if (n > oldAttsSize)
3207 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3208 }
3209
3210 appAtts = (const XML_Char **)parser->m_atts;
3211 for (i = 0; i < n; i++) {
3212 ATTRIBUTE *currAtt = &parser->m_atts[i];
3213 #ifdef XML_ATTR_INFO
3214 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3215 #endif
3216 /* add the name and value to the attribute list */
3217 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
3218 currAtt->name
3219 + XmlNameLength(enc, currAtt->name));
3220 if (!attId)
3221 return XML_ERROR_NO_MEMORY;
3222 #ifdef XML_ATTR_INFO
3223 currAttInfo->nameStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3224 currAttInfo->nameEnd = currAttInfo->nameStart +
3225 XmlNameLength(enc, currAtt->name);
3226 currAttInfo->valueStart = parser->m_parseEndByteIndex -
3227 (parser->m_parseEndPtr - currAtt->valuePtr);
3228 currAttInfo->valueEnd = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valueEnd);
3229 #endif
3230 /* Detect duplicate attributes by their QNames. This does not work when
3231 namespace processing is turned on and different prefixes for the same
3232 namespace are used. For this case we have a check further down.
3233 */
3234 if ((attId->name)[-1]) {
3235 if (enc == parser->m_encoding)
3236 parser->m_eventPtr = parser->m_atts[i].name;
3237 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3238 }
3239 (attId->name)[-1] = 1;
3240 appAtts[attIndex++] = attId->name;
3241 if (!parser->m_atts[i].normalized) {
3242 enum XML_Error result;
3243 XML_Bool isCdata = XML_TRUE;
3244
3245 /* figure out whether declared as other than CDATA */
3246 if (attId->maybeTokenized) {
3247 int j;
3248 for (j = 0; j < nDefaultAtts; j++) {
3249 if (attId == elementType->defaultAtts[j].id) {
3250 isCdata = elementType->defaultAtts[j].isCdata;
3251 break;
3252 }
3253 }
3254 }
3255
3256 /* normalize the attribute value */
3257 result = storeAttributeValue(parser, enc, isCdata,
3258 parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd,
3259 &parser->m_tempPool);
3260 if (result)
3261 return result;
3262 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3263 poolFinish(&parser->m_tempPool);
3264 }
3265 else {
3266 /* the value did not need normalizing */
3267 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, parser->m_atts[i].valuePtr,
3268 parser->m_atts[i].valueEnd);
3269 if (appAtts[attIndex] == 0)
3270 return XML_ERROR_NO_MEMORY;
3271 poolFinish(&parser->m_tempPool);
3272 }
3273 /* handle prefixed attribute names */
3274 if (attId->prefix) {
3275 if (attId->xmlns) {
3276 /* deal with namespace declarations here */
3277 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3278 appAtts[attIndex], bindingsPtr);
3279 if (result)
3280 return result;
3281 --attIndex;
3282 }
3283 else {
3284 /* deal with other prefixed names later */
3285 attIndex++;
3286 nPrefixes++;
3287 (attId->name)[-1] = 2;
3288 }
3289 }
3290 else
3291 attIndex++;
3292 }
3293
3294 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3295 parser->m_nSpecifiedAtts = attIndex;
3296 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3297 for (i = 0; i < attIndex; i += 2)
3298 if (appAtts[i] == elementType->idAtt->name) {
3299 parser->m_idAttIndex = i;
3300 break;
3301 }
3302 }
3303 else
3304 parser->m_idAttIndex = -1;
3305
3306 /* do attribute defaulting */
3307 for (i = 0; i < nDefaultAtts; i++) {
3308 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3309 if (!(da->id->name)[-1] && da->value) {
3310 if (da->id->prefix) {
3311 if (da->id->xmlns) {
3312 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3313 da->value, bindingsPtr);
3314 if (result)
3315 return result;
3316 }
3317 else {
3318 (da->id->name)[-1] = 2;
3319 nPrefixes++;
3320 appAtts[attIndex++] = da->id->name;
3321 appAtts[attIndex++] = da->value;
3322 }
3323 }
3324 else {
3325 (da->id->name)[-1] = 1;
3326 appAtts[attIndex++] = da->id->name;
3327 appAtts[attIndex++] = da->value;
3328 }
3329 }
3330 }
3331 appAtts[attIndex] = 0;
3332
3333 /* expand prefixed attribute names, check for duplicates,
3334 and clear flags that say whether attributes were specified */
3335 i = 0;
3336 if (nPrefixes) {
3337 int j; /* hash table index */
3338 unsigned long version = parser->m_nsAttsVersion;
3339 int nsAttsSize = (int)1 << parser->m_nsAttsPower;
3340 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3341 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3342 if ((nPrefixes << 1) >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3343 NS_ATT *temp;
3344 /* hash table size must also be a power of 2 and >= 8 */
3345 while (nPrefixes >> parser->m_nsAttsPower++);
3346 if (parser->m_nsAttsPower < 3)
3347 parser->m_nsAttsPower = 3;
3348 nsAttsSize = (int)1 << parser->m_nsAttsPower;
3349 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
3350 if (!temp) {
3351 /* Restore actual size of memory in m_nsAtts */
3352 parser->m_nsAttsPower = oldNsAttsPower;
3353 return XML_ERROR_NO_MEMORY;
3354 }
3355 parser->m_nsAtts = temp;
3356 version = 0; /* force re-initialization of m_nsAtts hash table */
3357 }
3358 /* using a version flag saves us from initializing m_nsAtts every time */
3359 if (!version) { /* initialize version flags when version wraps around */
3360 version = INIT_ATTS_VERSION;
3361 for (j = nsAttsSize; j != 0; )
3362 parser->m_nsAtts[--j].version = version;
3363 }
3364 parser->m_nsAttsVersion = --version;
3365
3366 /* expand prefixed names and check for duplicates */
3367 for (; i < attIndex; i += 2) {
3368 const XML_Char *s = appAtts[i];
3369 if (s[-1] == 2) { /* prefixed */
3370 ATTRIBUTE_ID *id;
3371 const BINDING *b;
3372 unsigned long uriHash;
3373 struct siphash sip_state;
3374 struct sipkey sip_key;
3375
3376 copy_salt_to_sipkey(parser, &sip_key);
3377 sip24_init(&sip_state, &sip_key);
3378
3379 ((XML_Char *)s)[-1] = 0; /* clear flag */
3380 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3381 if (!id || !id->prefix) {
3382 /* This code is walking through the appAtts array, dealing
3383 * with (in this case) a prefixed attribute name. To be in
3384 * the array, the attribute must have already been bound, so
3385 * has to have passed through the hash table lookup once
3386 * already. That implies that an entry for it already
3387 * exists, so the lookup above will return a pointer to
3388 * already allocated memory. There is no opportunaity for
3389 * the allocator to fail, so the condition above cannot be
3390 * fulfilled.
3391 *
3392 * Since it is difficult to be certain that the above
3393 * analysis is complete, we retain the test and merely
3394 * remove the code from coverage tests.
3395 */
3396 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3397 }
3398 b = id->prefix->binding;
3399 if (!b)
3400 return XML_ERROR_UNBOUND_PREFIX;
3401
3402 for (j = 0; j < b->uriLen; j++) {
3403 const XML_Char c = b->uri[j];
3404 if (!poolAppendChar(&parser->m_tempPool, c))
3405 return XML_ERROR_NO_MEMORY;
3406 }
3407
3408 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3409
3410 while (*s++ != XML_T(ASCII_COLON))
3411 ;
3412
3413 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3414
3415 do { /* copies null terminator */
3416 if (!poolAppendChar(&parser->m_tempPool, *s))
3417 return XML_ERROR_NO_MEMORY;
3418 } while (*s++);
3419
3420 uriHash = (unsigned long)sip24_final(&sip_state);
3421
3422 { /* Check hash table for duplicate of expanded name (uriName).
3423 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3424 */
3425 unsigned char step = 0;
3426 unsigned long mask = nsAttsSize - 1;
3427 j = uriHash & mask; /* index into hash table */
3428 while (parser->m_nsAtts[j].version == version) {
3429 /* for speed we compare stored hash values first */
3430 if (uriHash == parser->m_nsAtts[j].hash) {
3431 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3432 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3433 /* s1 is null terminated, but not s2 */
3434 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
3435 if (*s1 == 0)
3436 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3437 }
3438 if (!step)
3439 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3440 j < step ? (j += nsAttsSize - step) : (j -= step);
3441 }
3442 }
3443
3444 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3445 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3446 s = b->prefix->name;
3447 do {
3448 if (!poolAppendChar(&parser->m_tempPool, *s))
3449 return XML_ERROR_NO_MEMORY;
3450 } while (*s++);
3451 }
3452
3453 /* store expanded name in attribute list */
3454 s = poolStart(&parser->m_tempPool);
3455 poolFinish(&parser->m_tempPool);
3456 appAtts[i] = s;
3457
3458 /* fill empty slot with new version, uriName and hash value */
3459 parser->m_nsAtts[j].version = version;
3460 parser->m_nsAtts[j].hash = uriHash;
3461 parser->m_nsAtts[j].uriName = s;
3462
3463 if (!--nPrefixes) {
3464 i += 2;
3465 break;
3466 }
3467 }
3468 else /* not prefixed */
3469 ((XML_Char *)s)[-1] = 0; /* clear flag */
3470 }
3471 }
3472 /* clear flags for the remaining attributes */
3473 for (; i < attIndex; i += 2)
3474 ((XML_Char *)(appAtts[i]))[-1] = 0;
3475 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3476 binding->attId->name[-1] = 0;
3477
3478 if (!parser->m_ns)
3479 return XML_ERROR_NONE;
3480
3481 /* expand the element type name */
3482 if (elementType->prefix) {
3483 binding = elementType->prefix->binding;
3484 if (!binding)
3485 return XML_ERROR_UNBOUND_PREFIX;
3486 localPart = tagNamePtr->str;
3487 while (*localPart++ != XML_T(ASCII_COLON))
3488 ;
3489 }
3490 else if (dtd->defaultPrefix.binding) {
3491 binding = dtd->defaultPrefix.binding;
3492 localPart = tagNamePtr->str;
3493 }
3494 else
3495 return XML_ERROR_NONE;
3496 prefixLen = 0;
3497 if (parser->m_ns_triplets && binding->prefix->name) {
3498 for (; binding->prefix->name[prefixLen++];)
3499 ; /* prefixLen includes null terminator */
3500 }
3501 tagNamePtr->localPart = localPart;
3502 tagNamePtr->uriLen = binding->uriLen;
3503 tagNamePtr->prefix = binding->prefix->name;
3504 tagNamePtr->prefixLen = prefixLen;
3505 for (i = 0; localPart[i++];)
3506 ; /* i includes null terminator */
3507 n = i + binding->uriLen + prefixLen;
3508 if (n > binding->uriAlloc) {
3509 TAG *p;
3510 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3511 if (!uri)
3512 return XML_ERROR_NO_MEMORY;
3513 binding->uriAlloc = n + EXPAND_SPARE;
3514 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3515 for (p = parser->m_tagStack; p; p = p->parent)
3516 if (p->name.str == binding->uri)
3517 p->name.str = uri;
3518 FREE(parser, binding->uri);
3519 binding->uri = uri;
3520 }
3521 /* if m_namespaceSeparator != '\0' then uri includes it already */
3522 uri = binding->uri + binding->uriLen;
3523 memcpy(uri, localPart, i * sizeof(XML_Char));
3524 /* we always have a namespace separator between localPart and prefix */
3525 if (prefixLen) {
3526 uri += i - 1;
3527 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3528 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3529 }
3530 tagNamePtr->str = binding->uri;
3531 return XML_ERROR_NONE;
3532 }
3533
3534 /* addBinding() overwrites the value of prefix->binding without checking.
3535 Therefore one must keep track of the old value outside of addBinding().
3536 */
3537 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3538 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3539 const XML_Char *uri, BINDING **bindingsPtr)
3540 {
3541 static const XML_Char xmlNamespace[] = {
3542 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3543 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3544 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3545 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3546 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3547 ASCII_e, '\0'
3548 };
3549 static const int xmlLen =
3550 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3551 static const XML_Char xmlnsNamespace[] = {
3552 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3553 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3554 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3555 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3556 ASCII_SLASH, '\0'
3557 };
3558 static const int xmlnsLen =
3559 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3560
3561 XML_Bool mustBeXML = XML_FALSE;
3562 XML_Bool isXML = XML_TRUE;
3563 XML_Bool isXMLNS = XML_TRUE;
3564
3565 BINDING *b;
3566 int len;
3567
3568 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3569 if (*uri == XML_T('\0') && prefix->name)
3570 return XML_ERROR_UNDECLARING_PREFIX;
3571
3572 if (prefix->name
3573 && prefix->name[0] == XML_T(ASCII_x)
3574 && prefix->name[1] == XML_T(ASCII_m)
3575 && prefix->name[2] == XML_T(ASCII_l)) {
3576
3577 /* Not allowed to bind xmlns */
3578 if (prefix->name[3] == XML_T(ASCII_n)
3579 && prefix->name[4] == XML_T(ASCII_s)
3580 && prefix->name[5] == XML_T('\0'))
3581 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3582
3583 if (prefix->name[3] == XML_T('\0'))
3584 mustBeXML = XML_TRUE;
3585 }
3586
3587 for (len = 0; uri[len]; len++) {
3588 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3589 isXML = XML_FALSE;
3590
3591 if (!mustBeXML && isXMLNS
3592 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3593 isXMLNS = XML_FALSE;
3594 }
3595 isXML = isXML && len == xmlLen;
3596 isXMLNS = isXMLNS && len == xmlnsLen;
3597
3598 if (mustBeXML != isXML)
3599 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3600 : XML_ERROR_RESERVED_NAMESPACE_URI;
3601
3602 if (isXMLNS)
3603 return XML_ERROR_RESERVED_NAMESPACE_URI;
3604
3605 if (parser->m_namespaceSeparator)
3606 len++;
3607 if (parser->m_freeBindingList) {
3608 b = parser->m_freeBindingList;
3609 if (len > b->uriAlloc) {
3610 XML_Char *temp = (XML_Char *)REALLOC(parser, b->uri,
3611 sizeof(XML_Char) * (len + EXPAND_SPARE));
3612 if (temp == NULL)
3613 return XML_ERROR_NO_MEMORY;
3614 b->uri = temp;
3615 b->uriAlloc = len + EXPAND_SPARE;
3616 }
3617 parser->m_freeBindingList = b->nextTagBinding;
3618 }
3619 else {
3620 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
3621 if (!b)
3622 return XML_ERROR_NO_MEMORY;
3623 b->uri = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3624 if (!b->uri) {
3625 FREE(parser, b);
3626 return XML_ERROR_NO_MEMORY;
3627 }
3628 b->uriAlloc = len + EXPAND_SPARE;
3629 }
3630 b->uriLen = len;
3631 memcpy(b->uri, uri, len * sizeof(XML_Char));
3632 if (parser->m_namespaceSeparator)
3633 b->uri[len - 1] = parser->m_namespaceSeparator;
3634 b->prefix = prefix;
3635 b->attId = attId;
3636 b->prevPrefixBinding = prefix->binding;
3637 /* NULL binding when default namespace undeclared */
3638 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
3639 prefix->binding = NULL;
3640 else
3641 prefix->binding = b;
3642 b->nextTagBinding = *bindingsPtr;
3643 *bindingsPtr = b;
3644 /* if attId == NULL then we are not starting a namespace scope */
3645 if (attId && parser->m_startNamespaceDeclHandler)
3646 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
3647 prefix->binding ? uri : 0);
3648 return XML_ERROR_NONE;
3649 }
3650
3651 /* The idea here is to avoid using stack for each CDATA section when
3652 the whole file is parsed with one call.
3653 */
3654 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3655 cdataSectionProcessor(XML_Parser parser,
3656 const char *start,
3657 const char *end,
3658 const char **endPtr)
3659 {
3660 enum XML_Error result = doCdataSection(parser, parser->m_encoding, &start, end,
3661 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
3662 if (result != XML_ERROR_NONE)
3663 return result;
3664 if (start) {
3665 if (parser->m_parentParser) { /* we are parsing an external entity */
3666 parser->m_processor = externalEntityContentProcessor;
3667 return externalEntityContentProcessor(parser, start, end, endPtr);
3668 }
3669 else {
3670 parser->m_processor = contentProcessor;
3671 return contentProcessor(parser, start, end, endPtr);
3672 }
3673 }
3674 return result;
3675 }
3676
3677 /* startPtr gets set to non-null if the section is closed, and to null if
3678 the section is not yet closed.
3679 */
3680 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)3681 doCdataSection(XML_Parser parser,
3682 const ENCODING *enc,
3683 const char **startPtr,
3684 const char *end,
3685 const char **nextPtr,
3686 XML_Bool haveMore)
3687 {
3688 const char *s = *startPtr;
3689 const char **eventPP;
3690 const char **eventEndPP;
3691 if (enc == parser->m_encoding) {
3692 eventPP = &parser->m_eventPtr;
3693 *eventPP = s;
3694 eventEndPP = &parser->m_eventEndPtr;
3695 }
3696 else {
3697 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3698 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3699 }
3700 *eventPP = s;
3701 *startPtr = NULL;
3702
3703 for (;;) {
3704 const char *next;
3705 int tok = XmlCdataSectionTok(enc, s, end, &next);
3706 *eventEndPP = next;
3707 switch (tok) {
3708 case XML_TOK_CDATA_SECT_CLOSE:
3709 if (parser->m_endCdataSectionHandler)
3710 parser->m_endCdataSectionHandler(parser->m_handlerArg);
3711 #if 0
3712 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3713 else if (parser->m_characterDataHandler)
3714 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
3715 #endif
3716 else if (parser->m_defaultHandler)
3717 reportDefault(parser, enc, s, next);
3718 *startPtr = next;
3719 *nextPtr = next;
3720 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3721 return XML_ERROR_ABORTED;
3722 else
3723 return XML_ERROR_NONE;
3724 case XML_TOK_DATA_NEWLINE:
3725 if (parser->m_characterDataHandler) {
3726 XML_Char c = 0xA;
3727 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3728 }
3729 else if (parser->m_defaultHandler)
3730 reportDefault(parser, enc, s, next);
3731 break;
3732 case XML_TOK_DATA_CHARS:
3733 {
3734 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3735 if (charDataHandler) {
3736 if (MUST_CONVERT(enc, s)) {
3737 for (;;) {
3738 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3739 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3740 *eventEndPP = next;
3741 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3742 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3743 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3744 break;
3745 *eventPP = s;
3746 }
3747 }
3748 else
3749 charDataHandler(parser->m_handlerArg,
3750 (XML_Char *)s,
3751 (int)((XML_Char *)next - (XML_Char *)s));
3752 }
3753 else if (parser->m_defaultHandler)
3754 reportDefault(parser, enc, s, next);
3755 }
3756 break;
3757 case XML_TOK_INVALID:
3758 *eventPP = next;
3759 return XML_ERROR_INVALID_TOKEN;
3760 case XML_TOK_PARTIAL_CHAR:
3761 if (haveMore) {
3762 *nextPtr = s;
3763 return XML_ERROR_NONE;
3764 }
3765 return XML_ERROR_PARTIAL_CHAR;
3766 case XML_TOK_PARTIAL:
3767 case XML_TOK_NONE:
3768 if (haveMore) {
3769 *nextPtr = s;
3770 return XML_ERROR_NONE;
3771 }
3772 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3773 default:
3774 /* Every token returned by XmlCdataSectionTok() has its own
3775 * explicit case, so this default case will never be executed.
3776 * We retain it as a safety net and exclude it from the coverage
3777 * statistics.
3778 *
3779 * LCOV_EXCL_START
3780 */
3781 *eventPP = next;
3782 return XML_ERROR_UNEXPECTED_STATE;
3783 /* LCOV_EXCL_STOP */
3784 }
3785
3786 *eventPP = s = next;
3787 switch (parser->m_parsingStatus.parsing) {
3788 case XML_SUSPENDED:
3789 *nextPtr = next;
3790 return XML_ERROR_NONE;
3791 case XML_FINISHED:
3792 return XML_ERROR_ABORTED;
3793 default: ;
3794 }
3795 }
3796 /* not reached */
3797 }
3798
3799 #ifdef XML_DTD
3800
3801 /* The idea here is to avoid using stack for each IGNORE section when
3802 the whole file is parsed with one call.
3803 */
3804 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3805 ignoreSectionProcessor(XML_Parser parser,
3806 const char *start,
3807 const char *end,
3808 const char **endPtr)
3809 {
3810 enum XML_Error result = doIgnoreSection(parser, parser->m_encoding, &start, end,
3811 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
3812 if (result != XML_ERROR_NONE)
3813 return result;
3814 if (start) {
3815 parser->m_processor = prologProcessor;
3816 return prologProcessor(parser, start, end, endPtr);
3817 }
3818 return result;
3819 }
3820
3821 /* startPtr gets set to non-null is the section is closed, and to null
3822 if the section is not yet closed.
3823 */
3824 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)3825 doIgnoreSection(XML_Parser parser,
3826 const ENCODING *enc,
3827 const char **startPtr,
3828 const char *end,
3829 const char **nextPtr,
3830 XML_Bool haveMore)
3831 {
3832 const char *next;
3833 int tok;
3834 const char *s = *startPtr;
3835 const char **eventPP;
3836 const char **eventEndPP;
3837 if (enc == parser->m_encoding) {
3838 eventPP = &parser->m_eventPtr;
3839 *eventPP = s;
3840 eventEndPP = &parser->m_eventEndPtr;
3841 }
3842 else {
3843 /* It's not entirely clear, but it seems the following two lines
3844 * of code cannot be executed. The only occasions on which 'enc'
3845 * is not 'encoding' are when this function is called
3846 * from the internal entity processing, and IGNORE sections are an
3847 * error in internal entities.
3848 *
3849 * Since it really isn't clear that this is true, we keep the code
3850 * and just remove it from our coverage tests.
3851 *
3852 * LCOV_EXCL_START
3853 */
3854 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3855 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3856 /* LCOV_EXCL_STOP */
3857 }
3858 *eventPP = s;
3859 *startPtr = NULL;
3860 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3861 *eventEndPP = next;
3862 switch (tok) {
3863 case XML_TOK_IGNORE_SECT:
3864 if (parser->m_defaultHandler)
3865 reportDefault(parser, enc, s, next);
3866 *startPtr = next;
3867 *nextPtr = next;
3868 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3869 return XML_ERROR_ABORTED;
3870 else
3871 return XML_ERROR_NONE;
3872 case XML_TOK_INVALID:
3873 *eventPP = next;
3874 return XML_ERROR_INVALID_TOKEN;
3875 case XML_TOK_PARTIAL_CHAR:
3876 if (haveMore) {
3877 *nextPtr = s;
3878 return XML_ERROR_NONE;
3879 }
3880 return XML_ERROR_PARTIAL_CHAR;
3881 case XML_TOK_PARTIAL:
3882 case XML_TOK_NONE:
3883 if (haveMore) {
3884 *nextPtr = s;
3885 return XML_ERROR_NONE;
3886 }
3887 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3888 default:
3889 /* All of the tokens that XmlIgnoreSectionTok() returns have
3890 * explicit cases to handle them, so this default case is never
3891 * executed. We keep it as a safety net anyway, and remove it
3892 * from our test coverage statistics.
3893 *
3894 * LCOV_EXCL_START
3895 */
3896 *eventPP = next;
3897 return XML_ERROR_UNEXPECTED_STATE;
3898 /* LCOV_EXCL_STOP */
3899 }
3900 /* not reached */
3901 }
3902
3903 #endif /* XML_DTD */
3904
3905 static enum XML_Error
initializeEncoding(XML_Parser parser)3906 initializeEncoding(XML_Parser parser)
3907 {
3908 const char *s;
3909 #ifdef XML_UNICODE
3910 char encodingBuf[128];
3911 /* See comments abount `protoclEncodingName` in parserInit() */
3912 if (!parser->m_protocolEncodingName)
3913 s = NULL;
3914 else {
3915 int i;
3916 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
3917 if (i == sizeof(encodingBuf) - 1
3918 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
3919 encodingBuf[0] = '\0';
3920 break;
3921 }
3922 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
3923 }
3924 encodingBuf[i] = '\0';
3925 s = encodingBuf;
3926 }
3927 #else
3928 s = parser->m_protocolEncodingName;
3929 #endif
3930 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(&parser->m_initEncoding, &parser->m_encoding, s))
3931 return XML_ERROR_NONE;
3932 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
3933 }
3934
3935 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)3936 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
3937 const char *s, const char *next)
3938 {
3939 const char *encodingName = NULL;
3940 const XML_Char *storedEncName = NULL;
3941 const ENCODING *newEncoding = NULL;
3942 const char *version = NULL;
3943 const char *versionend;
3944 const XML_Char *storedversion = NULL;
3945 int standalone = -1;
3946 if (!(parser->m_ns
3947 ? XmlParseXmlDeclNS
3948 : XmlParseXmlDecl)(isGeneralTextEntity,
3949 parser->m_encoding,
3950 s,
3951 next,
3952 &parser->m_eventPtr,
3953 &version,
3954 &versionend,
3955 &encodingName,
3956 &newEncoding,
3957 &standalone)) {
3958 if (isGeneralTextEntity)
3959 return XML_ERROR_TEXT_DECL;
3960 else
3961 return XML_ERROR_XML_DECL;
3962 }
3963 if (!isGeneralTextEntity && standalone == 1) {
3964 parser->m_dtd->standalone = XML_TRUE;
3965 #ifdef XML_DTD
3966 if (parser->m_paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3967 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3968 #endif /* XML_DTD */
3969 }
3970 if (parser->m_xmlDeclHandler) {
3971 if (encodingName != NULL) {
3972 storedEncName = poolStoreString(&parser->m_temp2Pool,
3973 parser->m_encoding,
3974 encodingName,
3975 encodingName
3976 + XmlNameLength(parser->m_encoding, encodingName));
3977 if (!storedEncName)
3978 return XML_ERROR_NO_MEMORY;
3979 poolFinish(&parser->m_temp2Pool);
3980 }
3981 if (version) {
3982 storedversion = poolStoreString(&parser->m_temp2Pool,
3983 parser->m_encoding,
3984 version,
3985 versionend - parser->m_encoding->minBytesPerChar);
3986 if (!storedversion)
3987 return XML_ERROR_NO_MEMORY;
3988 }
3989 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, standalone);
3990 }
3991 else if (parser->m_defaultHandler)
3992 reportDefault(parser, parser->m_encoding, s, next);
3993 if (parser->m_protocolEncodingName == NULL) {
3994 if (newEncoding) {
3995 /* Check that the specified encoding does not conflict with what
3996 * the parser has already deduced. Do we have the same number
3997 * of bytes in the smallest representation of a character? If
3998 * this is UTF-16, is it the same endianness?
3999 */
4000 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4001 || (newEncoding->minBytesPerChar == 2 &&
4002 newEncoding != parser->m_encoding)) {
4003 parser->m_eventPtr = encodingName;
4004 return XML_ERROR_INCORRECT_ENCODING;
4005 }
4006 parser->m_encoding = newEncoding;
4007 }
4008 else if (encodingName) {
4009 enum XML_Error result;
4010 if (!storedEncName) {
4011 storedEncName = poolStoreString(
4012 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4013 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4014 if (!storedEncName)
4015 return XML_ERROR_NO_MEMORY;
4016 }
4017 result = handleUnknownEncoding(parser, storedEncName);
4018 poolClear(&parser->m_temp2Pool);
4019 if (result == XML_ERROR_UNKNOWN_ENCODING)
4020 parser->m_eventPtr = encodingName;
4021 return result;
4022 }
4023 }
4024
4025 if (storedEncName || storedversion)
4026 poolClear(&parser->m_temp2Pool);
4027
4028 return XML_ERROR_NONE;
4029 }
4030
4031 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4032 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
4033 {
4034 if (parser->m_unknownEncodingHandler) {
4035 XML_Encoding info;
4036 int i;
4037 for (i = 0; i < 256; i++)
4038 info.map[i] = -1;
4039 info.convert = NULL;
4040 info.data = NULL;
4041 info.release = NULL;
4042 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, encodingName,
4043 &info)) {
4044 ENCODING *enc;
4045 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4046 if (!parser->m_unknownEncodingMem) {
4047 if (info.release)
4048 info.release(info.data);
4049 return XML_ERROR_NO_MEMORY;
4050 }
4051 enc = (parser->m_ns
4052 ? XmlInitUnknownEncodingNS
4053 : XmlInitUnknownEncoding)(parser->m_unknownEncodingMem,
4054 info.map,
4055 info.convert,
4056 info.data);
4057 if (enc) {
4058 parser->m_unknownEncodingData = info.data;
4059 parser->m_unknownEncodingRelease = info.release;
4060 parser->m_encoding = enc;
4061 return XML_ERROR_NONE;
4062 }
4063 }
4064 if (info.release != NULL)
4065 info.release(info.data);
4066 }
4067 return XML_ERROR_UNKNOWN_ENCODING;
4068 }
4069
4070 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4071 prologInitProcessor(XML_Parser parser,
4072 const char *s,
4073 const char *end,
4074 const char **nextPtr)
4075 {
4076 enum XML_Error result = initializeEncoding(parser);
4077 if (result != XML_ERROR_NONE)
4078 return result;
4079 parser->m_processor = prologProcessor;
4080 return prologProcessor(parser, s, end, nextPtr);
4081 }
4082
4083 #ifdef XML_DTD
4084
4085 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4086 externalParEntInitProcessor(XML_Parser parser,
4087 const char *s,
4088 const char *end,
4089 const char **nextPtr)
4090 {
4091 enum XML_Error result = initializeEncoding(parser);
4092 if (result != XML_ERROR_NONE)
4093 return result;
4094
4095 /* we know now that XML_Parse(Buffer) has been called,
4096 so we consider the external parameter entity read */
4097 parser->m_dtd->paramEntityRead = XML_TRUE;
4098
4099 if (parser->m_prologState.inEntityValue) {
4100 parser->m_processor = entityValueInitProcessor;
4101 return entityValueInitProcessor(parser, s, end, nextPtr);
4102 }
4103 else {
4104 parser->m_processor = externalParEntProcessor;
4105 return externalParEntProcessor(parser, s, end, nextPtr);
4106 }
4107 }
4108
4109 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4110 entityValueInitProcessor(XML_Parser parser,
4111 const char *s,
4112 const char *end,
4113 const char **nextPtr)
4114 {
4115 int tok;
4116 const char *start = s;
4117 const char *next = start;
4118 parser->m_eventPtr = start;
4119
4120 for (;;) {
4121 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4122 parser->m_eventEndPtr = next;
4123 if (tok <= 0) {
4124 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4125 *nextPtr = s;
4126 return XML_ERROR_NONE;
4127 }
4128 switch (tok) {
4129 case XML_TOK_INVALID:
4130 return XML_ERROR_INVALID_TOKEN;
4131 case XML_TOK_PARTIAL:
4132 return XML_ERROR_UNCLOSED_TOKEN;
4133 case XML_TOK_PARTIAL_CHAR:
4134 return XML_ERROR_PARTIAL_CHAR;
4135 case XML_TOK_NONE: /* start == end */
4136 default:
4137 break;
4138 }
4139 /* found end of entity value - can store it now */
4140 return storeEntityValue(parser, parser->m_encoding, s, end);
4141 }
4142 else if (tok == XML_TOK_XML_DECL) {
4143 enum XML_Error result;
4144 result = processXmlDecl(parser, 0, start, next);
4145 if (result != XML_ERROR_NONE)
4146 return result;
4147 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For that
4148 * to happen, a parameter entity parsing handler must have
4149 * attempted to suspend the parser, which fails and raises an
4150 * error. The parser can be aborted, but can't be suspended.
4151 */
4152 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4153 return XML_ERROR_ABORTED;
4154 *nextPtr = next;
4155 /* stop scanning for text declaration - we found one */
4156 parser->m_processor = entityValueProcessor;
4157 return entityValueProcessor(parser, next, end, nextPtr);
4158 }
4159 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4160 return XML_TOK_NONE on the next call, which would then cause the
4161 function to exit with *nextPtr set to s - that is what we want for other
4162 tokens, but not for the BOM - we would rather like to skip it;
4163 then, when this routine is entered the next time, XmlPrologTok will
4164 return XML_TOK_INVALID, since the BOM is still in the buffer
4165 */
4166 else if (tok == XML_TOK_BOM && next == end && !parser->m_parsingStatus.finalBuffer) {
4167 *nextPtr = next;
4168 return XML_ERROR_NONE;
4169 }
4170 /* If we get this token, we have the start of what might be a
4171 normal tag, but not a declaration (i.e. it doesn't begin with
4172 "<!"). In a DTD context, that isn't legal.
4173 */
4174 else if (tok == XML_TOK_INSTANCE_START) {
4175 *nextPtr = next;
4176 return XML_ERROR_SYNTAX;
4177 }
4178 start = next;
4179 parser->m_eventPtr = start;
4180 }
4181 }
4182
4183 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4184 externalParEntProcessor(XML_Parser parser,
4185 const char *s,
4186 const char *end,
4187 const char **nextPtr)
4188 {
4189 const char *next = s;
4190 int tok;
4191
4192 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4193 if (tok <= 0) {
4194 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4195 *nextPtr = s;
4196 return XML_ERROR_NONE;
4197 }
4198 switch (tok) {
4199 case XML_TOK_INVALID:
4200 return XML_ERROR_INVALID_TOKEN;
4201 case XML_TOK_PARTIAL:
4202 return XML_ERROR_UNCLOSED_TOKEN;
4203 case XML_TOK_PARTIAL_CHAR:
4204 return XML_ERROR_PARTIAL_CHAR;
4205 case XML_TOK_NONE: /* start == end */
4206 default:
4207 break;
4208 }
4209 }
4210 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4211 However, when parsing an external subset, doProlog will not accept a BOM
4212 as valid, and report a syntax error, so we have to skip the BOM
4213 */
4214 else if (tok == XML_TOK_BOM) {
4215 s = next;
4216 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4217 }
4218
4219 parser->m_processor = prologProcessor;
4220 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4221 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
4222 }
4223
4224 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4225 entityValueProcessor(XML_Parser parser,
4226 const char *s,
4227 const char *end,
4228 const char **nextPtr)
4229 {
4230 const char *start = s;
4231 const char *next = s;
4232 const ENCODING *enc = parser->m_encoding;
4233 int tok;
4234
4235 for (;;) {
4236 tok = XmlPrologTok(enc, start, end, &next);
4237 if (tok <= 0) {
4238 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4239 *nextPtr = s;
4240 return XML_ERROR_NONE;
4241 }
4242 switch (tok) {
4243 case XML_TOK_INVALID:
4244 return XML_ERROR_INVALID_TOKEN;
4245 case XML_TOK_PARTIAL:
4246 return XML_ERROR_UNCLOSED_TOKEN;
4247 case XML_TOK_PARTIAL_CHAR:
4248 return XML_ERROR_PARTIAL_CHAR;
4249 case XML_TOK_NONE: /* start == end */
4250 default:
4251 break;
4252 }
4253 /* found end of entity value - can store it now */
4254 return storeEntityValue(parser, enc, s, end);
4255 }
4256 start = next;
4257 }
4258 }
4259
4260 #endif /* XML_DTD */
4261
4262 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4263 prologProcessor(XML_Parser parser,
4264 const char *s,
4265 const char *end,
4266 const char **nextPtr)
4267 {
4268 const char *next = s;
4269 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4270 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4271 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
4272 }
4273
4274 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore)4275 doProlog(XML_Parser parser,
4276 const ENCODING *enc,
4277 const char *s,
4278 const char *end,
4279 int tok,
4280 const char *next,
4281 const char **nextPtr,
4282 XML_Bool haveMore)
4283 {
4284 #ifdef XML_DTD
4285 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
4286 #endif /* XML_DTD */
4287 static const XML_Char atypeCDATA[] =
4288 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
4289 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
4290 static const XML_Char atypeIDREF[] =
4291 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
4292 static const XML_Char atypeIDREFS[] =
4293 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
4294 static const XML_Char atypeENTITY[] =
4295 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
4296 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
4297 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
4298 static const XML_Char atypeNMTOKEN[] = {
4299 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
4300 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
4301 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
4302 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
4303 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
4304 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
4305 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
4306
4307 /* save one level of indirection */
4308 DTD * const dtd = parser->m_dtd;
4309
4310 const char **eventPP;
4311 const char **eventEndPP;
4312 enum XML_Content_Quant quant;
4313
4314 if (enc == parser->m_encoding) {
4315 eventPP = &parser->m_eventPtr;
4316 eventEndPP = &parser->m_eventEndPtr;
4317 }
4318 else {
4319 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4320 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4321 }
4322
4323 for (;;) {
4324 int role;
4325 XML_Bool handleDefault = XML_TRUE;
4326 *eventPP = s;
4327 *eventEndPP = next;
4328 if (tok <= 0) {
4329 if (haveMore && tok != XML_TOK_INVALID) {
4330 *nextPtr = s;
4331 return XML_ERROR_NONE;
4332 }
4333 switch (tok) {
4334 case XML_TOK_INVALID:
4335 *eventPP = next;
4336 return XML_ERROR_INVALID_TOKEN;
4337 case XML_TOK_PARTIAL:
4338 return XML_ERROR_UNCLOSED_TOKEN;
4339 case XML_TOK_PARTIAL_CHAR:
4340 return XML_ERROR_PARTIAL_CHAR;
4341 case -XML_TOK_PROLOG_S:
4342 tok = -tok;
4343 break;
4344 case XML_TOK_NONE:
4345 #ifdef XML_DTD
4346 /* for internal PE NOT referenced between declarations */
4347 if (enc != parser->m_encoding && !parser->m_openInternalEntities->betweenDecl) {
4348 *nextPtr = s;
4349 return XML_ERROR_NONE;
4350 }
4351 /* WFC: PE Between Declarations - must check that PE contains
4352 complete markup, not only for external PEs, but also for
4353 internal PEs if the reference occurs between declarations.
4354 */
4355 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4356 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4357 == XML_ROLE_ERROR)
4358 return XML_ERROR_INCOMPLETE_PE;
4359 *nextPtr = s;
4360 return XML_ERROR_NONE;
4361 }
4362 #endif /* XML_DTD */
4363 return XML_ERROR_NO_ELEMENTS;
4364 default:
4365 tok = -tok;
4366 next = end;
4367 break;
4368 }
4369 }
4370 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4371 switch (role) {
4372 case XML_ROLE_XML_DECL:
4373 {
4374 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4375 if (result != XML_ERROR_NONE)
4376 return result;
4377 enc = parser->m_encoding;
4378 handleDefault = XML_FALSE;
4379 }
4380 break;
4381 case XML_ROLE_DOCTYPE_NAME:
4382 if (parser->m_startDoctypeDeclHandler) {
4383 parser->m_doctypeName = poolStoreString(&parser->m_tempPool, enc, s, next);
4384 if (!parser->m_doctypeName)
4385 return XML_ERROR_NO_MEMORY;
4386 poolFinish(&parser->m_tempPool);
4387 parser->m_doctypePubid = NULL;
4388 handleDefault = XML_FALSE;
4389 }
4390 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4391 break;
4392 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4393 if (parser->m_startDoctypeDeclHandler) {
4394 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4395 parser->m_doctypePubid, 1);
4396 parser->m_doctypeName = NULL;
4397 poolClear(&parser->m_tempPool);
4398 handleDefault = XML_FALSE;
4399 }
4400 break;
4401 #ifdef XML_DTD
4402 case XML_ROLE_TEXT_DECL:
4403 {
4404 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4405 if (result != XML_ERROR_NONE)
4406 return result;
4407 enc = parser->m_encoding;
4408 handleDefault = XML_FALSE;
4409 }
4410 break;
4411 #endif /* XML_DTD */
4412 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4413 #ifdef XML_DTD
4414 parser->m_useForeignDTD = XML_FALSE;
4415 parser->m_declEntity = (ENTITY *)lookup(parser,
4416 &dtd->paramEntities,
4417 externalSubsetName,
4418 sizeof(ENTITY));
4419 if (!parser->m_declEntity)
4420 return XML_ERROR_NO_MEMORY;
4421 #endif /* XML_DTD */
4422 dtd->hasParamEntityRefs = XML_TRUE;
4423 if (parser->m_startDoctypeDeclHandler) {
4424 XML_Char *pubId;
4425 if (!XmlIsPublicId(enc, s, next, eventPP))
4426 return XML_ERROR_PUBLICID;
4427 pubId = poolStoreString(&parser->m_tempPool, enc,
4428 s + enc->minBytesPerChar,
4429 next - enc->minBytesPerChar);
4430 if (!pubId)
4431 return XML_ERROR_NO_MEMORY;
4432 normalizePublicId(pubId);
4433 poolFinish(&parser->m_tempPool);
4434 parser->m_doctypePubid = pubId;
4435 handleDefault = XML_FALSE;
4436 goto alreadyChecked;
4437 }
4438 /* fall through */
4439 case XML_ROLE_ENTITY_PUBLIC_ID:
4440 if (!XmlIsPublicId(enc, s, next, eventPP))
4441 return XML_ERROR_PUBLICID;
4442 alreadyChecked:
4443 if (dtd->keepProcessing && parser->m_declEntity) {
4444 XML_Char *tem = poolStoreString(&dtd->pool,
4445 enc,
4446 s + enc->minBytesPerChar,
4447 next - enc->minBytesPerChar);
4448 if (!tem)
4449 return XML_ERROR_NO_MEMORY;
4450 normalizePublicId(tem);
4451 parser->m_declEntity->publicId = tem;
4452 poolFinish(&dtd->pool);
4453 /* Don't suppress the default handler if we fell through from
4454 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4455 */
4456 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4457 handleDefault = XML_FALSE;
4458 }
4459 break;
4460 case XML_ROLE_DOCTYPE_CLOSE:
4461 if (parser->m_doctypeName) {
4462 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName,
4463 parser->m_doctypeSysid, parser->m_doctypePubid, 0);
4464 poolClear(&parser->m_tempPool);
4465 handleDefault = XML_FALSE;
4466 }
4467 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4468 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4469 was not set, indicating an external subset
4470 */
4471 #ifdef XML_DTD
4472 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4473 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4474 dtd->hasParamEntityRefs = XML_TRUE;
4475 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
4476 ENTITY *entity = (ENTITY *)lookup(parser,
4477 &dtd->paramEntities,
4478 externalSubsetName,
4479 sizeof(ENTITY));
4480 if (!entity) {
4481 /* The external subset name "#" will have already been
4482 * inserted into the hash table at the start of the
4483 * external entity parsing, so no allocation will happen
4484 * and lookup() cannot fail.
4485 */
4486 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4487 }
4488 if (parser->m_useForeignDTD)
4489 entity->base = parser->m_curBase;
4490 dtd->paramEntityRead = XML_FALSE;
4491 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
4492 0,
4493 entity->base,
4494 entity->systemId,
4495 entity->publicId))
4496 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4497 if (dtd->paramEntityRead) {
4498 if (!dtd->standalone &&
4499 parser->m_notStandaloneHandler &&
4500 !parser->m_notStandaloneHandler(parser->m_handlerArg))
4501 return XML_ERROR_NOT_STANDALONE;
4502 }
4503 /* if we didn't read the foreign DTD then this means that there
4504 is no external subset and we must reset dtd->hasParamEntityRefs
4505 */
4506 else if (!parser->m_doctypeSysid)
4507 dtd->hasParamEntityRefs = hadParamEntityRefs;
4508 /* end of DTD - no need to update dtd->keepProcessing */
4509 }
4510 parser->m_useForeignDTD = XML_FALSE;
4511 }
4512 #endif /* XML_DTD */
4513 if (parser->m_endDoctypeDeclHandler) {
4514 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4515 handleDefault = XML_FALSE;
4516 }
4517 break;
4518 case XML_ROLE_INSTANCE_START:
4519 #ifdef XML_DTD
4520 /* if there is no DOCTYPE declaration then now is the
4521 last chance to read the foreign DTD
4522 */
4523 if (parser->m_useForeignDTD) {
4524 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4525 dtd->hasParamEntityRefs = XML_TRUE;
4526 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
4527 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4528 externalSubsetName,
4529 sizeof(ENTITY));
4530 if (!entity)
4531 return XML_ERROR_NO_MEMORY;
4532 entity->base = parser->m_curBase;
4533 dtd->paramEntityRead = XML_FALSE;
4534 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
4535 0,
4536 entity->base,
4537 entity->systemId,
4538 entity->publicId))
4539 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4540 if (dtd->paramEntityRead) {
4541 if (!dtd->standalone &&
4542 parser->m_notStandaloneHandler &&
4543 !parser->m_notStandaloneHandler(parser->m_handlerArg))
4544 return XML_ERROR_NOT_STANDALONE;
4545 }
4546 /* if we didn't read the foreign DTD then this means that there
4547 is no external subset and we must reset dtd->hasParamEntityRefs
4548 */
4549 else
4550 dtd->hasParamEntityRefs = hadParamEntityRefs;
4551 /* end of DTD - no need to update dtd->keepProcessing */
4552 }
4553 }
4554 #endif /* XML_DTD */
4555 parser->m_processor = contentProcessor;
4556 return contentProcessor(parser, s, end, nextPtr);
4557 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4558 parser->m_declElementType = getElementType(parser, enc, s, next);
4559 if (!parser->m_declElementType)
4560 return XML_ERROR_NO_MEMORY;
4561 goto checkAttListDeclHandler;
4562 case XML_ROLE_ATTRIBUTE_NAME:
4563 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4564 if (!parser->m_declAttributeId)
4565 return XML_ERROR_NO_MEMORY;
4566 parser->m_declAttributeIsCdata = XML_FALSE;
4567 parser->m_declAttributeType = NULL;
4568 parser->m_declAttributeIsId = XML_FALSE;
4569 goto checkAttListDeclHandler;
4570 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4571 parser->m_declAttributeIsCdata = XML_TRUE;
4572 parser->m_declAttributeType = atypeCDATA;
4573 goto checkAttListDeclHandler;
4574 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4575 parser->m_declAttributeIsId = XML_TRUE;
4576 parser->m_declAttributeType = atypeID;
4577 goto checkAttListDeclHandler;
4578 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4579 parser->m_declAttributeType = atypeIDREF;
4580 goto checkAttListDeclHandler;
4581 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4582 parser->m_declAttributeType = atypeIDREFS;
4583 goto checkAttListDeclHandler;
4584 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4585 parser->m_declAttributeType = atypeENTITY;
4586 goto checkAttListDeclHandler;
4587 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4588 parser->m_declAttributeType = atypeENTITIES;
4589 goto checkAttListDeclHandler;
4590 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4591 parser->m_declAttributeType = atypeNMTOKEN;
4592 goto checkAttListDeclHandler;
4593 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4594 parser->m_declAttributeType = atypeNMTOKENS;
4595 checkAttListDeclHandler:
4596 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
4597 handleDefault = XML_FALSE;
4598 break;
4599 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4600 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4601 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
4602 const XML_Char *prefix;
4603 if (parser->m_declAttributeType) {
4604 prefix = enumValueSep;
4605 }
4606 else {
4607 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4608 ? notationPrefix
4609 : enumValueStart);
4610 }
4611 if (!poolAppendString(&parser->m_tempPool, prefix))
4612 return XML_ERROR_NO_MEMORY;
4613 if (!poolAppend(&parser->m_tempPool, enc, s, next))
4614 return XML_ERROR_NO_MEMORY;
4615 parser->m_declAttributeType = parser->m_tempPool.start;
4616 handleDefault = XML_FALSE;
4617 }
4618 break;
4619 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4620 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4621 if (dtd->keepProcessing) {
4622 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4623 parser->m_declAttributeIsCdata, parser->m_declAttributeIsId,
4624 0, parser))
4625 return XML_ERROR_NO_MEMORY;
4626 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4627 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4628 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4629 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4630 /* Enumerated or Notation type */
4631 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4632 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4633 return XML_ERROR_NO_MEMORY;
4634 parser->m_declAttributeType = parser->m_tempPool.start;
4635 poolFinish(&parser->m_tempPool);
4636 }
4637 *eventEndPP = s;
4638 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4639 parser->m_declAttributeId->name, parser->m_declAttributeType,
4640 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4641 poolClear(&parser->m_tempPool);
4642 handleDefault = XML_FALSE;
4643 }
4644 }
4645 break;
4646 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4647 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4648 if (dtd->keepProcessing) {
4649 const XML_Char *attVal;
4650 enum XML_Error result =
4651 storeAttributeValue(parser, enc, parser->m_declAttributeIsCdata,
4652 s + enc->minBytesPerChar,
4653 next - enc->minBytesPerChar,
4654 &dtd->pool);
4655 if (result)
4656 return result;
4657 attVal = poolStart(&dtd->pool);
4658 poolFinish(&dtd->pool);
4659 /* ID attributes aren't allowed to have a default */
4660 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4661 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
4662 return XML_ERROR_NO_MEMORY;
4663 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4664 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4665 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4666 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4667 /* Enumerated or Notation type */
4668 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4669 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4670 return XML_ERROR_NO_MEMORY;
4671 parser->m_declAttributeType = parser->m_tempPool.start;
4672 poolFinish(&parser->m_tempPool);
4673 }
4674 *eventEndPP = s;
4675 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4676 parser->m_declAttributeId->name, parser->m_declAttributeType,
4677 attVal,
4678 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4679 poolClear(&parser->m_tempPool);
4680 handleDefault = XML_FALSE;
4681 }
4682 }
4683 break;
4684 case XML_ROLE_ENTITY_VALUE:
4685 if (dtd->keepProcessing) {
4686 enum XML_Error result = storeEntityValue(parser, enc,
4687 s + enc->minBytesPerChar,
4688 next - enc->minBytesPerChar);
4689 if (parser->m_declEntity) {
4690 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4691 parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
4692 poolFinish(&dtd->entityValuePool);
4693 if (parser->m_entityDeclHandler) {
4694 *eventEndPP = s;
4695 parser->m_entityDeclHandler(parser->m_handlerArg,
4696 parser->m_declEntity->name,
4697 parser->m_declEntity->is_param,
4698 parser->m_declEntity->textPtr,
4699 parser->m_declEntity->textLen,
4700 parser->m_curBase, 0, 0, 0);
4701 handleDefault = XML_FALSE;
4702 }
4703 }
4704 else
4705 poolDiscard(&dtd->entityValuePool);
4706 if (result != XML_ERROR_NONE)
4707 return result;
4708 }
4709 break;
4710 case XML_ROLE_DOCTYPE_SYSTEM_ID:
4711 #ifdef XML_DTD
4712 parser->m_useForeignDTD = XML_FALSE;
4713 #endif /* XML_DTD */
4714 dtd->hasParamEntityRefs = XML_TRUE;
4715 if (parser->m_startDoctypeDeclHandler) {
4716 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
4717 s + enc->minBytesPerChar,
4718 next - enc->minBytesPerChar);
4719 if (parser->m_doctypeSysid == NULL)
4720 return XML_ERROR_NO_MEMORY;
4721 poolFinish(&parser->m_tempPool);
4722 handleDefault = XML_FALSE;
4723 }
4724 #ifdef XML_DTD
4725 else
4726 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4727 for the case where no parser->m_startDoctypeDeclHandler is set */
4728 parser->m_doctypeSysid = externalSubsetName;
4729 #endif /* XML_DTD */
4730 if (!dtd->standalone
4731 #ifdef XML_DTD
4732 && !parser->m_paramEntityParsing
4733 #endif /* XML_DTD */
4734 && parser->m_notStandaloneHandler
4735 && !parser->m_notStandaloneHandler(parser->m_handlerArg))
4736 return XML_ERROR_NOT_STANDALONE;
4737 #ifndef XML_DTD
4738 break;
4739 #else /* XML_DTD */
4740 if (!parser->m_declEntity) {
4741 parser->m_declEntity = (ENTITY *)lookup(parser,
4742 &dtd->paramEntities,
4743 externalSubsetName,
4744 sizeof(ENTITY));
4745 if (!parser->m_declEntity)
4746 return XML_ERROR_NO_MEMORY;
4747 parser->m_declEntity->publicId = NULL;
4748 }
4749 /* fall through */
4750 #endif /* XML_DTD */
4751 case XML_ROLE_ENTITY_SYSTEM_ID:
4752 if (dtd->keepProcessing && parser->m_declEntity) {
4753 parser->m_declEntity->systemId = poolStoreString(&dtd->pool, enc,
4754 s + enc->minBytesPerChar,
4755 next - enc->minBytesPerChar);
4756 if (!parser->m_declEntity->systemId)
4757 return XML_ERROR_NO_MEMORY;
4758 parser->m_declEntity->base = parser->m_curBase;
4759 poolFinish(&dtd->pool);
4760 /* Don't suppress the default handler if we fell through from
4761 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4762 */
4763 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
4764 handleDefault = XML_FALSE;
4765 }
4766 break;
4767 case XML_ROLE_ENTITY_COMPLETE:
4768 if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) {
4769 *eventEndPP = s;
4770 parser->m_entityDeclHandler(parser->m_handlerArg,
4771 parser->m_declEntity->name,
4772 parser->m_declEntity->is_param,
4773 0,0,
4774 parser->m_declEntity->base,
4775 parser->m_declEntity->systemId,
4776 parser->m_declEntity->publicId,
4777 0);
4778 handleDefault = XML_FALSE;
4779 }
4780 break;
4781 case XML_ROLE_ENTITY_NOTATION_NAME:
4782 if (dtd->keepProcessing && parser->m_declEntity) {
4783 parser->m_declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4784 if (!parser->m_declEntity->notation)
4785 return XML_ERROR_NO_MEMORY;
4786 poolFinish(&dtd->pool);
4787 if (parser->m_unparsedEntityDeclHandler) {
4788 *eventEndPP = s;
4789 parser->m_unparsedEntityDeclHandler(parser->m_handlerArg,
4790 parser->m_declEntity->name,
4791 parser->m_declEntity->base,
4792 parser->m_declEntity->systemId,
4793 parser->m_declEntity->publicId,
4794 parser->m_declEntity->notation);
4795 handleDefault = XML_FALSE;
4796 }
4797 else if (parser->m_entityDeclHandler) {
4798 *eventEndPP = s;
4799 parser->m_entityDeclHandler(parser->m_handlerArg,
4800 parser->m_declEntity->name,
4801 0,0,0,
4802 parser->m_declEntity->base,
4803 parser->m_declEntity->systemId,
4804 parser->m_declEntity->publicId,
4805 parser->m_declEntity->notation);
4806 handleDefault = XML_FALSE;
4807 }
4808 }
4809 break;
4810 case XML_ROLE_GENERAL_ENTITY_NAME:
4811 {
4812 if (XmlPredefinedEntityName(enc, s, next)) {
4813 parser->m_declEntity = NULL;
4814 break;
4815 }
4816 if (dtd->keepProcessing) {
4817 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4818 if (!name)
4819 return XML_ERROR_NO_MEMORY;
4820 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
4821 sizeof(ENTITY));
4822 if (!parser->m_declEntity)
4823 return XML_ERROR_NO_MEMORY;
4824 if (parser->m_declEntity->name != name) {
4825 poolDiscard(&dtd->pool);
4826 parser->m_declEntity = NULL;
4827 }
4828 else {
4829 poolFinish(&dtd->pool);
4830 parser->m_declEntity->publicId = NULL;
4831 parser->m_declEntity->is_param = XML_FALSE;
4832 /* if we have a parent parser or are reading an internal parameter
4833 entity, then the entity declaration is not considered "internal"
4834 */
4835 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4836 if (parser->m_entityDeclHandler)
4837 handleDefault = XML_FALSE;
4838 }
4839 }
4840 else {
4841 poolDiscard(&dtd->pool);
4842 parser->m_declEntity = NULL;
4843 }
4844 }
4845 break;
4846 case XML_ROLE_PARAM_ENTITY_NAME:
4847 #ifdef XML_DTD
4848 if (dtd->keepProcessing) {
4849 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4850 if (!name)
4851 return XML_ERROR_NO_MEMORY;
4852 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4853 name, sizeof(ENTITY));
4854 if (!parser->m_declEntity)
4855 return XML_ERROR_NO_MEMORY;
4856 if (parser->m_declEntity->name != name) {
4857 poolDiscard(&dtd->pool);
4858 parser->m_declEntity = NULL;
4859 }
4860 else {
4861 poolFinish(&dtd->pool);
4862 parser->m_declEntity->publicId = NULL;
4863 parser->m_declEntity->is_param = XML_TRUE;
4864 /* if we have a parent parser or are reading an internal parameter
4865 entity, then the entity declaration is not considered "internal"
4866 */
4867 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4868 if (parser->m_entityDeclHandler)
4869 handleDefault = XML_FALSE;
4870 }
4871 }
4872 else {
4873 poolDiscard(&dtd->pool);
4874 parser->m_declEntity = NULL;
4875 }
4876 #else /* not XML_DTD */
4877 parser->m_declEntity = NULL;
4878 #endif /* XML_DTD */
4879 break;
4880 case XML_ROLE_NOTATION_NAME:
4881 parser->m_declNotationPublicId = NULL;
4882 parser->m_declNotationName = NULL;
4883 if (parser->m_notationDeclHandler) {
4884 parser->m_declNotationName = poolStoreString(&parser->m_tempPool, enc, s, next);
4885 if (!parser->m_declNotationName)
4886 return XML_ERROR_NO_MEMORY;
4887 poolFinish(&parser->m_tempPool);
4888 handleDefault = XML_FALSE;
4889 }
4890 break;
4891 case XML_ROLE_NOTATION_PUBLIC_ID:
4892 if (!XmlIsPublicId(enc, s, next, eventPP))
4893 return XML_ERROR_PUBLICID;
4894 if (parser->m_declNotationName) { /* means m_notationDeclHandler != NULL */
4895 XML_Char *tem = poolStoreString(&parser->m_tempPool,
4896 enc,
4897 s + enc->minBytesPerChar,
4898 next - enc->minBytesPerChar);
4899 if (!tem)
4900 return XML_ERROR_NO_MEMORY;
4901 normalizePublicId(tem);
4902 parser->m_declNotationPublicId = tem;
4903 poolFinish(&parser->m_tempPool);
4904 handleDefault = XML_FALSE;
4905 }
4906 break;
4907 case XML_ROLE_NOTATION_SYSTEM_ID:
4908 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
4909 const XML_Char *systemId
4910 = poolStoreString(&parser->m_tempPool, enc,
4911 s + enc->minBytesPerChar,
4912 next - enc->minBytesPerChar);
4913 if (!systemId)
4914 return XML_ERROR_NO_MEMORY;
4915 *eventEndPP = s;
4916 parser->m_notationDeclHandler(parser->m_handlerArg,
4917 parser->m_declNotationName,
4918 parser->m_curBase,
4919 systemId,
4920 parser->m_declNotationPublicId);
4921 handleDefault = XML_FALSE;
4922 }
4923 poolClear(&parser->m_tempPool);
4924 break;
4925 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4926 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
4927 *eventEndPP = s;
4928 parser->m_notationDeclHandler(parser->m_handlerArg,
4929 parser->m_declNotationName,
4930 parser->m_curBase,
4931 0,
4932 parser->m_declNotationPublicId);
4933 handleDefault = XML_FALSE;
4934 }
4935 poolClear(&parser->m_tempPool);
4936 break;
4937 case XML_ROLE_ERROR:
4938 switch (tok) {
4939 case XML_TOK_PARAM_ENTITY_REF:
4940 /* PE references in internal subset are
4941 not allowed within declarations. */
4942 return XML_ERROR_PARAM_ENTITY_REF;
4943 case XML_TOK_XML_DECL:
4944 return XML_ERROR_MISPLACED_XML_PI;
4945 default:
4946 return XML_ERROR_SYNTAX;
4947 }
4948 #ifdef XML_DTD
4949 case XML_ROLE_IGNORE_SECT:
4950 {
4951 enum XML_Error result;
4952 if (parser->m_defaultHandler)
4953 reportDefault(parser, enc, s, next);
4954 handleDefault = XML_FALSE;
4955 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4956 if (result != XML_ERROR_NONE)
4957 return result;
4958 else if (!next) {
4959 parser->m_processor = ignoreSectionProcessor;
4960 return result;
4961 }
4962 }
4963 break;
4964 #endif /* XML_DTD */
4965 case XML_ROLE_GROUP_OPEN:
4966 if (parser->m_prologState.level >= parser->m_groupSize) {
4967 if (parser->m_groupSize) {
4968 char *temp = (char *)REALLOC(parser, parser->m_groupConnector, parser->m_groupSize *= 2);
4969 if (temp == NULL) {
4970 parser->m_groupSize /= 2;
4971 return XML_ERROR_NO_MEMORY;
4972 }
4973 parser->m_groupConnector = temp;
4974 if (dtd->scaffIndex) {
4975 int *temp = (int *)REALLOC(parser, dtd->scaffIndex,
4976 parser->m_groupSize * sizeof(int));
4977 if (temp == NULL)
4978 return XML_ERROR_NO_MEMORY;
4979 dtd->scaffIndex = temp;
4980 }
4981 }
4982 else {
4983 parser->m_groupConnector = (char *)MALLOC(parser, parser->m_groupSize = 32);
4984 if (!parser->m_groupConnector) {
4985 parser->m_groupSize = 0;
4986 return XML_ERROR_NO_MEMORY;
4987 }
4988 }
4989 }
4990 parser->m_groupConnector[parser->m_prologState.level] = 0;
4991 if (dtd->in_eldecl) {
4992 int myindex = nextScaffoldPart(parser);
4993 if (myindex < 0)
4994 return XML_ERROR_NO_MEMORY;
4995 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4996 dtd->scaffLevel++;
4997 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
4998 if (parser->m_elementDeclHandler)
4999 handleDefault = XML_FALSE;
5000 }
5001 break;
5002 case XML_ROLE_GROUP_SEQUENCE:
5003 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5004 return XML_ERROR_SYNTAX;
5005 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5006 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5007 handleDefault = XML_FALSE;
5008 break;
5009 case XML_ROLE_GROUP_CHOICE:
5010 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5011 return XML_ERROR_SYNTAX;
5012 if (dtd->in_eldecl
5013 && !parser->m_groupConnector[parser->m_prologState.level]
5014 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5015 != XML_CTYPE_MIXED)
5016 ) {
5017 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5018 = XML_CTYPE_CHOICE;
5019 if (parser->m_elementDeclHandler)
5020 handleDefault = XML_FALSE;
5021 }
5022 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5023 break;
5024 case XML_ROLE_PARAM_ENTITY_REF:
5025 #ifdef XML_DTD
5026 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5027 dtd->hasParamEntityRefs = XML_TRUE;
5028 if (!parser->m_paramEntityParsing)
5029 dtd->keepProcessing = dtd->standalone;
5030 else {
5031 const XML_Char *name;
5032 ENTITY *entity;
5033 name = poolStoreString(&dtd->pool, enc,
5034 s + enc->minBytesPerChar,
5035 next - enc->minBytesPerChar);
5036 if (!name)
5037 return XML_ERROR_NO_MEMORY;
5038 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5039 poolDiscard(&dtd->pool);
5040 /* first, determine if a check for an existing declaration is needed;
5041 if yes, check that the entity exists, and that it is internal,
5042 otherwise call the skipped entity handler
5043 */
5044 if (parser->m_prologState.documentEntity &&
5045 (dtd->standalone
5046 ? !parser->m_openInternalEntities
5047 : !dtd->hasParamEntityRefs)) {
5048 if (!entity)
5049 return XML_ERROR_UNDEFINED_ENTITY;
5050 else if (!entity->is_internal) {
5051 /* It's hard to exhaustively search the code to be sure,
5052 * but there doesn't seem to be a way of executing the
5053 * following line. There are two cases:
5054 *
5055 * If 'standalone' is false, the DTD must have no
5056 * parameter entities or we wouldn't have passed the outer
5057 * 'if' statement. That means the only entity in the hash
5058 * table is the external subset name "#" which cannot be
5059 * given as a parameter entity name in XML syntax, so the
5060 * lookup must have returned NULL and we don't even reach
5061 * the test for an internal entity.
5062 *
5063 * If 'standalone' is true, it does not seem to be
5064 * possible to create entities taking this code path that
5065 * are not internal entities, so fail the test above.
5066 *
5067 * Because this analysis is very uncertain, the code is
5068 * being left in place and merely removed from the
5069 * coverage test statistics.
5070 */
5071 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5072 }
5073 }
5074 else if (!entity) {
5075 dtd->keepProcessing = dtd->standalone;
5076 /* cannot report skipped entities in declarations */
5077 if ((role == XML_ROLE_PARAM_ENTITY_REF) && parser->m_skippedEntityHandler) {
5078 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5079 handleDefault = XML_FALSE;
5080 }
5081 break;
5082 }
5083 if (entity->open)
5084 return XML_ERROR_RECURSIVE_ENTITY_REF;
5085 if (entity->textPtr) {
5086 enum XML_Error result;
5087 XML_Bool betweenDecl =
5088 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5089 result = processInternalEntity(parser, entity, betweenDecl);
5090 if (result != XML_ERROR_NONE)
5091 return result;
5092 handleDefault = XML_FALSE;
5093 break;
5094 }
5095 if (parser->m_externalEntityRefHandler) {
5096 dtd->paramEntityRead = XML_FALSE;
5097 entity->open = XML_TRUE;
5098 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
5099 0,
5100 entity->base,
5101 entity->systemId,
5102 entity->publicId)) {
5103 entity->open = XML_FALSE;
5104 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5105 }
5106 entity->open = XML_FALSE;
5107 handleDefault = XML_FALSE;
5108 if (!dtd->paramEntityRead) {
5109 dtd->keepProcessing = dtd->standalone;
5110 break;
5111 }
5112 }
5113 else {
5114 dtd->keepProcessing = dtd->standalone;
5115 break;
5116 }
5117 }
5118 #endif /* XML_DTD */
5119 if (!dtd->standalone &&
5120 parser->m_notStandaloneHandler &&
5121 !parser->m_notStandaloneHandler(parser->m_handlerArg))
5122 return XML_ERROR_NOT_STANDALONE;
5123 break;
5124
5125 /* Element declaration stuff */
5126
5127 case XML_ROLE_ELEMENT_NAME:
5128 if (parser->m_elementDeclHandler) {
5129 parser->m_declElementType = getElementType(parser, enc, s, next);
5130 if (!parser->m_declElementType)
5131 return XML_ERROR_NO_MEMORY;
5132 dtd->scaffLevel = 0;
5133 dtd->scaffCount = 0;
5134 dtd->in_eldecl = XML_TRUE;
5135 handleDefault = XML_FALSE;
5136 }
5137 break;
5138
5139 case XML_ROLE_CONTENT_ANY:
5140 case XML_ROLE_CONTENT_EMPTY:
5141 if (dtd->in_eldecl) {
5142 if (parser->m_elementDeclHandler) {
5143 XML_Content * content = (XML_Content *) MALLOC(parser, sizeof(XML_Content));
5144 if (!content)
5145 return XML_ERROR_NO_MEMORY;
5146 content->quant = XML_CQUANT_NONE;
5147 content->name = NULL;
5148 content->numchildren = 0;
5149 content->children = NULL;
5150 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
5151 XML_CTYPE_ANY :
5152 XML_CTYPE_EMPTY);
5153 *eventEndPP = s;
5154 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, content);
5155 handleDefault = XML_FALSE;
5156 }
5157 dtd->in_eldecl = XML_FALSE;
5158 }
5159 break;
5160
5161 case XML_ROLE_CONTENT_PCDATA:
5162 if (dtd->in_eldecl) {
5163 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5164 = XML_CTYPE_MIXED;
5165 if (parser->m_elementDeclHandler)
5166 handleDefault = XML_FALSE;
5167 }
5168 break;
5169
5170 case XML_ROLE_CONTENT_ELEMENT:
5171 quant = XML_CQUANT_NONE;
5172 goto elementContent;
5173 case XML_ROLE_CONTENT_ELEMENT_OPT:
5174 quant = XML_CQUANT_OPT;
5175 goto elementContent;
5176 case XML_ROLE_CONTENT_ELEMENT_REP:
5177 quant = XML_CQUANT_REP;
5178 goto elementContent;
5179 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5180 quant = XML_CQUANT_PLUS;
5181 elementContent:
5182 if (dtd->in_eldecl) {
5183 ELEMENT_TYPE *el;
5184 const XML_Char *name;
5185 int nameLen;
5186 const char *nxt = (quant == XML_CQUANT_NONE
5187 ? next
5188 : next - enc->minBytesPerChar);
5189 int myindex = nextScaffoldPart(parser);
5190 if (myindex < 0)
5191 return XML_ERROR_NO_MEMORY;
5192 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5193 dtd->scaffold[myindex].quant = quant;
5194 el = getElementType(parser, enc, s, nxt);
5195 if (!el)
5196 return XML_ERROR_NO_MEMORY;
5197 name = el->name;
5198 dtd->scaffold[myindex].name = name;
5199 nameLen = 0;
5200 for (; name[nameLen++]; );
5201 dtd->contentStringLen += nameLen;
5202 if (parser->m_elementDeclHandler)
5203 handleDefault = XML_FALSE;
5204 }
5205 break;
5206
5207 case XML_ROLE_GROUP_CLOSE:
5208 quant = XML_CQUANT_NONE;
5209 goto closeGroup;
5210 case XML_ROLE_GROUP_CLOSE_OPT:
5211 quant = XML_CQUANT_OPT;
5212 goto closeGroup;
5213 case XML_ROLE_GROUP_CLOSE_REP:
5214 quant = XML_CQUANT_REP;
5215 goto closeGroup;
5216 case XML_ROLE_GROUP_CLOSE_PLUS:
5217 quant = XML_CQUANT_PLUS;
5218 closeGroup:
5219 if (dtd->in_eldecl) {
5220 if (parser->m_elementDeclHandler)
5221 handleDefault = XML_FALSE;
5222 dtd->scaffLevel--;
5223 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5224 if (dtd->scaffLevel == 0) {
5225 if (!handleDefault) {
5226 XML_Content *model = build_model(parser);
5227 if (!model)
5228 return XML_ERROR_NO_MEMORY;
5229 *eventEndPP = s;
5230 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, model);
5231 }
5232 dtd->in_eldecl = XML_FALSE;
5233 dtd->contentStringLen = 0;
5234 }
5235 }
5236 break;
5237 /* End element declaration stuff */
5238
5239 case XML_ROLE_PI:
5240 if (!reportProcessingInstruction(parser, enc, s, next))
5241 return XML_ERROR_NO_MEMORY;
5242 handleDefault = XML_FALSE;
5243 break;
5244 case XML_ROLE_COMMENT:
5245 if (!reportComment(parser, enc, s, next))
5246 return XML_ERROR_NO_MEMORY;
5247 handleDefault = XML_FALSE;
5248 break;
5249 case XML_ROLE_NONE:
5250 switch (tok) {
5251 case XML_TOK_BOM:
5252 handleDefault = XML_FALSE;
5253 break;
5254 }
5255 break;
5256 case XML_ROLE_DOCTYPE_NONE:
5257 if (parser->m_startDoctypeDeclHandler)
5258 handleDefault = XML_FALSE;
5259 break;
5260 case XML_ROLE_ENTITY_NONE:
5261 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5262 handleDefault = XML_FALSE;
5263 break;
5264 case XML_ROLE_NOTATION_NONE:
5265 if (parser->m_notationDeclHandler)
5266 handleDefault = XML_FALSE;
5267 break;
5268 case XML_ROLE_ATTLIST_NONE:
5269 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5270 handleDefault = XML_FALSE;
5271 break;
5272 case XML_ROLE_ELEMENT_NONE:
5273 if (parser->m_elementDeclHandler)
5274 handleDefault = XML_FALSE;
5275 break;
5276 } /* end of big switch */
5277
5278 if (handleDefault && parser->m_defaultHandler)
5279 reportDefault(parser, enc, s, next);
5280
5281 switch (parser->m_parsingStatus.parsing) {
5282 case XML_SUSPENDED:
5283 *nextPtr = next;
5284 return XML_ERROR_NONE;
5285 case XML_FINISHED:
5286 return XML_ERROR_ABORTED;
5287 default:
5288 s = next;
5289 tok = XmlPrologTok(enc, s, end, &next);
5290 }
5291 }
5292 /* not reached */
5293 }
5294
5295 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5296 epilogProcessor(XML_Parser parser,
5297 const char *s,
5298 const char *end,
5299 const char **nextPtr)
5300 {
5301 parser->m_processor = epilogProcessor;
5302 parser->m_eventPtr = s;
5303 for (;;) {
5304 const char *next = NULL;
5305 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5306 parser->m_eventEndPtr = next;
5307 switch (tok) {
5308 /* report partial linebreak - it might be the last token */
5309 case -XML_TOK_PROLOG_S:
5310 if (parser->m_defaultHandler) {
5311 reportDefault(parser, parser->m_encoding, s, next);
5312 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5313 return XML_ERROR_ABORTED;
5314 }
5315 *nextPtr = next;
5316 return XML_ERROR_NONE;
5317 case XML_TOK_NONE:
5318 *nextPtr = s;
5319 return XML_ERROR_NONE;
5320 case XML_TOK_PROLOG_S:
5321 if (parser->m_defaultHandler)
5322 reportDefault(parser, parser->m_encoding, s, next);
5323 break;
5324 case XML_TOK_PI:
5325 if (!reportProcessingInstruction(parser, parser->m_encoding, s, next))
5326 return XML_ERROR_NO_MEMORY;
5327 break;
5328 case XML_TOK_COMMENT:
5329 if (!reportComment(parser, parser->m_encoding, s, next))
5330 return XML_ERROR_NO_MEMORY;
5331 break;
5332 case XML_TOK_INVALID:
5333 parser->m_eventPtr = next;
5334 return XML_ERROR_INVALID_TOKEN;
5335 case XML_TOK_PARTIAL:
5336 if (!parser->m_parsingStatus.finalBuffer) {
5337 *nextPtr = s;
5338 return XML_ERROR_NONE;
5339 }
5340 return XML_ERROR_UNCLOSED_TOKEN;
5341 case XML_TOK_PARTIAL_CHAR:
5342 if (!parser->m_parsingStatus.finalBuffer) {
5343 *nextPtr = s;
5344 return XML_ERROR_NONE;
5345 }
5346 return XML_ERROR_PARTIAL_CHAR;
5347 default:
5348 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5349 }
5350 parser->m_eventPtr = s = next;
5351 switch (parser->m_parsingStatus.parsing) {
5352 case XML_SUSPENDED:
5353 *nextPtr = next;
5354 return XML_ERROR_NONE;
5355 case XML_FINISHED:
5356 return XML_ERROR_ABORTED;
5357 default: ;
5358 }
5359 }
5360 }
5361
5362 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5363 processInternalEntity(XML_Parser parser, ENTITY *entity,
5364 XML_Bool betweenDecl)
5365 {
5366 const char *textStart, *textEnd;
5367 const char *next;
5368 enum XML_Error result;
5369 OPEN_INTERNAL_ENTITY *openEntity;
5370
5371 if (parser->m_freeInternalEntities) {
5372 openEntity = parser->m_freeInternalEntities;
5373 parser->m_freeInternalEntities = openEntity->next;
5374 }
5375 else {
5376 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5377 if (!openEntity)
5378 return XML_ERROR_NO_MEMORY;
5379 }
5380 entity->open = XML_TRUE;
5381 entity->processed = 0;
5382 openEntity->next = parser->m_openInternalEntities;
5383 parser->m_openInternalEntities = openEntity;
5384 openEntity->entity = entity;
5385 openEntity->startTagLevel = parser->m_tagLevel;
5386 openEntity->betweenDecl = betweenDecl;
5387 openEntity->internalEventPtr = NULL;
5388 openEntity->internalEventEndPtr = NULL;
5389 textStart = (char *)entity->textPtr;
5390 textEnd = (char *)(entity->textPtr + entity->textLen);
5391 /* Set a safe default value in case 'next' does not get set */
5392 next = textStart;
5393
5394 #ifdef XML_DTD
5395 if (entity->is_param) {
5396 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5397 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
5398 next, &next, XML_FALSE);
5399 }
5400 else
5401 #endif /* XML_DTD */
5402 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, textStart,
5403 textEnd, &next, XML_FALSE);
5404
5405 if (result == XML_ERROR_NONE) {
5406 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5407 entity->processed = (int)(next - textStart);
5408 parser->m_processor = internalEntityProcessor;
5409 }
5410 else {
5411 entity->open = XML_FALSE;
5412 parser->m_openInternalEntities = openEntity->next;
5413 /* put openEntity back in list of free instances */
5414 openEntity->next = parser->m_freeInternalEntities;
5415 parser->m_freeInternalEntities = openEntity;
5416 }
5417 }
5418 return result;
5419 }
5420
5421 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5422 internalEntityProcessor(XML_Parser parser,
5423 const char *s,
5424 const char *end,
5425 const char **nextPtr)
5426 {
5427 ENTITY *entity;
5428 const char *textStart, *textEnd;
5429 const char *next;
5430 enum XML_Error result;
5431 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5432 if (!openEntity)
5433 return XML_ERROR_UNEXPECTED_STATE;
5434
5435 entity = openEntity->entity;
5436 textStart = ((char *)entity->textPtr) + entity->processed;
5437 textEnd = (char *)(entity->textPtr + entity->textLen);
5438 /* Set a safe default value in case 'next' does not get set */
5439 next = textStart;
5440
5441 #ifdef XML_DTD
5442 if (entity->is_param) {
5443 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5444 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
5445 next, &next, XML_FALSE);
5446 }
5447 else
5448 #endif /* XML_DTD */
5449 result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding,
5450 textStart, textEnd, &next, XML_FALSE);
5451
5452 if (result != XML_ERROR_NONE)
5453 return result;
5454 else if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5455 entity->processed = (int)(next - (char *)entity->textPtr);
5456 return result;
5457 }
5458 else {
5459 entity->open = XML_FALSE;
5460 parser->m_openInternalEntities = openEntity->next;
5461 /* put openEntity back in list of free instances */
5462 openEntity->next = parser->m_freeInternalEntities;
5463 parser->m_freeInternalEntities = openEntity;
5464 }
5465
5466 #ifdef XML_DTD
5467 if (entity->is_param) {
5468 int tok;
5469 parser->m_processor = prologProcessor;
5470 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5471 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5472 (XML_Bool)!parser->m_parsingStatus.finalBuffer);
5473 }
5474 else
5475 #endif /* XML_DTD */
5476 {
5477 parser->m_processor = contentProcessor;
5478 /* see externalEntityContentProcessor vs contentProcessor */
5479 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, s, end,
5480 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
5481 }
5482 }
5483
5484 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * UNUSED_P (s),const char * UNUSED_P (end),const char ** UNUSED_P (nextPtr))5485 errorProcessor(XML_Parser parser,
5486 const char *UNUSED_P(s),
5487 const char *UNUSED_P(end),
5488 const char **UNUSED_P(nextPtr))
5489 {
5490 return parser->m_errorCode;
5491 }
5492
5493 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool)5494 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5495 const char *ptr, const char *end,
5496 STRING_POOL *pool)
5497 {
5498 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
5499 end, pool);
5500 if (result)
5501 return result;
5502 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5503 poolChop(pool);
5504 if (!poolAppendChar(pool, XML_T('\0')))
5505 return XML_ERROR_NO_MEMORY;
5506 return XML_ERROR_NONE;
5507 }
5508
5509 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool)5510 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5511 const char *ptr, const char *end,
5512 STRING_POOL *pool)
5513 {
5514 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
5515 for (;;) {
5516 const char *next;
5517 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5518 switch (tok) {
5519 case XML_TOK_NONE:
5520 return XML_ERROR_NONE;
5521 case XML_TOK_INVALID:
5522 if (enc == parser->m_encoding)
5523 parser->m_eventPtr = next;
5524 return XML_ERROR_INVALID_TOKEN;
5525 case XML_TOK_PARTIAL:
5526 if (enc == parser->m_encoding)
5527 parser->m_eventPtr = ptr;
5528 return XML_ERROR_INVALID_TOKEN;
5529 case XML_TOK_CHAR_REF:
5530 {
5531 XML_Char buf[XML_ENCODE_MAX];
5532 int i;
5533 int n = XmlCharRefNumber(enc, ptr);
5534 if (n < 0) {
5535 if (enc == parser->m_encoding)
5536 parser->m_eventPtr = ptr;
5537 return XML_ERROR_BAD_CHAR_REF;
5538 }
5539 if (!isCdata
5540 && n == 0x20 /* space */
5541 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5542 break;
5543 n = XmlEncode(n, (ICHAR *)buf);
5544 /* The XmlEncode() functions can never return 0 here. That
5545 * error return happens if the code point passed in is either
5546 * negative or greater than or equal to 0x110000. The
5547 * XmlCharRefNumber() functions will all return a number
5548 * strictly less than 0x110000 or a negative value if an error
5549 * occurred. The negative value is intercepted above, so
5550 * XmlEncode() is never passed a value it might return an
5551 * error for.
5552 */
5553 for (i = 0; i < n; i++) {
5554 if (!poolAppendChar(pool, buf[i]))
5555 return XML_ERROR_NO_MEMORY;
5556 }
5557 }
5558 break;
5559 case XML_TOK_DATA_CHARS:
5560 if (!poolAppend(pool, enc, ptr, next))
5561 return XML_ERROR_NO_MEMORY;
5562 break;
5563 case XML_TOK_TRAILING_CR:
5564 next = ptr + enc->minBytesPerChar;
5565 /* fall through */
5566 case XML_TOK_ATTRIBUTE_VALUE_S:
5567 case XML_TOK_DATA_NEWLINE:
5568 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5569 break;
5570 if (!poolAppendChar(pool, 0x20))
5571 return XML_ERROR_NO_MEMORY;
5572 break;
5573 case XML_TOK_ENTITY_REF:
5574 {
5575 const XML_Char *name;
5576 ENTITY *entity;
5577 char checkEntityDecl;
5578 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5579 ptr + enc->minBytesPerChar,
5580 next - enc->minBytesPerChar);
5581 if (ch) {
5582 if (!poolAppendChar(pool, ch))
5583 return XML_ERROR_NO_MEMORY;
5584 break;
5585 }
5586 name = poolStoreString(&parser->m_temp2Pool, enc,
5587 ptr + enc->minBytesPerChar,
5588 next - enc->minBytesPerChar);
5589 if (!name)
5590 return XML_ERROR_NO_MEMORY;
5591 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5592 poolDiscard(&parser->m_temp2Pool);
5593 /* First, determine if a check for an existing declaration is needed;
5594 if yes, check that the entity exists, and that it is internal.
5595 */
5596 if (pool == &dtd->pool) /* are we called from prolog? */
5597 checkEntityDecl =
5598 #ifdef XML_DTD
5599 parser->m_prologState.documentEntity &&
5600 #endif /* XML_DTD */
5601 (dtd->standalone
5602 ? !parser->m_openInternalEntities
5603 : !dtd->hasParamEntityRefs);
5604 else /* if (pool == &parser->m_tempPool): we are called from content */
5605 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5606 if (checkEntityDecl) {
5607 if (!entity)
5608 return XML_ERROR_UNDEFINED_ENTITY;
5609 else if (!entity->is_internal)
5610 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5611 }
5612 else if (!entity) {
5613 /* Cannot report skipped entity here - see comments on
5614 parser->m_skippedEntityHandler.
5615 if (parser->m_skippedEntityHandler)
5616 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5617 */
5618 /* Cannot call the default handler because this would be
5619 out of sync with the call to the startElementHandler.
5620 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5621 reportDefault(parser, enc, ptr, next);
5622 */
5623 break;
5624 }
5625 if (entity->open) {
5626 if (enc == parser->m_encoding) {
5627 /* It does not appear that this line can be executed.
5628 *
5629 * The "if (entity->open)" check catches recursive entity
5630 * definitions. In order to be called with an open
5631 * entity, it must have gone through this code before and
5632 * been through the recursive call to
5633 * appendAttributeValue() some lines below. That call
5634 * sets the local encoding ("enc") to the parser's
5635 * internal encoding (internal_utf8 or internal_utf16),
5636 * which can never be the same as the principle encoding.
5637 * It doesn't appear there is another code path that gets
5638 * here with entity->open being TRUE.
5639 *
5640 * Since it is not certain that this logic is watertight,
5641 * we keep the line and merely exclude it from coverage
5642 * tests.
5643 */
5644 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
5645 }
5646 return XML_ERROR_RECURSIVE_ENTITY_REF;
5647 }
5648 if (entity->notation) {
5649 if (enc == parser->m_encoding)
5650 parser->m_eventPtr = ptr;
5651 return XML_ERROR_BINARY_ENTITY_REF;
5652 }
5653 if (!entity->textPtr) {
5654 if (enc == parser->m_encoding)
5655 parser->m_eventPtr = ptr;
5656 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5657 }
5658 else {
5659 enum XML_Error result;
5660 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5661 entity->open = XML_TRUE;
5662 result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata,
5663 (char *)entity->textPtr,
5664 (char *)textEnd, pool);
5665 entity->open = XML_FALSE;
5666 if (result)
5667 return result;
5668 }
5669 }
5670 break;
5671 default:
5672 /* The only token returned by XmlAttributeValueTok() that does
5673 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5674 * Getting that would require an entity name to contain an
5675 * incomplete XML character (e.g. \xE2\x82); however previous
5676 * tokenisers will have already recognised and rejected such
5677 * names before XmlAttributeValueTok() gets a look-in. This
5678 * default case should be retained as a safety net, but the code
5679 * excluded from coverage tests.
5680 *
5681 * LCOV_EXCL_START
5682 */
5683 if (enc == parser->m_encoding)
5684 parser->m_eventPtr = ptr;
5685 return XML_ERROR_UNEXPECTED_STATE;
5686 /* LCOV_EXCL_STOP */
5687 }
5688 ptr = next;
5689 }
5690 /* not reached */
5691 }
5692
5693 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd)5694 storeEntityValue(XML_Parser parser,
5695 const ENCODING *enc,
5696 const char *entityTextPtr,
5697 const char *entityTextEnd)
5698 {
5699 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
5700 STRING_POOL *pool = &(dtd->entityValuePool);
5701 enum XML_Error result = XML_ERROR_NONE;
5702 #ifdef XML_DTD
5703 int oldInEntityValue = parser->m_prologState.inEntityValue;
5704 parser->m_prologState.inEntityValue = 1;
5705 #endif /* XML_DTD */
5706 /* never return Null for the value argument in EntityDeclHandler,
5707 since this would indicate an external entity; therefore we
5708 have to make sure that entityValuePool.start is not null */
5709 if (!pool->blocks) {
5710 if (!poolGrow(pool))
5711 return XML_ERROR_NO_MEMORY;
5712 }
5713
5714 for (;;) {
5715 const char *next;
5716 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5717 switch (tok) {
5718 case XML_TOK_PARAM_ENTITY_REF:
5719 #ifdef XML_DTD
5720 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5721 const XML_Char *name;
5722 ENTITY *entity;
5723 name = poolStoreString(&parser->m_tempPool, enc,
5724 entityTextPtr + enc->minBytesPerChar,
5725 next - enc->minBytesPerChar);
5726 if (!name) {
5727 result = XML_ERROR_NO_MEMORY;
5728 goto endEntityValue;
5729 }
5730 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5731 poolDiscard(&parser->m_tempPool);
5732 if (!entity) {
5733 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5734 /* cannot report skipped entity here - see comments on
5735 parser->m_skippedEntityHandler
5736 if (parser->m_skippedEntityHandler)
5737 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5738 */
5739 dtd->keepProcessing = dtd->standalone;
5740 goto endEntityValue;
5741 }
5742 if (entity->open) {
5743 if (enc == parser->m_encoding)
5744 parser->m_eventPtr = entityTextPtr;
5745 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5746 goto endEntityValue;
5747 }
5748 if (entity->systemId) {
5749 if (parser->m_externalEntityRefHandler) {
5750 dtd->paramEntityRead = XML_FALSE;
5751 entity->open = XML_TRUE;
5752 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
5753 0,
5754 entity->base,
5755 entity->systemId,
5756 entity->publicId)) {
5757 entity->open = XML_FALSE;
5758 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5759 goto endEntityValue;
5760 }
5761 entity->open = XML_FALSE;
5762 if (!dtd->paramEntityRead)
5763 dtd->keepProcessing = dtd->standalone;
5764 }
5765 else
5766 dtd->keepProcessing = dtd->standalone;
5767 }
5768 else {
5769 entity->open = XML_TRUE;
5770 result = storeEntityValue(parser,
5771 parser->m_internalEncoding,
5772 (char *)entity->textPtr,
5773 (char *)(entity->textPtr
5774 + entity->textLen));
5775 entity->open = XML_FALSE;
5776 if (result)
5777 goto endEntityValue;
5778 }
5779 break;
5780 }
5781 #endif /* XML_DTD */
5782 /* In the internal subset, PE references are not legal
5783 within markup declarations, e.g entity values in this case. */
5784 parser->m_eventPtr = entityTextPtr;
5785 result = XML_ERROR_PARAM_ENTITY_REF;
5786 goto endEntityValue;
5787 case XML_TOK_NONE:
5788 result = XML_ERROR_NONE;
5789 goto endEntityValue;
5790 case XML_TOK_ENTITY_REF:
5791 case XML_TOK_DATA_CHARS:
5792 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5793 result = XML_ERROR_NO_MEMORY;
5794 goto endEntityValue;
5795 }
5796 break;
5797 case XML_TOK_TRAILING_CR:
5798 next = entityTextPtr + enc->minBytesPerChar;
5799 /* fall through */
5800 case XML_TOK_DATA_NEWLINE:
5801 if (pool->end == pool->ptr && !poolGrow(pool)) {
5802 result = XML_ERROR_NO_MEMORY;
5803 goto endEntityValue;
5804 }
5805 *(pool->ptr)++ = 0xA;
5806 break;
5807 case XML_TOK_CHAR_REF:
5808 {
5809 XML_Char buf[XML_ENCODE_MAX];
5810 int i;
5811 int n = XmlCharRefNumber(enc, entityTextPtr);
5812 if (n < 0) {
5813 if (enc == parser->m_encoding)
5814 parser->m_eventPtr = entityTextPtr;
5815 result = XML_ERROR_BAD_CHAR_REF;
5816 goto endEntityValue;
5817 }
5818 n = XmlEncode(n, (ICHAR *)buf);
5819 /* The XmlEncode() functions can never return 0 here. That
5820 * error return happens if the code point passed in is either
5821 * negative or greater than or equal to 0x110000. The
5822 * XmlCharRefNumber() functions will all return a number
5823 * strictly less than 0x110000 or a negative value if an error
5824 * occurred. The negative value is intercepted above, so
5825 * XmlEncode() is never passed a value it might return an
5826 * error for.
5827 */
5828 for (i = 0; i < n; i++) {
5829 if (pool->end == pool->ptr && !poolGrow(pool)) {
5830 result = XML_ERROR_NO_MEMORY;
5831 goto endEntityValue;
5832 }
5833 *(pool->ptr)++ = buf[i];
5834 }
5835 }
5836 break;
5837 case XML_TOK_PARTIAL:
5838 if (enc == parser->m_encoding)
5839 parser->m_eventPtr = entityTextPtr;
5840 result = XML_ERROR_INVALID_TOKEN;
5841 goto endEntityValue;
5842 case XML_TOK_INVALID:
5843 if (enc == parser->m_encoding)
5844 parser->m_eventPtr = next;
5845 result = XML_ERROR_INVALID_TOKEN;
5846 goto endEntityValue;
5847 default:
5848 /* This default case should be unnecessary -- all the tokens
5849 * that XmlEntityValueTok() can return have their own explicit
5850 * cases -- but should be retained for safety. We do however
5851 * exclude it from the coverage statistics.
5852 *
5853 * LCOV_EXCL_START
5854 */
5855 if (enc == parser->m_encoding)
5856 parser->m_eventPtr = entityTextPtr;
5857 result = XML_ERROR_UNEXPECTED_STATE;
5858 goto endEntityValue;
5859 /* LCOV_EXCL_STOP */
5860 }
5861 entityTextPtr = next;
5862 }
5863 endEntityValue:
5864 #ifdef XML_DTD
5865 parser->m_prologState.inEntityValue = oldInEntityValue;
5866 #endif /* XML_DTD */
5867 return result;
5868 }
5869
5870 static void FASTCALL
normalizeLines(XML_Char * s)5871 normalizeLines(XML_Char *s)
5872 {
5873 XML_Char *p;
5874 for (;; s++) {
5875 if (*s == XML_T('\0'))
5876 return;
5877 if (*s == 0xD)
5878 break;
5879 }
5880 p = s;
5881 do {
5882 if (*s == 0xD) {
5883 *p++ = 0xA;
5884 if (*++s == 0xA)
5885 s++;
5886 }
5887 else
5888 *p++ = *s++;
5889 } while (*s);
5890 *p = XML_T('\0');
5891 }
5892
5893 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)5894 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5895 const char *start, const char *end)
5896 {
5897 const XML_Char *target;
5898 XML_Char *data;
5899 const char *tem;
5900 if (!parser->m_processingInstructionHandler) {
5901 if (parser->m_defaultHandler)
5902 reportDefault(parser, enc, start, end);
5903 return 1;
5904 }
5905 start += enc->minBytesPerChar * 2;
5906 tem = start + XmlNameLength(enc, start);
5907 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
5908 if (!target)
5909 return 0;
5910 poolFinish(&parser->m_tempPool);
5911 data = poolStoreString(&parser->m_tempPool, enc,
5912 XmlSkipS(enc, tem),
5913 end - enc->minBytesPerChar*2);
5914 if (!data)
5915 return 0;
5916 normalizeLines(data);
5917 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
5918 poolClear(&parser->m_tempPool);
5919 return 1;
5920 }
5921
5922 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)5923 reportComment(XML_Parser parser, const ENCODING *enc,
5924 const char *start, const char *end)
5925 {
5926 XML_Char *data;
5927 if (!parser->m_commentHandler) {
5928 if (parser->m_defaultHandler)
5929 reportDefault(parser, enc, start, end);
5930 return 1;
5931 }
5932 data = poolStoreString(&parser->m_tempPool,
5933 enc,
5934 start + enc->minBytesPerChar * 4,
5935 end - enc->minBytesPerChar * 3);
5936 if (!data)
5937 return 0;
5938 normalizeLines(data);
5939 parser->m_commentHandler(parser->m_handlerArg, data);
5940 poolClear(&parser->m_tempPool);
5941 return 1;
5942 }
5943
5944 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)5945 reportDefault(XML_Parser parser, const ENCODING *enc,
5946 const char *s, const char *end)
5947 {
5948 if (MUST_CONVERT(enc, s)) {
5949 enum XML_Convert_Result convert_res;
5950 const char **eventPP;
5951 const char **eventEndPP;
5952 if (enc == parser->m_encoding) {
5953 eventPP = &parser->m_eventPtr;
5954 eventEndPP = &parser->m_eventEndPtr;
5955 }
5956 else {
5957 /* To get here, two things must be true; the parser must be
5958 * using a character encoding that is not the same as the
5959 * encoding passed in, and the encoding passed in must need
5960 * conversion to the internal format (UTF-8 unless XML_UNICODE
5961 * is defined). The only occasions on which the encoding passed
5962 * in is not the same as the parser's encoding are when it is
5963 * the internal encoding (e.g. a previously defined parameter
5964 * entity, already converted to internal format). This by
5965 * definition doesn't need conversion, so the whole branch never
5966 * gets executed.
5967 *
5968 * For safety's sake we don't delete these lines and merely
5969 * exclude them from coverage statistics.
5970 *
5971 * LCOV_EXCL_START
5972 */
5973 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5974 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
5975 /* LCOV_EXCL_STOP */
5976 }
5977 do {
5978 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
5979 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
5980 *eventEndPP = s;
5981 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
5982 *eventPP = s;
5983 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
5984 }
5985 else
5986 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
5987 }
5988
5989
5990 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)5991 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
5992 XML_Bool isId, const XML_Char *value, XML_Parser parser)
5993 {
5994 DEFAULT_ATTRIBUTE *att;
5995 if (value || isId) {
5996 /* The handling of default attributes gets messed up if we have
5997 a default which duplicates a non-default. */
5998 int i;
5999 for (i = 0; i < type->nDefaultAtts; i++)
6000 if (attId == type->defaultAtts[i].id)
6001 return 1;
6002 if (isId && !type->idAtt && !attId->xmlns)
6003 type->idAtt = attId;
6004 }
6005 if (type->nDefaultAtts == type->allocDefaultAtts) {
6006 if (type->allocDefaultAtts == 0) {
6007 type->allocDefaultAtts = 8;
6008 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(parser, type->allocDefaultAtts
6009 * sizeof(DEFAULT_ATTRIBUTE));
6010 if (!type->defaultAtts) {
6011 type->allocDefaultAtts = 0;
6012 return 0;
6013 }
6014 }
6015 else {
6016 DEFAULT_ATTRIBUTE *temp;
6017 int count = type->allocDefaultAtts * 2;
6018 temp = (DEFAULT_ATTRIBUTE *)
6019 REALLOC(parser, type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
6020 if (temp == NULL)
6021 return 0;
6022 type->allocDefaultAtts = count;
6023 type->defaultAtts = temp;
6024 }
6025 }
6026 att = type->defaultAtts + type->nDefaultAtts;
6027 att->id = attId;
6028 att->value = value;
6029 att->isCdata = isCdata;
6030 if (!isCdata)
6031 attId->maybeTokenized = XML_TRUE;
6032 type->nDefaultAtts += 1;
6033 return 1;
6034 }
6035
6036 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6037 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
6038 {
6039 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6040 const XML_Char *name;
6041 for (name = elementType->name; *name; name++) {
6042 if (*name == XML_T(ASCII_COLON)) {
6043 PREFIX *prefix;
6044 const XML_Char *s;
6045 for (s = elementType->name; s != name; s++) {
6046 if (!poolAppendChar(&dtd->pool, *s))
6047 return 0;
6048 }
6049 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6050 return 0;
6051 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6052 sizeof(PREFIX));
6053 if (!prefix)
6054 return 0;
6055 if (prefix->name == poolStart(&dtd->pool))
6056 poolFinish(&dtd->pool);
6057 else
6058 poolDiscard(&dtd->pool);
6059 elementType->prefix = prefix;
6060
6061 }
6062 }
6063 return 1;
6064 }
6065
6066 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6067 getAttributeId(XML_Parser parser, const ENCODING *enc,
6068 const char *start, const char *end)
6069 {
6070 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6071 ATTRIBUTE_ID *id;
6072 const XML_Char *name;
6073 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6074 return NULL;
6075 name = poolStoreString(&dtd->pool, enc, start, end);
6076 if (!name)
6077 return NULL;
6078 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
6079 ++name;
6080 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
6081 if (!id)
6082 return NULL;
6083 if (id->name != name)
6084 poolDiscard(&dtd->pool);
6085 else {
6086 poolFinish(&dtd->pool);
6087 if (!parser->m_ns)
6088 ;
6089 else if (name[0] == XML_T(ASCII_x)
6090 && name[1] == XML_T(ASCII_m)
6091 && name[2] == XML_T(ASCII_l)
6092 && name[3] == XML_T(ASCII_n)
6093 && name[4] == XML_T(ASCII_s)
6094 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6095 if (name[5] == XML_T('\0'))
6096 id->prefix = &dtd->defaultPrefix;
6097 else
6098 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
6099 id->xmlns = XML_TRUE;
6100 }
6101 else {
6102 int i;
6103 for (i = 0; name[i]; i++) {
6104 /* attributes without prefix are *not* in the default namespace */
6105 if (name[i] == XML_T(ASCII_COLON)) {
6106 int j;
6107 for (j = 0; j < i; j++) {
6108 if (!poolAppendChar(&dtd->pool, name[j]))
6109 return NULL;
6110 }
6111 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6112 return NULL;
6113 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6114 sizeof(PREFIX));
6115 if (!id->prefix)
6116 return NULL;
6117 if (id->prefix->name == poolStart(&dtd->pool))
6118 poolFinish(&dtd->pool);
6119 else
6120 poolDiscard(&dtd->pool);
6121 break;
6122 }
6123 }
6124 }
6125 }
6126 return id;
6127 }
6128
6129 #define CONTEXT_SEP XML_T(ASCII_FF)
6130
6131 static const XML_Char *
getContext(XML_Parser parser)6132 getContext(XML_Parser parser)
6133 {
6134 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6135 HASH_TABLE_ITER iter;
6136 XML_Bool needSep = XML_FALSE;
6137
6138 if (dtd->defaultPrefix.binding) {
6139 int i;
6140 int len;
6141 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6142 return NULL;
6143 len = dtd->defaultPrefix.binding->uriLen;
6144 if (parser->m_namespaceSeparator)
6145 len--;
6146 for (i = 0; i < len; i++) {
6147 if (!poolAppendChar(&parser->m_tempPool, dtd->defaultPrefix.binding->uri[i])) {
6148 /* Because of memory caching, I don't believe this line can be
6149 * executed.
6150 *
6151 * This is part of a loop copying the default prefix binding
6152 * URI into the parser's temporary string pool. Previously,
6153 * that URI was copied into the same string pool, with a
6154 * terminating NUL character, as part of setContext(). When
6155 * the pool was cleared, that leaves a block definitely big
6156 * enough to hold the URI on the free block list of the pool.
6157 * The URI copy in getContext() therefore cannot run out of
6158 * memory.
6159 *
6160 * If the pool is used between the setContext() and
6161 * getContext() calls, the worst it can do is leave a bigger
6162 * block on the front of the free list. Given that this is
6163 * all somewhat inobvious and program logic can be changed, we
6164 * don't delete the line but we do exclude it from the test
6165 * coverage statistics.
6166 */
6167 return NULL; /* LCOV_EXCL_LINE */
6168 }
6169 }
6170 needSep = XML_TRUE;
6171 }
6172
6173 hashTableIterInit(&iter, &(dtd->prefixes));
6174 for (;;) {
6175 int i;
6176 int len;
6177 const XML_Char *s;
6178 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6179 if (!prefix)
6180 break;
6181 if (!prefix->binding) {
6182 /* This test appears to be (justifiable) paranoia. There does
6183 * not seem to be a way of injecting a prefix without a binding
6184 * that doesn't get errored long before this function is called.
6185 * The test should remain for safety's sake, so we instead
6186 * exclude the following line from the coverage statistics.
6187 */
6188 continue; /* LCOV_EXCL_LINE */
6189 }
6190 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6191 return NULL;
6192 for (s = prefix->name; *s; s++)
6193 if (!poolAppendChar(&parser->m_tempPool, *s))
6194 return NULL;
6195 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6196 return NULL;
6197 len = prefix->binding->uriLen;
6198 if (parser->m_namespaceSeparator)
6199 len--;
6200 for (i = 0; i < len; i++)
6201 if (!poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6202 return NULL;
6203 needSep = XML_TRUE;
6204 }
6205
6206
6207 hashTableIterInit(&iter, &(dtd->generalEntities));
6208 for (;;) {
6209 const XML_Char *s;
6210 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6211 if (!e)
6212 break;
6213 if (!e->open)
6214 continue;
6215 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6216 return NULL;
6217 for (s = e->name; *s; s++)
6218 if (!poolAppendChar(&parser->m_tempPool, *s))
6219 return 0;
6220 needSep = XML_TRUE;
6221 }
6222
6223 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6224 return NULL;
6225 return parser->m_tempPool.start;
6226 }
6227
6228 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)6229 setContext(XML_Parser parser, const XML_Char *context)
6230 {
6231 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6232 const XML_Char *s = context;
6233
6234 while (*context != XML_T('\0')) {
6235 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6236 ENTITY *e;
6237 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6238 return XML_FALSE;
6239 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&parser->m_tempPool), 0);
6240 if (e)
6241 e->open = XML_TRUE;
6242 if (*s != XML_T('\0'))
6243 s++;
6244 context = s;
6245 poolDiscard(&parser->m_tempPool);
6246 }
6247 else if (*s == XML_T(ASCII_EQUALS)) {
6248 PREFIX *prefix;
6249 if (poolLength(&parser->m_tempPool) == 0)
6250 prefix = &dtd->defaultPrefix;
6251 else {
6252 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6253 return XML_FALSE;
6254 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&parser->m_tempPool),
6255 sizeof(PREFIX));
6256 if (!prefix)
6257 return XML_FALSE;
6258 if (prefix->name == poolStart(&parser->m_tempPool)) {
6259 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6260 if (!prefix->name)
6261 return XML_FALSE;
6262 }
6263 poolDiscard(&parser->m_tempPool);
6264 }
6265 for (context = s + 1;
6266 *context != CONTEXT_SEP && *context != XML_T('\0');
6267 context++)
6268 if (!poolAppendChar(&parser->m_tempPool, *context))
6269 return XML_FALSE;
6270 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6271 return XML_FALSE;
6272 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6273 &parser->m_inheritedBindings) != XML_ERROR_NONE)
6274 return XML_FALSE;
6275 poolDiscard(&parser->m_tempPool);
6276 if (*context != XML_T('\0'))
6277 ++context;
6278 s = context;
6279 }
6280 else {
6281 if (!poolAppendChar(&parser->m_tempPool, *s))
6282 return XML_FALSE;
6283 s++;
6284 }
6285 }
6286 return XML_TRUE;
6287 }
6288
6289 static void FASTCALL
normalizePublicId(XML_Char * publicId)6290 normalizePublicId(XML_Char *publicId)
6291 {
6292 XML_Char *p = publicId;
6293 XML_Char *s;
6294 for (s = publicId; *s; s++) {
6295 switch (*s) {
6296 case 0x20:
6297 case 0xD:
6298 case 0xA:
6299 if (p != publicId && p[-1] != 0x20)
6300 *p++ = 0x20;
6301 break;
6302 default:
6303 *p++ = *s;
6304 }
6305 }
6306 if (p != publicId && p[-1] == 0x20)
6307 --p;
6308 *p = XML_T('\0');
6309 }
6310
6311 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6312 dtdCreate(const XML_Memory_Handling_Suite *ms)
6313 {
6314 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6315 if (p == NULL)
6316 return p;
6317 poolInit(&(p->pool), ms);
6318 poolInit(&(p->entityValuePool), ms);
6319 hashTableInit(&(p->generalEntities), ms);
6320 hashTableInit(&(p->elementTypes), ms);
6321 hashTableInit(&(p->attributeIds), ms);
6322 hashTableInit(&(p->prefixes), ms);
6323 #ifdef XML_DTD
6324 p->paramEntityRead = XML_FALSE;
6325 hashTableInit(&(p->paramEntities), ms);
6326 #endif /* XML_DTD */
6327 p->defaultPrefix.name = NULL;
6328 p->defaultPrefix.binding = NULL;
6329
6330 p->in_eldecl = XML_FALSE;
6331 p->scaffIndex = NULL;
6332 p->scaffold = NULL;
6333 p->scaffLevel = 0;
6334 p->scaffSize = 0;
6335 p->scaffCount = 0;
6336 p->contentStringLen = 0;
6337
6338 p->keepProcessing = XML_TRUE;
6339 p->hasParamEntityRefs = XML_FALSE;
6340 p->standalone = XML_FALSE;
6341 return p;
6342 }
6343
6344 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6345 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
6346 {
6347 HASH_TABLE_ITER iter;
6348 hashTableIterInit(&iter, &(p->elementTypes));
6349 for (;;) {
6350 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6351 if (!e)
6352 break;
6353 if (e->allocDefaultAtts != 0)
6354 ms->free_fcn(e->defaultAtts);
6355 }
6356 hashTableClear(&(p->generalEntities));
6357 #ifdef XML_DTD
6358 p->paramEntityRead = XML_FALSE;
6359 hashTableClear(&(p->paramEntities));
6360 #endif /* XML_DTD */
6361 hashTableClear(&(p->elementTypes));
6362 hashTableClear(&(p->attributeIds));
6363 hashTableClear(&(p->prefixes));
6364 poolClear(&(p->pool));
6365 poolClear(&(p->entityValuePool));
6366 p->defaultPrefix.name = NULL;
6367 p->defaultPrefix.binding = NULL;
6368
6369 p->in_eldecl = XML_FALSE;
6370
6371 ms->free_fcn(p->scaffIndex);
6372 p->scaffIndex = NULL;
6373 ms->free_fcn(p->scaffold);
6374 p->scaffold = NULL;
6375
6376 p->scaffLevel = 0;
6377 p->scaffSize = 0;
6378 p->scaffCount = 0;
6379 p->contentStringLen = 0;
6380
6381 p->keepProcessing = XML_TRUE;
6382 p->hasParamEntityRefs = XML_FALSE;
6383 p->standalone = XML_FALSE;
6384 }
6385
6386 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6387 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
6388 {
6389 HASH_TABLE_ITER iter;
6390 hashTableIterInit(&iter, &(p->elementTypes));
6391 for (;;) {
6392 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6393 if (!e)
6394 break;
6395 if (e->allocDefaultAtts != 0)
6396 ms->free_fcn(e->defaultAtts);
6397 }
6398 hashTableDestroy(&(p->generalEntities));
6399 #ifdef XML_DTD
6400 hashTableDestroy(&(p->paramEntities));
6401 #endif /* XML_DTD */
6402 hashTableDestroy(&(p->elementTypes));
6403 hashTableDestroy(&(p->attributeIds));
6404 hashTableDestroy(&(p->prefixes));
6405 poolDestroy(&(p->pool));
6406 poolDestroy(&(p->entityValuePool));
6407 if (isDocEntity) {
6408 ms->free_fcn(p->scaffIndex);
6409 ms->free_fcn(p->scaffold);
6410 }
6411 ms->free_fcn(p);
6412 }
6413
6414 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6415 The new DTD has already been initialized.
6416 */
6417 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6418 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
6419 {
6420 HASH_TABLE_ITER iter;
6421
6422 /* Copy the prefix table. */
6423
6424 hashTableIterInit(&iter, &(oldDtd->prefixes));
6425 for (;;) {
6426 const XML_Char *name;
6427 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6428 if (!oldP)
6429 break;
6430 name = poolCopyString(&(newDtd->pool), oldP->name);
6431 if (!name)
6432 return 0;
6433 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6434 return 0;
6435 }
6436
6437 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6438
6439 /* Copy the attribute id table. */
6440
6441 for (;;) {
6442 ATTRIBUTE_ID *newA;
6443 const XML_Char *name;
6444 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6445
6446 if (!oldA)
6447 break;
6448 /* Remember to allocate the scratch byte before the name. */
6449 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
6450 return 0;
6451 name = poolCopyString(&(newDtd->pool), oldA->name);
6452 if (!name)
6453 return 0;
6454 ++name;
6455 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6456 sizeof(ATTRIBUTE_ID));
6457 if (!newA)
6458 return 0;
6459 newA->maybeTokenized = oldA->maybeTokenized;
6460 if (oldA->prefix) {
6461 newA->xmlns = oldA->xmlns;
6462 if (oldA->prefix == &oldDtd->defaultPrefix)
6463 newA->prefix = &newDtd->defaultPrefix;
6464 else
6465 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6466 oldA->prefix->name, 0);
6467 }
6468 }
6469
6470 /* Copy the element type table. */
6471
6472 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6473
6474 for (;;) {
6475 int i;
6476 ELEMENT_TYPE *newE;
6477 const XML_Char *name;
6478 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6479 if (!oldE)
6480 break;
6481 name = poolCopyString(&(newDtd->pool), oldE->name);
6482 if (!name)
6483 return 0;
6484 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
6485 sizeof(ELEMENT_TYPE));
6486 if (!newE)
6487 return 0;
6488 if (oldE->nDefaultAtts) {
6489 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
6490 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6491 if (!newE->defaultAtts) {
6492 return 0;
6493 }
6494 }
6495 if (oldE->idAtt)
6496 newE->idAtt = (ATTRIBUTE_ID *)
6497 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
6498 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6499 if (oldE->prefix)
6500 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6501 oldE->prefix->name, 0);
6502 for (i = 0; i < newE->nDefaultAtts; i++) {
6503 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
6504 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
6505 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6506 if (oldE->defaultAtts[i].value) {
6507 newE->defaultAtts[i].value
6508 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6509 if (!newE->defaultAtts[i].value)
6510 return 0;
6511 }
6512 else
6513 newE->defaultAtts[i].value = NULL;
6514 }
6515 }
6516
6517 /* Copy the entity tables. */
6518 if (!copyEntityTable(oldParser,
6519 &(newDtd->generalEntities),
6520 &(newDtd->pool),
6521 &(oldDtd->generalEntities)))
6522 return 0;
6523
6524 #ifdef XML_DTD
6525 if (!copyEntityTable(oldParser,
6526 &(newDtd->paramEntities),
6527 &(newDtd->pool),
6528 &(oldDtd->paramEntities)))
6529 return 0;
6530 newDtd->paramEntityRead = oldDtd->paramEntityRead;
6531 #endif /* XML_DTD */
6532
6533 newDtd->keepProcessing = oldDtd->keepProcessing;
6534 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
6535 newDtd->standalone = oldDtd->standalone;
6536
6537 /* Don't want deep copying for scaffolding */
6538 newDtd->in_eldecl = oldDtd->in_eldecl;
6539 newDtd->scaffold = oldDtd->scaffold;
6540 newDtd->contentStringLen = oldDtd->contentStringLen;
6541 newDtd->scaffSize = oldDtd->scaffSize;
6542 newDtd->scaffLevel = oldDtd->scaffLevel;
6543 newDtd->scaffIndex = oldDtd->scaffIndex;
6544
6545 return 1;
6546 } /* End dtdCopy */
6547
6548 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)6549 copyEntityTable(XML_Parser oldParser,
6550 HASH_TABLE *newTable,
6551 STRING_POOL *newPool,
6552 const HASH_TABLE *oldTable)
6553 {
6554 HASH_TABLE_ITER iter;
6555 const XML_Char *cachedOldBase = NULL;
6556 const XML_Char *cachedNewBase = NULL;
6557
6558 hashTableIterInit(&iter, oldTable);
6559
6560 for (;;) {
6561 ENTITY *newE;
6562 const XML_Char *name;
6563 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6564 if (!oldE)
6565 break;
6566 name = poolCopyString(newPool, oldE->name);
6567 if (!name)
6568 return 0;
6569 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
6570 if (!newE)
6571 return 0;
6572 if (oldE->systemId) {
6573 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6574 if (!tem)
6575 return 0;
6576 newE->systemId = tem;
6577 if (oldE->base) {
6578 if (oldE->base == cachedOldBase)
6579 newE->base = cachedNewBase;
6580 else {
6581 cachedOldBase = oldE->base;
6582 tem = poolCopyString(newPool, cachedOldBase);
6583 if (!tem)
6584 return 0;
6585 cachedNewBase = newE->base = tem;
6586 }
6587 }
6588 if (oldE->publicId) {
6589 tem = poolCopyString(newPool, oldE->publicId);
6590 if (!tem)
6591 return 0;
6592 newE->publicId = tem;
6593 }
6594 }
6595 else {
6596 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
6597 oldE->textLen);
6598 if (!tem)
6599 return 0;
6600 newE->textPtr = tem;
6601 newE->textLen = oldE->textLen;
6602 }
6603 if (oldE->notation) {
6604 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6605 if (!tem)
6606 return 0;
6607 newE->notation = tem;
6608 }
6609 newE->is_param = oldE->is_param;
6610 newE->is_internal = oldE->is_internal;
6611 }
6612 return 1;
6613 }
6614
/* Exponent of the initial hash table size: lookup() creates a fresh
   table with (1 << INIT_POWER) slots. */
#define INIT_POWER 6
6616
6617 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)6618 keyeq(KEY s1, KEY s2)
6619 {
6620 for (; *s1 == *s2; s1++, s2++)
6621 if (*s1 == 0)
6622 return XML_TRUE;
6623 return XML_FALSE;
6624 }
6625
6626 static size_t
keylen(KEY s)6627 keylen(KEY s)
6628 {
6629 size_t len = 0;
6630 for (; *s; s++, len++);
6631 return len;
6632 }
6633
6634 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)6635 copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key)
6636 {
6637 key->k[0] = 0;
6638 key->k[1] = get_hash_secret_salt(parser);
6639 }
6640
6641 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)6642 hash(XML_Parser parser, KEY s)
6643 {
6644 struct siphash state;
6645 struct sipkey key;
6646 (void)sip_tobin;
6647 (void)sip24_valid;
6648 copy_salt_to_sipkey(parser, &key);
6649 sip24_init(&state, &key);
6650 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6651 return (unsigned long)sip24_final(&state);
6652 }
6653
6654 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)6655 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
6656 {
6657 size_t i;
6658 if (table->size == 0) {
6659 size_t tsize;
6660 if (!createSize)
6661 return NULL;
6662 table->power = INIT_POWER;
6663 /* table->size is a power of 2 */
6664 table->size = (size_t)1 << INIT_POWER;
6665 tsize = table->size * sizeof(NAMED *);
6666 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
6667 if (!table->v) {
6668 table->size = 0;
6669 return NULL;
6670 }
6671 memset(table->v, 0, tsize);
6672 i = hash(parser, name) & ((unsigned long)table->size - 1);
6673 }
6674 else {
6675 unsigned long h = hash(parser, name);
6676 unsigned long mask = (unsigned long)table->size - 1;
6677 unsigned char step = 0;
6678 i = h & mask;
6679 while (table->v[i]) {
6680 if (keyeq(name, table->v[i]->name))
6681 return table->v[i];
6682 if (!step)
6683 step = PROBE_STEP(h, mask, table->power);
6684 i < step ? (i += table->size - step) : (i -= step);
6685 }
6686 if (!createSize)
6687 return NULL;
6688
6689 /* check for overflow (table is half full) */
6690 if (table->used >> (table->power - 1)) {
6691 unsigned char newPower = table->power + 1;
6692 size_t newSize = (size_t)1 << newPower;
6693 unsigned long newMask = (unsigned long)newSize - 1;
6694 size_t tsize = newSize * sizeof(NAMED *);
6695 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
6696 if (!newV)
6697 return NULL;
6698 memset(newV, 0, tsize);
6699 for (i = 0; i < table->size; i++)
6700 if (table->v[i]) {
6701 unsigned long newHash = hash(parser, table->v[i]->name);
6702 size_t j = newHash & newMask;
6703 step = 0;
6704 while (newV[j]) {
6705 if (!step)
6706 step = PROBE_STEP(newHash, newMask, newPower);
6707 j < step ? (j += newSize - step) : (j -= step);
6708 }
6709 newV[j] = table->v[i];
6710 }
6711 table->mem->free_fcn(table->v);
6712 table->v = newV;
6713 table->power = newPower;
6714 table->size = newSize;
6715 i = h & newMask;
6716 step = 0;
6717 while (table->v[i]) {
6718 if (!step)
6719 step = PROBE_STEP(h, newMask, newPower);
6720 i < step ? (i += newSize - step) : (i -= step);
6721 }
6722 }
6723 }
6724 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
6725 if (!table->v[i])
6726 return NULL;
6727 memset(table->v[i], 0, createSize);
6728 table->v[i]->name = name;
6729 (table->used)++;
6730 return table->v[i];
6731 }
6732
6733 static void FASTCALL
hashTableClear(HASH_TABLE * table)6734 hashTableClear(HASH_TABLE *table)
6735 {
6736 size_t i;
6737 for (i = 0; i < table->size; i++) {
6738 table->mem->free_fcn(table->v[i]);
6739 table->v[i] = NULL;
6740 }
6741 table->used = 0;
6742 }
6743
6744 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)6745 hashTableDestroy(HASH_TABLE *table)
6746 {
6747 size_t i;
6748 for (i = 0; i < table->size; i++)
6749 table->mem->free_fcn(table->v[i]);
6750 table->mem->free_fcn(table->v);
6751 }
6752
6753 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)6754 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
6755 {
6756 p->power = 0;
6757 p->size = 0;
6758 p->used = 0;
6759 p->v = NULL;
6760 p->mem = ms;
6761 }
6762
6763 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)6764 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
6765 {
6766 iter->p = table->v;
6767 iter->end = iter->p + table->size;
6768 }
6769
6770 static NAMED * FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)6771 hashTableIterNext(HASH_TABLE_ITER *iter)
6772 {
6773 while (iter->p != iter->end) {
6774 NAMED *tem = *(iter->p)++;
6775 if (tem)
6776 return tem;
6777 }
6778 return NULL;
6779 }
6780
6781 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)6782 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
6783 {
6784 pool->blocks = NULL;
6785 pool->freeBlocks = NULL;
6786 pool->start = NULL;
6787 pool->ptr = NULL;
6788 pool->end = NULL;
6789 pool->mem = ms;
6790 }
6791
6792 static void FASTCALL
poolClear(STRING_POOL * pool)6793 poolClear(STRING_POOL *pool)
6794 {
6795 if (!pool->freeBlocks)
6796 pool->freeBlocks = pool->blocks;
6797 else {
6798 BLOCK *p = pool->blocks;
6799 while (p) {
6800 BLOCK *tem = p->next;
6801 p->next = pool->freeBlocks;
6802 pool->freeBlocks = p;
6803 p = tem;
6804 }
6805 }
6806 pool->blocks = NULL;
6807 pool->start = NULL;
6808 pool->ptr = NULL;
6809 pool->end = NULL;
6810 }
6811
6812 static void FASTCALL
poolDestroy(STRING_POOL * pool)6813 poolDestroy(STRING_POOL *pool)
6814 {
6815 BLOCK *p = pool->blocks;
6816 while (p) {
6817 BLOCK *tem = p->next;
6818 pool->mem->free_fcn(p);
6819 p = tem;
6820 }
6821 p = pool->freeBlocks;
6822 while (p) {
6823 BLOCK *tem = p->next;
6824 pool->mem->free_fcn(p);
6825 p = tem;
6826 }
6827 }
6828
6829 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)6830 poolAppend(STRING_POOL *pool, const ENCODING *enc,
6831 const char *ptr, const char *end)
6832 {
6833 if (!pool->ptr && !poolGrow(pool))
6834 return NULL;
6835 for (;;) {
6836 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6837 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
6838 break;
6839 if (!poolGrow(pool))
6840 return NULL;
6841 }
6842 return pool->start;
6843 }
6844
6845 static const XML_Char * FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)6846 poolCopyString(STRING_POOL *pool, const XML_Char *s)
6847 {
6848 do {
6849 if (!poolAppendChar(pool, *s))
6850 return NULL;
6851 } while (*s++);
6852 s = pool->start;
6853 poolFinish(pool);
6854 return s;
6855 }
6856
6857 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)6858 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
6859 {
6860 if (!pool->ptr && !poolGrow(pool)) {
6861 /* The following line is unreachable given the current usage of
6862 * poolCopyStringN(). Currently it is called from exactly one
6863 * place to copy the text of a simple general entity. By that
6864 * point, the name of the entity is already stored in the pool, so
6865 * pool->ptr cannot be NULL.
6866 *
6867 * If poolCopyStringN() is used elsewhere as it well might be,
6868 * this line may well become executable again. Regardless, this
6869 * sort of check shouldn't be removed lightly, so we just exclude
6870 * it from the coverage statistics.
6871 */
6872 return NULL; /* LCOV_EXCL_LINE */
6873 }
6874 for (; n > 0; --n, s++) {
6875 if (!poolAppendChar(pool, *s))
6876 return NULL;
6877 }
6878 s = pool->start;
6879 poolFinish(pool);
6880 return s;
6881 }
6882
6883 static const XML_Char * FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)6884 poolAppendString(STRING_POOL *pool, const XML_Char *s)
6885 {
6886 while (*s) {
6887 if (!poolAppendChar(pool, *s))
6888 return NULL;
6889 s++;
6890 }
6891 return pool->start;
6892 }
6893
6894 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)6895 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6896 const char *ptr, const char *end)
6897 {
6898 if (!poolAppend(pool, enc, ptr, end))
6899 return NULL;
6900 if (pool->ptr == pool->end && !poolGrow(pool))
6901 return NULL;
6902 *(pool->ptr)++ = 0;
6903 return pool->start;
6904 }
6905
6906 static size_t
poolBytesToAllocateFor(int blockSize)6907 poolBytesToAllocateFor(int blockSize)
6908 {
6909 /* Unprotected math would be:
6910 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6911 **
6912 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6913 ** For a + b * c we check b * c in isolation first, so that addition of a
6914 ** on top has no chance of making us accept a small non-negative number
6915 */
6916 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6917
6918 if (blockSize <= 0)
6919 return 0;
6920
6921 if (blockSize > (int)(INT_MAX / stretch))
6922 return 0;
6923
6924 {
6925 const int stretchedBlockSize = blockSize * (int)stretch;
6926 const int bytesToAllocate = (int)(
6927 offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6928 if (bytesToAllocate < 0)
6929 return 0;
6930
6931 return (size_t)bytesToAllocate;
6932 }
6933 }
6934
6935 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)6936 poolGrow(STRING_POOL *pool)
6937 {
6938 if (pool->freeBlocks) {
6939 if (pool->start == 0) {
6940 pool->blocks = pool->freeBlocks;
6941 pool->freeBlocks = pool->freeBlocks->next;
6942 pool->blocks->next = NULL;
6943 pool->start = pool->blocks->s;
6944 pool->end = pool->start + pool->blocks->size;
6945 pool->ptr = pool->start;
6946 return XML_TRUE;
6947 }
6948 if (pool->end - pool->start < pool->freeBlocks->size) {
6949 BLOCK *tem = pool->freeBlocks->next;
6950 pool->freeBlocks->next = pool->blocks;
6951 pool->blocks = pool->freeBlocks;
6952 pool->freeBlocks = tem;
6953 memcpy(pool->blocks->s, pool->start,
6954 (pool->end - pool->start) * sizeof(XML_Char));
6955 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6956 pool->start = pool->blocks->s;
6957 pool->end = pool->start + pool->blocks->size;
6958 return XML_TRUE;
6959 }
6960 }
6961 if (pool->blocks && pool->start == pool->blocks->s) {
6962 BLOCK *temp;
6963 int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
6964 size_t bytesToAllocate;
6965
6966 /* NOTE: Needs to be calculated prior to calling `realloc`
6967 to avoid dangling pointers: */
6968 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
6969
6970 if (blockSize < 0) {
6971 /* This condition traps a situation where either more than
6972 * INT_MAX/2 bytes have already been allocated. This isn't
6973 * readily testable, since it is unlikely that an average
6974 * machine will have that much memory, so we exclude it from the
6975 * coverage statistics.
6976 */
6977 return XML_FALSE; /* LCOV_EXCL_LINE */
6978 }
6979
6980 bytesToAllocate = poolBytesToAllocateFor(blockSize);
6981 if (bytesToAllocate == 0)
6982 return XML_FALSE;
6983
6984 temp = (BLOCK *)
6985 pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
6986 if (temp == NULL)
6987 return XML_FALSE;
6988 pool->blocks = temp;
6989 pool->blocks->size = blockSize;
6990 pool->ptr = pool->blocks->s + offsetInsideBlock;
6991 pool->start = pool->blocks->s;
6992 pool->end = pool->start + blockSize;
6993 }
6994 else {
6995 BLOCK *tem;
6996 int blockSize = (int)(pool->end - pool->start);
6997 size_t bytesToAllocate;
6998
6999 if (blockSize < 0) {
7000 /* This condition traps a situation where either more than
7001 * INT_MAX bytes have already been allocated (which is prevented
7002 * by various pieces of program logic, not least this one, never
7003 * mind the unlikelihood of actually having that much memory) or
7004 * the pool control fields have been corrupted (which could
7005 * conceivably happen in an extremely buggy user handler
7006 * function). Either way it isn't readily testable, so we
7007 * exclude it from the coverage statistics.
7008 */
7009 return XML_FALSE; /* LCOV_EXCL_LINE */
7010 }
7011
7012 if (blockSize < INIT_BLOCK_SIZE)
7013 blockSize = INIT_BLOCK_SIZE;
7014 else {
7015 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7016 if ((int)((unsigned)blockSize * 2U) < 0) {
7017 return XML_FALSE;
7018 }
7019 blockSize *= 2;
7020 }
7021
7022 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7023 if (bytesToAllocate == 0)
7024 return XML_FALSE;
7025
7026 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
7027 if (!tem)
7028 return XML_FALSE;
7029 tem->size = blockSize;
7030 tem->next = pool->blocks;
7031 pool->blocks = tem;
7032 if (pool->ptr != pool->start)
7033 memcpy(tem->s, pool->start,
7034 (pool->ptr - pool->start) * sizeof(XML_Char));
7035 pool->ptr = tem->s + (pool->ptr - pool->start);
7036 pool->start = tem->s;
7037 pool->end = tem->s + blockSize;
7038 }
7039 return XML_TRUE;
7040 }
7041
7042 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7043 nextScaffoldPart(XML_Parser parser)
7044 {
7045 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7046 CONTENT_SCAFFOLD * me;
7047 int next;
7048
7049 if (!dtd->scaffIndex) {
7050 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7051 if (!dtd->scaffIndex)
7052 return -1;
7053 dtd->scaffIndex[0] = 0;
7054 }
7055
7056 if (dtd->scaffCount >= dtd->scaffSize) {
7057 CONTENT_SCAFFOLD *temp;
7058 if (dtd->scaffold) {
7059 temp = (CONTENT_SCAFFOLD *)
7060 REALLOC(parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7061 if (temp == NULL)
7062 return -1;
7063 dtd->scaffSize *= 2;
7064 }
7065 else {
7066 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7067 * sizeof(CONTENT_SCAFFOLD));
7068 if (temp == NULL)
7069 return -1;
7070 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7071 }
7072 dtd->scaffold = temp;
7073 }
7074 next = dtd->scaffCount++;
7075 me = &dtd->scaffold[next];
7076 if (dtd->scaffLevel) {
7077 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
7078 if (parent->lastchild) {
7079 dtd->scaffold[parent->lastchild].nextsib = next;
7080 }
7081 if (!parent->childcnt)
7082 parent->firstchild = next;
7083 parent->lastchild = next;
7084 parent->childcnt++;
7085 }
7086 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7087 return next;
7088 }
7089
7090 static void
build_node(XML_Parser parser,int src_node,XML_Content * dest,XML_Content ** contpos,XML_Char ** strpos)7091 build_node(XML_Parser parser,
7092 int src_node,
7093 XML_Content *dest,
7094 XML_Content **contpos,
7095 XML_Char **strpos)
7096 {
7097 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7098 dest->type = dtd->scaffold[src_node].type;
7099 dest->quant = dtd->scaffold[src_node].quant;
7100 if (dest->type == XML_CTYPE_NAME) {
7101 const XML_Char *src;
7102 dest->name = *strpos;
7103 src = dtd->scaffold[src_node].name;
7104 for (;;) {
7105 *(*strpos)++ = *src;
7106 if (!*src)
7107 break;
7108 src++;
7109 }
7110 dest->numchildren = 0;
7111 dest->children = NULL;
7112 }
7113 else {
7114 unsigned int i;
7115 int cn;
7116 dest->numchildren = dtd->scaffold[src_node].childcnt;
7117 dest->children = *contpos;
7118 *contpos += dest->numchildren;
7119 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7120 i < dest->numchildren;
7121 i++, cn = dtd->scaffold[cn].nextsib) {
7122 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
7123 }
7124 dest->name = NULL;
7125 }
7126 }
7127
7128 static XML_Content *
build_model(XML_Parser parser)7129 build_model (XML_Parser parser)
7130 {
7131 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7132 XML_Content *ret;
7133 XML_Content *cpos;
7134 XML_Char * str;
7135 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
7136 + (dtd->contentStringLen * sizeof(XML_Char)));
7137
7138 ret = (XML_Content *)MALLOC(parser, allocsize);
7139 if (!ret)
7140 return NULL;
7141
7142 str = (XML_Char *) (&ret[dtd->scaffCount]);
7143 cpos = &ret[1];
7144
7145 build_node(parser, 0, ret, &cpos, &str);
7146 return ret;
7147 }
7148
7149 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7150 getElementType(XML_Parser parser,
7151 const ENCODING *enc,
7152 const char *ptr,
7153 const char *end)
7154 {
7155 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7156 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7157 ELEMENT_TYPE *ret;
7158
7159 if (!name)
7160 return NULL;
7161 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
7162 if (!ret)
7163 return NULL;
7164 if (ret->name != name)
7165 poolDiscard(&dtd->pool);
7166 else {
7167 poolFinish(&dtd->pool);
7168 if (!setElementTypePrefix(parser, ret))
7169 return NULL;
7170 }
7171 return ret;
7172 }
7173
7174 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7175 copyString(const XML_Char *s,
7176 const XML_Memory_Handling_Suite *memsuite)
7177 {
7178 int charsRequired = 0;
7179 XML_Char *result;
7180
7181 /* First determine how long the string is */
7182 while (s[charsRequired] != 0) {
7183 charsRequired++;
7184 }
7185 /* Include the terminator */
7186 charsRequired++;
7187
7188 /* Now allocate space for the copy */
7189 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7190 if (result == NULL)
7191 return NULL;
7192 /* Copy the original into place */
7193 memcpy(result, s, charsRequired * sizeof(XML_Char));
7194 return result;
7195 }
7196