1 /* c5625880f4bf417c1463deee4eb92d86ff413f802048621c57e25fe483eb59e4 (2.6.4+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Copyright (c) 2024 Berkay Eren Ürün <berkay.ueruen@siemens.com>
43 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
44 Licensed under the MIT license:
45
46 Permission is hereby granted, free of charge, to any person obtaining
47 a copy of this software and associated documentation files (the
48 "Software"), to deal in the Software without restriction, including
49 without limitation the rights to use, copy, modify, merge, publish,
50 distribute, sublicense, and/or sell copies of the Software, and to permit
51 persons to whom the Software is furnished to do so, subject to the
52 following conditions:
53
54 The above copyright notice and this permission notice shall be included
55 in all copies or substantial portions of the Software.
56
57 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
58 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
59 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
60 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
61 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
62 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
63 USE OR OTHER DEALINGS IN THE SOFTWARE.
64 */
65
66 #define XML_BUILDING_EXPAT 1
67
68 #include "expat_config.h"
69
70 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
71 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
72 #endif
73
74 #if defined(XML_DTD) && XML_GE == 0
75 # error Either undefine XML_DTD or define XML_GE to 1.
76 #endif
77
78 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
79 || (XML_CONTEXT_BYTES + 0 < 0)
80 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
81 #endif
82
83 #if defined(HAVE_SYSCALL_GETRANDOM)
84 # if ! defined(_GNU_SOURCE)
85 # define _GNU_SOURCE 1 /* syscall prototype */
86 # endif
87 #endif
88
89 #ifdef _WIN32
90 /* force stdlib to define rand_s() */
91 # if ! defined(_CRT_RAND_S)
92 # define _CRT_RAND_S
93 # endif
94 #endif
95
96 #include <stdbool.h>
97 #include <stddef.h>
98 #include <string.h> /* memset(), memcpy() */
99 #include <assert.h>
100 #include <limits.h> /* UINT_MAX */
101 #include <stdio.h> /* fprintf */
102 #include <stdlib.h> /* getenv, rand_s */
103 #include <stdint.h> /* uintptr_t */
104 #include <math.h> /* isnan */
105
106 #ifdef _WIN32
107 # define getpid GetCurrentProcessId
108 #else
109 # include <sys/time.h> /* gettimeofday() */
110 # include <sys/types.h> /* getpid() */
111 # include <unistd.h> /* getpid() */
112 # include <fcntl.h> /* O_RDONLY */
113 # include <errno.h>
114 #endif
115
116 #ifdef _WIN32
117 # include "winconfig.h"
118 #endif
119
120 #include "ascii.h"
121 #include "expat.h"
122 #include "siphash.h"
123
124 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
125 # if defined(HAVE_GETRANDOM)
126 # include <sys/random.h> /* getrandom */
127 # else
128 # include <unistd.h> /* syscall */
129 # include <sys/syscall.h> /* SYS_getrandom */
130 # endif
131 # if ! defined(GRND_NONBLOCK)
132 # define GRND_NONBLOCK 0x0001
133 # endif /* defined(GRND_NONBLOCK) */
134 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
135
136 #if defined(HAVE_LIBBSD) \
137 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
138 # include <bsd/stdlib.h>
139 #endif
140
141 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
142 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
143 #endif
144
145 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
146 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
147 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
148 && ! defined(XML_POOR_ENTROPY)
149 # error You do not have support for any sources of high quality entropy \
150 enabled. For end user security, that is probably not what you want. \
151 \
152 Your options include: \
153 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
154 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
155 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
156 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
157 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
158 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
159 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
160 * Windows >=Vista (rand_s): _WIN32. \
161 \
162 If insist on not using any of these, bypass this error by defining \
163 XML_POOR_ENTROPY; you have been warned. \
164 \
165 If you have reasons to patch this detection code away or need changes \
166 to the build system, please open a bug. Thank you!
167 #endif
168
169 #ifdef XML_UNICODE
170 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
171 # define XmlConvert XmlUtf16Convert
172 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
173 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
174 # define XmlEncode XmlUtf16Encode
175 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
176 typedef unsigned short ICHAR;
177 #else
178 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
179 # define XmlConvert XmlUtf8Convert
180 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
181 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
182 # define XmlEncode XmlUtf8Encode
183 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
184 typedef char ICHAR;
185 #endif
186
187 #ifndef XML_NS
188
189 # define XmlInitEncodingNS XmlInitEncoding
190 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
191 # undef XmlGetInternalEncodingNS
192 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
193 # define XmlParseXmlDeclNS XmlParseXmlDecl
194
195 #endif
196
197 #ifdef XML_UNICODE
198
199 # ifdef XML_UNICODE_WCHAR_T
200 # define XML_T(x) (const wchar_t) x
201 # define XML_L(x) L##x
202 # else
203 # define XML_T(x) (const unsigned short)x
204 # define XML_L(x) x
205 # endif
206
207 #else
208
209 # define XML_T(x) x
210 # define XML_L(x) x
211
212 #endif
213
214 /* Round up n to be a multiple of sz, where sz is a power of 2. */
215 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
216
217 /* Do safe (NULL-aware) pointer arithmetic */
218 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
219
220 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
221
222 #include "internal.h"
223 #include "xmltok.h"
224 #include "xmlrole.h"
225
226 typedef const XML_Char *KEY;
227
228 typedef struct {
229 KEY name;
230 } NAMED;
231
/* Hash table of NAMED entries; this is the table type taken by lookup() and
   the hashTable*() functions declared in this file. */
232 typedef struct {
233 NAMED **v; /* NOTE(review): presumably the slot array of entry pointers -- confirm in lookup() */
234 unsigned char power; /* NOTE(review): presumably log2 of size -- confirm in lookup() */
235 size_t size; /* table size; a power of 2 according to the probing comment in this file */
236 size_t used; /* NOTE(review): presumably the number of occupied slots -- confirm */
237 const XML_Memory_Handling_Suite *mem; /* NOTE(review): presumably the suite handed to hashTableInit() -- confirm */
238 } HASH_TABLE;
239
240 static size_t keylen(KEY s);
241
242 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
243
244 /* For probing (after a collision) we need a step size relatively prime
245 to the hash table size, which is a power of 2. We use double-hashing,
246 since we can calculate a second hash value cheaply by taking those bits
247 of the first hash value that were discarded (masked out) when the table
248 index was calculated: index = hash & mask, where mask = table->size - 1.
249 We limit the maximum step size to table->size / 4 (mask >> 2) and make
250 it odd, since odd numbers are always relatively prime to a power of 2.
251 */
252 #define SECOND_HASH(hash, mask, power) \
253 ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
254 #define PROBE_STEP(hash, mask, power) \
255 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
256
257 typedef struct {
258 NAMED **p;
259 NAMED **end;
260 } HASH_TABLE_ITER;
261
262 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
263 #define INIT_DATA_BUF_SIZE 1024
264 #define INIT_ATTS_SIZE 16
265 #define INIT_ATTS_VERSION 0xFFFFFFFF
266 #define INIT_BLOCK_SIZE 1024
267 #define INIT_BUFFER_SIZE 1024
268
269 #define EXPAND_SPARE 24
270
271 typedef struct binding {
272 struct prefix *prefix;
273 struct binding *nextTagBinding;
274 struct binding *prevPrefixBinding;
275 const struct attribute_id *attId;
276 XML_Char *uri;
277 int uriLen;
278 int uriAlloc;
279 } BINDING;
280
281 typedef struct prefix {
282 const XML_Char *name;
283 BINDING *binding;
284 } PREFIX;
285
286 typedef struct {
287 const XML_Char *str;
288 const XML_Char *localPart;
289 const XML_Char *prefix;
290 int strLen;
291 int uriLen;
292 int prefixLen;
293 } TAG_NAME;
294
295 /* TAG represents an open element.
296 The name of the element is stored in both the document and API
297 encodings. The memory buffer 'buf' is a separately-allocated
298 memory area which stores the name. During the XML_Parse()/
299 XML_ParseBuffer() when the element is open, the memory for the 'raw'
300 version of the name (in the document encoding) is shared with the
301 document buffer. If the element is open across calls to
302 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
303 contain the 'raw' name as well.
304
305 A parser reuses these structures, maintaining a list of allocated
306 TAG objects in a free list.
307 */
308 typedef struct tag {
309 struct tag *parent; /* parent of this element */
310 const char *rawName; /* tagName in the original encoding */
311 int rawNameLength;
312 TAG_NAME name; /* tagName in the API encoding */
313 char *buf; /* buffer for name components */
314 char *bufEnd; /* end of the buffer */
315 BINDING *bindings;
316 } TAG;
317
318 typedef struct {
319 const XML_Char *name;
320 const XML_Char *textPtr;
321 int textLen; /* length in XML_Chars */
322 int processed; /* # of processed bytes - when suspended */
323 const XML_Char *systemId;
324 const XML_Char *base;
325 const XML_Char *publicId;
326 const XML_Char *notation;
327 XML_Bool open;
328 XML_Bool is_param;
329 XML_Bool is_internal; /* true if declared in internal subset outside PE */
330 } ENTITY;
331
332 typedef struct {
333 enum XML_Content_Type type;
334 enum XML_Content_Quant quant;
335 const XML_Char *name;
336 int firstchild;
337 int lastchild;
338 int childcnt;
339 int nextsib;
340 } CONTENT_SCAFFOLD;
341
342 #define INIT_SCAFFOLD_ELEMENTS 32
343
344 typedef struct block {
345 struct block *next;
346 int size;
347 XML_Char s[1];
348 } BLOCK;
349
/* Pool in which XML_Char strings are built up and stored; it is the type
   operated on by the pool*() functions and macros in this file. */
350 typedef struct {
351 BLOCK *blocks; /* NOTE(review): presumably the blocks currently in use -- confirm in poolGrow() */
352 BLOCK *freeBlocks; /* NOTE(review): presumably blocks retained for reuse -- confirm in poolClear() */
353 const XML_Char *end; /* write limit: poolAppendChar() calls poolGrow() once ptr reaches it */
354 XML_Char *ptr; /* next write position; poolAppendChar() stores here and advances it */
355 XML_Char *start; /* start of the string being built (poolStart()); poolFinish() moves it up to ptr */
356 const XML_Memory_Handling_Suite *mem; /* NOTE(review): presumably the suite handed to poolInit() -- confirm */
357 } STRING_POOL;
358
359 /* The XML_Char before the name is used to determine whether
360 an attribute has been specified. */
361 typedef struct attribute_id {
362 XML_Char *name;
363 PREFIX *prefix;
364 XML_Bool maybeTokenized;
365 XML_Bool xmlns;
366 } ATTRIBUTE_ID;
367
368 typedef struct {
369 const ATTRIBUTE_ID *id;
370 XML_Bool isCdata;
371 const XML_Char *value;
372 } DEFAULT_ATTRIBUTE;
373
374 typedef struct {
375 unsigned long version;
376 unsigned long hash;
377 const XML_Char *uriName;
378 } NS_ATT;
379
380 typedef struct {
381 const XML_Char *name;
382 PREFIX *prefix;
383 const ATTRIBUTE_ID *idAtt;
384 int nDefaultAtts;
385 int allocDefaultAtts;
386 DEFAULT_ATTRIBUTE *defaultAtts;
387 } ELEMENT_TYPE;
388
389 typedef struct {
390 HASH_TABLE generalEntities;
391 HASH_TABLE elementTypes;
392 HASH_TABLE attributeIds;
393 HASH_TABLE prefixes;
394 STRING_POOL pool;
395 STRING_POOL entityValuePool;
396 /* false once a parameter entity reference has been skipped */
397 XML_Bool keepProcessing;
398 /* true once an internal or external PE reference has been encountered;
399 this includes the reference to an external subset */
400 XML_Bool hasParamEntityRefs;
401 XML_Bool standalone;
402 #ifdef XML_DTD
403 /* indicates if external PE has been read */
404 XML_Bool paramEntityRead;
405 HASH_TABLE paramEntities;
406 #endif /* XML_DTD */
407 PREFIX defaultPrefix;
408 /* === scaffolding for building content model === */
409 XML_Bool in_eldecl;
410 CONTENT_SCAFFOLD *scaffold;
411 unsigned contentStringLen;
412 unsigned scaffSize;
413 unsigned scaffCount;
414 int scaffLevel;
415 int *scaffIndex;
416 } DTD;
417
418 typedef struct open_internal_entity {
419 const char *internalEventPtr;
420 const char *internalEventEndPtr;
421 struct open_internal_entity *next;
422 ENTITY *entity;
423 int startTagLevel;
424 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
425 } OPEN_INTERNAL_ENTITY;
426
427 enum XML_Account {
428 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
429 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
430 expansion */
431 XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
432 };
433
434 #if XML_GE == 1
435 typedef unsigned long long XmlBigCount;
436 typedef struct accounting {
437 XmlBigCount countBytesDirect;
438 XmlBigCount countBytesIndirect;
439 unsigned long debugLevel;
440 float maximumAmplificationFactor; // >=1.0
441 unsigned long long activationThresholdBytes;
442 } ACCOUNTING;
443
444 typedef struct entity_stats {
445 unsigned int countEverOpened;
446 unsigned int currentDepth;
447 unsigned int maximumDepthSeen;
448 unsigned long debugLevel;
449 } ENTITY_STATS;
450 #endif /* XML_GE == 1 */
451
452 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
453 const char *end, const char **endPtr);
454
455 static Processor prologProcessor;
456 static Processor prologInitProcessor;
457 static Processor contentProcessor;
458 static Processor cdataSectionProcessor;
459 #ifdef XML_DTD
460 static Processor ignoreSectionProcessor;
461 static Processor externalParEntProcessor;
462 static Processor externalParEntInitProcessor;
463 static Processor entityValueProcessor;
464 static Processor entityValueInitProcessor;
465 #endif /* XML_DTD */
466 static Processor epilogProcessor;
467 static Processor errorProcessor;
468 static Processor externalEntityInitProcessor;
469 static Processor externalEntityInitProcessor2;
470 static Processor externalEntityInitProcessor3;
471 static Processor externalEntityContentProcessor;
472 static Processor internalEntityProcessor;
473
474 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
475 const XML_Char *encodingName);
476 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
477 const char *s, const char *next);
478 static enum XML_Error initializeEncoding(XML_Parser parser);
479 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
480 const char *s, const char *end, int tok,
481 const char *next, const char **nextPtr,
482 XML_Bool haveMore, XML_Bool allowClosingDoctype,
483 enum XML_Account account);
484 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
485 XML_Bool betweenDecl);
486 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
487 const ENCODING *enc, const char *start,
488 const char *end, const char **endPtr,
489 XML_Bool haveMore, enum XML_Account account);
490 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
491 const char **startPtr, const char *end,
492 const char **nextPtr, XML_Bool haveMore,
493 enum XML_Account account);
494 #ifdef XML_DTD
495 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
496 const char **startPtr, const char *end,
497 const char **nextPtr, XML_Bool haveMore);
498 #endif /* XML_DTD */
499
500 static void freeBindings(XML_Parser parser, BINDING *bindings);
501 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
502 const char *attStr, TAG_NAME *tagNamePtr,
503 BINDING **bindingsPtr,
504 enum XML_Account account);
505 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
506 const ATTRIBUTE_ID *attId, const XML_Char *uri,
507 BINDING **bindingsPtr);
508 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
509 XML_Bool isCdata, XML_Bool isId,
510 const XML_Char *value, XML_Parser parser);
511 static enum XML_Error storeAttributeValue(XML_Parser parser,
512 const ENCODING *enc, XML_Bool isCdata,
513 const char *ptr, const char *end,
514 STRING_POOL *pool,
515 enum XML_Account account);
516 static enum XML_Error appendAttributeValue(XML_Parser parser,
517 const ENCODING *enc,
518 XML_Bool isCdata, const char *ptr,
519 const char *end, STRING_POOL *pool,
520 enum XML_Account account);
521 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
522 const char *start, const char *end);
523 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
524 #if XML_GE == 1
525 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
526 const char *start, const char *end,
527 enum XML_Account account);
528 #else
529 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
530 #endif
531 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
532 const char *start, const char *end);
533 static int reportComment(XML_Parser parser, const ENCODING *enc,
534 const char *start, const char *end);
535 static void reportDefault(XML_Parser parser, const ENCODING *enc,
536 const char *start, const char *end);
537
538 static const XML_Char *getContext(XML_Parser parser);
539 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
540
541 static void FASTCALL normalizePublicId(XML_Char *s);
542
543 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
544 /* do not call if m_parentParser != NULL */
545 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
546 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
547 const XML_Memory_Handling_Suite *ms);
548 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
549 const XML_Memory_Handling_Suite *ms);
550 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
551 STRING_POOL *newPool, const HASH_TABLE *oldTable);
552 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
553 size_t createSize);
554 static void FASTCALL hashTableInit(HASH_TABLE *table,
555 const XML_Memory_Handling_Suite *ms);
556 static void FASTCALL hashTableClear(HASH_TABLE *table);
557 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
558 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
559 const HASH_TABLE *table);
560 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
561
562 static void FASTCALL poolInit(STRING_POOL *pool,
563 const XML_Memory_Handling_Suite *ms);
564 static void FASTCALL poolClear(STRING_POOL *pool);
565 static void FASTCALL poolDestroy(STRING_POOL *pool);
566 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
567 const char *ptr, const char *end);
568 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
569 const char *ptr, const char *end);
570 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
571 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
572 const XML_Char *s);
573 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
574 int n);
575 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
576 const XML_Char *s);
577
578 static int FASTCALL nextScaffoldPart(XML_Parser parser);
579 static XML_Content *build_model(XML_Parser parser);
580 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
581 const char *ptr, const char *end);
582
583 static XML_Char *copyString(const XML_Char *s,
584 const XML_Memory_Handling_Suite *memsuite);
585
586 static unsigned long generate_hash_secret_salt(XML_Parser parser);
587 static XML_Bool startParsing(XML_Parser parser);
588
589 static XML_Parser parserCreate(const XML_Char *encodingName,
590 const XML_Memory_Handling_Suite *memsuite,
591 const XML_Char *nameSep, DTD *dtd);
592
593 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
594
595 #if XML_GE == 1
596 static float accountingGetCurrentAmplification(XML_Parser rootParser);
597 static void accountingReportStats(XML_Parser originParser, const char *epilog);
598 static void accountingOnAbort(XML_Parser originParser);
599 static void accountingReportDiff(XML_Parser rootParser,
600 unsigned int levelsAwayFromRootParser,
601 const char *before, const char *after,
602 ptrdiff_t bytesMore, int source_line,
603 enum XML_Account account);
604 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
605 const char *before, const char *after,
606 int source_line,
607 enum XML_Account account);
608
609 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
610 const char *action, int sourceLine);
611 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
612 int sourceLine);
613 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
614 int sourceLine);
615
616 static XML_Parser getRootParserOf(XML_Parser parser,
617 unsigned int *outLevelDiff);
618 #endif /* XML_GE == 1 */
619
620 static unsigned long getDebugLevel(const char *variableName,
621 unsigned long defaultDebugLevel);
622
/* String-pool accessors.  Each macro takes a pointer to a STRING_POOL.
   The pool argument is parenthesized everywhere so that any pointer
   expression (e.g. &dtd->pool) can be passed, not only a plain identifier. */

/* Start of the string currently being accumulated. */
#define poolStart(pool) ((pool)->start)
/* Number of XML_Chars accumulated so far in the current string. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the most recently appended character. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The most recently appended character (reads the slot just before ptr). */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the current string: rewind the write position to its start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Commit the current string: the next string begins at the write position. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c.  When the write position has reached the pool's end, poolGrow()
   is called first.  Evaluates to 1 on success, 0 if poolGrow() failed.
   Note that pool is evaluated more than once. */
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
633
634 #if ! defined(XML_TESTING)
635 const
636 #endif
637 XML_Bool g_reparseDeferralEnabledDefault
638 = XML_TRUE; // write ONLY in runtests.c
639 #if defined(XML_TESTING)
640 unsigned int g_bytesScanned = 0; // used for testing only
641 #endif
642
643 struct XML_ParserStruct {
644 /* The first member must be m_userData so that the XML_GetUserData
645 macro works. */
646 void *m_userData;
647 void *m_handlerArg;
648
649 // How the four parse buffer pointers below relate in time and space:
650 //
651 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
652 // | | | |
653 // <--parsed-->| | |
654 // <---parsing--->| |
655 // <--unoccupied-->|
656 // <---------total-malloced/realloced-------->|
657
658 char *m_buffer; // malloc/realloc base pointer of parse buffer
659 const XML_Memory_Handling_Suite m_mem;
660 const char *m_bufferPtr; // first character to be parsed
661 char *m_bufferEnd; // past last character to be parsed
662 const char *m_bufferLim; // allocated end of m_buffer
663
664 XML_Index m_parseEndByteIndex;
665 const char *m_parseEndPtr;
666 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
667 XML_Bool m_reparseDeferralEnabled;
668 int m_lastBufferRequestSize;
669 XML_Char *m_dataBuf;
670 XML_Char *m_dataBufEnd;
671 XML_StartElementHandler m_startElementHandler;
672 XML_EndElementHandler m_endElementHandler;
673 XML_CharacterDataHandler m_characterDataHandler;
674 XML_ProcessingInstructionHandler m_processingInstructionHandler;
675 XML_CommentHandler m_commentHandler;
676 XML_StartCdataSectionHandler m_startCdataSectionHandler;
677 XML_EndCdataSectionHandler m_endCdataSectionHandler;
678 XML_DefaultHandler m_defaultHandler;
679 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
680 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
681 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
682 XML_NotationDeclHandler m_notationDeclHandler;
683 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
684 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
685 XML_NotStandaloneHandler m_notStandaloneHandler;
686 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
687 XML_Parser m_externalEntityRefHandlerArg;
688 XML_SkippedEntityHandler m_skippedEntityHandler;
689 XML_UnknownEncodingHandler m_unknownEncodingHandler;
690 XML_ElementDeclHandler m_elementDeclHandler;
691 XML_AttlistDeclHandler m_attlistDeclHandler;
692 XML_EntityDeclHandler m_entityDeclHandler;
693 XML_XmlDeclHandler m_xmlDeclHandler;
694 const ENCODING *m_encoding;
695 INIT_ENCODING m_initEncoding;
696 const ENCODING *m_internalEncoding;
697 const XML_Char *m_protocolEncodingName;
698 XML_Bool m_ns;
699 XML_Bool m_ns_triplets;
700 void *m_unknownEncodingMem;
701 void *m_unknownEncodingData;
702 void *m_unknownEncodingHandlerData;
703 void(XMLCALL *m_unknownEncodingRelease)(void *);
704 PROLOG_STATE m_prologState;
705 Processor *m_processor;
706 enum XML_Error m_errorCode;
707 const char *m_eventPtr;
708 const char *m_eventEndPtr;
709 const char *m_positionPtr;
710 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
711 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
712 XML_Bool m_defaultExpandInternalEntities;
713 int m_tagLevel;
714 ENTITY *m_declEntity;
715 const XML_Char *m_doctypeName;
716 const XML_Char *m_doctypeSysid;
717 const XML_Char *m_doctypePubid;
718 const XML_Char *m_declAttributeType;
719 const XML_Char *m_declNotationName;
720 const XML_Char *m_declNotationPublicId;
721 ELEMENT_TYPE *m_declElementType;
722 ATTRIBUTE_ID *m_declAttributeId;
723 XML_Bool m_declAttributeIsCdata;
724 XML_Bool m_declAttributeIsId;
725 DTD *m_dtd;
726 const XML_Char *m_curBase;
727 TAG *m_tagStack;
728 TAG *m_freeTagList;
729 BINDING *m_inheritedBindings;
730 BINDING *m_freeBindingList;
731 int m_attsSize;
732 int m_nSpecifiedAtts;
733 int m_idAttIndex;
734 ATTRIBUTE *m_atts;
735 NS_ATT *m_nsAtts;
736 unsigned long m_nsAttsVersion;
737 unsigned char m_nsAttsPower;
738 #ifdef XML_ATTR_INFO
739 XML_AttrInfo *m_attInfo;
740 #endif
741 POSITION m_position;
742 STRING_POOL m_tempPool;
743 STRING_POOL m_temp2Pool;
744 char *m_groupConnector;
745 unsigned int m_groupSize;
746 XML_Char m_namespaceSeparator;
747 XML_Parser m_parentParser;
748 XML_ParsingStatus m_parsingStatus;
749 #ifdef XML_DTD
750 XML_Bool m_isParamEntity;
751 XML_Bool m_useForeignDTD;
752 enum XML_ParamEntityParsing m_paramEntityParsing;
753 #endif
754 unsigned long m_hash_secret_salt;
755 #if XML_GE == 1
756 ACCOUNTING m_accounting;
757 ENTITY_STATS m_entity_stats;
758 #endif
759 };
760
/* Allocation shorthands that call the malloc_fcn / realloc_fcn / free_fcn
   members of the given parser's m_mem suite.  The parser argument is
   parenthesized so that any pointer expression may be passed. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
764
765 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)766 XML_ParserCreate(const XML_Char *encodingName) {
767 return XML_ParserCreate_MM(encodingName, NULL, NULL);
768 }
769
770 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)771 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
772 XML_Char tmp[2] = {nsSep, 0};
773 return XML_ParserCreate_MM(encodingName, NULL, tmp);
774 }
775
776 // "xml=http://www.w3.org/XML/1998/namespace"
777 static const XML_Char implicitContext[]
778 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
779 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
780 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
781 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
782 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
783 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
784 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
785 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
786 '\0'};
787
788 /* To avoid warnings about unused functions: */
789 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
790
791 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
792
/* Obtain entropy on Linux 3.17+ through getrandom() in non-blocking mode.

   Fills target with count random bytes.  Returns 1 only when all count
   bytes were written and 0 otherwise (this includes count == 0, because the
   call then reports no progress).

   The retry decision is based solely on the return value of the most recent
   call: a short write requests the remainder, and a failure is retried only
   for EINTR.  errno is never consulted after a call that did not fail, where
   it would merely hold a stale value from some earlier, unrelated error. */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long is wide enough on Linux for both getrandom()'s ssize_t result
       and syscall()'s long result */
    const long bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1; /* full count bytes written */
      continue;   /* short write: request the remaining bytes */
    }

    if (bytesWrittenMore < 0 && errno == EINTR)
      continue; /* interrupted before anything was written: retry */

    return 0; /* hard failure, or a call that made no progress */
  }
}
820
821 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
822
823 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
824
/* Extract entropy from /dev/urandom.

   Fills target with count bytes read from the device.  Returns 1 only when
   all count bytes were read; returns 0 if the device cannot be opened, a
   read fails for a reason other than EINTR, or a read makes no progress
   (which also covers count == 0).

   errno is inspected only right after a read() that actually failed; after
   a short but successful read it would still carry a stale value from an
   earlier, unrelated error. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (! success) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        success = 1;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      break; /* no progress, or a hard read error: give up */
    }
    /* otherwise the read was interrupted before any data arrived: retry */
  }

  close(fd);
  return success;
}
852
853 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
854
855 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
856
857 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
858
859 static void
writeRandomBytes_arc4random(void * target,size_t count)860 writeRandomBytes_arc4random(void *target, size_t count) {
861 size_t bytesWrittenTotal = 0;
862
863 while (bytesWrittenTotal < count) {
864 const uint32_t random32 = arc4random();
865 size_t i = 0;
866
867 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count);
868 i++, bytesWrittenTotal++) {
869 const uint8_t random8 = (uint8_t)(random32 >> (i * 8));
870 ((uint8_t *)target)[bytesWrittenTotal] = random8;
871 }
872 }
873 }
874
875 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
876
877 #ifdef _WIN32
878
879 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
880 as it didn't declare it in its header prior to version 5.3.0 of its
881 runtime package (mingwrt, containing stdlib.h). The upstream fix
882 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
883 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
884 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
885 __declspec(dllimport) int rand_s(unsigned int *);
886 # endif
887
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills the COUNT bytes at TARGET, consuming one rand_s() value per
 * sizeof(unsigned int) output bytes (least significant byte first).
 * Returns 1 on success and 0 as soon as rand_s() reports an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;
    size_t remaining;
    size_t chunk;
    size_t k;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    remaining = count - filled;
    chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    for (k = 0; k < chunk; k++) {
      out[filled + k] = (uint8_t)(word >> (k * 8));
    }
    filled += chunk;
  }
  return 1; /* success */
}
911
912 #endif /* _WIN32 */
913
914 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
915
/* Derive a small amount of entropy from the current time.
 *
 * On Windows the two halves of the current FILETIME are XOR-ed together.
 * Elsewhere the microseconds part of gettimeofday() is returned.
 *
 * The timeval is zero-initialized so that a gettimeofday() failure in an
 * NDEBUG build (where the assertion is compiled out) yields 0 instead of
 * reading an indeterminate value.
 */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME ft;
  GetSystemTimeAsFileTime(&ft); /* never fails */
  return ft.dwHighDateTime ^ ft.dwLowDateTime;
#  else
  struct timeval tv = {0, 0};
  int gettimeofday_res;

  gettimeofday_res = gettimeofday(&tv, NULL);

#    if defined(NDEBUG)
  (void)gettimeofday_res;
#    else
  assert(gettimeofday_res == 0);
#    endif /* defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return (unsigned long)tv.tv_usec;
#  endif
}
938
939 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
940
/* Pass-through helper that optionally reports a freshly generated salt.
 *
 * When getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) yields at least 1, one line
 * naming LABEL and showing ENTROPY in zero-padded hex is written to stderr.
 * ENTROPY is returned unchanged in either case.
 */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
949
950 static unsigned long
generate_hash_secret_salt(XML_Parser parser)951 generate_hash_secret_salt(XML_Parser parser) {
952 unsigned long entropy;
953 (void)parser;
954
955 /* "Failproof" high quality providers: */
956 #if defined(HAVE_ARC4RANDOM_BUF)
957 arc4random_buf(&entropy, sizeof(entropy));
958 return ENTROPY_DEBUG("arc4random_buf", entropy);
959 #elif defined(HAVE_ARC4RANDOM)
960 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
961 return ENTROPY_DEBUG("arc4random", entropy);
962 #else
963 /* Try high quality providers first .. */
964 # ifdef _WIN32
965 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
966 return ENTROPY_DEBUG("rand_s", entropy);
967 }
968 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
969 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
970 return ENTROPY_DEBUG("getrandom", entropy);
971 }
972 # endif
973 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
974 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
975 return ENTROPY_DEBUG("/dev/urandom", entropy);
976 }
977 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
978 /* .. and self-made low quality for backup: */
979
980 /* Process ID is 0 bits entropy if attacker has local access */
981 entropy = gather_time_entropy() ^ getpid();
982
983 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
984 if (sizeof(unsigned long) == 4) {
985 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
986 } else {
987 return ENTROPY_DEBUG("fallback(8)",
988 entropy * (unsigned long)2305843009213693951ULL);
989 }
990 #endif
991 }
992
993 static unsigned long
get_hash_secret_salt(XML_Parser parser)994 get_hash_secret_salt(XML_Parser parser) {
995 if (parser->m_parentParser != NULL)
996 return get_hash_secret_salt(parser->m_parentParser);
997 return parser->m_hash_secret_salt;
998 }
999
1000 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)1001 callProcessor(XML_Parser parser, const char *start, const char *end,
1002 const char **endPtr) {
1003 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1004
1005 if (parser->m_reparseDeferralEnabled
1006 && ! parser->m_parsingStatus.finalBuffer) {
1007 // Heuristic: don't try to parse a partial token again until the amount of
1008 // available data has increased significantly.
1009 const size_t had_before = parser->m_partialTokenBytesBefore;
1010 // ...but *do* try anyway if we're close to causing a reallocation.
1011 size_t available_buffer
1012 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1013 #if XML_CONTEXT_BYTES > 0
1014 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1015 #endif
1016 available_buffer
1017 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1018 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1019 const bool enough
1020 = (have_now >= 2 * had_before)
1021 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1022
1023 if (! enough) {
1024 *endPtr = start; // callers may expect this to be set
1025 return XML_ERROR_NONE;
1026 }
1027 }
1028 #if defined(XML_TESTING)
1029 g_bytesScanned += (unsigned)have_now;
1030 #endif
1031 const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
1032 if (ret == XML_ERROR_NONE) {
1033 // if we consumed nothing, remember what we had on this parse attempt.
1034 if (*endPtr == start) {
1035 parser->m_partialTokenBytesBefore = have_now;
1036 } else {
1037 parser->m_partialTokenBytesBefore = 0;
1038 }
1039 }
1040 return ret;
1041 }
1042
1043 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1044 startParsing(XML_Parser parser) {
1045 /* hash functions must be initialized before setContext() is called */
1046 if (parser->m_hash_secret_salt == 0)
1047 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1048 if (parser->m_ns) {
1049 /* implicit context only set for root parser, since child
1050 parsers (i.e. external entity parsers) will inherit it
1051 */
1052 return setContext(parser, implicitContext);
1053 }
1054 return XML_TRUE;
1055 }
1056
1057 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1058 XML_ParserCreate_MM(const XML_Char *encodingName,
1059 const XML_Memory_Handling_Suite *memsuite,
1060 const XML_Char *nameSep) {
1061 return parserCreate(encodingName, memsuite, nameSep, NULL);
1062 }
1063
1064 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)1065 parserCreate(const XML_Char *encodingName,
1066 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1067 DTD *dtd) {
1068 XML_Parser parser;
1069
1070 if (memsuite) {
1071 XML_Memory_Handling_Suite *mtemp;
1072 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1073 if (parser != NULL) {
1074 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1075 mtemp->malloc_fcn = memsuite->malloc_fcn;
1076 mtemp->realloc_fcn = memsuite->realloc_fcn;
1077 mtemp->free_fcn = memsuite->free_fcn;
1078 }
1079 } else {
1080 XML_Memory_Handling_Suite *mtemp;
1081 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1082 if (parser != NULL) {
1083 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1084 mtemp->malloc_fcn = malloc;
1085 mtemp->realloc_fcn = realloc;
1086 mtemp->free_fcn = free;
1087 }
1088 }
1089
1090 if (! parser)
1091 return parser;
1092
1093 parser->m_buffer = NULL;
1094 parser->m_bufferLim = NULL;
1095
1096 parser->m_attsSize = INIT_ATTS_SIZE;
1097 parser->m_atts
1098 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1099 if (parser->m_atts == NULL) {
1100 FREE(parser, parser);
1101 return NULL;
1102 }
1103 #ifdef XML_ATTR_INFO
1104 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1105 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1106 if (parser->m_attInfo == NULL) {
1107 FREE(parser, parser->m_atts);
1108 FREE(parser, parser);
1109 return NULL;
1110 }
1111 #endif
1112 parser->m_dataBuf
1113 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1114 if (parser->m_dataBuf == NULL) {
1115 FREE(parser, parser->m_atts);
1116 #ifdef XML_ATTR_INFO
1117 FREE(parser, parser->m_attInfo);
1118 #endif
1119 FREE(parser, parser);
1120 return NULL;
1121 }
1122 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1123
1124 if (dtd)
1125 parser->m_dtd = dtd;
1126 else {
1127 parser->m_dtd = dtdCreate(&parser->m_mem);
1128 if (parser->m_dtd == NULL) {
1129 FREE(parser, parser->m_dataBuf);
1130 FREE(parser, parser->m_atts);
1131 #ifdef XML_ATTR_INFO
1132 FREE(parser, parser->m_attInfo);
1133 #endif
1134 FREE(parser, parser);
1135 return NULL;
1136 }
1137 }
1138
1139 parser->m_freeBindingList = NULL;
1140 parser->m_freeTagList = NULL;
1141 parser->m_freeInternalEntities = NULL;
1142
1143 parser->m_groupSize = 0;
1144 parser->m_groupConnector = NULL;
1145
1146 parser->m_unknownEncodingHandler = NULL;
1147 parser->m_unknownEncodingHandlerData = NULL;
1148
1149 parser->m_namespaceSeparator = ASCII_EXCL;
1150 parser->m_ns = XML_FALSE;
1151 parser->m_ns_triplets = XML_FALSE;
1152
1153 parser->m_nsAtts = NULL;
1154 parser->m_nsAttsVersion = 0;
1155 parser->m_nsAttsPower = 0;
1156
1157 parser->m_protocolEncodingName = NULL;
1158
1159 poolInit(&parser->m_tempPool, &(parser->m_mem));
1160 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1161 parserInit(parser, encodingName);
1162
1163 if (encodingName && ! parser->m_protocolEncodingName) {
1164 if (dtd) {
1165 // We need to stop the upcoming call to XML_ParserFree from happily
1166 // destroying parser->m_dtd because the DTD is shared with the parent
1167 // parser and the only guard that keeps XML_ParserFree from destroying
1168 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1169 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1170 parser->m_dtd = NULL;
1171 }
1172 XML_ParserFree(parser);
1173 return NULL;
1174 }
1175
1176 if (nameSep) {
1177 parser->m_ns = XML_TRUE;
1178 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1179 parser->m_namespaceSeparator = *nameSep;
1180 } else {
1181 parser->m_internalEncoding = XmlGetInternalEncoding();
1182 }
1183
1184 return parser;
1185 }
1186
1187 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1188 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1189 parser->m_processor = prologInitProcessor;
1190 XmlPrologStateInit(&parser->m_prologState);
1191 if (encodingName != NULL) {
1192 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1193 }
1194 parser->m_curBase = NULL;
1195 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1196 parser->m_userData = NULL;
1197 parser->m_handlerArg = NULL;
1198 parser->m_startElementHandler = NULL;
1199 parser->m_endElementHandler = NULL;
1200 parser->m_characterDataHandler = NULL;
1201 parser->m_processingInstructionHandler = NULL;
1202 parser->m_commentHandler = NULL;
1203 parser->m_startCdataSectionHandler = NULL;
1204 parser->m_endCdataSectionHandler = NULL;
1205 parser->m_defaultHandler = NULL;
1206 parser->m_startDoctypeDeclHandler = NULL;
1207 parser->m_endDoctypeDeclHandler = NULL;
1208 parser->m_unparsedEntityDeclHandler = NULL;
1209 parser->m_notationDeclHandler = NULL;
1210 parser->m_startNamespaceDeclHandler = NULL;
1211 parser->m_endNamespaceDeclHandler = NULL;
1212 parser->m_notStandaloneHandler = NULL;
1213 parser->m_externalEntityRefHandler = NULL;
1214 parser->m_externalEntityRefHandlerArg = parser;
1215 parser->m_skippedEntityHandler = NULL;
1216 parser->m_elementDeclHandler = NULL;
1217 parser->m_attlistDeclHandler = NULL;
1218 parser->m_entityDeclHandler = NULL;
1219 parser->m_xmlDeclHandler = NULL;
1220 parser->m_bufferPtr = parser->m_buffer;
1221 parser->m_bufferEnd = parser->m_buffer;
1222 parser->m_parseEndByteIndex = 0;
1223 parser->m_parseEndPtr = NULL;
1224 parser->m_partialTokenBytesBefore = 0;
1225 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1226 parser->m_lastBufferRequestSize = 0;
1227 parser->m_declElementType = NULL;
1228 parser->m_declAttributeId = NULL;
1229 parser->m_declEntity = NULL;
1230 parser->m_doctypeName = NULL;
1231 parser->m_doctypeSysid = NULL;
1232 parser->m_doctypePubid = NULL;
1233 parser->m_declAttributeType = NULL;
1234 parser->m_declNotationName = NULL;
1235 parser->m_declNotationPublicId = NULL;
1236 parser->m_declAttributeIsCdata = XML_FALSE;
1237 parser->m_declAttributeIsId = XML_FALSE;
1238 memset(&parser->m_position, 0, sizeof(POSITION));
1239 parser->m_errorCode = XML_ERROR_NONE;
1240 parser->m_eventPtr = NULL;
1241 parser->m_eventEndPtr = NULL;
1242 parser->m_positionPtr = NULL;
1243 parser->m_openInternalEntities = NULL;
1244 parser->m_defaultExpandInternalEntities = XML_TRUE;
1245 parser->m_tagLevel = 0;
1246 parser->m_tagStack = NULL;
1247 parser->m_inheritedBindings = NULL;
1248 parser->m_nSpecifiedAtts = 0;
1249 parser->m_unknownEncodingMem = NULL;
1250 parser->m_unknownEncodingRelease = NULL;
1251 parser->m_unknownEncodingData = NULL;
1252 parser->m_parentParser = NULL;
1253 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1254 #ifdef XML_DTD
1255 parser->m_isParamEntity = XML_FALSE;
1256 parser->m_useForeignDTD = XML_FALSE;
1257 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1258 #endif
1259 parser->m_hash_secret_salt = 0;
1260
1261 #if XML_GE == 1
1262 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1263 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1264 parser->m_accounting.maximumAmplificationFactor
1265 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1266 parser->m_accounting.activationThresholdBytes
1267 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1268
1269 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1270 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1271 #endif
1272 }
1273
1274 /* moves list of bindings to m_freeBindingList */
1275 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1276 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1277 while (bindings) {
1278 BINDING *b = bindings;
1279 bindings = bindings->nextTagBinding;
1280 b->nextTagBinding = parser->m_freeBindingList;
1281 parser->m_freeBindingList = b;
1282 }
1283 }
1284
1285 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1286 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1287 TAG *tStk;
1288 OPEN_INTERNAL_ENTITY *openEntityList;
1289
1290 if (parser == NULL)
1291 return XML_FALSE;
1292
1293 if (parser->m_parentParser)
1294 return XML_FALSE;
1295 /* move m_tagStack to m_freeTagList */
1296 tStk = parser->m_tagStack;
1297 while (tStk) {
1298 TAG *tag = tStk;
1299 tStk = tStk->parent;
1300 tag->parent = parser->m_freeTagList;
1301 moveToFreeBindingList(parser, tag->bindings);
1302 tag->bindings = NULL;
1303 parser->m_freeTagList = tag;
1304 }
1305 /* move m_openInternalEntities to m_freeInternalEntities */
1306 openEntityList = parser->m_openInternalEntities;
1307 while (openEntityList) {
1308 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1309 openEntityList = openEntity->next;
1310 openEntity->next = parser->m_freeInternalEntities;
1311 parser->m_freeInternalEntities = openEntity;
1312 }
1313 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1314 FREE(parser, parser->m_unknownEncodingMem);
1315 if (parser->m_unknownEncodingRelease)
1316 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1317 poolClear(&parser->m_tempPool);
1318 poolClear(&parser->m_temp2Pool);
1319 FREE(parser, (void *)parser->m_protocolEncodingName);
1320 parser->m_protocolEncodingName = NULL;
1321 parserInit(parser, encodingName);
1322 dtdReset(parser->m_dtd, &parser->m_mem);
1323 return XML_TRUE;
1324 }
1325
1326 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1327 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1328 if (parser == NULL)
1329 return XML_STATUS_ERROR;
1330 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1331 XXX There's no way for the caller to determine which of the
1332 XXX possible error cases caused the XML_STATUS_ERROR return.
1333 */
1334 if (parser->m_parsingStatus.parsing == XML_PARSING
1335 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1336 return XML_STATUS_ERROR;
1337
1338 /* Get rid of any previous encoding name */
1339 FREE(parser, (void *)parser->m_protocolEncodingName);
1340
1341 if (encodingName == NULL)
1342 /* No new encoding name */
1343 parser->m_protocolEncodingName = NULL;
1344 else {
1345 /* Copy the new encoding name into allocated memory */
1346 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1347 if (! parser->m_protocolEncodingName)
1348 return XML_STATUS_ERROR;
1349 }
1350 return XML_STATUS_OK;
1351 }
1352
1353 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1354 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1355 const XML_Char *encodingName) {
1356 XML_Parser parser = oldParser;
1357 DTD *newDtd = NULL;
1358 DTD *oldDtd;
1359 XML_StartElementHandler oldStartElementHandler;
1360 XML_EndElementHandler oldEndElementHandler;
1361 XML_CharacterDataHandler oldCharacterDataHandler;
1362 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1363 XML_CommentHandler oldCommentHandler;
1364 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1365 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1366 XML_DefaultHandler oldDefaultHandler;
1367 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1368 XML_NotationDeclHandler oldNotationDeclHandler;
1369 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1370 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1371 XML_NotStandaloneHandler oldNotStandaloneHandler;
1372 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1373 XML_SkippedEntityHandler oldSkippedEntityHandler;
1374 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1375 XML_ElementDeclHandler oldElementDeclHandler;
1376 XML_AttlistDeclHandler oldAttlistDeclHandler;
1377 XML_EntityDeclHandler oldEntityDeclHandler;
1378 XML_XmlDeclHandler oldXmlDeclHandler;
1379 ELEMENT_TYPE *oldDeclElementType;
1380
1381 void *oldUserData;
1382 void *oldHandlerArg;
1383 XML_Bool oldDefaultExpandInternalEntities;
1384 XML_Parser oldExternalEntityRefHandlerArg;
1385 #ifdef XML_DTD
1386 enum XML_ParamEntityParsing oldParamEntityParsing;
1387 int oldInEntityValue;
1388 #endif
1389 XML_Bool oldns_triplets;
1390 /* Note that the new parser shares the same hash secret as the old
1391 parser, so that dtdCopy and copyEntityTable can lookup values
1392 from hash tables associated with either parser without us having
1393 to worry which hash secrets each table has.
1394 */
1395 unsigned long oldhash_secret_salt;
1396 XML_Bool oldReparseDeferralEnabled;
1397
1398 /* Validate the oldParser parameter before we pull everything out of it */
1399 if (oldParser == NULL)
1400 return NULL;
1401
1402 /* Stash the original parser contents on the stack */
1403 oldDtd = parser->m_dtd;
1404 oldStartElementHandler = parser->m_startElementHandler;
1405 oldEndElementHandler = parser->m_endElementHandler;
1406 oldCharacterDataHandler = parser->m_characterDataHandler;
1407 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1408 oldCommentHandler = parser->m_commentHandler;
1409 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1410 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1411 oldDefaultHandler = parser->m_defaultHandler;
1412 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1413 oldNotationDeclHandler = parser->m_notationDeclHandler;
1414 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1415 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1416 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1417 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1418 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1419 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1420 oldElementDeclHandler = parser->m_elementDeclHandler;
1421 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1422 oldEntityDeclHandler = parser->m_entityDeclHandler;
1423 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1424 oldDeclElementType = parser->m_declElementType;
1425
1426 oldUserData = parser->m_userData;
1427 oldHandlerArg = parser->m_handlerArg;
1428 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1429 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1430 #ifdef XML_DTD
1431 oldParamEntityParsing = parser->m_paramEntityParsing;
1432 oldInEntityValue = parser->m_prologState.inEntityValue;
1433 #endif
1434 oldns_triplets = parser->m_ns_triplets;
1435 /* Note that the new parser shares the same hash secret as the old
1436 parser, so that dtdCopy and copyEntityTable can lookup values
1437 from hash tables associated with either parser without us having
1438 to worry which hash secrets each table has.
1439 */
1440 oldhash_secret_salt = parser->m_hash_secret_salt;
1441 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1442
1443 #ifdef XML_DTD
1444 if (! context)
1445 newDtd = oldDtd;
1446 #endif /* XML_DTD */
1447
1448 /* Note that the magical uses of the pre-processor to make field
1449 access look more like C++ require that `parser' be overwritten
1450 here. This makes this function more painful to follow than it
1451 would be otherwise.
1452 */
1453 if (parser->m_ns) {
1454 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1455 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1456 } else {
1457 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1458 }
1459
1460 if (! parser)
1461 return NULL;
1462
1463 parser->m_startElementHandler = oldStartElementHandler;
1464 parser->m_endElementHandler = oldEndElementHandler;
1465 parser->m_characterDataHandler = oldCharacterDataHandler;
1466 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1467 parser->m_commentHandler = oldCommentHandler;
1468 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1469 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1470 parser->m_defaultHandler = oldDefaultHandler;
1471 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1472 parser->m_notationDeclHandler = oldNotationDeclHandler;
1473 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1474 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1475 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1476 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1477 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1478 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1479 parser->m_elementDeclHandler = oldElementDeclHandler;
1480 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1481 parser->m_entityDeclHandler = oldEntityDeclHandler;
1482 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1483 parser->m_declElementType = oldDeclElementType;
1484 parser->m_userData = oldUserData;
1485 if (oldUserData == oldHandlerArg)
1486 parser->m_handlerArg = parser->m_userData;
1487 else
1488 parser->m_handlerArg = parser;
1489 if (oldExternalEntityRefHandlerArg != oldParser)
1490 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1491 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1492 parser->m_ns_triplets = oldns_triplets;
1493 parser->m_hash_secret_salt = oldhash_secret_salt;
1494 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1495 parser->m_parentParser = oldParser;
1496 #ifdef XML_DTD
1497 parser->m_paramEntityParsing = oldParamEntityParsing;
1498 parser->m_prologState.inEntityValue = oldInEntityValue;
1499 if (context) {
1500 #endif /* XML_DTD */
1501 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1502 || ! setContext(parser, context)) {
1503 XML_ParserFree(parser);
1504 return NULL;
1505 }
1506 parser->m_processor = externalEntityInitProcessor;
1507 #ifdef XML_DTD
1508 } else {
1509 /* The DTD instance referenced by parser->m_dtd is shared between the
1510 document's root parser and external PE parsers, therefore one does not
1511 need to call setContext. In addition, one also *must* not call
1512 setContext, because this would overwrite existing prefix->binding
1513 pointers in parser->m_dtd with ones that get destroyed with the external
1514 PE parser. This would leave those prefixes with dangling pointers.
1515 */
1516 parser->m_isParamEntity = XML_TRUE;
1517 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1518 parser->m_processor = externalParEntInitProcessor;
1519 }
1520 #endif /* XML_DTD */
1521 return parser;
1522 }
1523
1524 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1525 destroyBindings(BINDING *bindings, XML_Parser parser) {
1526 for (;;) {
1527 BINDING *b = bindings;
1528 if (! b)
1529 break;
1530 bindings = b->nextTagBinding;
1531 FREE(parser, b->uri);
1532 FREE(parser, b);
1533 }
1534 }
1535
1536 void XMLCALL
XML_ParserFree(XML_Parser parser)1537 XML_ParserFree(XML_Parser parser) {
1538 TAG *tagList;
1539 OPEN_INTERNAL_ENTITY *entityList;
1540 if (parser == NULL)
1541 return;
1542 /* free m_tagStack and m_freeTagList */
1543 tagList = parser->m_tagStack;
1544 for (;;) {
1545 TAG *p;
1546 if (tagList == NULL) {
1547 if (parser->m_freeTagList == NULL)
1548 break;
1549 tagList = parser->m_freeTagList;
1550 parser->m_freeTagList = NULL;
1551 }
1552 p = tagList;
1553 tagList = tagList->parent;
1554 FREE(parser, p->buf);
1555 destroyBindings(p->bindings, parser);
1556 FREE(parser, p);
1557 }
1558 /* free m_openInternalEntities and m_freeInternalEntities */
1559 entityList = parser->m_openInternalEntities;
1560 for (;;) {
1561 OPEN_INTERNAL_ENTITY *openEntity;
1562 if (entityList == NULL) {
1563 if (parser->m_freeInternalEntities == NULL)
1564 break;
1565 entityList = parser->m_freeInternalEntities;
1566 parser->m_freeInternalEntities = NULL;
1567 }
1568 openEntity = entityList;
1569 entityList = entityList->next;
1570 FREE(parser, openEntity);
1571 }
1572
1573 destroyBindings(parser->m_freeBindingList, parser);
1574 destroyBindings(parser->m_inheritedBindings, parser);
1575 poolDestroy(&parser->m_tempPool);
1576 poolDestroy(&parser->m_temp2Pool);
1577 FREE(parser, (void *)parser->m_protocolEncodingName);
1578 #ifdef XML_DTD
1579 /* external parameter entity parsers share the DTD structure
1580 parser->m_dtd with the root parser, so we must not destroy it
1581 */
1582 if (! parser->m_isParamEntity && parser->m_dtd)
1583 #else
1584 if (parser->m_dtd)
1585 #endif /* XML_DTD */
1586 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1587 &parser->m_mem);
1588 FREE(parser, (void *)parser->m_atts);
1589 #ifdef XML_ATTR_INFO
1590 FREE(parser, (void *)parser->m_attInfo);
1591 #endif
1592 FREE(parser, parser->m_groupConnector);
1593 FREE(parser, parser->m_buffer);
1594 FREE(parser, parser->m_dataBuf);
1595 FREE(parser, parser->m_nsAtts);
1596 FREE(parser, parser->m_unknownEncodingMem);
1597 if (parser->m_unknownEncodingRelease)
1598 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1599 FREE(parser, parser);
1600 }
1601
1602 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1603 XML_UseParserAsHandlerArg(XML_Parser parser) {
1604 if (parser != NULL)
1605 parser->m_handlerArg = parser;
1606 }
1607
1608 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1609 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1610 if (parser == NULL)
1611 return XML_ERROR_INVALID_ARGUMENT;
1612 #ifdef XML_DTD
1613 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1614 if (parser->m_parsingStatus.parsing == XML_PARSING
1615 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1616 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1617 parser->m_useForeignDTD = useDTD;
1618 return XML_ERROR_NONE;
1619 #else
1620 UNUSED_P(useDTD);
1621 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1622 #endif
1623 }
1624
1625 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1626 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1627 if (parser == NULL)
1628 return;
1629 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1630 if (parser->m_parsingStatus.parsing == XML_PARSING
1631 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1632 return;
1633 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1634 }
1635
1636 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1637 XML_SetUserData(XML_Parser parser, void *p) {
1638 if (parser == NULL)
1639 return;
1640 if (parser->m_handlerArg == parser->m_userData)
1641 parser->m_handlerArg = parser->m_userData = p;
1642 else
1643 parser->m_userData = p;
1644 }
1645
1646 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1647 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1648 if (parser == NULL)
1649 return XML_STATUS_ERROR;
1650 if (p) {
1651 p = poolCopyString(&parser->m_dtd->pool, p);
1652 if (! p)
1653 return XML_STATUS_ERROR;
1654 parser->m_curBase = p;
1655 } else
1656 parser->m_curBase = NULL;
1657 return XML_STATUS_OK;
1658 }
1659
1660 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1661 XML_GetBase(XML_Parser parser) {
1662 if (parser == NULL)
1663 return NULL;
1664 return parser->m_curBase;
1665 }
1666
1667 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1668 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1669 if (parser == NULL)
1670 return -1;
1671 return parser->m_nSpecifiedAtts;
1672 }
1673
1674 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1675 XML_GetIdAttributeIndex(XML_Parser parser) {
1676 if (parser == NULL)
1677 return -1;
1678 return parser->m_idAttIndex;
1679 }
1680
1681 #ifdef XML_ATTR_INFO
1682 const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser)1683 XML_GetAttributeInfo(XML_Parser parser) {
1684 if (parser == NULL)
1685 return NULL;
1686 return parser->m_attInfo;
1687 }
1688 #endif
1689
1690 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1691 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1692 XML_EndElementHandler end) {
1693 if (parser == NULL)
1694 return;
1695 parser->m_startElementHandler = start;
1696 parser->m_endElementHandler = end;
1697 }
1698
1699 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1700 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1701 if (parser != NULL)
1702 parser->m_startElementHandler = start;
1703 }
1704
1705 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1706 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1707 if (parser != NULL)
1708 parser->m_endElementHandler = end;
1709 }
1710
1711 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1712 XML_SetCharacterDataHandler(XML_Parser parser,
1713 XML_CharacterDataHandler handler) {
1714 if (parser != NULL)
1715 parser->m_characterDataHandler = handler;
1716 }
1717
1718 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1719 XML_SetProcessingInstructionHandler(XML_Parser parser,
1720 XML_ProcessingInstructionHandler handler) {
1721 if (parser != NULL)
1722 parser->m_processingInstructionHandler = handler;
1723 }
1724
1725 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1726 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1727 if (parser != NULL)
1728 parser->m_commentHandler = handler;
1729 }
1730
1731 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1732 XML_SetCdataSectionHandler(XML_Parser parser,
1733 XML_StartCdataSectionHandler start,
1734 XML_EndCdataSectionHandler end) {
1735 if (parser == NULL)
1736 return;
1737 parser->m_startCdataSectionHandler = start;
1738 parser->m_endCdataSectionHandler = end;
1739 }
1740
1741 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1742 XML_SetStartCdataSectionHandler(XML_Parser parser,
1743 XML_StartCdataSectionHandler start) {
1744 if (parser != NULL)
1745 parser->m_startCdataSectionHandler = start;
1746 }
1747
1748 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1749 XML_SetEndCdataSectionHandler(XML_Parser parser,
1750 XML_EndCdataSectionHandler end) {
1751 if (parser != NULL)
1752 parser->m_endCdataSectionHandler = end;
1753 }
1754
1755 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1756 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1757 if (parser == NULL)
1758 return;
1759 parser->m_defaultHandler = handler;
1760 parser->m_defaultExpandInternalEntities = XML_FALSE;
1761 }
1762
1763 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1764 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1765 if (parser == NULL)
1766 return;
1767 parser->m_defaultHandler = handler;
1768 parser->m_defaultExpandInternalEntities = XML_TRUE;
1769 }
1770
1771 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1772 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1773 XML_EndDoctypeDeclHandler end) {
1774 if (parser == NULL)
1775 return;
1776 parser->m_startDoctypeDeclHandler = start;
1777 parser->m_endDoctypeDeclHandler = end;
1778 }
1779
1780 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1781 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1782 XML_StartDoctypeDeclHandler start) {
1783 if (parser != NULL)
1784 parser->m_startDoctypeDeclHandler = start;
1785 }
1786
1787 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1788 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1789 if (parser != NULL)
1790 parser->m_endDoctypeDeclHandler = end;
1791 }
1792
1793 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1794 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1795 XML_UnparsedEntityDeclHandler handler) {
1796 if (parser != NULL)
1797 parser->m_unparsedEntityDeclHandler = handler;
1798 }
1799
1800 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1801 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1802 if (parser != NULL)
1803 parser->m_notationDeclHandler = handler;
1804 }
1805
1806 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1807 XML_SetNamespaceDeclHandler(XML_Parser parser,
1808 XML_StartNamespaceDeclHandler start,
1809 XML_EndNamespaceDeclHandler end) {
1810 if (parser == NULL)
1811 return;
1812 parser->m_startNamespaceDeclHandler = start;
1813 parser->m_endNamespaceDeclHandler = end;
1814 }
1815
1816 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1817 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1818 XML_StartNamespaceDeclHandler start) {
1819 if (parser != NULL)
1820 parser->m_startNamespaceDeclHandler = start;
1821 }
1822
1823 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1824 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1825 XML_EndNamespaceDeclHandler end) {
1826 if (parser != NULL)
1827 parser->m_endNamespaceDeclHandler = end;
1828 }
1829
1830 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1831 XML_SetNotStandaloneHandler(XML_Parser parser,
1832 XML_NotStandaloneHandler handler) {
1833 if (parser != NULL)
1834 parser->m_notStandaloneHandler = handler;
1835 }
1836
1837 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1838 XML_SetExternalEntityRefHandler(XML_Parser parser,
1839 XML_ExternalEntityRefHandler handler) {
1840 if (parser != NULL)
1841 parser->m_externalEntityRefHandler = handler;
1842 }
1843
1844 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1845 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1846 if (parser == NULL)
1847 return;
1848 if (arg)
1849 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1850 else
1851 parser->m_externalEntityRefHandlerArg = parser;
1852 }
1853
1854 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1855 XML_SetSkippedEntityHandler(XML_Parser parser,
1856 XML_SkippedEntityHandler handler) {
1857 if (parser != NULL)
1858 parser->m_skippedEntityHandler = handler;
1859 }
1860
1861 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1862 XML_SetUnknownEncodingHandler(XML_Parser parser,
1863 XML_UnknownEncodingHandler handler, void *data) {
1864 if (parser == NULL)
1865 return;
1866 parser->m_unknownEncodingHandler = handler;
1867 parser->m_unknownEncodingHandlerData = data;
1868 }
1869
1870 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1871 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1872 if (parser != NULL)
1873 parser->m_elementDeclHandler = eldecl;
1874 }
1875
1876 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1877 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1878 if (parser != NULL)
1879 parser->m_attlistDeclHandler = attdecl;
1880 }
1881
1882 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1883 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1884 if (parser != NULL)
1885 parser->m_entityDeclHandler = handler;
1886 }
1887
1888 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1889 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1890 if (parser != NULL)
1891 parser->m_xmlDeclHandler = handler;
1892 }
1893
1894 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1895 XML_SetParamEntityParsing(XML_Parser parser,
1896 enum XML_ParamEntityParsing peParsing) {
1897 if (parser == NULL)
1898 return 0;
1899 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1900 if (parser->m_parsingStatus.parsing == XML_PARSING
1901 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1902 return 0;
1903 #ifdef XML_DTD
1904 parser->m_paramEntityParsing = peParsing;
1905 return 1;
1906 #else
1907 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1908 #endif
1909 }
1910
1911 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1912 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1913 if (parser == NULL)
1914 return 0;
1915 if (parser->m_parentParser)
1916 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1917 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1918 if (parser->m_parsingStatus.parsing == XML_PARSING
1919 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1920 return 0;
1921 parser->m_hash_secret_salt = hash_salt;
1922 return 1;
1923 }
1924
1925 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1926 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1927 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1928 if (parser != NULL)
1929 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1930 return XML_STATUS_ERROR;
1931 }
1932 switch (parser->m_parsingStatus.parsing) {
1933 case XML_SUSPENDED:
1934 parser->m_errorCode = XML_ERROR_SUSPENDED;
1935 return XML_STATUS_ERROR;
1936 case XML_FINISHED:
1937 parser->m_errorCode = XML_ERROR_FINISHED;
1938 return XML_STATUS_ERROR;
1939 case XML_INITIALIZED:
1940 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1941 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1942 return XML_STATUS_ERROR;
1943 }
1944 /* fall through */
1945 default:
1946 parser->m_parsingStatus.parsing = XML_PARSING;
1947 }
1948
1949 #if XML_CONTEXT_BYTES == 0
1950 if (parser->m_bufferPtr == parser->m_bufferEnd) {
1951 const char *end;
1952 int nLeftOver;
1953 enum XML_Status result;
1954 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1955 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1956 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1957 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1958 parser->m_processor = errorProcessor;
1959 return XML_STATUS_ERROR;
1960 }
1961 // though this isn't a buffer request, we assume that `len` is the app's
1962 // preferred buffer fill size, and therefore save it here.
1963 parser->m_lastBufferRequestSize = len;
1964 parser->m_parseEndByteIndex += len;
1965 parser->m_positionPtr = s;
1966 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1967
1968 parser->m_errorCode
1969 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
1970
1971 if (parser->m_errorCode != XML_ERROR_NONE) {
1972 parser->m_eventEndPtr = parser->m_eventPtr;
1973 parser->m_processor = errorProcessor;
1974 return XML_STATUS_ERROR;
1975 } else {
1976 switch (parser->m_parsingStatus.parsing) {
1977 case XML_SUSPENDED:
1978 result = XML_STATUS_SUSPENDED;
1979 break;
1980 case XML_INITIALIZED:
1981 case XML_PARSING:
1982 if (isFinal) {
1983 parser->m_parsingStatus.parsing = XML_FINISHED;
1984 return XML_STATUS_OK;
1985 }
1986 /* fall through */
1987 default:
1988 result = XML_STATUS_OK;
1989 }
1990 }
1991
1992 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1993 &parser->m_position);
1994 nLeftOver = s + len - end;
1995 if (nLeftOver) {
1996 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
1997 // (and XML_ERROR_FINISHED) from XML_GetBuffer.
1998 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
1999 parser->m_parsingStatus.parsing = XML_PARSING;
2000 void *const temp = XML_GetBuffer(parser, nLeftOver);
2001 parser->m_parsingStatus.parsing = originalStatus;
2002 // GetBuffer may have overwritten this, but we want to remember what the
2003 // app requested, not how many bytes were left over after parsing.
2004 parser->m_lastBufferRequestSize = len;
2005 if (temp == NULL) {
2006 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2007 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2008 parser->m_processor = errorProcessor;
2009 return XML_STATUS_ERROR;
2010 }
2011 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2012 // don't have any data to preserve, and can copy straight into the start
2013 // of the buffer rather than the GetBuffer return pointer (which may be
2014 // pointing further into the allocated buffer).
2015 memcpy(parser->m_buffer, end, nLeftOver);
2016 }
2017 parser->m_bufferPtr = parser->m_buffer;
2018 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2019 parser->m_positionPtr = parser->m_bufferPtr;
2020 parser->m_parseEndPtr = parser->m_bufferEnd;
2021 parser->m_eventPtr = parser->m_bufferPtr;
2022 parser->m_eventEndPtr = parser->m_bufferPtr;
2023 return result;
2024 }
2025 #endif /* XML_CONTEXT_BYTES == 0 */
2026 void *buff = XML_GetBuffer(parser, len);
2027 if (buff == NULL)
2028 return XML_STATUS_ERROR;
2029 if (len > 0) {
2030 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2031 memcpy(buff, s, len);
2032 }
2033 return XML_ParseBuffer(parser, len, isFinal);
2034 }
2035
2036 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2037 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2038 const char *start;
2039 enum XML_Status result = XML_STATUS_OK;
2040
2041 if (parser == NULL)
2042 return XML_STATUS_ERROR;
2043
2044 if (len < 0) {
2045 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2046 return XML_STATUS_ERROR;
2047 }
2048
2049 switch (parser->m_parsingStatus.parsing) {
2050 case XML_SUSPENDED:
2051 parser->m_errorCode = XML_ERROR_SUSPENDED;
2052 return XML_STATUS_ERROR;
2053 case XML_FINISHED:
2054 parser->m_errorCode = XML_ERROR_FINISHED;
2055 return XML_STATUS_ERROR;
2056 case XML_INITIALIZED:
2057 /* Has someone called XML_GetBuffer successfully before? */
2058 if (! parser->m_bufferPtr) {
2059 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2060 return XML_STATUS_ERROR;
2061 }
2062
2063 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2064 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2065 return XML_STATUS_ERROR;
2066 }
2067 /* fall through */
2068 default:
2069 parser->m_parsingStatus.parsing = XML_PARSING;
2070 }
2071
2072 start = parser->m_bufferPtr;
2073 parser->m_positionPtr = start;
2074 parser->m_bufferEnd += len;
2075 parser->m_parseEndPtr = parser->m_bufferEnd;
2076 parser->m_parseEndByteIndex += len;
2077 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2078
2079 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2080 &parser->m_bufferPtr);
2081
2082 if (parser->m_errorCode != XML_ERROR_NONE) {
2083 parser->m_eventEndPtr = parser->m_eventPtr;
2084 parser->m_processor = errorProcessor;
2085 return XML_STATUS_ERROR;
2086 } else {
2087 switch (parser->m_parsingStatus.parsing) {
2088 case XML_SUSPENDED:
2089 result = XML_STATUS_SUSPENDED;
2090 break;
2091 case XML_INITIALIZED:
2092 case XML_PARSING:
2093 if (isFinal) {
2094 parser->m_parsingStatus.parsing = XML_FINISHED;
2095 return result;
2096 }
2097 default:; /* should not happen */
2098 }
2099 }
2100
2101 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2102 parser->m_bufferPtr, &parser->m_position);
2103 parser->m_positionPtr = parser->m_bufferPtr;
2104 return result;
2105 }
2106
2107 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2108 XML_GetBuffer(XML_Parser parser, int len) {
2109 if (parser == NULL)
2110 return NULL;
2111 if (len < 0) {
2112 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2113 return NULL;
2114 }
2115 switch (parser->m_parsingStatus.parsing) {
2116 case XML_SUSPENDED:
2117 parser->m_errorCode = XML_ERROR_SUSPENDED;
2118 return NULL;
2119 case XML_FINISHED:
2120 parser->m_errorCode = XML_ERROR_FINISHED;
2121 return NULL;
2122 default:;
2123 }
2124
2125 // whether or not the request succeeds, `len` seems to be the app's preferred
2126 // buffer fill size; remember it.
2127 parser->m_lastBufferRequestSize = len;
2128 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2129 || parser->m_buffer == NULL) {
2130 #if XML_CONTEXT_BYTES > 0
2131 int keep;
2132 #endif /* XML_CONTEXT_BYTES > 0 */
2133 /* Do not invoke signed arithmetic overflow: */
2134 int neededSize = (int)((unsigned)len
2135 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2136 parser->m_bufferEnd, parser->m_bufferPtr));
2137 if (neededSize < 0) {
2138 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2139 return NULL;
2140 }
2141 #if XML_CONTEXT_BYTES > 0
2142 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2143 if (keep > XML_CONTEXT_BYTES)
2144 keep = XML_CONTEXT_BYTES;
2145 /* Detect and prevent integer overflow */
2146 if (keep > INT_MAX - neededSize) {
2147 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2148 return NULL;
2149 }
2150 neededSize += keep;
2151 #endif /* XML_CONTEXT_BYTES > 0 */
2152 if (parser->m_buffer && parser->m_bufferPtr
2153 && neededSize
2154 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2155 #if XML_CONTEXT_BYTES > 0
2156 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2157 int offset
2158 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2159 - keep;
2160 /* The buffer pointers cannot be NULL here; we have at least some bytes
2161 * in the buffer */
2162 memmove(parser->m_buffer, &parser->m_buffer[offset],
2163 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2164 parser->m_bufferEnd -= offset;
2165 parser->m_bufferPtr -= offset;
2166 }
2167 #else
2168 memmove(parser->m_buffer, parser->m_bufferPtr,
2169 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2170 parser->m_bufferEnd
2171 = parser->m_buffer
2172 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2173 parser->m_bufferPtr = parser->m_buffer;
2174 #endif /* XML_CONTEXT_BYTES > 0 */
2175 } else {
2176 char *newBuf;
2177 int bufferSize
2178 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2179 if (bufferSize == 0)
2180 bufferSize = INIT_BUFFER_SIZE;
2181 do {
2182 /* Do not invoke signed arithmetic overflow: */
2183 bufferSize = (int)(2U * (unsigned)bufferSize);
2184 } while (bufferSize < neededSize && bufferSize > 0);
2185 if (bufferSize <= 0) {
2186 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2187 return NULL;
2188 }
2189 newBuf = (char *)MALLOC(parser, bufferSize);
2190 if (newBuf == 0) {
2191 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2192 return NULL;
2193 }
2194 parser->m_bufferLim = newBuf + bufferSize;
2195 #if XML_CONTEXT_BYTES > 0
2196 if (parser->m_bufferPtr) {
2197 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2198 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2199 + keep);
2200 FREE(parser, parser->m_buffer);
2201 parser->m_buffer = newBuf;
2202 parser->m_bufferEnd
2203 = parser->m_buffer
2204 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2205 + keep;
2206 parser->m_bufferPtr = parser->m_buffer + keep;
2207 } else {
2208 /* This must be a brand new buffer with no data in it yet */
2209 parser->m_bufferEnd = newBuf;
2210 parser->m_bufferPtr = parser->m_buffer = newBuf;
2211 }
2212 #else
2213 if (parser->m_bufferPtr) {
2214 memcpy(newBuf, parser->m_bufferPtr,
2215 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2216 FREE(parser, parser->m_buffer);
2217 parser->m_bufferEnd
2218 = newBuf
2219 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2220 } else {
2221 /* This must be a brand new buffer with no data in it yet */
2222 parser->m_bufferEnd = newBuf;
2223 }
2224 parser->m_bufferPtr = parser->m_buffer = newBuf;
2225 #endif /* XML_CONTEXT_BYTES > 0 */
2226 }
2227 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2228 parser->m_positionPtr = NULL;
2229 }
2230 return parser->m_bufferEnd;
2231 }
2232
2233 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2234 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2235 if (parser == NULL)
2236 return XML_STATUS_ERROR;
2237 switch (parser->m_parsingStatus.parsing) {
2238 case XML_INITIALIZED:
2239 parser->m_errorCode = XML_ERROR_NOT_STARTED;
2240 return XML_STATUS_ERROR;
2241 case XML_SUSPENDED:
2242 if (resumable) {
2243 parser->m_errorCode = XML_ERROR_SUSPENDED;
2244 return XML_STATUS_ERROR;
2245 }
2246 parser->m_parsingStatus.parsing = XML_FINISHED;
2247 break;
2248 case XML_FINISHED:
2249 parser->m_errorCode = XML_ERROR_FINISHED;
2250 return XML_STATUS_ERROR;
2251 case XML_PARSING:
2252 if (resumable) {
2253 #ifdef XML_DTD
2254 if (parser->m_isParamEntity) {
2255 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2256 return XML_STATUS_ERROR;
2257 }
2258 #endif
2259 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2260 } else
2261 parser->m_parsingStatus.parsing = XML_FINISHED;
2262 break;
2263 default:
2264 assert(0);
2265 }
2266 return XML_STATUS_OK;
2267 }
2268
2269 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2270 XML_ResumeParser(XML_Parser parser) {
2271 enum XML_Status result = XML_STATUS_OK;
2272
2273 if (parser == NULL)
2274 return XML_STATUS_ERROR;
2275 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2276 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2277 return XML_STATUS_ERROR;
2278 }
2279 parser->m_parsingStatus.parsing = XML_PARSING;
2280
2281 parser->m_errorCode = callProcessor(
2282 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2283
2284 if (parser->m_errorCode != XML_ERROR_NONE) {
2285 parser->m_eventEndPtr = parser->m_eventPtr;
2286 parser->m_processor = errorProcessor;
2287 return XML_STATUS_ERROR;
2288 } else {
2289 switch (parser->m_parsingStatus.parsing) {
2290 case XML_SUSPENDED:
2291 result = XML_STATUS_SUSPENDED;
2292 break;
2293 case XML_INITIALIZED:
2294 case XML_PARSING:
2295 if (parser->m_parsingStatus.finalBuffer) {
2296 parser->m_parsingStatus.parsing = XML_FINISHED;
2297 return result;
2298 }
2299 default:;
2300 }
2301 }
2302
2303 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2304 parser->m_bufferPtr, &parser->m_position);
2305 parser->m_positionPtr = parser->m_bufferPtr;
2306 return result;
2307 }
2308
2309 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2310 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2311 if (parser == NULL)
2312 return;
2313 assert(status != NULL);
2314 *status = parser->m_parsingStatus;
2315 }
2316
2317 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2318 XML_GetErrorCode(XML_Parser parser) {
2319 if (parser == NULL)
2320 return XML_ERROR_INVALID_ARGUMENT;
2321 return parser->m_errorCode;
2322 }
2323
2324 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2325 XML_GetCurrentByteIndex(XML_Parser parser) {
2326 if (parser == NULL)
2327 return -1;
2328 if (parser->m_eventPtr)
2329 return (XML_Index)(parser->m_parseEndByteIndex
2330 - (parser->m_parseEndPtr - parser->m_eventPtr));
2331 return -1;
2332 }
2333
2334 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2335 XML_GetCurrentByteCount(XML_Parser parser) {
2336 if (parser == NULL)
2337 return 0;
2338 if (parser->m_eventEndPtr && parser->m_eventPtr)
2339 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2340 return 0;
2341 }
2342
2343 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2344 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2345 #if XML_CONTEXT_BYTES > 0
2346 if (parser == NULL)
2347 return NULL;
2348 if (parser->m_eventPtr && parser->m_buffer) {
2349 if (offset != NULL)
2350 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2351 if (size != NULL)
2352 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2353 return parser->m_buffer;
2354 }
2355 #else
2356 (void)parser;
2357 (void)offset;
2358 (void)size;
2359 #endif /* XML_CONTEXT_BYTES > 0 */
2360 return (const char *)0;
2361 }
2362
2363 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2364 XML_GetCurrentLineNumber(XML_Parser parser) {
2365 if (parser == NULL)
2366 return 0;
2367 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2368 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2369 parser->m_eventPtr, &parser->m_position);
2370 parser->m_positionPtr = parser->m_eventPtr;
2371 }
2372 return parser->m_position.lineNumber + 1;
2373 }
2374
2375 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2376 XML_GetCurrentColumnNumber(XML_Parser parser) {
2377 if (parser == NULL)
2378 return 0;
2379 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2380 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2381 parser->m_eventPtr, &parser->m_position);
2382 parser->m_positionPtr = parser->m_eventPtr;
2383 }
2384 return parser->m_position.columnNumber;
2385 }
2386
2387 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2388 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2389 if (parser != NULL)
2390 FREE(parser, model);
2391 }
2392
2393 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2394 XML_MemMalloc(XML_Parser parser, size_t size) {
2395 if (parser == NULL)
2396 return NULL;
2397 return MALLOC(parser, size);
2398 }
2399
2400 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2401 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2402 if (parser == NULL)
2403 return NULL;
2404 return REALLOC(parser, ptr, size);
2405 }
2406
2407 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2408 XML_MemFree(XML_Parser parser, void *ptr) {
2409 if (parser != NULL)
2410 FREE(parser, ptr);
2411 }
2412
2413 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2414 XML_DefaultCurrent(XML_Parser parser) {
2415 if (parser == NULL)
2416 return;
2417 if (parser->m_defaultHandler) {
2418 if (parser->m_openInternalEntities)
2419 reportDefault(parser, parser->m_internalEncoding,
2420 parser->m_openInternalEntities->internalEventPtr,
2421 parser->m_openInternalEntities->internalEventEndPtr);
2422 else
2423 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2424 parser->m_eventEndPtr);
2425 }
2426 }
2427
2428 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2429 XML_ErrorString(enum XML_Error code) {
2430 switch (code) {
2431 case XML_ERROR_NONE:
2432 return NULL;
2433 case XML_ERROR_NO_MEMORY:
2434 return XML_L("out of memory");
2435 case XML_ERROR_SYNTAX:
2436 return XML_L("syntax error");
2437 case XML_ERROR_NO_ELEMENTS:
2438 return XML_L("no element found");
2439 case XML_ERROR_INVALID_TOKEN:
2440 return XML_L("not well-formed (invalid token)");
2441 case XML_ERROR_UNCLOSED_TOKEN:
2442 return XML_L("unclosed token");
2443 case XML_ERROR_PARTIAL_CHAR:
2444 return XML_L("partial character");
2445 case XML_ERROR_TAG_MISMATCH:
2446 return XML_L("mismatched tag");
2447 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2448 return XML_L("duplicate attribute");
2449 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2450 return XML_L("junk after document element");
2451 case XML_ERROR_PARAM_ENTITY_REF:
2452 return XML_L("illegal parameter entity reference");
2453 case XML_ERROR_UNDEFINED_ENTITY:
2454 return XML_L("undefined entity");
2455 case XML_ERROR_RECURSIVE_ENTITY_REF:
2456 return XML_L("recursive entity reference");
2457 case XML_ERROR_ASYNC_ENTITY:
2458 return XML_L("asynchronous entity");
2459 case XML_ERROR_BAD_CHAR_REF:
2460 return XML_L("reference to invalid character number");
2461 case XML_ERROR_BINARY_ENTITY_REF:
2462 return XML_L("reference to binary entity");
2463 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2464 return XML_L("reference to external entity in attribute");
2465 case XML_ERROR_MISPLACED_XML_PI:
2466 return XML_L("XML or text declaration not at start of entity");
2467 case XML_ERROR_UNKNOWN_ENCODING:
2468 return XML_L("unknown encoding");
2469 case XML_ERROR_INCORRECT_ENCODING:
2470 return XML_L("encoding specified in XML declaration is incorrect");
2471 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2472 return XML_L("unclosed CDATA section");
2473 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2474 return XML_L("error in processing external entity reference");
2475 case XML_ERROR_NOT_STANDALONE:
2476 return XML_L("document is not standalone");
2477 case XML_ERROR_UNEXPECTED_STATE:
2478 return XML_L("unexpected parser state - please send a bug report");
2479 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2480 return XML_L("entity declared in parameter entity");
2481 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2482 return XML_L("requested feature requires XML_DTD support in Expat");
2483 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2484 return XML_L("cannot change setting once parsing has begun");
2485 /* Added in 1.95.7. */
2486 case XML_ERROR_UNBOUND_PREFIX:
2487 return XML_L("unbound prefix");
2488 /* Added in 1.95.8. */
2489 case XML_ERROR_UNDECLARING_PREFIX:
2490 return XML_L("must not undeclare prefix");
2491 case XML_ERROR_INCOMPLETE_PE:
2492 return XML_L("incomplete markup in parameter entity");
2493 case XML_ERROR_XML_DECL:
2494 return XML_L("XML declaration not well-formed");
2495 case XML_ERROR_TEXT_DECL:
2496 return XML_L("text declaration not well-formed");
2497 case XML_ERROR_PUBLICID:
2498 return XML_L("illegal character(s) in public id");
2499 case XML_ERROR_SUSPENDED:
2500 return XML_L("parser suspended");
2501 case XML_ERROR_NOT_SUSPENDED:
2502 return XML_L("parser not suspended");
2503 case XML_ERROR_ABORTED:
2504 return XML_L("parsing aborted");
2505 case XML_ERROR_FINISHED:
2506 return XML_L("parsing finished");
2507 case XML_ERROR_SUSPEND_PE:
2508 return XML_L("cannot suspend in external parameter entity");
2509 /* Added in 2.0.0. */
2510 case XML_ERROR_RESERVED_PREFIX_XML:
2511 return XML_L(
2512 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2513 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2514 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2515 case XML_ERROR_RESERVED_NAMESPACE_URI:
2516 return XML_L(
2517 "prefix must not be bound to one of the reserved namespace names");
2518 /* Added in 2.2.5. */
2519 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2520 return XML_L("invalid argument");
2521 /* Added in 2.3.0. */
2522 case XML_ERROR_NO_BUFFER:
2523 return XML_L(
2524 "a successful prior call to function XML_GetBuffer is required");
2525 /* Added in 2.4.0. */
2526 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2527 return XML_L(
2528 "limit on input amplification factor (from DTD and entities) breached");
2529 /* Added in 2.6.4. */
2530 case XML_ERROR_NOT_STARTED:
2531 return XML_L("parser not started");
2532 }
2533 return NULL;
2534 }
2535
2536 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2537 XML_ExpatVersion(void) {
2538 /* V1 is used to string-ize the version number. However, it would
2539 string-ize the actual version macro *names* unless we get them
2540 substituted before being passed to V1. CPP is defined to expand
2541 a macro, then rescan for more expansions. Thus, we use V2 to expand
2542 the version macros, then CPP will expand the resulting V1() macro
2543 with the correct numerals. */
2544 /* ### I'm assuming cpp is portable in this respect... */
2545
2546 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2547 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2548
2549 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2550
2551 #undef V1
2552 #undef V2
2553 }
2554
2555 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2556 XML_ExpatVersionInfo(void) {
2557 XML_Expat_Version version;
2558
2559 version.major = XML_MAJOR_VERSION;
2560 version.minor = XML_MINOR_VERSION;
2561 version.micro = XML_MICRO_VERSION;
2562
2563 return version;
2564 }
2565
2566 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2567 XML_GetFeatureList(void) {
2568 static const XML_Feature features[] = {
2569 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2570 sizeof(XML_Char)},
2571 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2572 sizeof(XML_LChar)},
2573 #ifdef XML_UNICODE
2574 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2575 #endif
2576 #ifdef XML_UNICODE_WCHAR_T
2577 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2578 #endif
2579 #ifdef XML_DTD
2580 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2581 #endif
2582 #if XML_CONTEXT_BYTES > 0
2583 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2584 XML_CONTEXT_BYTES},
2585 #endif
2586 #ifdef XML_MIN_SIZE
2587 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2588 #endif
2589 #ifdef XML_NS
2590 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2591 #endif
2592 #ifdef XML_LARGE_SIZE
2593 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2594 #endif
2595 #ifdef XML_ATTR_INFO
2596 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2597 #endif
2598 #if XML_GE == 1
2599 /* Added in Expat 2.4.0 for XML_DTD defined and
2600 * added in Expat 2.6.0 for XML_GE == 1. */
2601 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2602 XML_L("XML_BLAP_MAX_AMP"),
2603 (long int)
2604 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2605 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2606 XML_L("XML_BLAP_ACT_THRES"),
2607 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2608 /* Added in Expat 2.6.0. */
2609 {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2610 #endif
2611 {XML_FEATURE_END, NULL, 0}};
2612
2613 return features;
2614 }
2615
2616 #if XML_GE == 1
2617 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2618 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2619 XML_Parser parser, float maximumAmplificationFactor) {
2620 if ((parser == NULL) || (parser->m_parentParser != NULL)
2621 || isnan(maximumAmplificationFactor)
2622 || (maximumAmplificationFactor < 1.0f)) {
2623 return XML_FALSE;
2624 }
2625 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2626 return XML_TRUE;
2627 }
2628
2629 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2630 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2631 XML_Parser parser, unsigned long long activationThresholdBytes) {
2632 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2633 return XML_FALSE;
2634 }
2635 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2636 return XML_TRUE;
2637 }
2638 #endif /* XML_GE == 1 */
2639
2640 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2641 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2642 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2643 parser->m_reparseDeferralEnabled = enabled;
2644 return XML_TRUE;
2645 }
2646 return XML_FALSE;
2647 }
2648
2649 /* Initially tag->rawName always points into the parse buffer;
2650 for those TAG instances opened while the current parse buffer was
2651 processed, and not yet closed, we need to store tag->rawName in a more
2652 permanent location, since the parse buffer is about to be discarded.
2653 */
2654 static XML_Bool
storeRawNames(XML_Parser parser)2655 storeRawNames(XML_Parser parser) {
2656 TAG *tag = parser->m_tagStack;
2657 while (tag) {
2658 int bufSize;
2659 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2660 size_t rawNameLen;
2661 char *rawNameBuf = tag->buf + nameLen;
2662 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2663 at the first entry that has already been copied; everything
2664 below it in the stack is already been accounted for in a
2665 previous call to this function.
2666 */
2667 if (tag->rawName == rawNameBuf)
2668 break;
2669 /* For reuse purposes we need to ensure that the
2670 size of tag->buf is a multiple of sizeof(XML_Char).
2671 */
2672 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2673 /* Detect and prevent integer overflow. */
2674 if (rawNameLen > (size_t)INT_MAX - nameLen)
2675 return XML_FALSE;
2676 bufSize = nameLen + (int)rawNameLen;
2677 if (bufSize > tag->bufEnd - tag->buf) {
2678 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2679 if (temp == NULL)
2680 return XML_FALSE;
2681 /* if tag->name.str points to tag->buf (only when namespace
2682 processing is off) then we have to update it
2683 */
2684 if (tag->name.str == (XML_Char *)tag->buf)
2685 tag->name.str = (XML_Char *)temp;
2686 /* if tag->name.localPart is set (when namespace processing is on)
2687 then update it as well, since it will always point into tag->buf
2688 */
2689 if (tag->name.localPart)
2690 tag->name.localPart
2691 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2692 tag->buf = temp;
2693 tag->bufEnd = temp + bufSize;
2694 rawNameBuf = temp + nameLen;
2695 }
2696 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2697 tag->rawName = rawNameBuf;
2698 tag = tag->parent;
2699 }
2700 return XML_TRUE;
2701 }
2702
2703 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2704 contentProcessor(XML_Parser parser, const char *start, const char *end,
2705 const char **endPtr) {
2706 enum XML_Error result = doContent(
2707 parser, 0, parser->m_encoding, start, end, endPtr,
2708 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2709 if (result == XML_ERROR_NONE) {
2710 if (! storeRawNames(parser))
2711 return XML_ERROR_NO_MEMORY;
2712 }
2713 return result;
2714 }
2715
2716 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2717 externalEntityInitProcessor(XML_Parser parser, const char *start,
2718 const char *end, const char **endPtr) {
2719 enum XML_Error result = initializeEncoding(parser);
2720 if (result != XML_ERROR_NONE)
2721 return result;
2722 parser->m_processor = externalEntityInitProcessor2;
2723 return externalEntityInitProcessor2(parser, start, end, endPtr);
2724 }
2725
2726 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2727 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2728 const char *end, const char **endPtr) {
2729 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2730 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2731 switch (tok) {
2732 case XML_TOK_BOM:
2733 #if XML_GE == 1
2734 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2735 XML_ACCOUNT_DIRECT)) {
2736 accountingOnAbort(parser);
2737 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2738 }
2739 #endif /* XML_GE == 1 */
2740
2741 /* If we are at the end of the buffer, this would cause the next stage,
2742 i.e. externalEntityInitProcessor3, to pass control directly to
2743 doContent (by detecting XML_TOK_NONE) without processing any xml text
2744 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2745 */
2746 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2747 *endPtr = next;
2748 return XML_ERROR_NONE;
2749 }
2750 start = next;
2751 break;
2752 case XML_TOK_PARTIAL:
2753 if (! parser->m_parsingStatus.finalBuffer) {
2754 *endPtr = start;
2755 return XML_ERROR_NONE;
2756 }
2757 parser->m_eventPtr = start;
2758 return XML_ERROR_UNCLOSED_TOKEN;
2759 case XML_TOK_PARTIAL_CHAR:
2760 if (! parser->m_parsingStatus.finalBuffer) {
2761 *endPtr = start;
2762 return XML_ERROR_NONE;
2763 }
2764 parser->m_eventPtr = start;
2765 return XML_ERROR_PARTIAL_CHAR;
2766 }
2767 parser->m_processor = externalEntityInitProcessor3;
2768 return externalEntityInitProcessor3(parser, start, end, endPtr);
2769 }
2770
2771 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2772 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2773 const char *end, const char **endPtr) {
2774 int tok;
2775 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2776 parser->m_eventPtr = start;
2777 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2778 /* Note: These bytes are accounted later in:
2779 - processXmlDecl
2780 - externalEntityContentProcessor
2781 */
2782 parser->m_eventEndPtr = next;
2783
2784 switch (tok) {
2785 case XML_TOK_XML_DECL: {
2786 enum XML_Error result;
2787 result = processXmlDecl(parser, 1, start, next);
2788 if (result != XML_ERROR_NONE)
2789 return result;
2790 switch (parser->m_parsingStatus.parsing) {
2791 case XML_SUSPENDED:
2792 *endPtr = next;
2793 return XML_ERROR_NONE;
2794 case XML_FINISHED:
2795 return XML_ERROR_ABORTED;
2796 default:
2797 start = next;
2798 }
2799 } break;
2800 case XML_TOK_PARTIAL:
2801 if (! parser->m_parsingStatus.finalBuffer) {
2802 *endPtr = start;
2803 return XML_ERROR_NONE;
2804 }
2805 return XML_ERROR_UNCLOSED_TOKEN;
2806 case XML_TOK_PARTIAL_CHAR:
2807 if (! parser->m_parsingStatus.finalBuffer) {
2808 *endPtr = start;
2809 return XML_ERROR_NONE;
2810 }
2811 return XML_ERROR_PARTIAL_CHAR;
2812 }
2813 parser->m_processor = externalEntityContentProcessor;
2814 parser->m_tagLevel = 1;
2815 return externalEntityContentProcessor(parser, start, end, endPtr);
2816 }
2817
2818 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2819 externalEntityContentProcessor(XML_Parser parser, const char *start,
2820 const char *end, const char **endPtr) {
2821 enum XML_Error result
2822 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2823 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2824 XML_ACCOUNT_ENTITY_EXPANSION);
2825 if (result == XML_ERROR_NONE) {
2826 if (! storeRawNames(parser))
2827 return XML_ERROR_NO_MEMORY;
2828 }
2829 return result;
2830 }
2831
2832 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2833 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2834 const char *s, const char *end, const char **nextPtr,
2835 XML_Bool haveMore, enum XML_Account account) {
2836 /* save one level of indirection */
2837 DTD *const dtd = parser->m_dtd;
2838
2839 const char **eventPP;
2840 const char **eventEndPP;
2841 if (enc == parser->m_encoding) {
2842 eventPP = &parser->m_eventPtr;
2843 eventEndPP = &parser->m_eventEndPtr;
2844 } else {
2845 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2846 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2847 }
2848 *eventPP = s;
2849
2850 for (;;) {
2851 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2852 int tok = XmlContentTok(enc, s, end, &next);
2853 #if XML_GE == 1
2854 const char *accountAfter
2855 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2856 ? (haveMore ? s /* i.e. 0 bytes */ : end)
2857 : next;
2858 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2859 account)) {
2860 accountingOnAbort(parser);
2861 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2862 }
2863 #endif
2864 *eventEndPP = next;
2865 switch (tok) {
2866 case XML_TOK_TRAILING_CR:
2867 if (haveMore) {
2868 *nextPtr = s;
2869 return XML_ERROR_NONE;
2870 }
2871 *eventEndPP = end;
2872 if (parser->m_characterDataHandler) {
2873 XML_Char c = 0xA;
2874 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2875 } else if (parser->m_defaultHandler)
2876 reportDefault(parser, enc, s, end);
2877 /* We are at the end of the final buffer, should we check for
2878 XML_SUSPENDED, XML_FINISHED?
2879 */
2880 if (startTagLevel == 0)
2881 return XML_ERROR_NO_ELEMENTS;
2882 if (parser->m_tagLevel != startTagLevel)
2883 return XML_ERROR_ASYNC_ENTITY;
2884 *nextPtr = end;
2885 return XML_ERROR_NONE;
2886 case XML_TOK_NONE:
2887 if (haveMore) {
2888 *nextPtr = s;
2889 return XML_ERROR_NONE;
2890 }
2891 if (startTagLevel > 0) {
2892 if (parser->m_tagLevel != startTagLevel)
2893 return XML_ERROR_ASYNC_ENTITY;
2894 *nextPtr = s;
2895 return XML_ERROR_NONE;
2896 }
2897 return XML_ERROR_NO_ELEMENTS;
2898 case XML_TOK_INVALID:
2899 *eventPP = next;
2900 return XML_ERROR_INVALID_TOKEN;
2901 case XML_TOK_PARTIAL:
2902 if (haveMore) {
2903 *nextPtr = s;
2904 return XML_ERROR_NONE;
2905 }
2906 return XML_ERROR_UNCLOSED_TOKEN;
2907 case XML_TOK_PARTIAL_CHAR:
2908 if (haveMore) {
2909 *nextPtr = s;
2910 return XML_ERROR_NONE;
2911 }
2912 return XML_ERROR_PARTIAL_CHAR;
2913 case XML_TOK_ENTITY_REF: {
2914 const XML_Char *name;
2915 ENTITY *entity;
2916 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2917 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2918 if (ch) {
2919 #if XML_GE == 1
2920 /* NOTE: We are replacing 4-6 characters original input for 1 character
2921 * so there is no amplification and hence recording without
2922 * protection. */
2923 accountingDiffTolerated(parser, tok, (char *)&ch,
2924 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2925 XML_ACCOUNT_ENTITY_EXPANSION);
2926 #endif /* XML_GE == 1 */
2927 if (parser->m_characterDataHandler)
2928 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2929 else if (parser->m_defaultHandler)
2930 reportDefault(parser, enc, s, next);
2931 break;
2932 }
2933 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2934 next - enc->minBytesPerChar);
2935 if (! name)
2936 return XML_ERROR_NO_MEMORY;
2937 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2938 poolDiscard(&dtd->pool);
2939 /* First, determine if a check for an existing declaration is needed;
2940 if yes, check that the entity exists, and that it is internal,
2941 otherwise call the skipped entity or default handler.
2942 */
2943 if (! dtd->hasParamEntityRefs || dtd->standalone) {
2944 if (! entity)
2945 return XML_ERROR_UNDEFINED_ENTITY;
2946 else if (! entity->is_internal)
2947 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2948 } else if (! entity) {
2949 if (parser->m_skippedEntityHandler)
2950 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2951 else if (parser->m_defaultHandler)
2952 reportDefault(parser, enc, s, next);
2953 break;
2954 }
2955 if (entity->open)
2956 return XML_ERROR_RECURSIVE_ENTITY_REF;
2957 if (entity->notation)
2958 return XML_ERROR_BINARY_ENTITY_REF;
2959 if (entity->textPtr) {
2960 enum XML_Error result;
2961 if (! parser->m_defaultExpandInternalEntities) {
2962 if (parser->m_skippedEntityHandler)
2963 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2964 0);
2965 else if (parser->m_defaultHandler)
2966 reportDefault(parser, enc, s, next);
2967 break;
2968 }
2969 result = processInternalEntity(parser, entity, XML_FALSE);
2970 if (result != XML_ERROR_NONE)
2971 return result;
2972 } else if (parser->m_externalEntityRefHandler) {
2973 const XML_Char *context;
2974 entity->open = XML_TRUE;
2975 context = getContext(parser);
2976 entity->open = XML_FALSE;
2977 if (! context)
2978 return XML_ERROR_NO_MEMORY;
2979 if (! parser->m_externalEntityRefHandler(
2980 parser->m_externalEntityRefHandlerArg, context, entity->base,
2981 entity->systemId, entity->publicId))
2982 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2983 poolDiscard(&parser->m_tempPool);
2984 } else if (parser->m_defaultHandler)
2985 reportDefault(parser, enc, s, next);
2986 break;
2987 }
2988 case XML_TOK_START_TAG_NO_ATTS:
2989 /* fall through */
2990 case XML_TOK_START_TAG_WITH_ATTS: {
2991 TAG *tag;
2992 enum XML_Error result;
2993 XML_Char *toPtr;
2994 if (parser->m_freeTagList) {
2995 tag = parser->m_freeTagList;
2996 parser->m_freeTagList = parser->m_freeTagList->parent;
2997 } else {
2998 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2999 if (! tag)
3000 return XML_ERROR_NO_MEMORY;
3001 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
3002 if (! tag->buf) {
3003 FREE(parser, tag);
3004 return XML_ERROR_NO_MEMORY;
3005 }
3006 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
3007 }
3008 tag->bindings = NULL;
3009 tag->parent = parser->m_tagStack;
3010 parser->m_tagStack = tag;
3011 tag->name.localPart = NULL;
3012 tag->name.prefix = NULL;
3013 tag->rawName = s + enc->minBytesPerChar;
3014 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3015 ++parser->m_tagLevel;
3016 {
3017 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3018 const char *fromPtr = tag->rawName;
3019 toPtr = (XML_Char *)tag->buf;
3020 for (;;) {
3021 int bufSize;
3022 int convLen;
3023 const enum XML_Convert_Result convert_res
3024 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3025 (ICHAR *)tag->bufEnd - 1);
3026 convLen = (int)(toPtr - (XML_Char *)tag->buf);
3027 if ((fromPtr >= rawNameEnd)
3028 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3029 tag->name.strLen = convLen;
3030 break;
3031 }
3032 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3033 {
3034 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3035 if (temp == NULL)
3036 return XML_ERROR_NO_MEMORY;
3037 tag->buf = temp;
3038 tag->bufEnd = temp + bufSize;
3039 toPtr = (XML_Char *)temp + convLen;
3040 }
3041 }
3042 }
3043 tag->name.str = (XML_Char *)tag->buf;
3044 *toPtr = XML_T('\0');
3045 result
3046 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3047 if (result)
3048 return result;
3049 if (parser->m_startElementHandler)
3050 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3051 (const XML_Char **)parser->m_atts);
3052 else if (parser->m_defaultHandler)
3053 reportDefault(parser, enc, s, next);
3054 poolClear(&parser->m_tempPool);
3055 break;
3056 }
3057 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3058 /* fall through */
3059 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3060 const char *rawName = s + enc->minBytesPerChar;
3061 enum XML_Error result;
3062 BINDING *bindings = NULL;
3063 XML_Bool noElmHandlers = XML_TRUE;
3064 TAG_NAME name;
3065 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3066 rawName + XmlNameLength(enc, rawName));
3067 if (! name.str)
3068 return XML_ERROR_NO_MEMORY;
3069 poolFinish(&parser->m_tempPool);
3070 result = storeAtts(parser, enc, s, &name, &bindings,
3071 XML_ACCOUNT_NONE /* token spans whole start tag */);
3072 if (result != XML_ERROR_NONE) {
3073 freeBindings(parser, bindings);
3074 return result;
3075 }
3076 poolFinish(&parser->m_tempPool);
3077 if (parser->m_startElementHandler) {
3078 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3079 (const XML_Char **)parser->m_atts);
3080 noElmHandlers = XML_FALSE;
3081 }
3082 if (parser->m_endElementHandler) {
3083 if (parser->m_startElementHandler)
3084 *eventPP = *eventEndPP;
3085 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3086 noElmHandlers = XML_FALSE;
3087 }
3088 if (noElmHandlers && parser->m_defaultHandler)
3089 reportDefault(parser, enc, s, next);
3090 poolClear(&parser->m_tempPool);
3091 freeBindings(parser, bindings);
3092 }
3093 if ((parser->m_tagLevel == 0)
3094 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3095 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3096 parser->m_processor = epilogProcessor;
3097 else
3098 return epilogProcessor(parser, next, end, nextPtr);
3099 }
3100 break;
3101 case XML_TOK_END_TAG:
3102 if (parser->m_tagLevel == startTagLevel)
3103 return XML_ERROR_ASYNC_ENTITY;
3104 else {
3105 int len;
3106 const char *rawName;
3107 TAG *tag = parser->m_tagStack;
3108 rawName = s + enc->minBytesPerChar * 2;
3109 len = XmlNameLength(enc, rawName);
3110 if (len != tag->rawNameLength
3111 || memcmp(tag->rawName, rawName, len) != 0) {
3112 *eventPP = rawName;
3113 return XML_ERROR_TAG_MISMATCH;
3114 }
3115 parser->m_tagStack = tag->parent;
3116 tag->parent = parser->m_freeTagList;
3117 parser->m_freeTagList = tag;
3118 --parser->m_tagLevel;
3119 if (parser->m_endElementHandler) {
3120 const XML_Char *localPart;
3121 const XML_Char *prefix;
3122 XML_Char *uri;
3123 localPart = tag->name.localPart;
3124 if (parser->m_ns && localPart) {
3125 /* localPart and prefix may have been overwritten in
3126 tag->name.str, since this points to the binding->uri
3127 buffer which gets reused; so we have to add them again
3128 */
3129 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3130 /* don't need to check for space - already done in storeAtts() */
3131 while (*localPart)
3132 *uri++ = *localPart++;
3133 prefix = tag->name.prefix;
3134 if (parser->m_ns_triplets && prefix) {
3135 *uri++ = parser->m_namespaceSeparator;
3136 while (*prefix)
3137 *uri++ = *prefix++;
3138 }
3139 *uri = XML_T('\0');
3140 }
3141 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3142 } else if (parser->m_defaultHandler)
3143 reportDefault(parser, enc, s, next);
3144 while (tag->bindings) {
3145 BINDING *b = tag->bindings;
3146 if (parser->m_endNamespaceDeclHandler)
3147 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3148 b->prefix->name);
3149 tag->bindings = tag->bindings->nextTagBinding;
3150 b->nextTagBinding = parser->m_freeBindingList;
3151 parser->m_freeBindingList = b;
3152 b->prefix->binding = b->prevPrefixBinding;
3153 }
3154 if ((parser->m_tagLevel == 0)
3155 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3156 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3157 parser->m_processor = epilogProcessor;
3158 else
3159 return epilogProcessor(parser, next, end, nextPtr);
3160 }
3161 }
3162 break;
3163 case XML_TOK_CHAR_REF: {
3164 int n = XmlCharRefNumber(enc, s);
3165 if (n < 0)
3166 return XML_ERROR_BAD_CHAR_REF;
3167 if (parser->m_characterDataHandler) {
3168 XML_Char buf[XML_ENCODE_MAX];
3169 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3170 XmlEncode(n, (ICHAR *)buf));
3171 } else if (parser->m_defaultHandler)
3172 reportDefault(parser, enc, s, next);
3173 } break;
3174 case XML_TOK_XML_DECL:
3175 return XML_ERROR_MISPLACED_XML_PI;
3176 case XML_TOK_DATA_NEWLINE:
3177 if (parser->m_characterDataHandler) {
3178 XML_Char c = 0xA;
3179 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3180 } else if (parser->m_defaultHandler)
3181 reportDefault(parser, enc, s, next);
3182 break;
3183 case XML_TOK_CDATA_SECT_OPEN: {
3184 enum XML_Error result;
3185 if (parser->m_startCdataSectionHandler)
3186 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3187 /* BEGIN disabled code */
3188 /* Suppose you doing a transformation on a document that involves
3189 changing only the character data. You set up a defaultHandler
3190 and a characterDataHandler. The defaultHandler simply copies
3191 characters through. The characterDataHandler does the
3192 transformation and writes the characters out escaping them as
3193 necessary. This case will fail to work if we leave out the
3194 following two lines (because & and < inside CDATA sections will
3195 be incorrectly escaped).
3196
3197 However, now we have a start/endCdataSectionHandler, so it seems
3198 easier to let the user deal with this.
3199 */
3200 else if ((0) && parser->m_characterDataHandler)
3201 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3202 0);
3203 /* END disabled code */
3204 else if (parser->m_defaultHandler)
3205 reportDefault(parser, enc, s, next);
3206 result
3207 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3208 if (result != XML_ERROR_NONE)
3209 return result;
3210 else if (! next) {
3211 parser->m_processor = cdataSectionProcessor;
3212 return result;
3213 }
3214 } break;
3215 case XML_TOK_TRAILING_RSQB:
3216 if (haveMore) {
3217 *nextPtr = s;
3218 return XML_ERROR_NONE;
3219 }
3220 if (parser->m_characterDataHandler) {
3221 if (MUST_CONVERT(enc, s)) {
3222 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3223 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3224 parser->m_characterDataHandler(
3225 parser->m_handlerArg, parser->m_dataBuf,
3226 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3227 } else
3228 parser->m_characterDataHandler(
3229 parser->m_handlerArg, (const XML_Char *)s,
3230 (int)((const XML_Char *)end - (const XML_Char *)s));
3231 } else if (parser->m_defaultHandler)
3232 reportDefault(parser, enc, s, end);
3233 /* We are at the end of the final buffer, should we check for
3234 XML_SUSPENDED, XML_FINISHED?
3235 */
3236 if (startTagLevel == 0) {
3237 *eventPP = end;
3238 return XML_ERROR_NO_ELEMENTS;
3239 }
3240 if (parser->m_tagLevel != startTagLevel) {
3241 *eventPP = end;
3242 return XML_ERROR_ASYNC_ENTITY;
3243 }
3244 *nextPtr = end;
3245 return XML_ERROR_NONE;
3246 case XML_TOK_DATA_CHARS: {
3247 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3248 if (charDataHandler) {
3249 if (MUST_CONVERT(enc, s)) {
3250 for (;;) {
3251 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3252 const enum XML_Convert_Result convert_res = XmlConvert(
3253 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3254 *eventEndPP = s;
3255 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3256 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3257 if ((convert_res == XML_CONVERT_COMPLETED)
3258 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3259 break;
3260 *eventPP = s;
3261 }
3262 } else
3263 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3264 (int)((const XML_Char *)next - (const XML_Char *)s));
3265 } else if (parser->m_defaultHandler)
3266 reportDefault(parser, enc, s, next);
3267 } break;
3268 case XML_TOK_PI:
3269 if (! reportProcessingInstruction(parser, enc, s, next))
3270 return XML_ERROR_NO_MEMORY;
3271 break;
3272 case XML_TOK_COMMENT:
3273 if (! reportComment(parser, enc, s, next))
3274 return XML_ERROR_NO_MEMORY;
3275 break;
3276 default:
3277 /* All of the tokens produced by XmlContentTok() have their own
3278 * explicit cases, so this default is not strictly necessary.
3279 * However it is a useful safety net, so we retain the code and
3280 * simply exclude it from the coverage tests.
3281 *
3282 * LCOV_EXCL_START
3283 */
3284 if (parser->m_defaultHandler)
3285 reportDefault(parser, enc, s, next);
3286 break;
3287 /* LCOV_EXCL_STOP */
3288 }
3289 *eventPP = s = next;
3290 switch (parser->m_parsingStatus.parsing) {
3291 case XML_SUSPENDED:
3292 *nextPtr = next;
3293 return XML_ERROR_NONE;
3294 case XML_FINISHED:
3295 return XML_ERROR_ABORTED;
3296 default:;
3297 }
3298 }
3299 /* not reached */
3300 }
3301
3302 /* This function does not call free() on the allocated memory, merely
3303 * moving it to the parser's m_freeBindingList where it can be freed or
3304 * reused as appropriate.
3305 */
3306 static void
freeBindings(XML_Parser parser,BINDING * bindings)3307 freeBindings(XML_Parser parser, BINDING *bindings) {
3308 while (bindings) {
3309 BINDING *b = bindings;
3310
3311 /* m_startNamespaceDeclHandler will have been called for this
3312 * binding in addBindings(), so call the end handler now.
3313 */
3314 if (parser->m_endNamespaceDeclHandler)
3315 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3316
3317 bindings = bindings->nextTagBinding;
3318 b->nextTagBinding = parser->m_freeBindingList;
3319 parser->m_freeBindingList = b;
3320 b->prefix->binding = b->prevPrefixBinding;
3321 }
3322 }
3323
3324 /* Precondition: all arguments must be non-NULL;
3325 Purpose:
3326 - normalize attributes
3327 - check attributes for well-formedness
3328 - generate namespace aware attribute names (URI, prefix)
3329 - build list of attributes for startElementHandler
3330 - default attributes
3331 - process namespace declarations (check and report them)
3332 - generate namespace aware element name (URI, prefix)
3333 */
3334 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3335 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3336 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3337 enum XML_Account account) {
3338 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3339 ELEMENT_TYPE *elementType;
3340 int nDefaultAtts;
3341 const XML_Char **appAtts; /* the attribute list for the application */
3342 int attIndex = 0;
3343 int prefixLen;
3344 int i;
3345 int n;
3346 XML_Char *uri;
3347 int nPrefixes = 0;
3348 BINDING *binding;
3349 const XML_Char *localPart;
3350
3351 /* lookup the element type name */
3352 elementType
3353 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3354 if (! elementType) {
3355 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3356 if (! name)
3357 return XML_ERROR_NO_MEMORY;
3358 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3359 sizeof(ELEMENT_TYPE));
3360 if (! elementType)
3361 return XML_ERROR_NO_MEMORY;
3362 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3363 return XML_ERROR_NO_MEMORY;
3364 }
3365 nDefaultAtts = elementType->nDefaultAtts;
3366
3367 /* get the attributes from the tokenizer */
3368 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3369
3370 /* Detect and prevent integer overflow */
3371 if (n > INT_MAX - nDefaultAtts) {
3372 return XML_ERROR_NO_MEMORY;
3373 }
3374
3375 if (n + nDefaultAtts > parser->m_attsSize) {
3376 int oldAttsSize = parser->m_attsSize;
3377 ATTRIBUTE *temp;
3378 #ifdef XML_ATTR_INFO
3379 XML_AttrInfo *temp2;
3380 #endif
3381
3382 /* Detect and prevent integer overflow */
3383 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3384 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3385 return XML_ERROR_NO_MEMORY;
3386 }
3387
3388 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3389
3390 /* Detect and prevent integer overflow.
3391 * The preprocessor guard addresses the "always false" warning
3392 * from -Wtype-limits on platforms where
3393 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3394 #if UINT_MAX >= SIZE_MAX
3395 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3396 parser->m_attsSize = oldAttsSize;
3397 return XML_ERROR_NO_MEMORY;
3398 }
3399 #endif
3400
3401 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3402 parser->m_attsSize * sizeof(ATTRIBUTE));
3403 if (temp == NULL) {
3404 parser->m_attsSize = oldAttsSize;
3405 return XML_ERROR_NO_MEMORY;
3406 }
3407 parser->m_atts = temp;
3408 #ifdef XML_ATTR_INFO
3409 /* Detect and prevent integer overflow.
3410 * The preprocessor guard addresses the "always false" warning
3411 * from -Wtype-limits on platforms where
3412 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3413 # if UINT_MAX >= SIZE_MAX
3414 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3415 parser->m_attsSize = oldAttsSize;
3416 return XML_ERROR_NO_MEMORY;
3417 }
3418 # endif
3419
3420 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3421 parser->m_attsSize * sizeof(XML_AttrInfo));
3422 if (temp2 == NULL) {
3423 parser->m_attsSize = oldAttsSize;
3424 return XML_ERROR_NO_MEMORY;
3425 }
3426 parser->m_attInfo = temp2;
3427 #endif
3428 if (n > oldAttsSize)
3429 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3430 }
3431
3432 appAtts = (const XML_Char **)parser->m_atts;
3433 for (i = 0; i < n; i++) {
3434 ATTRIBUTE *currAtt = &parser->m_atts[i];
3435 #ifdef XML_ATTR_INFO
3436 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3437 #endif
3438 /* add the name and value to the attribute list */
3439 ATTRIBUTE_ID *attId
3440 = getAttributeId(parser, enc, currAtt->name,
3441 currAtt->name + XmlNameLength(enc, currAtt->name));
3442 if (! attId)
3443 return XML_ERROR_NO_MEMORY;
3444 #ifdef XML_ATTR_INFO
3445 currAttInfo->nameStart
3446 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3447 currAttInfo->nameEnd
3448 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3449 currAttInfo->valueStart = parser->m_parseEndByteIndex
3450 - (parser->m_parseEndPtr - currAtt->valuePtr);
3451 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3452 - (parser->m_parseEndPtr - currAtt->valueEnd);
3453 #endif
3454 /* Detect duplicate attributes by their QNames. This does not work when
3455 namespace processing is turned on and different prefixes for the same
3456 namespace are used. For this case we have a check further down.
3457 */
3458 if ((attId->name)[-1]) {
3459 if (enc == parser->m_encoding)
3460 parser->m_eventPtr = parser->m_atts[i].name;
3461 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3462 }
3463 (attId->name)[-1] = 1;
3464 appAtts[attIndex++] = attId->name;
3465 if (! parser->m_atts[i].normalized) {
3466 enum XML_Error result;
3467 XML_Bool isCdata = XML_TRUE;
3468
3469 /* figure out whether declared as other than CDATA */
3470 if (attId->maybeTokenized) {
3471 int j;
3472 for (j = 0; j < nDefaultAtts; j++) {
3473 if (attId == elementType->defaultAtts[j].id) {
3474 isCdata = elementType->defaultAtts[j].isCdata;
3475 break;
3476 }
3477 }
3478 }
3479
3480 /* normalize the attribute value */
3481 result = storeAttributeValue(
3482 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3483 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3484 if (result)
3485 return result;
3486 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3487 poolFinish(&parser->m_tempPool);
3488 } else {
3489 /* the value did not need normalizing */
3490 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3491 parser->m_atts[i].valuePtr,
3492 parser->m_atts[i].valueEnd);
3493 if (appAtts[attIndex] == 0)
3494 return XML_ERROR_NO_MEMORY;
3495 poolFinish(&parser->m_tempPool);
3496 }
3497 /* handle prefixed attribute names */
3498 if (attId->prefix) {
3499 if (attId->xmlns) {
3500 /* deal with namespace declarations here */
3501 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3502 appAtts[attIndex], bindingsPtr);
3503 if (result)
3504 return result;
3505 --attIndex;
3506 } else {
3507 /* deal with other prefixed names later */
3508 attIndex++;
3509 nPrefixes++;
3510 (attId->name)[-1] = 2;
3511 }
3512 } else
3513 attIndex++;
3514 }
3515
3516 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3517 parser->m_nSpecifiedAtts = attIndex;
3518 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3519 for (i = 0; i < attIndex; i += 2)
3520 if (appAtts[i] == elementType->idAtt->name) {
3521 parser->m_idAttIndex = i;
3522 break;
3523 }
3524 } else
3525 parser->m_idAttIndex = -1;
3526
3527 /* do attribute defaulting */
3528 for (i = 0; i < nDefaultAtts; i++) {
3529 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3530 if (! (da->id->name)[-1] && da->value) {
3531 if (da->id->prefix) {
3532 if (da->id->xmlns) {
3533 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3534 da->value, bindingsPtr);
3535 if (result)
3536 return result;
3537 } else {
3538 (da->id->name)[-1] = 2;
3539 nPrefixes++;
3540 appAtts[attIndex++] = da->id->name;
3541 appAtts[attIndex++] = da->value;
3542 }
3543 } else {
3544 (da->id->name)[-1] = 1;
3545 appAtts[attIndex++] = da->id->name;
3546 appAtts[attIndex++] = da->value;
3547 }
3548 }
3549 }
3550 appAtts[attIndex] = 0;
3551
3552 /* expand prefixed attribute names, check for duplicates,
3553 and clear flags that say whether attributes were specified */
3554 i = 0;
3555 if (nPrefixes) {
3556 int j; /* hash table index */
3557 unsigned long version = parser->m_nsAttsVersion;
3558
3559 /* Detect and prevent invalid shift */
3560 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3561 return XML_ERROR_NO_MEMORY;
3562 }
3563
3564 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3565 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3566 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3567 if ((nPrefixes << 1)
3568 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3569 NS_ATT *temp;
3570 /* hash table size must also be a power of 2 and >= 8 */
3571 while (nPrefixes >> parser->m_nsAttsPower++)
3572 ;
3573 if (parser->m_nsAttsPower < 3)
3574 parser->m_nsAttsPower = 3;
3575
3576 /* Detect and prevent invalid shift */
3577 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3578 /* Restore actual size of memory in m_nsAtts */
3579 parser->m_nsAttsPower = oldNsAttsPower;
3580 return XML_ERROR_NO_MEMORY;
3581 }
3582
3583 nsAttsSize = 1u << parser->m_nsAttsPower;
3584
3585 /* Detect and prevent integer overflow.
3586 * The preprocessor guard addresses the "always false" warning
3587 * from -Wtype-limits on platforms where
3588 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3589 #if UINT_MAX >= SIZE_MAX
3590 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3591 /* Restore actual size of memory in m_nsAtts */
3592 parser->m_nsAttsPower = oldNsAttsPower;
3593 return XML_ERROR_NO_MEMORY;
3594 }
3595 #endif
3596
3597 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3598 nsAttsSize * sizeof(NS_ATT));
3599 if (! temp) {
3600 /* Restore actual size of memory in m_nsAtts */
3601 parser->m_nsAttsPower = oldNsAttsPower;
3602 return XML_ERROR_NO_MEMORY;
3603 }
3604 parser->m_nsAtts = temp;
3605 version = 0; /* force re-initialization of m_nsAtts hash table */
3606 }
3607 /* using a version flag saves us from initializing m_nsAtts every time */
3608 if (! version) { /* initialize version flags when version wraps around */
3609 version = INIT_ATTS_VERSION;
3610 for (j = nsAttsSize; j != 0;)
3611 parser->m_nsAtts[--j].version = version;
3612 }
3613 parser->m_nsAttsVersion = --version;
3614
3615 /* expand prefixed names and check for duplicates */
3616 for (; i < attIndex; i += 2) {
3617 const XML_Char *s = appAtts[i];
3618 if (s[-1] == 2) { /* prefixed */
3619 ATTRIBUTE_ID *id;
3620 const BINDING *b;
3621 unsigned long uriHash;
3622 struct siphash sip_state;
3623 struct sipkey sip_key;
3624
3625 copy_salt_to_sipkey(parser, &sip_key);
3626 sip24_init(&sip_state, &sip_key);
3627
3628 ((XML_Char *)s)[-1] = 0; /* clear flag */
3629 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3630 if (! id || ! id->prefix) {
3631 /* This code is walking through the appAtts array, dealing
3632 * with (in this case) a prefixed attribute name. To be in
3633 * the array, the attribute must have already been bound, so
3634 * has to have passed through the hash table lookup once
3635 * already. That implies that an entry for it already
3636 * exists, so the lookup above will return a pointer to
3637 * already allocated memory. There is no opportunaity for
3638 * the allocator to fail, so the condition above cannot be
3639 * fulfilled.
3640 *
3641 * Since it is difficult to be certain that the above
3642 * analysis is complete, we retain the test and merely
3643 * remove the code from coverage tests.
3644 */
3645 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3646 }
3647 b = id->prefix->binding;
3648 if (! b)
3649 return XML_ERROR_UNBOUND_PREFIX;
3650
3651 for (j = 0; j < b->uriLen; j++) {
3652 const XML_Char c = b->uri[j];
3653 if (! poolAppendChar(&parser->m_tempPool, c))
3654 return XML_ERROR_NO_MEMORY;
3655 }
3656
3657 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3658
3659 while (*s++ != XML_T(ASCII_COLON))
3660 ;
3661
3662 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3663
3664 do { /* copies null terminator */
3665 if (! poolAppendChar(&parser->m_tempPool, *s))
3666 return XML_ERROR_NO_MEMORY;
3667 } while (*s++);
3668
3669 uriHash = (unsigned long)sip24_final(&sip_state);
3670
3671 { /* Check hash table for duplicate of expanded name (uriName).
3672 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3673 */
3674 unsigned char step = 0;
3675 unsigned long mask = nsAttsSize - 1;
3676 j = uriHash & mask; /* index into hash table */
3677 while (parser->m_nsAtts[j].version == version) {
3678 /* for speed we compare stored hash values first */
3679 if (uriHash == parser->m_nsAtts[j].hash) {
3680 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3681 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3682 /* s1 is null terminated, but not s2 */
3683 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3684 ;
3685 if (*s1 == 0)
3686 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3687 }
3688 if (! step)
3689 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3690 j < step ? (j += nsAttsSize - step) : (j -= step);
3691 }
3692 }
3693
3694 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3695 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3696 s = b->prefix->name;
3697 do {
3698 if (! poolAppendChar(&parser->m_tempPool, *s))
3699 return XML_ERROR_NO_MEMORY;
3700 } while (*s++);
3701 }
3702
3703 /* store expanded name in attribute list */
3704 s = poolStart(&parser->m_tempPool);
3705 poolFinish(&parser->m_tempPool);
3706 appAtts[i] = s;
3707
3708 /* fill empty slot with new version, uriName and hash value */
3709 parser->m_nsAtts[j].version = version;
3710 parser->m_nsAtts[j].hash = uriHash;
3711 parser->m_nsAtts[j].uriName = s;
3712
3713 if (! --nPrefixes) {
3714 i += 2;
3715 break;
3716 }
3717 } else /* not prefixed */
3718 ((XML_Char *)s)[-1] = 0; /* clear flag */
3719 }
3720 }
3721 /* clear flags for the remaining attributes */
3722 for (; i < attIndex; i += 2)
3723 ((XML_Char *)(appAtts[i]))[-1] = 0;
3724 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3725 binding->attId->name[-1] = 0;
3726
3727 if (! parser->m_ns)
3728 return XML_ERROR_NONE;
3729
3730 /* expand the element type name */
3731 if (elementType->prefix) {
3732 binding = elementType->prefix->binding;
3733 if (! binding)
3734 return XML_ERROR_UNBOUND_PREFIX;
3735 localPart = tagNamePtr->str;
3736 while (*localPart++ != XML_T(ASCII_COLON))
3737 ;
3738 } else if (dtd->defaultPrefix.binding) {
3739 binding = dtd->defaultPrefix.binding;
3740 localPart = tagNamePtr->str;
3741 } else
3742 return XML_ERROR_NONE;
3743 prefixLen = 0;
3744 if (parser->m_ns_triplets && binding->prefix->name) {
3745 for (; binding->prefix->name[prefixLen++];)
3746 ; /* prefixLen includes null terminator */
3747 }
3748 tagNamePtr->localPart = localPart;
3749 tagNamePtr->uriLen = binding->uriLen;
3750 tagNamePtr->prefix = binding->prefix->name;
3751 tagNamePtr->prefixLen = prefixLen;
3752 for (i = 0; localPart[i++];)
3753 ; /* i includes null terminator */
3754
3755 /* Detect and prevent integer overflow */
3756 if (binding->uriLen > INT_MAX - prefixLen
3757 || i > INT_MAX - (binding->uriLen + prefixLen)) {
3758 return XML_ERROR_NO_MEMORY;
3759 }
3760
3761 n = i + binding->uriLen + prefixLen;
3762 if (n > binding->uriAlloc) {
3763 TAG *p;
3764
3765 /* Detect and prevent integer overflow */
3766 if (n > INT_MAX - EXPAND_SPARE) {
3767 return XML_ERROR_NO_MEMORY;
3768 }
3769 /* Detect and prevent integer overflow.
3770 * The preprocessor guard addresses the "always false" warning
3771 * from -Wtype-limits on platforms where
3772 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3773 #if UINT_MAX >= SIZE_MAX
3774 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3775 return XML_ERROR_NO_MEMORY;
3776 }
3777 #endif
3778
3779 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3780 if (! uri)
3781 return XML_ERROR_NO_MEMORY;
3782 binding->uriAlloc = n + EXPAND_SPARE;
3783 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3784 for (p = parser->m_tagStack; p; p = p->parent)
3785 if (p->name.str == binding->uri)
3786 p->name.str = uri;
3787 FREE(parser, binding->uri);
3788 binding->uri = uri;
3789 }
3790 /* if m_namespaceSeparator != '\0' then uri includes it already */
3791 uri = binding->uri + binding->uriLen;
3792 memcpy(uri, localPart, i * sizeof(XML_Char));
3793 /* we always have a namespace separator between localPart and prefix */
3794 if (prefixLen) {
3795 uri += i - 1;
3796 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3797 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3798 }
3799 tagNamePtr->str = binding->uri;
3800 return XML_ERROR_NONE;
3801 }
3802
3803 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3804 is_rfc3986_uri_char(XML_Char candidate) {
3805 // For the RFC 3986 ANBF grammar see
3806 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3807
3808 switch (candidate) {
3809 // From rule "ALPHA" (uppercase half)
3810 case 'A':
3811 case 'B':
3812 case 'C':
3813 case 'D':
3814 case 'E':
3815 case 'F':
3816 case 'G':
3817 case 'H':
3818 case 'I':
3819 case 'J':
3820 case 'K':
3821 case 'L':
3822 case 'M':
3823 case 'N':
3824 case 'O':
3825 case 'P':
3826 case 'Q':
3827 case 'R':
3828 case 'S':
3829 case 'T':
3830 case 'U':
3831 case 'V':
3832 case 'W':
3833 case 'X':
3834 case 'Y':
3835 case 'Z':
3836
3837 // From rule "ALPHA" (lowercase half)
3838 case 'a':
3839 case 'b':
3840 case 'c':
3841 case 'd':
3842 case 'e':
3843 case 'f':
3844 case 'g':
3845 case 'h':
3846 case 'i':
3847 case 'j':
3848 case 'k':
3849 case 'l':
3850 case 'm':
3851 case 'n':
3852 case 'o':
3853 case 'p':
3854 case 'q':
3855 case 'r':
3856 case 's':
3857 case 't':
3858 case 'u':
3859 case 'v':
3860 case 'w':
3861 case 'x':
3862 case 'y':
3863 case 'z':
3864
3865 // From rule "DIGIT"
3866 case '0':
3867 case '1':
3868 case '2':
3869 case '3':
3870 case '4':
3871 case '5':
3872 case '6':
3873 case '7':
3874 case '8':
3875 case '9':
3876
3877 // From rule "pct-encoded"
3878 case '%':
3879
3880 // From rule "unreserved"
3881 case '-':
3882 case '.':
3883 case '_':
3884 case '~':
3885
3886 // From rule "gen-delims"
3887 case ':':
3888 case '/':
3889 case '?':
3890 case '#':
3891 case '[':
3892 case ']':
3893 case '@':
3894
3895 // From rule "sub-delims"
3896 case '!':
3897 case '$':
3898 case '&':
3899 case '\'':
3900 case '(':
3901 case ')':
3902 case '*':
3903 case '+':
3904 case ',':
3905 case ';':
3906 case '=':
3907 return XML_TRUE;
3908
3909 default:
3910 return XML_FALSE;
3911 }
3912 }
3913
3914 /* addBinding() overwrites the value of prefix->binding without checking.
3915 Therefore one must keep track of the old value outside of addBinding().
3916 */
3917 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3918 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3919 const XML_Char *uri, BINDING **bindingsPtr) {
3920 // "http://www.w3.org/XML/1998/namespace"
3921 static const XML_Char xmlNamespace[]
3922 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3923 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3924 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3925 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3926 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3927 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3928 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3929 ASCII_e, '\0'};
3930 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3931 // "http://www.w3.org/2000/xmlns/"
3932 static const XML_Char xmlnsNamespace[]
3933 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3934 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3935 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3936 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3937 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3938 static const int xmlnsLen
3939 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3940
3941 XML_Bool mustBeXML = XML_FALSE;
3942 XML_Bool isXML = XML_TRUE;
3943 XML_Bool isXMLNS = XML_TRUE;
3944
3945 BINDING *b;
3946 int len;
3947
3948 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3949 if (*uri == XML_T('\0') && prefix->name)
3950 return XML_ERROR_UNDECLARING_PREFIX;
3951
3952 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3953 && prefix->name[1] == XML_T(ASCII_m)
3954 && prefix->name[2] == XML_T(ASCII_l)) {
3955 /* Not allowed to bind xmlns */
3956 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3957 && prefix->name[5] == XML_T('\0'))
3958 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3959
3960 if (prefix->name[3] == XML_T('\0'))
3961 mustBeXML = XML_TRUE;
3962 }
3963
3964 for (len = 0; uri[len]; len++) {
3965 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3966 isXML = XML_FALSE;
3967
3968 if (! mustBeXML && isXMLNS
3969 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3970 isXMLNS = XML_FALSE;
3971
3972 // NOTE: While Expat does not validate namespace URIs against RFC 3986
3973 // today (and is not REQUIRED to do so with regard to the XML 1.0
3974 // namespaces specification) we have to at least make sure, that
3975 // the application on top of Expat (that is likely splitting expanded
3976 // element names ("qualified names") of form
3977 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3978 // in its element handler code) cannot be confused by an attacker
3979 // putting additional namespace separator characters into namespace
3980 // declarations. That would be ambiguous and not to be expected.
3981 //
3982 // While the HTML API docs of function XML_ParserCreateNS have been
3983 // advising against use of a namespace separator character that can
3984 // appear in a URI for >20 years now, some widespread applications
3985 // are using URI characters (':' (colon) in particular) for a
3986 // namespace separator, in practice. To keep these applications
3987 // functional, we only reject namespaces URIs containing the
3988 // application-chosen namespace separator if the chosen separator
3989 // is a non-URI character with regard to RFC 3986.
3990 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3991 && ! is_rfc3986_uri_char(uri[len])) {
3992 return XML_ERROR_SYNTAX;
3993 }
3994 }
3995 isXML = isXML && len == xmlLen;
3996 isXMLNS = isXMLNS && len == xmlnsLen;
3997
3998 if (mustBeXML != isXML)
3999 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
4000 : XML_ERROR_RESERVED_NAMESPACE_URI;
4001
4002 if (isXMLNS)
4003 return XML_ERROR_RESERVED_NAMESPACE_URI;
4004
4005 if (parser->m_namespaceSeparator)
4006 len++;
4007 if (parser->m_freeBindingList) {
4008 b = parser->m_freeBindingList;
4009 if (len > b->uriAlloc) {
4010 /* Detect and prevent integer overflow */
4011 if (len > INT_MAX - EXPAND_SPARE) {
4012 return XML_ERROR_NO_MEMORY;
4013 }
4014
4015 /* Detect and prevent integer overflow.
4016 * The preprocessor guard addresses the "always false" warning
4017 * from -Wtype-limits on platforms where
4018 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4019 #if UINT_MAX >= SIZE_MAX
4020 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4021 return XML_ERROR_NO_MEMORY;
4022 }
4023 #endif
4024
4025 XML_Char *temp = (XML_Char *)REALLOC(
4026 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4027 if (temp == NULL)
4028 return XML_ERROR_NO_MEMORY;
4029 b->uri = temp;
4030 b->uriAlloc = len + EXPAND_SPARE;
4031 }
4032 parser->m_freeBindingList = b->nextTagBinding;
4033 } else {
4034 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4035 if (! b)
4036 return XML_ERROR_NO_MEMORY;
4037
4038 /* Detect and prevent integer overflow */
4039 if (len > INT_MAX - EXPAND_SPARE) {
4040 return XML_ERROR_NO_MEMORY;
4041 }
4042 /* Detect and prevent integer overflow.
4043 * The preprocessor guard addresses the "always false" warning
4044 * from -Wtype-limits on platforms where
4045 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4046 #if UINT_MAX >= SIZE_MAX
4047 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4048 return XML_ERROR_NO_MEMORY;
4049 }
4050 #endif
4051
4052 b->uri
4053 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4054 if (! b->uri) {
4055 FREE(parser, b);
4056 return XML_ERROR_NO_MEMORY;
4057 }
4058 b->uriAlloc = len + EXPAND_SPARE;
4059 }
4060 b->uriLen = len;
4061 memcpy(b->uri, uri, len * sizeof(XML_Char));
4062 if (parser->m_namespaceSeparator)
4063 b->uri[len - 1] = parser->m_namespaceSeparator;
4064 b->prefix = prefix;
4065 b->attId = attId;
4066 b->prevPrefixBinding = prefix->binding;
4067 /* NULL binding when default namespace undeclared */
4068 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4069 prefix->binding = NULL;
4070 else
4071 prefix->binding = b;
4072 b->nextTagBinding = *bindingsPtr;
4073 *bindingsPtr = b;
4074 /* if attId == NULL then we are not starting a namespace scope */
4075 if (attId && parser->m_startNamespaceDeclHandler)
4076 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4077 prefix->binding ? uri : 0);
4078 return XML_ERROR_NONE;
4079 }
4080
4081 /* The idea here is to avoid using stack for each CDATA section when
4082 the whole file is parsed with one call.
4083 */
4084 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4085 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4086 const char **endPtr) {
4087 enum XML_Error result = doCdataSection(
4088 parser, parser->m_encoding, &start, end, endPtr,
4089 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4090 if (result != XML_ERROR_NONE)
4091 return result;
4092 if (start) {
4093 if (parser->m_parentParser) { /* we are parsing an external entity */
4094 parser->m_processor = externalEntityContentProcessor;
4095 return externalEntityContentProcessor(parser, start, end, endPtr);
4096 } else {
4097 parser->m_processor = contentProcessor;
4098 return contentProcessor(parser, start, end, endPtr);
4099 }
4100 }
4101 return result;
4102 }
4103
4104 /* startPtr gets set to non-null if the section is closed, and to null if
4105 the section is not yet closed.
4106 */
4107 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)4108 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4109 const char *end, const char **nextPtr, XML_Bool haveMore,
4110 enum XML_Account account) {
4111 const char *s = *startPtr;
4112 const char **eventPP;
4113 const char **eventEndPP;
4114 if (enc == parser->m_encoding) {
4115 eventPP = &parser->m_eventPtr;
4116 *eventPP = s;
4117 eventEndPP = &parser->m_eventEndPtr;
4118 } else {
4119 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4120 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4121 }
4122 *eventPP = s;
4123 *startPtr = NULL;
4124
4125 for (;;) {
4126 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4127 int tok = XmlCdataSectionTok(enc, s, end, &next);
4128 #if XML_GE == 1
4129 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4130 accountingOnAbort(parser);
4131 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4132 }
4133 #else
4134 UNUSED_P(account);
4135 #endif
4136 *eventEndPP = next;
4137 switch (tok) {
4138 case XML_TOK_CDATA_SECT_CLOSE:
4139 if (parser->m_endCdataSectionHandler)
4140 parser->m_endCdataSectionHandler(parser->m_handlerArg);
4141 /* BEGIN disabled code */
4142 /* see comment under XML_TOK_CDATA_SECT_OPEN */
4143 else if ((0) && parser->m_characterDataHandler)
4144 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4145 0);
4146 /* END disabled code */
4147 else if (parser->m_defaultHandler)
4148 reportDefault(parser, enc, s, next);
4149 *startPtr = next;
4150 *nextPtr = next;
4151 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4152 return XML_ERROR_ABORTED;
4153 else
4154 return XML_ERROR_NONE;
4155 case XML_TOK_DATA_NEWLINE:
4156 if (parser->m_characterDataHandler) {
4157 XML_Char c = 0xA;
4158 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4159 } else if (parser->m_defaultHandler)
4160 reportDefault(parser, enc, s, next);
4161 break;
4162 case XML_TOK_DATA_CHARS: {
4163 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4164 if (charDataHandler) {
4165 if (MUST_CONVERT(enc, s)) {
4166 for (;;) {
4167 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4168 const enum XML_Convert_Result convert_res = XmlConvert(
4169 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4170 *eventEndPP = next;
4171 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4172 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4173 if ((convert_res == XML_CONVERT_COMPLETED)
4174 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4175 break;
4176 *eventPP = s;
4177 }
4178 } else
4179 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4180 (int)((const XML_Char *)next - (const XML_Char *)s));
4181 } else if (parser->m_defaultHandler)
4182 reportDefault(parser, enc, s, next);
4183 } break;
4184 case XML_TOK_INVALID:
4185 *eventPP = next;
4186 return XML_ERROR_INVALID_TOKEN;
4187 case XML_TOK_PARTIAL_CHAR:
4188 if (haveMore) {
4189 *nextPtr = s;
4190 return XML_ERROR_NONE;
4191 }
4192 return XML_ERROR_PARTIAL_CHAR;
4193 case XML_TOK_PARTIAL:
4194 case XML_TOK_NONE:
4195 if (haveMore) {
4196 *nextPtr = s;
4197 return XML_ERROR_NONE;
4198 }
4199 return XML_ERROR_UNCLOSED_CDATA_SECTION;
4200 default:
4201 /* Every token returned by XmlCdataSectionTok() has its own
4202 * explicit case, so this default case will never be executed.
4203 * We retain it as a safety net and exclude it from the coverage
4204 * statistics.
4205 *
4206 * LCOV_EXCL_START
4207 */
4208 *eventPP = next;
4209 return XML_ERROR_UNEXPECTED_STATE;
4210 /* LCOV_EXCL_STOP */
4211 }
4212
4213 *eventPP = s = next;
4214 switch (parser->m_parsingStatus.parsing) {
4215 case XML_SUSPENDED:
4216 *nextPtr = next;
4217 return XML_ERROR_NONE;
4218 case XML_FINISHED:
4219 return XML_ERROR_ABORTED;
4220 default:;
4221 }
4222 }
4223 /* not reached */
4224 }
4225
4226 #ifdef XML_DTD
4227
4228 /* The idea here is to avoid using stack for each IGNORE section when
4229 the whole file is parsed with one call.
4230 */
4231 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4232 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4233 const char **endPtr) {
4234 enum XML_Error result
4235 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4236 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4237 if (result != XML_ERROR_NONE)
4238 return result;
4239 if (start) {
4240 parser->m_processor = prologProcessor;
4241 return prologProcessor(parser, start, end, endPtr);
4242 }
4243 return result;
4244 }
4245
4246 /* startPtr gets set to non-null is the section is closed, and to null
4247 if the section is not yet closed.
4248 */
4249 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4250 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4251 const char *end, const char **nextPtr, XML_Bool haveMore) {
4252 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4253 int tok;
4254 const char *s = *startPtr;
4255 const char **eventPP;
4256 const char **eventEndPP;
4257 if (enc == parser->m_encoding) {
4258 eventPP = &parser->m_eventPtr;
4259 *eventPP = s;
4260 eventEndPP = &parser->m_eventEndPtr;
4261 } else {
4262 /* It's not entirely clear, but it seems the following two lines
4263 * of code cannot be executed. The only occasions on which 'enc'
4264 * is not 'encoding' are when this function is called
4265 * from the internal entity processing, and IGNORE sections are an
4266 * error in internal entities.
4267 *
4268 * Since it really isn't clear that this is true, we keep the code
4269 * and just remove it from our coverage tests.
4270 *
4271 * LCOV_EXCL_START
4272 */
4273 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4274 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4275 /* LCOV_EXCL_STOP */
4276 }
4277 *eventPP = s;
4278 *startPtr = NULL;
4279 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4280 # if XML_GE == 1
4281 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4282 XML_ACCOUNT_DIRECT)) {
4283 accountingOnAbort(parser);
4284 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4285 }
4286 # endif
4287 *eventEndPP = next;
4288 switch (tok) {
4289 case XML_TOK_IGNORE_SECT:
4290 if (parser->m_defaultHandler)
4291 reportDefault(parser, enc, s, next);
4292 *startPtr = next;
4293 *nextPtr = next;
4294 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4295 return XML_ERROR_ABORTED;
4296 else
4297 return XML_ERROR_NONE;
4298 case XML_TOK_INVALID:
4299 *eventPP = next;
4300 return XML_ERROR_INVALID_TOKEN;
4301 case XML_TOK_PARTIAL_CHAR:
4302 if (haveMore) {
4303 *nextPtr = s;
4304 return XML_ERROR_NONE;
4305 }
4306 return XML_ERROR_PARTIAL_CHAR;
4307 case XML_TOK_PARTIAL:
4308 case XML_TOK_NONE:
4309 if (haveMore) {
4310 *nextPtr = s;
4311 return XML_ERROR_NONE;
4312 }
4313 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4314 default:
4315 /* All of the tokens that XmlIgnoreSectionTok() returns have
4316 * explicit cases to handle them, so this default case is never
4317 * executed. We keep it as a safety net anyway, and remove it
4318 * from our test coverage statistics.
4319 *
4320 * LCOV_EXCL_START
4321 */
4322 *eventPP = next;
4323 return XML_ERROR_UNEXPECTED_STATE;
4324 /* LCOV_EXCL_STOP */
4325 }
4326 /* not reached */
4327 }
4328
4329 #endif /* XML_DTD */
4330
4331 static enum XML_Error
initializeEncoding(XML_Parser parser)4332 initializeEncoding(XML_Parser parser) {
4333 const char *s;
4334 #ifdef XML_UNICODE
4335 char encodingBuf[128];
4336 /* See comments about `protocolEncodingName` in parserInit() */
4337 if (! parser->m_protocolEncodingName)
4338 s = NULL;
4339 else {
4340 int i;
4341 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4342 if (i == sizeof(encodingBuf) - 1
4343 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4344 encodingBuf[0] = '\0';
4345 break;
4346 }
4347 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4348 }
4349 encodingBuf[i] = '\0';
4350 s = encodingBuf;
4351 }
4352 #else
4353 s = parser->m_protocolEncodingName;
4354 #endif
4355 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4356 &parser->m_initEncoding, &parser->m_encoding, s))
4357 return XML_ERROR_NONE;
4358 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4359 }
4360
4361 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4362 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4363 const char *next) {
4364 const char *encodingName = NULL;
4365 const XML_Char *storedEncName = NULL;
4366 const ENCODING *newEncoding = NULL;
4367 const char *version = NULL;
4368 const char *versionend = NULL;
4369 const XML_Char *storedversion = NULL;
4370 int standalone = -1;
4371
4372 #if XML_GE == 1
4373 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4374 XML_ACCOUNT_DIRECT)) {
4375 accountingOnAbort(parser);
4376 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4377 }
4378 #endif
4379
4380 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4381 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4382 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4383 if (isGeneralTextEntity)
4384 return XML_ERROR_TEXT_DECL;
4385 else
4386 return XML_ERROR_XML_DECL;
4387 }
4388 if (! isGeneralTextEntity && standalone == 1) {
4389 parser->m_dtd->standalone = XML_TRUE;
4390 #ifdef XML_DTD
4391 if (parser->m_paramEntityParsing
4392 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4393 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4394 #endif /* XML_DTD */
4395 }
4396 if (parser->m_xmlDeclHandler) {
4397 if (encodingName != NULL) {
4398 storedEncName = poolStoreString(
4399 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4400 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4401 if (! storedEncName)
4402 return XML_ERROR_NO_MEMORY;
4403 poolFinish(&parser->m_temp2Pool);
4404 }
4405 if (version) {
4406 storedversion
4407 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4408 versionend - parser->m_encoding->minBytesPerChar);
4409 if (! storedversion)
4410 return XML_ERROR_NO_MEMORY;
4411 }
4412 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4413 standalone);
4414 } else if (parser->m_defaultHandler)
4415 reportDefault(parser, parser->m_encoding, s, next);
4416 if (parser->m_protocolEncodingName == NULL) {
4417 if (newEncoding) {
4418 /* Check that the specified encoding does not conflict with what
4419 * the parser has already deduced. Do we have the same number
4420 * of bytes in the smallest representation of a character? If
4421 * this is UTF-16, is it the same endianness?
4422 */
4423 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4424 || (newEncoding->minBytesPerChar == 2
4425 && newEncoding != parser->m_encoding)) {
4426 parser->m_eventPtr = encodingName;
4427 return XML_ERROR_INCORRECT_ENCODING;
4428 }
4429 parser->m_encoding = newEncoding;
4430 } else if (encodingName) {
4431 enum XML_Error result;
4432 if (! storedEncName) {
4433 storedEncName = poolStoreString(
4434 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4435 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4436 if (! storedEncName)
4437 return XML_ERROR_NO_MEMORY;
4438 }
4439 result = handleUnknownEncoding(parser, storedEncName);
4440 poolClear(&parser->m_temp2Pool);
4441 if (result == XML_ERROR_UNKNOWN_ENCODING)
4442 parser->m_eventPtr = encodingName;
4443 return result;
4444 }
4445 }
4446
4447 if (storedEncName || storedversion)
4448 poolClear(&parser->m_temp2Pool);
4449
4450 return XML_ERROR_NONE;
4451 }
4452
4453 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4454 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4455 if (parser->m_unknownEncodingHandler) {
4456 XML_Encoding info;
4457 int i;
4458 for (i = 0; i < 256; i++)
4459 info.map[i] = -1;
4460 info.convert = NULL;
4461 info.data = NULL;
4462 info.release = NULL;
4463 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4464 encodingName, &info)) {
4465 ENCODING *enc;
4466 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4467 if (! parser->m_unknownEncodingMem) {
4468 if (info.release)
4469 info.release(info.data);
4470 return XML_ERROR_NO_MEMORY;
4471 }
4472 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4473 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4474 if (enc) {
4475 parser->m_unknownEncodingData = info.data;
4476 parser->m_unknownEncodingRelease = info.release;
4477 parser->m_encoding = enc;
4478 return XML_ERROR_NONE;
4479 }
4480 }
4481 if (info.release != NULL)
4482 info.release(info.data);
4483 }
4484 return XML_ERROR_UNKNOWN_ENCODING;
4485 }
4486
4487 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4488 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4489 const char **nextPtr) {
4490 enum XML_Error result = initializeEncoding(parser);
4491 if (result != XML_ERROR_NONE)
4492 return result;
4493 parser->m_processor = prologProcessor;
4494 return prologProcessor(parser, s, end, nextPtr);
4495 }
4496
4497 #ifdef XML_DTD
4498
4499 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4500 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4501 const char **nextPtr) {
4502 enum XML_Error result = initializeEncoding(parser);
4503 if (result != XML_ERROR_NONE)
4504 return result;
4505
4506 /* we know now that XML_Parse(Buffer) has been called,
4507 so we consider the external parameter entity read */
4508 parser->m_dtd->paramEntityRead = XML_TRUE;
4509
4510 if (parser->m_prologState.inEntityValue) {
4511 parser->m_processor = entityValueInitProcessor;
4512 return entityValueInitProcessor(parser, s, end, nextPtr);
4513 } else {
4514 parser->m_processor = externalParEntProcessor;
4515 return externalParEntProcessor(parser, s, end, nextPtr);
4516 }
4517 }
4518
4519 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4520 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4521 const char **nextPtr) {
4522 int tok;
4523 const char *start = s;
4524 const char *next = start;
4525 parser->m_eventPtr = start;
4526
4527 for (;;) {
4528 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4529 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4530 - storeEntityValue
4531 - processXmlDecl
4532 */
4533 parser->m_eventEndPtr = next;
4534 if (tok <= 0) {
4535 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4536 *nextPtr = s;
4537 return XML_ERROR_NONE;
4538 }
4539 switch (tok) {
4540 case XML_TOK_INVALID:
4541 return XML_ERROR_INVALID_TOKEN;
4542 case XML_TOK_PARTIAL:
4543 return XML_ERROR_UNCLOSED_TOKEN;
4544 case XML_TOK_PARTIAL_CHAR:
4545 return XML_ERROR_PARTIAL_CHAR;
4546 case XML_TOK_NONE: /* start == end */
4547 default:
4548 break;
4549 }
4550 /* found end of entity value - can store it now */
4551 return storeEntityValue(parser, parser->m_encoding, s, end,
4552 XML_ACCOUNT_DIRECT);
4553 } else if (tok == XML_TOK_XML_DECL) {
4554 enum XML_Error result;
4555 result = processXmlDecl(parser, 0, start, next);
4556 if (result != XML_ERROR_NONE)
4557 return result;
4558 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4559 * that to happen, a parameter entity parsing handler must have attempted
4560 * to suspend the parser, which fails and raises an error. The parser can
4561 * be aborted, but can't be suspended.
4562 */
4563 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4564 return XML_ERROR_ABORTED;
4565 *nextPtr = next;
4566 /* stop scanning for text declaration - we found one */
4567 parser->m_processor = entityValueProcessor;
4568 return entityValueProcessor(parser, next, end, nextPtr);
4569 }
4570 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4571 must move s and nextPtr forward to consume the BOM.
4572
4573 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4574 would leave the BOM in the buffer and return. On the next call to this
4575 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4576 is not valid to have multiple BOMs.
4577 */
4578 else if (tok == XML_TOK_BOM) {
4579 # if XML_GE == 1
4580 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4581 XML_ACCOUNT_DIRECT)) {
4582 accountingOnAbort(parser);
4583 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4584 }
4585 # endif
4586
4587 *nextPtr = next;
4588 s = next;
4589 }
4590 /* If we get this token, we have the start of what might be a
4591 normal tag, but not a declaration (i.e. it doesn't begin with
4592 "<!"). In a DTD context, that isn't legal.
4593 */
4594 else if (tok == XML_TOK_INSTANCE_START) {
4595 *nextPtr = next;
4596 return XML_ERROR_SYNTAX;
4597 }
4598 start = next;
4599 parser->m_eventPtr = start;
4600 }
4601 }
4602
4603 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4604 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4605 const char **nextPtr) {
4606 const char *next = s;
4607 int tok;
4608
4609 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4610 if (tok <= 0) {
4611 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4612 *nextPtr = s;
4613 return XML_ERROR_NONE;
4614 }
4615 switch (tok) {
4616 case XML_TOK_INVALID:
4617 return XML_ERROR_INVALID_TOKEN;
4618 case XML_TOK_PARTIAL:
4619 return XML_ERROR_UNCLOSED_TOKEN;
4620 case XML_TOK_PARTIAL_CHAR:
4621 return XML_ERROR_PARTIAL_CHAR;
4622 case XML_TOK_NONE: /* start == end */
4623 default:
4624 break;
4625 }
4626 }
4627 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4628 However, when parsing an external subset, doProlog will not accept a BOM
4629 as valid, and report a syntax error, so we have to skip the BOM, and
4630 account for the BOM bytes.
4631 */
4632 else if (tok == XML_TOK_BOM) {
4633 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4634 XML_ACCOUNT_DIRECT)) {
4635 accountingOnAbort(parser);
4636 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4637 }
4638
4639 s = next;
4640 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4641 }
4642
4643 parser->m_processor = prologProcessor;
4644 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4645 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4646 XML_ACCOUNT_DIRECT);
4647 }
4648
4649 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4650 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4651 const char **nextPtr) {
4652 const char *start = s;
4653 const char *next = s;
4654 const ENCODING *enc = parser->m_encoding;
4655 int tok;
4656
4657 for (;;) {
4658 tok = XmlPrologTok(enc, start, end, &next);
4659 /* Note: These bytes are accounted later in:
4660 - storeEntityValue
4661 */
4662 if (tok <= 0) {
4663 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4664 *nextPtr = s;
4665 return XML_ERROR_NONE;
4666 }
4667 switch (tok) {
4668 case XML_TOK_INVALID:
4669 return XML_ERROR_INVALID_TOKEN;
4670 case XML_TOK_PARTIAL:
4671 return XML_ERROR_UNCLOSED_TOKEN;
4672 case XML_TOK_PARTIAL_CHAR:
4673 return XML_ERROR_PARTIAL_CHAR;
4674 case XML_TOK_NONE: /* start == end */
4675 default:
4676 break;
4677 }
4678 /* found end of entity value - can store it now */
4679 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4680 }
4681 start = next;
4682 }
4683 }
4684
4685 #endif /* XML_DTD */
4686
4687 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4688 prologProcessor(XML_Parser parser, const char *s, const char *end,
4689 const char **nextPtr) {
4690 const char *next = s;
4691 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4692 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4693 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4694 XML_ACCOUNT_DIRECT);
4695 }
4696
4697 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4698 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4699 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4700 XML_Bool allowClosingDoctype, enum XML_Account account) {
4701 #ifdef XML_DTD
4702 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4703 #endif /* XML_DTD */
4704 static const XML_Char atypeCDATA[]
4705 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4706 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4707 static const XML_Char atypeIDREF[]
4708 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4709 static const XML_Char atypeIDREFS[]
4710 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4711 static const XML_Char atypeENTITY[]
4712 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4713 static const XML_Char atypeENTITIES[]
4714 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4715 ASCII_I, ASCII_E, ASCII_S, '\0'};
4716 static const XML_Char atypeNMTOKEN[]
4717 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4718 static const XML_Char atypeNMTOKENS[]
4719 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4720 ASCII_E, ASCII_N, ASCII_S, '\0'};
4721 static const XML_Char notationPrefix[]
4722 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4723 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4724 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4725 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4726
4727 #ifndef XML_DTD
4728 UNUSED_P(account);
4729 #endif
4730
4731 /* save one level of indirection */
4732 DTD *const dtd = parser->m_dtd;
4733
4734 const char **eventPP;
4735 const char **eventEndPP;
4736 enum XML_Content_Quant quant;
4737
4738 if (enc == parser->m_encoding) {
4739 eventPP = &parser->m_eventPtr;
4740 eventEndPP = &parser->m_eventEndPtr;
4741 } else {
4742 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4743 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4744 }
4745
4746 for (;;) {
4747 int role;
4748 XML_Bool handleDefault = XML_TRUE;
4749 *eventPP = s;
4750 *eventEndPP = next;
4751 if (tok <= 0) {
4752 if (haveMore && tok != XML_TOK_INVALID) {
4753 *nextPtr = s;
4754 return XML_ERROR_NONE;
4755 }
4756 switch (tok) {
4757 case XML_TOK_INVALID:
4758 *eventPP = next;
4759 return XML_ERROR_INVALID_TOKEN;
4760 case XML_TOK_PARTIAL:
4761 return XML_ERROR_UNCLOSED_TOKEN;
4762 case XML_TOK_PARTIAL_CHAR:
4763 return XML_ERROR_PARTIAL_CHAR;
4764 case -XML_TOK_PROLOG_S:
4765 tok = -tok;
4766 break;
4767 case XML_TOK_NONE:
4768 #ifdef XML_DTD
4769 /* for internal PE NOT referenced between declarations */
4770 if (enc != parser->m_encoding
4771 && ! parser->m_openInternalEntities->betweenDecl) {
4772 *nextPtr = s;
4773 return XML_ERROR_NONE;
4774 }
4775 /* WFC: PE Between Declarations - must check that PE contains
4776 complete markup, not only for external PEs, but also for
4777 internal PEs if the reference occurs between declarations.
4778 */
4779 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4780 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4781 == XML_ROLE_ERROR)
4782 return XML_ERROR_INCOMPLETE_PE;
4783 *nextPtr = s;
4784 return XML_ERROR_NONE;
4785 }
4786 #endif /* XML_DTD */
4787 return XML_ERROR_NO_ELEMENTS;
4788 default:
4789 tok = -tok;
4790 next = end;
4791 break;
4792 }
4793 }
4794 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4795 #if XML_GE == 1
4796 switch (role) {
4797 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4798 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4799 # ifdef XML_DTD
4800 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4801 # endif
4802 break;
4803 default:
4804 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4805 accountingOnAbort(parser);
4806 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4807 }
4808 }
4809 #endif
4810 switch (role) {
4811 case XML_ROLE_XML_DECL: {
4812 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4813 if (result != XML_ERROR_NONE)
4814 return result;
4815 enc = parser->m_encoding;
4816 handleDefault = XML_FALSE;
4817 } break;
4818 case XML_ROLE_DOCTYPE_NAME:
4819 if (parser->m_startDoctypeDeclHandler) {
4820 parser->m_doctypeName
4821 = poolStoreString(&parser->m_tempPool, enc, s, next);
4822 if (! parser->m_doctypeName)
4823 return XML_ERROR_NO_MEMORY;
4824 poolFinish(&parser->m_tempPool);
4825 parser->m_doctypePubid = NULL;
4826 handleDefault = XML_FALSE;
4827 }
4828 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4829 break;
4830 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4831 if (parser->m_startDoctypeDeclHandler) {
4832 parser->m_startDoctypeDeclHandler(
4833 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4834 parser->m_doctypePubid, 1);
4835 parser->m_doctypeName = NULL;
4836 poolClear(&parser->m_tempPool);
4837 handleDefault = XML_FALSE;
4838 }
4839 break;
4840 #ifdef XML_DTD
4841 case XML_ROLE_TEXT_DECL: {
4842 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4843 if (result != XML_ERROR_NONE)
4844 return result;
4845 enc = parser->m_encoding;
4846 handleDefault = XML_FALSE;
4847 } break;
4848 #endif /* XML_DTD */
4849 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4850 #ifdef XML_DTD
4851 parser->m_useForeignDTD = XML_FALSE;
4852 parser->m_declEntity = (ENTITY *)lookup(
4853 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4854 if (! parser->m_declEntity)
4855 return XML_ERROR_NO_MEMORY;
4856 #endif /* XML_DTD */
4857 dtd->hasParamEntityRefs = XML_TRUE;
4858 if (parser->m_startDoctypeDeclHandler) {
4859 XML_Char *pubId;
4860 if (! XmlIsPublicId(enc, s, next, eventPP))
4861 return XML_ERROR_PUBLICID;
4862 pubId = poolStoreString(&parser->m_tempPool, enc,
4863 s + enc->minBytesPerChar,
4864 next - enc->minBytesPerChar);
4865 if (! pubId)
4866 return XML_ERROR_NO_MEMORY;
4867 normalizePublicId(pubId);
4868 poolFinish(&parser->m_tempPool);
4869 parser->m_doctypePubid = pubId;
4870 handleDefault = XML_FALSE;
4871 goto alreadyChecked;
4872 }
4873 /* fall through */
4874 case XML_ROLE_ENTITY_PUBLIC_ID:
4875 if (! XmlIsPublicId(enc, s, next, eventPP))
4876 return XML_ERROR_PUBLICID;
4877 alreadyChecked:
4878 if (dtd->keepProcessing && parser->m_declEntity) {
4879 XML_Char *tem
4880 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4881 next - enc->minBytesPerChar);
4882 if (! tem)
4883 return XML_ERROR_NO_MEMORY;
4884 normalizePublicId(tem);
4885 parser->m_declEntity->publicId = tem;
4886 poolFinish(&dtd->pool);
4887 /* Don't suppress the default handler if we fell through from
4888 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4889 */
4890 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4891 handleDefault = XML_FALSE;
4892 }
4893 break;
4894 case XML_ROLE_DOCTYPE_CLOSE:
4895 if (allowClosingDoctype != XML_TRUE) {
4896 /* Must not close doctype from within expanded parameter entities */
4897 return XML_ERROR_INVALID_TOKEN;
4898 }
4899
4900 if (parser->m_doctypeName) {
4901 parser->m_startDoctypeDeclHandler(
4902 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4903 parser->m_doctypePubid, 0);
4904 poolClear(&parser->m_tempPool);
4905 handleDefault = XML_FALSE;
4906 }
4907 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4908 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4909 was not set, indicating an external subset
4910 */
4911 #ifdef XML_DTD
4912 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4913 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4914 dtd->hasParamEntityRefs = XML_TRUE;
4915 if (parser->m_paramEntityParsing
4916 && parser->m_externalEntityRefHandler) {
4917 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4918 externalSubsetName, sizeof(ENTITY));
4919 if (! entity) {
4920 /* The external subset name "#" will have already been
4921 * inserted into the hash table at the start of the
4922 * external entity parsing, so no allocation will happen
4923 * and lookup() cannot fail.
4924 */
4925 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4926 }
4927 if (parser->m_useForeignDTD)
4928 entity->base = parser->m_curBase;
4929 dtd->paramEntityRead = XML_FALSE;
4930 if (! parser->m_externalEntityRefHandler(
4931 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4932 entity->systemId, entity->publicId))
4933 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4934 if (dtd->paramEntityRead) {
4935 if (! dtd->standalone && parser->m_notStandaloneHandler
4936 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4937 return XML_ERROR_NOT_STANDALONE;
4938 }
4939 /* if we didn't read the foreign DTD then this means that there
4940 is no external subset and we must reset dtd->hasParamEntityRefs
4941 */
4942 else if (! parser->m_doctypeSysid)
4943 dtd->hasParamEntityRefs = hadParamEntityRefs;
4944 /* end of DTD - no need to update dtd->keepProcessing */
4945 }
4946 parser->m_useForeignDTD = XML_FALSE;
4947 }
4948 #endif /* XML_DTD */
4949 if (parser->m_endDoctypeDeclHandler) {
4950 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4951 handleDefault = XML_FALSE;
4952 }
4953 break;
4954 case XML_ROLE_INSTANCE_START:
4955 #ifdef XML_DTD
4956 /* if there is no DOCTYPE declaration then now is the
4957 last chance to read the foreign DTD
4958 */
4959 if (parser->m_useForeignDTD) {
4960 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4961 dtd->hasParamEntityRefs = XML_TRUE;
4962 if (parser->m_paramEntityParsing
4963 && parser->m_externalEntityRefHandler) {
4964 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4965 externalSubsetName, sizeof(ENTITY));
4966 if (! entity)
4967 return XML_ERROR_NO_MEMORY;
4968 entity->base = parser->m_curBase;
4969 dtd->paramEntityRead = XML_FALSE;
4970 if (! parser->m_externalEntityRefHandler(
4971 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4972 entity->systemId, entity->publicId))
4973 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4974 if (dtd->paramEntityRead) {
4975 if (! dtd->standalone && parser->m_notStandaloneHandler
4976 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4977 return XML_ERROR_NOT_STANDALONE;
4978 }
4979 /* if we didn't read the foreign DTD then this means that there
4980 is no external subset and we must reset dtd->hasParamEntityRefs
4981 */
4982 else
4983 dtd->hasParamEntityRefs = hadParamEntityRefs;
4984 /* end of DTD - no need to update dtd->keepProcessing */
4985 }
4986 }
4987 #endif /* XML_DTD */
4988 parser->m_processor = contentProcessor;
4989 return contentProcessor(parser, s, end, nextPtr);
4990 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4991 parser->m_declElementType = getElementType(parser, enc, s, next);
4992 if (! parser->m_declElementType)
4993 return XML_ERROR_NO_MEMORY;
4994 goto checkAttListDeclHandler;
4995 case XML_ROLE_ATTRIBUTE_NAME:
4996 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4997 if (! parser->m_declAttributeId)
4998 return XML_ERROR_NO_MEMORY;
4999 parser->m_declAttributeIsCdata = XML_FALSE;
5000 parser->m_declAttributeType = NULL;
5001 parser->m_declAttributeIsId = XML_FALSE;
5002 goto checkAttListDeclHandler;
5003 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
5004 parser->m_declAttributeIsCdata = XML_TRUE;
5005 parser->m_declAttributeType = atypeCDATA;
5006 goto checkAttListDeclHandler;
5007 case XML_ROLE_ATTRIBUTE_TYPE_ID:
5008 parser->m_declAttributeIsId = XML_TRUE;
5009 parser->m_declAttributeType = atypeID;
5010 goto checkAttListDeclHandler;
5011 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5012 parser->m_declAttributeType = atypeIDREF;
5013 goto checkAttListDeclHandler;
5014 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5015 parser->m_declAttributeType = atypeIDREFS;
5016 goto checkAttListDeclHandler;
5017 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5018 parser->m_declAttributeType = atypeENTITY;
5019 goto checkAttListDeclHandler;
5020 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5021 parser->m_declAttributeType = atypeENTITIES;
5022 goto checkAttListDeclHandler;
5023 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5024 parser->m_declAttributeType = atypeNMTOKEN;
5025 goto checkAttListDeclHandler;
5026 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5027 parser->m_declAttributeType = atypeNMTOKENS;
5028 checkAttListDeclHandler:
5029 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5030 handleDefault = XML_FALSE;
5031 break;
5032 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5033 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5034 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5035 const XML_Char *prefix;
5036 if (parser->m_declAttributeType) {
5037 prefix = enumValueSep;
5038 } else {
5039 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5040 : enumValueStart);
5041 }
5042 if (! poolAppendString(&parser->m_tempPool, prefix))
5043 return XML_ERROR_NO_MEMORY;
5044 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5045 return XML_ERROR_NO_MEMORY;
5046 parser->m_declAttributeType = parser->m_tempPool.start;
5047 handleDefault = XML_FALSE;
5048 }
5049 break;
5050 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5051 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5052 if (dtd->keepProcessing) {
5053 if (! defineAttribute(parser->m_declElementType,
5054 parser->m_declAttributeId,
5055 parser->m_declAttributeIsCdata,
5056 parser->m_declAttributeIsId, 0, parser))
5057 return XML_ERROR_NO_MEMORY;
5058 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5059 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5060 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5061 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5062 /* Enumerated or Notation type */
5063 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5064 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5065 return XML_ERROR_NO_MEMORY;
5066 parser->m_declAttributeType = parser->m_tempPool.start;
5067 poolFinish(&parser->m_tempPool);
5068 }
5069 *eventEndPP = s;
5070 parser->m_attlistDeclHandler(
5071 parser->m_handlerArg, parser->m_declElementType->name,
5072 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5073 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5074 handleDefault = XML_FALSE;
5075 }
5076 }
5077 poolClear(&parser->m_tempPool);
5078 break;
5079 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5080 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5081 if (dtd->keepProcessing) {
5082 const XML_Char *attVal;
5083 enum XML_Error result = storeAttributeValue(
5084 parser, enc, parser->m_declAttributeIsCdata,
5085 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5086 XML_ACCOUNT_NONE);
5087 if (result)
5088 return result;
5089 attVal = poolStart(&dtd->pool);
5090 poolFinish(&dtd->pool);
5091 /* ID attributes aren't allowed to have a default */
5092 if (! defineAttribute(
5093 parser->m_declElementType, parser->m_declAttributeId,
5094 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5095 return XML_ERROR_NO_MEMORY;
5096 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5097 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5098 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5099 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5100 /* Enumerated or Notation type */
5101 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5102 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5103 return XML_ERROR_NO_MEMORY;
5104 parser->m_declAttributeType = parser->m_tempPool.start;
5105 poolFinish(&parser->m_tempPool);
5106 }
5107 *eventEndPP = s;
5108 parser->m_attlistDeclHandler(
5109 parser->m_handlerArg, parser->m_declElementType->name,
5110 parser->m_declAttributeId->name, parser->m_declAttributeType,
5111 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5112 poolClear(&parser->m_tempPool);
5113 handleDefault = XML_FALSE;
5114 }
5115 }
5116 break;
5117 case XML_ROLE_ENTITY_VALUE:
5118 if (dtd->keepProcessing) {
5119 #if XML_GE == 1
5120 // This will store the given replacement text in
5121 // parser->m_declEntity->textPtr.
5122 enum XML_Error result
5123 = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5124 next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5125 if (parser->m_declEntity) {
5126 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5127 parser->m_declEntity->textLen
5128 = (int)(poolLength(&dtd->entityValuePool));
5129 poolFinish(&dtd->entityValuePool);
5130 if (parser->m_entityDeclHandler) {
5131 *eventEndPP = s;
5132 parser->m_entityDeclHandler(
5133 parser->m_handlerArg, parser->m_declEntity->name,
5134 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5135 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5136 handleDefault = XML_FALSE;
5137 }
5138 } else
5139 poolDiscard(&dtd->entityValuePool);
5140 if (result != XML_ERROR_NONE)
5141 return result;
5142 #else
5143 // This will store "&entity123;" in parser->m_declEntity->textPtr
5144 // to end up as "&entity123;" in the handler.
5145 if (parser->m_declEntity != NULL) {
5146 const enum XML_Error result
5147 = storeSelfEntityValue(parser, parser->m_declEntity);
5148 if (result != XML_ERROR_NONE)
5149 return result;
5150
5151 if (parser->m_entityDeclHandler) {
5152 *eventEndPP = s;
5153 parser->m_entityDeclHandler(
5154 parser->m_handlerArg, parser->m_declEntity->name,
5155 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5156 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5157 handleDefault = XML_FALSE;
5158 }
5159 }
5160 #endif
5161 }
5162 break;
5163 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5164 #ifdef XML_DTD
5165 parser->m_useForeignDTD = XML_FALSE;
5166 #endif /* XML_DTD */
5167 dtd->hasParamEntityRefs = XML_TRUE;
5168 if (parser->m_startDoctypeDeclHandler) {
5169 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5170 s + enc->minBytesPerChar,
5171 next - enc->minBytesPerChar);
5172 if (parser->m_doctypeSysid == NULL)
5173 return XML_ERROR_NO_MEMORY;
5174 poolFinish(&parser->m_tempPool);
5175 handleDefault = XML_FALSE;
5176 }
5177 #ifdef XML_DTD
5178 else
5179 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5180 for the case where no parser->m_startDoctypeDeclHandler is set */
5181 parser->m_doctypeSysid = externalSubsetName;
5182 #endif /* XML_DTD */
5183 if (! dtd->standalone
5184 #ifdef XML_DTD
5185 && ! parser->m_paramEntityParsing
5186 #endif /* XML_DTD */
5187 && parser->m_notStandaloneHandler
5188 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5189 return XML_ERROR_NOT_STANDALONE;
5190 #ifndef XML_DTD
5191 break;
5192 #else /* XML_DTD */
5193 if (! parser->m_declEntity) {
5194 parser->m_declEntity = (ENTITY *)lookup(
5195 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5196 if (! parser->m_declEntity)
5197 return XML_ERROR_NO_MEMORY;
5198 parser->m_declEntity->publicId = NULL;
5199 }
5200 #endif /* XML_DTD */
5201 /* fall through */
5202 case XML_ROLE_ENTITY_SYSTEM_ID:
5203 if (dtd->keepProcessing && parser->m_declEntity) {
5204 parser->m_declEntity->systemId
5205 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5206 next - enc->minBytesPerChar);
5207 if (! parser->m_declEntity->systemId)
5208 return XML_ERROR_NO_MEMORY;
5209 parser->m_declEntity->base = parser->m_curBase;
5210 poolFinish(&dtd->pool);
5211 /* Don't suppress the default handler if we fell through from
5212 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5213 */
5214 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5215 handleDefault = XML_FALSE;
5216 }
5217 break;
5218 case XML_ROLE_ENTITY_COMPLETE:
5219 #if XML_GE == 0
5220 // This will store "&entity123;" in entity->textPtr
5221 // to end up as "&entity123;" in the handler.
5222 if (parser->m_declEntity != NULL) {
5223 const enum XML_Error result
5224 = storeSelfEntityValue(parser, parser->m_declEntity);
5225 if (result != XML_ERROR_NONE)
5226 return result;
5227 }
5228 #endif
5229 if (dtd->keepProcessing && parser->m_declEntity
5230 && parser->m_entityDeclHandler) {
5231 *eventEndPP = s;
5232 parser->m_entityDeclHandler(
5233 parser->m_handlerArg, parser->m_declEntity->name,
5234 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5235 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5236 handleDefault = XML_FALSE;
5237 }
5238 break;
5239 case XML_ROLE_ENTITY_NOTATION_NAME:
5240 if (dtd->keepProcessing && parser->m_declEntity) {
5241 parser->m_declEntity->notation
5242 = poolStoreString(&dtd->pool, enc, s, next);
5243 if (! parser->m_declEntity->notation)
5244 return XML_ERROR_NO_MEMORY;
5245 poolFinish(&dtd->pool);
5246 if (parser->m_unparsedEntityDeclHandler) {
5247 *eventEndPP = s;
5248 parser->m_unparsedEntityDeclHandler(
5249 parser->m_handlerArg, parser->m_declEntity->name,
5250 parser->m_declEntity->base, parser->m_declEntity->systemId,
5251 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5252 handleDefault = XML_FALSE;
5253 } else if (parser->m_entityDeclHandler) {
5254 *eventEndPP = s;
5255 parser->m_entityDeclHandler(
5256 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5257 parser->m_declEntity->base, parser->m_declEntity->systemId,
5258 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5259 handleDefault = XML_FALSE;
5260 }
5261 }
5262 break;
5263 case XML_ROLE_GENERAL_ENTITY_NAME: {
5264 if (XmlPredefinedEntityName(enc, s, next)) {
5265 parser->m_declEntity = NULL;
5266 break;
5267 }
5268 if (dtd->keepProcessing) {
5269 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5270 if (! name)
5271 return XML_ERROR_NO_MEMORY;
5272 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5273 name, sizeof(ENTITY));
5274 if (! parser->m_declEntity)
5275 return XML_ERROR_NO_MEMORY;
5276 if (parser->m_declEntity->name != name) {
5277 poolDiscard(&dtd->pool);
5278 parser->m_declEntity = NULL;
5279 } else {
5280 poolFinish(&dtd->pool);
5281 parser->m_declEntity->publicId = NULL;
5282 parser->m_declEntity->is_param = XML_FALSE;
5283 /* if we have a parent parser or are reading an internal parameter
5284 entity, then the entity declaration is not considered "internal"
5285 */
5286 parser->m_declEntity->is_internal
5287 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5288 if (parser->m_entityDeclHandler)
5289 handleDefault = XML_FALSE;
5290 }
5291 } else {
5292 poolDiscard(&dtd->pool);
5293 parser->m_declEntity = NULL;
5294 }
5295 } break;
5296 case XML_ROLE_PARAM_ENTITY_NAME:
5297 #ifdef XML_DTD
5298 if (dtd->keepProcessing) {
5299 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5300 if (! name)
5301 return XML_ERROR_NO_MEMORY;
5302 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5303 name, sizeof(ENTITY));
5304 if (! parser->m_declEntity)
5305 return XML_ERROR_NO_MEMORY;
5306 if (parser->m_declEntity->name != name) {
5307 poolDiscard(&dtd->pool);
5308 parser->m_declEntity = NULL;
5309 } else {
5310 poolFinish(&dtd->pool);
5311 parser->m_declEntity->publicId = NULL;
5312 parser->m_declEntity->is_param = XML_TRUE;
5313 /* if we have a parent parser or are reading an internal parameter
5314 entity, then the entity declaration is not considered "internal"
5315 */
5316 parser->m_declEntity->is_internal
5317 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5318 if (parser->m_entityDeclHandler)
5319 handleDefault = XML_FALSE;
5320 }
5321 } else {
5322 poolDiscard(&dtd->pool);
5323 parser->m_declEntity = NULL;
5324 }
5325 #else /* not XML_DTD */
5326 parser->m_declEntity = NULL;
5327 #endif /* XML_DTD */
5328 break;
5329 case XML_ROLE_NOTATION_NAME:
5330 parser->m_declNotationPublicId = NULL;
5331 parser->m_declNotationName = NULL;
5332 if (parser->m_notationDeclHandler) {
5333 parser->m_declNotationName
5334 = poolStoreString(&parser->m_tempPool, enc, s, next);
5335 if (! parser->m_declNotationName)
5336 return XML_ERROR_NO_MEMORY;
5337 poolFinish(&parser->m_tempPool);
5338 handleDefault = XML_FALSE;
5339 }
5340 break;
5341 case XML_ROLE_NOTATION_PUBLIC_ID:
5342 if (! XmlIsPublicId(enc, s, next, eventPP))
5343 return XML_ERROR_PUBLICID;
5344 if (parser
5345 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5346 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5347 s + enc->minBytesPerChar,
5348 next - enc->minBytesPerChar);
5349 if (! tem)
5350 return XML_ERROR_NO_MEMORY;
5351 normalizePublicId(tem);
5352 parser->m_declNotationPublicId = tem;
5353 poolFinish(&parser->m_tempPool);
5354 handleDefault = XML_FALSE;
5355 }
5356 break;
5357 case XML_ROLE_NOTATION_SYSTEM_ID:
5358 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5359 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5360 s + enc->minBytesPerChar,
5361 next - enc->minBytesPerChar);
5362 if (! systemId)
5363 return XML_ERROR_NO_MEMORY;
5364 *eventEndPP = s;
5365 parser->m_notationDeclHandler(
5366 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5367 systemId, parser->m_declNotationPublicId);
5368 handleDefault = XML_FALSE;
5369 }
5370 poolClear(&parser->m_tempPool);
5371 break;
5372 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5373 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5374 *eventEndPP = s;
5375 parser->m_notationDeclHandler(
5376 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5377 0, parser->m_declNotationPublicId);
5378 handleDefault = XML_FALSE;
5379 }
5380 poolClear(&parser->m_tempPool);
5381 break;
5382 case XML_ROLE_ERROR:
5383 switch (tok) {
5384 case XML_TOK_PARAM_ENTITY_REF:
5385 /* PE references in internal subset are
5386 not allowed within declarations. */
5387 return XML_ERROR_PARAM_ENTITY_REF;
5388 case XML_TOK_XML_DECL:
5389 return XML_ERROR_MISPLACED_XML_PI;
5390 default:
5391 return XML_ERROR_SYNTAX;
5392 }
5393 #ifdef XML_DTD
5394 case XML_ROLE_IGNORE_SECT: {
5395 enum XML_Error result;
5396 if (parser->m_defaultHandler)
5397 reportDefault(parser, enc, s, next);
5398 handleDefault = XML_FALSE;
5399 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5400 if (result != XML_ERROR_NONE)
5401 return result;
5402 else if (! next) {
5403 parser->m_processor = ignoreSectionProcessor;
5404 return result;
5405 }
5406 } break;
5407 #endif /* XML_DTD */
5408 case XML_ROLE_GROUP_OPEN:
5409 if (parser->m_prologState.level >= parser->m_groupSize) {
5410 if (parser->m_groupSize) {
5411 {
5412 /* Detect and prevent integer overflow */
5413 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5414 return XML_ERROR_NO_MEMORY;
5415 }
5416
5417 char *const new_connector = (char *)REALLOC(
5418 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5419 if (new_connector == NULL) {
5420 parser->m_groupSize /= 2;
5421 return XML_ERROR_NO_MEMORY;
5422 }
5423 parser->m_groupConnector = new_connector;
5424 }
5425
5426 if (dtd->scaffIndex) {
5427 /* Detect and prevent integer overflow.
5428 * The preprocessor guard addresses the "always false" warning
5429 * from -Wtype-limits on platforms where
5430 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5431 #if UINT_MAX >= SIZE_MAX
5432 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5433 return XML_ERROR_NO_MEMORY;
5434 }
5435 #endif
5436
5437 int *const new_scaff_index = (int *)REALLOC(
5438 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5439 if (new_scaff_index == NULL)
5440 return XML_ERROR_NO_MEMORY;
5441 dtd->scaffIndex = new_scaff_index;
5442 }
5443 } else {
5444 parser->m_groupConnector
5445 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5446 if (! parser->m_groupConnector) {
5447 parser->m_groupSize = 0;
5448 return XML_ERROR_NO_MEMORY;
5449 }
5450 }
5451 }
5452 parser->m_groupConnector[parser->m_prologState.level] = 0;
5453 if (dtd->in_eldecl) {
5454 int myindex = nextScaffoldPart(parser);
5455 if (myindex < 0)
5456 return XML_ERROR_NO_MEMORY;
5457 assert(dtd->scaffIndex != NULL);
5458 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5459 dtd->scaffLevel++;
5460 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5461 if (parser->m_elementDeclHandler)
5462 handleDefault = XML_FALSE;
5463 }
5464 break;
5465 case XML_ROLE_GROUP_SEQUENCE:
5466 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5467 return XML_ERROR_SYNTAX;
5468 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5469 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5470 handleDefault = XML_FALSE;
5471 break;
5472 case XML_ROLE_GROUP_CHOICE:
5473 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5474 return XML_ERROR_SYNTAX;
5475 if (dtd->in_eldecl
5476 && ! parser->m_groupConnector[parser->m_prologState.level]
5477 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5478 != XML_CTYPE_MIXED)) {
5479 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5480 = XML_CTYPE_CHOICE;
5481 if (parser->m_elementDeclHandler)
5482 handleDefault = XML_FALSE;
5483 }
5484 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5485 break;
5486 case XML_ROLE_PARAM_ENTITY_REF:
5487 #ifdef XML_DTD
5488 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5489 dtd->hasParamEntityRefs = XML_TRUE;
5490 if (! parser->m_paramEntityParsing)
5491 dtd->keepProcessing = dtd->standalone;
5492 else {
5493 const XML_Char *name;
5494 ENTITY *entity;
5495 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5496 next - enc->minBytesPerChar);
5497 if (! name)
5498 return XML_ERROR_NO_MEMORY;
5499 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5500 poolDiscard(&dtd->pool);
5501 /* first, determine if a check for an existing declaration is needed;
5502 if yes, check that the entity exists, and that it is internal,
5503 otherwise call the skipped entity handler
5504 */
5505 if (parser->m_prologState.documentEntity
5506 && (dtd->standalone ? ! parser->m_openInternalEntities
5507 : ! dtd->hasParamEntityRefs)) {
5508 if (! entity)
5509 return XML_ERROR_UNDEFINED_ENTITY;
5510 else if (! entity->is_internal) {
5511 /* It's hard to exhaustively search the code to be sure,
5512 * but there doesn't seem to be a way of executing the
5513 * following line. There are two cases:
5514 *
5515 * If 'standalone' is false, the DTD must have no
5516 * parameter entities or we wouldn't have passed the outer
5517 * 'if' statement. That means the only entity in the hash
5518 * table is the external subset name "#" which cannot be
5519 * given as a parameter entity name in XML syntax, so the
5520 * lookup must have returned NULL and we don't even reach
5521 * the test for an internal entity.
5522 *
5523 * If 'standalone' is true, it does not seem to be
5524 * possible to create entities taking this code path that
5525 * are not internal entities, so fail the test above.
5526 *
5527 * Because this analysis is very uncertain, the code is
5528 * being left in place and merely removed from the
5529 * coverage test statistics.
5530 */
5531 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5532 }
5533 } else if (! entity) {
5534 dtd->keepProcessing = dtd->standalone;
5535 /* cannot report skipped entities in declarations */
5536 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5537 && parser->m_skippedEntityHandler) {
5538 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5539 handleDefault = XML_FALSE;
5540 }
5541 break;
5542 }
5543 if (entity->open)
5544 return XML_ERROR_RECURSIVE_ENTITY_REF;
5545 if (entity->textPtr) {
5546 enum XML_Error result;
5547 XML_Bool betweenDecl
5548 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5549 result = processInternalEntity(parser, entity, betweenDecl);
5550 if (result != XML_ERROR_NONE)
5551 return result;
5552 handleDefault = XML_FALSE;
5553 break;
5554 }
5555 if (parser->m_externalEntityRefHandler) {
5556 dtd->paramEntityRead = XML_FALSE;
5557 entity->open = XML_TRUE;
5558 entityTrackingOnOpen(parser, entity, __LINE__);
5559 if (! parser->m_externalEntityRefHandler(
5560 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5561 entity->systemId, entity->publicId)) {
5562 entityTrackingOnClose(parser, entity, __LINE__);
5563 entity->open = XML_FALSE;
5564 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5565 }
5566 entityTrackingOnClose(parser, entity, __LINE__);
5567 entity->open = XML_FALSE;
5568 handleDefault = XML_FALSE;
5569 if (! dtd->paramEntityRead) {
5570 dtd->keepProcessing = dtd->standalone;
5571 break;
5572 }
5573 } else {
5574 dtd->keepProcessing = dtd->standalone;
5575 break;
5576 }
5577 }
5578 #endif /* XML_DTD */
5579 if (! dtd->standalone && parser->m_notStandaloneHandler
5580 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5581 return XML_ERROR_NOT_STANDALONE;
5582 break;
5583
5584 /* Element declaration stuff */
5585
5586 case XML_ROLE_ELEMENT_NAME:
5587 if (parser->m_elementDeclHandler) {
5588 parser->m_declElementType = getElementType(parser, enc, s, next);
5589 if (! parser->m_declElementType)
5590 return XML_ERROR_NO_MEMORY;
5591 dtd->scaffLevel = 0;
5592 dtd->scaffCount = 0;
5593 dtd->in_eldecl = XML_TRUE;
5594 handleDefault = XML_FALSE;
5595 }
5596 break;
5597
5598 case XML_ROLE_CONTENT_ANY:
5599 case XML_ROLE_CONTENT_EMPTY:
5600 if (dtd->in_eldecl) {
5601 if (parser->m_elementDeclHandler) {
5602 XML_Content *content
5603 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5604 if (! content)
5605 return XML_ERROR_NO_MEMORY;
5606 content->quant = XML_CQUANT_NONE;
5607 content->name = NULL;
5608 content->numchildren = 0;
5609 content->children = NULL;
5610 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5611 : XML_CTYPE_EMPTY);
5612 *eventEndPP = s;
5613 parser->m_elementDeclHandler(
5614 parser->m_handlerArg, parser->m_declElementType->name, content);
5615 handleDefault = XML_FALSE;
5616 }
5617 dtd->in_eldecl = XML_FALSE;
5618 }
5619 break;
5620
5621 case XML_ROLE_CONTENT_PCDATA:
5622 if (dtd->in_eldecl) {
5623 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5624 = XML_CTYPE_MIXED;
5625 if (parser->m_elementDeclHandler)
5626 handleDefault = XML_FALSE;
5627 }
5628 break;
5629
5630 case XML_ROLE_CONTENT_ELEMENT:
5631 quant = XML_CQUANT_NONE;
5632 goto elementContent;
5633 case XML_ROLE_CONTENT_ELEMENT_OPT:
5634 quant = XML_CQUANT_OPT;
5635 goto elementContent;
5636 case XML_ROLE_CONTENT_ELEMENT_REP:
5637 quant = XML_CQUANT_REP;
5638 goto elementContent;
5639 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5640 quant = XML_CQUANT_PLUS;
5641 elementContent:
5642 if (dtd->in_eldecl) {
5643 ELEMENT_TYPE *el;
5644 const XML_Char *name;
5645 size_t nameLen;
5646 const char *nxt
5647 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5648 int myindex = nextScaffoldPart(parser);
5649 if (myindex < 0)
5650 return XML_ERROR_NO_MEMORY;
5651 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5652 dtd->scaffold[myindex].quant = quant;
5653 el = getElementType(parser, enc, s, nxt);
5654 if (! el)
5655 return XML_ERROR_NO_MEMORY;
5656 name = el->name;
5657 dtd->scaffold[myindex].name = name;
5658 nameLen = 0;
5659 for (; name[nameLen++];)
5660 ;
5661
5662 /* Detect and prevent integer overflow */
5663 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5664 return XML_ERROR_NO_MEMORY;
5665 }
5666
5667 dtd->contentStringLen += (unsigned)nameLen;
5668 if (parser->m_elementDeclHandler)
5669 handleDefault = XML_FALSE;
5670 }
5671 break;
5672
5673 case XML_ROLE_GROUP_CLOSE:
5674 quant = XML_CQUANT_NONE;
5675 goto closeGroup;
5676 case XML_ROLE_GROUP_CLOSE_OPT:
5677 quant = XML_CQUANT_OPT;
5678 goto closeGroup;
5679 case XML_ROLE_GROUP_CLOSE_REP:
5680 quant = XML_CQUANT_REP;
5681 goto closeGroup;
5682 case XML_ROLE_GROUP_CLOSE_PLUS:
5683 quant = XML_CQUANT_PLUS;
5684 closeGroup:
5685 if (dtd->in_eldecl) {
5686 if (parser->m_elementDeclHandler)
5687 handleDefault = XML_FALSE;
5688 dtd->scaffLevel--;
5689 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5690 if (dtd->scaffLevel == 0) {
5691 if (! handleDefault) {
5692 XML_Content *model = build_model(parser);
5693 if (! model)
5694 return XML_ERROR_NO_MEMORY;
5695 *eventEndPP = s;
5696 parser->m_elementDeclHandler(
5697 parser->m_handlerArg, parser->m_declElementType->name, model);
5698 }
5699 dtd->in_eldecl = XML_FALSE;
5700 dtd->contentStringLen = 0;
5701 }
5702 }
5703 break;
5704 /* End element declaration stuff */
5705
5706 case XML_ROLE_PI:
5707 if (! reportProcessingInstruction(parser, enc, s, next))
5708 return XML_ERROR_NO_MEMORY;
5709 handleDefault = XML_FALSE;
5710 break;
5711 case XML_ROLE_COMMENT:
5712 if (! reportComment(parser, enc, s, next))
5713 return XML_ERROR_NO_MEMORY;
5714 handleDefault = XML_FALSE;
5715 break;
5716 case XML_ROLE_NONE:
5717 switch (tok) {
5718 case XML_TOK_BOM:
5719 handleDefault = XML_FALSE;
5720 break;
5721 }
5722 break;
5723 case XML_ROLE_DOCTYPE_NONE:
5724 if (parser->m_startDoctypeDeclHandler)
5725 handleDefault = XML_FALSE;
5726 break;
5727 case XML_ROLE_ENTITY_NONE:
5728 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5729 handleDefault = XML_FALSE;
5730 break;
5731 case XML_ROLE_NOTATION_NONE:
5732 if (parser->m_notationDeclHandler)
5733 handleDefault = XML_FALSE;
5734 break;
5735 case XML_ROLE_ATTLIST_NONE:
5736 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5737 handleDefault = XML_FALSE;
5738 break;
5739 case XML_ROLE_ELEMENT_NONE:
5740 if (parser->m_elementDeclHandler)
5741 handleDefault = XML_FALSE;
5742 break;
5743 } /* end of big switch */
5744
5745 if (handleDefault && parser->m_defaultHandler)
5746 reportDefault(parser, enc, s, next);
5747
5748 switch (parser->m_parsingStatus.parsing) {
5749 case XML_SUSPENDED:
5750 *nextPtr = next;
5751 return XML_ERROR_NONE;
5752 case XML_FINISHED:
5753 return XML_ERROR_ABORTED;
5754 default:
5755 s = next;
5756 tok = XmlPrologTok(enc, s, end, &next);
5757 }
5758 }
5759 /* not reached */
5760 }
5761
5762 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5763 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5764 const char **nextPtr) {
5765 parser->m_processor = epilogProcessor;
5766 parser->m_eventPtr = s;
5767 for (;;) {
5768 const char *next = NULL;
5769 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5770 #if XML_GE == 1
5771 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5772 XML_ACCOUNT_DIRECT)) {
5773 accountingOnAbort(parser);
5774 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5775 }
5776 #endif
5777 parser->m_eventEndPtr = next;
5778 switch (tok) {
5779 /* report partial linebreak - it might be the last token */
5780 case -XML_TOK_PROLOG_S:
5781 if (parser->m_defaultHandler) {
5782 reportDefault(parser, parser->m_encoding, s, next);
5783 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5784 return XML_ERROR_ABORTED;
5785 }
5786 *nextPtr = next;
5787 return XML_ERROR_NONE;
5788 case XML_TOK_NONE:
5789 *nextPtr = s;
5790 return XML_ERROR_NONE;
5791 case XML_TOK_PROLOG_S:
5792 if (parser->m_defaultHandler)
5793 reportDefault(parser, parser->m_encoding, s, next);
5794 break;
5795 case XML_TOK_PI:
5796 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5797 return XML_ERROR_NO_MEMORY;
5798 break;
5799 case XML_TOK_COMMENT:
5800 if (! reportComment(parser, parser->m_encoding, s, next))
5801 return XML_ERROR_NO_MEMORY;
5802 break;
5803 case XML_TOK_INVALID:
5804 parser->m_eventPtr = next;
5805 return XML_ERROR_INVALID_TOKEN;
5806 case XML_TOK_PARTIAL:
5807 if (! parser->m_parsingStatus.finalBuffer) {
5808 *nextPtr = s;
5809 return XML_ERROR_NONE;
5810 }
5811 return XML_ERROR_UNCLOSED_TOKEN;
5812 case XML_TOK_PARTIAL_CHAR:
5813 if (! parser->m_parsingStatus.finalBuffer) {
5814 *nextPtr = s;
5815 return XML_ERROR_NONE;
5816 }
5817 return XML_ERROR_PARTIAL_CHAR;
5818 default:
5819 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5820 }
5821 parser->m_eventPtr = s = next;
5822 switch (parser->m_parsingStatus.parsing) {
5823 case XML_SUSPENDED:
5824 *nextPtr = next;
5825 return XML_ERROR_NONE;
5826 case XML_FINISHED:
5827 return XML_ERROR_ABORTED;
5828 default:;
5829 }
5830 }
5831 }
5832
5833 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5834 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5835 const char *textStart, *textEnd;
5836 const char *next;
5837 enum XML_Error result;
5838 OPEN_INTERNAL_ENTITY *openEntity;
5839
5840 if (parser->m_freeInternalEntities) {
5841 openEntity = parser->m_freeInternalEntities;
5842 parser->m_freeInternalEntities = openEntity->next;
5843 } else {
5844 openEntity
5845 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5846 if (! openEntity)
5847 return XML_ERROR_NO_MEMORY;
5848 }
5849 entity->open = XML_TRUE;
5850 #if XML_GE == 1
5851 entityTrackingOnOpen(parser, entity, __LINE__);
5852 #endif
5853 entity->processed = 0;
5854 openEntity->next = parser->m_openInternalEntities;
5855 parser->m_openInternalEntities = openEntity;
5856 openEntity->entity = entity;
5857 openEntity->startTagLevel = parser->m_tagLevel;
5858 openEntity->betweenDecl = betweenDecl;
5859 openEntity->internalEventPtr = NULL;
5860 openEntity->internalEventEndPtr = NULL;
5861 textStart = (const char *)entity->textPtr;
5862 textEnd = (const char *)(entity->textPtr + entity->textLen);
5863 /* Set a safe default value in case 'next' does not get set */
5864 next = textStart;
5865
5866 if (entity->is_param) {
5867 int tok
5868 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5869 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5870 tok, next, &next, XML_FALSE, XML_FALSE,
5871 XML_ACCOUNT_ENTITY_EXPANSION);
5872 } else {
5873 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5874 textStart, textEnd, &next, XML_FALSE,
5875 XML_ACCOUNT_ENTITY_EXPANSION);
5876 }
5877
5878 if (result == XML_ERROR_NONE) {
5879 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5880 entity->processed = (int)(next - textStart);
5881 parser->m_processor = internalEntityProcessor;
5882 } else if (parser->m_openInternalEntities->entity == entity) {
5883 #if XML_GE == 1
5884 entityTrackingOnClose(parser, entity, __LINE__);
5885 #endif /* XML_GE == 1 */
5886 entity->open = XML_FALSE;
5887 parser->m_openInternalEntities = openEntity->next;
5888 /* put openEntity back in list of free instances */
5889 openEntity->next = parser->m_freeInternalEntities;
5890 parser->m_freeInternalEntities = openEntity;
5891 }
5892 }
5893 return result;
5894 }
5895
5896 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5897 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5898 const char **nextPtr) {
5899 ENTITY *entity;
5900 const char *textStart, *textEnd;
5901 const char *next;
5902 enum XML_Error result;
5903 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5904 if (! openEntity)
5905 return XML_ERROR_UNEXPECTED_STATE;
5906
5907 entity = openEntity->entity;
5908 textStart = ((const char *)entity->textPtr) + entity->processed;
5909 textEnd = (const char *)(entity->textPtr + entity->textLen);
5910 /* Set a safe default value in case 'next' does not get set */
5911 next = textStart;
5912
5913 if (entity->is_param) {
5914 int tok
5915 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5916 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5917 tok, next, &next, XML_FALSE, XML_TRUE,
5918 XML_ACCOUNT_ENTITY_EXPANSION);
5919 } else {
5920 result = doContent(parser, openEntity->startTagLevel,
5921 parser->m_internalEncoding, textStart, textEnd, &next,
5922 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5923 }
5924
5925 if (result != XML_ERROR_NONE)
5926 return result;
5927
5928 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5929 entity->processed = (int)(next - (const char *)entity->textPtr);
5930 return result;
5931 }
5932
5933 #if XML_GE == 1
5934 entityTrackingOnClose(parser, entity, __LINE__);
5935 #endif
5936 entity->open = XML_FALSE;
5937 parser->m_openInternalEntities = openEntity->next;
5938 /* put openEntity back in list of free instances */
5939 openEntity->next = parser->m_freeInternalEntities;
5940 parser->m_freeInternalEntities = openEntity;
5941
5942 // If there are more open entities we want to stop right here and have the
5943 // upcoming call to XML_ResumeParser continue with entity content, or it would
5944 // be ignored altogether.
5945 if (parser->m_openInternalEntities != NULL
5946 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5947 return XML_ERROR_NONE;
5948 }
5949
5950 if (entity->is_param) {
5951 int tok;
5952 parser->m_processor = prologProcessor;
5953 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5954 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5955 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5956 XML_ACCOUNT_DIRECT);
5957 } else {
5958 parser->m_processor = contentProcessor;
5959 /* see externalEntityContentProcessor vs contentProcessor */
5960 result = doContent(parser, parser->m_parentParser ? 1 : 0,
5961 parser->m_encoding, s, end, nextPtr,
5962 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5963 XML_ACCOUNT_DIRECT);
5964 if (result == XML_ERROR_NONE) {
5965 if (! storeRawNames(parser))
5966 return XML_ERROR_NO_MEMORY;
5967 }
5968 return result;
5969 }
5970 }
5971
5972 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5973 errorProcessor(XML_Parser parser, const char *s, const char *end,
5974 const char **nextPtr) {
5975 UNUSED_P(s);
5976 UNUSED_P(end);
5977 UNUSED_P(nextPtr);
5978 return parser->m_errorCode;
5979 }
5980
5981 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5982 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5983 const char *ptr, const char *end, STRING_POOL *pool,
5984 enum XML_Account account) {
5985 enum XML_Error result
5986 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5987 if (result)
5988 return result;
5989 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5990 poolChop(pool);
5991 if (! poolAppendChar(pool, XML_T('\0')))
5992 return XML_ERROR_NO_MEMORY;
5993 return XML_ERROR_NONE;
5994 }
5995
5996 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5997 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5998 const char *ptr, const char *end, STRING_POOL *pool,
5999 enum XML_Account account) {
6000 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6001 #ifndef XML_DTD
6002 UNUSED_P(account);
6003 #endif
6004
6005 for (;;) {
6006 const char *next
6007 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
6008 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
6009 #if XML_GE == 1
6010 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6011 accountingOnAbort(parser);
6012 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6013 }
6014 #endif
6015 switch (tok) {
6016 case XML_TOK_NONE:
6017 return XML_ERROR_NONE;
6018 case XML_TOK_INVALID:
6019 if (enc == parser->m_encoding)
6020 parser->m_eventPtr = next;
6021 return XML_ERROR_INVALID_TOKEN;
6022 case XML_TOK_PARTIAL:
6023 if (enc == parser->m_encoding)
6024 parser->m_eventPtr = ptr;
6025 return XML_ERROR_INVALID_TOKEN;
6026 case XML_TOK_CHAR_REF: {
6027 XML_Char buf[XML_ENCODE_MAX];
6028 int i;
6029 int n = XmlCharRefNumber(enc, ptr);
6030 if (n < 0) {
6031 if (enc == parser->m_encoding)
6032 parser->m_eventPtr = ptr;
6033 return XML_ERROR_BAD_CHAR_REF;
6034 }
6035 if (! isCdata && n == 0x20 /* space */
6036 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6037 break;
6038 n = XmlEncode(n, (ICHAR *)buf);
6039 /* The XmlEncode() functions can never return 0 here. That
6040 * error return happens if the code point passed in is either
6041 * negative or greater than or equal to 0x110000. The
6042 * XmlCharRefNumber() functions will all return a number
6043 * strictly less than 0x110000 or a negative value if an error
6044 * occurred. The negative value is intercepted above, so
6045 * XmlEncode() is never passed a value it might return an
6046 * error for.
6047 */
6048 for (i = 0; i < n; i++) {
6049 if (! poolAppendChar(pool, buf[i]))
6050 return XML_ERROR_NO_MEMORY;
6051 }
6052 } break;
6053 case XML_TOK_DATA_CHARS:
6054 if (! poolAppend(pool, enc, ptr, next))
6055 return XML_ERROR_NO_MEMORY;
6056 break;
6057 case XML_TOK_TRAILING_CR:
6058 next = ptr + enc->minBytesPerChar;
6059 /* fall through */
6060 case XML_TOK_ATTRIBUTE_VALUE_S:
6061 case XML_TOK_DATA_NEWLINE:
6062 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6063 break;
6064 if (! poolAppendChar(pool, 0x20))
6065 return XML_ERROR_NO_MEMORY;
6066 break;
6067 case XML_TOK_ENTITY_REF: {
6068 const XML_Char *name;
6069 ENTITY *entity;
6070 char checkEntityDecl;
6071 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6072 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6073 if (ch) {
6074 #if XML_GE == 1
6075 /* NOTE: We are replacing 4-6 characters original input for 1 character
6076 * so there is no amplification and hence recording without
6077 * protection. */
6078 accountingDiffTolerated(parser, tok, (char *)&ch,
6079 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6080 XML_ACCOUNT_ENTITY_EXPANSION);
6081 #endif /* XML_GE == 1 */
6082 if (! poolAppendChar(pool, ch))
6083 return XML_ERROR_NO_MEMORY;
6084 break;
6085 }
6086 name = poolStoreString(&parser->m_temp2Pool, enc,
6087 ptr + enc->minBytesPerChar,
6088 next - enc->minBytesPerChar);
6089 if (! name)
6090 return XML_ERROR_NO_MEMORY;
6091 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6092 poolDiscard(&parser->m_temp2Pool);
6093 /* First, determine if a check for an existing declaration is needed;
6094 if yes, check that the entity exists, and that it is internal.
6095 */
6096 if (pool == &dtd->pool) /* are we called from prolog? */
6097 checkEntityDecl =
6098 #ifdef XML_DTD
6099 parser->m_prologState.documentEntity &&
6100 #endif /* XML_DTD */
6101 (dtd->standalone ? ! parser->m_openInternalEntities
6102 : ! dtd->hasParamEntityRefs);
6103 else /* if (pool == &parser->m_tempPool): we are called from content */
6104 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6105 if (checkEntityDecl) {
6106 if (! entity)
6107 return XML_ERROR_UNDEFINED_ENTITY;
6108 else if (! entity->is_internal)
6109 return XML_ERROR_ENTITY_DECLARED_IN_PE;
6110 } else if (! entity) {
6111 /* Cannot report skipped entity here - see comments on
6112 parser->m_skippedEntityHandler.
6113 if (parser->m_skippedEntityHandler)
6114 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6115 */
6116 /* Cannot call the default handler because this would be
6117 out of sync with the call to the startElementHandler.
6118 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6119 reportDefault(parser, enc, ptr, next);
6120 */
6121 break;
6122 }
6123 if (entity->open) {
6124 if (enc == parser->m_encoding) {
6125 /* It does not appear that this line can be executed.
6126 *
6127 * The "if (entity->open)" check catches recursive entity
6128 * definitions. In order to be called with an open
6129 * entity, it must have gone through this code before and
6130 * been through the recursive call to
6131 * appendAttributeValue() some lines below. That call
6132 * sets the local encoding ("enc") to the parser's
6133 * internal encoding (internal_utf8 or internal_utf16),
6134 * which can never be the same as the principle encoding.
6135 * It doesn't appear there is another code path that gets
6136 * here with entity->open being TRUE.
6137 *
6138 * Since it is not certain that this logic is watertight,
6139 * we keep the line and merely exclude it from coverage
6140 * tests.
6141 */
6142 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6143 }
6144 return XML_ERROR_RECURSIVE_ENTITY_REF;
6145 }
6146 if (entity->notation) {
6147 if (enc == parser->m_encoding)
6148 parser->m_eventPtr = ptr;
6149 return XML_ERROR_BINARY_ENTITY_REF;
6150 }
6151 if (! entity->textPtr) {
6152 if (enc == parser->m_encoding)
6153 parser->m_eventPtr = ptr;
6154 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6155 } else {
6156 enum XML_Error result;
6157 const XML_Char *textEnd = entity->textPtr + entity->textLen;
6158 entity->open = XML_TRUE;
6159 #if XML_GE == 1
6160 entityTrackingOnOpen(parser, entity, __LINE__);
6161 #endif
6162 result = appendAttributeValue(parser, parser->m_internalEncoding,
6163 isCdata, (const char *)entity->textPtr,
6164 (const char *)textEnd, pool,
6165 XML_ACCOUNT_ENTITY_EXPANSION);
6166 #if XML_GE == 1
6167 entityTrackingOnClose(parser, entity, __LINE__);
6168 #endif
6169 entity->open = XML_FALSE;
6170 if (result)
6171 return result;
6172 }
6173 } break;
6174 default:
6175 /* The only token returned by XmlAttributeValueTok() that does
6176 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6177 * Getting that would require an entity name to contain an
6178 * incomplete XML character (e.g. \xE2\x82); however previous
6179 * tokenisers will have already recognised and rejected such
6180 * names before XmlAttributeValueTok() gets a look-in. This
6181 * default case should be retained as a safety net, but the code
6182 * excluded from coverage tests.
6183 *
6184 * LCOV_EXCL_START
6185 */
6186 if (enc == parser->m_encoding)
6187 parser->m_eventPtr = ptr;
6188 return XML_ERROR_UNEXPECTED_STATE;
6189 /* LCOV_EXCL_STOP */
6190 }
6191 ptr = next;
6192 }
6193 /* not reached */
6194 }
6195
6196 #if XML_GE == 1
6197 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6198 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6199 const char *entityTextPtr, const char *entityTextEnd,
6200 enum XML_Account account) {
6201 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6202 STRING_POOL *pool = &(dtd->entityValuePool);
6203 enum XML_Error result = XML_ERROR_NONE;
6204 # ifdef XML_DTD
6205 int oldInEntityValue = parser->m_prologState.inEntityValue;
6206 parser->m_prologState.inEntityValue = 1;
6207 # else
6208 UNUSED_P(account);
6209 # endif /* XML_DTD */
6210 /* never return Null for the value argument in EntityDeclHandler,
6211 since this would indicate an external entity; therefore we
6212 have to make sure that entityValuePool.start is not null */
6213 if (! pool->blocks) {
6214 if (! poolGrow(pool))
6215 return XML_ERROR_NO_MEMORY;
6216 }
6217
6218 for (;;) {
6219 const char *next
6220 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6221 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6222
6223 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6224 account)) {
6225 accountingOnAbort(parser);
6226 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6227 goto endEntityValue;
6228 }
6229
6230 switch (tok) {
6231 case XML_TOK_PARAM_ENTITY_REF:
6232 # ifdef XML_DTD
6233 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6234 const XML_Char *name;
6235 ENTITY *entity;
6236 name = poolStoreString(&parser->m_tempPool, enc,
6237 entityTextPtr + enc->minBytesPerChar,
6238 next - enc->minBytesPerChar);
6239 if (! name) {
6240 result = XML_ERROR_NO_MEMORY;
6241 goto endEntityValue;
6242 }
6243 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6244 poolDiscard(&parser->m_tempPool);
6245 if (! entity) {
6246 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6247 /* cannot report skipped entity here - see comments on
6248 parser->m_skippedEntityHandler
6249 if (parser->m_skippedEntityHandler)
6250 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6251 */
6252 dtd->keepProcessing = dtd->standalone;
6253 goto endEntityValue;
6254 }
6255 if (entity->open || (entity == parser->m_declEntity)) {
6256 if (enc == parser->m_encoding)
6257 parser->m_eventPtr = entityTextPtr;
6258 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6259 goto endEntityValue;
6260 }
6261 if (entity->systemId) {
6262 if (parser->m_externalEntityRefHandler) {
6263 dtd->paramEntityRead = XML_FALSE;
6264 entity->open = XML_TRUE;
6265 entityTrackingOnOpen(parser, entity, __LINE__);
6266 if (! parser->m_externalEntityRefHandler(
6267 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6268 entity->systemId, entity->publicId)) {
6269 entityTrackingOnClose(parser, entity, __LINE__);
6270 entity->open = XML_FALSE;
6271 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6272 goto endEntityValue;
6273 }
6274 entityTrackingOnClose(parser, entity, __LINE__);
6275 entity->open = XML_FALSE;
6276 if (! dtd->paramEntityRead)
6277 dtd->keepProcessing = dtd->standalone;
6278 } else
6279 dtd->keepProcessing = dtd->standalone;
6280 } else {
6281 entity->open = XML_TRUE;
6282 entityTrackingOnOpen(parser, entity, __LINE__);
6283 result = storeEntityValue(
6284 parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6285 (const char *)(entity->textPtr + entity->textLen),
6286 XML_ACCOUNT_ENTITY_EXPANSION);
6287 entityTrackingOnClose(parser, entity, __LINE__);
6288 entity->open = XML_FALSE;
6289 if (result)
6290 goto endEntityValue;
6291 }
6292 break;
6293 }
6294 # endif /* XML_DTD */
6295 /* In the internal subset, PE references are not legal
6296 within markup declarations, e.g entity values in this case. */
6297 parser->m_eventPtr = entityTextPtr;
6298 result = XML_ERROR_PARAM_ENTITY_REF;
6299 goto endEntityValue;
6300 case XML_TOK_NONE:
6301 result = XML_ERROR_NONE;
6302 goto endEntityValue;
6303 case XML_TOK_ENTITY_REF:
6304 case XML_TOK_DATA_CHARS:
6305 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6306 result = XML_ERROR_NO_MEMORY;
6307 goto endEntityValue;
6308 }
6309 break;
6310 case XML_TOK_TRAILING_CR:
6311 next = entityTextPtr + enc->minBytesPerChar;
6312 /* fall through */
6313 case XML_TOK_DATA_NEWLINE:
6314 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6315 result = XML_ERROR_NO_MEMORY;
6316 goto endEntityValue;
6317 }
6318 *(pool->ptr)++ = 0xA;
6319 break;
6320 case XML_TOK_CHAR_REF: {
6321 XML_Char buf[XML_ENCODE_MAX];
6322 int i;
6323 int n = XmlCharRefNumber(enc, entityTextPtr);
6324 if (n < 0) {
6325 if (enc == parser->m_encoding)
6326 parser->m_eventPtr = entityTextPtr;
6327 result = XML_ERROR_BAD_CHAR_REF;
6328 goto endEntityValue;
6329 }
6330 n = XmlEncode(n, (ICHAR *)buf);
6331 /* The XmlEncode() functions can never return 0 here. That
6332 * error return happens if the code point passed in is either
6333 * negative or greater than or equal to 0x110000. The
6334 * XmlCharRefNumber() functions will all return a number
6335 * strictly less than 0x110000 or a negative value if an error
6336 * occurred. The negative value is intercepted above, so
6337 * XmlEncode() is never passed a value it might return an
6338 * error for.
6339 */
6340 for (i = 0; i < n; i++) {
6341 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6342 result = XML_ERROR_NO_MEMORY;
6343 goto endEntityValue;
6344 }
6345 *(pool->ptr)++ = buf[i];
6346 }
6347 } break;
6348 case XML_TOK_PARTIAL:
6349 if (enc == parser->m_encoding)
6350 parser->m_eventPtr = entityTextPtr;
6351 result = XML_ERROR_INVALID_TOKEN;
6352 goto endEntityValue;
6353 case XML_TOK_INVALID:
6354 if (enc == parser->m_encoding)
6355 parser->m_eventPtr = next;
6356 result = XML_ERROR_INVALID_TOKEN;
6357 goto endEntityValue;
6358 default:
6359 /* This default case should be unnecessary -- all the tokens
6360 * that XmlEntityValueTok() can return have their own explicit
6361 * cases -- but should be retained for safety. We do however
6362 * exclude it from the coverage statistics.
6363 *
6364 * LCOV_EXCL_START
6365 */
6366 if (enc == parser->m_encoding)
6367 parser->m_eventPtr = entityTextPtr;
6368 result = XML_ERROR_UNEXPECTED_STATE;
6369 goto endEntityValue;
6370 /* LCOV_EXCL_STOP */
6371 }
6372 entityTextPtr = next;
6373 }
6374 endEntityValue:
6375 # ifdef XML_DTD
6376 parser->m_prologState.inEntityValue = oldInEntityValue;
6377 # endif /* XML_DTD */
6378 return result;
6379 }
6380
6381 #else /* XML_GE == 0 */
6382
6383 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6384 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6385 // This will store "&entity123;" in entity->textPtr
6386 // to end up as "&entity123;" in the handler.
6387 const char *const entity_start = "&";
6388 const char *const entity_end = ";";
6389
6390 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6391 if (! poolAppendString(pool, entity_start)
6392 || ! poolAppendString(pool, entity->name)
6393 || ! poolAppendString(pool, entity_end)) {
6394 poolDiscard(pool);
6395 return XML_ERROR_NO_MEMORY;
6396 }
6397
6398 entity->textPtr = poolStart(pool);
6399 entity->textLen = (int)(poolLength(pool));
6400 poolFinish(pool);
6401
6402 return XML_ERROR_NONE;
6403 }
6404
6405 #endif /* XML_GE == 0 */
6406
6407 static void FASTCALL
normalizeLines(XML_Char * s)6408 normalizeLines(XML_Char *s) {
6409 XML_Char *p;
6410 for (;; s++) {
6411 if (*s == XML_T('\0'))
6412 return;
6413 if (*s == 0xD)
6414 break;
6415 }
6416 p = s;
6417 do {
6418 if (*s == 0xD) {
6419 *p++ = 0xA;
6420 if (*++s == 0xA)
6421 s++;
6422 } else
6423 *p++ = *s++;
6424 } while (*s);
6425 *p = XML_T('\0');
6426 }
6427
6428 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6429 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6430 const char *start, const char *end) {
6431 const XML_Char *target;
6432 XML_Char *data;
6433 const char *tem;
6434 if (! parser->m_processingInstructionHandler) {
6435 if (parser->m_defaultHandler)
6436 reportDefault(parser, enc, start, end);
6437 return 1;
6438 }
6439 start += enc->minBytesPerChar * 2;
6440 tem = start + XmlNameLength(enc, start);
6441 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6442 if (! target)
6443 return 0;
6444 poolFinish(&parser->m_tempPool);
6445 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6446 end - enc->minBytesPerChar * 2);
6447 if (! data)
6448 return 0;
6449 normalizeLines(data);
6450 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6451 poolClear(&parser->m_tempPool);
6452 return 1;
6453 }
6454
6455 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6456 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6457 const char *end) {
6458 XML_Char *data;
6459 if (! parser->m_commentHandler) {
6460 if (parser->m_defaultHandler)
6461 reportDefault(parser, enc, start, end);
6462 return 1;
6463 }
6464 data = poolStoreString(&parser->m_tempPool, enc,
6465 start + enc->minBytesPerChar * 4,
6466 end - enc->minBytesPerChar * 3);
6467 if (! data)
6468 return 0;
6469 normalizeLines(data);
6470 parser->m_commentHandler(parser->m_handlerArg, data);
6471 poolClear(&parser->m_tempPool);
6472 return 1;
6473 }
6474
6475 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6476 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6477 const char *end) {
6478 if (MUST_CONVERT(enc, s)) {
6479 enum XML_Convert_Result convert_res;
6480 const char **eventPP;
6481 const char **eventEndPP;
6482 if (enc == parser->m_encoding) {
6483 eventPP = &parser->m_eventPtr;
6484 eventEndPP = &parser->m_eventEndPtr;
6485 } else {
6486 /* To get here, two things must be true; the parser must be
6487 * using a character encoding that is not the same as the
6488 * encoding passed in, and the encoding passed in must need
6489 * conversion to the internal format (UTF-8 unless XML_UNICODE
6490 * is defined). The only occasions on which the encoding passed
6491 * in is not the same as the parser's encoding are when it is
6492 * the internal encoding (e.g. a previously defined parameter
6493 * entity, already converted to internal format). This by
6494 * definition doesn't need conversion, so the whole branch never
6495 * gets executed.
6496 *
6497 * For safety's sake we don't delete these lines and merely
6498 * exclude them from coverage statistics.
6499 *
6500 * LCOV_EXCL_START
6501 */
6502 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6503 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6504 /* LCOV_EXCL_STOP */
6505 }
6506 do {
6507 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6508 convert_res
6509 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6510 *eventEndPP = s;
6511 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6512 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6513 *eventPP = s;
6514 } while ((convert_res != XML_CONVERT_COMPLETED)
6515 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6516 } else
6517 parser->m_defaultHandler(
6518 parser->m_handlerArg, (const XML_Char *)s,
6519 (int)((const XML_Char *)end - (const XML_Char *)s));
6520 }
6521
6522 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6523 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6524 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6525 DEFAULT_ATTRIBUTE *att;
6526 if (value || isId) {
6527 /* The handling of default attributes gets messed up if we have
6528 a default which duplicates a non-default. */
6529 int i;
6530 for (i = 0; i < type->nDefaultAtts; i++)
6531 if (attId == type->defaultAtts[i].id)
6532 return 1;
6533 if (isId && ! type->idAtt && ! attId->xmlns)
6534 type->idAtt = attId;
6535 }
6536 if (type->nDefaultAtts == type->allocDefaultAtts) {
6537 if (type->allocDefaultAtts == 0) {
6538 type->allocDefaultAtts = 8;
6539 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6540 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6541 if (! type->defaultAtts) {
6542 type->allocDefaultAtts = 0;
6543 return 0;
6544 }
6545 } else {
6546 DEFAULT_ATTRIBUTE *temp;
6547
6548 /* Detect and prevent integer overflow */
6549 if (type->allocDefaultAtts > INT_MAX / 2) {
6550 return 0;
6551 }
6552
6553 int count = type->allocDefaultAtts * 2;
6554
6555 /* Detect and prevent integer overflow.
6556 * The preprocessor guard addresses the "always false" warning
6557 * from -Wtype-limits on platforms where
6558 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6559 #if UINT_MAX >= SIZE_MAX
6560 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6561 return 0;
6562 }
6563 #endif
6564
6565 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6566 (count * sizeof(DEFAULT_ATTRIBUTE)));
6567 if (temp == NULL)
6568 return 0;
6569 type->allocDefaultAtts = count;
6570 type->defaultAtts = temp;
6571 }
6572 }
6573 att = type->defaultAtts + type->nDefaultAtts;
6574 att->id = attId;
6575 att->value = value;
6576 att->isCdata = isCdata;
6577 if (! isCdata)
6578 attId->maybeTokenized = XML_TRUE;
6579 type->nDefaultAtts += 1;
6580 return 1;
6581 }
6582
6583 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6584 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6585 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6586 const XML_Char *name;
6587 for (name = elementType->name; *name; name++) {
6588 if (*name == XML_T(ASCII_COLON)) {
6589 PREFIX *prefix;
6590 const XML_Char *s;
6591 for (s = elementType->name; s != name; s++) {
6592 if (! poolAppendChar(&dtd->pool, *s))
6593 return 0;
6594 }
6595 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6596 return 0;
6597 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6598 sizeof(PREFIX));
6599 if (! prefix)
6600 return 0;
6601 if (prefix->name == poolStart(&dtd->pool))
6602 poolFinish(&dtd->pool);
6603 else
6604 poolDiscard(&dtd->pool);
6605 elementType->prefix = prefix;
6606 break;
6607 }
6608 }
6609 return 1;
6610 }
6611
6612 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6613 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6614 const char *end) {
6615 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6616 ATTRIBUTE_ID *id;
6617 const XML_Char *name;
6618 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6619 return NULL;
6620 name = poolStoreString(&dtd->pool, enc, start, end);
6621 if (! name)
6622 return NULL;
6623 /* skip quotation mark - its storage will be reused (like in name[-1]) */
6624 ++name;
6625 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6626 sizeof(ATTRIBUTE_ID));
6627 if (! id)
6628 return NULL;
6629 if (id->name != name)
6630 poolDiscard(&dtd->pool);
6631 else {
6632 poolFinish(&dtd->pool);
6633 if (! parser->m_ns)
6634 ;
6635 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6636 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6637 && name[4] == XML_T(ASCII_s)
6638 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6639 if (name[5] == XML_T('\0'))
6640 id->prefix = &dtd->defaultPrefix;
6641 else
6642 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6643 sizeof(PREFIX));
6644 id->xmlns = XML_TRUE;
6645 } else {
6646 int i;
6647 for (i = 0; name[i]; i++) {
6648 /* attributes without prefix are *not* in the default namespace */
6649 if (name[i] == XML_T(ASCII_COLON)) {
6650 int j;
6651 for (j = 0; j < i; j++) {
6652 if (! poolAppendChar(&dtd->pool, name[j]))
6653 return NULL;
6654 }
6655 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6656 return NULL;
6657 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6658 poolStart(&dtd->pool), sizeof(PREFIX));
6659 if (! id->prefix)
6660 return NULL;
6661 if (id->prefix->name == poolStart(&dtd->pool))
6662 poolFinish(&dtd->pool);
6663 else
6664 poolDiscard(&dtd->pool);
6665 break;
6666 }
6667 }
6668 }
6669 }
6670 return id;
6671 }
6672
6673 #define CONTEXT_SEP XML_T(ASCII_FF)
6674
6675 static const XML_Char *
getContext(XML_Parser parser)6676 getContext(XML_Parser parser) {
6677 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6678 HASH_TABLE_ITER iter;
6679 XML_Bool needSep = XML_FALSE;
6680
6681 if (dtd->defaultPrefix.binding) {
6682 int i;
6683 int len;
6684 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6685 return NULL;
6686 len = dtd->defaultPrefix.binding->uriLen;
6687 if (parser->m_namespaceSeparator)
6688 len--;
6689 for (i = 0; i < len; i++) {
6690 if (! poolAppendChar(&parser->m_tempPool,
6691 dtd->defaultPrefix.binding->uri[i])) {
6692 /* Because of memory caching, I don't believe this line can be
6693 * executed.
6694 *
6695 * This is part of a loop copying the default prefix binding
6696 * URI into the parser's temporary string pool. Previously,
6697 * that URI was copied into the same string pool, with a
6698 * terminating NUL character, as part of setContext(). When
6699 * the pool was cleared, that leaves a block definitely big
6700 * enough to hold the URI on the free block list of the pool.
6701 * The URI copy in getContext() therefore cannot run out of
6702 * memory.
6703 *
6704 * If the pool is used between the setContext() and
6705 * getContext() calls, the worst it can do is leave a bigger
6706 * block on the front of the free list. Given that this is
6707 * all somewhat inobvious and program logic can be changed, we
6708 * don't delete the line but we do exclude it from the test
6709 * coverage statistics.
6710 */
6711 return NULL; /* LCOV_EXCL_LINE */
6712 }
6713 }
6714 needSep = XML_TRUE;
6715 }
6716
6717 hashTableIterInit(&iter, &(dtd->prefixes));
6718 for (;;) {
6719 int i;
6720 int len;
6721 const XML_Char *s;
6722 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6723 if (! prefix)
6724 break;
6725 if (! prefix->binding) {
6726 /* This test appears to be (justifiable) paranoia. There does
6727 * not seem to be a way of injecting a prefix without a binding
6728 * that doesn't get errored long before this function is called.
6729 * The test should remain for safety's sake, so we instead
6730 * exclude the following line from the coverage statistics.
6731 */
6732 continue; /* LCOV_EXCL_LINE */
6733 }
6734 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6735 return NULL;
6736 for (s = prefix->name; *s; s++)
6737 if (! poolAppendChar(&parser->m_tempPool, *s))
6738 return NULL;
6739 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6740 return NULL;
6741 len = prefix->binding->uriLen;
6742 if (parser->m_namespaceSeparator)
6743 len--;
6744 for (i = 0; i < len; i++)
6745 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6746 return NULL;
6747 needSep = XML_TRUE;
6748 }
6749
6750 hashTableIterInit(&iter, &(dtd->generalEntities));
6751 for (;;) {
6752 const XML_Char *s;
6753 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6754 if (! e)
6755 break;
6756 if (! e->open)
6757 continue;
6758 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6759 return NULL;
6760 for (s = e->name; *s; s++)
6761 if (! poolAppendChar(&parser->m_tempPool, *s))
6762 return 0;
6763 needSep = XML_TRUE;
6764 }
6765
6766 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6767 return NULL;
6768 return parser->m_tempPool.start;
6769 }
6770
/* Apply a context string to the parser.

   `context` is read as a list of items separated by CONTEXT_SEP:
   - an item without '=' is an entity name; if it is found in
     dtd->generalEntities the entity is marked open;
   - an item of the form "name=uri" binds the prefix `name` (the default
     prefix when `name` is empty) to `uri` via addBinding(), recording the
     binding in parser->m_inheritedBindings.

   Returns XML_FALSE if `context` is NULL, if the temp pool or the prefix
   table cannot grow, or if addBinding() reports an error; XML_TRUE
   otherwise. */
static XML_Bool
setContext(XML_Parser parser, const XML_Char *context) {
  if (context == NULL) {
    return XML_FALSE;
  }

  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  /* `s` scans ahead through the current item while `context` marks where
     that item began; the characters seen so far sit in the temp pool. */
  const XML_Char *s = context;

  while (*context != XML_T('\0')) {
    if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
      /* End of an item that contained no '=': it names an entity. */
      ENTITY *e;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      e = (ENTITY *)lookup(parser, &dtd->generalEntities,
                           poolStart(&parser->m_tempPool), 0);
      if (e)
        e->open = XML_TRUE;
      if (*s != XML_T('\0'))
        s++;
      context = s;
      poolDiscard(&parser->m_tempPool);
    } else if (*s == XML_T(ASCII_EQUALS)) {
      /* The temp pool holds the prefix name; the URI follows the '='. */
      PREFIX *prefix;
      if (poolLength(&parser->m_tempPool) == 0)
        prefix = &dtd->defaultPrefix;
      else {
        if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
          return XML_FALSE;
        prefix
            = (PREFIX *)lookup(parser, &dtd->prefixes,
                               poolStart(&parser->m_tempPool), sizeof(PREFIX));
        if (! prefix)
          return XML_FALSE;
        /* A newly created entry still points into the temp pool; give it
           its own copy of the name in the DTD pool before discarding. */
        if (prefix->name == poolStart(&parser->m_tempPool)) {
          prefix->name = poolCopyString(&dtd->pool, prefix->name);
          if (! prefix->name)
            return XML_FALSE;
        }
        poolDiscard(&parser->m_tempPool);
      }
      /* Collect the URI up to the next separator or the end of string. */
      for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
           context++)
        if (! poolAppendChar(&parser->m_tempPool, *context))
          return XML_FALSE;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
                     &parser->m_inheritedBindings)
          != XML_ERROR_NONE)
        return XML_FALSE;
      poolDiscard(&parser->m_tempPool);
      if (*context != XML_T('\0'))
        ++context;
      s = context;
    } else {
      /* Ordinary character: accumulate it as part of the current item. */
      if (! poolAppendChar(&parser->m_tempPool, *s))
        return XML_FALSE;
      s++;
    }
  }
  return XML_TRUE;
}
6834
6835 static void FASTCALL
normalizePublicId(XML_Char * publicId)6836 normalizePublicId(XML_Char *publicId) {
6837 XML_Char *p = publicId;
6838 XML_Char *s;
6839 for (s = publicId; *s; s++) {
6840 switch (*s) {
6841 case 0x20:
6842 case 0xD:
6843 case 0xA:
6844 if (p != publicId && p[-1] != 0x20)
6845 *p++ = 0x20;
6846 break;
6847 default:
6848 *p++ = *s;
6849 }
6850 }
6851 if (p != publicId && p[-1] == 0x20)
6852 --p;
6853 *p = XML_T('\0');
6854 }
6855
6856 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6857 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6858 DTD *p = ms->malloc_fcn(sizeof(DTD));
6859 if (p == NULL)
6860 return p;
6861 poolInit(&(p->pool), ms);
6862 poolInit(&(p->entityValuePool), ms);
6863 hashTableInit(&(p->generalEntities), ms);
6864 hashTableInit(&(p->elementTypes), ms);
6865 hashTableInit(&(p->attributeIds), ms);
6866 hashTableInit(&(p->prefixes), ms);
6867 #ifdef XML_DTD
6868 p->paramEntityRead = XML_FALSE;
6869 hashTableInit(&(p->paramEntities), ms);
6870 #endif /* XML_DTD */
6871 p->defaultPrefix.name = NULL;
6872 p->defaultPrefix.binding = NULL;
6873
6874 p->in_eldecl = XML_FALSE;
6875 p->scaffIndex = NULL;
6876 p->scaffold = NULL;
6877 p->scaffLevel = 0;
6878 p->scaffSize = 0;
6879 p->scaffCount = 0;
6880 p->contentStringLen = 0;
6881
6882 p->keepProcessing = XML_TRUE;
6883 p->hasParamEntityRefs = XML_FALSE;
6884 p->standalone = XML_FALSE;
6885 return p;
6886 }
6887
6888 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6889 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6890 HASH_TABLE_ITER iter;
6891 hashTableIterInit(&iter, &(p->elementTypes));
6892 for (;;) {
6893 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6894 if (! e)
6895 break;
6896 if (e->allocDefaultAtts != 0)
6897 ms->free_fcn(e->defaultAtts);
6898 }
6899 hashTableClear(&(p->generalEntities));
6900 #ifdef XML_DTD
6901 p->paramEntityRead = XML_FALSE;
6902 hashTableClear(&(p->paramEntities));
6903 #endif /* XML_DTD */
6904 hashTableClear(&(p->elementTypes));
6905 hashTableClear(&(p->attributeIds));
6906 hashTableClear(&(p->prefixes));
6907 poolClear(&(p->pool));
6908 poolClear(&(p->entityValuePool));
6909 p->defaultPrefix.name = NULL;
6910 p->defaultPrefix.binding = NULL;
6911
6912 p->in_eldecl = XML_FALSE;
6913
6914 ms->free_fcn(p->scaffIndex);
6915 p->scaffIndex = NULL;
6916 ms->free_fcn(p->scaffold);
6917 p->scaffold = NULL;
6918
6919 p->scaffLevel = 0;
6920 p->scaffSize = 0;
6921 p->scaffCount = 0;
6922 p->contentStringLen = 0;
6923
6924 p->keepProcessing = XML_TRUE;
6925 p->hasParamEntityRefs = XML_FALSE;
6926 p->standalone = XML_FALSE;
6927 }
6928
6929 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6930 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6931 HASH_TABLE_ITER iter;
6932 hashTableIterInit(&iter, &(p->elementTypes));
6933 for (;;) {
6934 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6935 if (! e)
6936 break;
6937 if (e->allocDefaultAtts != 0)
6938 ms->free_fcn(e->defaultAtts);
6939 }
6940 hashTableDestroy(&(p->generalEntities));
6941 #ifdef XML_DTD
6942 hashTableDestroy(&(p->paramEntities));
6943 #endif /* XML_DTD */
6944 hashTableDestroy(&(p->elementTypes));
6945 hashTableDestroy(&(p->attributeIds));
6946 hashTableDestroy(&(p->prefixes));
6947 poolDestroy(&(p->pool));
6948 poolDestroy(&(p->entityValuePool));
6949 if (isDocEntity) {
6950 ms->free_fcn(p->scaffIndex);
6951 ms->free_fcn(p->scaffold);
6952 }
6953 ms->free_fcn(p);
6954 }
6955
/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
   The new DTD has already been initialized.

   Prefixes, attribute ids, element types (with their default attributes)
   and the entity tables are re-created in newDtd, with all strings copied
   into newDtd->pool.  The scaffolding is the exception: its pointers are
   copied as they are, so newDtd refers to the same arrays as oldDtd.
   oldParser is only passed through to lookup() and copyEntityTable().
*/
static int
dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
        const XML_Memory_Handling_Suite *ms) {
  HASH_TABLE_ITER iter;

  /* Copy the prefix table. */

  hashTableIterInit(&iter, &(oldDtd->prefixes));
  for (;;) {
    const XML_Char *name;
    const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
    if (! oldP)
      break;
    name = poolCopyString(&(newDtd->pool), oldP->name);
    if (! name)
      return 0;
    if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
      return 0;
  }

  hashTableIterInit(&iter, &(oldDtd->attributeIds));

  /* Copy the attribute id table. */

  for (;;) {
    ATTRIBUTE_ID *newA;
    const XML_Char *name;
    const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);

    if (! oldA)
      break;
    /* Remember to allocate the scratch byte before the name. */
    if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
      return 0;
    name = poolCopyString(&(newDtd->pool), oldA->name);
    if (! name)
      return 0;
    /* Step over the scratch byte so that `name` is the actual key. */
    ++name;
    newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
                                  sizeof(ATTRIBUTE_ID));
    if (! newA)
      return 0;
    newA->maybeTokenized = oldA->maybeTokenized;
    if (oldA->prefix) {
      newA->xmlns = oldA->xmlns;
      /* Map the old prefix to its counterpart in the new DTD: either the
         embedded default prefix or the entry of the same name. */
      if (oldA->prefix == &oldDtd->defaultPrefix)
        newA->prefix = &newDtd->defaultPrefix;
      else
        newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                        oldA->prefix->name, 0);
    }
  }

  /* Copy the element type table. */

  hashTableIterInit(&iter, &(oldDtd->elementTypes));

  for (;;) {
    int i;
    ELEMENT_TYPE *newE;
    const XML_Char *name;
    const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
    if (! oldE)
      break;
    name = poolCopyString(&(newDtd->pool), oldE->name);
    if (! name)
      return 0;
    newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
                                  sizeof(ELEMENT_TYPE));
    if (! newE)
      return 0;
    if (oldE->nDefaultAtts) {
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if ((size_t)oldE->nDefaultAtts
          > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
        return 0;
      }
#endif
      /* The copy is sized to the attributes in use, not to the old
         allocation. */
      newE->defaultAtts
          = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
      if (! newE->defaultAtts) {
        return 0;
      }
    }
    if (oldE->idAtt)
      newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
                                           oldE->idAtt->name, 0);
    newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
    if (oldE->prefix)
      newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                      oldE->prefix->name, 0);
    for (i = 0; i < newE->nDefaultAtts; i++) {
      /* Attribute ids were copied above, so they are found by name here. */
      newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
          oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
      newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
      if (oldE->defaultAtts[i].value) {
        newE->defaultAtts[i].value
            = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
        if (! newE->defaultAtts[i].value)
          return 0;
      } else
        newE->defaultAtts[i].value = NULL;
    }
  }

  /* Copy the entity tables. */
  if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
                        &(oldDtd->generalEntities)))
    return 0;

#ifdef XML_DTD
  if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
                        &(oldDtd->paramEntities)))
    return 0;
  newDtd->paramEntityRead = oldDtd->paramEntityRead;
#endif /* XML_DTD */

  newDtd->keepProcessing = oldDtd->keepProcessing;
  newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
  newDtd->standalone = oldDtd->standalone;

  /* Don't want deep copying for scaffolding */
  newDtd->in_eldecl = oldDtd->in_eldecl;
  newDtd->scaffold = oldDtd->scaffold;
  newDtd->contentStringLen = oldDtd->contentStringLen;
  newDtd->scaffSize = oldDtd->scaffSize;
  newDtd->scaffLevel = oldDtd->scaffLevel;
  newDtd->scaffIndex = oldDtd->scaffIndex;

  return 1;
} /* End dtdCopy */
7094
7095 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7096 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7097 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7098 HASH_TABLE_ITER iter;
7099 const XML_Char *cachedOldBase = NULL;
7100 const XML_Char *cachedNewBase = NULL;
7101
7102 hashTableIterInit(&iter, oldTable);
7103
7104 for (;;) {
7105 ENTITY *newE;
7106 const XML_Char *name;
7107 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7108 if (! oldE)
7109 break;
7110 name = poolCopyString(newPool, oldE->name);
7111 if (! name)
7112 return 0;
7113 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7114 if (! newE)
7115 return 0;
7116 if (oldE->systemId) {
7117 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7118 if (! tem)
7119 return 0;
7120 newE->systemId = tem;
7121 if (oldE->base) {
7122 if (oldE->base == cachedOldBase)
7123 newE->base = cachedNewBase;
7124 else {
7125 cachedOldBase = oldE->base;
7126 tem = poolCopyString(newPool, cachedOldBase);
7127 if (! tem)
7128 return 0;
7129 cachedNewBase = newE->base = tem;
7130 }
7131 }
7132 if (oldE->publicId) {
7133 tem = poolCopyString(newPool, oldE->publicId);
7134 if (! tem)
7135 return 0;
7136 newE->publicId = tem;
7137 }
7138 } else {
7139 const XML_Char *tem
7140 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7141 if (! tem)
7142 return 0;
7143 newE->textPtr = tem;
7144 newE->textLen = oldE->textLen;
7145 }
7146 if (oldE->notation) {
7147 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7148 if (! tem)
7149 return 0;
7150 newE->notation = tem;
7151 }
7152 newE->is_param = oldE->is_param;
7153 newE->is_internal = oldE->is_internal;
7154 }
7155 return 1;
7156 }
7157
/* log2 of the number of slots that lookup() allocates the first time an
   entry is created in an empty hash table (i.e. 1 << INIT_POWER slots) */
#define INIT_POWER 6
7159
7160 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7161 keyeq(KEY s1, KEY s2) {
7162 for (; *s1 == *s2; s1++, s2++)
7163 if (*s1 == 0)
7164 return XML_TRUE;
7165 return XML_FALSE;
7166 }
7167
7168 static size_t
keylen(KEY s)7169 keylen(KEY s) {
7170 size_t len = 0;
7171 for (; *s; s++, len++)
7172 ;
7173 return len;
7174 }
7175
7176 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7177 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7178 key->k[0] = 0;
7179 key->k[1] = get_hash_secret_salt(parser);
7180 }
7181
7182 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7183 hash(XML_Parser parser, KEY s) {
7184 struct siphash state;
7185 struct sipkey key;
7186 (void)sip24_valid;
7187 copy_salt_to_sipkey(parser, &key);
7188 sip24_init(&state, &key);
7189 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7190 return (unsigned long)sip24_final(&state);
7191 }
7192
/* Find the entry named `name` in `table`, optionally creating it.
 *
 * parser     - passed on to hash() to compute the key's hash
 * table      - hash table to search; its slot array is allocated on first
 *              insertion and replaced by a larger one once half full
 * name       - zero-terminated key; on creation the pointer itself is
 *              stored in the new entry (the characters are not copied)
 * createSize - 0 for a pure lookup; otherwise the number of bytes to
 *              allocate (zero-filled) for a new entry when none exists
 *
 * Returns the existing or newly created entry.  Returns NULL when the key
 * is absent and createSize is 0, when an allocation fails, or when the
 * table cannot grow any further.
 */
static NAMED *
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
  size_t i;
  if (table->size == 0) {
    /* Empty table: nothing can be found; allocate the slot array only if
       the caller wants an entry created */
    size_t tsize;
    if (! createSize)
      return NULL;
    table->power = INIT_POWER;
    /* table->size is a power of 2 */
    table->size = (size_t)1 << INIT_POWER;
    tsize = table->size * sizeof(NAMED *);
    table->v = table->mem->malloc_fcn(tsize);
    if (! table->v) {
      /* Restore the "no slot array" state so a later call retries */
      table->size = 0;
      return NULL;
    }
    memset(table->v, 0, tsize);
    i = hash(parser, name) & ((unsigned long)table->size - 1);
  } else {
    unsigned long h = hash(parser, name);
    unsigned long mask = (unsigned long)table->size - 1;
    unsigned char step = 0;
    i = h & mask;
    /* Probe until the key or an empty slot is found.  The step is computed
       lazily by PROBE_STEP (defined elsewhere) on the first collision. */
    while (table->v[i]) {
      if (keyeq(name, table->v[i]->name))
        return table->v[i];
      if (! step)
        step = PROBE_STEP(h, mask, table->power);
      /* Move backwards by step, wrapping around the start of the array */
      i < step ? (i += table->size - step) : (i -= step);
    }
    if (! createSize)
      return NULL;

    /* check for overflow (table is half full) */
    if (table->used >> (table->power - 1)) {
      unsigned char newPower = table->power + 1;

      /* Detect and prevent invalid shift */
      if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
        return NULL;
      }

      size_t newSize = (size_t)1 << newPower;
      unsigned long newMask = (unsigned long)newSize - 1;

      /* Detect and prevent integer overflow */
      if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
        return NULL;
      }

      size_t tsize = newSize * sizeof(NAMED *);
      NAMED **newV = table->mem->malloc_fcn(tsize);
      if (! newV)
        return NULL;
      memset(newV, 0, tsize);
      /* Re-insert every existing entry into the larger array; the old
         array is released only after all entries have been moved */
      for (i = 0; i < table->size; i++)
        if (table->v[i]) {
          unsigned long newHash = hash(parser, table->v[i]->name);
          size_t j = newHash & newMask;
          step = 0;
          while (newV[j]) {
            if (! step)
              step = PROBE_STEP(newHash, newMask, newPower);
            j < step ? (j += newSize - step) : (j -= step);
          }
          newV[j] = table->v[i];
        }
      table->mem->free_fcn(table->v);
      table->v = newV;
      table->power = newPower;
      table->size = newSize;
      /* Locate a free slot for the new key within the resized table */
      i = h & newMask;
      step = 0;
      while (table->v[i]) {
        if (! step)
          step = PROBE_STEP(h, newMask, newPower);
        i < step ? (i += newSize - step) : (i -= step);
      }
    }
  }
  /* Slot i is empty here: create the zero-filled entry and record its key */
  table->v[i] = table->mem->malloc_fcn(createSize);
  if (! table->v[i])
    return NULL;
  memset(table->v[i], 0, createSize);
  table->v[i]->name = name;
  (table->used)++;
  return table->v[i];
}
7281
7282 static void FASTCALL
hashTableClear(HASH_TABLE * table)7283 hashTableClear(HASH_TABLE *table) {
7284 size_t i;
7285 for (i = 0; i < table->size; i++) {
7286 table->mem->free_fcn(table->v[i]);
7287 table->v[i] = NULL;
7288 }
7289 table->used = 0;
7290 }
7291
7292 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7293 hashTableDestroy(HASH_TABLE *table) {
7294 size_t i;
7295 for (i = 0; i < table->size; i++)
7296 table->mem->free_fcn(table->v[i]);
7297 table->mem->free_fcn(table->v);
7298 }
7299
7300 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7301 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7302 p->power = 0;
7303 p->size = 0;
7304 p->used = 0;
7305 p->v = NULL;
7306 p->mem = ms;
7307 }
7308
7309 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7310 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7311 iter->p = table->v;
7312 iter->end = iter->p ? iter->p + table->size : NULL;
7313 }
7314
7315 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7316 hashTableIterNext(HASH_TABLE_ITER *iter) {
7317 while (iter->p != iter->end) {
7318 NAMED *tem = *(iter->p)++;
7319 if (tem)
7320 return tem;
7321 }
7322 return NULL;
7323 }
7324
7325 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7326 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7327 pool->blocks = NULL;
7328 pool->freeBlocks = NULL;
7329 pool->start = NULL;
7330 pool->ptr = NULL;
7331 pool->end = NULL;
7332 pool->mem = ms;
7333 }
7334
7335 static void FASTCALL
poolClear(STRING_POOL * pool)7336 poolClear(STRING_POOL *pool) {
7337 if (! pool->freeBlocks)
7338 pool->freeBlocks = pool->blocks;
7339 else {
7340 BLOCK *p = pool->blocks;
7341 while (p) {
7342 BLOCK *tem = p->next;
7343 p->next = pool->freeBlocks;
7344 pool->freeBlocks = p;
7345 p = tem;
7346 }
7347 }
7348 pool->blocks = NULL;
7349 pool->start = NULL;
7350 pool->ptr = NULL;
7351 pool->end = NULL;
7352 }
7353
7354 static void FASTCALL
poolDestroy(STRING_POOL * pool)7355 poolDestroy(STRING_POOL *pool) {
7356 BLOCK *p = pool->blocks;
7357 while (p) {
7358 BLOCK *tem = p->next;
7359 pool->mem->free_fcn(p);
7360 p = tem;
7361 }
7362 p = pool->freeBlocks;
7363 while (p) {
7364 BLOCK *tem = p->next;
7365 pool->mem->free_fcn(p);
7366 p = tem;
7367 }
7368 }
7369
7370 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7371 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7372 const char *end) {
7373 if (! pool->ptr && ! poolGrow(pool))
7374 return NULL;
7375 for (;;) {
7376 const enum XML_Convert_Result convert_res = XmlConvert(
7377 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7378 if ((convert_res == XML_CONVERT_COMPLETED)
7379 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7380 break;
7381 if (! poolGrow(pool))
7382 return NULL;
7383 }
7384 return pool->start;
7385 }
7386
7387 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7388 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7389 do {
7390 if (! poolAppendChar(pool, *s))
7391 return NULL;
7392 } while (*s++);
7393 s = pool->start;
7394 poolFinish(pool);
7395 return s;
7396 }
7397
7398 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7399 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7400 if (! pool->ptr && ! poolGrow(pool)) {
7401 /* The following line is unreachable given the current usage of
7402 * poolCopyStringN(). Currently it is called from exactly one
7403 * place to copy the text of a simple general entity. By that
7404 * point, the name of the entity is already stored in the pool, so
7405 * pool->ptr cannot be NULL.
7406 *
7407 * If poolCopyStringN() is used elsewhere as it well might be,
7408 * this line may well become executable again. Regardless, this
7409 * sort of check shouldn't be removed lightly, so we just exclude
7410 * it from the coverage statistics.
7411 */
7412 return NULL; /* LCOV_EXCL_LINE */
7413 }
7414 for (; n > 0; --n, s++) {
7415 if (! poolAppendChar(pool, *s))
7416 return NULL;
7417 }
7418 s = pool->start;
7419 poolFinish(pool);
7420 return s;
7421 }
7422
7423 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7424 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7425 while (*s) {
7426 if (! poolAppendChar(pool, *s))
7427 return NULL;
7428 s++;
7429 }
7430 return pool->start;
7431 }
7432
7433 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7434 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7435 const char *end) {
7436 if (! poolAppend(pool, enc, ptr, end))
7437 return NULL;
7438 if (pool->ptr == pool->end && ! poolGrow(pool))
7439 return NULL;
7440 *(pool->ptr)++ = 0;
7441 return pool->start;
7442 }
7443
7444 static size_t
poolBytesToAllocateFor(int blockSize)7445 poolBytesToAllocateFor(int blockSize) {
7446 /* Unprotected math would be:
7447 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7448 **
7449 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7450 ** For a + b * c we check b * c in isolation first, so that addition of a
7451 ** on top has no chance of making us accept a small non-negative number
7452 */
7453 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7454
7455 if (blockSize <= 0)
7456 return 0;
7457
7458 if (blockSize > (int)(INT_MAX / stretch))
7459 return 0;
7460
7461 {
7462 const int stretchedBlockSize = blockSize * (int)stretch;
7463 const int bytesToAllocate
7464 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7465 if (bytesToAllocate < 0)
7466 return 0;
7467
7468 return (size_t)bytesToAllocate;
7469 }
7470 }
7471
/* Make room for more characters in the string currently under construction
 * (pool->start .. pool->ptr), keeping the characters written so far.
 *
 * Three strategies are tried in order:
 *   1. reuse a block from pool->freeBlocks, if one is suitable;
 *   2. if the current string starts at the beginning of the first block in
 *      pool->blocks, double that block with realloc_fcn;
 *   3. otherwise allocate a fresh block and copy the partial string over.
 *
 * Returns XML_TRUE on success.  Returns XML_FALSE if a size computation
 * would overflow or an allocation fails.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == 0) {
      /* No current string: take the first free block as the current one */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* The first free block is larger than the space the current string
         has: move it to the front of the used list and copy the string */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      /* Rebase ptr first, while the old start is still available */
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* The current string begins at the start of the newest block, so that
       block can be enlarged in place (or moved) by realloc */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where more than
       * INT_MAX/2 bytes have already been allocated.  This isn't
       * readily testable, since it is unlikely that an average
       * machine will have that much memory, so we exclude it from the
       * coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
                                           (unsigned)bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE;
    /* Re-derive all pool pointers from the (possibly moved) block */
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    /* Allocate a new block: INIT_BLOCK_SIZE characters at minimum,
       otherwise twice the space the current string has */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = pool->mem->malloc_fcn(bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Carry the characters written so far over into the new block */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
7575
/* Reserve the next node of the DTD's content-model scaffold array.
 *
 * On first use the scaffIndex array is allocated (parser->m_groupSize
 * ints).  The scaffold array is allocated with INIT_SCAFFOLD_ELEMENTS
 * nodes when absent and doubled whenever it is full.  If a group is
 * currently open (dtd->scaffLevel != 0), the new node is linked in as the
 * last child of the node recorded in scaffIndex[scaffLevel - 1].
 *
 * Returns the index of the new node within dtd->scaffold, or -1 if an
 * allocation fails or a size computation would overflow.
 */
static int FASTCALL
nextScaffoldPart(XML_Parser parser) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  CONTENT_SCAFFOLD *me;
  int next;

  if (! dtd->scaffIndex) {
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
      return -1;
    }
#endif
    dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
    if (! dtd->scaffIndex)
      return -1;
    dtd->scaffIndex[0] = 0;
  }

  if (dtd->scaffCount >= dtd->scaffSize) {
    /* Scaffold array is missing or full: create it or double it */
    CONTENT_SCAFFOLD *temp;
    if (dtd->scaffold) {
      /* Detect and prevent integer overflow */
      if (dtd->scaffSize > UINT_MAX / 2u) {
        return -1;
      }
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
        return -1;
      }
#endif

      temp = (CONTENT_SCAFFOLD *)REALLOC(
          parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      /* Record the new capacity only after the reallocation succeeded */
      dtd->scaffSize *= 2;
    } else {
      temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
                                                    * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
    }
    dtd->scaffold = temp;
  }
  next = dtd->scaffCount++;
  me = &dtd->scaffold[next];
  if (dtd->scaffLevel) {
    /* Append the new node to the child list of the enclosing group */
    CONTENT_SCAFFOLD *parent
        = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
    /* A lastchild of 0 is treated as "no child linked yet" */
    if (parent->lastchild) {
      dtd->scaffold[parent->lastchild].nextsib = next;
    }
    if (! parent->childcnt)
      parent->firstchild = next;
    parent->lastchild = next;
    parent->childcnt++;
  }
  /* The new node starts out without children and without a next sibling */
  me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
  return next;
}
7645
/* Build the XML_Content tree for the content model currently held in the
 * DTD scaffold.
 *
 * The result is a single allocation made with MALLOC: dtd->scaffCount
 * XML_Content nodes, immediately followed by the name strings that the
 * nodes point to.
 *
 * Returns the root node (first array element), or NULL if the required
 * size would overflow or the allocation fails.
 */
static XML_Content *
build_model(XML_Parser parser) {
  /* Function build_model transforms the existing parser->m_dtd->scaffold
   * array of CONTENT_SCAFFOLD tree nodes into a new array of
   * XML_Content tree nodes followed by a gapless list of zero-terminated
   * strings. */
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *ret;
  XML_Char *str; /* the current string writing location */

  /* Detect and prevent integer overflow.
   * The preprocessor guard addresses the "always false" warning
   * from -Wtype-limits on platforms where
   * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
  if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
    return NULL;
  }
  if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
    return NULL;
  }
#endif
  /* The sum of both parts must not overflow either */
  if (dtd->scaffCount * sizeof(XML_Content)
      > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
    return NULL;
  }

  const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
                            + (dtd->contentStringLen * sizeof(XML_Char)));

  ret = (XML_Content *)MALLOC(parser, allocsize);
  if (! ret)
    return NULL;

  /* What follows is an iterative implementation of what was previously done
   * recursively in a dedicated function called "build_node".  The old
   * recursive build_node could be forced into stack exhaustion from input as
   * small as a few megabytes, and so that was a security issue.  Hence, a
   * function call stack is avoided now by resolving recursion.
   *
   * The iterative approach works as follows:
   *
   * - We have two writing pointers, both walking up the result array; one does
   *   the work, the other creates "jobs" for its colleague to do, and leads
   *   the way:
   *
   *   - The faster one, pointer jobDest, always leads and writes "what job
   *     to do" by the other, once they reach that place in the
   *     array: leader "jobDest" stores the source node array index (relative
   *     to array dtd->scaffold) in field "numchildren".
   *
   *   - The slower one, pointer dest, looks at the value stored in the
   *     "numchildren" field (which actually holds a source node array index
   *     at that time) and puts the real data from dtd->scaffold in.
   *
   * - Before the loop starts, jobDest writes source array index 0
   *   (where the root node is located) so that dest will have something to do
   *   when it starts operation.
   *
   * - Whenever nodes with children are encountered, jobDest appends
   *   them as new jobs, in order.  As a result, tree node siblings are
   *   adjacent in the resulting array, for example:
   *
   *     [0] root, has two children
   *       [1] first child of 0, has three children
   *         [3] first child of 1, does not have children
   *         [4] second child of 1, does not have children
   *         [5] third child of 1, does not have children
   *       [2] second child of 0, does not have children
   *
   *   Or (the same data) presented in flat array view:
   *
   *     [0] root, has two children
   *
   *     [1] first child of 0, has three children
   *     [2] second child of 0, does not have children
   *
   *     [3] first child of 1, does not have children
   *     [4] second child of 1, does not have children
   *     [5] third child of 1, does not have children
   *
   * - The algorithm repeats until all target array indices have been processed.
   */
  XML_Content *dest = ret; /* tree node writing location, moves upwards */
  XML_Content *const destLimit = &ret[dtd->scaffCount];
  XML_Content *jobDest = ret; /* next free writing location in target array */
  /* The string area begins right behind the last node */
  str = (XML_Char *)&ret[dtd->scaffCount];

  /* Add the starting job, the root node (index 0) of the source tree */
  (jobDest++)->numchildren = 0;

  for (; dest < destLimit; dest++) {
    /* Retrieve source tree array index from job storage */
    const int src_node = (int)dest->numchildren;

    /* Convert item */
    dest->type = dtd->scaffold[src_node].type;
    dest->quant = dtd->scaffold[src_node].quant;
    if (dest->type == XML_CTYPE_NAME) {
      /* Name node: copy the name, terminator included, into the string
         area; such a node has no children */
      const XML_Char *src;
      dest->name = str;
      src = dtd->scaffold[src_node].name;
      for (;;) {
        *str++ = *src;
        if (! *src)
          break;
        src++;
      }
      dest->numchildren = 0;
      dest->children = NULL;
    } else {
      unsigned int i;
      int cn;
      dest->name = NULL;
      dest->numchildren = dtd->scaffold[src_node].childcnt;
      dest->children = jobDest;

      /* Append scaffold indices of children to array */
      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
        (jobDest++)->numchildren = (unsigned int)cn;
    }
  }

  return ret;
}
7772
7773 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7774 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7775 const char *end) {
7776 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7777 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7778 ELEMENT_TYPE *ret;
7779
7780 if (! name)
7781 return NULL;
7782 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7783 sizeof(ELEMENT_TYPE));
7784 if (! ret)
7785 return NULL;
7786 if (ret->name != name)
7787 poolDiscard(&dtd->pool);
7788 else {
7789 poolFinish(&dtd->pool);
7790 if (! setElementTypePrefix(parser, ret))
7791 return NULL;
7792 }
7793 return ret;
7794 }
7795
7796 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7797 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7798 size_t charsRequired = 0;
7799 XML_Char *result;
7800
7801 /* First determine how long the string is */
7802 while (s[charsRequired] != 0) {
7803 charsRequired++;
7804 }
7805 /* Include the terminator */
7806 charsRequired++;
7807
7808 /* Now allocate space for the copy */
7809 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7810 if (result == NULL)
7811 return NULL;
7812 /* Copy the original into place */
7813 memcpy(result, s, charsRequired * sizeof(XML_Char));
7814 return result;
7815 }
7816
7817 #if XML_GE == 1
7818
7819 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7820 accountingGetCurrentAmplification(XML_Parser rootParser) {
7821 // 1.........1.........12 => 22
7822 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
7823 const XmlBigCount countBytesOutput
7824 = rootParser->m_accounting.countBytesDirect
7825 + rootParser->m_accounting.countBytesIndirect;
7826 const float amplificationFactor
7827 = rootParser->m_accounting.countBytesDirect
7828 ? (countBytesOutput
7829 / (float)(rootParser->m_accounting.countBytesDirect))
7830 : ((lenOfShortestInclude
7831 + rootParser->m_accounting.countBytesIndirect)
7832 / (float)lenOfShortestInclude);
7833 assert(! rootParser->m_parentParser);
7834 return amplificationFactor;
7835 }
7836
7837 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7838 accountingReportStats(XML_Parser originParser, const char *epilog) {
7839 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7840 assert(! rootParser->m_parentParser);
7841
7842 if (rootParser->m_accounting.debugLevel == 0u) {
7843 return;
7844 }
7845
7846 const float amplificationFactor
7847 = accountingGetCurrentAmplification(rootParser);
7848 fprintf(stderr,
7849 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7850 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7851 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7852 rootParser->m_accounting.countBytesIndirect,
7853 (double)amplificationFactor, epilog);
7854 }
7855
7856 static void
accountingOnAbort(XML_Parser originParser)7857 accountingOnAbort(XML_Parser originParser) {
7858 accountingReportStats(originParser, " ABORTING\n");
7859 }
7860
7861 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7862 accountingReportDiff(XML_Parser rootParser,
7863 unsigned int levelsAwayFromRootParser, const char *before,
7864 const char *after, ptrdiff_t bytesMore, int source_line,
7865 enum XML_Account account) {
7866 assert(! rootParser->m_parentParser);
7867
7868 fprintf(stderr,
7869 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
7870 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7871 levelsAwayFromRootParser, source_line, 10, "");
7872
7873 const char ellipis[] = "[..]";
7874 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7875 const unsigned int contextLength = 10;
7876
7877 /* Note: Performance is of no concern here */
7878 const char *walker = before;
7879 if ((rootParser->m_accounting.debugLevel >= 3u)
7880 || (after - before)
7881 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7882 for (; walker < after; walker++) {
7883 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7884 }
7885 } else {
7886 for (; walker < before + contextLength; walker++) {
7887 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7888 }
7889 fprintf(stderr, ellipis);
7890 walker = after - contextLength;
7891 for (; walker < after; walker++) {
7892 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7893 }
7894 }
7895 fprintf(stderr, "\"\n");
7896 }
7897
7898 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7899 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7900 const char *after, int source_line,
7901 enum XML_Account account) {
7902 /* Note: We need to check the token type *first* to be sure that
7903 * we can even access variable <after>, safely.
7904 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7905 switch (tok) {
7906 case XML_TOK_INVALID:
7907 case XML_TOK_PARTIAL:
7908 case XML_TOK_PARTIAL_CHAR:
7909 case XML_TOK_NONE:
7910 return XML_TRUE;
7911 }
7912
7913 if (account == XML_ACCOUNT_NONE)
7914 return XML_TRUE; /* because these bytes have been accounted for, already */
7915
7916 unsigned int levelsAwayFromRootParser;
7917 const XML_Parser rootParser
7918 = getRootParserOf(originParser, &levelsAwayFromRootParser);
7919 assert(! rootParser->m_parentParser);
7920
7921 const int isDirect
7922 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7923 const ptrdiff_t bytesMore = after - before;
7924
7925 XmlBigCount *const additionTarget
7926 = isDirect ? &rootParser->m_accounting.countBytesDirect
7927 : &rootParser->m_accounting.countBytesIndirect;
7928
7929 /* Detect and avoid integer overflow */
7930 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7931 return XML_FALSE;
7932 *additionTarget += bytesMore;
7933
7934 const XmlBigCount countBytesOutput
7935 = rootParser->m_accounting.countBytesDirect
7936 + rootParser->m_accounting.countBytesIndirect;
7937 const float amplificationFactor
7938 = accountingGetCurrentAmplification(rootParser);
7939 const XML_Bool tolerated
7940 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7941 || (amplificationFactor
7942 <= rootParser->m_accounting.maximumAmplificationFactor);
7943
7944 if (rootParser->m_accounting.debugLevel >= 2u) {
7945 accountingReportStats(rootParser, "");
7946 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7947 bytesMore, source_line, account);
7948 }
7949
7950 return tolerated;
7951 }
7952
7953 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7954 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7955 if (! parser)
7956 return 0;
7957 return parser->m_accounting.countBytesDirect;
7958 }
7959
7960 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7961 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7962 if (! parser)
7963 return 0;
7964 return parser->m_accounting.countBytesIndirect;
7965 }
7966
7967 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7968 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7969 const char *action, int sourceLine) {
7970 assert(! rootParser->m_parentParser);
7971 if (rootParser->m_entity_stats.debugLevel == 0u)
7972 return;
7973
7974 # if defined(XML_UNICODE)
7975 const char *const entityName = "[..]";
7976 # else
7977 const char *const entityName = entity->name;
7978 # endif
7979
7980 fprintf(
7981 stderr,
7982 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7983 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7984 rootParser->m_entity_stats.currentDepth,
7985 rootParser->m_entity_stats.maximumDepthSeen,
7986 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7987 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7988 sourceLine);
7989 }
7990
7991 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7992 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7993 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7994 assert(! rootParser->m_parentParser);
7995
7996 rootParser->m_entity_stats.countEverOpened++;
7997 rootParser->m_entity_stats.currentDepth++;
7998 if (rootParser->m_entity_stats.currentDepth
7999 > rootParser->m_entity_stats.maximumDepthSeen) {
8000 rootParser->m_entity_stats.maximumDepthSeen++;
8001 }
8002
8003 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
8004 }
8005
8006 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)8007 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8008 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8009 assert(! rootParser->m_parentParser);
8010
8011 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8012 rootParser->m_entity_stats.currentDepth--;
8013 }
8014
8015 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)8016 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8017 XML_Parser rootParser = parser;
8018 unsigned int stepsTakenUpwards = 0;
8019 while (rootParser->m_parentParser) {
8020 rootParser = rootParser->m_parentParser;
8021 stepsTakenUpwards++;
8022 }
8023 assert(! rootParser->m_parentParser);
8024 if (outLevelDiff != NULL) {
8025 *outLevelDiff = stepsTakenUpwards;
8026 }
8027 return rootParser;
8028 }
8029
/* Maps a byte value to a static, NUL-terminated, human-readable string.

   - Bytes 32..126 map to the character itself, except that '"' and '\'
     are returned with a preceding backslash.
   - 0, 9, 10 and 13 map to the two-character texts \0, \t, \n and \r.
   - Every other byte maps to a backslash followed by 'x' and the value in
     upper-case hexadecimal without zero padding (e.g. \x1, \x7F, \xFF).

   The returned pointer refers to static storage and must not be freed or
   written to. */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printableForms[] = {
      /*   0 ..   7 */ "\\0", "\\x1", "\\x2", "\\x3", "\\x4", "\\x5", "\\x6", "\\x7",
      /*   8 ..  15 */ "\\x8", "\\t", "\\n", "\\xB", "\\xC", "\\r", "\\xE", "\\xF",
      /*  16 ..  23 */ "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 ..  31 */ "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 ..  39 */ " ", "!", "\\\"", "#", "$", "%", "&", "'",
      /*  40 ..  47 */ "(", ")", "*", "+", ",", "-", ".", "/",
      /*  48 ..  55 */ "0", "1", "2", "3", "4", "5", "6", "7",
      /*  56 ..  63 */ "8", "9", ":", ";", "<", "=", ">", "?",
      /*  64 ..  71 */ "@", "A", "B", "C", "D", "E", "F", "G",
      /*  72 ..  79 */ "H", "I", "J", "K", "L", "M", "N", "O",
      /*  80 ..  87 */ "P", "Q", "R", "S", "T", "U", "V", "W",
      /*  88 ..  95 */ "X", "Y", "Z", "[", "\\\\", "]", "^", "_",
      /*  96 .. 103 */ "`", "a", "b", "c", "d", "e", "f", "g",
      /* 104 .. 111 */ "h", "i", "j", "k", "l", "m", "n", "o",
      /* 112 .. 119 */ "p", "q", "r", "s", "t", "u", "v", "w",
      /* 120 .. 127 */ "x", "y", "z", "{", "|", "}", "~", "\\x7F",
      /* 128 .. 135 */ "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 .. 143 */ "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 .. 151 */ "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 .. 159 */ "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 .. 167 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 .. 175 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 .. 183 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 .. 191 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 .. 199 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 .. 207 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 .. 215 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 .. 223 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 .. 231 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 .. 239 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 .. 247 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 .. 255 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };
  const unsigned int index = c;

  /* Only reachable if unsigned char can hold values above 255. */
  if (index >= sizeof(printableForms) / sizeof(printableForms[0])) {
    assert(0); /* never gets here */
    return "dead code";
  }
  return printableForms[index];
}
8551
8552 #endif /* XML_GE == 1 */
8553
/* Reads a debug level from the environment variable named `variableName`.

   Returns `defaultDebugLevel` when the variable is unset, or when its value
   is not a complete base-10 unsigned number: empty, trailing garbage, out
   of range for unsigned long, or carrying a minus sign. Otherwise returns
   the parsed value.

   Side effect: when the variable is set, errno is 0 on return. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const value = getenv(variableName);
  if (value == NULL) {
    return defaultDebugLevel;
  }

  /* strtoul reports range errors only through errno, so clear it first. */
  errno = 0;
  char *afterValue = NULL;
  const unsigned long debugLevel = strtoul(value, &afterValue, 10);
  if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
    errno = 0;
    return defaultDebugLevel;
  }

  /* strtoul accepts a leading minus sign and negates the result in unsigned
     arithmetic, so "-1" would silently become ULONG_MAX. At this point the
     whole string was consumed as [whitespace][sign]digits, hence any '-'
     found is that sign; treat such input as invalid. */
  for (const char *cursor = value; cursor != afterValue; cursor++) {
    if (*cursor == '-') {
      return defaultDebugLevel;
    }
  }

  return debugLevel;
}
8572