1 /* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Licensed under the MIT license:
43
44 Permission is hereby granted, free of charge, to any person obtaining
45 a copy of this software and associated documentation files (the
46 "Software"), to deal in the Software without restriction, including
47 without limitation the rights to use, copy, modify, merge, publish,
48 distribute, sublicense, and/or sell copies of the Software, and to permit
49 persons to whom the Software is furnished to do so, subject to the
50 following conditions:
51
52 The above copyright notice and this permission notice shall be included
53 in all copies or substantial portions of the Software.
54
55 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
56 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
57 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
58 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
59 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
60 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
61 USE OR OTHER DEALINGS IN THE SOFTWARE.
62 */
63
64 #define XML_BUILDING_EXPAT 1
65
66 #include "expat_config.h"
67
68 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
69 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
70 #endif
71
72 #if defined(XML_DTD) && XML_GE == 0
73 # error Either undefine XML_DTD or define XML_GE to 1.
74 #endif
75
76 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
77 || (XML_CONTEXT_BYTES + 0 < 0)
78 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
79 #endif
80
81 #if defined(HAVE_SYSCALL_GETRANDOM)
82 # if ! defined(_GNU_SOURCE)
83 # define _GNU_SOURCE 1 /* syscall prototype */
84 # endif
85 #endif
86
87 #ifdef _WIN32
88 /* force stdlib to define rand_s() */
89 # if ! defined(_CRT_RAND_S)
90 # define _CRT_RAND_S
91 # endif
92 #endif
93
94 #include <stdbool.h>
95 #include <stddef.h>
96 #include <string.h> /* memset(), memcpy() */
97 #include <assert.h>
98 #include <limits.h> /* UINT_MAX */
99 #include <stdio.h> /* fprintf */
100 #include <stdlib.h> /* getenv, rand_s */
101 #include <stdint.h> /* uintptr_t */
102 #include <math.h> /* isnan */
103
104 #ifdef _WIN32
105 # define getpid GetCurrentProcessId
106 #else
107 # include <sys/time.h> /* gettimeofday() */
108 # include <sys/types.h> /* getpid() */
109 # include <unistd.h> /* getpid() */
110 # include <fcntl.h> /* O_RDONLY */
111 # include <errno.h>
112 #endif
113
114 #ifdef _WIN32
115 # include "winconfig.h"
116 #endif
117
118 #include "ascii.h"
119 #include "expat.h"
120 #include "siphash.h"
121
122 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
123 # if defined(HAVE_GETRANDOM)
124 # include <sys/random.h> /* getrandom */
125 # else
126 # include <unistd.h> /* syscall */
127 # include <sys/syscall.h> /* SYS_getrandom */
128 # endif
129 # if ! defined(GRND_NONBLOCK)
130 # define GRND_NONBLOCK 0x0001
131 # endif /* defined(GRND_NONBLOCK) */
132 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
133
134 #if defined(HAVE_LIBBSD) \
135 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
136 # include <bsd/stdlib.h>
137 #endif
138
139 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
140 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
141 #endif
142
143 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
144 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
145 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
146 && ! defined(XML_POOR_ENTROPY)
147 # error You do not have support for any sources of high quality entropy \
148 enabled. For end user security, that is probably not what you want. \
149 \
150 Your options include: \
151 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
152 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
153 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
154 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
155 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
156 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
157 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
158 * Windows >=Vista (rand_s): _WIN32. \
159 \
160 If you insist on not using any of these, bypass this error by defining \
161 XML_POOR_ENTROPY; you have been warned. \
162 \
163 If you have reasons to patch this detection code away or need changes \
164 to the build system, please open a bug. Thank you!
165 #endif
166
/* Bind the generic Xml* helper names, MUST_CONVERT and the ICHAR type to
   their UTF-8 flavour by default, or to their UTF-16 flavour when
   XML_UNICODE is defined. */
#ifndef XML_UNICODE
#  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#  define XmlConvert XmlUtf8Convert
#  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#  define XmlEncode XmlUtf8Encode
/* Conversion is needed unless the document encoding is flagged isUtf8. */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
typedef char ICHAR;
#else
#  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#  define XmlConvert XmlUtf16Convert
#  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#  define XmlEncode XmlUtf16Encode
/* Conversion is needed unless the document encoding is flagged isUtf16
   AND the input pointer has an even address. */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#endif
184
/* Without namespace support (XML_NS undefined) every *NS entry point is
   simply an alias for its non-namespace counterpart. */
#if ! defined(XML_NS)

#  define XmlInitEncodingNS XmlInitEncoding
#  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#  undef XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS XmlGetInternalEncoding
#  define XmlParseXmlDeclNS XmlParseXmlDecl

#endif /* ! defined(XML_NS) */
194
/* XML_T(x) adapts a character constant and XML_L(x) a literal to the
   configured character type:
     - XML_UNICODE + XML_UNICODE_WCHAR_T: wchar_t, L-prefixed literals
     - XML_UNICODE only:                  unsigned short
     - otherwise:                         passed through unchanged */
#if defined(XML_UNICODE) && defined(XML_UNICODE_WCHAR_T)

#  define XML_T(x) (const wchar_t) x
#  define XML_L(x) L##x

#elif defined(XML_UNICODE)

#  define XML_T(x) (const unsigned short)x
#  define XML_L(x) x

#else

#  define XML_T(x) x
#  define XML_L(x) x

#endif
211
/* Smallest multiple of pow2 that is >= value; pow2 must be a power of 2.
   Both arguments are evaluated more than once. */
#define ROUND_UP(value, pow2) (((value) + ((pow2)-1)) & ~((pow2)-1))
214
/* NULL-aware pointer subtraction: yields 0 when either operand is NULL,
   otherwise lhs - rhs. */
#define EXPAT_SAFE_PTR_DIFF(lhs, rhs)                                          \
  ((! (lhs) || ! (rhs)) ? 0 : ((lhs) - (rhs)))
217
/* Lesser of two values; when neither is smaller, the second is returned.
   Arguments are evaluated more than once. */
#define EXPAT_MIN(x, y) ((! ((x) < (y))) ? (y) : (x))
219
220 #include "internal.h"
221 #include "xmltok.h"
222 #include "xmlrole.h"
223
224 typedef const XML_Char *KEY;
225
226 typedef struct {
227 KEY name;
228 } NAMED;
229
230 typedef struct {
231 NAMED **v;
232 unsigned char power;
233 size_t size;
234 size_t used;
235 const XML_Memory_Handling_Suite *mem;
236 } HASH_TABLE;
237
238 static size_t keylen(KEY s);
239
240 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
241
/* Collision probing needs a step size that is relatively prime to the
   table size, which is a power of 2.  Double hashing is used: the second
   hash value comes for free from the bits of the first hash that were
   masked out when the slot index was computed
   (index = hash & mask, mask = table->size - 1).
   The step is capped at table->size / 4 (mask >> 2) and forced odd, since
   every odd number is relatively prime to a power of 2.
*/
#define SECOND_HASH(h, m, pw)                                                  \
  ((((h) & ~(m)) >> ((pw)-1)) & ((m) >> 2))
#define PROBE_STEP(h, m, pw) ((unsigned char)((SECOND_HASH(h, m, pw)) | 1))
254
255 typedef struct {
256 NAMED **p;
257 NAMED **end;
258 } HASH_TABLE_ITER;
259
/* Initial sizes and sentinel values used by the parser. */
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
268
269 typedef struct binding {
270 struct prefix *prefix;
271 struct binding *nextTagBinding;
272 struct binding *prevPrefixBinding;
273 const struct attribute_id *attId;
274 XML_Char *uri;
275 int uriLen;
276 int uriAlloc;
277 } BINDING;
278
279 typedef struct prefix {
280 const XML_Char *name;
281 BINDING *binding;
282 } PREFIX;
283
284 typedef struct {
285 const XML_Char *str;
286 const XML_Char *localPart;
287 const XML_Char *prefix;
288 int strLen;
289 int uriLen;
290 int prefixLen;
291 } TAG_NAME;
292
293 /* TAG represents an open element.
294 The name of the element is stored in both the document and API
295 encodings. The memory buffer 'buf' is a separately-allocated
296 memory area which stores the name. During the XML_Parse()/
297 XMLParseBuffer() when the element is open, the memory for the 'raw'
298 version of the name (in the document encoding) is shared with the
299 document buffer. If the element is open across calls to
300 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
301 contain the 'raw' name as well.
302
303 A parser reuses these structures, maintaining a list of allocated
304 TAG objects in a free list.
305 */
306 typedef struct tag {
307 struct tag *parent; /* parent of this element */
308 const char *rawName; /* tagName in the original encoding */
309 int rawNameLength;
310 TAG_NAME name; /* tagName in the API encoding */
311 char *buf; /* buffer for name components */
312 char *bufEnd; /* end of the buffer */
313 BINDING *bindings;
314 } TAG;
315
316 typedef struct {
317 const XML_Char *name;
318 const XML_Char *textPtr;
319 int textLen; /* length in XML_Chars */
320 int processed; /* # of processed bytes - when suspended */
321 const XML_Char *systemId;
322 const XML_Char *base;
323 const XML_Char *publicId;
324 const XML_Char *notation;
325 XML_Bool open;
326 XML_Bool is_param;
327 XML_Bool is_internal; /* true if declared in internal subset outside PE */
328 } ENTITY;
329
330 typedef struct {
331 enum XML_Content_Type type;
332 enum XML_Content_Quant quant;
333 const XML_Char *name;
334 int firstchild;
335 int lastchild;
336 int childcnt;
337 int nextsib;
338 } CONTENT_SCAFFOLD;
339
340 #define INIT_SCAFFOLD_ELEMENTS 32
341
342 typedef struct block {
343 struct block *next;
344 int size;
345 XML_Char s[1];
346 } BLOCK;
347
348 typedef struct {
349 BLOCK *blocks;
350 BLOCK *freeBlocks;
351 const XML_Char *end;
352 XML_Char *ptr;
353 XML_Char *start;
354 const XML_Memory_Handling_Suite *mem;
355 } STRING_POOL;
356
357 /* The XML_Char before the name is used to determine whether
358 an attribute has been specified. */
359 typedef struct attribute_id {
360 XML_Char *name;
361 PREFIX *prefix;
362 XML_Bool maybeTokenized;
363 XML_Bool xmlns;
364 } ATTRIBUTE_ID;
365
366 typedef struct {
367 const ATTRIBUTE_ID *id;
368 XML_Bool isCdata;
369 const XML_Char *value;
370 } DEFAULT_ATTRIBUTE;
371
372 typedef struct {
373 unsigned long version;
374 unsigned long hash;
375 const XML_Char *uriName;
376 } NS_ATT;
377
378 typedef struct {
379 const XML_Char *name;
380 PREFIX *prefix;
381 const ATTRIBUTE_ID *idAtt;
382 int nDefaultAtts;
383 int allocDefaultAtts;
384 DEFAULT_ATTRIBUTE *defaultAtts;
385 } ELEMENT_TYPE;
386
387 typedef struct {
388 HASH_TABLE generalEntities;
389 HASH_TABLE elementTypes;
390 HASH_TABLE attributeIds;
391 HASH_TABLE prefixes;
392 STRING_POOL pool;
393 STRING_POOL entityValuePool;
394 /* false once a parameter entity reference has been skipped */
395 XML_Bool keepProcessing;
396 /* true once an internal or external PE reference has been encountered;
397 this includes the reference to an external subset */
398 XML_Bool hasParamEntityRefs;
399 XML_Bool standalone;
400 #ifdef XML_DTD
401 /* indicates if external PE has been read */
402 XML_Bool paramEntityRead;
403 HASH_TABLE paramEntities;
404 #endif /* XML_DTD */
405 PREFIX defaultPrefix;
406 /* === scaffolding for building content model === */
407 XML_Bool in_eldecl;
408 CONTENT_SCAFFOLD *scaffold;
409 unsigned contentStringLen;
410 unsigned scaffSize;
411 unsigned scaffCount;
412 int scaffLevel;
413 int *scaffIndex;
414 } DTD;
415
416 typedef struct open_internal_entity {
417 const char *internalEventPtr;
418 const char *internalEventEndPtr;
419 struct open_internal_entity *next;
420 ENTITY *entity;
421 int startTagLevel;
422 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
423 } OPEN_INTERNAL_ENTITY;
424
/* How a span of bytes is to be counted by the accounting code. */
enum XML_Account {
  /* bytes directly passed to the Expat parser */
  XML_ACCOUNT_DIRECT = 0,
  /* intermediate bytes produced during entity expansion */
  XML_ACCOUNT_ENTITY_EXPANSION = 1,
  /* i.e. do not account, was accounted already */
  XML_ACCOUNT_NONE = 2
};
431
#if XML_GE == 1
/* Counter type wide enough for the byte accounting below. */
typedef unsigned long long XmlBigCount;

/* Byte counters and limits kept by the accounting code. */
typedef struct accounting {
  XmlBigCount countBytesDirect;
  XmlBigCount countBytesIndirect;
  unsigned long debugLevel;
  float maximumAmplificationFactor; /* >= 1.0 */
  unsigned long long activationThresholdBytes;
} ACCOUNTING;

/* Counters kept by the entity tracking code. */
typedef struct entity_stats {
  unsigned int countEverOpened;
  unsigned int currentDepth;
  unsigned int maximumDepthSeen;
  unsigned long debugLevel;
} ENTITY_STATS;
#endif /* XML_GE == 1 */
449
450 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
451 const char *end, const char **endPtr);
452
453 static Processor prologProcessor;
454 static Processor prologInitProcessor;
455 static Processor contentProcessor;
456 static Processor cdataSectionProcessor;
457 #ifdef XML_DTD
458 static Processor ignoreSectionProcessor;
459 static Processor externalParEntProcessor;
460 static Processor externalParEntInitProcessor;
461 static Processor entityValueProcessor;
462 static Processor entityValueInitProcessor;
463 #endif /* XML_DTD */
464 static Processor epilogProcessor;
465 static Processor errorProcessor;
466 static Processor externalEntityInitProcessor;
467 static Processor externalEntityInitProcessor2;
468 static Processor externalEntityInitProcessor3;
469 static Processor externalEntityContentProcessor;
470 static Processor internalEntityProcessor;
471
472 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
473 const XML_Char *encodingName);
474 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
475 const char *s, const char *next);
476 static enum XML_Error initializeEncoding(XML_Parser parser);
477 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
478 const char *s, const char *end, int tok,
479 const char *next, const char **nextPtr,
480 XML_Bool haveMore, XML_Bool allowClosingDoctype,
481 enum XML_Account account);
482 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
483 XML_Bool betweenDecl);
484 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
485 const ENCODING *enc, const char *start,
486 const char *end, const char **endPtr,
487 XML_Bool haveMore, enum XML_Account account);
488 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
489 const char **startPtr, const char *end,
490 const char **nextPtr, XML_Bool haveMore,
491 enum XML_Account account);
492 #ifdef XML_DTD
493 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
494 const char **startPtr, const char *end,
495 const char **nextPtr, XML_Bool haveMore);
496 #endif /* XML_DTD */
497
498 static void freeBindings(XML_Parser parser, BINDING *bindings);
499 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
500 const char *attStr, TAG_NAME *tagNamePtr,
501 BINDING **bindingsPtr,
502 enum XML_Account account);
503 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
504 const ATTRIBUTE_ID *attId, const XML_Char *uri,
505 BINDING **bindingsPtr);
506 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
507 XML_Bool isCdata, XML_Bool isId,
508 const XML_Char *value, XML_Parser parser);
509 static enum XML_Error storeAttributeValue(XML_Parser parser,
510 const ENCODING *enc, XML_Bool isCdata,
511 const char *ptr, const char *end,
512 STRING_POOL *pool,
513 enum XML_Account account);
514 static enum XML_Error appendAttributeValue(XML_Parser parser,
515 const ENCODING *enc,
516 XML_Bool isCdata, const char *ptr,
517 const char *end, STRING_POOL *pool,
518 enum XML_Account account);
519 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
520 const char *start, const char *end);
521 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
522 #if XML_GE == 1
523 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
524 const char *start, const char *end,
525 enum XML_Account account);
526 #else
527 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
528 #endif
529 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
530 const char *start, const char *end);
531 static int reportComment(XML_Parser parser, const ENCODING *enc,
532 const char *start, const char *end);
533 static void reportDefault(XML_Parser parser, const ENCODING *enc,
534 const char *start, const char *end);
535
536 static const XML_Char *getContext(XML_Parser parser);
537 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
538
539 static void FASTCALL normalizePublicId(XML_Char *s);
540
541 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
542 /* do not call if m_parentParser != NULL */
543 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
544 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
545 const XML_Memory_Handling_Suite *ms);
546 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
547 const XML_Memory_Handling_Suite *ms);
548 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
549 STRING_POOL *newPool, const HASH_TABLE *oldTable);
550 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
551 size_t createSize);
552 static void FASTCALL hashTableInit(HASH_TABLE *table,
553 const XML_Memory_Handling_Suite *ms);
554 static void FASTCALL hashTableClear(HASH_TABLE *table);
555 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
556 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
557 const HASH_TABLE *table);
558 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
559
560 static void FASTCALL poolInit(STRING_POOL *pool,
561 const XML_Memory_Handling_Suite *ms);
562 static void FASTCALL poolClear(STRING_POOL *pool);
563 static void FASTCALL poolDestroy(STRING_POOL *pool);
564 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
565 const char *ptr, const char *end);
566 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
567 const char *ptr, const char *end);
568 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
569 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
570 const XML_Char *s);
571 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
572 int n);
573 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
574 const XML_Char *s);
575
576 static int FASTCALL nextScaffoldPart(XML_Parser parser);
577 static XML_Content *build_model(XML_Parser parser);
578 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
579 const char *ptr, const char *end);
580
581 static XML_Char *copyString(const XML_Char *s,
582 const XML_Memory_Handling_Suite *memsuite);
583
584 static unsigned long generate_hash_secret_salt(XML_Parser parser);
585 static XML_Bool startParsing(XML_Parser parser);
586
587 static XML_Parser parserCreate(const XML_Char *encodingName,
588 const XML_Memory_Handling_Suite *memsuite,
589 const XML_Char *nameSep, DTD *dtd);
590
591 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
592
#if XML_GE == 1
/* ---- byte accounting ---- */
static float accountingGetCurrentAmplification(XML_Parser rootParser);
static void accountingReportStats(XML_Parser originParser, const char *epilog);
static void accountingOnAbort(XML_Parser originParser);
static void accountingReportDiff(XML_Parser rootParser,
                                 unsigned int levelsAwayFromRootParser,
                                 const char *before, const char *after,
                                 ptrdiff_t bytesMore, int source_line,
                                 enum XML_Account account);
static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
                                        const char *before, const char *after,
                                        int source_line,
                                        enum XML_Account account);

/* ---- entity tracking ---- */
static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
                                      const char *action, int sourceLine);
static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
                                 int sourceLine);
static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
                                  int sourceLine);

static XML_Parser getRootParserOf(XML_Parser parser,
                                  unsigned int *outLevelDiff);
#endif /* XML_GE == 1 */

/* Declared regardless of XML_GE. */
static unsigned long getDebugLevel(const char *variableName,
                                   unsigned long defaultDebugLevel);
620
/* Accessors for a STRING_POOL (passed by pointer).  The string under
   construction occupies [start, ptr).  Every macro parenthesises its
   arguments, so any pointer-valued expression (e.g. &obj) may be passed;
   note that 'pool' is still evaluated more than once.

   poolStart      - start of the string under construction
   poolLength     - number of XML_Chars appended so far
   poolChop       - drop the last appended XML_Char
   poolLastChar   - lvalue of the last appended XML_Char
   poolDiscard    - throw away the string under construction
   poolFinish     - begin a new string right after the current one
   poolAppendChar - append c, calling poolGrow() first when ptr has
                    reached end; yields 0 if poolGrow() fails, else 1 */
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
631
632 XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
633 unsigned int g_parseAttempts = 0; // used for testing only
634
635 struct XML_ParserStruct {
636 /* The first member must be m_userData so that the XML_GetUserData
637 macro works. */
638 void *m_userData;
639 void *m_handlerArg;
640
641 // How the four parse buffer pointers below relate in time and space:
642 //
643 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
644 // | | | |
645 // <--parsed-->| | |
646 // <---parsing--->| |
647 // <--unoccupied-->|
648 // <---------total-malloced/realloced-------->|
649
650 char *m_buffer; // malloc/realloc base pointer of parse buffer
651 const XML_Memory_Handling_Suite m_mem;
652 const char *m_bufferPtr; // first character to be parsed
653 char *m_bufferEnd; // past last character to be parsed
654 const char *m_bufferLim; // allocated end of m_buffer
655
656 XML_Index m_parseEndByteIndex;
657 const char *m_parseEndPtr;
658 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
659 XML_Bool m_reparseDeferralEnabled;
660 int m_lastBufferRequestSize;
661 XML_Char *m_dataBuf;
662 XML_Char *m_dataBufEnd;
663 XML_StartElementHandler m_startElementHandler;
664 XML_EndElementHandler m_endElementHandler;
665 XML_CharacterDataHandler m_characterDataHandler;
666 XML_ProcessingInstructionHandler m_processingInstructionHandler;
667 XML_CommentHandler m_commentHandler;
668 XML_StartCdataSectionHandler m_startCdataSectionHandler;
669 XML_EndCdataSectionHandler m_endCdataSectionHandler;
670 XML_DefaultHandler m_defaultHandler;
671 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
672 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
673 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
674 XML_NotationDeclHandler m_notationDeclHandler;
675 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
676 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
677 XML_NotStandaloneHandler m_notStandaloneHandler;
678 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
679 XML_Parser m_externalEntityRefHandlerArg;
680 XML_SkippedEntityHandler m_skippedEntityHandler;
681 XML_UnknownEncodingHandler m_unknownEncodingHandler;
682 XML_ElementDeclHandler m_elementDeclHandler;
683 XML_AttlistDeclHandler m_attlistDeclHandler;
684 XML_EntityDeclHandler m_entityDeclHandler;
685 XML_XmlDeclHandler m_xmlDeclHandler;
686 const ENCODING *m_encoding;
687 INIT_ENCODING m_initEncoding;
688 const ENCODING *m_internalEncoding;
689 const XML_Char *m_protocolEncodingName;
690 XML_Bool m_ns;
691 XML_Bool m_ns_triplets;
692 void *m_unknownEncodingMem;
693 void *m_unknownEncodingData;
694 void *m_unknownEncodingHandlerData;
695 void(XMLCALL *m_unknownEncodingRelease)(void *);
696 PROLOG_STATE m_prologState;
697 Processor *m_processor;
698 enum XML_Error m_errorCode;
699 const char *m_eventPtr;
700 const char *m_eventEndPtr;
701 const char *m_positionPtr;
702 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
703 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
704 XML_Bool m_defaultExpandInternalEntities;
705 int m_tagLevel;
706 ENTITY *m_declEntity;
707 const XML_Char *m_doctypeName;
708 const XML_Char *m_doctypeSysid;
709 const XML_Char *m_doctypePubid;
710 const XML_Char *m_declAttributeType;
711 const XML_Char *m_declNotationName;
712 const XML_Char *m_declNotationPublicId;
713 ELEMENT_TYPE *m_declElementType;
714 ATTRIBUTE_ID *m_declAttributeId;
715 XML_Bool m_declAttributeIsCdata;
716 XML_Bool m_declAttributeIsId;
717 DTD *m_dtd;
718 const XML_Char *m_curBase;
719 TAG *m_tagStack;
720 TAG *m_freeTagList;
721 BINDING *m_inheritedBindings;
722 BINDING *m_freeBindingList;
723 int m_attsSize;
724 int m_nSpecifiedAtts;
725 int m_idAttIndex;
726 ATTRIBUTE *m_atts;
727 NS_ATT *m_nsAtts;
728 unsigned long m_nsAttsVersion;
729 unsigned char m_nsAttsPower;
730 #ifdef XML_ATTR_INFO
731 XML_AttrInfo *m_attInfo;
732 #endif
733 POSITION m_position;
734 STRING_POOL m_tempPool;
735 STRING_POOL m_temp2Pool;
736 char *m_groupConnector;
737 unsigned int m_groupSize;
738 XML_Char m_namespaceSeparator;
739 XML_Parser m_parentParser;
740 XML_ParsingStatus m_parsingStatus;
741 #ifdef XML_DTD
742 XML_Bool m_isParamEntity;
743 XML_Bool m_useForeignDTD;
744 enum XML_ParamEntityParsing m_paramEntityParsing;
745 #endif
746 unsigned long m_hash_secret_salt;
747 #if XML_GE == 1
748 ACCOUNTING m_accounting;
749 ENTITY_STATS m_entity_stats;
750 #endif
751 };
752
/* Allocate, resize and release memory through the memory handling suite
   stored in the given parser (m_mem).  'parser' is parenthesised so that
   any pointer-valued expression, e.g. &obj or a conditional, may be
   passed. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
756
757 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)758 XML_ParserCreate(const XML_Char *encodingName) {
759 return XML_ParserCreate_MM(encodingName, NULL, NULL);
760 }
761
762 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)763 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
764 XML_Char tmp[2] = {nsSep, 0};
765 return XML_ParserCreate_MM(encodingName, NULL, tmp);
766 }
767
768 // "xml=http://www.w3.org/XML/1998/namespace"
769 static const XML_Char implicitContext[]
770 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
771 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
772 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
773 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
774 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
775 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
776 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
777 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
778 '\0'};
779
780 /* To avoid warnings about unused functions: */
781 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
782
783 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
784
785 /* Obtain entropy on Linux 3.17+ */
786 static int
writeRandomBytes_getrandom_nonblock(void * target,size_t count)787 writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
788 int success = 0; /* full count bytes written? */
789 size_t bytesWrittenTotal = 0;
790 const unsigned int getrandomFlags = GRND_NONBLOCK;
791
792 do {
793 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
794 const size_t bytesToWrite = count - bytesWrittenTotal;
795
796 const int bytesWrittenMore =
797 # if defined(HAVE_GETRANDOM)
798 getrandom(currentTarget, bytesToWrite, getrandomFlags);
799 # else
800 syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
801 # endif
802
803 if (bytesWrittenMore > 0) {
804 bytesWrittenTotal += bytesWrittenMore;
805 if (bytesWrittenTotal >= count)
806 success = 1;
807 }
808 } while (! success && (errno == EINTR));
809
810 return success;
811 }
812
813 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
814
815 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
816
/* Extract entropy from /dev/urandom.

   target: buffer that receives the random bytes
   count:  number of bytes wanted

   Returns 1 if all count bytes were read, 0 otherwise (the device could
   not be opened, a read failed, or count == 0).

   errno is consulted only when read() itself reported failure, so a stale
   EINTR left behind by an earlier call can neither make the loop spin
   (e.g. when read() returns 0) nor hide a short read.  Short reads are
   continued and reads interrupted by a signal (EINTR) are retried. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      continue; /* short read: ask for the remainder */
    }

    if (bytesWrittenMore < 0 && errno == EINTR)
      continue; /* interrupted before anything was read: retry */

    break; /* real error, or nothing was read */
  }

  close(fd);
  return success;
}
844
845 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
846
847 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
848
849 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
850
/* Fill target with count random bytes taken from arc4random().
   One 32-bit value is drawn per group of up to four output bytes and is
   handed out least significant byte first. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    size_t byteIndex;

    for (byteIndex = 0; byteIndex < sizeof(word) && filled < count;
         byteIndex++) {
      out[filled++] = (uint8_t)(word >> (byteIndex * 8));
    }
  }
}
866
867 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
868
869 #ifdef _WIN32
870
871 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
872 as it didn't declare it in its header prior to version 5.3.0 of its
873 runtime package (mingwrt, containing stdlib.h). The upstream fix
874 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
875 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
876 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
877 __declspec(dllimport) int rand_s(unsigned int *);
878 # endif
879
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Stores count bytes at target, taking the bytes of each rand_s() result
 * least significant first.  Returns 1 on success and 0 as soon as a
 * rand_s() call reports failure (target may be partially filled then).
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t pos = 0;

  while (pos < count) {
    unsigned int word = 0;
    const size_t remaining = count - pos;
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    for (size_t k = 0; k < chunk; k++) {
      out[pos + k] = (uint8_t)(word >> (k * 8));
    }
    pos += chunk;
  }
  return 1; /* success */
}
903
904 #endif /* _WIN32 */
905
906 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
907
/* Derive a few bits of entropy from the current time.

   On Windows the two 32-bit halves of the system FILETIME are XORed
   together; elsewhere the microseconds field of gettimeofday() is
   returned. */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Failure is only detected in debug builds.  The cast keeps rc "used"
     when assert() expands to nothing under NDEBUG. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
930
931 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
932
/* Pass-through helper: always returns entropy unchanged.  When
   getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) yields at least 1, the value is
   also printed to stderr, tagged with label (the name of the provider
   that produced it). */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
941
942 static unsigned long
generate_hash_secret_salt(XML_Parser parser)943 generate_hash_secret_salt(XML_Parser parser) {
944 unsigned long entropy;
945 (void)parser;
946
947 /* "Failproof" high quality providers: */
948 #if defined(HAVE_ARC4RANDOM_BUF)
949 arc4random_buf(&entropy, sizeof(entropy));
950 return ENTROPY_DEBUG("arc4random_buf", entropy);
951 #elif defined(HAVE_ARC4RANDOM)
952 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
953 return ENTROPY_DEBUG("arc4random", entropy);
954 #else
955 /* Try high quality providers first .. */
956 # ifdef _WIN32
957 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
958 return ENTROPY_DEBUG("rand_s", entropy);
959 }
960 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
961 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
962 return ENTROPY_DEBUG("getrandom", entropy);
963 }
964 # endif
965 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
966 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
967 return ENTROPY_DEBUG("/dev/urandom", entropy);
968 }
969 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
970 /* .. and self-made low quality for backup: */
971
972 /* Process ID is 0 bits entropy if attacker has local access */
973 entropy = gather_time_entropy() ^ getpid();
974
975 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
976 if (sizeof(unsigned long) == 4) {
977 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
978 } else {
979 return ENTROPY_DEBUG("fallback(8)",
980 entropy * (unsigned long)2305843009213693951ULL);
981 }
982 #endif
983 }
984
985 static unsigned long
get_hash_secret_salt(XML_Parser parser)986 get_hash_secret_salt(XML_Parser parser) {
987 if (parser->m_parentParser != NULL)
988 return get_hash_secret_salt(parser->m_parentParser);
989 return parser->m_hash_secret_salt;
990 }
991
992 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)993 callProcessor(XML_Parser parser, const char *start, const char *end,
994 const char **endPtr) {
995 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
996
997 if (parser->m_reparseDeferralEnabled
998 && ! parser->m_parsingStatus.finalBuffer) {
999 // Heuristic: don't try to parse a partial token again until the amount of
1000 // available data has increased significantly.
1001 const size_t had_before = parser->m_partialTokenBytesBefore;
1002 // ...but *do* try anyway if we're close to causing a reallocation.
1003 size_t available_buffer
1004 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1005 #if XML_CONTEXT_BYTES > 0
1006 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1007 #endif
1008 available_buffer
1009 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1010 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1011 const bool enough
1012 = (have_now >= 2 * had_before)
1013 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1014
1015 if (! enough) {
1016 *endPtr = start; // callers may expect this to be set
1017 return XML_ERROR_NONE;
1018 }
1019 }
1020 g_parseAttempts += 1;
1021 const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
1022 if (ret == XML_ERROR_NONE) {
1023 // if we consumed nothing, remember what we had on this parse attempt.
1024 if (*endPtr == start) {
1025 parser->m_partialTokenBytesBefore = have_now;
1026 } else {
1027 parser->m_partialTokenBytesBefore = 0;
1028 }
1029 }
1030 return ret;
1031 }
1032
1033 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1034 startParsing(XML_Parser parser) {
1035 /* hash functions must be initialized before setContext() is called */
1036 if (parser->m_hash_secret_salt == 0)
1037 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1038 if (parser->m_ns) {
1039 /* implicit context only set for root parser, since child
1040 parsers (i.e. external entity parsers) will inherit it
1041 */
1042 return setContext(parser, implicitContext);
1043 }
1044 return XML_TRUE;
1045 }
1046
1047 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1048 XML_ParserCreate_MM(const XML_Char *encodingName,
1049 const XML_Memory_Handling_Suite *memsuite,
1050 const XML_Char *nameSep) {
1051 return parserCreate(encodingName, memsuite, nameSep, NULL);
1052 }
1053
1054 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)1055 parserCreate(const XML_Char *encodingName,
1056 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1057 DTD *dtd) {
1058 XML_Parser parser;
1059
1060 if (memsuite) {
1061 XML_Memory_Handling_Suite *mtemp;
1062 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1063 if (parser != NULL) {
1064 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1065 mtemp->malloc_fcn = memsuite->malloc_fcn;
1066 mtemp->realloc_fcn = memsuite->realloc_fcn;
1067 mtemp->free_fcn = memsuite->free_fcn;
1068 }
1069 } else {
1070 XML_Memory_Handling_Suite *mtemp;
1071 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1072 if (parser != NULL) {
1073 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1074 mtemp->malloc_fcn = malloc;
1075 mtemp->realloc_fcn = realloc;
1076 mtemp->free_fcn = free;
1077 }
1078 }
1079
1080 if (! parser)
1081 return parser;
1082
1083 parser->m_buffer = NULL;
1084 parser->m_bufferLim = NULL;
1085
1086 parser->m_attsSize = INIT_ATTS_SIZE;
1087 parser->m_atts
1088 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1089 if (parser->m_atts == NULL) {
1090 FREE(parser, parser);
1091 return NULL;
1092 }
1093 #ifdef XML_ATTR_INFO
1094 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1095 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1096 if (parser->m_attInfo == NULL) {
1097 FREE(parser, parser->m_atts);
1098 FREE(parser, parser);
1099 return NULL;
1100 }
1101 #endif
1102 parser->m_dataBuf
1103 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1104 if (parser->m_dataBuf == NULL) {
1105 FREE(parser, parser->m_atts);
1106 #ifdef XML_ATTR_INFO
1107 FREE(parser, parser->m_attInfo);
1108 #endif
1109 FREE(parser, parser);
1110 return NULL;
1111 }
1112 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1113
1114 if (dtd)
1115 parser->m_dtd = dtd;
1116 else {
1117 parser->m_dtd = dtdCreate(&parser->m_mem);
1118 if (parser->m_dtd == NULL) {
1119 FREE(parser, parser->m_dataBuf);
1120 FREE(parser, parser->m_atts);
1121 #ifdef XML_ATTR_INFO
1122 FREE(parser, parser->m_attInfo);
1123 #endif
1124 FREE(parser, parser);
1125 return NULL;
1126 }
1127 }
1128
1129 parser->m_freeBindingList = NULL;
1130 parser->m_freeTagList = NULL;
1131 parser->m_freeInternalEntities = NULL;
1132
1133 parser->m_groupSize = 0;
1134 parser->m_groupConnector = NULL;
1135
1136 parser->m_unknownEncodingHandler = NULL;
1137 parser->m_unknownEncodingHandlerData = NULL;
1138
1139 parser->m_namespaceSeparator = ASCII_EXCL;
1140 parser->m_ns = XML_FALSE;
1141 parser->m_ns_triplets = XML_FALSE;
1142
1143 parser->m_nsAtts = NULL;
1144 parser->m_nsAttsVersion = 0;
1145 parser->m_nsAttsPower = 0;
1146
1147 parser->m_protocolEncodingName = NULL;
1148
1149 poolInit(&parser->m_tempPool, &(parser->m_mem));
1150 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1151 parserInit(parser, encodingName);
1152
1153 if (encodingName && ! parser->m_protocolEncodingName) {
1154 if (dtd) {
1155 // We need to stop the upcoming call to XML_ParserFree from happily
1156 // destroying parser->m_dtd because the DTD is shared with the parent
1157 // parser and the only guard that keeps XML_ParserFree from destroying
1158 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1159 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1160 parser->m_dtd = NULL;
1161 }
1162 XML_ParserFree(parser);
1163 return NULL;
1164 }
1165
1166 if (nameSep) {
1167 parser->m_ns = XML_TRUE;
1168 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1169 parser->m_namespaceSeparator = *nameSep;
1170 } else {
1171 parser->m_internalEncoding = XmlGetInternalEncoding();
1172 }
1173
1174 return parser;
1175 }
1176
1177 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1178 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1179 parser->m_processor = prologInitProcessor;
1180 XmlPrologStateInit(&parser->m_prologState);
1181 if (encodingName != NULL) {
1182 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1183 }
1184 parser->m_curBase = NULL;
1185 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1186 parser->m_userData = NULL;
1187 parser->m_handlerArg = NULL;
1188 parser->m_startElementHandler = NULL;
1189 parser->m_endElementHandler = NULL;
1190 parser->m_characterDataHandler = NULL;
1191 parser->m_processingInstructionHandler = NULL;
1192 parser->m_commentHandler = NULL;
1193 parser->m_startCdataSectionHandler = NULL;
1194 parser->m_endCdataSectionHandler = NULL;
1195 parser->m_defaultHandler = NULL;
1196 parser->m_startDoctypeDeclHandler = NULL;
1197 parser->m_endDoctypeDeclHandler = NULL;
1198 parser->m_unparsedEntityDeclHandler = NULL;
1199 parser->m_notationDeclHandler = NULL;
1200 parser->m_startNamespaceDeclHandler = NULL;
1201 parser->m_endNamespaceDeclHandler = NULL;
1202 parser->m_notStandaloneHandler = NULL;
1203 parser->m_externalEntityRefHandler = NULL;
1204 parser->m_externalEntityRefHandlerArg = parser;
1205 parser->m_skippedEntityHandler = NULL;
1206 parser->m_elementDeclHandler = NULL;
1207 parser->m_attlistDeclHandler = NULL;
1208 parser->m_entityDeclHandler = NULL;
1209 parser->m_xmlDeclHandler = NULL;
1210 parser->m_bufferPtr = parser->m_buffer;
1211 parser->m_bufferEnd = parser->m_buffer;
1212 parser->m_parseEndByteIndex = 0;
1213 parser->m_parseEndPtr = NULL;
1214 parser->m_partialTokenBytesBefore = 0;
1215 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1216 parser->m_lastBufferRequestSize = 0;
1217 parser->m_declElementType = NULL;
1218 parser->m_declAttributeId = NULL;
1219 parser->m_declEntity = NULL;
1220 parser->m_doctypeName = NULL;
1221 parser->m_doctypeSysid = NULL;
1222 parser->m_doctypePubid = NULL;
1223 parser->m_declAttributeType = NULL;
1224 parser->m_declNotationName = NULL;
1225 parser->m_declNotationPublicId = NULL;
1226 parser->m_declAttributeIsCdata = XML_FALSE;
1227 parser->m_declAttributeIsId = XML_FALSE;
1228 memset(&parser->m_position, 0, sizeof(POSITION));
1229 parser->m_errorCode = XML_ERROR_NONE;
1230 parser->m_eventPtr = NULL;
1231 parser->m_eventEndPtr = NULL;
1232 parser->m_positionPtr = NULL;
1233 parser->m_openInternalEntities = NULL;
1234 parser->m_defaultExpandInternalEntities = XML_TRUE;
1235 parser->m_tagLevel = 0;
1236 parser->m_tagStack = NULL;
1237 parser->m_inheritedBindings = NULL;
1238 parser->m_nSpecifiedAtts = 0;
1239 parser->m_unknownEncodingMem = NULL;
1240 parser->m_unknownEncodingRelease = NULL;
1241 parser->m_unknownEncodingData = NULL;
1242 parser->m_parentParser = NULL;
1243 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1244 #ifdef XML_DTD
1245 parser->m_isParamEntity = XML_FALSE;
1246 parser->m_useForeignDTD = XML_FALSE;
1247 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1248 #endif
1249 parser->m_hash_secret_salt = 0;
1250
1251 #if XML_GE == 1
1252 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1253 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1254 parser->m_accounting.maximumAmplificationFactor
1255 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1256 parser->m_accounting.activationThresholdBytes
1257 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1258
1259 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1260 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1261 #endif
1262 }
1263
1264 /* moves list of bindings to m_freeBindingList */
1265 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1266 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1267 while (bindings) {
1268 BINDING *b = bindings;
1269 bindings = bindings->nextTagBinding;
1270 b->nextTagBinding = parser->m_freeBindingList;
1271 parser->m_freeBindingList = b;
1272 }
1273 }
1274
1275 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1276 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1277 TAG *tStk;
1278 OPEN_INTERNAL_ENTITY *openEntityList;
1279
1280 if (parser == NULL)
1281 return XML_FALSE;
1282
1283 if (parser->m_parentParser)
1284 return XML_FALSE;
1285 /* move m_tagStack to m_freeTagList */
1286 tStk = parser->m_tagStack;
1287 while (tStk) {
1288 TAG *tag = tStk;
1289 tStk = tStk->parent;
1290 tag->parent = parser->m_freeTagList;
1291 moveToFreeBindingList(parser, tag->bindings);
1292 tag->bindings = NULL;
1293 parser->m_freeTagList = tag;
1294 }
1295 /* move m_openInternalEntities to m_freeInternalEntities */
1296 openEntityList = parser->m_openInternalEntities;
1297 while (openEntityList) {
1298 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1299 openEntityList = openEntity->next;
1300 openEntity->next = parser->m_freeInternalEntities;
1301 parser->m_freeInternalEntities = openEntity;
1302 }
1303 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1304 FREE(parser, parser->m_unknownEncodingMem);
1305 if (parser->m_unknownEncodingRelease)
1306 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1307 poolClear(&parser->m_tempPool);
1308 poolClear(&parser->m_temp2Pool);
1309 FREE(parser, (void *)parser->m_protocolEncodingName);
1310 parser->m_protocolEncodingName = NULL;
1311 parserInit(parser, encodingName);
1312 dtdReset(parser->m_dtd, &parser->m_mem);
1313 return XML_TRUE;
1314 }
1315
1316 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1317 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1318 if (parser == NULL)
1319 return XML_STATUS_ERROR;
1320 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1321 XXX There's no way for the caller to determine which of the
1322 XXX possible error cases caused the XML_STATUS_ERROR return.
1323 */
1324 if (parser->m_parsingStatus.parsing == XML_PARSING
1325 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1326 return XML_STATUS_ERROR;
1327
1328 /* Get rid of any previous encoding name */
1329 FREE(parser, (void *)parser->m_protocolEncodingName);
1330
1331 if (encodingName == NULL)
1332 /* No new encoding name */
1333 parser->m_protocolEncodingName = NULL;
1334 else {
1335 /* Copy the new encoding name into allocated memory */
1336 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1337 if (! parser->m_protocolEncodingName)
1338 return XML_STATUS_ERROR;
1339 }
1340 return XML_STATUS_OK;
1341 }
1342
1343 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1344 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1345 const XML_Char *encodingName) {
1346 XML_Parser parser = oldParser;
1347 DTD *newDtd = NULL;
1348 DTD *oldDtd;
1349 XML_StartElementHandler oldStartElementHandler;
1350 XML_EndElementHandler oldEndElementHandler;
1351 XML_CharacterDataHandler oldCharacterDataHandler;
1352 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1353 XML_CommentHandler oldCommentHandler;
1354 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1355 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1356 XML_DefaultHandler oldDefaultHandler;
1357 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1358 XML_NotationDeclHandler oldNotationDeclHandler;
1359 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1360 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1361 XML_NotStandaloneHandler oldNotStandaloneHandler;
1362 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1363 XML_SkippedEntityHandler oldSkippedEntityHandler;
1364 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1365 XML_ElementDeclHandler oldElementDeclHandler;
1366 XML_AttlistDeclHandler oldAttlistDeclHandler;
1367 XML_EntityDeclHandler oldEntityDeclHandler;
1368 XML_XmlDeclHandler oldXmlDeclHandler;
1369 ELEMENT_TYPE *oldDeclElementType;
1370
1371 void *oldUserData;
1372 void *oldHandlerArg;
1373 XML_Bool oldDefaultExpandInternalEntities;
1374 XML_Parser oldExternalEntityRefHandlerArg;
1375 #ifdef XML_DTD
1376 enum XML_ParamEntityParsing oldParamEntityParsing;
1377 int oldInEntityValue;
1378 #endif
1379 XML_Bool oldns_triplets;
1380 /* Note that the new parser shares the same hash secret as the old
1381 parser, so that dtdCopy and copyEntityTable can lookup values
1382 from hash tables associated with either parser without us having
1383 to worry which hash secrets each table has.
1384 */
1385 unsigned long oldhash_secret_salt;
1386 XML_Bool oldReparseDeferralEnabled;
1387
1388 /* Validate the oldParser parameter before we pull everything out of it */
1389 if (oldParser == NULL)
1390 return NULL;
1391
1392 /* Stash the original parser contents on the stack */
1393 oldDtd = parser->m_dtd;
1394 oldStartElementHandler = parser->m_startElementHandler;
1395 oldEndElementHandler = parser->m_endElementHandler;
1396 oldCharacterDataHandler = parser->m_characterDataHandler;
1397 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1398 oldCommentHandler = parser->m_commentHandler;
1399 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1400 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1401 oldDefaultHandler = parser->m_defaultHandler;
1402 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1403 oldNotationDeclHandler = parser->m_notationDeclHandler;
1404 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1405 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1406 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1407 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1408 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1409 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1410 oldElementDeclHandler = parser->m_elementDeclHandler;
1411 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1412 oldEntityDeclHandler = parser->m_entityDeclHandler;
1413 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1414 oldDeclElementType = parser->m_declElementType;
1415
1416 oldUserData = parser->m_userData;
1417 oldHandlerArg = parser->m_handlerArg;
1418 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1419 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1420 #ifdef XML_DTD
1421 oldParamEntityParsing = parser->m_paramEntityParsing;
1422 oldInEntityValue = parser->m_prologState.inEntityValue;
1423 #endif
1424 oldns_triplets = parser->m_ns_triplets;
1425 /* Note that the new parser shares the same hash secret as the old
1426 parser, so that dtdCopy and copyEntityTable can lookup values
1427 from hash tables associated with either parser without us having
1428 to worry which hash secrets each table has.
1429 */
1430 oldhash_secret_salt = parser->m_hash_secret_salt;
1431 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1432
1433 #ifdef XML_DTD
1434 if (! context)
1435 newDtd = oldDtd;
1436 #endif /* XML_DTD */
1437
1438 /* Note that the magical uses of the pre-processor to make field
1439 access look more like C++ require that `parser' be overwritten
1440 here. This makes this function more painful to follow than it
1441 would be otherwise.
1442 */
1443 if (parser->m_ns) {
1444 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1445 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1446 } else {
1447 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1448 }
1449
1450 if (! parser)
1451 return NULL;
1452
1453 parser->m_startElementHandler = oldStartElementHandler;
1454 parser->m_endElementHandler = oldEndElementHandler;
1455 parser->m_characterDataHandler = oldCharacterDataHandler;
1456 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1457 parser->m_commentHandler = oldCommentHandler;
1458 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1459 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1460 parser->m_defaultHandler = oldDefaultHandler;
1461 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1462 parser->m_notationDeclHandler = oldNotationDeclHandler;
1463 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1464 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1465 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1466 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1467 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1468 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1469 parser->m_elementDeclHandler = oldElementDeclHandler;
1470 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1471 parser->m_entityDeclHandler = oldEntityDeclHandler;
1472 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1473 parser->m_declElementType = oldDeclElementType;
1474 parser->m_userData = oldUserData;
1475 if (oldUserData == oldHandlerArg)
1476 parser->m_handlerArg = parser->m_userData;
1477 else
1478 parser->m_handlerArg = parser;
1479 if (oldExternalEntityRefHandlerArg != oldParser)
1480 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1481 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1482 parser->m_ns_triplets = oldns_triplets;
1483 parser->m_hash_secret_salt = oldhash_secret_salt;
1484 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1485 parser->m_parentParser = oldParser;
1486 #ifdef XML_DTD
1487 parser->m_paramEntityParsing = oldParamEntityParsing;
1488 parser->m_prologState.inEntityValue = oldInEntityValue;
1489 if (context) {
1490 #endif /* XML_DTD */
1491 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1492 || ! setContext(parser, context)) {
1493 XML_ParserFree(parser);
1494 return NULL;
1495 }
1496 parser->m_processor = externalEntityInitProcessor;
1497 #ifdef XML_DTD
1498 } else {
1499 /* The DTD instance referenced by parser->m_dtd is shared between the
1500 document's root parser and external PE parsers, therefore one does not
1501 need to call setContext. In addition, one also *must* not call
1502 setContext, because this would overwrite existing prefix->binding
1503 pointers in parser->m_dtd with ones that get destroyed with the external
1504 PE parser. This would leave those prefixes with dangling pointers.
1505 */
1506 parser->m_isParamEntity = XML_TRUE;
1507 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1508 parser->m_processor = externalParEntInitProcessor;
1509 }
1510 #endif /* XML_DTD */
1511 return parser;
1512 }
1513
1514 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1515 destroyBindings(BINDING *bindings, XML_Parser parser) {
1516 for (;;) {
1517 BINDING *b = bindings;
1518 if (! b)
1519 break;
1520 bindings = b->nextTagBinding;
1521 FREE(parser, b->uri);
1522 FREE(parser, b);
1523 }
1524 }
1525
1526 void XMLCALL
XML_ParserFree(XML_Parser parser)1527 XML_ParserFree(XML_Parser parser) {
1528 TAG *tagList;
1529 OPEN_INTERNAL_ENTITY *entityList;
1530 if (parser == NULL)
1531 return;
1532 /* free m_tagStack and m_freeTagList */
1533 tagList = parser->m_tagStack;
1534 for (;;) {
1535 TAG *p;
1536 if (tagList == NULL) {
1537 if (parser->m_freeTagList == NULL)
1538 break;
1539 tagList = parser->m_freeTagList;
1540 parser->m_freeTagList = NULL;
1541 }
1542 p = tagList;
1543 tagList = tagList->parent;
1544 FREE(parser, p->buf);
1545 destroyBindings(p->bindings, parser);
1546 FREE(parser, p);
1547 }
1548 /* free m_openInternalEntities and m_freeInternalEntities */
1549 entityList = parser->m_openInternalEntities;
1550 for (;;) {
1551 OPEN_INTERNAL_ENTITY *openEntity;
1552 if (entityList == NULL) {
1553 if (parser->m_freeInternalEntities == NULL)
1554 break;
1555 entityList = parser->m_freeInternalEntities;
1556 parser->m_freeInternalEntities = NULL;
1557 }
1558 openEntity = entityList;
1559 entityList = entityList->next;
1560 FREE(parser, openEntity);
1561 }
1562
1563 destroyBindings(parser->m_freeBindingList, parser);
1564 destroyBindings(parser->m_inheritedBindings, parser);
1565 poolDestroy(&parser->m_tempPool);
1566 poolDestroy(&parser->m_temp2Pool);
1567 FREE(parser, (void *)parser->m_protocolEncodingName);
1568 #ifdef XML_DTD
1569 /* external parameter entity parsers share the DTD structure
1570 parser->m_dtd with the root parser, so we must not destroy it
1571 */
1572 if (! parser->m_isParamEntity && parser->m_dtd)
1573 #else
1574 if (parser->m_dtd)
1575 #endif /* XML_DTD */
1576 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1577 &parser->m_mem);
1578 FREE(parser, (void *)parser->m_atts);
1579 #ifdef XML_ATTR_INFO
1580 FREE(parser, (void *)parser->m_attInfo);
1581 #endif
1582 FREE(parser, parser->m_groupConnector);
1583 FREE(parser, parser->m_buffer);
1584 FREE(parser, parser->m_dataBuf);
1585 FREE(parser, parser->m_nsAtts);
1586 FREE(parser, parser->m_unknownEncodingMem);
1587 if (parser->m_unknownEncodingRelease)
1588 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1589 FREE(parser, parser);
1590 }
1591
1592 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1593 XML_UseParserAsHandlerArg(XML_Parser parser) {
1594 if (parser != NULL)
1595 parser->m_handlerArg = parser;
1596 }
1597
1598 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1599 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1600 if (parser == NULL)
1601 return XML_ERROR_INVALID_ARGUMENT;
1602 #ifdef XML_DTD
1603 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1604 if (parser->m_parsingStatus.parsing == XML_PARSING
1605 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1606 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1607 parser->m_useForeignDTD = useDTD;
1608 return XML_ERROR_NONE;
1609 #else
1610 UNUSED_P(useDTD);
1611 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1612 #endif
1613 }
1614
1615 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1616 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1617 if (parser == NULL)
1618 return;
1619 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1620 if (parser->m_parsingStatus.parsing == XML_PARSING
1621 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1622 return;
1623 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1624 }
1625
1626 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1627 XML_SetUserData(XML_Parser parser, void *p) {
1628 if (parser == NULL)
1629 return;
1630 if (parser->m_handlerArg == parser->m_userData)
1631 parser->m_handlerArg = parser->m_userData = p;
1632 else
1633 parser->m_userData = p;
1634 }
1635
1636 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1637 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1638 if (parser == NULL)
1639 return XML_STATUS_ERROR;
1640 if (p) {
1641 p = poolCopyString(&parser->m_dtd->pool, p);
1642 if (! p)
1643 return XML_STATUS_ERROR;
1644 parser->m_curBase = p;
1645 } else
1646 parser->m_curBase = NULL;
1647 return XML_STATUS_OK;
1648 }
1649
1650 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1651 XML_GetBase(XML_Parser parser) {
1652 if (parser == NULL)
1653 return NULL;
1654 return parser->m_curBase;
1655 }
1656
1657 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1658 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1659 if (parser == NULL)
1660 return -1;
1661 return parser->m_nSpecifiedAtts;
1662 }
1663
1664 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1665 XML_GetIdAttributeIndex(XML_Parser parser) {
1666 if (parser == NULL)
1667 return -1;
1668 return parser->m_idAttIndex;
1669 }
1670
1671 #ifdef XML_ATTR_INFO
1672 const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser)1673 XML_GetAttributeInfo(XML_Parser parser) {
1674 if (parser == NULL)
1675 return NULL;
1676 return parser->m_attInfo;
1677 }
1678 #endif
1679
1680 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1681 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1682 XML_EndElementHandler end) {
1683 if (parser == NULL)
1684 return;
1685 parser->m_startElementHandler = start;
1686 parser->m_endElementHandler = end;
1687 }
1688
1689 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1690 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1691 if (parser != NULL)
1692 parser->m_startElementHandler = start;
1693 }
1694
1695 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1696 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1697 if (parser != NULL)
1698 parser->m_endElementHandler = end;
1699 }
1700
1701 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1702 XML_SetCharacterDataHandler(XML_Parser parser,
1703 XML_CharacterDataHandler handler) {
1704 if (parser != NULL)
1705 parser->m_characterDataHandler = handler;
1706 }
1707
1708 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1709 XML_SetProcessingInstructionHandler(XML_Parser parser,
1710 XML_ProcessingInstructionHandler handler) {
1711 if (parser != NULL)
1712 parser->m_processingInstructionHandler = handler;
1713 }
1714
1715 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1716 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1717 if (parser != NULL)
1718 parser->m_commentHandler = handler;
1719 }
1720
1721 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1722 XML_SetCdataSectionHandler(XML_Parser parser,
1723 XML_StartCdataSectionHandler start,
1724 XML_EndCdataSectionHandler end) {
1725 if (parser == NULL)
1726 return;
1727 parser->m_startCdataSectionHandler = start;
1728 parser->m_endCdataSectionHandler = end;
1729 }
1730
1731 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1732 XML_SetStartCdataSectionHandler(XML_Parser parser,
1733 XML_StartCdataSectionHandler start) {
1734 if (parser != NULL)
1735 parser->m_startCdataSectionHandler = start;
1736 }
1737
1738 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1739 XML_SetEndCdataSectionHandler(XML_Parser parser,
1740 XML_EndCdataSectionHandler end) {
1741 if (parser != NULL)
1742 parser->m_endCdataSectionHandler = end;
1743 }
1744
1745 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1746 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1747 if (parser == NULL)
1748 return;
1749 parser->m_defaultHandler = handler;
1750 parser->m_defaultExpandInternalEntities = XML_FALSE;
1751 }
1752
1753 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1754 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1755 if (parser == NULL)
1756 return;
1757 parser->m_defaultHandler = handler;
1758 parser->m_defaultExpandInternalEntities = XML_TRUE;
1759 }
1760
1761 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1762 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1763 XML_EndDoctypeDeclHandler end) {
1764 if (parser == NULL)
1765 return;
1766 parser->m_startDoctypeDeclHandler = start;
1767 parser->m_endDoctypeDeclHandler = end;
1768 }
1769
1770 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1771 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1772 XML_StartDoctypeDeclHandler start) {
1773 if (parser != NULL)
1774 parser->m_startDoctypeDeclHandler = start;
1775 }
1776
1777 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1778 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1779 if (parser != NULL)
1780 parser->m_endDoctypeDeclHandler = end;
1781 }
1782
1783 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1784 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1785 XML_UnparsedEntityDeclHandler handler) {
1786 if (parser != NULL)
1787 parser->m_unparsedEntityDeclHandler = handler;
1788 }
1789
1790 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1791 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1792 if (parser != NULL)
1793 parser->m_notationDeclHandler = handler;
1794 }
1795
1796 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1797 XML_SetNamespaceDeclHandler(XML_Parser parser,
1798 XML_StartNamespaceDeclHandler start,
1799 XML_EndNamespaceDeclHandler end) {
1800 if (parser == NULL)
1801 return;
1802 parser->m_startNamespaceDeclHandler = start;
1803 parser->m_endNamespaceDeclHandler = end;
1804 }
1805
1806 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1807 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1808 XML_StartNamespaceDeclHandler start) {
1809 if (parser != NULL)
1810 parser->m_startNamespaceDeclHandler = start;
1811 }
1812
1813 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1814 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1815 XML_EndNamespaceDeclHandler end) {
1816 if (parser != NULL)
1817 parser->m_endNamespaceDeclHandler = end;
1818 }
1819
1820 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1821 XML_SetNotStandaloneHandler(XML_Parser parser,
1822 XML_NotStandaloneHandler handler) {
1823 if (parser != NULL)
1824 parser->m_notStandaloneHandler = handler;
1825 }
1826
1827 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1828 XML_SetExternalEntityRefHandler(XML_Parser parser,
1829 XML_ExternalEntityRefHandler handler) {
1830 if (parser != NULL)
1831 parser->m_externalEntityRefHandler = handler;
1832 }
1833
1834 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1835 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1836 if (parser == NULL)
1837 return;
1838 if (arg)
1839 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1840 else
1841 parser->m_externalEntityRefHandlerArg = parser;
1842 }
1843
1844 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1845 XML_SetSkippedEntityHandler(XML_Parser parser,
1846 XML_SkippedEntityHandler handler) {
1847 if (parser != NULL)
1848 parser->m_skippedEntityHandler = handler;
1849 }
1850
1851 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1852 XML_SetUnknownEncodingHandler(XML_Parser parser,
1853 XML_UnknownEncodingHandler handler, void *data) {
1854 if (parser == NULL)
1855 return;
1856 parser->m_unknownEncodingHandler = handler;
1857 parser->m_unknownEncodingHandlerData = data;
1858 }
1859
1860 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1861 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1862 if (parser != NULL)
1863 parser->m_elementDeclHandler = eldecl;
1864 }
1865
1866 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1867 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1868 if (parser != NULL)
1869 parser->m_attlistDeclHandler = attdecl;
1870 }
1871
1872 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1873 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1874 if (parser != NULL)
1875 parser->m_entityDeclHandler = handler;
1876 }
1877
1878 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1879 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1880 if (parser != NULL)
1881 parser->m_xmlDeclHandler = handler;
1882 }
1883
1884 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1885 XML_SetParamEntityParsing(XML_Parser parser,
1886 enum XML_ParamEntityParsing peParsing) {
1887 if (parser == NULL)
1888 return 0;
1889 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1890 if (parser->m_parsingStatus.parsing == XML_PARSING
1891 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1892 return 0;
1893 #ifdef XML_DTD
1894 parser->m_paramEntityParsing = peParsing;
1895 return 1;
1896 #else
1897 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1898 #endif
1899 }
1900
1901 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1902 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1903 if (parser == NULL)
1904 return 0;
1905 if (parser->m_parentParser)
1906 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1907 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1908 if (parser->m_parsingStatus.parsing == XML_PARSING
1909 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1910 return 0;
1911 parser->m_hash_secret_salt = hash_salt;
1912 return 1;
1913 }
1914
1915 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1916 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1917 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1918 if (parser != NULL)
1919 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1920 return XML_STATUS_ERROR;
1921 }
1922 switch (parser->m_parsingStatus.parsing) {
1923 case XML_SUSPENDED:
1924 parser->m_errorCode = XML_ERROR_SUSPENDED;
1925 return XML_STATUS_ERROR;
1926 case XML_FINISHED:
1927 parser->m_errorCode = XML_ERROR_FINISHED;
1928 return XML_STATUS_ERROR;
1929 case XML_INITIALIZED:
1930 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1931 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1932 return XML_STATUS_ERROR;
1933 }
1934 /* fall through */
1935 default:
1936 parser->m_parsingStatus.parsing = XML_PARSING;
1937 }
1938
1939 #if XML_CONTEXT_BYTES == 0
1940 if (parser->m_bufferPtr == parser->m_bufferEnd) {
1941 const char *end;
1942 int nLeftOver;
1943 enum XML_Status result;
1944 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1945 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1946 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1947 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1948 parser->m_processor = errorProcessor;
1949 return XML_STATUS_ERROR;
1950 }
1951 // though this isn't a buffer request, we assume that `len` is the app's
1952 // preferred buffer fill size, and therefore save it here.
1953 parser->m_lastBufferRequestSize = len;
1954 parser->m_parseEndByteIndex += len;
1955 parser->m_positionPtr = s;
1956 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1957
1958 parser->m_errorCode
1959 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
1960
1961 if (parser->m_errorCode != XML_ERROR_NONE) {
1962 parser->m_eventEndPtr = parser->m_eventPtr;
1963 parser->m_processor = errorProcessor;
1964 return XML_STATUS_ERROR;
1965 } else {
1966 switch (parser->m_parsingStatus.parsing) {
1967 case XML_SUSPENDED:
1968 result = XML_STATUS_SUSPENDED;
1969 break;
1970 case XML_INITIALIZED:
1971 case XML_PARSING:
1972 if (isFinal) {
1973 parser->m_parsingStatus.parsing = XML_FINISHED;
1974 return XML_STATUS_OK;
1975 }
1976 /* fall through */
1977 default:
1978 result = XML_STATUS_OK;
1979 }
1980 }
1981
1982 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1983 &parser->m_position);
1984 nLeftOver = s + len - end;
1985 if (nLeftOver) {
1986 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
1987 // (and XML_ERROR_FINISHED) from XML_GetBuffer.
1988 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
1989 parser->m_parsingStatus.parsing = XML_PARSING;
1990 void *const temp = XML_GetBuffer(parser, nLeftOver);
1991 parser->m_parsingStatus.parsing = originalStatus;
1992 // GetBuffer may have overwritten this, but we want to remember what the
1993 // app requested, not how many bytes were left over after parsing.
1994 parser->m_lastBufferRequestSize = len;
1995 if (temp == NULL) {
1996 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
1997 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1998 parser->m_processor = errorProcessor;
1999 return XML_STATUS_ERROR;
2000 }
2001 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2002 // don't have any data to preserve, and can copy straight into the start
2003 // of the buffer rather than the GetBuffer return pointer (which may be
2004 // pointing further into the allocated buffer).
2005 memcpy(parser->m_buffer, end, nLeftOver);
2006 }
2007 parser->m_bufferPtr = parser->m_buffer;
2008 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2009 parser->m_positionPtr = parser->m_bufferPtr;
2010 parser->m_parseEndPtr = parser->m_bufferEnd;
2011 parser->m_eventPtr = parser->m_bufferPtr;
2012 parser->m_eventEndPtr = parser->m_bufferPtr;
2013 return result;
2014 }
2015 #endif /* XML_CONTEXT_BYTES == 0 */
2016 void *buff = XML_GetBuffer(parser, len);
2017 if (buff == NULL)
2018 return XML_STATUS_ERROR;
2019 if (len > 0) {
2020 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2021 memcpy(buff, s, len);
2022 }
2023 return XML_ParseBuffer(parser, len, isFinal);
2024 }
2025
2026 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2027 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2028 const char *start;
2029 enum XML_Status result = XML_STATUS_OK;
2030
2031 if (parser == NULL)
2032 return XML_STATUS_ERROR;
2033 switch (parser->m_parsingStatus.parsing) {
2034 case XML_SUSPENDED:
2035 parser->m_errorCode = XML_ERROR_SUSPENDED;
2036 return XML_STATUS_ERROR;
2037 case XML_FINISHED:
2038 parser->m_errorCode = XML_ERROR_FINISHED;
2039 return XML_STATUS_ERROR;
2040 case XML_INITIALIZED:
2041 /* Has someone called XML_GetBuffer successfully before? */
2042 if (! parser->m_bufferPtr) {
2043 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2044 return XML_STATUS_ERROR;
2045 }
2046
2047 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2048 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2049 return XML_STATUS_ERROR;
2050 }
2051 /* fall through */
2052 default:
2053 parser->m_parsingStatus.parsing = XML_PARSING;
2054 }
2055
2056 start = parser->m_bufferPtr;
2057 parser->m_positionPtr = start;
2058 parser->m_bufferEnd += len;
2059 parser->m_parseEndPtr = parser->m_bufferEnd;
2060 parser->m_parseEndByteIndex += len;
2061 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2062
2063 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2064 &parser->m_bufferPtr);
2065
2066 if (parser->m_errorCode != XML_ERROR_NONE) {
2067 parser->m_eventEndPtr = parser->m_eventPtr;
2068 parser->m_processor = errorProcessor;
2069 return XML_STATUS_ERROR;
2070 } else {
2071 switch (parser->m_parsingStatus.parsing) {
2072 case XML_SUSPENDED:
2073 result = XML_STATUS_SUSPENDED;
2074 break;
2075 case XML_INITIALIZED:
2076 case XML_PARSING:
2077 if (isFinal) {
2078 parser->m_parsingStatus.parsing = XML_FINISHED;
2079 return result;
2080 }
2081 default:; /* should not happen */
2082 }
2083 }
2084
2085 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2086 parser->m_bufferPtr, &parser->m_position);
2087 parser->m_positionPtr = parser->m_bufferPtr;
2088 return result;
2089 }
2090
2091 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2092 XML_GetBuffer(XML_Parser parser, int len) {
2093 if (parser == NULL)
2094 return NULL;
2095 if (len < 0) {
2096 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2097 return NULL;
2098 }
2099 switch (parser->m_parsingStatus.parsing) {
2100 case XML_SUSPENDED:
2101 parser->m_errorCode = XML_ERROR_SUSPENDED;
2102 return NULL;
2103 case XML_FINISHED:
2104 parser->m_errorCode = XML_ERROR_FINISHED;
2105 return NULL;
2106 default:;
2107 }
2108
2109 // whether or not the request succeeds, `len` seems to be the app's preferred
2110 // buffer fill size; remember it.
2111 parser->m_lastBufferRequestSize = len;
2112 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2113 || parser->m_buffer == NULL) {
2114 #if XML_CONTEXT_BYTES > 0
2115 int keep;
2116 #endif /* XML_CONTEXT_BYTES > 0 */
2117 /* Do not invoke signed arithmetic overflow: */
2118 int neededSize = (int)((unsigned)len
2119 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2120 parser->m_bufferEnd, parser->m_bufferPtr));
2121 if (neededSize < 0) {
2122 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2123 return NULL;
2124 }
2125 #if XML_CONTEXT_BYTES > 0
2126 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2127 if (keep > XML_CONTEXT_BYTES)
2128 keep = XML_CONTEXT_BYTES;
2129 /* Detect and prevent integer overflow */
2130 if (keep > INT_MAX - neededSize) {
2131 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2132 return NULL;
2133 }
2134 neededSize += keep;
2135 #endif /* XML_CONTEXT_BYTES > 0 */
2136 if (parser->m_buffer && parser->m_bufferPtr
2137 && neededSize
2138 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2139 #if XML_CONTEXT_BYTES > 0
2140 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2141 int offset
2142 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2143 - keep;
2144 /* The buffer pointers cannot be NULL here; we have at least some bytes
2145 * in the buffer */
2146 memmove(parser->m_buffer, &parser->m_buffer[offset],
2147 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2148 parser->m_bufferEnd -= offset;
2149 parser->m_bufferPtr -= offset;
2150 }
2151 #else
2152 memmove(parser->m_buffer, parser->m_bufferPtr,
2153 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2154 parser->m_bufferEnd
2155 = parser->m_buffer
2156 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2157 parser->m_bufferPtr = parser->m_buffer;
2158 #endif /* XML_CONTEXT_BYTES > 0 */
2159 } else {
2160 char *newBuf;
2161 int bufferSize
2162 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2163 if (bufferSize == 0)
2164 bufferSize = INIT_BUFFER_SIZE;
2165 do {
2166 /* Do not invoke signed arithmetic overflow: */
2167 bufferSize = (int)(2U * (unsigned)bufferSize);
2168 } while (bufferSize < neededSize && bufferSize > 0);
2169 if (bufferSize <= 0) {
2170 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2171 return NULL;
2172 }
2173 newBuf = (char *)MALLOC(parser, bufferSize);
2174 if (newBuf == 0) {
2175 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2176 return NULL;
2177 }
2178 parser->m_bufferLim = newBuf + bufferSize;
2179 #if XML_CONTEXT_BYTES > 0
2180 if (parser->m_bufferPtr) {
2181 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2182 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2183 + keep);
2184 FREE(parser, parser->m_buffer);
2185 parser->m_buffer = newBuf;
2186 parser->m_bufferEnd
2187 = parser->m_buffer
2188 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2189 + keep;
2190 parser->m_bufferPtr = parser->m_buffer + keep;
2191 } else {
2192 /* This must be a brand new buffer with no data in it yet */
2193 parser->m_bufferEnd = newBuf;
2194 parser->m_bufferPtr = parser->m_buffer = newBuf;
2195 }
2196 #else
2197 if (parser->m_bufferPtr) {
2198 memcpy(newBuf, parser->m_bufferPtr,
2199 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2200 FREE(parser, parser->m_buffer);
2201 parser->m_bufferEnd
2202 = newBuf
2203 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2204 } else {
2205 /* This must be a brand new buffer with no data in it yet */
2206 parser->m_bufferEnd = newBuf;
2207 }
2208 parser->m_bufferPtr = parser->m_buffer = newBuf;
2209 #endif /* XML_CONTEXT_BYTES > 0 */
2210 }
2211 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2212 parser->m_positionPtr = NULL;
2213 }
2214 return parser->m_bufferEnd;
2215 }
2216
2217 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2218 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2219 if (parser == NULL)
2220 return XML_STATUS_ERROR;
2221 switch (parser->m_parsingStatus.parsing) {
2222 case XML_SUSPENDED:
2223 if (resumable) {
2224 parser->m_errorCode = XML_ERROR_SUSPENDED;
2225 return XML_STATUS_ERROR;
2226 }
2227 parser->m_parsingStatus.parsing = XML_FINISHED;
2228 break;
2229 case XML_FINISHED:
2230 parser->m_errorCode = XML_ERROR_FINISHED;
2231 return XML_STATUS_ERROR;
2232 default:
2233 if (resumable) {
2234 #ifdef XML_DTD
2235 if (parser->m_isParamEntity) {
2236 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2237 return XML_STATUS_ERROR;
2238 }
2239 #endif
2240 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2241 } else
2242 parser->m_parsingStatus.parsing = XML_FINISHED;
2243 }
2244 return XML_STATUS_OK;
2245 }
2246
2247 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2248 XML_ResumeParser(XML_Parser parser) {
2249 enum XML_Status result = XML_STATUS_OK;
2250
2251 if (parser == NULL)
2252 return XML_STATUS_ERROR;
2253 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2254 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2255 return XML_STATUS_ERROR;
2256 }
2257 parser->m_parsingStatus.parsing = XML_PARSING;
2258
2259 parser->m_errorCode = callProcessor(
2260 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2261
2262 if (parser->m_errorCode != XML_ERROR_NONE) {
2263 parser->m_eventEndPtr = parser->m_eventPtr;
2264 parser->m_processor = errorProcessor;
2265 return XML_STATUS_ERROR;
2266 } else {
2267 switch (parser->m_parsingStatus.parsing) {
2268 case XML_SUSPENDED:
2269 result = XML_STATUS_SUSPENDED;
2270 break;
2271 case XML_INITIALIZED:
2272 case XML_PARSING:
2273 if (parser->m_parsingStatus.finalBuffer) {
2274 parser->m_parsingStatus.parsing = XML_FINISHED;
2275 return result;
2276 }
2277 default:;
2278 }
2279 }
2280
2281 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2282 parser->m_bufferPtr, &parser->m_position);
2283 parser->m_positionPtr = parser->m_bufferPtr;
2284 return result;
2285 }
2286
2287 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2288 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2289 if (parser == NULL)
2290 return;
2291 assert(status != NULL);
2292 *status = parser->m_parsingStatus;
2293 }
2294
2295 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2296 XML_GetErrorCode(XML_Parser parser) {
2297 if (parser == NULL)
2298 return XML_ERROR_INVALID_ARGUMENT;
2299 return parser->m_errorCode;
2300 }
2301
2302 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2303 XML_GetCurrentByteIndex(XML_Parser parser) {
2304 if (parser == NULL)
2305 return -1;
2306 if (parser->m_eventPtr)
2307 return (XML_Index)(parser->m_parseEndByteIndex
2308 - (parser->m_parseEndPtr - parser->m_eventPtr));
2309 return -1;
2310 }
2311
2312 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2313 XML_GetCurrentByteCount(XML_Parser parser) {
2314 if (parser == NULL)
2315 return 0;
2316 if (parser->m_eventEndPtr && parser->m_eventPtr)
2317 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2318 return 0;
2319 }
2320
2321 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2322 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2323 #if XML_CONTEXT_BYTES > 0
2324 if (parser == NULL)
2325 return NULL;
2326 if (parser->m_eventPtr && parser->m_buffer) {
2327 if (offset != NULL)
2328 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2329 if (size != NULL)
2330 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2331 return parser->m_buffer;
2332 }
2333 #else
2334 (void)parser;
2335 (void)offset;
2336 (void)size;
2337 #endif /* XML_CONTEXT_BYTES > 0 */
2338 return (const char *)0;
2339 }
2340
2341 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2342 XML_GetCurrentLineNumber(XML_Parser parser) {
2343 if (parser == NULL)
2344 return 0;
2345 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2346 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2347 parser->m_eventPtr, &parser->m_position);
2348 parser->m_positionPtr = parser->m_eventPtr;
2349 }
2350 return parser->m_position.lineNumber + 1;
2351 }
2352
2353 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2354 XML_GetCurrentColumnNumber(XML_Parser parser) {
2355 if (parser == NULL)
2356 return 0;
2357 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2358 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2359 parser->m_eventPtr, &parser->m_position);
2360 parser->m_positionPtr = parser->m_eventPtr;
2361 }
2362 return parser->m_position.columnNumber;
2363 }
2364
2365 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2366 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2367 if (parser != NULL)
2368 FREE(parser, model);
2369 }
2370
2371 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2372 XML_MemMalloc(XML_Parser parser, size_t size) {
2373 if (parser == NULL)
2374 return NULL;
2375 return MALLOC(parser, size);
2376 }
2377
2378 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2379 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2380 if (parser == NULL)
2381 return NULL;
2382 return REALLOC(parser, ptr, size);
2383 }
2384
2385 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2386 XML_MemFree(XML_Parser parser, void *ptr) {
2387 if (parser != NULL)
2388 FREE(parser, ptr);
2389 }
2390
2391 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2392 XML_DefaultCurrent(XML_Parser parser) {
2393 if (parser == NULL)
2394 return;
2395 if (parser->m_defaultHandler) {
2396 if (parser->m_openInternalEntities)
2397 reportDefault(parser, parser->m_internalEncoding,
2398 parser->m_openInternalEntities->internalEventPtr,
2399 parser->m_openInternalEntities->internalEventEndPtr);
2400 else
2401 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2402 parser->m_eventEndPtr);
2403 }
2404 }
2405
2406 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2407 XML_ErrorString(enum XML_Error code) {
2408 switch (code) {
2409 case XML_ERROR_NONE:
2410 return NULL;
2411 case XML_ERROR_NO_MEMORY:
2412 return XML_L("out of memory");
2413 case XML_ERROR_SYNTAX:
2414 return XML_L("syntax error");
2415 case XML_ERROR_NO_ELEMENTS:
2416 return XML_L("no element found");
2417 case XML_ERROR_INVALID_TOKEN:
2418 return XML_L("not well-formed (invalid token)");
2419 case XML_ERROR_UNCLOSED_TOKEN:
2420 return XML_L("unclosed token");
2421 case XML_ERROR_PARTIAL_CHAR:
2422 return XML_L("partial character");
2423 case XML_ERROR_TAG_MISMATCH:
2424 return XML_L("mismatched tag");
2425 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2426 return XML_L("duplicate attribute");
2427 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2428 return XML_L("junk after document element");
2429 case XML_ERROR_PARAM_ENTITY_REF:
2430 return XML_L("illegal parameter entity reference");
2431 case XML_ERROR_UNDEFINED_ENTITY:
2432 return XML_L("undefined entity");
2433 case XML_ERROR_RECURSIVE_ENTITY_REF:
2434 return XML_L("recursive entity reference");
2435 case XML_ERROR_ASYNC_ENTITY:
2436 return XML_L("asynchronous entity");
2437 case XML_ERROR_BAD_CHAR_REF:
2438 return XML_L("reference to invalid character number");
2439 case XML_ERROR_BINARY_ENTITY_REF:
2440 return XML_L("reference to binary entity");
2441 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2442 return XML_L("reference to external entity in attribute");
2443 case XML_ERROR_MISPLACED_XML_PI:
2444 return XML_L("XML or text declaration not at start of entity");
2445 case XML_ERROR_UNKNOWN_ENCODING:
2446 return XML_L("unknown encoding");
2447 case XML_ERROR_INCORRECT_ENCODING:
2448 return XML_L("encoding specified in XML declaration is incorrect");
2449 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2450 return XML_L("unclosed CDATA section");
2451 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2452 return XML_L("error in processing external entity reference");
2453 case XML_ERROR_NOT_STANDALONE:
2454 return XML_L("document is not standalone");
2455 case XML_ERROR_UNEXPECTED_STATE:
2456 return XML_L("unexpected parser state - please send a bug report");
2457 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2458 return XML_L("entity declared in parameter entity");
2459 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2460 return XML_L("requested feature requires XML_DTD support in Expat");
2461 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2462 return XML_L("cannot change setting once parsing has begun");
2463 /* Added in 1.95.7. */
2464 case XML_ERROR_UNBOUND_PREFIX:
2465 return XML_L("unbound prefix");
2466 /* Added in 1.95.8. */
2467 case XML_ERROR_UNDECLARING_PREFIX:
2468 return XML_L("must not undeclare prefix");
2469 case XML_ERROR_INCOMPLETE_PE:
2470 return XML_L("incomplete markup in parameter entity");
2471 case XML_ERROR_XML_DECL:
2472 return XML_L("XML declaration not well-formed");
2473 case XML_ERROR_TEXT_DECL:
2474 return XML_L("text declaration not well-formed");
2475 case XML_ERROR_PUBLICID:
2476 return XML_L("illegal character(s) in public id");
2477 case XML_ERROR_SUSPENDED:
2478 return XML_L("parser suspended");
2479 case XML_ERROR_NOT_SUSPENDED:
2480 return XML_L("parser not suspended");
2481 case XML_ERROR_ABORTED:
2482 return XML_L("parsing aborted");
2483 case XML_ERROR_FINISHED:
2484 return XML_L("parsing finished");
2485 case XML_ERROR_SUSPEND_PE:
2486 return XML_L("cannot suspend in external parameter entity");
2487 /* Added in 2.0.0. */
2488 case XML_ERROR_RESERVED_PREFIX_XML:
2489 return XML_L(
2490 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2491 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2492 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2493 case XML_ERROR_RESERVED_NAMESPACE_URI:
2494 return XML_L(
2495 "prefix must not be bound to one of the reserved namespace names");
2496 /* Added in 2.2.5. */
2497 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2498 return XML_L("invalid argument");
2499 /* Added in 2.3.0. */
2500 case XML_ERROR_NO_BUFFER:
2501 return XML_L(
2502 "a successful prior call to function XML_GetBuffer is required");
2503 /* Added in 2.4.0. */
2504 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2505 return XML_L(
2506 "limit on input amplification factor (from DTD and entities) breached");
2507 }
2508 return NULL;
2509 }
2510
2511 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2512 XML_ExpatVersion(void) {
2513 /* V1 is used to string-ize the version number. However, it would
2514 string-ize the actual version macro *names* unless we get them
2515 substituted before being passed to V1. CPP is defined to expand
2516 a macro, then rescan for more expansions. Thus, we use V2 to expand
2517 the version macros, then CPP will expand the resulting V1() macro
2518 with the correct numerals. */
2519 /* ### I'm assuming cpp is portable in this respect... */
2520
2521 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2522 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2523
2524 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2525
2526 #undef V1
2527 #undef V2
2528 }
2529
2530 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2531 XML_ExpatVersionInfo(void) {
2532 XML_Expat_Version version;
2533
2534 version.major = XML_MAJOR_VERSION;
2535 version.minor = XML_MINOR_VERSION;
2536 version.micro = XML_MICRO_VERSION;
2537
2538 return version;
2539 }
2540
2541 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2542 XML_GetFeatureList(void) {
2543 static const XML_Feature features[] = {
2544 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2545 sizeof(XML_Char)},
2546 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2547 sizeof(XML_LChar)},
2548 #ifdef XML_UNICODE
2549 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2550 #endif
2551 #ifdef XML_UNICODE_WCHAR_T
2552 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2553 #endif
2554 #ifdef XML_DTD
2555 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2556 #endif
2557 #if XML_CONTEXT_BYTES > 0
2558 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2559 XML_CONTEXT_BYTES},
2560 #endif
2561 #ifdef XML_MIN_SIZE
2562 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2563 #endif
2564 #ifdef XML_NS
2565 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2566 #endif
2567 #ifdef XML_LARGE_SIZE
2568 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2569 #endif
2570 #ifdef XML_ATTR_INFO
2571 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2572 #endif
2573 #if XML_GE == 1
2574 /* Added in Expat 2.4.0 for XML_DTD defined and
2575 * added in Expat 2.6.0 for XML_GE == 1. */
2576 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2577 XML_L("XML_BLAP_MAX_AMP"),
2578 (long int)
2579 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2580 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2581 XML_L("XML_BLAP_ACT_THRES"),
2582 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2583 /* Added in Expat 2.6.0. */
2584 {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2585 #endif
2586 {XML_FEATURE_END, NULL, 0}};
2587
2588 return features;
2589 }
2590
2591 #if XML_GE == 1
2592 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2593 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2594 XML_Parser parser, float maximumAmplificationFactor) {
2595 if ((parser == NULL) || (parser->m_parentParser != NULL)
2596 || isnan(maximumAmplificationFactor)
2597 || (maximumAmplificationFactor < 1.0f)) {
2598 return XML_FALSE;
2599 }
2600 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2601 return XML_TRUE;
2602 }
2603
2604 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2605 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2606 XML_Parser parser, unsigned long long activationThresholdBytes) {
2607 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2608 return XML_FALSE;
2609 }
2610 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2611 return XML_TRUE;
2612 }
2613 #endif /* XML_GE == 1 */
2614
2615 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2616 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2617 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2618 parser->m_reparseDeferralEnabled = enabled;
2619 return XML_TRUE;
2620 }
2621 return XML_FALSE;
2622 }
2623
2624 /* Initially tag->rawName always points into the parse buffer;
2625 for those TAG instances opened while the current parse buffer was
2626 processed, and not yet closed, we need to store tag->rawName in a more
2627 permanent location, since the parse buffer is about to be discarded.
2628 */
2629 static XML_Bool
storeRawNames(XML_Parser parser)2630 storeRawNames(XML_Parser parser) {
2631 TAG *tag = parser->m_tagStack;
2632 while (tag) {
2633 int bufSize;
2634 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2635 size_t rawNameLen;
2636 char *rawNameBuf = tag->buf + nameLen;
2637 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2638 at the first entry that has already been copied; everything
2639 below it in the stack is already been accounted for in a
2640 previous call to this function.
2641 */
2642 if (tag->rawName == rawNameBuf)
2643 break;
2644 /* For reuse purposes we need to ensure that the
2645 size of tag->buf is a multiple of sizeof(XML_Char).
2646 */
2647 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2648 /* Detect and prevent integer overflow. */
2649 if (rawNameLen > (size_t)INT_MAX - nameLen)
2650 return XML_FALSE;
2651 bufSize = nameLen + (int)rawNameLen;
2652 if (bufSize > tag->bufEnd - tag->buf) {
2653 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2654 if (temp == NULL)
2655 return XML_FALSE;
2656 /* if tag->name.str points to tag->buf (only when namespace
2657 processing is off) then we have to update it
2658 */
2659 if (tag->name.str == (XML_Char *)tag->buf)
2660 tag->name.str = (XML_Char *)temp;
2661 /* if tag->name.localPart is set (when namespace processing is on)
2662 then update it as well, since it will always point into tag->buf
2663 */
2664 if (tag->name.localPart)
2665 tag->name.localPart
2666 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2667 tag->buf = temp;
2668 tag->bufEnd = temp + bufSize;
2669 rawNameBuf = temp + nameLen;
2670 }
2671 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2672 tag->rawName = rawNameBuf;
2673 tag = tag->parent;
2674 }
2675 return XML_TRUE;
2676 }
2677
2678 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2679 contentProcessor(XML_Parser parser, const char *start, const char *end,
2680 const char **endPtr) {
2681 enum XML_Error result = doContent(
2682 parser, 0, parser->m_encoding, start, end, endPtr,
2683 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2684 if (result == XML_ERROR_NONE) {
2685 if (! storeRawNames(parser))
2686 return XML_ERROR_NO_MEMORY;
2687 }
2688 return result;
2689 }
2690
2691 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2692 externalEntityInitProcessor(XML_Parser parser, const char *start,
2693 const char *end, const char **endPtr) {
2694 enum XML_Error result = initializeEncoding(parser);
2695 if (result != XML_ERROR_NONE)
2696 return result;
2697 parser->m_processor = externalEntityInitProcessor2;
2698 return externalEntityInitProcessor2(parser, start, end, endPtr);
2699 }
2700
2701 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2702 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2703 const char *end, const char **endPtr) {
2704 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2705 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2706 switch (tok) {
2707 case XML_TOK_BOM:
2708 #if XML_GE == 1
2709 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2710 XML_ACCOUNT_DIRECT)) {
2711 accountingOnAbort(parser);
2712 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2713 }
2714 #endif /* XML_GE == 1 */
2715
2716 /* If we are at the end of the buffer, this would cause the next stage,
2717 i.e. externalEntityInitProcessor3, to pass control directly to
2718 doContent (by detecting XML_TOK_NONE) without processing any xml text
2719 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2720 */
2721 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2722 *endPtr = next;
2723 return XML_ERROR_NONE;
2724 }
2725 start = next;
2726 break;
2727 case XML_TOK_PARTIAL:
2728 if (! parser->m_parsingStatus.finalBuffer) {
2729 *endPtr = start;
2730 return XML_ERROR_NONE;
2731 }
2732 parser->m_eventPtr = start;
2733 return XML_ERROR_UNCLOSED_TOKEN;
2734 case XML_TOK_PARTIAL_CHAR:
2735 if (! parser->m_parsingStatus.finalBuffer) {
2736 *endPtr = start;
2737 return XML_ERROR_NONE;
2738 }
2739 parser->m_eventPtr = start;
2740 return XML_ERROR_PARTIAL_CHAR;
2741 }
2742 parser->m_processor = externalEntityInitProcessor3;
2743 return externalEntityInitProcessor3(parser, start, end, endPtr);
2744 }
2745
2746 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2747 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2748 const char *end, const char **endPtr) {
2749 int tok;
2750 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2751 parser->m_eventPtr = start;
2752 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2753 /* Note: These bytes are accounted later in:
2754 - processXmlDecl
2755 - externalEntityContentProcessor
2756 */
2757 parser->m_eventEndPtr = next;
2758
2759 switch (tok) {
2760 case XML_TOK_XML_DECL: {
2761 enum XML_Error result;
2762 result = processXmlDecl(parser, 1, start, next);
2763 if (result != XML_ERROR_NONE)
2764 return result;
2765 switch (parser->m_parsingStatus.parsing) {
2766 case XML_SUSPENDED:
2767 *endPtr = next;
2768 return XML_ERROR_NONE;
2769 case XML_FINISHED:
2770 return XML_ERROR_ABORTED;
2771 default:
2772 start = next;
2773 }
2774 } break;
2775 case XML_TOK_PARTIAL:
2776 if (! parser->m_parsingStatus.finalBuffer) {
2777 *endPtr = start;
2778 return XML_ERROR_NONE;
2779 }
2780 return XML_ERROR_UNCLOSED_TOKEN;
2781 case XML_TOK_PARTIAL_CHAR:
2782 if (! parser->m_parsingStatus.finalBuffer) {
2783 *endPtr = start;
2784 return XML_ERROR_NONE;
2785 }
2786 return XML_ERROR_PARTIAL_CHAR;
2787 }
2788 parser->m_processor = externalEntityContentProcessor;
2789 parser->m_tagLevel = 1;
2790 return externalEntityContentProcessor(parser, start, end, endPtr);
2791 }
2792
2793 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2794 externalEntityContentProcessor(XML_Parser parser, const char *start,
2795 const char *end, const char **endPtr) {
2796 enum XML_Error result
2797 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2798 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2799 XML_ACCOUNT_ENTITY_EXPANSION);
2800 if (result == XML_ERROR_NONE) {
2801 if (! storeRawNames(parser))
2802 return XML_ERROR_NO_MEMORY;
2803 }
2804 return result;
2805 }
2806
2807 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2808 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2809 const char *s, const char *end, const char **nextPtr,
2810 XML_Bool haveMore, enum XML_Account account) {
2811 /* save one level of indirection */
2812 DTD *const dtd = parser->m_dtd;
2813
2814 const char **eventPP;
2815 const char **eventEndPP;
2816 if (enc == parser->m_encoding) {
2817 eventPP = &parser->m_eventPtr;
2818 eventEndPP = &parser->m_eventEndPtr;
2819 } else {
2820 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2821 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2822 }
2823 *eventPP = s;
2824
2825 for (;;) {
2826 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2827 int tok = XmlContentTok(enc, s, end, &next);
2828 #if XML_GE == 1
2829 const char *accountAfter
2830 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2831 ? (haveMore ? s /* i.e. 0 bytes */ : end)
2832 : next;
2833 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2834 account)) {
2835 accountingOnAbort(parser);
2836 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2837 }
2838 #endif
2839 *eventEndPP = next;
2840 switch (tok) {
2841 case XML_TOK_TRAILING_CR:
2842 if (haveMore) {
2843 *nextPtr = s;
2844 return XML_ERROR_NONE;
2845 }
2846 *eventEndPP = end;
2847 if (parser->m_characterDataHandler) {
2848 XML_Char c = 0xA;
2849 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2850 } else if (parser->m_defaultHandler)
2851 reportDefault(parser, enc, s, end);
2852 /* We are at the end of the final buffer, should we check for
2853 XML_SUSPENDED, XML_FINISHED?
2854 */
2855 if (startTagLevel == 0)
2856 return XML_ERROR_NO_ELEMENTS;
2857 if (parser->m_tagLevel != startTagLevel)
2858 return XML_ERROR_ASYNC_ENTITY;
2859 *nextPtr = end;
2860 return XML_ERROR_NONE;
2861 case XML_TOK_NONE:
2862 if (haveMore) {
2863 *nextPtr = s;
2864 return XML_ERROR_NONE;
2865 }
2866 if (startTagLevel > 0) {
2867 if (parser->m_tagLevel != startTagLevel)
2868 return XML_ERROR_ASYNC_ENTITY;
2869 *nextPtr = s;
2870 return XML_ERROR_NONE;
2871 }
2872 return XML_ERROR_NO_ELEMENTS;
2873 case XML_TOK_INVALID:
2874 *eventPP = next;
2875 return XML_ERROR_INVALID_TOKEN;
2876 case XML_TOK_PARTIAL:
2877 if (haveMore) {
2878 *nextPtr = s;
2879 return XML_ERROR_NONE;
2880 }
2881 return XML_ERROR_UNCLOSED_TOKEN;
2882 case XML_TOK_PARTIAL_CHAR:
2883 if (haveMore) {
2884 *nextPtr = s;
2885 return XML_ERROR_NONE;
2886 }
2887 return XML_ERROR_PARTIAL_CHAR;
2888 case XML_TOK_ENTITY_REF: {
2889 const XML_Char *name;
2890 ENTITY *entity;
2891 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2892 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2893 if (ch) {
2894 #if XML_GE == 1
2895 /* NOTE: We are replacing 4-6 characters original input for 1 character
2896 * so there is no amplification and hence recording without
2897 * protection. */
2898 accountingDiffTolerated(parser, tok, (char *)&ch,
2899 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2900 XML_ACCOUNT_ENTITY_EXPANSION);
2901 #endif /* XML_GE == 1 */
2902 if (parser->m_characterDataHandler)
2903 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2904 else if (parser->m_defaultHandler)
2905 reportDefault(parser, enc, s, next);
2906 break;
2907 }
2908 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2909 next - enc->minBytesPerChar);
2910 if (! name)
2911 return XML_ERROR_NO_MEMORY;
2912 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2913 poolDiscard(&dtd->pool);
2914 /* First, determine if a check for an existing declaration is needed;
2915 if yes, check that the entity exists, and that it is internal,
2916 otherwise call the skipped entity or default handler.
2917 */
2918 if (! dtd->hasParamEntityRefs || dtd->standalone) {
2919 if (! entity)
2920 return XML_ERROR_UNDEFINED_ENTITY;
2921 else if (! entity->is_internal)
2922 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2923 } else if (! entity) {
2924 if (parser->m_skippedEntityHandler)
2925 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2926 else if (parser->m_defaultHandler)
2927 reportDefault(parser, enc, s, next);
2928 break;
2929 }
2930 if (entity->open)
2931 return XML_ERROR_RECURSIVE_ENTITY_REF;
2932 if (entity->notation)
2933 return XML_ERROR_BINARY_ENTITY_REF;
2934 if (entity->textPtr) {
2935 enum XML_Error result;
2936 if (! parser->m_defaultExpandInternalEntities) {
2937 if (parser->m_skippedEntityHandler)
2938 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2939 0);
2940 else if (parser->m_defaultHandler)
2941 reportDefault(parser, enc, s, next);
2942 break;
2943 }
2944 result = processInternalEntity(parser, entity, XML_FALSE);
2945 if (result != XML_ERROR_NONE)
2946 return result;
2947 } else if (parser->m_externalEntityRefHandler) {
2948 const XML_Char *context;
2949 entity->open = XML_TRUE;
2950 context = getContext(parser);
2951 entity->open = XML_FALSE;
2952 if (! context)
2953 return XML_ERROR_NO_MEMORY;
2954 if (! parser->m_externalEntityRefHandler(
2955 parser->m_externalEntityRefHandlerArg, context, entity->base,
2956 entity->systemId, entity->publicId))
2957 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2958 poolDiscard(&parser->m_tempPool);
2959 } else if (parser->m_defaultHandler)
2960 reportDefault(parser, enc, s, next);
2961 break;
2962 }
2963 case XML_TOK_START_TAG_NO_ATTS:
2964 /* fall through */
2965 case XML_TOK_START_TAG_WITH_ATTS: {
2966 TAG *tag;
2967 enum XML_Error result;
2968 XML_Char *toPtr;
2969 if (parser->m_freeTagList) {
2970 tag = parser->m_freeTagList;
2971 parser->m_freeTagList = parser->m_freeTagList->parent;
2972 } else {
2973 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2974 if (! tag)
2975 return XML_ERROR_NO_MEMORY;
2976 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2977 if (! tag->buf) {
2978 FREE(parser, tag);
2979 return XML_ERROR_NO_MEMORY;
2980 }
2981 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2982 }
2983 tag->bindings = NULL;
2984 tag->parent = parser->m_tagStack;
2985 parser->m_tagStack = tag;
2986 tag->name.localPart = NULL;
2987 tag->name.prefix = NULL;
2988 tag->rawName = s + enc->minBytesPerChar;
2989 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2990 ++parser->m_tagLevel;
2991 {
2992 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2993 const char *fromPtr = tag->rawName;
2994 toPtr = (XML_Char *)tag->buf;
2995 for (;;) {
2996 int bufSize;
2997 int convLen;
2998 const enum XML_Convert_Result convert_res
2999 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3000 (ICHAR *)tag->bufEnd - 1);
3001 convLen = (int)(toPtr - (XML_Char *)tag->buf);
3002 if ((fromPtr >= rawNameEnd)
3003 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3004 tag->name.strLen = convLen;
3005 break;
3006 }
3007 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3008 {
3009 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3010 if (temp == NULL)
3011 return XML_ERROR_NO_MEMORY;
3012 tag->buf = temp;
3013 tag->bufEnd = temp + bufSize;
3014 toPtr = (XML_Char *)temp + convLen;
3015 }
3016 }
3017 }
3018 tag->name.str = (XML_Char *)tag->buf;
3019 *toPtr = XML_T('\0');
3020 result
3021 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3022 if (result)
3023 return result;
3024 if (parser->m_startElementHandler)
3025 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3026 (const XML_Char **)parser->m_atts);
3027 else if (parser->m_defaultHandler)
3028 reportDefault(parser, enc, s, next);
3029 poolClear(&parser->m_tempPool);
3030 break;
3031 }
3032 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3033 /* fall through */
3034 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3035 const char *rawName = s + enc->minBytesPerChar;
3036 enum XML_Error result;
3037 BINDING *bindings = NULL;
3038 XML_Bool noElmHandlers = XML_TRUE;
3039 TAG_NAME name;
3040 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3041 rawName + XmlNameLength(enc, rawName));
3042 if (! name.str)
3043 return XML_ERROR_NO_MEMORY;
3044 poolFinish(&parser->m_tempPool);
3045 result = storeAtts(parser, enc, s, &name, &bindings,
3046 XML_ACCOUNT_NONE /* token spans whole start tag */);
3047 if (result != XML_ERROR_NONE) {
3048 freeBindings(parser, bindings);
3049 return result;
3050 }
3051 poolFinish(&parser->m_tempPool);
3052 if (parser->m_startElementHandler) {
3053 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3054 (const XML_Char **)parser->m_atts);
3055 noElmHandlers = XML_FALSE;
3056 }
3057 if (parser->m_endElementHandler) {
3058 if (parser->m_startElementHandler)
3059 *eventPP = *eventEndPP;
3060 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3061 noElmHandlers = XML_FALSE;
3062 }
3063 if (noElmHandlers && parser->m_defaultHandler)
3064 reportDefault(parser, enc, s, next);
3065 poolClear(&parser->m_tempPool);
3066 freeBindings(parser, bindings);
3067 }
3068 if ((parser->m_tagLevel == 0)
3069 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3070 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3071 parser->m_processor = epilogProcessor;
3072 else
3073 return epilogProcessor(parser, next, end, nextPtr);
3074 }
3075 break;
3076 case XML_TOK_END_TAG:
3077 if (parser->m_tagLevel == startTagLevel)
3078 return XML_ERROR_ASYNC_ENTITY;
3079 else {
3080 int len;
3081 const char *rawName;
3082 TAG *tag = parser->m_tagStack;
3083 rawName = s + enc->minBytesPerChar * 2;
3084 len = XmlNameLength(enc, rawName);
3085 if (len != tag->rawNameLength
3086 || memcmp(tag->rawName, rawName, len) != 0) {
3087 *eventPP = rawName;
3088 return XML_ERROR_TAG_MISMATCH;
3089 }
3090 parser->m_tagStack = tag->parent;
3091 tag->parent = parser->m_freeTagList;
3092 parser->m_freeTagList = tag;
3093 --parser->m_tagLevel;
3094 if (parser->m_endElementHandler) {
3095 const XML_Char *localPart;
3096 const XML_Char *prefix;
3097 XML_Char *uri;
3098 localPart = tag->name.localPart;
3099 if (parser->m_ns && localPart) {
3100 /* localPart and prefix may have been overwritten in
3101 tag->name.str, since this points to the binding->uri
3102 buffer which gets reused; so we have to add them again
3103 */
3104 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3105 /* don't need to check for space - already done in storeAtts() */
3106 while (*localPart)
3107 *uri++ = *localPart++;
3108 prefix = tag->name.prefix;
3109 if (parser->m_ns_triplets && prefix) {
3110 *uri++ = parser->m_namespaceSeparator;
3111 while (*prefix)
3112 *uri++ = *prefix++;
3113 }
3114 *uri = XML_T('\0');
3115 }
3116 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3117 } else if (parser->m_defaultHandler)
3118 reportDefault(parser, enc, s, next);
3119 while (tag->bindings) {
3120 BINDING *b = tag->bindings;
3121 if (parser->m_endNamespaceDeclHandler)
3122 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3123 b->prefix->name);
3124 tag->bindings = tag->bindings->nextTagBinding;
3125 b->nextTagBinding = parser->m_freeBindingList;
3126 parser->m_freeBindingList = b;
3127 b->prefix->binding = b->prevPrefixBinding;
3128 }
3129 if ((parser->m_tagLevel == 0)
3130 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3131 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3132 parser->m_processor = epilogProcessor;
3133 else
3134 return epilogProcessor(parser, next, end, nextPtr);
3135 }
3136 }
3137 break;
3138 case XML_TOK_CHAR_REF: {
3139 int n = XmlCharRefNumber(enc, s);
3140 if (n < 0)
3141 return XML_ERROR_BAD_CHAR_REF;
3142 if (parser->m_characterDataHandler) {
3143 XML_Char buf[XML_ENCODE_MAX];
3144 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3145 XmlEncode(n, (ICHAR *)buf));
3146 } else if (parser->m_defaultHandler)
3147 reportDefault(parser, enc, s, next);
3148 } break;
3149 case XML_TOK_XML_DECL:
3150 return XML_ERROR_MISPLACED_XML_PI;
3151 case XML_TOK_DATA_NEWLINE:
3152 if (parser->m_characterDataHandler) {
3153 XML_Char c = 0xA;
3154 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3155 } else if (parser->m_defaultHandler)
3156 reportDefault(parser, enc, s, next);
3157 break;
3158 case XML_TOK_CDATA_SECT_OPEN: {
3159 enum XML_Error result;
3160 if (parser->m_startCdataSectionHandler)
3161 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3162 /* BEGIN disabled code */
3163 /* Suppose you doing a transformation on a document that involves
3164 changing only the character data. You set up a defaultHandler
3165 and a characterDataHandler. The defaultHandler simply copies
3166 characters through. The characterDataHandler does the
3167 transformation and writes the characters out escaping them as
3168 necessary. This case will fail to work if we leave out the
3169 following two lines (because & and < inside CDATA sections will
3170 be incorrectly escaped).
3171
3172 However, now we have a start/endCdataSectionHandler, so it seems
3173 easier to let the user deal with this.
3174 */
3175 else if ((0) && parser->m_characterDataHandler)
3176 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3177 0);
3178 /* END disabled code */
3179 else if (parser->m_defaultHandler)
3180 reportDefault(parser, enc, s, next);
3181 result
3182 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3183 if (result != XML_ERROR_NONE)
3184 return result;
3185 else if (! next) {
3186 parser->m_processor = cdataSectionProcessor;
3187 return result;
3188 }
3189 } break;
3190 case XML_TOK_TRAILING_RSQB:
3191 if (haveMore) {
3192 *nextPtr = s;
3193 return XML_ERROR_NONE;
3194 }
3195 if (parser->m_characterDataHandler) {
3196 if (MUST_CONVERT(enc, s)) {
3197 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3198 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3199 parser->m_characterDataHandler(
3200 parser->m_handlerArg, parser->m_dataBuf,
3201 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3202 } else
3203 parser->m_characterDataHandler(
3204 parser->m_handlerArg, (const XML_Char *)s,
3205 (int)((const XML_Char *)end - (const XML_Char *)s));
3206 } else if (parser->m_defaultHandler)
3207 reportDefault(parser, enc, s, end);
3208 /* We are at the end of the final buffer, should we check for
3209 XML_SUSPENDED, XML_FINISHED?
3210 */
3211 if (startTagLevel == 0) {
3212 *eventPP = end;
3213 return XML_ERROR_NO_ELEMENTS;
3214 }
3215 if (parser->m_tagLevel != startTagLevel) {
3216 *eventPP = end;
3217 return XML_ERROR_ASYNC_ENTITY;
3218 }
3219 *nextPtr = end;
3220 return XML_ERROR_NONE;
3221 case XML_TOK_DATA_CHARS: {
3222 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3223 if (charDataHandler) {
3224 if (MUST_CONVERT(enc, s)) {
3225 for (;;) {
3226 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3227 const enum XML_Convert_Result convert_res = XmlConvert(
3228 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3229 *eventEndPP = s;
3230 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3231 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3232 if ((convert_res == XML_CONVERT_COMPLETED)
3233 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3234 break;
3235 *eventPP = s;
3236 }
3237 } else
3238 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3239 (int)((const XML_Char *)next - (const XML_Char *)s));
3240 } else if (parser->m_defaultHandler)
3241 reportDefault(parser, enc, s, next);
3242 } break;
3243 case XML_TOK_PI:
3244 if (! reportProcessingInstruction(parser, enc, s, next))
3245 return XML_ERROR_NO_MEMORY;
3246 break;
3247 case XML_TOK_COMMENT:
3248 if (! reportComment(parser, enc, s, next))
3249 return XML_ERROR_NO_MEMORY;
3250 break;
3251 default:
3252 /* All of the tokens produced by XmlContentTok() have their own
3253 * explicit cases, so this default is not strictly necessary.
3254 * However it is a useful safety net, so we retain the code and
3255 * simply exclude it from the coverage tests.
3256 *
3257 * LCOV_EXCL_START
3258 */
3259 if (parser->m_defaultHandler)
3260 reportDefault(parser, enc, s, next);
3261 break;
3262 /* LCOV_EXCL_STOP */
3263 }
3264 *eventPP = s = next;
3265 switch (parser->m_parsingStatus.parsing) {
3266 case XML_SUSPENDED:
3267 *nextPtr = next;
3268 return XML_ERROR_NONE;
3269 case XML_FINISHED:
3270 return XML_ERROR_ABORTED;
3271 default:;
3272 }
3273 }
3274 /* not reached */
3275 }
3276
3277 /* This function does not call free() on the allocated memory, merely
3278 * moving it to the parser's m_freeBindingList where it can be freed or
3279 * reused as appropriate.
3280 */
3281 static void
freeBindings(XML_Parser parser,BINDING * bindings)3282 freeBindings(XML_Parser parser, BINDING *bindings) {
3283 while (bindings) {
3284 BINDING *b = bindings;
3285
3286 /* m_startNamespaceDeclHandler will have been called for this
3287 * binding in addBindings(), so call the end handler now.
3288 */
3289 if (parser->m_endNamespaceDeclHandler)
3290 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3291
3292 bindings = bindings->nextTagBinding;
3293 b->nextTagBinding = parser->m_freeBindingList;
3294 parser->m_freeBindingList = b;
3295 b->prefix->binding = b->prevPrefixBinding;
3296 }
3297 }
3298
3299 /* Precondition: all arguments must be non-NULL;
3300 Purpose:
3301 - normalize attributes
3302 - check attributes for well-formedness
3303 - generate namespace aware attribute names (URI, prefix)
3304 - build list of attributes for startElementHandler
3305 - default attributes
3306 - process namespace declarations (check and report them)
3307 - generate namespace aware element name (URI, prefix)
3308 */
3309 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3310 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3311 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3312 enum XML_Account account) {
3313 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3314 ELEMENT_TYPE *elementType;
3315 int nDefaultAtts;
3316 const XML_Char **appAtts; /* the attribute list for the application */
3317 int attIndex = 0;
3318 int prefixLen;
3319 int i;
3320 int n;
3321 XML_Char *uri;
3322 int nPrefixes = 0;
3323 BINDING *binding;
3324 const XML_Char *localPart;
3325
3326 /* lookup the element type name */
3327 elementType
3328 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3329 if (! elementType) {
3330 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3331 if (! name)
3332 return XML_ERROR_NO_MEMORY;
3333 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3334 sizeof(ELEMENT_TYPE));
3335 if (! elementType)
3336 return XML_ERROR_NO_MEMORY;
3337 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3338 return XML_ERROR_NO_MEMORY;
3339 }
3340 nDefaultAtts = elementType->nDefaultAtts;
3341
3342 /* get the attributes from the tokenizer */
3343 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3344
3345 /* Detect and prevent integer overflow */
3346 if (n > INT_MAX - nDefaultAtts) {
3347 return XML_ERROR_NO_MEMORY;
3348 }
3349
3350 if (n + nDefaultAtts > parser->m_attsSize) {
3351 int oldAttsSize = parser->m_attsSize;
3352 ATTRIBUTE *temp;
3353 #ifdef XML_ATTR_INFO
3354 XML_AttrInfo *temp2;
3355 #endif
3356
3357 /* Detect and prevent integer overflow */
3358 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3359 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3360 return XML_ERROR_NO_MEMORY;
3361 }
3362
3363 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3364
3365 /* Detect and prevent integer overflow.
3366 * The preprocessor guard addresses the "always false" warning
3367 * from -Wtype-limits on platforms where
3368 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3369 #if UINT_MAX >= SIZE_MAX
3370 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3371 parser->m_attsSize = oldAttsSize;
3372 return XML_ERROR_NO_MEMORY;
3373 }
3374 #endif
3375
3376 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3377 parser->m_attsSize * sizeof(ATTRIBUTE));
3378 if (temp == NULL) {
3379 parser->m_attsSize = oldAttsSize;
3380 return XML_ERROR_NO_MEMORY;
3381 }
3382 parser->m_atts = temp;
3383 #ifdef XML_ATTR_INFO
3384 /* Detect and prevent integer overflow.
3385 * The preprocessor guard addresses the "always false" warning
3386 * from -Wtype-limits on platforms where
3387 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3388 # if UINT_MAX >= SIZE_MAX
3389 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3390 parser->m_attsSize = oldAttsSize;
3391 return XML_ERROR_NO_MEMORY;
3392 }
3393 # endif
3394
3395 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3396 parser->m_attsSize * sizeof(XML_AttrInfo));
3397 if (temp2 == NULL) {
3398 parser->m_attsSize = oldAttsSize;
3399 return XML_ERROR_NO_MEMORY;
3400 }
3401 parser->m_attInfo = temp2;
3402 #endif
3403 if (n > oldAttsSize)
3404 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3405 }
3406
3407 appAtts = (const XML_Char **)parser->m_atts;
3408 for (i = 0; i < n; i++) {
3409 ATTRIBUTE *currAtt = &parser->m_atts[i];
3410 #ifdef XML_ATTR_INFO
3411 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3412 #endif
3413 /* add the name and value to the attribute list */
3414 ATTRIBUTE_ID *attId
3415 = getAttributeId(parser, enc, currAtt->name,
3416 currAtt->name + XmlNameLength(enc, currAtt->name));
3417 if (! attId)
3418 return XML_ERROR_NO_MEMORY;
3419 #ifdef XML_ATTR_INFO
3420 currAttInfo->nameStart
3421 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3422 currAttInfo->nameEnd
3423 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3424 currAttInfo->valueStart = parser->m_parseEndByteIndex
3425 - (parser->m_parseEndPtr - currAtt->valuePtr);
3426 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3427 - (parser->m_parseEndPtr - currAtt->valueEnd);
3428 #endif
3429 /* Detect duplicate attributes by their QNames. This does not work when
3430 namespace processing is turned on and different prefixes for the same
3431 namespace are used. For this case we have a check further down.
3432 */
3433 if ((attId->name)[-1]) {
3434 if (enc == parser->m_encoding)
3435 parser->m_eventPtr = parser->m_atts[i].name;
3436 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3437 }
3438 (attId->name)[-1] = 1;
3439 appAtts[attIndex++] = attId->name;
3440 if (! parser->m_atts[i].normalized) {
3441 enum XML_Error result;
3442 XML_Bool isCdata = XML_TRUE;
3443
3444 /* figure out whether declared as other than CDATA */
3445 if (attId->maybeTokenized) {
3446 int j;
3447 for (j = 0; j < nDefaultAtts; j++) {
3448 if (attId == elementType->defaultAtts[j].id) {
3449 isCdata = elementType->defaultAtts[j].isCdata;
3450 break;
3451 }
3452 }
3453 }
3454
3455 /* normalize the attribute value */
3456 result = storeAttributeValue(
3457 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3458 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3459 if (result)
3460 return result;
3461 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3462 poolFinish(&parser->m_tempPool);
3463 } else {
3464 /* the value did not need normalizing */
3465 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3466 parser->m_atts[i].valuePtr,
3467 parser->m_atts[i].valueEnd);
3468 if (appAtts[attIndex] == 0)
3469 return XML_ERROR_NO_MEMORY;
3470 poolFinish(&parser->m_tempPool);
3471 }
3472 /* handle prefixed attribute names */
3473 if (attId->prefix) {
3474 if (attId->xmlns) {
3475 /* deal with namespace declarations here */
3476 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3477 appAtts[attIndex], bindingsPtr);
3478 if (result)
3479 return result;
3480 --attIndex;
3481 } else {
3482 /* deal with other prefixed names later */
3483 attIndex++;
3484 nPrefixes++;
3485 (attId->name)[-1] = 2;
3486 }
3487 } else
3488 attIndex++;
3489 }
3490
3491 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3492 parser->m_nSpecifiedAtts = attIndex;
3493 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3494 for (i = 0; i < attIndex; i += 2)
3495 if (appAtts[i] == elementType->idAtt->name) {
3496 parser->m_idAttIndex = i;
3497 break;
3498 }
3499 } else
3500 parser->m_idAttIndex = -1;
3501
3502 /* do attribute defaulting */
3503 for (i = 0; i < nDefaultAtts; i++) {
3504 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3505 if (! (da->id->name)[-1] && da->value) {
3506 if (da->id->prefix) {
3507 if (da->id->xmlns) {
3508 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3509 da->value, bindingsPtr);
3510 if (result)
3511 return result;
3512 } else {
3513 (da->id->name)[-1] = 2;
3514 nPrefixes++;
3515 appAtts[attIndex++] = da->id->name;
3516 appAtts[attIndex++] = da->value;
3517 }
3518 } else {
3519 (da->id->name)[-1] = 1;
3520 appAtts[attIndex++] = da->id->name;
3521 appAtts[attIndex++] = da->value;
3522 }
3523 }
3524 }
3525 appAtts[attIndex] = 0;
3526
3527 /* expand prefixed attribute names, check for duplicates,
3528 and clear flags that say whether attributes were specified */
3529 i = 0;
3530 if (nPrefixes) {
3531 int j; /* hash table index */
3532 unsigned long version = parser->m_nsAttsVersion;
3533
3534 /* Detect and prevent invalid shift */
3535 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3536 return XML_ERROR_NO_MEMORY;
3537 }
3538
3539 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3540 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3541 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3542 if ((nPrefixes << 1)
3543 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3544 NS_ATT *temp;
3545 /* hash table size must also be a power of 2 and >= 8 */
3546 while (nPrefixes >> parser->m_nsAttsPower++)
3547 ;
3548 if (parser->m_nsAttsPower < 3)
3549 parser->m_nsAttsPower = 3;
3550
3551 /* Detect and prevent invalid shift */
3552 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3553 /* Restore actual size of memory in m_nsAtts */
3554 parser->m_nsAttsPower = oldNsAttsPower;
3555 return XML_ERROR_NO_MEMORY;
3556 }
3557
3558 nsAttsSize = 1u << parser->m_nsAttsPower;
3559
3560 /* Detect and prevent integer overflow.
3561 * The preprocessor guard addresses the "always false" warning
3562 * from -Wtype-limits on platforms where
3563 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3564 #if UINT_MAX >= SIZE_MAX
3565 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3566 /* Restore actual size of memory in m_nsAtts */
3567 parser->m_nsAttsPower = oldNsAttsPower;
3568 return XML_ERROR_NO_MEMORY;
3569 }
3570 #endif
3571
3572 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3573 nsAttsSize * sizeof(NS_ATT));
3574 if (! temp) {
3575 /* Restore actual size of memory in m_nsAtts */
3576 parser->m_nsAttsPower = oldNsAttsPower;
3577 return XML_ERROR_NO_MEMORY;
3578 }
3579 parser->m_nsAtts = temp;
3580 version = 0; /* force re-initialization of m_nsAtts hash table */
3581 }
3582 /* using a version flag saves us from initializing m_nsAtts every time */
3583 if (! version) { /* initialize version flags when version wraps around */
3584 version = INIT_ATTS_VERSION;
3585 for (j = nsAttsSize; j != 0;)
3586 parser->m_nsAtts[--j].version = version;
3587 }
3588 parser->m_nsAttsVersion = --version;
3589
3590 /* expand prefixed names and check for duplicates */
3591 for (; i < attIndex; i += 2) {
3592 const XML_Char *s = appAtts[i];
3593 if (s[-1] == 2) { /* prefixed */
3594 ATTRIBUTE_ID *id;
3595 const BINDING *b;
3596 unsigned long uriHash;
3597 struct siphash sip_state;
3598 struct sipkey sip_key;
3599
3600 copy_salt_to_sipkey(parser, &sip_key);
3601 sip24_init(&sip_state, &sip_key);
3602
3603 ((XML_Char *)s)[-1] = 0; /* clear flag */
3604 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3605 if (! id || ! id->prefix) {
3606 /* This code is walking through the appAtts array, dealing
3607 * with (in this case) a prefixed attribute name. To be in
3608 * the array, the attribute must have already been bound, so
3609 * has to have passed through the hash table lookup once
3610 * already. That implies that an entry for it already
3611 * exists, so the lookup above will return a pointer to
3612 * already allocated memory. There is no opportunaity for
3613 * the allocator to fail, so the condition above cannot be
3614 * fulfilled.
3615 *
3616 * Since it is difficult to be certain that the above
3617 * analysis is complete, we retain the test and merely
3618 * remove the code from coverage tests.
3619 */
3620 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3621 }
3622 b = id->prefix->binding;
3623 if (! b)
3624 return XML_ERROR_UNBOUND_PREFIX;
3625
3626 for (j = 0; j < b->uriLen; j++) {
3627 const XML_Char c = b->uri[j];
3628 if (! poolAppendChar(&parser->m_tempPool, c))
3629 return XML_ERROR_NO_MEMORY;
3630 }
3631
3632 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3633
3634 while (*s++ != XML_T(ASCII_COLON))
3635 ;
3636
3637 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3638
3639 do { /* copies null terminator */
3640 if (! poolAppendChar(&parser->m_tempPool, *s))
3641 return XML_ERROR_NO_MEMORY;
3642 } while (*s++);
3643
3644 uriHash = (unsigned long)sip24_final(&sip_state);
3645
3646 { /* Check hash table for duplicate of expanded name (uriName).
3647 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3648 */
3649 unsigned char step = 0;
3650 unsigned long mask = nsAttsSize - 1;
3651 j = uriHash & mask; /* index into hash table */
3652 while (parser->m_nsAtts[j].version == version) {
3653 /* for speed we compare stored hash values first */
3654 if (uriHash == parser->m_nsAtts[j].hash) {
3655 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3656 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3657 /* s1 is null terminated, but not s2 */
3658 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3659 ;
3660 if (*s1 == 0)
3661 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3662 }
3663 if (! step)
3664 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3665 j < step ? (j += nsAttsSize - step) : (j -= step);
3666 }
3667 }
3668
3669 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3670 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3671 s = b->prefix->name;
3672 do {
3673 if (! poolAppendChar(&parser->m_tempPool, *s))
3674 return XML_ERROR_NO_MEMORY;
3675 } while (*s++);
3676 }
3677
3678 /* store expanded name in attribute list */
3679 s = poolStart(&parser->m_tempPool);
3680 poolFinish(&parser->m_tempPool);
3681 appAtts[i] = s;
3682
3683 /* fill empty slot with new version, uriName and hash value */
3684 parser->m_nsAtts[j].version = version;
3685 parser->m_nsAtts[j].hash = uriHash;
3686 parser->m_nsAtts[j].uriName = s;
3687
3688 if (! --nPrefixes) {
3689 i += 2;
3690 break;
3691 }
3692 } else /* not prefixed */
3693 ((XML_Char *)s)[-1] = 0; /* clear flag */
3694 }
3695 }
3696 /* clear flags for the remaining attributes */
3697 for (; i < attIndex; i += 2)
3698 ((XML_Char *)(appAtts[i]))[-1] = 0;
3699 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3700 binding->attId->name[-1] = 0;
3701
3702 if (! parser->m_ns)
3703 return XML_ERROR_NONE;
3704
3705 /* expand the element type name */
3706 if (elementType->prefix) {
3707 binding = elementType->prefix->binding;
3708 if (! binding)
3709 return XML_ERROR_UNBOUND_PREFIX;
3710 localPart = tagNamePtr->str;
3711 while (*localPart++ != XML_T(ASCII_COLON))
3712 ;
3713 } else if (dtd->defaultPrefix.binding) {
3714 binding = dtd->defaultPrefix.binding;
3715 localPart = tagNamePtr->str;
3716 } else
3717 return XML_ERROR_NONE;
3718 prefixLen = 0;
3719 if (parser->m_ns_triplets && binding->prefix->name) {
3720 for (; binding->prefix->name[prefixLen++];)
3721 ; /* prefixLen includes null terminator */
3722 }
3723 tagNamePtr->localPart = localPart;
3724 tagNamePtr->uriLen = binding->uriLen;
3725 tagNamePtr->prefix = binding->prefix->name;
3726 tagNamePtr->prefixLen = prefixLen;
3727 for (i = 0; localPart[i++];)
3728 ; /* i includes null terminator */
3729
3730 /* Detect and prevent integer overflow */
3731 if (binding->uriLen > INT_MAX - prefixLen
3732 || i > INT_MAX - (binding->uriLen + prefixLen)) {
3733 return XML_ERROR_NO_MEMORY;
3734 }
3735
3736 n = i + binding->uriLen + prefixLen;
3737 if (n > binding->uriAlloc) {
3738 TAG *p;
3739
3740 /* Detect and prevent integer overflow */
3741 if (n > INT_MAX - EXPAND_SPARE) {
3742 return XML_ERROR_NO_MEMORY;
3743 }
3744 /* Detect and prevent integer overflow.
3745 * The preprocessor guard addresses the "always false" warning
3746 * from -Wtype-limits on platforms where
3747 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3748 #if UINT_MAX >= SIZE_MAX
3749 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3750 return XML_ERROR_NO_MEMORY;
3751 }
3752 #endif
3753
3754 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3755 if (! uri)
3756 return XML_ERROR_NO_MEMORY;
3757 binding->uriAlloc = n + EXPAND_SPARE;
3758 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3759 for (p = parser->m_tagStack; p; p = p->parent)
3760 if (p->name.str == binding->uri)
3761 p->name.str = uri;
3762 FREE(parser, binding->uri);
3763 binding->uri = uri;
3764 }
3765 /* if m_namespaceSeparator != '\0' then uri includes it already */
3766 uri = binding->uri + binding->uriLen;
3767 memcpy(uri, localPart, i * sizeof(XML_Char));
3768 /* we always have a namespace separator between localPart and prefix */
3769 if (prefixLen) {
3770 uri += i - 1;
3771 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3772 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3773 }
3774 tagNamePtr->str = binding->uri;
3775 return XML_ERROR_NONE;
3776 }
3777
3778 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3779 is_rfc3986_uri_char(XML_Char candidate) {
3780 // For the RFC 3986 ANBF grammar see
3781 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3782
3783 switch (candidate) {
3784 // From rule "ALPHA" (uppercase half)
3785 case 'A':
3786 case 'B':
3787 case 'C':
3788 case 'D':
3789 case 'E':
3790 case 'F':
3791 case 'G':
3792 case 'H':
3793 case 'I':
3794 case 'J':
3795 case 'K':
3796 case 'L':
3797 case 'M':
3798 case 'N':
3799 case 'O':
3800 case 'P':
3801 case 'Q':
3802 case 'R':
3803 case 'S':
3804 case 'T':
3805 case 'U':
3806 case 'V':
3807 case 'W':
3808 case 'X':
3809 case 'Y':
3810 case 'Z':
3811
3812 // From rule "ALPHA" (lowercase half)
3813 case 'a':
3814 case 'b':
3815 case 'c':
3816 case 'd':
3817 case 'e':
3818 case 'f':
3819 case 'g':
3820 case 'h':
3821 case 'i':
3822 case 'j':
3823 case 'k':
3824 case 'l':
3825 case 'm':
3826 case 'n':
3827 case 'o':
3828 case 'p':
3829 case 'q':
3830 case 'r':
3831 case 's':
3832 case 't':
3833 case 'u':
3834 case 'v':
3835 case 'w':
3836 case 'x':
3837 case 'y':
3838 case 'z':
3839
3840 // From rule "DIGIT"
3841 case '0':
3842 case '1':
3843 case '2':
3844 case '3':
3845 case '4':
3846 case '5':
3847 case '6':
3848 case '7':
3849 case '8':
3850 case '9':
3851
3852 // From rule "pct-encoded"
3853 case '%':
3854
3855 // From rule "unreserved"
3856 case '-':
3857 case '.':
3858 case '_':
3859 case '~':
3860
3861 // From rule "gen-delims"
3862 case ':':
3863 case '/':
3864 case '?':
3865 case '#':
3866 case '[':
3867 case ']':
3868 case '@':
3869
3870 // From rule "sub-delims"
3871 case '!':
3872 case '$':
3873 case '&':
3874 case '\'':
3875 case '(':
3876 case ')':
3877 case '*':
3878 case '+':
3879 case ',':
3880 case ';':
3881 case '=':
3882 return XML_TRUE;
3883
3884 default:
3885 return XML_FALSE;
3886 }
3887 }
3888
3889 /* addBinding() overwrites the value of prefix->binding without checking.
3890 Therefore one must keep track of the old value outside of addBinding().
3891 */
3892 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3893 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3894 const XML_Char *uri, BINDING **bindingsPtr) {
3895 // "http://www.w3.org/XML/1998/namespace"
3896 static const XML_Char xmlNamespace[]
3897 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3898 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3899 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3900 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3901 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3902 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3903 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3904 ASCII_e, '\0'};
3905 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3906 // "http://www.w3.org/2000/xmlns/"
3907 static const XML_Char xmlnsNamespace[]
3908 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3909 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3910 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3911 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3912 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3913 static const int xmlnsLen
3914 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3915
3916 XML_Bool mustBeXML = XML_FALSE;
3917 XML_Bool isXML = XML_TRUE;
3918 XML_Bool isXMLNS = XML_TRUE;
3919
3920 BINDING *b;
3921 int len;
3922
3923 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3924 if (*uri == XML_T('\0') && prefix->name)
3925 return XML_ERROR_UNDECLARING_PREFIX;
3926
3927 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3928 && prefix->name[1] == XML_T(ASCII_m)
3929 && prefix->name[2] == XML_T(ASCII_l)) {
3930 /* Not allowed to bind xmlns */
3931 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3932 && prefix->name[5] == XML_T('\0'))
3933 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3934
3935 if (prefix->name[3] == XML_T('\0'))
3936 mustBeXML = XML_TRUE;
3937 }
3938
3939 for (len = 0; uri[len]; len++) {
3940 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3941 isXML = XML_FALSE;
3942
3943 if (! mustBeXML && isXMLNS
3944 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3945 isXMLNS = XML_FALSE;
3946
3947 // NOTE: While Expat does not validate namespace URIs against RFC 3986
3948 // today (and is not REQUIRED to do so with regard to the XML 1.0
3949 // namespaces specification) we have to at least make sure, that
3950 // the application on top of Expat (that is likely splitting expanded
3951 // element names ("qualified names") of form
3952 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3953 // in its element handler code) cannot be confused by an attacker
3954 // putting additional namespace separator characters into namespace
3955 // declarations. That would be ambiguous and not to be expected.
3956 //
3957 // While the HTML API docs of function XML_ParserCreateNS have been
3958 // advising against use of a namespace separator character that can
3959 // appear in a URI for >20 years now, some widespread applications
3960 // are using URI characters (':' (colon) in particular) for a
3961 // namespace separator, in practice. To keep these applications
3962 // functional, we only reject namespaces URIs containing the
3963 // application-chosen namespace separator if the chosen separator
3964 // is a non-URI character with regard to RFC 3986.
3965 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3966 && ! is_rfc3986_uri_char(uri[len])) {
3967 return XML_ERROR_SYNTAX;
3968 }
3969 }
3970 isXML = isXML && len == xmlLen;
3971 isXMLNS = isXMLNS && len == xmlnsLen;
3972
3973 if (mustBeXML != isXML)
3974 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3975 : XML_ERROR_RESERVED_NAMESPACE_URI;
3976
3977 if (isXMLNS)
3978 return XML_ERROR_RESERVED_NAMESPACE_URI;
3979
3980 if (parser->m_namespaceSeparator)
3981 len++;
3982 if (parser->m_freeBindingList) {
3983 b = parser->m_freeBindingList;
3984 if (len > b->uriAlloc) {
3985 /* Detect and prevent integer overflow */
3986 if (len > INT_MAX - EXPAND_SPARE) {
3987 return XML_ERROR_NO_MEMORY;
3988 }
3989
3990 /* Detect and prevent integer overflow.
3991 * The preprocessor guard addresses the "always false" warning
3992 * from -Wtype-limits on platforms where
3993 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3994 #if UINT_MAX >= SIZE_MAX
3995 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3996 return XML_ERROR_NO_MEMORY;
3997 }
3998 #endif
3999
4000 XML_Char *temp = (XML_Char *)REALLOC(
4001 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4002 if (temp == NULL)
4003 return XML_ERROR_NO_MEMORY;
4004 b->uri = temp;
4005 b->uriAlloc = len + EXPAND_SPARE;
4006 }
4007 parser->m_freeBindingList = b->nextTagBinding;
4008 } else {
4009 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4010 if (! b)
4011 return XML_ERROR_NO_MEMORY;
4012
4013 /* Detect and prevent integer overflow */
4014 if (len > INT_MAX - EXPAND_SPARE) {
4015 return XML_ERROR_NO_MEMORY;
4016 }
4017 /* Detect and prevent integer overflow.
4018 * The preprocessor guard addresses the "always false" warning
4019 * from -Wtype-limits on platforms where
4020 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4021 #if UINT_MAX >= SIZE_MAX
4022 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4023 return XML_ERROR_NO_MEMORY;
4024 }
4025 #endif
4026
4027 b->uri
4028 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4029 if (! b->uri) {
4030 FREE(parser, b);
4031 return XML_ERROR_NO_MEMORY;
4032 }
4033 b->uriAlloc = len + EXPAND_SPARE;
4034 }
4035 b->uriLen = len;
4036 memcpy(b->uri, uri, len * sizeof(XML_Char));
4037 if (parser->m_namespaceSeparator)
4038 b->uri[len - 1] = parser->m_namespaceSeparator;
4039 b->prefix = prefix;
4040 b->attId = attId;
4041 b->prevPrefixBinding = prefix->binding;
4042 /* NULL binding when default namespace undeclared */
4043 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4044 prefix->binding = NULL;
4045 else
4046 prefix->binding = b;
4047 b->nextTagBinding = *bindingsPtr;
4048 *bindingsPtr = b;
4049 /* if attId == NULL then we are not starting a namespace scope */
4050 if (attId && parser->m_startNamespaceDeclHandler)
4051 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4052 prefix->binding ? uri : 0);
4053 return XML_ERROR_NONE;
4054 }
4055
4056 /* The idea here is to avoid using stack for each CDATA section when
4057 the whole file is parsed with one call.
4058 */
4059 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4060 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4061 const char **endPtr) {
4062 enum XML_Error result = doCdataSection(
4063 parser, parser->m_encoding, &start, end, endPtr,
4064 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4065 if (result != XML_ERROR_NONE)
4066 return result;
4067 if (start) {
4068 if (parser->m_parentParser) { /* we are parsing an external entity */
4069 parser->m_processor = externalEntityContentProcessor;
4070 return externalEntityContentProcessor(parser, start, end, endPtr);
4071 } else {
4072 parser->m_processor = contentProcessor;
4073 return contentProcessor(parser, start, end, endPtr);
4074 }
4075 }
4076 return result;
4077 }
4078
4079 /* startPtr gets set to non-null if the section is closed, and to null if
4080 the section is not yet closed.
4081 */
4082 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)4083 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4084 const char *end, const char **nextPtr, XML_Bool haveMore,
4085 enum XML_Account account) {
4086 const char *s = *startPtr;
4087 const char **eventPP;
4088 const char **eventEndPP;
4089 if (enc == parser->m_encoding) {
4090 eventPP = &parser->m_eventPtr;
4091 *eventPP = s;
4092 eventEndPP = &parser->m_eventEndPtr;
4093 } else {
4094 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4095 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4096 }
4097 *eventPP = s;
4098 *startPtr = NULL;
4099
4100 for (;;) {
4101 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4102 int tok = XmlCdataSectionTok(enc, s, end, &next);
4103 #if XML_GE == 1
4104 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4105 accountingOnAbort(parser);
4106 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4107 }
4108 #else
4109 UNUSED_P(account);
4110 #endif
4111 *eventEndPP = next;
4112 switch (tok) {
4113 case XML_TOK_CDATA_SECT_CLOSE:
4114 if (parser->m_endCdataSectionHandler)
4115 parser->m_endCdataSectionHandler(parser->m_handlerArg);
4116 /* BEGIN disabled code */
4117 /* see comment under XML_TOK_CDATA_SECT_OPEN */
4118 else if ((0) && parser->m_characterDataHandler)
4119 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4120 0);
4121 /* END disabled code */
4122 else if (parser->m_defaultHandler)
4123 reportDefault(parser, enc, s, next);
4124 *startPtr = next;
4125 *nextPtr = next;
4126 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4127 return XML_ERROR_ABORTED;
4128 else
4129 return XML_ERROR_NONE;
4130 case XML_TOK_DATA_NEWLINE:
4131 if (parser->m_characterDataHandler) {
4132 XML_Char c = 0xA;
4133 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4134 } else if (parser->m_defaultHandler)
4135 reportDefault(parser, enc, s, next);
4136 break;
4137 case XML_TOK_DATA_CHARS: {
4138 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4139 if (charDataHandler) {
4140 if (MUST_CONVERT(enc, s)) {
4141 for (;;) {
4142 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4143 const enum XML_Convert_Result convert_res = XmlConvert(
4144 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4145 *eventEndPP = next;
4146 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4147 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4148 if ((convert_res == XML_CONVERT_COMPLETED)
4149 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4150 break;
4151 *eventPP = s;
4152 }
4153 } else
4154 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4155 (int)((const XML_Char *)next - (const XML_Char *)s));
4156 } else if (parser->m_defaultHandler)
4157 reportDefault(parser, enc, s, next);
4158 } break;
4159 case XML_TOK_INVALID:
4160 *eventPP = next;
4161 return XML_ERROR_INVALID_TOKEN;
4162 case XML_TOK_PARTIAL_CHAR:
4163 if (haveMore) {
4164 *nextPtr = s;
4165 return XML_ERROR_NONE;
4166 }
4167 return XML_ERROR_PARTIAL_CHAR;
4168 case XML_TOK_PARTIAL:
4169 case XML_TOK_NONE:
4170 if (haveMore) {
4171 *nextPtr = s;
4172 return XML_ERROR_NONE;
4173 }
4174 return XML_ERROR_UNCLOSED_CDATA_SECTION;
4175 default:
4176 /* Every token returned by XmlCdataSectionTok() has its own
4177 * explicit case, so this default case will never be executed.
4178 * We retain it as a safety net and exclude it from the coverage
4179 * statistics.
4180 *
4181 * LCOV_EXCL_START
4182 */
4183 *eventPP = next;
4184 return XML_ERROR_UNEXPECTED_STATE;
4185 /* LCOV_EXCL_STOP */
4186 }
4187
4188 *eventPP = s = next;
4189 switch (parser->m_parsingStatus.parsing) {
4190 case XML_SUSPENDED:
4191 *nextPtr = next;
4192 return XML_ERROR_NONE;
4193 case XML_FINISHED:
4194 return XML_ERROR_ABORTED;
4195 default:;
4196 }
4197 }
4198 /* not reached */
4199 }
4200
4201 #ifdef XML_DTD
4202
4203 /* The idea here is to avoid using stack for each IGNORE section when
4204 the whole file is parsed with one call.
4205 */
4206 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4207 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4208 const char **endPtr) {
4209 enum XML_Error result
4210 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4211 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4212 if (result != XML_ERROR_NONE)
4213 return result;
4214 if (start) {
4215 parser->m_processor = prologProcessor;
4216 return prologProcessor(parser, start, end, endPtr);
4217 }
4218 return result;
4219 }
4220
4221 /* startPtr gets set to non-null is the section is closed, and to null
4222 if the section is not yet closed.
4223 */
4224 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4225 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4226 const char *end, const char **nextPtr, XML_Bool haveMore) {
4227 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4228 int tok;
4229 const char *s = *startPtr;
4230 const char **eventPP;
4231 const char **eventEndPP;
4232 if (enc == parser->m_encoding) {
4233 eventPP = &parser->m_eventPtr;
4234 *eventPP = s;
4235 eventEndPP = &parser->m_eventEndPtr;
4236 } else {
4237 /* It's not entirely clear, but it seems the following two lines
4238 * of code cannot be executed. The only occasions on which 'enc'
4239 * is not 'encoding' are when this function is called
4240 * from the internal entity processing, and IGNORE sections are an
4241 * error in internal entities.
4242 *
4243 * Since it really isn't clear that this is true, we keep the code
4244 * and just remove it from our coverage tests.
4245 *
4246 * LCOV_EXCL_START
4247 */
4248 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4249 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4250 /* LCOV_EXCL_STOP */
4251 }
4252 *eventPP = s;
4253 *startPtr = NULL;
4254 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4255 # if XML_GE == 1
4256 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4257 XML_ACCOUNT_DIRECT)) {
4258 accountingOnAbort(parser);
4259 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4260 }
4261 # endif
4262 *eventEndPP = next;
4263 switch (tok) {
4264 case XML_TOK_IGNORE_SECT:
4265 if (parser->m_defaultHandler)
4266 reportDefault(parser, enc, s, next);
4267 *startPtr = next;
4268 *nextPtr = next;
4269 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4270 return XML_ERROR_ABORTED;
4271 else
4272 return XML_ERROR_NONE;
4273 case XML_TOK_INVALID:
4274 *eventPP = next;
4275 return XML_ERROR_INVALID_TOKEN;
4276 case XML_TOK_PARTIAL_CHAR:
4277 if (haveMore) {
4278 *nextPtr = s;
4279 return XML_ERROR_NONE;
4280 }
4281 return XML_ERROR_PARTIAL_CHAR;
4282 case XML_TOK_PARTIAL:
4283 case XML_TOK_NONE:
4284 if (haveMore) {
4285 *nextPtr = s;
4286 return XML_ERROR_NONE;
4287 }
4288 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4289 default:
4290 /* All of the tokens that XmlIgnoreSectionTok() returns have
4291 * explicit cases to handle them, so this default case is never
4292 * executed. We keep it as a safety net anyway, and remove it
4293 * from our test coverage statistics.
4294 *
4295 * LCOV_EXCL_START
4296 */
4297 *eventPP = next;
4298 return XML_ERROR_UNEXPECTED_STATE;
4299 /* LCOV_EXCL_STOP */
4300 }
4301 /* not reached */
4302 }
4303
4304 #endif /* XML_DTD */
4305
4306 static enum XML_Error
initializeEncoding(XML_Parser parser)4307 initializeEncoding(XML_Parser parser) {
4308 const char *s;
4309 #ifdef XML_UNICODE
4310 char encodingBuf[128];
4311 /* See comments about `protocolEncodingName` in parserInit() */
4312 if (! parser->m_protocolEncodingName)
4313 s = NULL;
4314 else {
4315 int i;
4316 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4317 if (i == sizeof(encodingBuf) - 1
4318 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4319 encodingBuf[0] = '\0';
4320 break;
4321 }
4322 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4323 }
4324 encodingBuf[i] = '\0';
4325 s = encodingBuf;
4326 }
4327 #else
4328 s = parser->m_protocolEncodingName;
4329 #endif
4330 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4331 &parser->m_initEncoding, &parser->m_encoding, s))
4332 return XML_ERROR_NONE;
4333 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4334 }
4335
4336 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4337 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4338 const char *next) {
4339 const char *encodingName = NULL;
4340 const XML_Char *storedEncName = NULL;
4341 const ENCODING *newEncoding = NULL;
4342 const char *version = NULL;
4343 const char *versionend = NULL;
4344 const XML_Char *storedversion = NULL;
4345 int standalone = -1;
4346
4347 #if XML_GE == 1
4348 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4349 XML_ACCOUNT_DIRECT)) {
4350 accountingOnAbort(parser);
4351 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4352 }
4353 #endif
4354
4355 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4356 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4357 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4358 if (isGeneralTextEntity)
4359 return XML_ERROR_TEXT_DECL;
4360 else
4361 return XML_ERROR_XML_DECL;
4362 }
4363 if (! isGeneralTextEntity && standalone == 1) {
4364 parser->m_dtd->standalone = XML_TRUE;
4365 #ifdef XML_DTD
4366 if (parser->m_paramEntityParsing
4367 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4368 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4369 #endif /* XML_DTD */
4370 }
4371 if (parser->m_xmlDeclHandler) {
4372 if (encodingName != NULL) {
4373 storedEncName = poolStoreString(
4374 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4375 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4376 if (! storedEncName)
4377 return XML_ERROR_NO_MEMORY;
4378 poolFinish(&parser->m_temp2Pool);
4379 }
4380 if (version) {
4381 storedversion
4382 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4383 versionend - parser->m_encoding->minBytesPerChar);
4384 if (! storedversion)
4385 return XML_ERROR_NO_MEMORY;
4386 }
4387 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4388 standalone);
4389 } else if (parser->m_defaultHandler)
4390 reportDefault(parser, parser->m_encoding, s, next);
4391 if (parser->m_protocolEncodingName == NULL) {
4392 if (newEncoding) {
4393 /* Check that the specified encoding does not conflict with what
4394 * the parser has already deduced. Do we have the same number
4395 * of bytes in the smallest representation of a character? If
4396 * this is UTF-16, is it the same endianness?
4397 */
4398 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4399 || (newEncoding->minBytesPerChar == 2
4400 && newEncoding != parser->m_encoding)) {
4401 parser->m_eventPtr = encodingName;
4402 return XML_ERROR_INCORRECT_ENCODING;
4403 }
4404 parser->m_encoding = newEncoding;
4405 } else if (encodingName) {
4406 enum XML_Error result;
4407 if (! storedEncName) {
4408 storedEncName = poolStoreString(
4409 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4410 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4411 if (! storedEncName)
4412 return XML_ERROR_NO_MEMORY;
4413 }
4414 result = handleUnknownEncoding(parser, storedEncName);
4415 poolClear(&parser->m_temp2Pool);
4416 if (result == XML_ERROR_UNKNOWN_ENCODING)
4417 parser->m_eventPtr = encodingName;
4418 return result;
4419 }
4420 }
4421
4422 if (storedEncName || storedversion)
4423 poolClear(&parser->m_temp2Pool);
4424
4425 return XML_ERROR_NONE;
4426 }
4427
4428 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4429 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4430 if (parser->m_unknownEncodingHandler) {
4431 XML_Encoding info;
4432 int i;
4433 for (i = 0; i < 256; i++)
4434 info.map[i] = -1;
4435 info.convert = NULL;
4436 info.data = NULL;
4437 info.release = NULL;
4438 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4439 encodingName, &info)) {
4440 ENCODING *enc;
4441 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4442 if (! parser->m_unknownEncodingMem) {
4443 if (info.release)
4444 info.release(info.data);
4445 return XML_ERROR_NO_MEMORY;
4446 }
4447 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4448 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4449 if (enc) {
4450 parser->m_unknownEncodingData = info.data;
4451 parser->m_unknownEncodingRelease = info.release;
4452 parser->m_encoding = enc;
4453 return XML_ERROR_NONE;
4454 }
4455 }
4456 if (info.release != NULL)
4457 info.release(info.data);
4458 }
4459 return XML_ERROR_UNKNOWN_ENCODING;
4460 }
4461
4462 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4463 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4464 const char **nextPtr) {
4465 enum XML_Error result = initializeEncoding(parser);
4466 if (result != XML_ERROR_NONE)
4467 return result;
4468 parser->m_processor = prologProcessor;
4469 return prologProcessor(parser, s, end, nextPtr);
4470 }
4471
4472 #ifdef XML_DTD
4473
4474 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4475 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4476 const char **nextPtr) {
4477 enum XML_Error result = initializeEncoding(parser);
4478 if (result != XML_ERROR_NONE)
4479 return result;
4480
4481 /* we know now that XML_Parse(Buffer) has been called,
4482 so we consider the external parameter entity read */
4483 parser->m_dtd->paramEntityRead = XML_TRUE;
4484
4485 if (parser->m_prologState.inEntityValue) {
4486 parser->m_processor = entityValueInitProcessor;
4487 return entityValueInitProcessor(parser, s, end, nextPtr);
4488 } else {
4489 parser->m_processor = externalParEntProcessor;
4490 return externalParEntProcessor(parser, s, end, nextPtr);
4491 }
4492 }
4493
4494 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4495 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4496 const char **nextPtr) {
4497 int tok;
4498 const char *start = s;
4499 const char *next = start;
4500 parser->m_eventPtr = start;
4501
4502 for (;;) {
4503 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4504 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4505 - storeEntityValue
4506 - processXmlDecl
4507 */
4508 parser->m_eventEndPtr = next;
4509 if (tok <= 0) {
4510 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4511 *nextPtr = s;
4512 return XML_ERROR_NONE;
4513 }
4514 switch (tok) {
4515 case XML_TOK_INVALID:
4516 return XML_ERROR_INVALID_TOKEN;
4517 case XML_TOK_PARTIAL:
4518 return XML_ERROR_UNCLOSED_TOKEN;
4519 case XML_TOK_PARTIAL_CHAR:
4520 return XML_ERROR_PARTIAL_CHAR;
4521 case XML_TOK_NONE: /* start == end */
4522 default:
4523 break;
4524 }
4525 /* found end of entity value - can store it now */
4526 return storeEntityValue(parser, parser->m_encoding, s, end,
4527 XML_ACCOUNT_DIRECT);
4528 } else if (tok == XML_TOK_XML_DECL) {
4529 enum XML_Error result;
4530 result = processXmlDecl(parser, 0, start, next);
4531 if (result != XML_ERROR_NONE)
4532 return result;
4533 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4534 * that to happen, a parameter entity parsing handler must have attempted
4535 * to suspend the parser, which fails and raises an error. The parser can
4536 * be aborted, but can't be suspended.
4537 */
4538 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4539 return XML_ERROR_ABORTED;
4540 *nextPtr = next;
4541 /* stop scanning for text declaration - we found one */
4542 parser->m_processor = entityValueProcessor;
4543 return entityValueProcessor(parser, next, end, nextPtr);
4544 }
4545 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4546 must move s and nextPtr forward to consume the BOM.
4547
4548 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4549 would leave the BOM in the buffer and return. On the next call to this
4550 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4551 is not valid to have multiple BOMs.
4552 */
4553 else if (tok == XML_TOK_BOM) {
4554 # if XML_GE == 1
4555 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4556 XML_ACCOUNT_DIRECT)) {
4557 accountingOnAbort(parser);
4558 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4559 }
4560 # endif
4561
4562 *nextPtr = next;
4563 s = next;
4564 }
4565 /* If we get this token, we have the start of what might be a
4566 normal tag, but not a declaration (i.e. it doesn't begin with
4567 "<!"). In a DTD context, that isn't legal.
4568 */
4569 else if (tok == XML_TOK_INSTANCE_START) {
4570 *nextPtr = next;
4571 return XML_ERROR_SYNTAX;
4572 }
4573 start = next;
4574 parser->m_eventPtr = start;
4575 }
4576 }
4577
4578 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4579 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4580 const char **nextPtr) {
4581 const char *next = s;
4582 int tok;
4583
4584 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4585 if (tok <= 0) {
4586 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4587 *nextPtr = s;
4588 return XML_ERROR_NONE;
4589 }
4590 switch (tok) {
4591 case XML_TOK_INVALID:
4592 return XML_ERROR_INVALID_TOKEN;
4593 case XML_TOK_PARTIAL:
4594 return XML_ERROR_UNCLOSED_TOKEN;
4595 case XML_TOK_PARTIAL_CHAR:
4596 return XML_ERROR_PARTIAL_CHAR;
4597 case XML_TOK_NONE: /* start == end */
4598 default:
4599 break;
4600 }
4601 }
4602 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4603 However, when parsing an external subset, doProlog will not accept a BOM
4604 as valid, and report a syntax error, so we have to skip the BOM, and
4605 account for the BOM bytes.
4606 */
4607 else if (tok == XML_TOK_BOM) {
4608 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4609 XML_ACCOUNT_DIRECT)) {
4610 accountingOnAbort(parser);
4611 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4612 }
4613
4614 s = next;
4615 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4616 }
4617
4618 parser->m_processor = prologProcessor;
4619 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4620 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4621 XML_ACCOUNT_DIRECT);
4622 }
4623
4624 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4625 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4626 const char **nextPtr) {
4627 const char *start = s;
4628 const char *next = s;
4629 const ENCODING *enc = parser->m_encoding;
4630 int tok;
4631
4632 for (;;) {
4633 tok = XmlPrologTok(enc, start, end, &next);
4634 /* Note: These bytes are accounted later in:
4635 - storeEntityValue
4636 */
4637 if (tok <= 0) {
4638 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4639 *nextPtr = s;
4640 return XML_ERROR_NONE;
4641 }
4642 switch (tok) {
4643 case XML_TOK_INVALID:
4644 return XML_ERROR_INVALID_TOKEN;
4645 case XML_TOK_PARTIAL:
4646 return XML_ERROR_UNCLOSED_TOKEN;
4647 case XML_TOK_PARTIAL_CHAR:
4648 return XML_ERROR_PARTIAL_CHAR;
4649 case XML_TOK_NONE: /* start == end */
4650 default:
4651 break;
4652 }
4653 /* found end of entity value - can store it now */
4654 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4655 }
4656 start = next;
4657 }
4658 }
4659
4660 #endif /* XML_DTD */
4661
4662 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4663 prologProcessor(XML_Parser parser, const char *s, const char *end,
4664 const char **nextPtr) {
4665 const char *next = s;
4666 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4667 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4668 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4669 XML_ACCOUNT_DIRECT);
4670 }
4671
4672 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4673 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4674 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4675 XML_Bool allowClosingDoctype, enum XML_Account account) {
4676 #ifdef XML_DTD
4677 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4678 #endif /* XML_DTD */
4679 static const XML_Char atypeCDATA[]
4680 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4681 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4682 static const XML_Char atypeIDREF[]
4683 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4684 static const XML_Char atypeIDREFS[]
4685 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4686 static const XML_Char atypeENTITY[]
4687 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4688 static const XML_Char atypeENTITIES[]
4689 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4690 ASCII_I, ASCII_E, ASCII_S, '\0'};
4691 static const XML_Char atypeNMTOKEN[]
4692 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4693 static const XML_Char atypeNMTOKENS[]
4694 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4695 ASCII_E, ASCII_N, ASCII_S, '\0'};
4696 static const XML_Char notationPrefix[]
4697 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4698 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4699 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4700 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4701
4702 #ifndef XML_DTD
4703 UNUSED_P(account);
4704 #endif
4705
4706 /* save one level of indirection */
4707 DTD *const dtd = parser->m_dtd;
4708
4709 const char **eventPP;
4710 const char **eventEndPP;
4711 enum XML_Content_Quant quant;
4712
4713 if (enc == parser->m_encoding) {
4714 eventPP = &parser->m_eventPtr;
4715 eventEndPP = &parser->m_eventEndPtr;
4716 } else {
4717 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4718 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4719 }
4720
4721 for (;;) {
4722 int role;
4723 XML_Bool handleDefault = XML_TRUE;
4724 *eventPP = s;
4725 *eventEndPP = next;
4726 if (tok <= 0) {
4727 if (haveMore && tok != XML_TOK_INVALID) {
4728 *nextPtr = s;
4729 return XML_ERROR_NONE;
4730 }
4731 switch (tok) {
4732 case XML_TOK_INVALID:
4733 *eventPP = next;
4734 return XML_ERROR_INVALID_TOKEN;
4735 case XML_TOK_PARTIAL:
4736 return XML_ERROR_UNCLOSED_TOKEN;
4737 case XML_TOK_PARTIAL_CHAR:
4738 return XML_ERROR_PARTIAL_CHAR;
4739 case -XML_TOK_PROLOG_S:
4740 tok = -tok;
4741 break;
4742 case XML_TOK_NONE:
4743 #ifdef XML_DTD
4744 /* for internal PE NOT referenced between declarations */
4745 if (enc != parser->m_encoding
4746 && ! parser->m_openInternalEntities->betweenDecl) {
4747 *nextPtr = s;
4748 return XML_ERROR_NONE;
4749 }
4750 /* WFC: PE Between Declarations - must check that PE contains
4751 complete markup, not only for external PEs, but also for
4752 internal PEs if the reference occurs between declarations.
4753 */
4754 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4755 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4756 == XML_ROLE_ERROR)
4757 return XML_ERROR_INCOMPLETE_PE;
4758 *nextPtr = s;
4759 return XML_ERROR_NONE;
4760 }
4761 #endif /* XML_DTD */
4762 return XML_ERROR_NO_ELEMENTS;
4763 default:
4764 tok = -tok;
4765 next = end;
4766 break;
4767 }
4768 }
4769 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4770 #if XML_GE == 1
4771 switch (role) {
4772 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4773 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4774 # ifdef XML_DTD
4775 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4776 # endif
4777 break;
4778 default:
4779 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4780 accountingOnAbort(parser);
4781 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4782 }
4783 }
4784 #endif
4785 switch (role) {
4786 case XML_ROLE_XML_DECL: {
4787 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4788 if (result != XML_ERROR_NONE)
4789 return result;
4790 enc = parser->m_encoding;
4791 handleDefault = XML_FALSE;
4792 } break;
4793 case XML_ROLE_DOCTYPE_NAME:
4794 if (parser->m_startDoctypeDeclHandler) {
4795 parser->m_doctypeName
4796 = poolStoreString(&parser->m_tempPool, enc, s, next);
4797 if (! parser->m_doctypeName)
4798 return XML_ERROR_NO_MEMORY;
4799 poolFinish(&parser->m_tempPool);
4800 parser->m_doctypePubid = NULL;
4801 handleDefault = XML_FALSE;
4802 }
4803 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4804 break;
4805 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4806 if (parser->m_startDoctypeDeclHandler) {
4807 parser->m_startDoctypeDeclHandler(
4808 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4809 parser->m_doctypePubid, 1);
4810 parser->m_doctypeName = NULL;
4811 poolClear(&parser->m_tempPool);
4812 handleDefault = XML_FALSE;
4813 }
4814 break;
4815 #ifdef XML_DTD
4816 case XML_ROLE_TEXT_DECL: {
4817 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4818 if (result != XML_ERROR_NONE)
4819 return result;
4820 enc = parser->m_encoding;
4821 handleDefault = XML_FALSE;
4822 } break;
4823 #endif /* XML_DTD */
4824 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4825 #ifdef XML_DTD
4826 parser->m_useForeignDTD = XML_FALSE;
4827 parser->m_declEntity = (ENTITY *)lookup(
4828 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4829 if (! parser->m_declEntity)
4830 return XML_ERROR_NO_MEMORY;
4831 #endif /* XML_DTD */
4832 dtd->hasParamEntityRefs = XML_TRUE;
4833 if (parser->m_startDoctypeDeclHandler) {
4834 XML_Char *pubId;
4835 if (! XmlIsPublicId(enc, s, next, eventPP))
4836 return XML_ERROR_PUBLICID;
4837 pubId = poolStoreString(&parser->m_tempPool, enc,
4838 s + enc->minBytesPerChar,
4839 next - enc->minBytesPerChar);
4840 if (! pubId)
4841 return XML_ERROR_NO_MEMORY;
4842 normalizePublicId(pubId);
4843 poolFinish(&parser->m_tempPool);
4844 parser->m_doctypePubid = pubId;
4845 handleDefault = XML_FALSE;
4846 goto alreadyChecked;
4847 }
4848 /* fall through */
4849 case XML_ROLE_ENTITY_PUBLIC_ID:
4850 if (! XmlIsPublicId(enc, s, next, eventPP))
4851 return XML_ERROR_PUBLICID;
4852 alreadyChecked:
4853 if (dtd->keepProcessing && parser->m_declEntity) {
4854 XML_Char *tem
4855 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4856 next - enc->minBytesPerChar);
4857 if (! tem)
4858 return XML_ERROR_NO_MEMORY;
4859 normalizePublicId(tem);
4860 parser->m_declEntity->publicId = tem;
4861 poolFinish(&dtd->pool);
4862 /* Don't suppress the default handler if we fell through from
4863 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4864 */
4865 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4866 handleDefault = XML_FALSE;
4867 }
4868 break;
4869 case XML_ROLE_DOCTYPE_CLOSE:
4870 if (allowClosingDoctype != XML_TRUE) {
4871 /* Must not close doctype from within expanded parameter entities */
4872 return XML_ERROR_INVALID_TOKEN;
4873 }
4874
4875 if (parser->m_doctypeName) {
4876 parser->m_startDoctypeDeclHandler(
4877 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4878 parser->m_doctypePubid, 0);
4879 poolClear(&parser->m_tempPool);
4880 handleDefault = XML_FALSE;
4881 }
4882 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4883 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4884 was not set, indicating an external subset
4885 */
4886 #ifdef XML_DTD
4887 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4888 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4889 dtd->hasParamEntityRefs = XML_TRUE;
4890 if (parser->m_paramEntityParsing
4891 && parser->m_externalEntityRefHandler) {
4892 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4893 externalSubsetName, sizeof(ENTITY));
4894 if (! entity) {
4895 /* The external subset name "#" will have already been
4896 * inserted into the hash table at the start of the
4897 * external entity parsing, so no allocation will happen
4898 * and lookup() cannot fail.
4899 */
4900 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4901 }
4902 if (parser->m_useForeignDTD)
4903 entity->base = parser->m_curBase;
4904 dtd->paramEntityRead = XML_FALSE;
4905 if (! parser->m_externalEntityRefHandler(
4906 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4907 entity->systemId, entity->publicId))
4908 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4909 if (dtd->paramEntityRead) {
4910 if (! dtd->standalone && parser->m_notStandaloneHandler
4911 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4912 return XML_ERROR_NOT_STANDALONE;
4913 }
4914 /* if we didn't read the foreign DTD then this means that there
4915 is no external subset and we must reset dtd->hasParamEntityRefs
4916 */
4917 else if (! parser->m_doctypeSysid)
4918 dtd->hasParamEntityRefs = hadParamEntityRefs;
4919 /* end of DTD - no need to update dtd->keepProcessing */
4920 }
4921 parser->m_useForeignDTD = XML_FALSE;
4922 }
4923 #endif /* XML_DTD */
4924 if (parser->m_endDoctypeDeclHandler) {
4925 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4926 handleDefault = XML_FALSE;
4927 }
4928 break;
4929 case XML_ROLE_INSTANCE_START:
4930 #ifdef XML_DTD
4931 /* if there is no DOCTYPE declaration then now is the
4932 last chance to read the foreign DTD
4933 */
4934 if (parser->m_useForeignDTD) {
4935 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4936 dtd->hasParamEntityRefs = XML_TRUE;
4937 if (parser->m_paramEntityParsing
4938 && parser->m_externalEntityRefHandler) {
4939 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4940 externalSubsetName, sizeof(ENTITY));
4941 if (! entity)
4942 return XML_ERROR_NO_MEMORY;
4943 entity->base = parser->m_curBase;
4944 dtd->paramEntityRead = XML_FALSE;
4945 if (! parser->m_externalEntityRefHandler(
4946 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4947 entity->systemId, entity->publicId))
4948 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4949 if (dtd->paramEntityRead) {
4950 if (! dtd->standalone && parser->m_notStandaloneHandler
4951 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4952 return XML_ERROR_NOT_STANDALONE;
4953 }
4954 /* if we didn't read the foreign DTD then this means that there
4955 is no external subset and we must reset dtd->hasParamEntityRefs
4956 */
4957 else
4958 dtd->hasParamEntityRefs = hadParamEntityRefs;
4959 /* end of DTD - no need to update dtd->keepProcessing */
4960 }
4961 }
4962 #endif /* XML_DTD */
4963 parser->m_processor = contentProcessor;
4964 return contentProcessor(parser, s, end, nextPtr);
4965 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4966 parser->m_declElementType = getElementType(parser, enc, s, next);
4967 if (! parser->m_declElementType)
4968 return XML_ERROR_NO_MEMORY;
4969 goto checkAttListDeclHandler;
4970 case XML_ROLE_ATTRIBUTE_NAME:
4971 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4972 if (! parser->m_declAttributeId)
4973 return XML_ERROR_NO_MEMORY;
4974 parser->m_declAttributeIsCdata = XML_FALSE;
4975 parser->m_declAttributeType = NULL;
4976 parser->m_declAttributeIsId = XML_FALSE;
4977 goto checkAttListDeclHandler;
4978 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4979 parser->m_declAttributeIsCdata = XML_TRUE;
4980 parser->m_declAttributeType = atypeCDATA;
4981 goto checkAttListDeclHandler;
4982 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4983 parser->m_declAttributeIsId = XML_TRUE;
4984 parser->m_declAttributeType = atypeID;
4985 goto checkAttListDeclHandler;
4986 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4987 parser->m_declAttributeType = atypeIDREF;
4988 goto checkAttListDeclHandler;
4989 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4990 parser->m_declAttributeType = atypeIDREFS;
4991 goto checkAttListDeclHandler;
4992 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4993 parser->m_declAttributeType = atypeENTITY;
4994 goto checkAttListDeclHandler;
4995 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4996 parser->m_declAttributeType = atypeENTITIES;
4997 goto checkAttListDeclHandler;
4998 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4999 parser->m_declAttributeType = atypeNMTOKEN;
5000 goto checkAttListDeclHandler;
5001 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5002 parser->m_declAttributeType = atypeNMTOKENS;
5003 checkAttListDeclHandler:
5004 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5005 handleDefault = XML_FALSE;
5006 break;
5007 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5008 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5009 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5010 const XML_Char *prefix;
5011 if (parser->m_declAttributeType) {
5012 prefix = enumValueSep;
5013 } else {
5014 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5015 : enumValueStart);
5016 }
5017 if (! poolAppendString(&parser->m_tempPool, prefix))
5018 return XML_ERROR_NO_MEMORY;
5019 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5020 return XML_ERROR_NO_MEMORY;
5021 parser->m_declAttributeType = parser->m_tempPool.start;
5022 handleDefault = XML_FALSE;
5023 }
5024 break;
5025 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5026 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5027 if (dtd->keepProcessing) {
5028 if (! defineAttribute(parser->m_declElementType,
5029 parser->m_declAttributeId,
5030 parser->m_declAttributeIsCdata,
5031 parser->m_declAttributeIsId, 0, parser))
5032 return XML_ERROR_NO_MEMORY;
5033 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5034 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5035 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5036 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5037 /* Enumerated or Notation type */
5038 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5039 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5040 return XML_ERROR_NO_MEMORY;
5041 parser->m_declAttributeType = parser->m_tempPool.start;
5042 poolFinish(&parser->m_tempPool);
5043 }
5044 *eventEndPP = s;
5045 parser->m_attlistDeclHandler(
5046 parser->m_handlerArg, parser->m_declElementType->name,
5047 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5048 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5049 handleDefault = XML_FALSE;
5050 }
5051 }
5052 poolClear(&parser->m_tempPool);
5053 break;
5054 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5055 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5056 if (dtd->keepProcessing) {
5057 const XML_Char *attVal;
5058 enum XML_Error result = storeAttributeValue(
5059 parser, enc, parser->m_declAttributeIsCdata,
5060 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5061 XML_ACCOUNT_NONE);
5062 if (result)
5063 return result;
5064 attVal = poolStart(&dtd->pool);
5065 poolFinish(&dtd->pool);
5066 /* ID attributes aren't allowed to have a default */
5067 if (! defineAttribute(
5068 parser->m_declElementType, parser->m_declAttributeId,
5069 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5070 return XML_ERROR_NO_MEMORY;
5071 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5072 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5073 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5074 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5075 /* Enumerated or Notation type */
5076 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5077 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5078 return XML_ERROR_NO_MEMORY;
5079 parser->m_declAttributeType = parser->m_tempPool.start;
5080 poolFinish(&parser->m_tempPool);
5081 }
5082 *eventEndPP = s;
5083 parser->m_attlistDeclHandler(
5084 parser->m_handlerArg, parser->m_declElementType->name,
5085 parser->m_declAttributeId->name, parser->m_declAttributeType,
5086 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5087 poolClear(&parser->m_tempPool);
5088 handleDefault = XML_FALSE;
5089 }
5090 }
5091 break;
5092 case XML_ROLE_ENTITY_VALUE:
5093 if (dtd->keepProcessing) {
5094 #if XML_GE == 1
5095 // This will store the given replacement text in
5096 // parser->m_declEntity->textPtr.
5097 enum XML_Error result
5098 = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5099 next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5100 if (parser->m_declEntity) {
5101 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5102 parser->m_declEntity->textLen
5103 = (int)(poolLength(&dtd->entityValuePool));
5104 poolFinish(&dtd->entityValuePool);
5105 if (parser->m_entityDeclHandler) {
5106 *eventEndPP = s;
5107 parser->m_entityDeclHandler(
5108 parser->m_handlerArg, parser->m_declEntity->name,
5109 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5110 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5111 handleDefault = XML_FALSE;
5112 }
5113 } else
5114 poolDiscard(&dtd->entityValuePool);
5115 if (result != XML_ERROR_NONE)
5116 return result;
5117 #else
5118 // This will store "&entity123;" in parser->m_declEntity->textPtr
5119 // to end up as "&entity123;" in the handler.
5120 if (parser->m_declEntity != NULL) {
5121 const enum XML_Error result
5122 = storeSelfEntityValue(parser, parser->m_declEntity);
5123 if (result != XML_ERROR_NONE)
5124 return result;
5125
5126 if (parser->m_entityDeclHandler) {
5127 *eventEndPP = s;
5128 parser->m_entityDeclHandler(
5129 parser->m_handlerArg, parser->m_declEntity->name,
5130 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5131 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5132 handleDefault = XML_FALSE;
5133 }
5134 }
5135 #endif
5136 }
5137 break;
5138 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5139 #ifdef XML_DTD
5140 parser->m_useForeignDTD = XML_FALSE;
5141 #endif /* XML_DTD */
5142 dtd->hasParamEntityRefs = XML_TRUE;
5143 if (parser->m_startDoctypeDeclHandler) {
5144 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5145 s + enc->minBytesPerChar,
5146 next - enc->minBytesPerChar);
5147 if (parser->m_doctypeSysid == NULL)
5148 return XML_ERROR_NO_MEMORY;
5149 poolFinish(&parser->m_tempPool);
5150 handleDefault = XML_FALSE;
5151 }
5152 #ifdef XML_DTD
5153 else
5154 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5155 for the case where no parser->m_startDoctypeDeclHandler is set */
5156 parser->m_doctypeSysid = externalSubsetName;
5157 #endif /* XML_DTD */
5158 if (! dtd->standalone
5159 #ifdef XML_DTD
5160 && ! parser->m_paramEntityParsing
5161 #endif /* XML_DTD */
5162 && parser->m_notStandaloneHandler
5163 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5164 return XML_ERROR_NOT_STANDALONE;
5165 #ifndef XML_DTD
5166 break;
5167 #else /* XML_DTD */
5168 if (! parser->m_declEntity) {
5169 parser->m_declEntity = (ENTITY *)lookup(
5170 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5171 if (! parser->m_declEntity)
5172 return XML_ERROR_NO_MEMORY;
5173 parser->m_declEntity->publicId = NULL;
5174 }
5175 #endif /* XML_DTD */
5176 /* fall through */
5177 case XML_ROLE_ENTITY_SYSTEM_ID:
5178 if (dtd->keepProcessing && parser->m_declEntity) {
5179 parser->m_declEntity->systemId
5180 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5181 next - enc->minBytesPerChar);
5182 if (! parser->m_declEntity->systemId)
5183 return XML_ERROR_NO_MEMORY;
5184 parser->m_declEntity->base = parser->m_curBase;
5185 poolFinish(&dtd->pool);
5186 /* Don't suppress the default handler if we fell through from
5187 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5188 */
5189 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5190 handleDefault = XML_FALSE;
5191 }
5192 break;
5193 case XML_ROLE_ENTITY_COMPLETE:
5194 #if XML_GE == 0
5195 // This will store "&entity123;" in entity->textPtr
5196 // to end up as "&entity123;" in the handler.
5197 if (parser->m_declEntity != NULL) {
5198 const enum XML_Error result
5199 = storeSelfEntityValue(parser, parser->m_declEntity);
5200 if (result != XML_ERROR_NONE)
5201 return result;
5202 }
5203 #endif
5204 if (dtd->keepProcessing && parser->m_declEntity
5205 && parser->m_entityDeclHandler) {
5206 *eventEndPP = s;
5207 parser->m_entityDeclHandler(
5208 parser->m_handlerArg, parser->m_declEntity->name,
5209 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5210 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5211 handleDefault = XML_FALSE;
5212 }
5213 break;
5214 case XML_ROLE_ENTITY_NOTATION_NAME:
5215 if (dtd->keepProcessing && parser->m_declEntity) {
5216 parser->m_declEntity->notation
5217 = poolStoreString(&dtd->pool, enc, s, next);
5218 if (! parser->m_declEntity->notation)
5219 return XML_ERROR_NO_MEMORY;
5220 poolFinish(&dtd->pool);
5221 if (parser->m_unparsedEntityDeclHandler) {
5222 *eventEndPP = s;
5223 parser->m_unparsedEntityDeclHandler(
5224 parser->m_handlerArg, parser->m_declEntity->name,
5225 parser->m_declEntity->base, parser->m_declEntity->systemId,
5226 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5227 handleDefault = XML_FALSE;
5228 } else if (parser->m_entityDeclHandler) {
5229 *eventEndPP = s;
5230 parser->m_entityDeclHandler(
5231 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5232 parser->m_declEntity->base, parser->m_declEntity->systemId,
5233 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5234 handleDefault = XML_FALSE;
5235 }
5236 }
5237 break;
5238 case XML_ROLE_GENERAL_ENTITY_NAME: {
5239 if (XmlPredefinedEntityName(enc, s, next)) {
5240 parser->m_declEntity = NULL;
5241 break;
5242 }
5243 if (dtd->keepProcessing) {
5244 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5245 if (! name)
5246 return XML_ERROR_NO_MEMORY;
5247 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5248 name, sizeof(ENTITY));
5249 if (! parser->m_declEntity)
5250 return XML_ERROR_NO_MEMORY;
5251 if (parser->m_declEntity->name != name) {
5252 poolDiscard(&dtd->pool);
5253 parser->m_declEntity = NULL;
5254 } else {
5255 poolFinish(&dtd->pool);
5256 parser->m_declEntity->publicId = NULL;
5257 parser->m_declEntity->is_param = XML_FALSE;
5258 /* if we have a parent parser or are reading an internal parameter
5259 entity, then the entity declaration is not considered "internal"
5260 */
5261 parser->m_declEntity->is_internal
5262 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5263 if (parser->m_entityDeclHandler)
5264 handleDefault = XML_FALSE;
5265 }
5266 } else {
5267 poolDiscard(&dtd->pool);
5268 parser->m_declEntity = NULL;
5269 }
5270 } break;
5271 case XML_ROLE_PARAM_ENTITY_NAME:
5272 #ifdef XML_DTD
5273 if (dtd->keepProcessing) {
5274 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5275 if (! name)
5276 return XML_ERROR_NO_MEMORY;
5277 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5278 name, sizeof(ENTITY));
5279 if (! parser->m_declEntity)
5280 return XML_ERROR_NO_MEMORY;
5281 if (parser->m_declEntity->name != name) {
5282 poolDiscard(&dtd->pool);
5283 parser->m_declEntity = NULL;
5284 } else {
5285 poolFinish(&dtd->pool);
5286 parser->m_declEntity->publicId = NULL;
5287 parser->m_declEntity->is_param = XML_TRUE;
5288 /* if we have a parent parser or are reading an internal parameter
5289 entity, then the entity declaration is not considered "internal"
5290 */
5291 parser->m_declEntity->is_internal
5292 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5293 if (parser->m_entityDeclHandler)
5294 handleDefault = XML_FALSE;
5295 }
5296 } else {
5297 poolDiscard(&dtd->pool);
5298 parser->m_declEntity = NULL;
5299 }
5300 #else /* not XML_DTD */
5301 parser->m_declEntity = NULL;
5302 #endif /* XML_DTD */
5303 break;
5304 case XML_ROLE_NOTATION_NAME:
5305 parser->m_declNotationPublicId = NULL;
5306 parser->m_declNotationName = NULL;
5307 if (parser->m_notationDeclHandler) {
5308 parser->m_declNotationName
5309 = poolStoreString(&parser->m_tempPool, enc, s, next);
5310 if (! parser->m_declNotationName)
5311 return XML_ERROR_NO_MEMORY;
5312 poolFinish(&parser->m_tempPool);
5313 handleDefault = XML_FALSE;
5314 }
5315 break;
5316 case XML_ROLE_NOTATION_PUBLIC_ID:
5317 if (! XmlIsPublicId(enc, s, next, eventPP))
5318 return XML_ERROR_PUBLICID;
5319 if (parser
5320 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5321 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5322 s + enc->minBytesPerChar,
5323 next - enc->minBytesPerChar);
5324 if (! tem)
5325 return XML_ERROR_NO_MEMORY;
5326 normalizePublicId(tem);
5327 parser->m_declNotationPublicId = tem;
5328 poolFinish(&parser->m_tempPool);
5329 handleDefault = XML_FALSE;
5330 }
5331 break;
5332 case XML_ROLE_NOTATION_SYSTEM_ID:
5333 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5334 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5335 s + enc->minBytesPerChar,
5336 next - enc->minBytesPerChar);
5337 if (! systemId)
5338 return XML_ERROR_NO_MEMORY;
5339 *eventEndPP = s;
5340 parser->m_notationDeclHandler(
5341 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5342 systemId, parser->m_declNotationPublicId);
5343 handleDefault = XML_FALSE;
5344 }
5345 poolClear(&parser->m_tempPool);
5346 break;
5347 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5348 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5349 *eventEndPP = s;
5350 parser->m_notationDeclHandler(
5351 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5352 0, parser->m_declNotationPublicId);
5353 handleDefault = XML_FALSE;
5354 }
5355 poolClear(&parser->m_tempPool);
5356 break;
5357 case XML_ROLE_ERROR:
5358 switch (tok) {
5359 case XML_TOK_PARAM_ENTITY_REF:
5360 /* PE references in internal subset are
5361 not allowed within declarations. */
5362 return XML_ERROR_PARAM_ENTITY_REF;
5363 case XML_TOK_XML_DECL:
5364 return XML_ERROR_MISPLACED_XML_PI;
5365 default:
5366 return XML_ERROR_SYNTAX;
5367 }
5368 #ifdef XML_DTD
5369 case XML_ROLE_IGNORE_SECT: {
5370 enum XML_Error result;
5371 if (parser->m_defaultHandler)
5372 reportDefault(parser, enc, s, next);
5373 handleDefault = XML_FALSE;
5374 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5375 if (result != XML_ERROR_NONE)
5376 return result;
5377 else if (! next) {
5378 parser->m_processor = ignoreSectionProcessor;
5379 return result;
5380 }
5381 } break;
5382 #endif /* XML_DTD */
5383 case XML_ROLE_GROUP_OPEN:
5384 if (parser->m_prologState.level >= parser->m_groupSize) {
5385 if (parser->m_groupSize) {
5386 {
5387 /* Detect and prevent integer overflow */
5388 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5389 return XML_ERROR_NO_MEMORY;
5390 }
5391
5392 char *const new_connector = (char *)REALLOC(
5393 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5394 if (new_connector == NULL) {
5395 parser->m_groupSize /= 2;
5396 return XML_ERROR_NO_MEMORY;
5397 }
5398 parser->m_groupConnector = new_connector;
5399 }
5400
5401 if (dtd->scaffIndex) {
5402 /* Detect and prevent integer overflow.
5403 * The preprocessor guard addresses the "always false" warning
5404 * from -Wtype-limits on platforms where
5405 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5406 #if UINT_MAX >= SIZE_MAX
5407 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5408 return XML_ERROR_NO_MEMORY;
5409 }
5410 #endif
5411
5412 int *const new_scaff_index = (int *)REALLOC(
5413 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5414 if (new_scaff_index == NULL)
5415 return XML_ERROR_NO_MEMORY;
5416 dtd->scaffIndex = new_scaff_index;
5417 }
5418 } else {
5419 parser->m_groupConnector
5420 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5421 if (! parser->m_groupConnector) {
5422 parser->m_groupSize = 0;
5423 return XML_ERROR_NO_MEMORY;
5424 }
5425 }
5426 }
5427 parser->m_groupConnector[parser->m_prologState.level] = 0;
5428 if (dtd->in_eldecl) {
5429 int myindex = nextScaffoldPart(parser);
5430 if (myindex < 0)
5431 return XML_ERROR_NO_MEMORY;
5432 assert(dtd->scaffIndex != NULL);
5433 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5434 dtd->scaffLevel++;
5435 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5436 if (parser->m_elementDeclHandler)
5437 handleDefault = XML_FALSE;
5438 }
5439 break;
5440 case XML_ROLE_GROUP_SEQUENCE:
5441 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5442 return XML_ERROR_SYNTAX;
5443 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5444 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5445 handleDefault = XML_FALSE;
5446 break;
5447 case XML_ROLE_GROUP_CHOICE:
5448 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5449 return XML_ERROR_SYNTAX;
5450 if (dtd->in_eldecl
5451 && ! parser->m_groupConnector[parser->m_prologState.level]
5452 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5453 != XML_CTYPE_MIXED)) {
5454 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5455 = XML_CTYPE_CHOICE;
5456 if (parser->m_elementDeclHandler)
5457 handleDefault = XML_FALSE;
5458 }
5459 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5460 break;
5461 case XML_ROLE_PARAM_ENTITY_REF:
5462 #ifdef XML_DTD
5463 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5464 dtd->hasParamEntityRefs = XML_TRUE;
5465 if (! parser->m_paramEntityParsing)
5466 dtd->keepProcessing = dtd->standalone;
5467 else {
5468 const XML_Char *name;
5469 ENTITY *entity;
5470 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5471 next - enc->minBytesPerChar);
5472 if (! name)
5473 return XML_ERROR_NO_MEMORY;
5474 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5475 poolDiscard(&dtd->pool);
5476 /* first, determine if a check for an existing declaration is needed;
5477 if yes, check that the entity exists, and that it is internal,
5478 otherwise call the skipped entity handler
5479 */
5480 if (parser->m_prologState.documentEntity
5481 && (dtd->standalone ? ! parser->m_openInternalEntities
5482 : ! dtd->hasParamEntityRefs)) {
5483 if (! entity)
5484 return XML_ERROR_UNDEFINED_ENTITY;
5485 else if (! entity->is_internal) {
5486 /* It's hard to exhaustively search the code to be sure,
5487 * but there doesn't seem to be a way of executing the
5488 * following line. There are two cases:
5489 *
5490 * If 'standalone' is false, the DTD must have no
5491 * parameter entities or we wouldn't have passed the outer
5492 * 'if' statement. That means the only entity in the hash
5493 * table is the external subset name "#" which cannot be
5494 * given as a parameter entity name in XML syntax, so the
5495 * lookup must have returned NULL and we don't even reach
5496 * the test for an internal entity.
5497 *
5498 * If 'standalone' is true, it does not seem to be
5499 * possible to create entities taking this code path that
5500 * are not internal entities, so fail the test above.
5501 *
5502 * Because this analysis is very uncertain, the code is
5503 * being left in place and merely removed from the
5504 * coverage test statistics.
5505 */
5506 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5507 }
5508 } else if (! entity) {
5509 dtd->keepProcessing = dtd->standalone;
5510 /* cannot report skipped entities in declarations */
5511 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5512 && parser->m_skippedEntityHandler) {
5513 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5514 handleDefault = XML_FALSE;
5515 }
5516 break;
5517 }
5518 if (entity->open)
5519 return XML_ERROR_RECURSIVE_ENTITY_REF;
5520 if (entity->textPtr) {
5521 enum XML_Error result;
5522 XML_Bool betweenDecl
5523 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5524 result = processInternalEntity(parser, entity, betweenDecl);
5525 if (result != XML_ERROR_NONE)
5526 return result;
5527 handleDefault = XML_FALSE;
5528 break;
5529 }
5530 if (parser->m_externalEntityRefHandler) {
5531 dtd->paramEntityRead = XML_FALSE;
5532 entity->open = XML_TRUE;
5533 entityTrackingOnOpen(parser, entity, __LINE__);
5534 if (! parser->m_externalEntityRefHandler(
5535 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5536 entity->systemId, entity->publicId)) {
5537 entityTrackingOnClose(parser, entity, __LINE__);
5538 entity->open = XML_FALSE;
5539 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5540 }
5541 entityTrackingOnClose(parser, entity, __LINE__);
5542 entity->open = XML_FALSE;
5543 handleDefault = XML_FALSE;
5544 if (! dtd->paramEntityRead) {
5545 dtd->keepProcessing = dtd->standalone;
5546 break;
5547 }
5548 } else {
5549 dtd->keepProcessing = dtd->standalone;
5550 break;
5551 }
5552 }
5553 #endif /* XML_DTD */
5554 if (! dtd->standalone && parser->m_notStandaloneHandler
5555 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5556 return XML_ERROR_NOT_STANDALONE;
5557 break;
5558
5559 /* Element declaration stuff */
5560
5561 case XML_ROLE_ELEMENT_NAME:
5562 if (parser->m_elementDeclHandler) {
5563 parser->m_declElementType = getElementType(parser, enc, s, next);
5564 if (! parser->m_declElementType)
5565 return XML_ERROR_NO_MEMORY;
5566 dtd->scaffLevel = 0;
5567 dtd->scaffCount = 0;
5568 dtd->in_eldecl = XML_TRUE;
5569 handleDefault = XML_FALSE;
5570 }
5571 break;
5572
5573 case XML_ROLE_CONTENT_ANY:
5574 case XML_ROLE_CONTENT_EMPTY:
5575 if (dtd->in_eldecl) {
5576 if (parser->m_elementDeclHandler) {
5577 XML_Content *content
5578 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5579 if (! content)
5580 return XML_ERROR_NO_MEMORY;
5581 content->quant = XML_CQUANT_NONE;
5582 content->name = NULL;
5583 content->numchildren = 0;
5584 content->children = NULL;
5585 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5586 : XML_CTYPE_EMPTY);
5587 *eventEndPP = s;
5588 parser->m_elementDeclHandler(
5589 parser->m_handlerArg, parser->m_declElementType->name, content);
5590 handleDefault = XML_FALSE;
5591 }
5592 dtd->in_eldecl = XML_FALSE;
5593 }
5594 break;
5595
5596 case XML_ROLE_CONTENT_PCDATA:
5597 if (dtd->in_eldecl) {
5598 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5599 = XML_CTYPE_MIXED;
5600 if (parser->m_elementDeclHandler)
5601 handleDefault = XML_FALSE;
5602 }
5603 break;
5604
5605 case XML_ROLE_CONTENT_ELEMENT:
5606 quant = XML_CQUANT_NONE;
5607 goto elementContent;
5608 case XML_ROLE_CONTENT_ELEMENT_OPT:
5609 quant = XML_CQUANT_OPT;
5610 goto elementContent;
5611 case XML_ROLE_CONTENT_ELEMENT_REP:
5612 quant = XML_CQUANT_REP;
5613 goto elementContent;
5614 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5615 quant = XML_CQUANT_PLUS;
5616 elementContent:
5617 if (dtd->in_eldecl) {
5618 ELEMENT_TYPE *el;
5619 const XML_Char *name;
5620 size_t nameLen;
5621 const char *nxt
5622 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5623 int myindex = nextScaffoldPart(parser);
5624 if (myindex < 0)
5625 return XML_ERROR_NO_MEMORY;
5626 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5627 dtd->scaffold[myindex].quant = quant;
5628 el = getElementType(parser, enc, s, nxt);
5629 if (! el)
5630 return XML_ERROR_NO_MEMORY;
5631 name = el->name;
5632 dtd->scaffold[myindex].name = name;
5633 nameLen = 0;
5634 for (; name[nameLen++];)
5635 ;
5636
5637 /* Detect and prevent integer overflow */
5638 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5639 return XML_ERROR_NO_MEMORY;
5640 }
5641
5642 dtd->contentStringLen += (unsigned)nameLen;
5643 if (parser->m_elementDeclHandler)
5644 handleDefault = XML_FALSE;
5645 }
5646 break;
5647
5648 case XML_ROLE_GROUP_CLOSE:
5649 quant = XML_CQUANT_NONE;
5650 goto closeGroup;
5651 case XML_ROLE_GROUP_CLOSE_OPT:
5652 quant = XML_CQUANT_OPT;
5653 goto closeGroup;
5654 case XML_ROLE_GROUP_CLOSE_REP:
5655 quant = XML_CQUANT_REP;
5656 goto closeGroup;
5657 case XML_ROLE_GROUP_CLOSE_PLUS:
5658 quant = XML_CQUANT_PLUS;
5659 closeGroup:
5660 if (dtd->in_eldecl) {
5661 if (parser->m_elementDeclHandler)
5662 handleDefault = XML_FALSE;
5663 dtd->scaffLevel--;
5664 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5665 if (dtd->scaffLevel == 0) {
5666 if (! handleDefault) {
5667 XML_Content *model = build_model(parser);
5668 if (! model)
5669 return XML_ERROR_NO_MEMORY;
5670 *eventEndPP = s;
5671 parser->m_elementDeclHandler(
5672 parser->m_handlerArg, parser->m_declElementType->name, model);
5673 }
5674 dtd->in_eldecl = XML_FALSE;
5675 dtd->contentStringLen = 0;
5676 }
5677 }
5678 break;
5679 /* End element declaration stuff */
5680
5681 case XML_ROLE_PI:
5682 if (! reportProcessingInstruction(parser, enc, s, next))
5683 return XML_ERROR_NO_MEMORY;
5684 handleDefault = XML_FALSE;
5685 break;
5686 case XML_ROLE_COMMENT:
5687 if (! reportComment(parser, enc, s, next))
5688 return XML_ERROR_NO_MEMORY;
5689 handleDefault = XML_FALSE;
5690 break;
5691 case XML_ROLE_NONE:
5692 switch (tok) {
5693 case XML_TOK_BOM:
5694 handleDefault = XML_FALSE;
5695 break;
5696 }
5697 break;
5698 case XML_ROLE_DOCTYPE_NONE:
5699 if (parser->m_startDoctypeDeclHandler)
5700 handleDefault = XML_FALSE;
5701 break;
5702 case XML_ROLE_ENTITY_NONE:
5703 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5704 handleDefault = XML_FALSE;
5705 break;
5706 case XML_ROLE_NOTATION_NONE:
5707 if (parser->m_notationDeclHandler)
5708 handleDefault = XML_FALSE;
5709 break;
5710 case XML_ROLE_ATTLIST_NONE:
5711 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5712 handleDefault = XML_FALSE;
5713 break;
5714 case XML_ROLE_ELEMENT_NONE:
5715 if (parser->m_elementDeclHandler)
5716 handleDefault = XML_FALSE;
5717 break;
5718 } /* end of big switch */
5719
5720 if (handleDefault && parser->m_defaultHandler)
5721 reportDefault(parser, enc, s, next);
5722
5723 switch (parser->m_parsingStatus.parsing) {
5724 case XML_SUSPENDED:
5725 *nextPtr = next;
5726 return XML_ERROR_NONE;
5727 case XML_FINISHED:
5728 return XML_ERROR_ABORTED;
5729 default:
5730 s = next;
5731 tok = XmlPrologTok(enc, s, end, &next);
5732 }
5733 }
5734 /* not reached */
5735 }
5736
5737 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5738 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5739 const char **nextPtr) {
5740 parser->m_processor = epilogProcessor;
5741 parser->m_eventPtr = s;
5742 for (;;) {
5743 const char *next = NULL;
5744 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5745 #if XML_GE == 1
5746 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5747 XML_ACCOUNT_DIRECT)) {
5748 accountingOnAbort(parser);
5749 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5750 }
5751 #endif
5752 parser->m_eventEndPtr = next;
5753 switch (tok) {
5754 /* report partial linebreak - it might be the last token */
5755 case -XML_TOK_PROLOG_S:
5756 if (parser->m_defaultHandler) {
5757 reportDefault(parser, parser->m_encoding, s, next);
5758 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5759 return XML_ERROR_ABORTED;
5760 }
5761 *nextPtr = next;
5762 return XML_ERROR_NONE;
5763 case XML_TOK_NONE:
5764 *nextPtr = s;
5765 return XML_ERROR_NONE;
5766 case XML_TOK_PROLOG_S:
5767 if (parser->m_defaultHandler)
5768 reportDefault(parser, parser->m_encoding, s, next);
5769 break;
5770 case XML_TOK_PI:
5771 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5772 return XML_ERROR_NO_MEMORY;
5773 break;
5774 case XML_TOK_COMMENT:
5775 if (! reportComment(parser, parser->m_encoding, s, next))
5776 return XML_ERROR_NO_MEMORY;
5777 break;
5778 case XML_TOK_INVALID:
5779 parser->m_eventPtr = next;
5780 return XML_ERROR_INVALID_TOKEN;
5781 case XML_TOK_PARTIAL:
5782 if (! parser->m_parsingStatus.finalBuffer) {
5783 *nextPtr = s;
5784 return XML_ERROR_NONE;
5785 }
5786 return XML_ERROR_UNCLOSED_TOKEN;
5787 case XML_TOK_PARTIAL_CHAR:
5788 if (! parser->m_parsingStatus.finalBuffer) {
5789 *nextPtr = s;
5790 return XML_ERROR_NONE;
5791 }
5792 return XML_ERROR_PARTIAL_CHAR;
5793 default:
5794 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5795 }
5796 parser->m_eventPtr = s = next;
5797 switch (parser->m_parsingStatus.parsing) {
5798 case XML_SUSPENDED:
5799 *nextPtr = next;
5800 return XML_ERROR_NONE;
5801 case XML_FINISHED:
5802 return XML_ERROR_ABORTED;
5803 default:;
5804 }
5805 }
5806 }
5807
5808 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5809 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5810 const char *textStart, *textEnd;
5811 const char *next;
5812 enum XML_Error result;
5813 OPEN_INTERNAL_ENTITY *openEntity;
5814
5815 if (parser->m_freeInternalEntities) {
5816 openEntity = parser->m_freeInternalEntities;
5817 parser->m_freeInternalEntities = openEntity->next;
5818 } else {
5819 openEntity
5820 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5821 if (! openEntity)
5822 return XML_ERROR_NO_MEMORY;
5823 }
5824 entity->open = XML_TRUE;
5825 #if XML_GE == 1
5826 entityTrackingOnOpen(parser, entity, __LINE__);
5827 #endif
5828 entity->processed = 0;
5829 openEntity->next = parser->m_openInternalEntities;
5830 parser->m_openInternalEntities = openEntity;
5831 openEntity->entity = entity;
5832 openEntity->startTagLevel = parser->m_tagLevel;
5833 openEntity->betweenDecl = betweenDecl;
5834 openEntity->internalEventPtr = NULL;
5835 openEntity->internalEventEndPtr = NULL;
5836 textStart = (const char *)entity->textPtr;
5837 textEnd = (const char *)(entity->textPtr + entity->textLen);
5838 /* Set a safe default value in case 'next' does not get set */
5839 next = textStart;
5840
5841 #ifdef XML_DTD
5842 if (entity->is_param) {
5843 int tok
5844 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5845 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5846 tok, next, &next, XML_FALSE, XML_FALSE,
5847 XML_ACCOUNT_ENTITY_EXPANSION);
5848 } else
5849 #endif /* XML_DTD */
5850 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5851 textStart, textEnd, &next, XML_FALSE,
5852 XML_ACCOUNT_ENTITY_EXPANSION);
5853
5854 if (result == XML_ERROR_NONE) {
5855 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5856 entity->processed = (int)(next - textStart);
5857 parser->m_processor = internalEntityProcessor;
5858 } else if (parser->m_openInternalEntities->entity == entity) {
5859 #if XML_GE == 1
5860 entityTrackingOnClose(parser, entity, __LINE__);
5861 #endif /* XML_GE == 1 */
5862 entity->open = XML_FALSE;
5863 parser->m_openInternalEntities = openEntity->next;
5864 /* put openEntity back in list of free instances */
5865 openEntity->next = parser->m_freeInternalEntities;
5866 parser->m_freeInternalEntities = openEntity;
5867 }
5868 }
5869 return result;
5870 }
5871
5872 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5873 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5874 const char **nextPtr) {
5875 ENTITY *entity;
5876 const char *textStart, *textEnd;
5877 const char *next;
5878 enum XML_Error result;
5879 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5880 if (! openEntity)
5881 return XML_ERROR_UNEXPECTED_STATE;
5882
5883 entity = openEntity->entity;
5884 textStart = ((const char *)entity->textPtr) + entity->processed;
5885 textEnd = (const char *)(entity->textPtr + entity->textLen);
5886 /* Set a safe default value in case 'next' does not get set */
5887 next = textStart;
5888
5889 #ifdef XML_DTD
5890 if (entity->is_param) {
5891 int tok
5892 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5893 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5894 tok, next, &next, XML_FALSE, XML_TRUE,
5895 XML_ACCOUNT_ENTITY_EXPANSION);
5896 } else
5897 #endif /* XML_DTD */
5898 result = doContent(parser, openEntity->startTagLevel,
5899 parser->m_internalEncoding, textStart, textEnd, &next,
5900 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5901
5902 if (result != XML_ERROR_NONE)
5903 return result;
5904
5905 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5906 entity->processed = (int)(next - (const char *)entity->textPtr);
5907 return result;
5908 }
5909
5910 #if XML_GE == 1
5911 entityTrackingOnClose(parser, entity, __LINE__);
5912 #endif
5913 entity->open = XML_FALSE;
5914 parser->m_openInternalEntities = openEntity->next;
5915 /* put openEntity back in list of free instances */
5916 openEntity->next = parser->m_freeInternalEntities;
5917 parser->m_freeInternalEntities = openEntity;
5918
5919 // If there are more open entities we want to stop right here and have the
5920 // upcoming call to XML_ResumeParser continue with entity content, or it would
5921 // be ignored altogether.
5922 if (parser->m_openInternalEntities != NULL
5923 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5924 return XML_ERROR_NONE;
5925 }
5926
5927 #ifdef XML_DTD
5928 if (entity->is_param) {
5929 int tok;
5930 parser->m_processor = prologProcessor;
5931 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5932 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5933 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5934 XML_ACCOUNT_DIRECT);
5935 } else
5936 #endif /* XML_DTD */
5937 {
5938 parser->m_processor = contentProcessor;
5939 /* see externalEntityContentProcessor vs contentProcessor */
5940 result = doContent(parser, parser->m_parentParser ? 1 : 0,
5941 parser->m_encoding, s, end, nextPtr,
5942 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5943 XML_ACCOUNT_DIRECT);
5944 if (result == XML_ERROR_NONE) {
5945 if (! storeRawNames(parser))
5946 return XML_ERROR_NO_MEMORY;
5947 }
5948 return result;
5949 }
5950 }
5951
5952 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5953 errorProcessor(XML_Parser parser, const char *s, const char *end,
5954 const char **nextPtr) {
5955 UNUSED_P(s);
5956 UNUSED_P(end);
5957 UNUSED_P(nextPtr);
5958 return parser->m_errorCode;
5959 }
5960
5961 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5962 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5963 const char *ptr, const char *end, STRING_POOL *pool,
5964 enum XML_Account account) {
5965 enum XML_Error result
5966 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5967 if (result)
5968 return result;
5969 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5970 poolChop(pool);
5971 if (! poolAppendChar(pool, XML_T('\0')))
5972 return XML_ERROR_NO_MEMORY;
5973 return XML_ERROR_NONE;
5974 }
5975
5976 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5977 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5978 const char *ptr, const char *end, STRING_POOL *pool,
5979 enum XML_Account account) {
5980 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5981 #ifndef XML_DTD
5982 UNUSED_P(account);
5983 #endif
5984
5985 for (;;) {
5986 const char *next
5987 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5988 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5989 #if XML_GE == 1
5990 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5991 accountingOnAbort(parser);
5992 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5993 }
5994 #endif
5995 switch (tok) {
5996 case XML_TOK_NONE:
5997 return XML_ERROR_NONE;
5998 case XML_TOK_INVALID:
5999 if (enc == parser->m_encoding)
6000 parser->m_eventPtr = next;
6001 return XML_ERROR_INVALID_TOKEN;
6002 case XML_TOK_PARTIAL:
6003 if (enc == parser->m_encoding)
6004 parser->m_eventPtr = ptr;
6005 return XML_ERROR_INVALID_TOKEN;
6006 case XML_TOK_CHAR_REF: {
6007 XML_Char buf[XML_ENCODE_MAX];
6008 int i;
6009 int n = XmlCharRefNumber(enc, ptr);
6010 if (n < 0) {
6011 if (enc == parser->m_encoding)
6012 parser->m_eventPtr = ptr;
6013 return XML_ERROR_BAD_CHAR_REF;
6014 }
6015 if (! isCdata && n == 0x20 /* space */
6016 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6017 break;
6018 n = XmlEncode(n, (ICHAR *)buf);
6019 /* The XmlEncode() functions can never return 0 here. That
6020 * error return happens if the code point passed in is either
6021 * negative or greater than or equal to 0x110000. The
6022 * XmlCharRefNumber() functions will all return a number
6023 * strictly less than 0x110000 or a negative value if an error
6024 * occurred. The negative value is intercepted above, so
6025 * XmlEncode() is never passed a value it might return an
6026 * error for.
6027 */
6028 for (i = 0; i < n; i++) {
6029 if (! poolAppendChar(pool, buf[i]))
6030 return XML_ERROR_NO_MEMORY;
6031 }
6032 } break;
6033 case XML_TOK_DATA_CHARS:
6034 if (! poolAppend(pool, enc, ptr, next))
6035 return XML_ERROR_NO_MEMORY;
6036 break;
6037 case XML_TOK_TRAILING_CR:
6038 next = ptr + enc->minBytesPerChar;
6039 /* fall through */
6040 case XML_TOK_ATTRIBUTE_VALUE_S:
6041 case XML_TOK_DATA_NEWLINE:
6042 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6043 break;
6044 if (! poolAppendChar(pool, 0x20))
6045 return XML_ERROR_NO_MEMORY;
6046 break;
6047 case XML_TOK_ENTITY_REF: {
6048 const XML_Char *name;
6049 ENTITY *entity;
6050 char checkEntityDecl;
6051 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6052 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6053 if (ch) {
6054 #if XML_GE == 1
6055 /* NOTE: We are replacing 4-6 characters original input for 1 character
6056 * so there is no amplification and hence recording without
6057 * protection. */
6058 accountingDiffTolerated(parser, tok, (char *)&ch,
6059 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6060 XML_ACCOUNT_ENTITY_EXPANSION);
6061 #endif /* XML_GE == 1 */
6062 if (! poolAppendChar(pool, ch))
6063 return XML_ERROR_NO_MEMORY;
6064 break;
6065 }
6066 name = poolStoreString(&parser->m_temp2Pool, enc,
6067 ptr + enc->minBytesPerChar,
6068 next - enc->minBytesPerChar);
6069 if (! name)
6070 return XML_ERROR_NO_MEMORY;
6071 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6072 poolDiscard(&parser->m_temp2Pool);
6073 /* First, determine if a check for an existing declaration is needed;
6074 if yes, check that the entity exists, and that it is internal.
6075 */
6076 if (pool == &dtd->pool) /* are we called from prolog? */
6077 checkEntityDecl =
6078 #ifdef XML_DTD
6079 parser->m_prologState.documentEntity &&
6080 #endif /* XML_DTD */
6081 (dtd->standalone ? ! parser->m_openInternalEntities
6082 : ! dtd->hasParamEntityRefs);
6083 else /* if (pool == &parser->m_tempPool): we are called from content */
6084 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6085 if (checkEntityDecl) {
6086 if (! entity)
6087 return XML_ERROR_UNDEFINED_ENTITY;
6088 else if (! entity->is_internal)
6089 return XML_ERROR_ENTITY_DECLARED_IN_PE;
6090 } else if (! entity) {
6091 /* Cannot report skipped entity here - see comments on
6092 parser->m_skippedEntityHandler.
6093 if (parser->m_skippedEntityHandler)
6094 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6095 */
6096 /* Cannot call the default handler because this would be
6097 out of sync with the call to the startElementHandler.
6098 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6099 reportDefault(parser, enc, ptr, next);
6100 */
6101 break;
6102 }
6103 if (entity->open) {
6104 if (enc == parser->m_encoding) {
6105 /* It does not appear that this line can be executed.
6106 *
6107 * The "if (entity->open)" check catches recursive entity
6108 * definitions. In order to be called with an open
6109 * entity, it must have gone through this code before and
6110 * been through the recursive call to
6111 * appendAttributeValue() some lines below. That call
6112 * sets the local encoding ("enc") to the parser's
6113 * internal encoding (internal_utf8 or internal_utf16),
6114 * which can never be the same as the principle encoding.
6115 * It doesn't appear there is another code path that gets
6116 * here with entity->open being TRUE.
6117 *
6118 * Since it is not certain that this logic is watertight,
6119 * we keep the line and merely exclude it from coverage
6120 * tests.
6121 */
6122 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6123 }
6124 return XML_ERROR_RECURSIVE_ENTITY_REF;
6125 }
6126 if (entity->notation) {
6127 if (enc == parser->m_encoding)
6128 parser->m_eventPtr = ptr;
6129 return XML_ERROR_BINARY_ENTITY_REF;
6130 }
6131 if (! entity->textPtr) {
6132 if (enc == parser->m_encoding)
6133 parser->m_eventPtr = ptr;
6134 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6135 } else {
6136 enum XML_Error result;
6137 const XML_Char *textEnd = entity->textPtr + entity->textLen;
6138 entity->open = XML_TRUE;
6139 #if XML_GE == 1
6140 entityTrackingOnOpen(parser, entity, __LINE__);
6141 #endif
6142 result = appendAttributeValue(parser, parser->m_internalEncoding,
6143 isCdata, (const char *)entity->textPtr,
6144 (const char *)textEnd, pool,
6145 XML_ACCOUNT_ENTITY_EXPANSION);
6146 #if XML_GE == 1
6147 entityTrackingOnClose(parser, entity, __LINE__);
6148 #endif
6149 entity->open = XML_FALSE;
6150 if (result)
6151 return result;
6152 }
6153 } break;
6154 default:
6155 /* The only token returned by XmlAttributeValueTok() that does
6156 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6157 * Getting that would require an entity name to contain an
6158 * incomplete XML character (e.g. \xE2\x82); however previous
6159 * tokenisers will have already recognised and rejected such
6160 * names before XmlAttributeValueTok() gets a look-in. This
6161 * default case should be retained as a safety net, but the code
6162 * excluded from coverage tests.
6163 *
6164 * LCOV_EXCL_START
6165 */
6166 if (enc == parser->m_encoding)
6167 parser->m_eventPtr = ptr;
6168 return XML_ERROR_UNEXPECTED_STATE;
6169 /* LCOV_EXCL_STOP */
6170 }
6171 ptr = next;
6172 }
6173 /* not reached */
6174 }
6175
6176 #if XML_GE == 1
6177 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6178 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6179 const char *entityTextPtr, const char *entityTextEnd,
6180 enum XML_Account account) {
6181 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6182 STRING_POOL *pool = &(dtd->entityValuePool);
6183 enum XML_Error result = XML_ERROR_NONE;
6184 # ifdef XML_DTD
6185 int oldInEntityValue = parser->m_prologState.inEntityValue;
6186 parser->m_prologState.inEntityValue = 1;
6187 # else
6188 UNUSED_P(account);
6189 # endif /* XML_DTD */
6190 /* never return Null for the value argument in EntityDeclHandler,
6191 since this would indicate an external entity; therefore we
6192 have to make sure that entityValuePool.start is not null */
6193 if (! pool->blocks) {
6194 if (! poolGrow(pool))
6195 return XML_ERROR_NO_MEMORY;
6196 }
6197
6198 for (;;) {
6199 const char *next
6200 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6201 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6202
6203 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6204 account)) {
6205 accountingOnAbort(parser);
6206 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6207 goto endEntityValue;
6208 }
6209
6210 switch (tok) {
6211 case XML_TOK_PARAM_ENTITY_REF:
6212 # ifdef XML_DTD
6213 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6214 const XML_Char *name;
6215 ENTITY *entity;
6216 name = poolStoreString(&parser->m_tempPool, enc,
6217 entityTextPtr + enc->minBytesPerChar,
6218 next - enc->minBytesPerChar);
6219 if (! name) {
6220 result = XML_ERROR_NO_MEMORY;
6221 goto endEntityValue;
6222 }
6223 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6224 poolDiscard(&parser->m_tempPool);
6225 if (! entity) {
6226 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6227 /* cannot report skipped entity here - see comments on
6228 parser->m_skippedEntityHandler
6229 if (parser->m_skippedEntityHandler)
6230 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6231 */
6232 dtd->keepProcessing = dtd->standalone;
6233 goto endEntityValue;
6234 }
6235 if (entity->open) {
6236 if (enc == parser->m_encoding)
6237 parser->m_eventPtr = entityTextPtr;
6238 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6239 goto endEntityValue;
6240 }
6241 if (entity->systemId) {
6242 if (parser->m_externalEntityRefHandler) {
6243 dtd->paramEntityRead = XML_FALSE;
6244 entity->open = XML_TRUE;
6245 entityTrackingOnOpen(parser, entity, __LINE__);
6246 if (! parser->m_externalEntityRefHandler(
6247 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6248 entity->systemId, entity->publicId)) {
6249 entityTrackingOnClose(parser, entity, __LINE__);
6250 entity->open = XML_FALSE;
6251 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6252 goto endEntityValue;
6253 }
6254 entityTrackingOnClose(parser, entity, __LINE__);
6255 entity->open = XML_FALSE;
6256 if (! dtd->paramEntityRead)
6257 dtd->keepProcessing = dtd->standalone;
6258 } else
6259 dtd->keepProcessing = dtd->standalone;
6260 } else {
6261 entity->open = XML_TRUE;
6262 entityTrackingOnOpen(parser, entity, __LINE__);
6263 result = storeEntityValue(
6264 parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6265 (const char *)(entity->textPtr + entity->textLen),
6266 XML_ACCOUNT_ENTITY_EXPANSION);
6267 entityTrackingOnClose(parser, entity, __LINE__);
6268 entity->open = XML_FALSE;
6269 if (result)
6270 goto endEntityValue;
6271 }
6272 break;
6273 }
6274 # endif /* XML_DTD */
6275 /* In the internal subset, PE references are not legal
6276 within markup declarations, e.g entity values in this case. */
6277 parser->m_eventPtr = entityTextPtr;
6278 result = XML_ERROR_PARAM_ENTITY_REF;
6279 goto endEntityValue;
6280 case XML_TOK_NONE:
6281 result = XML_ERROR_NONE;
6282 goto endEntityValue;
6283 case XML_TOK_ENTITY_REF:
6284 case XML_TOK_DATA_CHARS:
6285 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6286 result = XML_ERROR_NO_MEMORY;
6287 goto endEntityValue;
6288 }
6289 break;
6290 case XML_TOK_TRAILING_CR:
6291 next = entityTextPtr + enc->minBytesPerChar;
6292 /* fall through */
6293 case XML_TOK_DATA_NEWLINE:
6294 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6295 result = XML_ERROR_NO_MEMORY;
6296 goto endEntityValue;
6297 }
6298 *(pool->ptr)++ = 0xA;
6299 break;
6300 case XML_TOK_CHAR_REF: {
6301 XML_Char buf[XML_ENCODE_MAX];
6302 int i;
6303 int n = XmlCharRefNumber(enc, entityTextPtr);
6304 if (n < 0) {
6305 if (enc == parser->m_encoding)
6306 parser->m_eventPtr = entityTextPtr;
6307 result = XML_ERROR_BAD_CHAR_REF;
6308 goto endEntityValue;
6309 }
6310 n = XmlEncode(n, (ICHAR *)buf);
6311 /* The XmlEncode() functions can never return 0 here. That
6312 * error return happens if the code point passed in is either
6313 * negative or greater than or equal to 0x110000. The
6314 * XmlCharRefNumber() functions will all return a number
6315 * strictly less than 0x110000 or a negative value if an error
6316 * occurred. The negative value is intercepted above, so
6317 * XmlEncode() is never passed a value it might return an
6318 * error for.
6319 */
6320 for (i = 0; i < n; i++) {
6321 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6322 result = XML_ERROR_NO_MEMORY;
6323 goto endEntityValue;
6324 }
6325 *(pool->ptr)++ = buf[i];
6326 }
6327 } break;
6328 case XML_TOK_PARTIAL:
6329 if (enc == parser->m_encoding)
6330 parser->m_eventPtr = entityTextPtr;
6331 result = XML_ERROR_INVALID_TOKEN;
6332 goto endEntityValue;
6333 case XML_TOK_INVALID:
6334 if (enc == parser->m_encoding)
6335 parser->m_eventPtr = next;
6336 result = XML_ERROR_INVALID_TOKEN;
6337 goto endEntityValue;
6338 default:
6339 /* This default case should be unnecessary -- all the tokens
6340 * that XmlEntityValueTok() can return have their own explicit
6341 * cases -- but should be retained for safety. We do however
6342 * exclude it from the coverage statistics.
6343 *
6344 * LCOV_EXCL_START
6345 */
6346 if (enc == parser->m_encoding)
6347 parser->m_eventPtr = entityTextPtr;
6348 result = XML_ERROR_UNEXPECTED_STATE;
6349 goto endEntityValue;
6350 /* LCOV_EXCL_STOP */
6351 }
6352 entityTextPtr = next;
6353 }
6354 endEntityValue:
6355 # ifdef XML_DTD
6356 parser->m_prologState.inEntityValue = oldInEntityValue;
6357 # endif /* XML_DTD */
6358 return result;
6359 }
6360
6361 #else /* XML_GE == 0 */
6362
6363 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6364 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6365 // This will store "&entity123;" in entity->textPtr
6366 // to end up as "&entity123;" in the handler.
6367 const char *const entity_start = "&";
6368 const char *const entity_end = ";";
6369
6370 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6371 if (! poolAppendString(pool, entity_start)
6372 || ! poolAppendString(pool, entity->name)
6373 || ! poolAppendString(pool, entity_end)) {
6374 poolDiscard(pool);
6375 return XML_ERROR_NO_MEMORY;
6376 }
6377
6378 entity->textPtr = poolStart(pool);
6379 entity->textLen = (int)(poolLength(pool));
6380 poolFinish(pool);
6381
6382 return XML_ERROR_NONE;
6383 }
6384
6385 #endif /* XML_GE == 0 */
6386
6387 static void FASTCALL
normalizeLines(XML_Char * s)6388 normalizeLines(XML_Char *s) {
6389 XML_Char *p;
6390 for (;; s++) {
6391 if (*s == XML_T('\0'))
6392 return;
6393 if (*s == 0xD)
6394 break;
6395 }
6396 p = s;
6397 do {
6398 if (*s == 0xD) {
6399 *p++ = 0xA;
6400 if (*++s == 0xA)
6401 s++;
6402 } else
6403 *p++ = *s++;
6404 } while (*s);
6405 *p = XML_T('\0');
6406 }
6407
6408 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6409 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6410 const char *start, const char *end) {
6411 const XML_Char *target;
6412 XML_Char *data;
6413 const char *tem;
6414 if (! parser->m_processingInstructionHandler) {
6415 if (parser->m_defaultHandler)
6416 reportDefault(parser, enc, start, end);
6417 return 1;
6418 }
6419 start += enc->minBytesPerChar * 2;
6420 tem = start + XmlNameLength(enc, start);
6421 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6422 if (! target)
6423 return 0;
6424 poolFinish(&parser->m_tempPool);
6425 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6426 end - enc->minBytesPerChar * 2);
6427 if (! data)
6428 return 0;
6429 normalizeLines(data);
6430 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6431 poolClear(&parser->m_tempPool);
6432 return 1;
6433 }
6434
6435 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6436 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6437 const char *end) {
6438 XML_Char *data;
6439 if (! parser->m_commentHandler) {
6440 if (parser->m_defaultHandler)
6441 reportDefault(parser, enc, start, end);
6442 return 1;
6443 }
6444 data = poolStoreString(&parser->m_tempPool, enc,
6445 start + enc->minBytesPerChar * 4,
6446 end - enc->minBytesPerChar * 3);
6447 if (! data)
6448 return 0;
6449 normalizeLines(data);
6450 parser->m_commentHandler(parser->m_handlerArg, data);
6451 poolClear(&parser->m_tempPool);
6452 return 1;
6453 }
6454
6455 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6456 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6457 const char *end) {
6458 if (MUST_CONVERT(enc, s)) {
6459 enum XML_Convert_Result convert_res;
6460 const char **eventPP;
6461 const char **eventEndPP;
6462 if (enc == parser->m_encoding) {
6463 eventPP = &parser->m_eventPtr;
6464 eventEndPP = &parser->m_eventEndPtr;
6465 } else {
6466 /* To get here, two things must be true; the parser must be
6467 * using a character encoding that is not the same as the
6468 * encoding passed in, and the encoding passed in must need
6469 * conversion to the internal format (UTF-8 unless XML_UNICODE
6470 * is defined). The only occasions on which the encoding passed
6471 * in is not the same as the parser's encoding are when it is
6472 * the internal encoding (e.g. a previously defined parameter
6473 * entity, already converted to internal format). This by
6474 * definition doesn't need conversion, so the whole branch never
6475 * gets executed.
6476 *
6477 * For safety's sake we don't delete these lines and merely
6478 * exclude them from coverage statistics.
6479 *
6480 * LCOV_EXCL_START
6481 */
6482 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6483 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6484 /* LCOV_EXCL_STOP */
6485 }
6486 do {
6487 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6488 convert_res
6489 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6490 *eventEndPP = s;
6491 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6492 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6493 *eventPP = s;
6494 } while ((convert_res != XML_CONVERT_COMPLETED)
6495 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6496 } else
6497 parser->m_defaultHandler(
6498 parser->m_handlerArg, (const XML_Char *)s,
6499 (int)((const XML_Char *)end - (const XML_Char *)s));
6500 }
6501
6502 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6503 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6504 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6505 DEFAULT_ATTRIBUTE *att;
6506 if (value || isId) {
6507 /* The handling of default attributes gets messed up if we have
6508 a default which duplicates a non-default. */
6509 int i;
6510 for (i = 0; i < type->nDefaultAtts; i++)
6511 if (attId == type->defaultAtts[i].id)
6512 return 1;
6513 if (isId && ! type->idAtt && ! attId->xmlns)
6514 type->idAtt = attId;
6515 }
6516 if (type->nDefaultAtts == type->allocDefaultAtts) {
6517 if (type->allocDefaultAtts == 0) {
6518 type->allocDefaultAtts = 8;
6519 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6520 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6521 if (! type->defaultAtts) {
6522 type->allocDefaultAtts = 0;
6523 return 0;
6524 }
6525 } else {
6526 DEFAULT_ATTRIBUTE *temp;
6527
6528 /* Detect and prevent integer overflow */
6529 if (type->allocDefaultAtts > INT_MAX / 2) {
6530 return 0;
6531 }
6532
6533 int count = type->allocDefaultAtts * 2;
6534
6535 /* Detect and prevent integer overflow.
6536 * The preprocessor guard addresses the "always false" warning
6537 * from -Wtype-limits on platforms where
6538 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6539 #if UINT_MAX >= SIZE_MAX
6540 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6541 return 0;
6542 }
6543 #endif
6544
6545 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6546 (count * sizeof(DEFAULT_ATTRIBUTE)));
6547 if (temp == NULL)
6548 return 0;
6549 type->allocDefaultAtts = count;
6550 type->defaultAtts = temp;
6551 }
6552 }
6553 att = type->defaultAtts + type->nDefaultAtts;
6554 att->id = attId;
6555 att->value = value;
6556 att->isCdata = isCdata;
6557 if (! isCdata)
6558 attId->maybeTokenized = XML_TRUE;
6559 type->nDefaultAtts += 1;
6560 return 1;
6561 }
6562
6563 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6564 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6565 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6566 const XML_Char *name;
6567 for (name = elementType->name; *name; name++) {
6568 if (*name == XML_T(ASCII_COLON)) {
6569 PREFIX *prefix;
6570 const XML_Char *s;
6571 for (s = elementType->name; s != name; s++) {
6572 if (! poolAppendChar(&dtd->pool, *s))
6573 return 0;
6574 }
6575 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6576 return 0;
6577 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6578 sizeof(PREFIX));
6579 if (! prefix)
6580 return 0;
6581 if (prefix->name == poolStart(&dtd->pool))
6582 poolFinish(&dtd->pool);
6583 else
6584 poolDiscard(&dtd->pool);
6585 elementType->prefix = prefix;
6586 break;
6587 }
6588 }
6589 return 1;
6590 }
6591
6592 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6593 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6594 const char *end) {
6595 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6596 ATTRIBUTE_ID *id;
6597 const XML_Char *name;
6598 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6599 return NULL;
6600 name = poolStoreString(&dtd->pool, enc, start, end);
6601 if (! name)
6602 return NULL;
6603 /* skip quotation mark - its storage will be reused (like in name[-1]) */
6604 ++name;
6605 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6606 sizeof(ATTRIBUTE_ID));
6607 if (! id)
6608 return NULL;
6609 if (id->name != name)
6610 poolDiscard(&dtd->pool);
6611 else {
6612 poolFinish(&dtd->pool);
6613 if (! parser->m_ns)
6614 ;
6615 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6616 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6617 && name[4] == XML_T(ASCII_s)
6618 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6619 if (name[5] == XML_T('\0'))
6620 id->prefix = &dtd->defaultPrefix;
6621 else
6622 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6623 sizeof(PREFIX));
6624 id->xmlns = XML_TRUE;
6625 } else {
6626 int i;
6627 for (i = 0; name[i]; i++) {
6628 /* attributes without prefix are *not* in the default namespace */
6629 if (name[i] == XML_T(ASCII_COLON)) {
6630 int j;
6631 for (j = 0; j < i; j++) {
6632 if (! poolAppendChar(&dtd->pool, name[j]))
6633 return NULL;
6634 }
6635 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6636 return NULL;
6637 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6638 poolStart(&dtd->pool), sizeof(PREFIX));
6639 if (! id->prefix)
6640 return NULL;
6641 if (id->prefix->name == poolStart(&dtd->pool))
6642 poolFinish(&dtd->pool);
6643 else
6644 poolDiscard(&dtd->pool);
6645 break;
6646 }
6647 }
6648 }
6649 }
6650 return id;
6651 }
6652
/* Character that separates the individual entries of the context string
   written by getContext() and read back by setContext(). */
#define CONTEXT_SEP XML_T(ASCII_FF)
6654
6655 static const XML_Char *
getContext(XML_Parser parser)6656 getContext(XML_Parser parser) {
6657 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6658 HASH_TABLE_ITER iter;
6659 XML_Bool needSep = XML_FALSE;
6660
6661 if (dtd->defaultPrefix.binding) {
6662 int i;
6663 int len;
6664 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6665 return NULL;
6666 len = dtd->defaultPrefix.binding->uriLen;
6667 if (parser->m_namespaceSeparator)
6668 len--;
6669 for (i = 0; i < len; i++) {
6670 if (! poolAppendChar(&parser->m_tempPool,
6671 dtd->defaultPrefix.binding->uri[i])) {
6672 /* Because of memory caching, I don't believe this line can be
6673 * executed.
6674 *
6675 * This is part of a loop copying the default prefix binding
6676 * URI into the parser's temporary string pool. Previously,
6677 * that URI was copied into the same string pool, with a
6678 * terminating NUL character, as part of setContext(). When
6679 * the pool was cleared, that leaves a block definitely big
6680 * enough to hold the URI on the free block list of the pool.
6681 * The URI copy in getContext() therefore cannot run out of
6682 * memory.
6683 *
6684 * If the pool is used between the setContext() and
6685 * getContext() calls, the worst it can do is leave a bigger
6686 * block on the front of the free list. Given that this is
6687 * all somewhat inobvious and program logic can be changed, we
6688 * don't delete the line but we do exclude it from the test
6689 * coverage statistics.
6690 */
6691 return NULL; /* LCOV_EXCL_LINE */
6692 }
6693 }
6694 needSep = XML_TRUE;
6695 }
6696
6697 hashTableIterInit(&iter, &(dtd->prefixes));
6698 for (;;) {
6699 int i;
6700 int len;
6701 const XML_Char *s;
6702 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6703 if (! prefix)
6704 break;
6705 if (! prefix->binding) {
6706 /* This test appears to be (justifiable) paranoia. There does
6707 * not seem to be a way of injecting a prefix without a binding
6708 * that doesn't get errored long before this function is called.
6709 * The test should remain for safety's sake, so we instead
6710 * exclude the following line from the coverage statistics.
6711 */
6712 continue; /* LCOV_EXCL_LINE */
6713 }
6714 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6715 return NULL;
6716 for (s = prefix->name; *s; s++)
6717 if (! poolAppendChar(&parser->m_tempPool, *s))
6718 return NULL;
6719 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6720 return NULL;
6721 len = prefix->binding->uriLen;
6722 if (parser->m_namespaceSeparator)
6723 len--;
6724 for (i = 0; i < len; i++)
6725 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6726 return NULL;
6727 needSep = XML_TRUE;
6728 }
6729
6730 hashTableIterInit(&iter, &(dtd->generalEntities));
6731 for (;;) {
6732 const XML_Char *s;
6733 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6734 if (! e)
6735 break;
6736 if (! e->open)
6737 continue;
6738 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6739 return NULL;
6740 for (s = e->name; *s; s++)
6741 if (! poolAppendChar(&parser->m_tempPool, *s))
6742 return 0;
6743 needSep = XML_TRUE;
6744 }
6745
6746 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6747 return NULL;
6748 return parser->m_tempPool.start;
6749 }
6750
/* Apply a context string (entries separated by CONTEXT_SEP) to the parser.
 *
 * An entry without '=' names a general entity; if that entity exists in
 * dtd->generalEntities it is marked open.  An entry of the form
 * "prefix=uri" (or "=uri" for the default prefix) is turned into a
 * namespace binding via addBinding() on parser->m_inheritedBindings.
 *
 * Returns XML_TRUE on success; XML_FALSE if context is NULL, if the pool
 * or a prefix lookup fails, or if addBinding() reports an error.
 */
static XML_Bool
setContext(XML_Parser parser, const XML_Char *context) {
  if (context == NULL) {
    return XML_FALSE;
  }

  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  /* "s" scans ahead through the current entry while "context" marks where
     that entry starts; the characters scanned so far sit in m_tempPool. */
  const XML_Char *s = context;

  while (*context != XML_T('\0')) {
    if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
      /* Entry ended without '=': the pool holds an entity name. */
      ENTITY *e;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      /* createSize 0: only an already existing entity is looked up. */
      e = (ENTITY *)lookup(parser, &dtd->generalEntities,
                           poolStart(&parser->m_tempPool), 0);
      if (e)
        e->open = XML_TRUE;
      /* Step over the separator, but never past the terminating NUL. */
      if (*s != XML_T('\0'))
        s++;
      context = s;
      poolDiscard(&parser->m_tempPool);
    } else if (*s == XML_T(ASCII_EQUALS)) {
      /* The pool holds the prefix name; the URI follows the '='. */
      PREFIX *prefix;
      if (poolLength(&parser->m_tempPool) == 0)
        prefix = &dtd->defaultPrefix; /* "=uri": default namespace */
      else {
        if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
          return XML_FALSE;
        prefix
            = (PREFIX *)lookup(parser, &dtd->prefixes,
                               poolStart(&parser->m_tempPool), sizeof(PREFIX));
        if (! prefix)
          return XML_FALSE;
        /* If the entry's name still points into the temporary pool
           (presumably because lookup() just created it), give it a copy
           in dtd->pool before the temporary string is discarded. */
        if (prefix->name == poolStart(&parser->m_tempPool)) {
          prefix->name = poolCopyString(&dtd->pool, prefix->name);
          if (! prefix->name)
            return XML_FALSE;
        }
        poolDiscard(&parser->m_tempPool);
      }
      /* Collect the URI up to the next separator or the end of string. */
      for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
           context++)
        if (! poolAppendChar(&parser->m_tempPool, *context))
          return XML_FALSE;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
                     &parser->m_inheritedBindings)
          != XML_ERROR_NONE)
        return XML_FALSE;
      poolDiscard(&parser->m_tempPool);
      /* Step over the separator, but never past the terminating NUL. */
      if (*context != XML_T('\0'))
        ++context;
      s = context;
    } else {
      /* Ordinary character: accumulate it and keep scanning. */
      if (! poolAppendChar(&parser->m_tempPool, *s))
        return XML_FALSE;
      s++;
    }
  }
  return XML_TRUE;
}
6814
6815 static void FASTCALL
normalizePublicId(XML_Char * publicId)6816 normalizePublicId(XML_Char *publicId) {
6817 XML_Char *p = publicId;
6818 XML_Char *s;
6819 for (s = publicId; *s; s++) {
6820 switch (*s) {
6821 case 0x20:
6822 case 0xD:
6823 case 0xA:
6824 if (p != publicId && p[-1] != 0x20)
6825 *p++ = 0x20;
6826 break;
6827 default:
6828 *p++ = *s;
6829 }
6830 }
6831 if (p != publicId && p[-1] == 0x20)
6832 --p;
6833 *p = XML_T('\0');
6834 }
6835
6836 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6837 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6838 DTD *p = ms->malloc_fcn(sizeof(DTD));
6839 if (p == NULL)
6840 return p;
6841 poolInit(&(p->pool), ms);
6842 poolInit(&(p->entityValuePool), ms);
6843 hashTableInit(&(p->generalEntities), ms);
6844 hashTableInit(&(p->elementTypes), ms);
6845 hashTableInit(&(p->attributeIds), ms);
6846 hashTableInit(&(p->prefixes), ms);
6847 #ifdef XML_DTD
6848 p->paramEntityRead = XML_FALSE;
6849 hashTableInit(&(p->paramEntities), ms);
6850 #endif /* XML_DTD */
6851 p->defaultPrefix.name = NULL;
6852 p->defaultPrefix.binding = NULL;
6853
6854 p->in_eldecl = XML_FALSE;
6855 p->scaffIndex = NULL;
6856 p->scaffold = NULL;
6857 p->scaffLevel = 0;
6858 p->scaffSize = 0;
6859 p->scaffCount = 0;
6860 p->contentStringLen = 0;
6861
6862 p->keepProcessing = XML_TRUE;
6863 p->hasParamEntityRefs = XML_FALSE;
6864 p->standalone = XML_FALSE;
6865 return p;
6866 }
6867
6868 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6869 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6870 HASH_TABLE_ITER iter;
6871 hashTableIterInit(&iter, &(p->elementTypes));
6872 for (;;) {
6873 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6874 if (! e)
6875 break;
6876 if (e->allocDefaultAtts != 0)
6877 ms->free_fcn(e->defaultAtts);
6878 }
6879 hashTableClear(&(p->generalEntities));
6880 #ifdef XML_DTD
6881 p->paramEntityRead = XML_FALSE;
6882 hashTableClear(&(p->paramEntities));
6883 #endif /* XML_DTD */
6884 hashTableClear(&(p->elementTypes));
6885 hashTableClear(&(p->attributeIds));
6886 hashTableClear(&(p->prefixes));
6887 poolClear(&(p->pool));
6888 poolClear(&(p->entityValuePool));
6889 p->defaultPrefix.name = NULL;
6890 p->defaultPrefix.binding = NULL;
6891
6892 p->in_eldecl = XML_FALSE;
6893
6894 ms->free_fcn(p->scaffIndex);
6895 p->scaffIndex = NULL;
6896 ms->free_fcn(p->scaffold);
6897 p->scaffold = NULL;
6898
6899 p->scaffLevel = 0;
6900 p->scaffSize = 0;
6901 p->scaffCount = 0;
6902 p->contentStringLen = 0;
6903
6904 p->keepProcessing = XML_TRUE;
6905 p->hasParamEntityRefs = XML_FALSE;
6906 p->standalone = XML_FALSE;
6907 }
6908
6909 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6910 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6911 HASH_TABLE_ITER iter;
6912 hashTableIterInit(&iter, &(p->elementTypes));
6913 for (;;) {
6914 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6915 if (! e)
6916 break;
6917 if (e->allocDefaultAtts != 0)
6918 ms->free_fcn(e->defaultAtts);
6919 }
6920 hashTableDestroy(&(p->generalEntities));
6921 #ifdef XML_DTD
6922 hashTableDestroy(&(p->paramEntities));
6923 #endif /* XML_DTD */
6924 hashTableDestroy(&(p->elementTypes));
6925 hashTableDestroy(&(p->attributeIds));
6926 hashTableDestroy(&(p->prefixes));
6927 poolDestroy(&(p->pool));
6928 poolDestroy(&(p->entityValuePool));
6929 if (isDocEntity) {
6930 ms->free_fcn(p->scaffIndex);
6931 ms->free_fcn(p->scaffold);
6932 }
6933 ms->free_fcn(p);
6934 }
6935
6936 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6937 The new DTD has already been initialized.
6938 */
6939 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6940 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6941 const XML_Memory_Handling_Suite *ms) {
6942 HASH_TABLE_ITER iter;
6943
6944 /* Copy the prefix table. */
6945
6946 hashTableIterInit(&iter, &(oldDtd->prefixes));
6947 for (;;) {
6948 const XML_Char *name;
6949 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6950 if (! oldP)
6951 break;
6952 name = poolCopyString(&(newDtd->pool), oldP->name);
6953 if (! name)
6954 return 0;
6955 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6956 return 0;
6957 }
6958
6959 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6960
6961 /* Copy the attribute id table. */
6962
6963 for (;;) {
6964 ATTRIBUTE_ID *newA;
6965 const XML_Char *name;
6966 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6967
6968 if (! oldA)
6969 break;
6970 /* Remember to allocate the scratch byte before the name. */
6971 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6972 return 0;
6973 name = poolCopyString(&(newDtd->pool), oldA->name);
6974 if (! name)
6975 return 0;
6976 ++name;
6977 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6978 sizeof(ATTRIBUTE_ID));
6979 if (! newA)
6980 return 0;
6981 newA->maybeTokenized = oldA->maybeTokenized;
6982 if (oldA->prefix) {
6983 newA->xmlns = oldA->xmlns;
6984 if (oldA->prefix == &oldDtd->defaultPrefix)
6985 newA->prefix = &newDtd->defaultPrefix;
6986 else
6987 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6988 oldA->prefix->name, 0);
6989 }
6990 }
6991
6992 /* Copy the element type table. */
6993
6994 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6995
6996 for (;;) {
6997 int i;
6998 ELEMENT_TYPE *newE;
6999 const XML_Char *name;
7000 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7001 if (! oldE)
7002 break;
7003 name = poolCopyString(&(newDtd->pool), oldE->name);
7004 if (! name)
7005 return 0;
7006 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7007 sizeof(ELEMENT_TYPE));
7008 if (! newE)
7009 return 0;
7010 if (oldE->nDefaultAtts) {
7011 newE->defaultAtts
7012 = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7013 if (! newE->defaultAtts) {
7014 return 0;
7015 }
7016 }
7017 if (oldE->idAtt)
7018 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7019 oldE->idAtt->name, 0);
7020 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7021 if (oldE->prefix)
7022 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7023 oldE->prefix->name, 0);
7024 for (i = 0; i < newE->nDefaultAtts; i++) {
7025 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7026 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7027 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7028 if (oldE->defaultAtts[i].value) {
7029 newE->defaultAtts[i].value
7030 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7031 if (! newE->defaultAtts[i].value)
7032 return 0;
7033 } else
7034 newE->defaultAtts[i].value = NULL;
7035 }
7036 }
7037
7038 /* Copy the entity tables. */
7039 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7040 &(oldDtd->generalEntities)))
7041 return 0;
7042
7043 #ifdef XML_DTD
7044 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7045 &(oldDtd->paramEntities)))
7046 return 0;
7047 newDtd->paramEntityRead = oldDtd->paramEntityRead;
7048 #endif /* XML_DTD */
7049
7050 newDtd->keepProcessing = oldDtd->keepProcessing;
7051 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7052 newDtd->standalone = oldDtd->standalone;
7053
7054 /* Don't want deep copying for scaffolding */
7055 newDtd->in_eldecl = oldDtd->in_eldecl;
7056 newDtd->scaffold = oldDtd->scaffold;
7057 newDtd->contentStringLen = oldDtd->contentStringLen;
7058 newDtd->scaffSize = oldDtd->scaffSize;
7059 newDtd->scaffLevel = oldDtd->scaffLevel;
7060 newDtd->scaffIndex = oldDtd->scaffIndex;
7061
7062 return 1;
7063 } /* End dtdCopy */
7064
7065 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7066 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7067 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7068 HASH_TABLE_ITER iter;
7069 const XML_Char *cachedOldBase = NULL;
7070 const XML_Char *cachedNewBase = NULL;
7071
7072 hashTableIterInit(&iter, oldTable);
7073
7074 for (;;) {
7075 ENTITY *newE;
7076 const XML_Char *name;
7077 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7078 if (! oldE)
7079 break;
7080 name = poolCopyString(newPool, oldE->name);
7081 if (! name)
7082 return 0;
7083 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7084 if (! newE)
7085 return 0;
7086 if (oldE->systemId) {
7087 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7088 if (! tem)
7089 return 0;
7090 newE->systemId = tem;
7091 if (oldE->base) {
7092 if (oldE->base == cachedOldBase)
7093 newE->base = cachedNewBase;
7094 else {
7095 cachedOldBase = oldE->base;
7096 tem = poolCopyString(newPool, cachedOldBase);
7097 if (! tem)
7098 return 0;
7099 cachedNewBase = newE->base = tem;
7100 }
7101 }
7102 if (oldE->publicId) {
7103 tem = poolCopyString(newPool, oldE->publicId);
7104 if (! tem)
7105 return 0;
7106 newE->publicId = tem;
7107 }
7108 } else {
7109 const XML_Char *tem
7110 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7111 if (! tem)
7112 return 0;
7113 newE->textPtr = tem;
7114 newE->textLen = oldE->textLen;
7115 }
7116 if (oldE->notation) {
7117 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7118 if (! tem)
7119 return 0;
7120 newE->notation = tem;
7121 }
7122 newE->is_param = oldE->is_param;
7123 newE->is_internal = oldE->is_internal;
7124 }
7125 return 1;
7126 }
7127
/* Base-2 logarithm of the initial hash table size: lookup() creates a
   table with (size_t)1 << INIT_POWER slots on first insertion. */
#define INIT_POWER 6
7129
7130 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7131 keyeq(KEY s1, KEY s2) {
7132 for (; *s1 == *s2; s1++, s2++)
7133 if (*s1 == 0)
7134 return XML_TRUE;
7135 return XML_FALSE;
7136 }
7137
7138 static size_t
keylen(KEY s)7139 keylen(KEY s) {
7140 size_t len = 0;
7141 for (; *s; s++, len++)
7142 ;
7143 return len;
7144 }
7145
7146 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7147 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7148 key->k[0] = 0;
7149 key->k[1] = get_hash_secret_salt(parser);
7150 }
7151
7152 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7153 hash(XML_Parser parser, KEY s) {
7154 struct siphash state;
7155 struct sipkey key;
7156 (void)sip24_valid;
7157 copy_salt_to_sipkey(parser, &key);
7158 sip24_init(&state, &key);
7159 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7160 return (unsigned long)sip24_final(&state);
7161 }
7162
/* Look up `name` in `table`, optionally inserting a new entry.
 *
 * parser:     passed through to hash(), which derives the hash salt from it.
 * table:      hash table to search; its slot array is allocated lazily here.
 * name:       zero-terminated key.  On insertion the pointer itself is
 *             stored in the new entry; the characters are not copied.
 * createSize: size in bytes of the entry to allocate when `name` is not
 *             present; 0 means "look up only, never insert".
 *
 * Returns the existing or newly created entry.  Returns NULL if the name is
 * absent and createSize is 0, if an allocation fails, or if the table
 * cannot be grown any further.
 */
static NAMED *
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
  size_t i;
  if (table->size == 0) {
    /* Empty table: nothing can be found, so either bail out or allocate
     * the initial slot array. */
    size_t tsize;
    if (! createSize)
      return NULL;
    table->power = INIT_POWER;
    /* table->size is a power of 2 */
    table->size = (size_t)1 << INIT_POWER;
    tsize = table->size * sizeof(NAMED *);
    table->v = table->mem->malloc_fcn(tsize);
    if (! table->v) {
      /* Roll back so the table still reads as empty. */
      table->size = 0;
      return NULL;
    }
    memset(table->v, 0, tsize);
    i = hash(parser, name) & ((unsigned long)table->size - 1);
  } else {
    unsigned long h = hash(parser, name);
    unsigned long mask = (unsigned long)table->size - 1;
    unsigned char step = 0;
    i = h & mask;
    /* Follow the probe sequence until the key or an empty slot is found.
     * The step is computed lazily (only after the first collision) and the
     * index moves downwards by `step`, wrapping around at slot 0. */
    while (table->v[i]) {
      if (keyeq(name, table->v[i]->name))
        return table->v[i];
      if (! step)
        step = PROBE_STEP(h, mask, table->power);
      i < step ? (i += table->size - step) : (i -= step);
    }
    if (! createSize)
      return NULL;

    /* check for overflow (table is half full) */
    if (table->used >> (table->power - 1)) {
      unsigned char newPower = table->power + 1;

      /* Detect and prevent invalid shift */
      if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
        return NULL;
      }

      size_t newSize = (size_t)1 << newPower;
      unsigned long newMask = (unsigned long)newSize - 1;

      /* Detect and prevent integer overflow */
      if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
        return NULL;
      }

      size_t tsize = newSize * sizeof(NAMED *);
      NAMED **newV = table->mem->malloc_fcn(tsize);
      if (! newV)
        return NULL;
      memset(newV, 0, tsize);
      /* Move every existing entry into the larger array; each one only
       * needs the first empty slot along its own probe sequence. */
      for (i = 0; i < table->size; i++)
        if (table->v[i]) {
          unsigned long newHash = hash(parser, table->v[i]->name);
          size_t j = newHash & newMask;
          step = 0;
          while (newV[j]) {
            if (! step)
              step = PROBE_STEP(newHash, newMask, newPower);
            j < step ? (j += newSize - step) : (j -= step);
          }
          newV[j] = table->v[i];
        }
      table->mem->free_fcn(table->v);
      table->v = newV;
      table->power = newPower;
      table->size = newSize;
      /* The slot found before growing is stale; locate an empty slot for
       * `name` in the new array. */
      i = h & newMask;
      step = 0;
      while (table->v[i]) {
        if (! step)
          step = PROBE_STEP(h, newMask, newPower);
        i < step ? (i += newSize - step) : (i -= step);
      }
    }
  }
  /* Slot i is empty: create the zero-initialised entry there.  If the
   * allocation fails the slot has just been assigned NULL, i.e. it stays
   * empty and `used` is not incremented. */
  table->v[i] = table->mem->malloc_fcn(createSize);
  if (! table->v[i])
    return NULL;
  memset(table->v[i], 0, createSize);
  table->v[i]->name = name;
  (table->used)++;
  return table->v[i];
}
7251
7252 static void FASTCALL
hashTableClear(HASH_TABLE * table)7253 hashTableClear(HASH_TABLE *table) {
7254 size_t i;
7255 for (i = 0; i < table->size; i++) {
7256 table->mem->free_fcn(table->v[i]);
7257 table->v[i] = NULL;
7258 }
7259 table->used = 0;
7260 }
7261
7262 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7263 hashTableDestroy(HASH_TABLE *table) {
7264 size_t i;
7265 for (i = 0; i < table->size; i++)
7266 table->mem->free_fcn(table->v[i]);
7267 table->mem->free_fcn(table->v);
7268 }
7269
7270 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7271 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7272 p->power = 0;
7273 p->size = 0;
7274 p->used = 0;
7275 p->v = NULL;
7276 p->mem = ms;
7277 }
7278
7279 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7280 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7281 iter->p = table->v;
7282 iter->end = iter->p ? iter->p + table->size : NULL;
7283 }
7284
7285 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7286 hashTableIterNext(HASH_TABLE_ITER *iter) {
7287 while (iter->p != iter->end) {
7288 NAMED *tem = *(iter->p)++;
7289 if (tem)
7290 return tem;
7291 }
7292 return NULL;
7293 }
7294
7295 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7296 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7297 pool->blocks = NULL;
7298 pool->freeBlocks = NULL;
7299 pool->start = NULL;
7300 pool->ptr = NULL;
7301 pool->end = NULL;
7302 pool->mem = ms;
7303 }
7304
7305 static void FASTCALL
poolClear(STRING_POOL * pool)7306 poolClear(STRING_POOL *pool) {
7307 if (! pool->freeBlocks)
7308 pool->freeBlocks = pool->blocks;
7309 else {
7310 BLOCK *p = pool->blocks;
7311 while (p) {
7312 BLOCK *tem = p->next;
7313 p->next = pool->freeBlocks;
7314 pool->freeBlocks = p;
7315 p = tem;
7316 }
7317 }
7318 pool->blocks = NULL;
7319 pool->start = NULL;
7320 pool->ptr = NULL;
7321 pool->end = NULL;
7322 }
7323
7324 static void FASTCALL
poolDestroy(STRING_POOL * pool)7325 poolDestroy(STRING_POOL *pool) {
7326 BLOCK *p = pool->blocks;
7327 while (p) {
7328 BLOCK *tem = p->next;
7329 pool->mem->free_fcn(p);
7330 p = tem;
7331 }
7332 p = pool->freeBlocks;
7333 while (p) {
7334 BLOCK *tem = p->next;
7335 pool->mem->free_fcn(p);
7336 p = tem;
7337 }
7338 }
7339
7340 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7341 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7342 const char *end) {
7343 if (! pool->ptr && ! poolGrow(pool))
7344 return NULL;
7345 for (;;) {
7346 const enum XML_Convert_Result convert_res = XmlConvert(
7347 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7348 if ((convert_res == XML_CONVERT_COMPLETED)
7349 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7350 break;
7351 if (! poolGrow(pool))
7352 return NULL;
7353 }
7354 return pool->start;
7355 }
7356
7357 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7358 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7359 do {
7360 if (! poolAppendChar(pool, *s))
7361 return NULL;
7362 } while (*s++);
7363 s = pool->start;
7364 poolFinish(pool);
7365 return s;
7366 }
7367
7368 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7369 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7370 if (! pool->ptr && ! poolGrow(pool)) {
7371 /* The following line is unreachable given the current usage of
7372 * poolCopyStringN(). Currently it is called from exactly one
7373 * place to copy the text of a simple general entity. By that
7374 * point, the name of the entity is already stored in the pool, so
7375 * pool->ptr cannot be NULL.
7376 *
7377 * If poolCopyStringN() is used elsewhere as it well might be,
7378 * this line may well become executable again. Regardless, this
7379 * sort of check shouldn't be removed lightly, so we just exclude
7380 * it from the coverage statistics.
7381 */
7382 return NULL; /* LCOV_EXCL_LINE */
7383 }
7384 for (; n > 0; --n, s++) {
7385 if (! poolAppendChar(pool, *s))
7386 return NULL;
7387 }
7388 s = pool->start;
7389 poolFinish(pool);
7390 return s;
7391 }
7392
7393 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7394 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7395 while (*s) {
7396 if (! poolAppendChar(pool, *s))
7397 return NULL;
7398 s++;
7399 }
7400 return pool->start;
7401 }
7402
7403 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7404 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7405 const char *end) {
7406 if (! poolAppend(pool, enc, ptr, end))
7407 return NULL;
7408 if (pool->ptr == pool->end && ! poolGrow(pool))
7409 return NULL;
7410 *(pool->ptr)++ = 0;
7411 return pool->start;
7412 }
7413
7414 static size_t
poolBytesToAllocateFor(int blockSize)7415 poolBytesToAllocateFor(int blockSize) {
7416 /* Unprotected math would be:
7417 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7418 **
7419 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7420 ** For a + b * c we check b * c in isolation first, so that addition of a
7421 ** on top has no chance of making us accept a small non-negative number
7422 */
7423 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7424
7425 if (blockSize <= 0)
7426 return 0;
7427
7428 if (blockSize > (int)(INT_MAX / stretch))
7429 return 0;
7430
7431 {
7432 const int stretchedBlockSize = blockSize * (int)stretch;
7433 const int bytesToAllocate
7434 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7435 if (bytesToAllocate < 0)
7436 return 0;
7437
7438 return (size_t)bytesToAllocate;
7439 }
7440 }
7441
/* Make room for more characters in `pool`.
 *
 * Tried in order:
 *  1. Reuse the first block of pool->freeBlocks, either as the initial
 *     block (pool->start is NULL) or as a replacement when it is larger
 *     than the current span pool->start..pool->end, whose contents are
 *     copied over.
 *  2. If the current span begins at the start of the head block of
 *     pool->blocks, double that block in place via realloc_fcn.
 *  3. Otherwise allocate a new block -- INIT_BLOCK_SIZE characters, or
 *     twice the current span if that span is already at least
 *     INIT_BLOCK_SIZE -- and copy the characters written so far into it.
 *
 * pool->start, pool->ptr and pool->end are updated to point into the block
 * now in use.  Returns XML_TRUE on success, XML_FALSE if memory could not
 * be obtained or a size computation would overflow.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == 0) {
      /* No current span: take the first free block as the only block in
       * use.  NOTE(review): this overwrites pool->blocks, so it presumably
       * relies on pool->blocks being NULL whenever pool->start is NULL --
       * confirm against poolClear()/poolInit() callers. */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* The first free block is larger than the current span: move it to
       * the head of the used list and continue the string in there. */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      /* ptr must be rebased before start is overwritten. */
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* The string under construction starts at the beginning of the head
     * block, so that block can be doubled in place with realloc. */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX/2 bytes have already been allocated. This isn't
       * readily testable, since it is unlikely that an average
       * machine will have that much memory, so we exclude it from the
       * coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
                                           (unsigned)bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE; /* pool->blocks is untouched and still valid */
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    /* Allocate a fresh block and move the partial string into it. */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = pool->mem->malloc_fcn(bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Only the characters written so far (start..ptr) are carried over. */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    /* ptr must be rebased before start is overwritten. */
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
7545
7546 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7547 nextScaffoldPart(XML_Parser parser) {
7548 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7549 CONTENT_SCAFFOLD *me;
7550 int next;
7551
7552 if (! dtd->scaffIndex) {
7553 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7554 if (! dtd->scaffIndex)
7555 return -1;
7556 dtd->scaffIndex[0] = 0;
7557 }
7558
7559 if (dtd->scaffCount >= dtd->scaffSize) {
7560 CONTENT_SCAFFOLD *temp;
7561 if (dtd->scaffold) {
7562 /* Detect and prevent integer overflow */
7563 if (dtd->scaffSize > UINT_MAX / 2u) {
7564 return -1;
7565 }
7566 /* Detect and prevent integer overflow.
7567 * The preprocessor guard addresses the "always false" warning
7568 * from -Wtype-limits on platforms where
7569 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7570 #if UINT_MAX >= SIZE_MAX
7571 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7572 return -1;
7573 }
7574 #endif
7575
7576 temp = (CONTENT_SCAFFOLD *)REALLOC(
7577 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7578 if (temp == NULL)
7579 return -1;
7580 dtd->scaffSize *= 2;
7581 } else {
7582 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7583 * sizeof(CONTENT_SCAFFOLD));
7584 if (temp == NULL)
7585 return -1;
7586 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7587 }
7588 dtd->scaffold = temp;
7589 }
7590 next = dtd->scaffCount++;
7591 me = &dtd->scaffold[next];
7592 if (dtd->scaffLevel) {
7593 CONTENT_SCAFFOLD *parent
7594 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7595 if (parent->lastchild) {
7596 dtd->scaffold[parent->lastchild].nextsib = next;
7597 }
7598 if (! parent->childcnt)
7599 parent->firstchild = next;
7600 parent->lastchild = next;
7601 parent->childcnt++;
7602 }
7603 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7604 return next;
7605 }
7606
/* Build the XML_Content tree for the content model collected in
 * parser->m_dtd->scaffold.
 *
 * The result is a single allocation (obtained with MALLOC) that holds
 * dtd->scaffCount XML_Content nodes, followed directly by the name strings
 * that the nodes of type XML_CTYPE_NAME point to.
 *
 * Returns the root node (element 0 of the array), or NULL if the required
 * size would overflow or the allocation fails.
 */
static XML_Content *
build_model(XML_Parser parser) {
  /* Function build_model transforms the existing parser->m_dtd->scaffold
   * array of CONTENT_SCAFFOLD tree nodes into a new array of
   * XML_Content tree nodes followed by a gapless list of zero-terminated
   * strings. */
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *ret;
  XML_Char *str; /* the current string writing location */

  /* Detect and prevent integer overflow.
   * The preprocessor guard addresses the "always false" warning
   * from -Wtype-limits on platforms where
   * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
  if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
    return NULL;
  }
  if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
    return NULL;
  }
#endif
  /* The sum of both parts must not overflow either. */
  if (dtd->scaffCount * sizeof(XML_Content)
      > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
    return NULL;
  }

  const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
                            + (dtd->contentStringLen * sizeof(XML_Char)));

  ret = (XML_Content *)MALLOC(parser, allocsize);
  if (! ret)
    return NULL;

  /* What follows is an iterative implementation (of what was previously done
   * recursively in a dedicated function called "build_node".  The old recursive
   * build_node could be forced into stack exhaustion from input as small as a
   * few megabytes, and so that was a security issue.  Hence, a function call
   * stack is avoided now by resolving recursion.)
   *
   * The iterative approach works as follows:
   *
   * - We have two writing pointers, both walking up the result array; one does
   *   the work, the other creates "jobs" for its colleague to do, and leads
   *   the way:
   *
   *   - The faster one, pointer jobDest, always leads and writes "what job
   *     to do" by the other, once they reach that place in the
   *     array: leader "jobDest" stores the source node array index (relative
   *     to array dtd->scaffold) in field "numchildren".
   *
   *   - The slower one, pointer dest, looks at the value stored in the
   *     "numchildren" field (which actually holds a source node array index
   *     at that time) and puts the real data from dtd->scaffold in.
   *
   * - Before the loop starts, jobDest writes source array index 0
   *   (where the root node is located) so that dest will have something to do
   *   when it starts operation.
   *
   * - Whenever nodes with children are encountered, jobDest appends
   *   them as new jobs, in order.  As a result, tree node siblings are
   *   adjacent in the resulting array, for example:
   *
   *     [0] root, has two children
   *       [1] first child of 0, has three children
   *         [3] first child of 1, does not have children
   *         [4] second child of 1, does not have children
   *         [5] third child of 1, does not have children
   *       [2] second child of 0, does not have children
   *
   *   Or (the same data) presented in flat array view:
   *
   *     [0] root, has two children
   *
   *     [1] first child of 0, has three children
   *     [2] second child of 0, does not have children
   *
   *     [3] first child of 1, does not have children
   *     [4] second child of 1, does not have children
   *     [5] third child of 1, does not have children
   *
   * - The algorithm repeats until all target array indices have been processed.
   */
  XML_Content *dest = ret; /* tree node writing location, moves upwards */
  XML_Content *const destLimit = &ret[dtd->scaffCount];
  XML_Content *jobDest = ret; /* next free writing location in target array */
  str = (XML_Char *)&ret[dtd->scaffCount]; /* strings start after the nodes */

  /* Add the starting job, the root node (index 0) of the source tree */
  (jobDest++)->numchildren = 0;

  for (; dest < destLimit; dest++) {
    /* Retrieve source tree array index from job storage */
    const int src_node = (int)dest->numchildren;

    /* Convert item */
    dest->type = dtd->scaffold[src_node].type;
    dest->quant = dtd->scaffold[src_node].quant;
    if (dest->type == XML_CTYPE_NAME) {
      /* Name node: copy the name, including its terminator, into the
       * string area and point the node at the copy. */
      const XML_Char *src;
      dest->name = str;
      src = dtd->scaffold[src_node].name;
      for (;;) {
        *str++ = *src;
        if (! *src)
          break;
        src++;
      }
      dest->numchildren = 0;
      dest->children = NULL;
    } else {
      /* Non-name node: its children will occupy the next
       * numchildren slots starting at jobDest. */
      unsigned int i;
      int cn;
      dest->name = NULL;
      dest->numchildren = dtd->scaffold[src_node].childcnt;
      dest->children = jobDest;

      /* Append scaffold indices of children to array */
      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
        (jobDest++)->numchildren = (unsigned int)cn;
    }
  }

  return ret;
}
7733
7734 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7735 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7736 const char *end) {
7737 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7738 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7739 ELEMENT_TYPE *ret;
7740
7741 if (! name)
7742 return NULL;
7743 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7744 sizeof(ELEMENT_TYPE));
7745 if (! ret)
7746 return NULL;
7747 if (ret->name != name)
7748 poolDiscard(&dtd->pool);
7749 else {
7750 poolFinish(&dtd->pool);
7751 if (! setElementTypePrefix(parser, ret))
7752 return NULL;
7753 }
7754 return ret;
7755 }
7756
7757 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7758 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7759 size_t charsRequired = 0;
7760 XML_Char *result;
7761
7762 /* First determine how long the string is */
7763 while (s[charsRequired] != 0) {
7764 charsRequired++;
7765 }
7766 /* Include the terminator */
7767 charsRequired++;
7768
7769 /* Now allocate space for the copy */
7770 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7771 if (result == NULL)
7772 return NULL;
7773 /* Copy the original into place */
7774 memcpy(result, s, charsRequired * sizeof(XML_Char));
7775 return result;
7776 }
7777
7778 #if XML_GE == 1
7779
7780 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7781 accountingGetCurrentAmplification(XML_Parser rootParser) {
7782 const XmlBigCount countBytesOutput
7783 = rootParser->m_accounting.countBytesDirect
7784 + rootParser->m_accounting.countBytesIndirect;
7785 const float amplificationFactor
7786 = rootParser->m_accounting.countBytesDirect
7787 ? (countBytesOutput
7788 / (float)(rootParser->m_accounting.countBytesDirect))
7789 : 1.0f;
7790 assert(! rootParser->m_parentParser);
7791 return amplificationFactor;
7792 }
7793
7794 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7795 accountingReportStats(XML_Parser originParser, const char *epilog) {
7796 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7797 assert(! rootParser->m_parentParser);
7798
7799 if (rootParser->m_accounting.debugLevel == 0u) {
7800 return;
7801 }
7802
7803 const float amplificationFactor
7804 = accountingGetCurrentAmplification(rootParser);
7805 fprintf(stderr,
7806 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7807 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7808 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7809 rootParser->m_accounting.countBytesIndirect,
7810 (double)amplificationFactor, epilog);
7811 }
7812
7813 static void
accountingOnAbort(XML_Parser originParser)7814 accountingOnAbort(XML_Parser originParser) {
7815 accountingReportStats(originParser, " ABORTING\n");
7816 }
7817
7818 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7819 accountingReportDiff(XML_Parser rootParser,
7820 unsigned int levelsAwayFromRootParser, const char *before,
7821 const char *after, ptrdiff_t bytesMore, int source_line,
7822 enum XML_Account account) {
7823 assert(! rootParser->m_parentParser);
7824
7825 fprintf(stderr,
7826 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7827 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7828 levelsAwayFromRootParser, source_line, 10, "");
7829
7830 const char ellipis[] = "[..]";
7831 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7832 const unsigned int contextLength = 10;
7833
7834 /* Note: Performance is of no concern here */
7835 const char *walker = before;
7836 if ((rootParser->m_accounting.debugLevel >= 3u)
7837 || (after - before)
7838 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7839 for (; walker < after; walker++) {
7840 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7841 }
7842 } else {
7843 for (; walker < before + contextLength; walker++) {
7844 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7845 }
7846 fprintf(stderr, ellipis);
7847 walker = after - contextLength;
7848 for (; walker < after; walker++) {
7849 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7850 }
7851 }
7852 fprintf(stderr, "\"\n");
7853 }
7854
7855 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7856 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7857 const char *after, int source_line,
7858 enum XML_Account account) {
7859 /* Note: We need to check the token type *first* to be sure that
7860 * we can even access variable <after>, safely.
7861 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7862 switch (tok) {
7863 case XML_TOK_INVALID:
7864 case XML_TOK_PARTIAL:
7865 case XML_TOK_PARTIAL_CHAR:
7866 case XML_TOK_NONE:
7867 return XML_TRUE;
7868 }
7869
7870 if (account == XML_ACCOUNT_NONE)
7871 return XML_TRUE; /* because these bytes have been accounted for, already */
7872
7873 unsigned int levelsAwayFromRootParser;
7874 const XML_Parser rootParser
7875 = getRootParserOf(originParser, &levelsAwayFromRootParser);
7876 assert(! rootParser->m_parentParser);
7877
7878 const int isDirect
7879 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7880 const ptrdiff_t bytesMore = after - before;
7881
7882 XmlBigCount *const additionTarget
7883 = isDirect ? &rootParser->m_accounting.countBytesDirect
7884 : &rootParser->m_accounting.countBytesIndirect;
7885
7886 /* Detect and avoid integer overflow */
7887 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7888 return XML_FALSE;
7889 *additionTarget += bytesMore;
7890
7891 const XmlBigCount countBytesOutput
7892 = rootParser->m_accounting.countBytesDirect
7893 + rootParser->m_accounting.countBytesIndirect;
7894 const float amplificationFactor
7895 = accountingGetCurrentAmplification(rootParser);
7896 const XML_Bool tolerated
7897 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7898 || (amplificationFactor
7899 <= rootParser->m_accounting.maximumAmplificationFactor);
7900
7901 if (rootParser->m_accounting.debugLevel >= 2u) {
7902 accountingReportStats(rootParser, "");
7903 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7904 bytesMore, source_line, account);
7905 }
7906
7907 return tolerated;
7908 }
7909
7910 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7911 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7912 if (! parser)
7913 return 0;
7914 return parser->m_accounting.countBytesDirect;
7915 }
7916
7917 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7918 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7919 if (! parser)
7920 return 0;
7921 return parser->m_accounting.countBytesIndirect;
7922 }
7923
7924 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7925 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7926 const char *action, int sourceLine) {
7927 assert(! rootParser->m_parentParser);
7928 if (rootParser->m_entity_stats.debugLevel == 0u)
7929 return;
7930
7931 # if defined(XML_UNICODE)
7932 const char *const entityName = "[..]";
7933 # else
7934 const char *const entityName = entity->name;
7935 # endif
7936
7937 fprintf(
7938 stderr,
7939 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7940 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7941 rootParser->m_entity_stats.currentDepth,
7942 rootParser->m_entity_stats.maximumDepthSeen,
7943 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7944 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7945 sourceLine);
7946 }
7947
7948 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7949 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7950 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7951 assert(! rootParser->m_parentParser);
7952
7953 rootParser->m_entity_stats.countEverOpened++;
7954 rootParser->m_entity_stats.currentDepth++;
7955 if (rootParser->m_entity_stats.currentDepth
7956 > rootParser->m_entity_stats.maximumDepthSeen) {
7957 rootParser->m_entity_stats.maximumDepthSeen++;
7958 }
7959
7960 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7961 }
7962
7963 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7964 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7965 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7966 assert(! rootParser->m_parentParser);
7967
7968 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7969 rootParser->m_entity_stats.currentDepth--;
7970 }
7971
7972 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)7973 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7974 XML_Parser rootParser = parser;
7975 unsigned int stepsTakenUpwards = 0;
7976 while (rootParser->m_parentParser) {
7977 rootParser = rootParser->m_parentParser;
7978 stepsTakenUpwards++;
7979 }
7980 assert(! rootParser->m_parentParser);
7981 if (outLevelDiff != NULL) {
7982 *outLevelDiff = stepsTakenUpwards;
7983 }
7984 return rootParser;
7985 }
7986
7987 const char *
unsignedCharToPrintable(unsigned char c)7988 unsignedCharToPrintable(unsigned char c) {
7989 switch (c) {
7990 case 0:
7991 return "\\0";
7992 case 1:
7993 return "\\x1";
7994 case 2:
7995 return "\\x2";
7996 case 3:
7997 return "\\x3";
7998 case 4:
7999 return "\\x4";
8000 case 5:
8001 return "\\x5";
8002 case 6:
8003 return "\\x6";
8004 case 7:
8005 return "\\x7";
8006 case 8:
8007 return "\\x8";
8008 case 9:
8009 return "\\t";
8010 case 10:
8011 return "\\n";
8012 case 11:
8013 return "\\xB";
8014 case 12:
8015 return "\\xC";
8016 case 13:
8017 return "\\r";
8018 case 14:
8019 return "\\xE";
8020 case 15:
8021 return "\\xF";
8022 case 16:
8023 return "\\x10";
8024 case 17:
8025 return "\\x11";
8026 case 18:
8027 return "\\x12";
8028 case 19:
8029 return "\\x13";
8030 case 20:
8031 return "\\x14";
8032 case 21:
8033 return "\\x15";
8034 case 22:
8035 return "\\x16";
8036 case 23:
8037 return "\\x17";
8038 case 24:
8039 return "\\x18";
8040 case 25:
8041 return "\\x19";
8042 case 26:
8043 return "\\x1A";
8044 case 27:
8045 return "\\x1B";
8046 case 28:
8047 return "\\x1C";
8048 case 29:
8049 return "\\x1D";
8050 case 30:
8051 return "\\x1E";
8052 case 31:
8053 return "\\x1F";
8054 case 32:
8055 return " ";
8056 case 33:
8057 return "!";
8058 case 34:
8059 return "\\\"";
8060 case 35:
8061 return "#";
8062 case 36:
8063 return "$";
8064 case 37:
8065 return "%";
8066 case 38:
8067 return "&";
8068 case 39:
8069 return "'";
8070 case 40:
8071 return "(";
8072 case 41:
8073 return ")";
8074 case 42:
8075 return "*";
8076 case 43:
8077 return "+";
8078 case 44:
8079 return ",";
8080 case 45:
8081 return "-";
8082 case 46:
8083 return ".";
8084 case 47:
8085 return "/";
8086 case 48:
8087 return "0";
8088 case 49:
8089 return "1";
8090 case 50:
8091 return "2";
8092 case 51:
8093 return "3";
8094 case 52:
8095 return "4";
8096 case 53:
8097 return "5";
8098 case 54:
8099 return "6";
8100 case 55:
8101 return "7";
8102 case 56:
8103 return "8";
8104 case 57:
8105 return "9";
8106 case 58:
8107 return ":";
8108 case 59:
8109 return ";";
8110 case 60:
8111 return "<";
8112 case 61:
8113 return "=";
8114 case 62:
8115 return ">";
8116 case 63:
8117 return "?";
8118 case 64:
8119 return "@";
8120 case 65:
8121 return "A";
8122 case 66:
8123 return "B";
8124 case 67:
8125 return "C";
8126 case 68:
8127 return "D";
8128 case 69:
8129 return "E";
8130 case 70:
8131 return "F";
8132 case 71:
8133 return "G";
8134 case 72:
8135 return "H";
8136 case 73:
8137 return "I";
8138 case 74:
8139 return "J";
8140 case 75:
8141 return "K";
8142 case 76:
8143 return "L";
8144 case 77:
8145 return "M";
8146 case 78:
8147 return "N";
8148 case 79:
8149 return "O";
8150 case 80:
8151 return "P";
8152 case 81:
8153 return "Q";
8154 case 82:
8155 return "R";
8156 case 83:
8157 return "S";
8158 case 84:
8159 return "T";
8160 case 85:
8161 return "U";
8162 case 86:
8163 return "V";
8164 case 87:
8165 return "W";
8166 case 88:
8167 return "X";
8168 case 89:
8169 return "Y";
8170 case 90:
8171 return "Z";
8172 case 91:
8173 return "[";
8174 case 92:
8175 return "\\\\";
8176 case 93:
8177 return "]";
8178 case 94:
8179 return "^";
8180 case 95:
8181 return "_";
8182 case 96:
8183 return "`";
8184 case 97:
8185 return "a";
8186 case 98:
8187 return "b";
8188 case 99:
8189 return "c";
8190 case 100:
8191 return "d";
8192 case 101:
8193 return "e";
8194 case 102:
8195 return "f";
8196 case 103:
8197 return "g";
8198 case 104:
8199 return "h";
8200 case 105:
8201 return "i";
8202 case 106:
8203 return "j";
8204 case 107:
8205 return "k";
8206 case 108:
8207 return "l";
8208 case 109:
8209 return "m";
8210 case 110:
8211 return "n";
8212 case 111:
8213 return "o";
8214 case 112:
8215 return "p";
8216 case 113:
8217 return "q";
8218 case 114:
8219 return "r";
8220 case 115:
8221 return "s";
8222 case 116:
8223 return "t";
8224 case 117:
8225 return "u";
8226 case 118:
8227 return "v";
8228 case 119:
8229 return "w";
8230 case 120:
8231 return "x";
8232 case 121:
8233 return "y";
8234 case 122:
8235 return "z";
8236 case 123:
8237 return "{";
8238 case 124:
8239 return "|";
8240 case 125:
8241 return "}";
8242 case 126:
8243 return "~";
8244 case 127:
8245 return "\\x7F";
8246 case 128:
8247 return "\\x80";
8248 case 129:
8249 return "\\x81";
8250 case 130:
8251 return "\\x82";
8252 case 131:
8253 return "\\x83";
8254 case 132:
8255 return "\\x84";
8256 case 133:
8257 return "\\x85";
8258 case 134:
8259 return "\\x86";
8260 case 135:
8261 return "\\x87";
8262 case 136:
8263 return "\\x88";
8264 case 137:
8265 return "\\x89";
8266 case 138:
8267 return "\\x8A";
8268 case 139:
8269 return "\\x8B";
8270 case 140:
8271 return "\\x8C";
8272 case 141:
8273 return "\\x8D";
8274 case 142:
8275 return "\\x8E";
8276 case 143:
8277 return "\\x8F";
8278 case 144:
8279 return "\\x90";
8280 case 145:
8281 return "\\x91";
8282 case 146:
8283 return "\\x92";
8284 case 147:
8285 return "\\x93";
8286 case 148:
8287 return "\\x94";
8288 case 149:
8289 return "\\x95";
8290 case 150:
8291 return "\\x96";
8292 case 151:
8293 return "\\x97";
8294 case 152:
8295 return "\\x98";
8296 case 153:
8297 return "\\x99";
8298 case 154:
8299 return "\\x9A";
8300 case 155:
8301 return "\\x9B";
8302 case 156:
8303 return "\\x9C";
8304 case 157:
8305 return "\\x9D";
8306 case 158:
8307 return "\\x9E";
8308 case 159:
8309 return "\\x9F";
8310 case 160:
8311 return "\\xA0";
8312 case 161:
8313 return "\\xA1";
8314 case 162:
8315 return "\\xA2";
8316 case 163:
8317 return "\\xA3";
8318 case 164:
8319 return "\\xA4";
8320 case 165:
8321 return "\\xA5";
8322 case 166:
8323 return "\\xA6";
8324 case 167:
8325 return "\\xA7";
8326 case 168:
8327 return "\\xA8";
8328 case 169:
8329 return "\\xA9";
8330 case 170:
8331 return "\\xAA";
8332 case 171:
8333 return "\\xAB";
8334 case 172:
8335 return "\\xAC";
8336 case 173:
8337 return "\\xAD";
8338 case 174:
8339 return "\\xAE";
8340 case 175:
8341 return "\\xAF";
8342 case 176:
8343 return "\\xB0";
8344 case 177:
8345 return "\\xB1";
8346 case 178:
8347 return "\\xB2";
8348 case 179:
8349 return "\\xB3";
8350 case 180:
8351 return "\\xB4";
8352 case 181:
8353 return "\\xB5";
8354 case 182:
8355 return "\\xB6";
8356 case 183:
8357 return "\\xB7";
8358 case 184:
8359 return "\\xB8";
8360 case 185:
8361 return "\\xB9";
8362 case 186:
8363 return "\\xBA";
8364 case 187:
8365 return "\\xBB";
8366 case 188:
8367 return "\\xBC";
8368 case 189:
8369 return "\\xBD";
8370 case 190:
8371 return "\\xBE";
8372 case 191:
8373 return "\\xBF";
8374 case 192:
8375 return "\\xC0";
8376 case 193:
8377 return "\\xC1";
8378 case 194:
8379 return "\\xC2";
8380 case 195:
8381 return "\\xC3";
8382 case 196:
8383 return "\\xC4";
8384 case 197:
8385 return "\\xC5";
8386 case 198:
8387 return "\\xC6";
8388 case 199:
8389 return "\\xC7";
8390 case 200:
8391 return "\\xC8";
8392 case 201:
8393 return "\\xC9";
8394 case 202:
8395 return "\\xCA";
8396 case 203:
8397 return "\\xCB";
8398 case 204:
8399 return "\\xCC";
8400 case 205:
8401 return "\\xCD";
8402 case 206:
8403 return "\\xCE";
8404 case 207:
8405 return "\\xCF";
8406 case 208:
8407 return "\\xD0";
8408 case 209:
8409 return "\\xD1";
8410 case 210:
8411 return "\\xD2";
8412 case 211:
8413 return "\\xD3";
8414 case 212:
8415 return "\\xD4";
8416 case 213:
8417 return "\\xD5";
8418 case 214:
8419 return "\\xD6";
8420 case 215:
8421 return "\\xD7";
8422 case 216:
8423 return "\\xD8";
8424 case 217:
8425 return "\\xD9";
8426 case 218:
8427 return "\\xDA";
8428 case 219:
8429 return "\\xDB";
8430 case 220:
8431 return "\\xDC";
8432 case 221:
8433 return "\\xDD";
8434 case 222:
8435 return "\\xDE";
8436 case 223:
8437 return "\\xDF";
8438 case 224:
8439 return "\\xE0";
8440 case 225:
8441 return "\\xE1";
8442 case 226:
8443 return "\\xE2";
8444 case 227:
8445 return "\\xE3";
8446 case 228:
8447 return "\\xE4";
8448 case 229:
8449 return "\\xE5";
8450 case 230:
8451 return "\\xE6";
8452 case 231:
8453 return "\\xE7";
8454 case 232:
8455 return "\\xE8";
8456 case 233:
8457 return "\\xE9";
8458 case 234:
8459 return "\\xEA";
8460 case 235:
8461 return "\\xEB";
8462 case 236:
8463 return "\\xEC";
8464 case 237:
8465 return "\\xED";
8466 case 238:
8467 return "\\xEE";
8468 case 239:
8469 return "\\xEF";
8470 case 240:
8471 return "\\xF0";
8472 case 241:
8473 return "\\xF1";
8474 case 242:
8475 return "\\xF2";
8476 case 243:
8477 return "\\xF3";
8478 case 244:
8479 return "\\xF4";
8480 case 245:
8481 return "\\xF5";
8482 case 246:
8483 return "\\xF6";
8484 case 247:
8485 return "\\xF7";
8486 case 248:
8487 return "\\xF8";
8488 case 249:
8489 return "\\xF9";
8490 case 250:
8491 return "\\xFA";
8492 case 251:
8493 return "\\xFB";
8494 case 252:
8495 return "\\xFC";
8496 case 253:
8497 return "\\xFD";
8498 case 254:
8499 return "\\xFE";
8500 case 255:
8501 return "\\xFF";
8502 default:
8503 assert(0); /* never gets here */
8504 return "dead code";
8505 }
8506 assert(0); /* never gets here */
8507 }
8508
8509 #endif /* XML_GE == 1 */
8510
/* Look up the environment variable named variableName and interpret its
   value as a base-10 unsigned long.

   Returns defaultDebugLevel when the variable is not set, or when its value
   is not accepted: strtoul reported an error through errno, no characters
   were consumed, or something is left over after the number.  In the
   rejected case errno is cleared again before returning, so a parse failure
   here does not leak into the caller's errno. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  unsigned long level = defaultDebugLevel;

  if (text != NULL) {
    char *end = NULL;

    /* strtoul signals range errors only through errno, so start clean */
    errno = 0;
    const unsigned long parsed = strtoul(text, &end, 10);

    /* Accept only a value that was consumed completely and without error */
    if ((errno == 0) && (end != text) && (end[0] == '\0')) {
      level = parsed;
    } else {
      errno = 0;
    }
  }

  return level;
}
8529