1 /* 2a14271ad4d35e82bde8ba210b4edb7998794bcbae54deab114046a300f9639a (2.6.2+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Licensed under the MIT license:
43
44 Permission is hereby granted, free of charge, to any person obtaining
45 a copy of this software and associated documentation files (the
46 "Software"), to deal in the Software without restriction, including
47 without limitation the rights to use, copy, modify, merge, publish,
48 distribute, sublicense, and/or sell copies of the Software, and to permit
49 persons to whom the Software is furnished to do so, subject to the
50 following conditions:
51
52 The above copyright notice and this permission notice shall be included
53 in all copies or substantial portions of the Software.
54
55 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
56 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
57 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
58 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
59 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
60 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
61 USE OR OTHER DEALINGS IN THE SOFTWARE.
62 */
63
64 #define XML_BUILDING_EXPAT 1
65
66 #include "expat_config.h"
67
68 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
69 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
70 #endif
71
72 #if defined(XML_DTD) && XML_GE == 0
73 # error Either undefine XML_DTD or define XML_GE to 1.
74 #endif
75
76 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
77 || (XML_CONTEXT_BYTES + 0 < 0)
78 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
79 #endif
80
81 #if defined(HAVE_SYSCALL_GETRANDOM)
82 # if ! defined(_GNU_SOURCE)
83 # define _GNU_SOURCE 1 /* syscall prototype */
84 # endif
85 #endif
86
87 #ifdef _WIN32
88 /* force stdlib to define rand_s() */
89 # if ! defined(_CRT_RAND_S)
90 # define _CRT_RAND_S
91 # endif
92 #endif
93
94 #include <stdbool.h>
95 #include <stddef.h>
96 #include <string.h> /* memset(), memcpy() */
97 #include <assert.h>
98 #include <limits.h> /* UINT_MAX */
99 #include <stdio.h> /* fprintf */
100 #include <stdlib.h> /* getenv, rand_s */
101 #include <stdint.h> /* uintptr_t */
102 #include <math.h> /* isnan */
103
104 #ifdef _WIN32
105 # define getpid GetCurrentProcessId
106 #else
107 # include <sys/time.h> /* gettimeofday() */
108 # include <sys/types.h> /* getpid() */
109 # include <unistd.h> /* getpid() */
110 # include <fcntl.h> /* O_RDONLY */
111 # include <errno.h>
112 #endif
113
114 #ifdef _WIN32
115 # include "winconfig.h"
116 #endif
117
118 #include "ascii.h"
119 #include "expat.h"
120 #include "siphash.h"
121
122 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
123 # if defined(HAVE_GETRANDOM)
124 # include <sys/random.h> /* getrandom */
125 # else
126 # include <unistd.h> /* syscall */
127 # include <sys/syscall.h> /* SYS_getrandom */
128 # endif
129 # if ! defined(GRND_NONBLOCK)
130 # define GRND_NONBLOCK 0x0001
#  endif /* ! defined(GRND_NONBLOCK) */
132 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
133
134 #if defined(HAVE_LIBBSD) \
135 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
136 # include <bsd/stdlib.h>
137 #endif
138
139 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
140 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
141 #endif
142
143 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
144 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
145 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
146 && ! defined(XML_POOR_ENTROPY)
147 # error You do not have support for any sources of high quality entropy \
148 enabled. For end user security, that is probably not what you want. \
149 \
150 Your options include: \
151 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
152 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
153 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
154 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
155 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
156 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
157 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
158 * Windows >=Vista (rand_s): _WIN32. \
159 \
160 If insist on not using any of these, bypass this error by defining \
161 XML_POOR_ENTROPY; you have been warned. \
162 \
163 If you have reasons to patch this detection code away or need changes \
164 to the build system, please open a bug. Thank you!
165 #endif
166
167 #ifdef XML_UNICODE
168 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
169 # define XmlConvert XmlUtf16Convert
170 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
171 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
172 # define XmlEncode XmlUtf16Encode
173 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
174 typedef unsigned short ICHAR;
175 #else
176 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
177 # define XmlConvert XmlUtf8Convert
178 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
179 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
180 # define XmlEncode XmlUtf8Encode
181 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
182 typedef char ICHAR;
183 #endif
184
185 #ifndef XML_NS
186
187 # define XmlInitEncodingNS XmlInitEncoding
188 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
189 # undef XmlGetInternalEncodingNS
190 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
191 # define XmlParseXmlDeclNS XmlParseXmlDecl
192
193 #endif
194
195 #ifdef XML_UNICODE
196
197 # ifdef XML_UNICODE_WCHAR_T
198 # define XML_T(x) (const wchar_t) x
199 # define XML_L(x) L##x
200 # else
201 # define XML_T(x) (const unsigned short)x
202 # define XML_L(x) x
203 # endif
204
205 #else
206
207 # define XML_T(x) x
208 # define XML_L(x) x
209
210 #endif
211
/* Round up n to be a multiple of sz, where sz is a power of 2.
   sz is expanded twice, so do not pass an expression with side effects. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))

/* Do safe (NULL-aware) pointer arithmetic: yields (p - q) when both
   pointers are non-NULL and 0 otherwise.  Each argument may be evaluated
   twice. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)

/* Yields the smaller of a and b; the selected argument is evaluated twice. */
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
219
220 #include "internal.h"
221 #include "xmltok.h"
222 #include "xmlrole.h"
223
/* Hash table key: a pointer to constant XML_Char data. */
typedef const XML_Char *KEY;

/* Smallest possible hash table entry; it carries only the key.  The table
   below stores pointers to NAMED. */
typedef struct {
  KEY name;
} NAMED;

/* Hash table of NAMED pointers.  Per the probing notes further down, the
   table size is a power of 2 and an index is computed as
   hash & (size - 1). */
typedef struct {
  NAMED **v;
  unsigned char power; /* NOTE(review): presumably log2(size) -- confirm in
                          lookup() */
  size_t size;
  size_t used;
  const XML_Memory_Handling_Suite *mem;
} HASH_TABLE;
237
238 static size_t keylen(KEY s);
239
240 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
241
/* For probing (after a collision) we need a step size that is relatively
   prime to the hash table size, which is a power of 2. We use
   double-hashing, since we can calculate a second hash value cheaply by
   taking those bits of the first hash value that were discarded (masked
   out) when the table index was calculated: index = hash & mask, where
   mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.

   SECOND_HASH extracts the discarded bits; PROBE_STEP forces the result
   odd (| 1) and truncates it to unsigned char.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
254
/* Iteration state over the slots of a HASH_TABLE (slots are NAMED *).
   NOTE(review): presumably p is the next slot to visit and end is one past
   the last slot -- confirm in hashTableIterInit()/hashTableIterNext(). */
typedef struct {
  NAMED **p;
  NAMED **end;
} HASH_TABLE_ITER;
259
/* NOTE(review): the users of these constants are outside this excerpt; the
   names suggest initial allocation sizes/values -- confirm at the use
   sites. */
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
268
/* A namespace binding: links a prefix to a URI buffer.  Each binding is a
   member of two lists, chained through nextTagBinding and
   prevPrefixBinding.
   NOTE(review): presumably uriLen is the used length and uriAlloc the
   allocated capacity of uri -- confirm in addBinding(). */
typedef struct binding {
  struct prefix *prefix;
  struct binding *nextTagBinding;
  struct binding *prevPrefixBinding;
  const struct attribute_id *attId;
  XML_Char *uri;
  int uriLen;
  int uriAlloc;
} BINDING;

/* A namespace prefix together with a pointer to a BINDING
   (presumably the one currently in effect -- confirm). */
typedef struct prefix {
  const XML_Char *name;
  BINDING *binding;
} PREFIX;

/* An element name split into components, each with a length field. */
typedef struct {
  const XML_Char *str;
  const XML_Char *localPart;
  const XML_Char *prefix;
  int strLen;
  int uriLen;
  int prefixLen;
} TAG_NAME;
292
/* TAG represents an open element.
   The name of the element is stored in both the document and API
   encodings.  The memory buffer 'buf' is a separately-allocated
   memory area which stores the name.  During the XML_Parse()/
   XML_ParseBuffer() when the element is open, the memory for the 'raw'
   version of the name (in the document encoding) is shared with the
   document buffer.  If the element is open across calls to
   XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
   contain the 'raw' name as well.

   A parser reuses these structures, maintaining a list of allocated
   TAG objects in a free list.
*/
typedef struct tag {
  struct tag *parent;  /* parent of this element */
  const char *rawName; /* tagName in the original encoding */
  int rawNameLength;
  TAG_NAME name; /* tagName in the API encoding */
  char *buf;     /* buffer for name components */
  char *bufEnd;  /* end of the buffer */
  BINDING *bindings;
} TAG;
315
/* Record for one entity.  The first member is the name, matching the layout
   of NAMED, the entry type of the hash tables above. */
typedef struct {
  const XML_Char *name;
  const XML_Char *textPtr;
  int textLen;   /* length in XML_Chars */
  int processed; /* # of processed bytes - when suspended */
  const XML_Char *systemId;
  const XML_Char *base;
  const XML_Char *publicId;
  const XML_Char *notation;
  XML_Bool open;
  XML_Bool is_param;
  XML_Bool is_internal; /* true if declared in internal subset outside PE */
} ENTITY;
329
/* One node of the content-model scaffold kept in DTD::scaffold.
   NOTE(review): firstchild/lastchild/nextsib are ints, presumably indices
   into that scaffold array rather than pointers -- confirm in
   nextScaffoldPart()/build_model(). */
typedef struct {
  enum XML_Content_Type type;
  enum XML_Content_Quant quant;
  const XML_Char *name;
  int firstchild;
  int lastchild;
  int childcnt;
  int nextsib;
} CONTENT_SCAFFOLD;

#define INIT_SCAFFOLD_ELEMENTS 32
341
/* One chunk of string pool storage, chained through 'next'.
   NOTE(review): s[1] presumably marks the start of a larger, over-allocated
   character area -- confirm in poolGrow(). */
typedef struct block {
  struct block *next;
  int size;
  XML_Char s[1];
} BLOCK;

/* String pool.  As the pool* macros below show, [start, ptr) is the string
   currently being built (poolLength), poolFinish() moves start up to ptr,
   poolDiscard() moves ptr back to start, and poolAppendChar() calls
   poolGrow() once ptr reaches end. */
typedef struct {
  BLOCK *blocks;
  BLOCK *freeBlocks;
  const XML_Char *end;
  XML_Char *ptr;
  XML_Char *start;
  const XML_Memory_Handling_Suite *mem;
} STRING_POOL;
356
/* The XML_Char before the name is used to determine whether
   an attribute has been specified. */
typedef struct attribute_id {
  XML_Char *name;
  PREFIX *prefix;
  XML_Bool maybeTokenized;
  XML_Bool xmlns;
} ATTRIBUTE_ID;

/* An attribute identity paired with a value and a CDATA flag; ELEMENT_TYPE
   keeps an array of these in defaultAtts. */
typedef struct {
  const ATTRIBUTE_ID *id;
  XML_Bool isCdata;
  const XML_Char *value;
} DEFAULT_ATTRIBUTE;

/* Entry type of XML_ParserStruct::m_nsAtts.
   NOTE(review): 'version' presumably pairs with m_nsAttsVersion to mark
   stale entries -- confirm in storeAtts(). */
typedef struct {
  unsigned long version;
  unsigned long hash;
  const XML_Char *uriName;
} NS_ATT;

/* Per-element-type record: name, prefix, an ID attribute pointer and an
   array of default attributes (nDefaultAtts / allocDefaultAtts are
   presumably used count and capacity -- confirm in defineAttribute()). */
typedef struct {
  const XML_Char *name;
  PREFIX *prefix;
  const ATTRIBUTE_ID *idAtt;
  int nDefaultAtts;
  int allocDefaultAtts;
  DEFAULT_ATTRIBUTE *defaultAtts;
} ELEMENT_TYPE;
386
/* DTD-related state: the hash tables for entities, element types,
   attribute ids and prefixes, the string pools, and the scaffolding used
   while building a content model.  The parser holds a pointer to one of
   these in XML_ParserStruct::m_dtd. */
typedef struct {
  HASH_TABLE generalEntities;
  HASH_TABLE elementTypes;
  HASH_TABLE attributeIds;
  HASH_TABLE prefixes;
  STRING_POOL pool;
  STRING_POOL entityValuePool;
  /* false once a parameter entity reference has been skipped */
  XML_Bool keepProcessing;
  /* true once an internal or external PE reference has been encountered;
     this includes the reference to an external subset */
  XML_Bool hasParamEntityRefs;
  XML_Bool standalone;
#ifdef XML_DTD
  /* indicates if external PE has been read */
  XML_Bool paramEntityRead;
  HASH_TABLE paramEntities;
#endif /* XML_DTD */
  PREFIX defaultPrefix;
  /* === scaffolding for building content model === */
  XML_Bool in_eldecl;
  CONTENT_SCAFFOLD *scaffold;
  unsigned contentStringLen;
  unsigned scaffSize;
  unsigned scaffCount;
  int scaffLevel;
  int *scaffIndex;
} DTD;
415
/* List node for an internal entity; chained through 'next'.  The parser
   keeps two such lists: m_openInternalEntities and
   m_freeInternalEntities. */
typedef struct open_internal_entity {
  const char *internalEventPtr;
  const char *internalEventEndPtr;
  struct open_internal_entity *next;
  ENTITY *entity;
  int startTagLevel;
  XML_Bool betweenDecl; /* WFC: PE Between Declarations */
} OPEN_INTERNAL_ENTITY;
424
/* Tells the accounting code which category a span of processed bytes
   belongs to; passed as the 'account' argument of the do*() / store*()
   helpers declared below. */
enum XML_Account {
  XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
  XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
                                   expansion */
  XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
};
431
#if XML_GE == 1
/* Counter type used for the byte totals below. */
typedef unsigned long long XmlBigCount;

/* Byte accounting state, stored in XML_ParserStruct::m_accounting.
   NOTE(review): countBytesDirect / countBytesIndirect presumably correspond
   to XML_ACCOUNT_DIRECT / XML_ACCOUNT_ENTITY_EXPANSION -- confirm in
   accountingDiffTolerated(). */
typedef struct accounting {
  XmlBigCount countBytesDirect;
  XmlBigCount countBytesIndirect;
  unsigned long debugLevel;
  float maximumAmplificationFactor; // >=1.0
  unsigned long long activationThresholdBytes;
} ACCOUNTING;

/* Entity open/depth statistics, stored in
   XML_ParserStruct::m_entity_stats. */
typedef struct entity_stats {
  unsigned int countEverOpened;
  unsigned int currentDepth;
  unsigned int maximumDepthSeen;
  unsigned long debugLevel;
} ENTITY_STATS;
#endif /* XML_GE == 1 */
449
/* Function type shared by all processors.  Because Processor is a function
   type (not a pointer type), each "static Processor name;" line below is a
   forward declaration of a function with this signature.  The parser
   stores a pointer to one of them in XML_ParserStruct::m_processor. */
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
                                         const char *end, const char **endPtr);

static Processor prologProcessor;
static Processor prologInitProcessor;
static Processor contentProcessor;
static Processor cdataSectionProcessor;
#ifdef XML_DTD
/* These processors are only declared when DTD support is compiled in. */
static Processor ignoreSectionProcessor;
static Processor externalParEntProcessor;
static Processor externalParEntInitProcessor;
static Processor entityValueProcessor;
static Processor entityValueInitProcessor;
#endif /* XML_DTD */
static Processor epilogProcessor;
static Processor errorProcessor;
static Processor externalEntityInitProcessor;
static Processor externalEntityInitProcessor2;
static Processor externalEntityInitProcessor3;
static Processor externalEntityContentProcessor;
static Processor internalEntityProcessor;
471
472 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
473 const XML_Char *encodingName);
474 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
475 const char *s, const char *next);
476 static enum XML_Error initializeEncoding(XML_Parser parser);
477 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
478 const char *s, const char *end, int tok,
479 const char *next, const char **nextPtr,
480 XML_Bool haveMore, XML_Bool allowClosingDoctype,
481 enum XML_Account account);
482 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
483 XML_Bool betweenDecl);
484 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
485 const ENCODING *enc, const char *start,
486 const char *end, const char **endPtr,
487 XML_Bool haveMore, enum XML_Account account);
488 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
489 const char **startPtr, const char *end,
490 const char **nextPtr, XML_Bool haveMore,
491 enum XML_Account account);
492 #ifdef XML_DTD
493 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
494 const char **startPtr, const char *end,
495 const char **nextPtr, XML_Bool haveMore);
496 #endif /* XML_DTD */
497
498 static void freeBindings(XML_Parser parser, BINDING *bindings);
499 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
500 const char *attStr, TAG_NAME *tagNamePtr,
501 BINDING **bindingsPtr,
502 enum XML_Account account);
503 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
504 const ATTRIBUTE_ID *attId, const XML_Char *uri,
505 BINDING **bindingsPtr);
506 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
507 XML_Bool isCdata, XML_Bool isId,
508 const XML_Char *value, XML_Parser parser);
509 static enum XML_Error storeAttributeValue(XML_Parser parser,
510 const ENCODING *enc, XML_Bool isCdata,
511 const char *ptr, const char *end,
512 STRING_POOL *pool,
513 enum XML_Account account);
514 static enum XML_Error appendAttributeValue(XML_Parser parser,
515 const ENCODING *enc,
516 XML_Bool isCdata, const char *ptr,
517 const char *end, STRING_POOL *pool,
518 enum XML_Account account);
519 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
520 const char *start, const char *end);
521 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
522 #if XML_GE == 1
523 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
524 const char *start, const char *end,
525 enum XML_Account account);
526 #else
527 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
528 #endif
529 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
530 const char *start, const char *end);
531 static int reportComment(XML_Parser parser, const ENCODING *enc,
532 const char *start, const char *end);
533 static void reportDefault(XML_Parser parser, const ENCODING *enc,
534 const char *start, const char *end);
535
536 static const XML_Char *getContext(XML_Parser parser);
537 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
538
539 static void FASTCALL normalizePublicId(XML_Char *s);
540
541 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
542 /* do not call if m_parentParser != NULL */
543 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
544 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
545 const XML_Memory_Handling_Suite *ms);
546 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
547 const XML_Memory_Handling_Suite *ms);
548 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
549 STRING_POOL *newPool, const HASH_TABLE *oldTable);
550 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
551 size_t createSize);
552 static void FASTCALL hashTableInit(HASH_TABLE *table,
553 const XML_Memory_Handling_Suite *ms);
554 static void FASTCALL hashTableClear(HASH_TABLE *table);
555 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
556 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
557 const HASH_TABLE *table);
558 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
559
560 static void FASTCALL poolInit(STRING_POOL *pool,
561 const XML_Memory_Handling_Suite *ms);
562 static void FASTCALL poolClear(STRING_POOL *pool);
563 static void FASTCALL poolDestroy(STRING_POOL *pool);
564 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
565 const char *ptr, const char *end);
566 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
567 const char *ptr, const char *end);
568 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
569 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
570 const XML_Char *s);
571 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
572 int n);
573 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
574 const XML_Char *s);
575
576 static int FASTCALL nextScaffoldPart(XML_Parser parser);
577 static XML_Content *build_model(XML_Parser parser);
578 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
579 const char *ptr, const char *end);
580
581 static XML_Char *copyString(const XML_Char *s,
582 const XML_Memory_Handling_Suite *memsuite);
583
584 static unsigned long generate_hash_secret_salt(XML_Parser parser);
585 static XML_Bool startParsing(XML_Parser parser);
586
587 static XML_Parser parserCreate(const XML_Char *encodingName,
588 const XML_Memory_Handling_Suite *memsuite,
589 const XML_Char *nameSep, DTD *dtd);
590
591 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
592
593 #if XML_GE == 1
594 static float accountingGetCurrentAmplification(XML_Parser rootParser);
595 static void accountingReportStats(XML_Parser originParser, const char *epilog);
596 static void accountingOnAbort(XML_Parser originParser);
597 static void accountingReportDiff(XML_Parser rootParser,
598 unsigned int levelsAwayFromRootParser,
599 const char *before, const char *after,
600 ptrdiff_t bytesMore, int source_line,
601 enum XML_Account account);
602 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
603 const char *before, const char *after,
604 int source_line,
605 enum XML_Account account);
606
607 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
608 const char *action, int sourceLine);
609 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
610 int sourceLine);
611 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
612 int sourceLine);
613
614 static XML_Parser getRootParserOf(XML_Parser parser,
615 unsigned int *outLevelDiff);
616 #endif /* XML_GE == 1 */
617
618 static unsigned long getDebugLevel(const char *variableName,
619 unsigned long defaultDebugLevel);
620
/* Accessors for a STRING_POOL.  'pool' must be a pointer to the pool; it is
   fully parenthesized in every expansion so that any pointer expression
   (e.g. &parser->m_tempPool) can be passed safely.  'pool' is expanded more
   than once, so it must not have side effects.

   poolStart      - start of the string currently being built
   poolLength     - number of XML_Chars in [start, ptr)
   poolChop       - drop the last appended XML_Char
   poolLastChar   - lvalue of the last appended XML_Char
   poolDiscard    - abandon the string being built (ptr = start)
   poolFinish     - commit the string being built (start = ptr)
   poolAppendChar - append c, growing the pool via poolGrow() when it is
                    full; evaluates to 1 on success and 0 if poolGrow()
                    reports failure */
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
631
/* Default for reparse deferral.  The object is const in regular builds;
   only when XML_TESTING is defined is it left writable (see the note on the
   initializer). */
#if ! defined(XML_TESTING)
const
#endif
    XML_Bool g_reparseDeferralEnabledDefault
    = XML_TRUE; // write ONLY in runtests.c
#if defined(XML_TESTING)
/* Exists only in XML_TESTING builds. */
unsigned int g_bytesScanned = 0; // used for testing only
#endif
640
/* Complete definition of the parser object.  It groups: user data, the
   parse buffer pointers, the registered handler callbacks, encoding state,
   the current processor and error/position tracking, declaration state used
   while reading a DTD, the tag/binding/attribute bookkeeping, scratch
   string pools, and (when XML_GE == 1) accounting and entity statistics. */
struct XML_ParserStruct {
  /* The first member must be m_userData so that the XML_GetUserData
     macro works. */
  void *m_userData;
  void *m_handlerArg;

  // How the four parse buffer pointers below relate in time and space:
  //
  //   m_buffer <= m_bufferPtr <= m_bufferEnd  <= m_bufferLim
  //   |           |              |               |
  //   <--parsed-->|              |               |
  //               <---parsing--->|               |
  //                              <--unoccupied-->|
  //   <---------total-malloced/realloced-------->|

  char *m_buffer; // malloc/realloc base pointer of parse buffer
  /* Allocator callbacks; used through the MALLOC/REALLOC/FREE macros
     defined after this struct. */
  const XML_Memory_Handling_Suite m_mem;
  const char *m_bufferPtr; // first character to be parsed
  char *m_bufferEnd;       // past last character to be parsed
  const char *m_bufferLim; // allocated end of m_buffer

  XML_Index m_parseEndByteIndex;
  const char *m_parseEndPtr;
  size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
  XML_Bool m_reparseDeferralEnabled;
  int m_lastBufferRequestSize;
  XML_Char *m_dataBuf;
  XML_Char *m_dataBufEnd;
  /* Registered handler callbacks. */
  XML_StartElementHandler m_startElementHandler;
  XML_EndElementHandler m_endElementHandler;
  XML_CharacterDataHandler m_characterDataHandler;
  XML_ProcessingInstructionHandler m_processingInstructionHandler;
  XML_CommentHandler m_commentHandler;
  XML_StartCdataSectionHandler m_startCdataSectionHandler;
  XML_EndCdataSectionHandler m_endCdataSectionHandler;
  XML_DefaultHandler m_defaultHandler;
  XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
  XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
  XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
  XML_NotationDeclHandler m_notationDeclHandler;
  XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
  XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
  XML_NotStandaloneHandler m_notStandaloneHandler;
  XML_ExternalEntityRefHandler m_externalEntityRefHandler;
  XML_Parser m_externalEntityRefHandlerArg;
  XML_SkippedEntityHandler m_skippedEntityHandler;
  XML_UnknownEncodingHandler m_unknownEncodingHandler;
  XML_ElementDeclHandler m_elementDeclHandler;
  XML_AttlistDeclHandler m_attlistDeclHandler;
  XML_EntityDeclHandler m_entityDeclHandler;
  XML_XmlDeclHandler m_xmlDeclHandler;
  /* Encoding state. */
  const ENCODING *m_encoding;
  INIT_ENCODING m_initEncoding;
  const ENCODING *m_internalEncoding;
  const XML_Char *m_protocolEncodingName;
  XML_Bool m_ns;
  XML_Bool m_ns_triplets;
  void *m_unknownEncodingMem;
  void *m_unknownEncodingData;
  void *m_unknownEncodingHandlerData;
  void(XMLCALL *m_unknownEncodingRelease)(void *);
  PROLOG_STATE m_prologState;
  /* Pointer to one of the Processor functions declared above. */
  Processor *m_processor;
  enum XML_Error m_errorCode;
  const char *m_eventPtr;
  const char *m_eventEndPtr;
  const char *m_positionPtr;
  OPEN_INTERNAL_ENTITY *m_openInternalEntities;
  OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
  XML_Bool m_defaultExpandInternalEntities;
  int m_tagLevel;
  /* State for the declaration currently being processed. */
  ENTITY *m_declEntity;
  const XML_Char *m_doctypeName;
  const XML_Char *m_doctypeSysid;
  const XML_Char *m_doctypePubid;
  const XML_Char *m_declAttributeType;
  const XML_Char *m_declNotationName;
  const XML_Char *m_declNotationPublicId;
  ELEMENT_TYPE *m_declElementType;
  ATTRIBUTE_ID *m_declAttributeId;
  XML_Bool m_declAttributeIsCdata;
  XML_Bool m_declAttributeIsId;
  DTD *m_dtd;
  const XML_Char *m_curBase;
  /* Tag and binding lists, each with a matching free list. */
  TAG *m_tagStack;
  TAG *m_freeTagList;
  BINDING *m_inheritedBindings;
  BINDING *m_freeBindingList;
  /* Attribute storage. */
  int m_attsSize;
  int m_nSpecifiedAtts;
  int m_idAttIndex;
  ATTRIBUTE *m_atts;
  NS_ATT *m_nsAtts;
  unsigned long m_nsAttsVersion;
  unsigned char m_nsAttsPower;
#ifdef XML_ATTR_INFO
  XML_AttrInfo *m_attInfo;
#endif
  POSITION m_position;
  STRING_POOL m_tempPool;
  STRING_POOL m_temp2Pool;
  char *m_groupConnector;
  unsigned int m_groupSize;
  XML_Char m_namespaceSeparator;
  XML_Parser m_parentParser;
  XML_ParsingStatus m_parsingStatus;
#ifdef XML_DTD
  XML_Bool m_isParamEntity;
  XML_Bool m_useForeignDTD;
  enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
  unsigned long m_hash_secret_salt;
#if XML_GE == 1
  ACCOUNTING m_accounting;
  ENTITY_STATS m_entity_stats;
#endif
};
758
/* Allocate, resize and release memory through the allocator callbacks
   stored in the parser's m_mem member.  'parser' must be a pointer to the
   parser object; it is parenthesized so that any pointer expression (not
   just a plain identifier) expands correctly. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
762
763 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)764 XML_ParserCreate(const XML_Char *encodingName) {
765 return XML_ParserCreate_MM(encodingName, NULL, NULL);
766 }
767
768 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)769 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
770 XML_Char tmp[2] = {nsSep, 0};
771 return XML_ParserCreate_MM(encodingName, NULL, tmp);
772 }
773
774 // "xml=http://www.w3.org/XML/1998/namespace"
775 static const XML_Char implicitContext[]
776 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
777 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
778 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
779 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
780 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
781 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
782 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
783 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
784 '\0'};
785
786 /* To avoid warnings about unused functions: */
787 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
788
789 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
790
/* Obtain entropy on Linux 3.17+ via getrandom (libc wrapper or raw syscall,
   depending on HAVE_GETRANDOM), always with GRND_NONBLOCK.

   target: destination buffer of at least count bytes.
   count:  number of random bytes to write.

   Returns 1 once all count bytes have been written (trivially so for
   count == 0) and 0 on any failure other than EINTR.

   The loop keeps going after a short write and only consults errno when
   the call actually failed; errno is unspecified after a successful call,
   so it must not drive the retry decision in that case. */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* getrandom() returns ssize_t and syscall() returns long; keep the
       full width instead of narrowing to int. */
    const long bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        (long)syscall(SYS_getrandom, currentTarget, bytesToWrite,
                      getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      continue; /* short write: ask for the remainder */
    }
    if (bytesWrittenMore < 0 && errno == EINTR) {
      continue; /* interrupted by a signal: retry */
    }
    return 0; /* hard error (e.g. would block) or no progress */
  }

  return 1;
}
818
819 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
820
821 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
822
/* Extract entropy from /dev/urandom.

   target: destination buffer of at least count bytes.
   count:  number of random bytes to write.

   Returns 1 once all count bytes have been read into target and 0 if the
   device cannot be opened, read() reports end-of-file, or read() fails
   with anything other than EINTR.

   A short read is followed by another read for the remainder, and errno is
   only inspected when read() actually failed; after a successful read
   errno is unspecified and must not decide whether to retry. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (! (bytesWrittenMore < 0 && errno == EINTR)) {
      break; /* end-of-file or a hard error: give up */
    }
  }

  close(fd);
  return (bytesWrittenTotal >= count) ? 1 : 0;
}
850
851 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
852
853 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
854
855 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
856
/* Fill target with count random bytes obtained from arc4random().
   Each 32-bit result supplies up to four output bytes, least significant
   byte first; a final partial word is truncated to the bytes still
   needed. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    uint32_t word = arc4random();
    const size_t remaining = count - written;
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    size_t k;

    for (k = 0; k < chunk; k++) {
      out[written++] = (uint8_t)word; /* emit the current low byte */
      word >>= 8;                     /* expose the next byte */
    }
  }
}
872
873 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
874
875 #ifdef _WIN32
876
877 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
878 as it didn't declare it in its header prior to version 5.3.0 of its
879 runtime package (mingwrt, containing stdlib.h). The upstream fix
880 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
881 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
882 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
883 __declspec(dllimport) int rand_s(unsigned int *);
884 # endif
885
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills target with count bytes; every rand_s() draw supplies up to
 * sizeof(unsigned int) bytes, least significant byte first.
 * Returns 1 on success and 0 as soon as rand_s() reports an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;
    size_t chunk = count - filled;
    size_t k;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    if (chunk > sizeof(word)) {
      chunk = sizeof(word);
    }

    for (k = 0; k < chunk; k++) {
      out[filled++] = (uint8_t)word;
      word >>= 8;
    }
  }

  return 1; /* success */
}
909
910 #endif /* _WIN32 */
911
912 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
913
/* Low quality entropy taken from the current time.
 *
 * On Windows the two halves of the system FILETIME are XORed together;
 * elsewhere the microsecond field reported by gettimeofday() is returned.
 */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  unsigned long mixed;

  GetSystemTimeAsFileTime(&now); /* never fails */
  mixed = now.dwHighDateTime ^ now.dwLowDateTime;
  return mixed;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* In NDEBUG builds the assert expands to nothing; the void cast keeps
     rc from being reported as unused in that case. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
936
937 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
938
/* Pass-through helper: returns entropy unchanged.
 *
 * When getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) yields at least 1, the
 * value is also printed to stderr together with label (the name of the
 * provider that produced it) and the size of the value in bytes.
 */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const unsigned long sizeInBytes = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  /* Two hex digits per byte, zero padded to the full width. */
  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          (int)sizeof(entropy) * 2, entropy, sizeInBytes);
  return entropy;
}
947
948 static unsigned long
generate_hash_secret_salt(XML_Parser parser)949 generate_hash_secret_salt(XML_Parser parser) {
950 unsigned long entropy;
951 (void)parser;
952
953 /* "Failproof" high quality providers: */
954 #if defined(HAVE_ARC4RANDOM_BUF)
955 arc4random_buf(&entropy, sizeof(entropy));
956 return ENTROPY_DEBUG("arc4random_buf", entropy);
957 #elif defined(HAVE_ARC4RANDOM)
958 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
959 return ENTROPY_DEBUG("arc4random", entropy);
960 #else
961 /* Try high quality providers first .. */
962 # ifdef _WIN32
963 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
964 return ENTROPY_DEBUG("rand_s", entropy);
965 }
966 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
967 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
968 return ENTROPY_DEBUG("getrandom", entropy);
969 }
970 # endif
971 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
972 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
973 return ENTROPY_DEBUG("/dev/urandom", entropy);
974 }
975 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
976 /* .. and self-made low quality for backup: */
977
978 /* Process ID is 0 bits entropy if attacker has local access */
979 entropy = gather_time_entropy() ^ getpid();
980
981 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
982 if (sizeof(unsigned long) == 4) {
983 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
984 } else {
985 return ENTROPY_DEBUG("fallback(8)",
986 entropy * (unsigned long)2305843009213693951ULL);
987 }
988 #endif
989 }
990
991 static unsigned long
get_hash_secret_salt(XML_Parser parser)992 get_hash_secret_salt(XML_Parser parser) {
993 if (parser->m_parentParser != NULL)
994 return get_hash_secret_salt(parser->m_parentParser);
995 return parser->m_hash_secret_salt;
996 }
997
998 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)999 callProcessor(XML_Parser parser, const char *start, const char *end,
1000 const char **endPtr) {
1001 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1002
1003 if (parser->m_reparseDeferralEnabled
1004 && ! parser->m_parsingStatus.finalBuffer) {
1005 // Heuristic: don't try to parse a partial token again until the amount of
1006 // available data has increased significantly.
1007 const size_t had_before = parser->m_partialTokenBytesBefore;
1008 // ...but *do* try anyway if we're close to causing a reallocation.
1009 size_t available_buffer
1010 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1011 #if XML_CONTEXT_BYTES > 0
1012 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1013 #endif
1014 available_buffer
1015 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1016 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1017 const bool enough
1018 = (have_now >= 2 * had_before)
1019 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1020
1021 if (! enough) {
1022 *endPtr = start; // callers may expect this to be set
1023 return XML_ERROR_NONE;
1024 }
1025 }
1026 #if defined(XML_TESTING)
1027 g_bytesScanned += (unsigned)have_now;
1028 #endif
1029 const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
1030 if (ret == XML_ERROR_NONE) {
1031 // if we consumed nothing, remember what we had on this parse attempt.
1032 if (*endPtr == start) {
1033 parser->m_partialTokenBytesBefore = have_now;
1034 } else {
1035 parser->m_partialTokenBytesBefore = 0;
1036 }
1037 }
1038 return ret;
1039 }
1040
1041 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1042 startParsing(XML_Parser parser) {
1043 /* hash functions must be initialized before setContext() is called */
1044 if (parser->m_hash_secret_salt == 0)
1045 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1046 if (parser->m_ns) {
1047 /* implicit context only set for root parser, since child
1048 parsers (i.e. external entity parsers) will inherit it
1049 */
1050 return setContext(parser, implicitContext);
1051 }
1052 return XML_TRUE;
1053 }
1054
1055 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1056 XML_ParserCreate_MM(const XML_Char *encodingName,
1057 const XML_Memory_Handling_Suite *memsuite,
1058 const XML_Char *nameSep) {
1059 return parserCreate(encodingName, memsuite, nameSep, NULL);
1060 }
1061
/* Allocate and initialise a parser instance.
 *
 * encodingName  protocol encoding name handed on to parserInit (may be NULL)
 * memsuite      allocator triple to use; when NULL the C library's
 *               malloc/realloc/free are installed instead
 * nameSep       when non-NULL, namespace processing is switched on and
 *               *nameSep becomes the namespace separator
 * dtd           DTD to adopt; when NULL a new one is made with dtdCreate
 *
 * Returns the new parser, or NULL when an allocation fails.  Each failure
 * path below releases what had been allocated up to that point.
 */
static XML_Parser
parserCreate(const XML_Char *encodingName,
             const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
             DTD *dtd) {
  XML_Parser parser;

  /* Allocate the parser struct itself and record the allocator in m_mem,
     so that the MALLOC/FREE macros used below work on this parser. */
  if (memsuite) {
    XML_Memory_Handling_Suite *mtemp;
    parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
      /* m_mem is written through a cast pointer (presumably because the
         member is const-qualified -- confirm against the struct) */
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = memsuite->malloc_fcn;
      mtemp->realloc_fcn = memsuite->realloc_fcn;
      mtemp->free_fcn = memsuite->free_fcn;
    }
  } else {
    XML_Memory_Handling_Suite *mtemp;
    parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = malloc;
      mtemp->realloc_fcn = realloc;
      mtemp->free_fcn = free;
    }
  }

  if (! parser)
    return parser;

  parser->m_buffer = NULL;
  parser->m_bufferLim = NULL;

  /* Attribute array; on failure only the parser struct exists so far */
  parser->m_attsSize = INIT_ATTS_SIZE;
  parser->m_atts
      = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
  if (parser->m_atts == NULL) {
    FREE(parser, parser);
    return NULL;
  }
#ifdef XML_ATTR_INFO
  /* Attribute info array, sized like m_atts */
  parser->m_attInfo = (XML_AttrInfo *)MALLOC(
      parser, parser->m_attsSize * sizeof(XML_AttrInfo));
  if (parser->m_attInfo == NULL) {
    FREE(parser, parser->m_atts);
    FREE(parser, parser);
    return NULL;
  }
#endif
  /* Data buffer of INIT_DATA_BUF_SIZE characters */
  parser->m_dataBuf
      = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
  if (parser->m_dataBuf == NULL) {
    FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
    FREE(parser, parser->m_attInfo);
#endif
    FREE(parser, parser);
    return NULL;
  }
  parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;

  /* Adopt the caller's DTD, or create a fresh one */
  if (dtd)
    parser->m_dtd = dtd;
  else {
    parser->m_dtd = dtdCreate(&parser->m_mem);
    if (parser->m_dtd == NULL) {
      FREE(parser, parser->m_dataBuf);
      FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
      FREE(parser, parser->m_attInfo);
#endif
      FREE(parser, parser);
      return NULL;
    }
  }

  /* Fields that parserInit does not touch are set here */
  parser->m_freeBindingList = NULL;
  parser->m_freeTagList = NULL;
  parser->m_freeInternalEntities = NULL;

  parser->m_groupSize = 0;
  parser->m_groupConnector = NULL;

  parser->m_unknownEncodingHandler = NULL;
  parser->m_unknownEncodingHandlerData = NULL;

  /* Defaults for a non-namespace parser; overridden below if nameSep */
  parser->m_namespaceSeparator = ASCII_EXCL;
  parser->m_ns = XML_FALSE;
  parser->m_ns_triplets = XML_FALSE;

  parser->m_nsAtts = NULL;
  parser->m_nsAttsVersion = 0;
  parser->m_nsAttsPower = 0;

  /* Must be NULL before parserInit: parserInit assigns it only when
     encodingName is non-NULL, and the check below relies on that */
  parser->m_protocolEncodingName = NULL;

  poolInit(&parser->m_tempPool, &(parser->m_mem));
  poolInit(&parser->m_temp2Pool, &(parser->m_mem));
  parserInit(parser, encodingName);

  /* An encoding name was requested but is still NULL after parserInit:
     treat as failure and tear the parser down */
  if (encodingName && ! parser->m_protocolEncodingName) {
    if (dtd) {
      // We need to stop the upcoming call to XML_ParserFree from happily
      // destroying parser->m_dtd because the DTD is shared with the parent
      // parser and the only guard that keeps XML_ParserFree from destroying
      // parser->m_dtd is parser->m_isParamEntity but it will be set to
      // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
      parser->m_dtd = NULL;
    }
    XML_ParserFree(parser);
    return NULL;
  }

  /* A separator switches on namespace mode and selects the NS-aware
     internal encoding */
  if (nameSep) {
    parser->m_ns = XML_TRUE;
    parser->m_internalEncoding = XmlGetInternalEncodingNS();
    parser->m_namespaceSeparator = *nameSep;
  } else {
    parser->m_internalEncoding = XmlGetInternalEncoding();
  }

  return parser;
}
1184
/* Put the per-parse state of parser into its initial configuration.
 *
 * Called from parserCreate and from XML_ParserReset.  It installs
 * prologInitProcessor, clears all handlers, rewinds the buffer pointers,
 * clears declaration, position and error state, and sets the status to
 * XML_INITIALIZED.
 *
 * m_protocolEncodingName is assigned only when encodingName is non-NULL;
 * callers set it to NULL beforehand.  The copyString result is stored
 * unchecked (presumably NULL on allocation failure -- parserCreate tests
 * for exactly that afterwards).
 */
static void
parserInit(XML_Parser parser, const XML_Char *encodingName) {
  parser->m_processor = prologInitProcessor;
  XmlPrologStateInit(&parser->m_prologState);
  if (encodingName != NULL) {
    parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
  }
  parser->m_curBase = NULL;
  XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
  /* user data and all callbacks start out unset */
  parser->m_userData = NULL;
  parser->m_handlerArg = NULL;
  parser->m_startElementHandler = NULL;
  parser->m_endElementHandler = NULL;
  parser->m_characterDataHandler = NULL;
  parser->m_processingInstructionHandler = NULL;
  parser->m_commentHandler = NULL;
  parser->m_startCdataSectionHandler = NULL;
  parser->m_endCdataSectionHandler = NULL;
  parser->m_defaultHandler = NULL;
  parser->m_startDoctypeDeclHandler = NULL;
  parser->m_endDoctypeDeclHandler = NULL;
  parser->m_unparsedEntityDeclHandler = NULL;
  parser->m_notationDeclHandler = NULL;
  parser->m_startNamespaceDeclHandler = NULL;
  parser->m_endNamespaceDeclHandler = NULL;
  parser->m_notStandaloneHandler = NULL;
  parser->m_externalEntityRefHandler = NULL;
  /* by default the external entity ref handler receives the parser itself */
  parser->m_externalEntityRefHandlerArg = parser;
  parser->m_skippedEntityHandler = NULL;
  parser->m_elementDeclHandler = NULL;
  parser->m_attlistDeclHandler = NULL;
  parser->m_entityDeclHandler = NULL;
  parser->m_xmlDeclHandler = NULL;
  /* rewind to the start of the buffer; m_buffer itself is kept as is */
  parser->m_bufferPtr = parser->m_buffer;
  parser->m_bufferEnd = parser->m_buffer;
  parser->m_parseEndByteIndex = 0;
  parser->m_parseEndPtr = NULL;
  /* reparse deferral bookkeeping (read by callProcessor) */
  parser->m_partialTokenBytesBefore = 0;
  parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
  parser->m_lastBufferRequestSize = 0;
  /* declaration state */
  parser->m_declElementType = NULL;
  parser->m_declAttributeId = NULL;
  parser->m_declEntity = NULL;
  parser->m_doctypeName = NULL;
  parser->m_doctypeSysid = NULL;
  parser->m_doctypePubid = NULL;
  parser->m_declAttributeType = NULL;
  parser->m_declNotationName = NULL;
  parser->m_declNotationPublicId = NULL;
  parser->m_declAttributeIsCdata = XML_FALSE;
  parser->m_declAttributeIsId = XML_FALSE;
  /* position and error reporting state */
  memset(&parser->m_position, 0, sizeof(POSITION));
  parser->m_errorCode = XML_ERROR_NONE;
  parser->m_eventPtr = NULL;
  parser->m_eventEndPtr = NULL;
  parser->m_positionPtr = NULL;
  parser->m_openInternalEntities = NULL;
  parser->m_defaultExpandInternalEntities = XML_TRUE;
  parser->m_tagLevel = 0;
  parser->m_tagStack = NULL;
  parser->m_inheritedBindings = NULL;
  parser->m_nSpecifiedAtts = 0;
  parser->m_unknownEncodingMem = NULL;
  parser->m_unknownEncodingRelease = NULL;
  parser->m_unknownEncodingData = NULL;
  parser->m_parentParser = NULL;
  parser->m_parsingStatus.parsing = XML_INITIALIZED;
#ifdef XML_DTD
  parser->m_isParamEntity = XML_FALSE;
  parser->m_useForeignDTD = XML_FALSE;
  parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif
  /* 0 means "not generated yet": startParsing fills it in when still 0 */
  parser->m_hash_secret_salt = 0;

#if XML_GE == 1
  /* accounting and entity statistics start from zero, with debug levels
     taken from getDebugLevel and limits set to the compiled-in defaults */
  memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
  parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
  parser->m_accounting.maximumAmplificationFactor
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
  parser->m_accounting.activationThresholdBytes
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;

  memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
  parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
#endif
}
1271
1272 /* moves list of bindings to m_freeBindingList */
1273 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1274 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1275 while (bindings) {
1276 BINDING *b = bindings;
1277 bindings = bindings->nextTagBinding;
1278 b->nextTagBinding = parser->m_freeBindingList;
1279 parser->m_freeBindingList = b;
1280 }
1281 }
1282
1283 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1284 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1285 TAG *tStk;
1286 OPEN_INTERNAL_ENTITY *openEntityList;
1287
1288 if (parser == NULL)
1289 return XML_FALSE;
1290
1291 if (parser->m_parentParser)
1292 return XML_FALSE;
1293 /* move m_tagStack to m_freeTagList */
1294 tStk = parser->m_tagStack;
1295 while (tStk) {
1296 TAG *tag = tStk;
1297 tStk = tStk->parent;
1298 tag->parent = parser->m_freeTagList;
1299 moveToFreeBindingList(parser, tag->bindings);
1300 tag->bindings = NULL;
1301 parser->m_freeTagList = tag;
1302 }
1303 /* move m_openInternalEntities to m_freeInternalEntities */
1304 openEntityList = parser->m_openInternalEntities;
1305 while (openEntityList) {
1306 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1307 openEntityList = openEntity->next;
1308 openEntity->next = parser->m_freeInternalEntities;
1309 parser->m_freeInternalEntities = openEntity;
1310 }
1311 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1312 FREE(parser, parser->m_unknownEncodingMem);
1313 if (parser->m_unknownEncodingRelease)
1314 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1315 poolClear(&parser->m_tempPool);
1316 poolClear(&parser->m_temp2Pool);
1317 FREE(parser, (void *)parser->m_protocolEncodingName);
1318 parser->m_protocolEncodingName = NULL;
1319 parserInit(parser, encodingName);
1320 dtdReset(parser->m_dtd, &parser->m_mem);
1321 return XML_TRUE;
1322 }
1323
1324 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1325 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1326 if (parser == NULL)
1327 return XML_STATUS_ERROR;
1328 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1329 XXX There's no way for the caller to determine which of the
1330 XXX possible error cases caused the XML_STATUS_ERROR return.
1331 */
1332 if (parser->m_parsingStatus.parsing == XML_PARSING
1333 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1334 return XML_STATUS_ERROR;
1335
1336 /* Get rid of any previous encoding name */
1337 FREE(parser, (void *)parser->m_protocolEncodingName);
1338
1339 if (encodingName == NULL)
1340 /* No new encoding name */
1341 parser->m_protocolEncodingName = NULL;
1342 else {
1343 /* Copy the new encoding name into allocated memory */
1344 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1345 if (! parser->m_protocolEncodingName)
1346 return XML_STATUS_ERROR;
1347 }
1348 return XML_STATUS_OK;
1349 }
1350
/* Create a child parser for an external entity referenced from oldParser.
 *
 * The child is built with parserCreate using oldParser's allocator and
 * namespace separator, then receives copies of oldParser's handlers,
 * user data, hash secret salt and related settings, and has its
 * m_parentParser set to oldParser.
 *
 * With a non-NULL context (always, when XML_DTD is not defined) the DTD
 * is copied with dtdCopy and the context applied with setContext.  With
 * XML_DTD defined and a NULL context, the child shares oldParser's DTD
 * and is set up as an external parameter entity parser.
 *
 * Returns the new parser, or NULL when oldParser is NULL or when
 * creation, DTD copying or context setup fails.
 */
XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
                               const XML_Char *encodingName) {
  XML_Parser parser = oldParser;
  DTD *newDtd = NULL;
  DTD *oldDtd;
  XML_StartElementHandler oldStartElementHandler;
  XML_EndElementHandler oldEndElementHandler;
  XML_CharacterDataHandler oldCharacterDataHandler;
  XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
  XML_CommentHandler oldCommentHandler;
  XML_StartCdataSectionHandler oldStartCdataSectionHandler;
  XML_EndCdataSectionHandler oldEndCdataSectionHandler;
  XML_DefaultHandler oldDefaultHandler;
  XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
  XML_NotationDeclHandler oldNotationDeclHandler;
  XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
  XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
  XML_NotStandaloneHandler oldNotStandaloneHandler;
  XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
  XML_SkippedEntityHandler oldSkippedEntityHandler;
  XML_UnknownEncodingHandler oldUnknownEncodingHandler;
  XML_ElementDeclHandler oldElementDeclHandler;
  XML_AttlistDeclHandler oldAttlistDeclHandler;
  XML_EntityDeclHandler oldEntityDeclHandler;
  XML_XmlDeclHandler oldXmlDeclHandler;
  ELEMENT_TYPE *oldDeclElementType;

  void *oldUserData;
  void *oldHandlerArg;
  XML_Bool oldDefaultExpandInternalEntities;
  XML_Parser oldExternalEntityRefHandlerArg;
#ifdef XML_DTD
  enum XML_ParamEntityParsing oldParamEntityParsing;
  int oldInEntityValue;
#endif
  XML_Bool oldns_triplets;
  /* Note that the new parser shares the same hash secret as the old
     parser, so that dtdCopy and copyEntityTable can lookup values
     from hash tables associated with either parser without us having
     to worry which hash secrets each table has.
  */
  unsigned long oldhash_secret_salt;
  XML_Bool oldReparseDeferralEnabled;

  /* Validate the oldParser parameter before we pull everything out of it */
  if (oldParser == NULL)
    return NULL;

  /* Stash the original parser contents on the stack */
  oldDtd = parser->m_dtd;
  oldStartElementHandler = parser->m_startElementHandler;
  oldEndElementHandler = parser->m_endElementHandler;
  oldCharacterDataHandler = parser->m_characterDataHandler;
  oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
  oldCommentHandler = parser->m_commentHandler;
  oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
  oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
  oldDefaultHandler = parser->m_defaultHandler;
  oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
  oldNotationDeclHandler = parser->m_notationDeclHandler;
  oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
  oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
  oldNotStandaloneHandler = parser->m_notStandaloneHandler;
  oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
  oldSkippedEntityHandler = parser->m_skippedEntityHandler;
  oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
  oldElementDeclHandler = parser->m_elementDeclHandler;
  oldAttlistDeclHandler = parser->m_attlistDeclHandler;
  oldEntityDeclHandler = parser->m_entityDeclHandler;
  oldXmlDeclHandler = parser->m_xmlDeclHandler;
  oldDeclElementType = parser->m_declElementType;

  oldUserData = parser->m_userData;
  oldHandlerArg = parser->m_handlerArg;
  oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
  oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
#ifdef XML_DTD
  oldParamEntityParsing = parser->m_paramEntityParsing;
  oldInEntityValue = parser->m_prologState.inEntityValue;
#endif
  oldns_triplets = parser->m_ns_triplets;
  /* Note that the new parser shares the same hash secret as the old
     parser, so that dtdCopy and copyEntityTable can lookup values
     from hash tables associated with either parser without us having
     to worry which hash secrets each table has.
  */
  oldhash_secret_salt = parser->m_hash_secret_salt;
  oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;

#ifdef XML_DTD
  /* No context: the child will share the parent's DTD instead of getting
     one of its own from parserCreate */
  if (! context)
    newDtd = oldDtd;
#endif /* XML_DTD */

  /* Note that the magical uses of the pre-processor to make field
     access look more like C++ require that `parser' be overwritten
     here.  This makes this function more painful to follow than it
     would be otherwise.
  */
  if (parser->m_ns) {
    /* parserCreate takes the separator as a string, so wrap the single
       character in a NUL-terminated array */
    XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
    parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
  } else {
    parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
  }

  if (! parser)
    return NULL;

  /* From here on `parser' is the child; copy the stashed settings in */
  parser->m_startElementHandler = oldStartElementHandler;
  parser->m_endElementHandler = oldEndElementHandler;
  parser->m_characterDataHandler = oldCharacterDataHandler;
  parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
  parser->m_commentHandler = oldCommentHandler;
  parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
  parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
  parser->m_defaultHandler = oldDefaultHandler;
  parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
  parser->m_notationDeclHandler = oldNotationDeclHandler;
  parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
  parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
  parser->m_notStandaloneHandler = oldNotStandaloneHandler;
  parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
  parser->m_skippedEntityHandler = oldSkippedEntityHandler;
  parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
  parser->m_elementDeclHandler = oldElementDeclHandler;
  parser->m_attlistDeclHandler = oldAttlistDeclHandler;
  parser->m_entityDeclHandler = oldEntityDeclHandler;
  parser->m_xmlDeclHandler = oldXmlDeclHandler;
  parser->m_declElementType = oldDeclElementType;
  parser->m_userData = oldUserData;
  /* If the parent passed its user data to handlers, so does the child;
     otherwise the child passes itself */
  if (oldUserData == oldHandlerArg)
    parser->m_handlerArg = parser->m_userData;
  else
    parser->m_handlerArg = parser;
  /* Carry over the external entity ref handler argument only if it was
     something other than the parent parser itself; otherwise the value
     set by parserInit (the child) stays */
  if (oldExternalEntityRefHandlerArg != oldParser)
    parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
  parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
  parser->m_ns_triplets = oldns_triplets;
  parser->m_hash_secret_salt = oldhash_secret_salt;
  parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
  parser->m_parentParser = oldParser;
#ifdef XML_DTD
  parser->m_paramEntityParsing = oldParamEntityParsing;
  parser->m_prologState.inEntityValue = oldInEntityValue;
  if (context) {
#endif /* XML_DTD */
    /* Without XML_DTD the `if (context)' above is compiled out, so this
       part runs unconditionally */
    if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
        || ! setContext(parser, context)) {
      XML_ParserFree(parser);
      return NULL;
    }
    parser->m_processor = externalEntityInitProcessor;
#ifdef XML_DTD
  } else {
    /* The DTD instance referenced by parser->m_dtd is shared between the
       document's root parser and external PE parsers, therefore one does not
       need to call setContext. In addition, one also *must* not call
       setContext, because this would overwrite existing prefix->binding
       pointers in parser->m_dtd with ones that get destroyed with the external
       PE parser. This would leave those prefixes with dangling pointers.
    */
    parser->m_isParamEntity = XML_TRUE;
    XmlPrologStateInitExternalEntity(&parser->m_prologState);
    parser->m_processor = externalParEntInitProcessor;
  }
#endif /* XML_DTD */
  return parser;
}
1521
1522 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1523 destroyBindings(BINDING *bindings, XML_Parser parser) {
1524 for (;;) {
1525 BINDING *b = bindings;
1526 if (! b)
1527 break;
1528 bindings = b->nextTagBinding;
1529 FREE(parser, b->uri);
1530 FREE(parser, b);
1531 }
1532 }
1533
1534 void XMLCALL
XML_ParserFree(XML_Parser parser)1535 XML_ParserFree(XML_Parser parser) {
1536 TAG *tagList;
1537 OPEN_INTERNAL_ENTITY *entityList;
1538 if (parser == NULL)
1539 return;
1540 /* free m_tagStack and m_freeTagList */
1541 tagList = parser->m_tagStack;
1542 for (;;) {
1543 TAG *p;
1544 if (tagList == NULL) {
1545 if (parser->m_freeTagList == NULL)
1546 break;
1547 tagList = parser->m_freeTagList;
1548 parser->m_freeTagList = NULL;
1549 }
1550 p = tagList;
1551 tagList = tagList->parent;
1552 FREE(parser, p->buf);
1553 destroyBindings(p->bindings, parser);
1554 FREE(parser, p);
1555 }
1556 /* free m_openInternalEntities and m_freeInternalEntities */
1557 entityList = parser->m_openInternalEntities;
1558 for (;;) {
1559 OPEN_INTERNAL_ENTITY *openEntity;
1560 if (entityList == NULL) {
1561 if (parser->m_freeInternalEntities == NULL)
1562 break;
1563 entityList = parser->m_freeInternalEntities;
1564 parser->m_freeInternalEntities = NULL;
1565 }
1566 openEntity = entityList;
1567 entityList = entityList->next;
1568 FREE(parser, openEntity);
1569 }
1570
1571 destroyBindings(parser->m_freeBindingList, parser);
1572 destroyBindings(parser->m_inheritedBindings, parser);
1573 poolDestroy(&parser->m_tempPool);
1574 poolDestroy(&parser->m_temp2Pool);
1575 FREE(parser, (void *)parser->m_protocolEncodingName);
1576 #ifdef XML_DTD
1577 /* external parameter entity parsers share the DTD structure
1578 parser->m_dtd with the root parser, so we must not destroy it
1579 */
1580 if (! parser->m_isParamEntity && parser->m_dtd)
1581 #else
1582 if (parser->m_dtd)
1583 #endif /* XML_DTD */
1584 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1585 &parser->m_mem);
1586 FREE(parser, (void *)parser->m_atts);
1587 #ifdef XML_ATTR_INFO
1588 FREE(parser, (void *)parser->m_attInfo);
1589 #endif
1590 FREE(parser, parser->m_groupConnector);
1591 FREE(parser, parser->m_buffer);
1592 FREE(parser, parser->m_dataBuf);
1593 FREE(parser, parser->m_nsAtts);
1594 FREE(parser, parser->m_unknownEncodingMem);
1595 if (parser->m_unknownEncodingRelease)
1596 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1597 FREE(parser, parser);
1598 }
1599
1600 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1601 XML_UseParserAsHandlerArg(XML_Parser parser) {
1602 if (parser != NULL)
1603 parser->m_handlerArg = parser;
1604 }
1605
1606 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1607 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1608 if (parser == NULL)
1609 return XML_ERROR_INVALID_ARGUMENT;
1610 #ifdef XML_DTD
1611 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1612 if (parser->m_parsingStatus.parsing == XML_PARSING
1613 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1614 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1615 parser->m_useForeignDTD = useDTD;
1616 return XML_ERROR_NONE;
1617 #else
1618 UNUSED_P(useDTD);
1619 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1620 #endif
1621 }
1622
1623 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1624 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1625 if (parser == NULL)
1626 return;
1627 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1628 if (parser->m_parsingStatus.parsing == XML_PARSING
1629 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1630 return;
1631 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1632 }
1633
1634 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1635 XML_SetUserData(XML_Parser parser, void *p) {
1636 if (parser == NULL)
1637 return;
1638 if (parser->m_handlerArg == parser->m_userData)
1639 parser->m_handlerArg = parser->m_userData = p;
1640 else
1641 parser->m_userData = p;
1642 }
1643
1644 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1645 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1646 if (parser == NULL)
1647 return XML_STATUS_ERROR;
1648 if (p) {
1649 p = poolCopyString(&parser->m_dtd->pool, p);
1650 if (! p)
1651 return XML_STATUS_ERROR;
1652 parser->m_curBase = p;
1653 } else
1654 parser->m_curBase = NULL;
1655 return XML_STATUS_OK;
1656 }
1657
1658 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1659 XML_GetBase(XML_Parser parser) {
1660 if (parser == NULL)
1661 return NULL;
1662 return parser->m_curBase;
1663 }
1664
1665 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1666 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1667 if (parser == NULL)
1668 return -1;
1669 return parser->m_nSpecifiedAtts;
1670 }
1671
1672 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1673 XML_GetIdAttributeIndex(XML_Parser parser) {
1674 if (parser == NULL)
1675 return -1;
1676 return parser->m_idAttIndex;
1677 }
1678
1679 #ifdef XML_ATTR_INFO
1680 const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser)1681 XML_GetAttributeInfo(XML_Parser parser) {
1682 if (parser == NULL)
1683 return NULL;
1684 return parser->m_attInfo;
1685 }
1686 #endif
1687
1688 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1689 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1690 XML_EndElementHandler end) {
1691 if (parser == NULL)
1692 return;
1693 parser->m_startElementHandler = start;
1694 parser->m_endElementHandler = end;
1695 }
1696
1697 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1698 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1699 if (parser != NULL)
1700 parser->m_startElementHandler = start;
1701 }
1702
1703 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1704 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1705 if (parser != NULL)
1706 parser->m_endElementHandler = end;
1707 }
1708
1709 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1710 XML_SetCharacterDataHandler(XML_Parser parser,
1711 XML_CharacterDataHandler handler) {
1712 if (parser != NULL)
1713 parser->m_characterDataHandler = handler;
1714 }
1715
1716 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1717 XML_SetProcessingInstructionHandler(XML_Parser parser,
1718 XML_ProcessingInstructionHandler handler) {
1719 if (parser != NULL)
1720 parser->m_processingInstructionHandler = handler;
1721 }
1722
1723 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1724 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1725 if (parser != NULL)
1726 parser->m_commentHandler = handler;
1727 }
1728
1729 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1730 XML_SetCdataSectionHandler(XML_Parser parser,
1731 XML_StartCdataSectionHandler start,
1732 XML_EndCdataSectionHandler end) {
1733 if (parser == NULL)
1734 return;
1735 parser->m_startCdataSectionHandler = start;
1736 parser->m_endCdataSectionHandler = end;
1737 }
1738
1739 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1740 XML_SetStartCdataSectionHandler(XML_Parser parser,
1741 XML_StartCdataSectionHandler start) {
1742 if (parser != NULL)
1743 parser->m_startCdataSectionHandler = start;
1744 }
1745
1746 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1747 XML_SetEndCdataSectionHandler(XML_Parser parser,
1748 XML_EndCdataSectionHandler end) {
1749 if (parser != NULL)
1750 parser->m_endCdataSectionHandler = end;
1751 }
1752
1753 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1754 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1755 if (parser == NULL)
1756 return;
1757 parser->m_defaultHandler = handler;
1758 parser->m_defaultExpandInternalEntities = XML_FALSE;
1759 }
1760
1761 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1762 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1763 if (parser == NULL)
1764 return;
1765 parser->m_defaultHandler = handler;
1766 parser->m_defaultExpandInternalEntities = XML_TRUE;
1767 }
1768
1769 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1770 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1771 XML_EndDoctypeDeclHandler end) {
1772 if (parser == NULL)
1773 return;
1774 parser->m_startDoctypeDeclHandler = start;
1775 parser->m_endDoctypeDeclHandler = end;
1776 }
1777
1778 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1779 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1780 XML_StartDoctypeDeclHandler start) {
1781 if (parser != NULL)
1782 parser->m_startDoctypeDeclHandler = start;
1783 }
1784
1785 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1786 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1787 if (parser != NULL)
1788 parser->m_endDoctypeDeclHandler = end;
1789 }
1790
1791 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1792 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1793 XML_UnparsedEntityDeclHandler handler) {
1794 if (parser != NULL)
1795 parser->m_unparsedEntityDeclHandler = handler;
1796 }
1797
1798 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1799 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1800 if (parser != NULL)
1801 parser->m_notationDeclHandler = handler;
1802 }
1803
1804 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1805 XML_SetNamespaceDeclHandler(XML_Parser parser,
1806 XML_StartNamespaceDeclHandler start,
1807 XML_EndNamespaceDeclHandler end) {
1808 if (parser == NULL)
1809 return;
1810 parser->m_startNamespaceDeclHandler = start;
1811 parser->m_endNamespaceDeclHandler = end;
1812 }
1813
1814 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1815 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1816 XML_StartNamespaceDeclHandler start) {
1817 if (parser != NULL)
1818 parser->m_startNamespaceDeclHandler = start;
1819 }
1820
1821 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1822 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1823 XML_EndNamespaceDeclHandler end) {
1824 if (parser != NULL)
1825 parser->m_endNamespaceDeclHandler = end;
1826 }
1827
1828 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1829 XML_SetNotStandaloneHandler(XML_Parser parser,
1830 XML_NotStandaloneHandler handler) {
1831 if (parser != NULL)
1832 parser->m_notStandaloneHandler = handler;
1833 }
1834
1835 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1836 XML_SetExternalEntityRefHandler(XML_Parser parser,
1837 XML_ExternalEntityRefHandler handler) {
1838 if (parser != NULL)
1839 parser->m_externalEntityRefHandler = handler;
1840 }
1841
1842 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1843 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1844 if (parser == NULL)
1845 return;
1846 if (arg)
1847 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1848 else
1849 parser->m_externalEntityRefHandlerArg = parser;
1850 }
1851
1852 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1853 XML_SetSkippedEntityHandler(XML_Parser parser,
1854 XML_SkippedEntityHandler handler) {
1855 if (parser != NULL)
1856 parser->m_skippedEntityHandler = handler;
1857 }
1858
1859 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1860 XML_SetUnknownEncodingHandler(XML_Parser parser,
1861 XML_UnknownEncodingHandler handler, void *data) {
1862 if (parser == NULL)
1863 return;
1864 parser->m_unknownEncodingHandler = handler;
1865 parser->m_unknownEncodingHandlerData = data;
1866 }
1867
1868 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1869 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1870 if (parser != NULL)
1871 parser->m_elementDeclHandler = eldecl;
1872 }
1873
1874 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1875 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1876 if (parser != NULL)
1877 parser->m_attlistDeclHandler = attdecl;
1878 }
1879
1880 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1881 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1882 if (parser != NULL)
1883 parser->m_entityDeclHandler = handler;
1884 }
1885
1886 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1887 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1888 if (parser != NULL)
1889 parser->m_xmlDeclHandler = handler;
1890 }
1891
1892 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1893 XML_SetParamEntityParsing(XML_Parser parser,
1894 enum XML_ParamEntityParsing peParsing) {
1895 if (parser == NULL)
1896 return 0;
1897 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1898 if (parser->m_parsingStatus.parsing == XML_PARSING
1899 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1900 return 0;
1901 #ifdef XML_DTD
1902 parser->m_paramEntityParsing = peParsing;
1903 return 1;
1904 #else
1905 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1906 #endif
1907 }
1908
1909 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1910 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1911 if (parser == NULL)
1912 return 0;
1913 if (parser->m_parentParser)
1914 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1915 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1916 if (parser->m_parsingStatus.parsing == XML_PARSING
1917 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1918 return 0;
1919 parser->m_hash_secret_salt = hash_salt;
1920 return 1;
1921 }
1922
1923 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1924 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1925 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1926 if (parser != NULL)
1927 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1928 return XML_STATUS_ERROR;
1929 }
1930 switch (parser->m_parsingStatus.parsing) {
1931 case XML_SUSPENDED:
1932 parser->m_errorCode = XML_ERROR_SUSPENDED;
1933 return XML_STATUS_ERROR;
1934 case XML_FINISHED:
1935 parser->m_errorCode = XML_ERROR_FINISHED;
1936 return XML_STATUS_ERROR;
1937 case XML_INITIALIZED:
1938 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1939 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1940 return XML_STATUS_ERROR;
1941 }
1942 /* fall through */
1943 default:
1944 parser->m_parsingStatus.parsing = XML_PARSING;
1945 }
1946
1947 #if XML_CONTEXT_BYTES == 0
1948 if (parser->m_bufferPtr == parser->m_bufferEnd) {
1949 const char *end;
1950 int nLeftOver;
1951 enum XML_Status result;
1952 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1953 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1954 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1955 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1956 parser->m_processor = errorProcessor;
1957 return XML_STATUS_ERROR;
1958 }
1959 // though this isn't a buffer request, we assume that `len` is the app's
1960 // preferred buffer fill size, and therefore save it here.
1961 parser->m_lastBufferRequestSize = len;
1962 parser->m_parseEndByteIndex += len;
1963 parser->m_positionPtr = s;
1964 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1965
1966 parser->m_errorCode
1967 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
1968
1969 if (parser->m_errorCode != XML_ERROR_NONE) {
1970 parser->m_eventEndPtr = parser->m_eventPtr;
1971 parser->m_processor = errorProcessor;
1972 return XML_STATUS_ERROR;
1973 } else {
1974 switch (parser->m_parsingStatus.parsing) {
1975 case XML_SUSPENDED:
1976 result = XML_STATUS_SUSPENDED;
1977 break;
1978 case XML_INITIALIZED:
1979 case XML_PARSING:
1980 if (isFinal) {
1981 parser->m_parsingStatus.parsing = XML_FINISHED;
1982 return XML_STATUS_OK;
1983 }
1984 /* fall through */
1985 default:
1986 result = XML_STATUS_OK;
1987 }
1988 }
1989
1990 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1991 &parser->m_position);
1992 nLeftOver = s + len - end;
1993 if (nLeftOver) {
1994 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
1995 // (and XML_ERROR_FINISHED) from XML_GetBuffer.
1996 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
1997 parser->m_parsingStatus.parsing = XML_PARSING;
1998 void *const temp = XML_GetBuffer(parser, nLeftOver);
1999 parser->m_parsingStatus.parsing = originalStatus;
2000 // GetBuffer may have overwritten this, but we want to remember what the
2001 // app requested, not how many bytes were left over after parsing.
2002 parser->m_lastBufferRequestSize = len;
2003 if (temp == NULL) {
2004 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2005 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2006 parser->m_processor = errorProcessor;
2007 return XML_STATUS_ERROR;
2008 }
2009 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2010 // don't have any data to preserve, and can copy straight into the start
2011 // of the buffer rather than the GetBuffer return pointer (which may be
2012 // pointing further into the allocated buffer).
2013 memcpy(parser->m_buffer, end, nLeftOver);
2014 }
2015 parser->m_bufferPtr = parser->m_buffer;
2016 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2017 parser->m_positionPtr = parser->m_bufferPtr;
2018 parser->m_parseEndPtr = parser->m_bufferEnd;
2019 parser->m_eventPtr = parser->m_bufferPtr;
2020 parser->m_eventEndPtr = parser->m_bufferPtr;
2021 return result;
2022 }
2023 #endif /* XML_CONTEXT_BYTES == 0 */
2024 void *buff = XML_GetBuffer(parser, len);
2025 if (buff == NULL)
2026 return XML_STATUS_ERROR;
2027 if (len > 0) {
2028 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2029 memcpy(buff, s, len);
2030 }
2031 return XML_ParseBuffer(parser, len, isFinal);
2032 }
2033
2034 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2035 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2036 const char *start;
2037 enum XML_Status result = XML_STATUS_OK;
2038
2039 if (parser == NULL)
2040 return XML_STATUS_ERROR;
2041 switch (parser->m_parsingStatus.parsing) {
2042 case XML_SUSPENDED:
2043 parser->m_errorCode = XML_ERROR_SUSPENDED;
2044 return XML_STATUS_ERROR;
2045 case XML_FINISHED:
2046 parser->m_errorCode = XML_ERROR_FINISHED;
2047 return XML_STATUS_ERROR;
2048 case XML_INITIALIZED:
2049 /* Has someone called XML_GetBuffer successfully before? */
2050 if (! parser->m_bufferPtr) {
2051 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2052 return XML_STATUS_ERROR;
2053 }
2054
2055 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2056 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2057 return XML_STATUS_ERROR;
2058 }
2059 /* fall through */
2060 default:
2061 parser->m_parsingStatus.parsing = XML_PARSING;
2062 }
2063
2064 start = parser->m_bufferPtr;
2065 parser->m_positionPtr = start;
2066 parser->m_bufferEnd += len;
2067 parser->m_parseEndPtr = parser->m_bufferEnd;
2068 parser->m_parseEndByteIndex += len;
2069 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2070
2071 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2072 &parser->m_bufferPtr);
2073
2074 if (parser->m_errorCode != XML_ERROR_NONE) {
2075 parser->m_eventEndPtr = parser->m_eventPtr;
2076 parser->m_processor = errorProcessor;
2077 return XML_STATUS_ERROR;
2078 } else {
2079 switch (parser->m_parsingStatus.parsing) {
2080 case XML_SUSPENDED:
2081 result = XML_STATUS_SUSPENDED;
2082 break;
2083 case XML_INITIALIZED:
2084 case XML_PARSING:
2085 if (isFinal) {
2086 parser->m_parsingStatus.parsing = XML_FINISHED;
2087 return result;
2088 }
2089 default:; /* should not happen */
2090 }
2091 }
2092
2093 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2094 parser->m_bufferPtr, &parser->m_position);
2095 parser->m_positionPtr = parser->m_bufferPtr;
2096 return result;
2097 }
2098
2099 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2100 XML_GetBuffer(XML_Parser parser, int len) {
2101 if (parser == NULL)
2102 return NULL;
2103 if (len < 0) {
2104 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2105 return NULL;
2106 }
2107 switch (parser->m_parsingStatus.parsing) {
2108 case XML_SUSPENDED:
2109 parser->m_errorCode = XML_ERROR_SUSPENDED;
2110 return NULL;
2111 case XML_FINISHED:
2112 parser->m_errorCode = XML_ERROR_FINISHED;
2113 return NULL;
2114 default:;
2115 }
2116
2117 // whether or not the request succeeds, `len` seems to be the app's preferred
2118 // buffer fill size; remember it.
2119 parser->m_lastBufferRequestSize = len;
2120 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2121 || parser->m_buffer == NULL) {
2122 #if XML_CONTEXT_BYTES > 0
2123 int keep;
2124 #endif /* XML_CONTEXT_BYTES > 0 */
2125 /* Do not invoke signed arithmetic overflow: */
2126 int neededSize = (int)((unsigned)len
2127 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2128 parser->m_bufferEnd, parser->m_bufferPtr));
2129 if (neededSize < 0) {
2130 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2131 return NULL;
2132 }
2133 #if XML_CONTEXT_BYTES > 0
2134 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2135 if (keep > XML_CONTEXT_BYTES)
2136 keep = XML_CONTEXT_BYTES;
2137 /* Detect and prevent integer overflow */
2138 if (keep > INT_MAX - neededSize) {
2139 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2140 return NULL;
2141 }
2142 neededSize += keep;
2143 #endif /* XML_CONTEXT_BYTES > 0 */
2144 if (parser->m_buffer && parser->m_bufferPtr
2145 && neededSize
2146 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2147 #if XML_CONTEXT_BYTES > 0
2148 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2149 int offset
2150 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2151 - keep;
2152 /* The buffer pointers cannot be NULL here; we have at least some bytes
2153 * in the buffer */
2154 memmove(parser->m_buffer, &parser->m_buffer[offset],
2155 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2156 parser->m_bufferEnd -= offset;
2157 parser->m_bufferPtr -= offset;
2158 }
2159 #else
2160 memmove(parser->m_buffer, parser->m_bufferPtr,
2161 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2162 parser->m_bufferEnd
2163 = parser->m_buffer
2164 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2165 parser->m_bufferPtr = parser->m_buffer;
2166 #endif /* XML_CONTEXT_BYTES > 0 */
2167 } else {
2168 char *newBuf;
2169 int bufferSize
2170 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2171 if (bufferSize == 0)
2172 bufferSize = INIT_BUFFER_SIZE;
2173 do {
2174 /* Do not invoke signed arithmetic overflow: */
2175 bufferSize = (int)(2U * (unsigned)bufferSize);
2176 } while (bufferSize < neededSize && bufferSize > 0);
2177 if (bufferSize <= 0) {
2178 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2179 return NULL;
2180 }
2181 newBuf = (char *)MALLOC(parser, bufferSize);
2182 if (newBuf == 0) {
2183 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2184 return NULL;
2185 }
2186 parser->m_bufferLim = newBuf + bufferSize;
2187 #if XML_CONTEXT_BYTES > 0
2188 if (parser->m_bufferPtr) {
2189 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2190 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2191 + keep);
2192 FREE(parser, parser->m_buffer);
2193 parser->m_buffer = newBuf;
2194 parser->m_bufferEnd
2195 = parser->m_buffer
2196 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2197 + keep;
2198 parser->m_bufferPtr = parser->m_buffer + keep;
2199 } else {
2200 /* This must be a brand new buffer with no data in it yet */
2201 parser->m_bufferEnd = newBuf;
2202 parser->m_bufferPtr = parser->m_buffer = newBuf;
2203 }
2204 #else
2205 if (parser->m_bufferPtr) {
2206 memcpy(newBuf, parser->m_bufferPtr,
2207 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2208 FREE(parser, parser->m_buffer);
2209 parser->m_bufferEnd
2210 = newBuf
2211 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2212 } else {
2213 /* This must be a brand new buffer with no data in it yet */
2214 parser->m_bufferEnd = newBuf;
2215 }
2216 parser->m_bufferPtr = parser->m_buffer = newBuf;
2217 #endif /* XML_CONTEXT_BYTES > 0 */
2218 }
2219 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2220 parser->m_positionPtr = NULL;
2221 }
2222 return parser->m_bufferEnd;
2223 }
2224
2225 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2226 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2227 if (parser == NULL)
2228 return XML_STATUS_ERROR;
2229 switch (parser->m_parsingStatus.parsing) {
2230 case XML_SUSPENDED:
2231 if (resumable) {
2232 parser->m_errorCode = XML_ERROR_SUSPENDED;
2233 return XML_STATUS_ERROR;
2234 }
2235 parser->m_parsingStatus.parsing = XML_FINISHED;
2236 break;
2237 case XML_FINISHED:
2238 parser->m_errorCode = XML_ERROR_FINISHED;
2239 return XML_STATUS_ERROR;
2240 default:
2241 if (resumable) {
2242 #ifdef XML_DTD
2243 if (parser->m_isParamEntity) {
2244 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2245 return XML_STATUS_ERROR;
2246 }
2247 #endif
2248 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2249 } else
2250 parser->m_parsingStatus.parsing = XML_FINISHED;
2251 }
2252 return XML_STATUS_OK;
2253 }
2254
2255 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2256 XML_ResumeParser(XML_Parser parser) {
2257 enum XML_Status result = XML_STATUS_OK;
2258
2259 if (parser == NULL)
2260 return XML_STATUS_ERROR;
2261 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2262 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2263 return XML_STATUS_ERROR;
2264 }
2265 parser->m_parsingStatus.parsing = XML_PARSING;
2266
2267 parser->m_errorCode = callProcessor(
2268 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2269
2270 if (parser->m_errorCode != XML_ERROR_NONE) {
2271 parser->m_eventEndPtr = parser->m_eventPtr;
2272 parser->m_processor = errorProcessor;
2273 return XML_STATUS_ERROR;
2274 } else {
2275 switch (parser->m_parsingStatus.parsing) {
2276 case XML_SUSPENDED:
2277 result = XML_STATUS_SUSPENDED;
2278 break;
2279 case XML_INITIALIZED:
2280 case XML_PARSING:
2281 if (parser->m_parsingStatus.finalBuffer) {
2282 parser->m_parsingStatus.parsing = XML_FINISHED;
2283 return result;
2284 }
2285 default:;
2286 }
2287 }
2288
2289 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2290 parser->m_bufferPtr, &parser->m_position);
2291 parser->m_positionPtr = parser->m_bufferPtr;
2292 return result;
2293 }
2294
2295 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2296 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2297 if (parser == NULL)
2298 return;
2299 assert(status != NULL);
2300 *status = parser->m_parsingStatus;
2301 }
2302
2303 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2304 XML_GetErrorCode(XML_Parser parser) {
2305 if (parser == NULL)
2306 return XML_ERROR_INVALID_ARGUMENT;
2307 return parser->m_errorCode;
2308 }
2309
2310 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2311 XML_GetCurrentByteIndex(XML_Parser parser) {
2312 if (parser == NULL)
2313 return -1;
2314 if (parser->m_eventPtr)
2315 return (XML_Index)(parser->m_parseEndByteIndex
2316 - (parser->m_parseEndPtr - parser->m_eventPtr));
2317 return -1;
2318 }
2319
2320 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2321 XML_GetCurrentByteCount(XML_Parser parser) {
2322 if (parser == NULL)
2323 return 0;
2324 if (parser->m_eventEndPtr && parser->m_eventPtr)
2325 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2326 return 0;
2327 }
2328
2329 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2330 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2331 #if XML_CONTEXT_BYTES > 0
2332 if (parser == NULL)
2333 return NULL;
2334 if (parser->m_eventPtr && parser->m_buffer) {
2335 if (offset != NULL)
2336 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2337 if (size != NULL)
2338 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2339 return parser->m_buffer;
2340 }
2341 #else
2342 (void)parser;
2343 (void)offset;
2344 (void)size;
2345 #endif /* XML_CONTEXT_BYTES > 0 */
2346 return (const char *)0;
2347 }
2348
2349 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2350 XML_GetCurrentLineNumber(XML_Parser parser) {
2351 if (parser == NULL)
2352 return 0;
2353 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2354 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2355 parser->m_eventPtr, &parser->m_position);
2356 parser->m_positionPtr = parser->m_eventPtr;
2357 }
2358 return parser->m_position.lineNumber + 1;
2359 }
2360
2361 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2362 XML_GetCurrentColumnNumber(XML_Parser parser) {
2363 if (parser == NULL)
2364 return 0;
2365 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2366 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2367 parser->m_eventPtr, &parser->m_position);
2368 parser->m_positionPtr = parser->m_eventPtr;
2369 }
2370 return parser->m_position.columnNumber;
2371 }
2372
2373 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2374 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2375 if (parser != NULL)
2376 FREE(parser, model);
2377 }
2378
2379 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2380 XML_MemMalloc(XML_Parser parser, size_t size) {
2381 if (parser == NULL)
2382 return NULL;
2383 return MALLOC(parser, size);
2384 }
2385
2386 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2387 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2388 if (parser == NULL)
2389 return NULL;
2390 return REALLOC(parser, ptr, size);
2391 }
2392
2393 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2394 XML_MemFree(XML_Parser parser, void *ptr) {
2395 if (parser != NULL)
2396 FREE(parser, ptr);
2397 }
2398
2399 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2400 XML_DefaultCurrent(XML_Parser parser) {
2401 if (parser == NULL)
2402 return;
2403 if (parser->m_defaultHandler) {
2404 if (parser->m_openInternalEntities)
2405 reportDefault(parser, parser->m_internalEncoding,
2406 parser->m_openInternalEntities->internalEventPtr,
2407 parser->m_openInternalEntities->internalEventEndPtr);
2408 else
2409 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2410 parser->m_eventEndPtr);
2411 }
2412 }
2413
2414 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2415 XML_ErrorString(enum XML_Error code) {
2416 switch (code) {
2417 case XML_ERROR_NONE:
2418 return NULL;
2419 case XML_ERROR_NO_MEMORY:
2420 return XML_L("out of memory");
2421 case XML_ERROR_SYNTAX:
2422 return XML_L("syntax error");
2423 case XML_ERROR_NO_ELEMENTS:
2424 return XML_L("no element found");
2425 case XML_ERROR_INVALID_TOKEN:
2426 return XML_L("not well-formed (invalid token)");
2427 case XML_ERROR_UNCLOSED_TOKEN:
2428 return XML_L("unclosed token");
2429 case XML_ERROR_PARTIAL_CHAR:
2430 return XML_L("partial character");
2431 case XML_ERROR_TAG_MISMATCH:
2432 return XML_L("mismatched tag");
2433 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2434 return XML_L("duplicate attribute");
2435 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2436 return XML_L("junk after document element");
2437 case XML_ERROR_PARAM_ENTITY_REF:
2438 return XML_L("illegal parameter entity reference");
2439 case XML_ERROR_UNDEFINED_ENTITY:
2440 return XML_L("undefined entity");
2441 case XML_ERROR_RECURSIVE_ENTITY_REF:
2442 return XML_L("recursive entity reference");
2443 case XML_ERROR_ASYNC_ENTITY:
2444 return XML_L("asynchronous entity");
2445 case XML_ERROR_BAD_CHAR_REF:
2446 return XML_L("reference to invalid character number");
2447 case XML_ERROR_BINARY_ENTITY_REF:
2448 return XML_L("reference to binary entity");
2449 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2450 return XML_L("reference to external entity in attribute");
2451 case XML_ERROR_MISPLACED_XML_PI:
2452 return XML_L("XML or text declaration not at start of entity");
2453 case XML_ERROR_UNKNOWN_ENCODING:
2454 return XML_L("unknown encoding");
2455 case XML_ERROR_INCORRECT_ENCODING:
2456 return XML_L("encoding specified in XML declaration is incorrect");
2457 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2458 return XML_L("unclosed CDATA section");
2459 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2460 return XML_L("error in processing external entity reference");
2461 case XML_ERROR_NOT_STANDALONE:
2462 return XML_L("document is not standalone");
2463 case XML_ERROR_UNEXPECTED_STATE:
2464 return XML_L("unexpected parser state - please send a bug report");
2465 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2466 return XML_L("entity declared in parameter entity");
2467 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2468 return XML_L("requested feature requires XML_DTD support in Expat");
2469 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2470 return XML_L("cannot change setting once parsing has begun");
2471 /* Added in 1.95.7. */
2472 case XML_ERROR_UNBOUND_PREFIX:
2473 return XML_L("unbound prefix");
2474 /* Added in 1.95.8. */
2475 case XML_ERROR_UNDECLARING_PREFIX:
2476 return XML_L("must not undeclare prefix");
2477 case XML_ERROR_INCOMPLETE_PE:
2478 return XML_L("incomplete markup in parameter entity");
2479 case XML_ERROR_XML_DECL:
2480 return XML_L("XML declaration not well-formed");
2481 case XML_ERROR_TEXT_DECL:
2482 return XML_L("text declaration not well-formed");
2483 case XML_ERROR_PUBLICID:
2484 return XML_L("illegal character(s) in public id");
2485 case XML_ERROR_SUSPENDED:
2486 return XML_L("parser suspended");
2487 case XML_ERROR_NOT_SUSPENDED:
2488 return XML_L("parser not suspended");
2489 case XML_ERROR_ABORTED:
2490 return XML_L("parsing aborted");
2491 case XML_ERROR_FINISHED:
2492 return XML_L("parsing finished");
2493 case XML_ERROR_SUSPEND_PE:
2494 return XML_L("cannot suspend in external parameter entity");
2495 /* Added in 2.0.0. */
2496 case XML_ERROR_RESERVED_PREFIX_XML:
2497 return XML_L(
2498 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2499 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2500 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2501 case XML_ERROR_RESERVED_NAMESPACE_URI:
2502 return XML_L(
2503 "prefix must not be bound to one of the reserved namespace names");
2504 /* Added in 2.2.5. */
2505 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2506 return XML_L("invalid argument");
2507 /* Added in 2.3.0. */
2508 case XML_ERROR_NO_BUFFER:
2509 return XML_L(
2510 "a successful prior call to function XML_GetBuffer is required");
2511 /* Added in 2.4.0. */
2512 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2513 return XML_L(
2514 "limit on input amplification factor (from DTD and entities) breached");
2515 }
2516 return NULL;
2517 }
2518
2519 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2520 XML_ExpatVersion(void) {
2521 /* V1 is used to string-ize the version number. However, it would
2522 string-ize the actual version macro *names* unless we get them
2523 substituted before being passed to V1. CPP is defined to expand
2524 a macro, then rescan for more expansions. Thus, we use V2 to expand
2525 the version macros, then CPP will expand the resulting V1() macro
2526 with the correct numerals. */
2527 /* ### I'm assuming cpp is portable in this respect... */
2528
2529 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2530 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2531
2532 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2533
2534 #undef V1
2535 #undef V2
2536 }
2537
2538 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2539 XML_ExpatVersionInfo(void) {
2540 XML_Expat_Version version;
2541
2542 version.major = XML_MAJOR_VERSION;
2543 version.minor = XML_MINOR_VERSION;
2544 version.micro = XML_MICRO_VERSION;
2545
2546 return version;
2547 }
2548
2549 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2550 XML_GetFeatureList(void) {
2551 static const XML_Feature features[] = {
2552 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2553 sizeof(XML_Char)},
2554 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2555 sizeof(XML_LChar)},
2556 #ifdef XML_UNICODE
2557 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2558 #endif
2559 #ifdef XML_UNICODE_WCHAR_T
2560 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2561 #endif
2562 #ifdef XML_DTD
2563 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2564 #endif
2565 #if XML_CONTEXT_BYTES > 0
2566 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2567 XML_CONTEXT_BYTES},
2568 #endif
2569 #ifdef XML_MIN_SIZE
2570 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2571 #endif
2572 #ifdef XML_NS
2573 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2574 #endif
2575 #ifdef XML_LARGE_SIZE
2576 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2577 #endif
2578 #ifdef XML_ATTR_INFO
2579 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2580 #endif
2581 #if XML_GE == 1
2582 /* Added in Expat 2.4.0 for XML_DTD defined and
2583 * added in Expat 2.6.0 for XML_GE == 1. */
2584 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2585 XML_L("XML_BLAP_MAX_AMP"),
2586 (long int)
2587 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2588 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2589 XML_L("XML_BLAP_ACT_THRES"),
2590 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2591 /* Added in Expat 2.6.0. */
2592 {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2593 #endif
2594 {XML_FEATURE_END, NULL, 0}};
2595
2596 return features;
2597 }
2598
2599 #if XML_GE == 1
2600 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2601 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2602 XML_Parser parser, float maximumAmplificationFactor) {
2603 if ((parser == NULL) || (parser->m_parentParser != NULL)
2604 || isnan(maximumAmplificationFactor)
2605 || (maximumAmplificationFactor < 1.0f)) {
2606 return XML_FALSE;
2607 }
2608 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2609 return XML_TRUE;
2610 }
2611
2612 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2613 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2614 XML_Parser parser, unsigned long long activationThresholdBytes) {
2615 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2616 return XML_FALSE;
2617 }
2618 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2619 return XML_TRUE;
2620 }
2621 #endif /* XML_GE == 1 */
2622
2623 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2624 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2625 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2626 parser->m_reparseDeferralEnabled = enabled;
2627 return XML_TRUE;
2628 }
2629 return XML_FALSE;
2630 }
2631
2632 /* Initially tag->rawName always points into the parse buffer;
2633 for those TAG instances opened while the current parse buffer was
2634 processed, and not yet closed, we need to store tag->rawName in a more
2635 permanent location, since the parse buffer is about to be discarded.
2636 */
2637 static XML_Bool
storeRawNames(XML_Parser parser)2638 storeRawNames(XML_Parser parser) {
2639 TAG *tag = parser->m_tagStack;
2640 while (tag) {
2641 int bufSize;
2642 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2643 size_t rawNameLen;
2644 char *rawNameBuf = tag->buf + nameLen;
2645 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2646 at the first entry that has already been copied; everything
2647 below it in the stack is already been accounted for in a
2648 previous call to this function.
2649 */
2650 if (tag->rawName == rawNameBuf)
2651 break;
2652 /* For reuse purposes we need to ensure that the
2653 size of tag->buf is a multiple of sizeof(XML_Char).
2654 */
2655 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2656 /* Detect and prevent integer overflow. */
2657 if (rawNameLen > (size_t)INT_MAX - nameLen)
2658 return XML_FALSE;
2659 bufSize = nameLen + (int)rawNameLen;
2660 if (bufSize > tag->bufEnd - tag->buf) {
2661 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2662 if (temp == NULL)
2663 return XML_FALSE;
2664 /* if tag->name.str points to tag->buf (only when namespace
2665 processing is off) then we have to update it
2666 */
2667 if (tag->name.str == (XML_Char *)tag->buf)
2668 tag->name.str = (XML_Char *)temp;
2669 /* if tag->name.localPart is set (when namespace processing is on)
2670 then update it as well, since it will always point into tag->buf
2671 */
2672 if (tag->name.localPart)
2673 tag->name.localPart
2674 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2675 tag->buf = temp;
2676 tag->bufEnd = temp + bufSize;
2677 rawNameBuf = temp + nameLen;
2678 }
2679 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2680 tag->rawName = rawNameBuf;
2681 tag = tag->parent;
2682 }
2683 return XML_TRUE;
2684 }
2685
2686 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2687 contentProcessor(XML_Parser parser, const char *start, const char *end,
2688 const char **endPtr) {
2689 enum XML_Error result = doContent(
2690 parser, 0, parser->m_encoding, start, end, endPtr,
2691 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2692 if (result == XML_ERROR_NONE) {
2693 if (! storeRawNames(parser))
2694 return XML_ERROR_NO_MEMORY;
2695 }
2696 return result;
2697 }
2698
2699 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2700 externalEntityInitProcessor(XML_Parser parser, const char *start,
2701 const char *end, const char **endPtr) {
2702 enum XML_Error result = initializeEncoding(parser);
2703 if (result != XML_ERROR_NONE)
2704 return result;
2705 parser->m_processor = externalEntityInitProcessor2;
2706 return externalEntityInitProcessor2(parser, start, end, endPtr);
2707 }
2708
2709 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2710 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2711 const char *end, const char **endPtr) {
2712 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2713 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2714 switch (tok) {
2715 case XML_TOK_BOM:
2716 #if XML_GE == 1
2717 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2718 XML_ACCOUNT_DIRECT)) {
2719 accountingOnAbort(parser);
2720 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2721 }
2722 #endif /* XML_GE == 1 */
2723
2724 /* If we are at the end of the buffer, this would cause the next stage,
2725 i.e. externalEntityInitProcessor3, to pass control directly to
2726 doContent (by detecting XML_TOK_NONE) without processing any xml text
2727 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2728 */
2729 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2730 *endPtr = next;
2731 return XML_ERROR_NONE;
2732 }
2733 start = next;
2734 break;
2735 case XML_TOK_PARTIAL:
2736 if (! parser->m_parsingStatus.finalBuffer) {
2737 *endPtr = start;
2738 return XML_ERROR_NONE;
2739 }
2740 parser->m_eventPtr = start;
2741 return XML_ERROR_UNCLOSED_TOKEN;
2742 case XML_TOK_PARTIAL_CHAR:
2743 if (! parser->m_parsingStatus.finalBuffer) {
2744 *endPtr = start;
2745 return XML_ERROR_NONE;
2746 }
2747 parser->m_eventPtr = start;
2748 return XML_ERROR_PARTIAL_CHAR;
2749 }
2750 parser->m_processor = externalEntityInitProcessor3;
2751 return externalEntityInitProcessor3(parser, start, end, endPtr);
2752 }
2753
2754 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2755 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2756 const char *end, const char **endPtr) {
2757 int tok;
2758 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2759 parser->m_eventPtr = start;
2760 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2761 /* Note: These bytes are accounted later in:
2762 - processXmlDecl
2763 - externalEntityContentProcessor
2764 */
2765 parser->m_eventEndPtr = next;
2766
2767 switch (tok) {
2768 case XML_TOK_XML_DECL: {
2769 enum XML_Error result;
2770 result = processXmlDecl(parser, 1, start, next);
2771 if (result != XML_ERROR_NONE)
2772 return result;
2773 switch (parser->m_parsingStatus.parsing) {
2774 case XML_SUSPENDED:
2775 *endPtr = next;
2776 return XML_ERROR_NONE;
2777 case XML_FINISHED:
2778 return XML_ERROR_ABORTED;
2779 default:
2780 start = next;
2781 }
2782 } break;
2783 case XML_TOK_PARTIAL:
2784 if (! parser->m_parsingStatus.finalBuffer) {
2785 *endPtr = start;
2786 return XML_ERROR_NONE;
2787 }
2788 return XML_ERROR_UNCLOSED_TOKEN;
2789 case XML_TOK_PARTIAL_CHAR:
2790 if (! parser->m_parsingStatus.finalBuffer) {
2791 *endPtr = start;
2792 return XML_ERROR_NONE;
2793 }
2794 return XML_ERROR_PARTIAL_CHAR;
2795 }
2796 parser->m_processor = externalEntityContentProcessor;
2797 parser->m_tagLevel = 1;
2798 return externalEntityContentProcessor(parser, start, end, endPtr);
2799 }
2800
2801 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2802 externalEntityContentProcessor(XML_Parser parser, const char *start,
2803 const char *end, const char **endPtr) {
2804 enum XML_Error result
2805 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2806 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2807 XML_ACCOUNT_ENTITY_EXPANSION);
2808 if (result == XML_ERROR_NONE) {
2809 if (! storeRawNames(parser))
2810 return XML_ERROR_NO_MEMORY;
2811 }
2812 return result;
2813 }
2814
2815 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2816 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2817 const char *s, const char *end, const char **nextPtr,
2818 XML_Bool haveMore, enum XML_Account account) {
2819 /* save one level of indirection */
2820 DTD *const dtd = parser->m_dtd;
2821
2822 const char **eventPP;
2823 const char **eventEndPP;
2824 if (enc == parser->m_encoding) {
2825 eventPP = &parser->m_eventPtr;
2826 eventEndPP = &parser->m_eventEndPtr;
2827 } else {
2828 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2829 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2830 }
2831 *eventPP = s;
2832
2833 for (;;) {
2834 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2835 int tok = XmlContentTok(enc, s, end, &next);
2836 #if XML_GE == 1
2837 const char *accountAfter
2838 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2839 ? (haveMore ? s /* i.e. 0 bytes */ : end)
2840 : next;
2841 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2842 account)) {
2843 accountingOnAbort(parser);
2844 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2845 }
2846 #endif
2847 *eventEndPP = next;
2848 switch (tok) {
2849 case XML_TOK_TRAILING_CR:
2850 if (haveMore) {
2851 *nextPtr = s;
2852 return XML_ERROR_NONE;
2853 }
2854 *eventEndPP = end;
2855 if (parser->m_characterDataHandler) {
2856 XML_Char c = 0xA;
2857 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2858 } else if (parser->m_defaultHandler)
2859 reportDefault(parser, enc, s, end);
2860 /* We are at the end of the final buffer, should we check for
2861 XML_SUSPENDED, XML_FINISHED?
2862 */
2863 if (startTagLevel == 0)
2864 return XML_ERROR_NO_ELEMENTS;
2865 if (parser->m_tagLevel != startTagLevel)
2866 return XML_ERROR_ASYNC_ENTITY;
2867 *nextPtr = end;
2868 return XML_ERROR_NONE;
2869 case XML_TOK_NONE:
2870 if (haveMore) {
2871 *nextPtr = s;
2872 return XML_ERROR_NONE;
2873 }
2874 if (startTagLevel > 0) {
2875 if (parser->m_tagLevel != startTagLevel)
2876 return XML_ERROR_ASYNC_ENTITY;
2877 *nextPtr = s;
2878 return XML_ERROR_NONE;
2879 }
2880 return XML_ERROR_NO_ELEMENTS;
2881 case XML_TOK_INVALID:
2882 *eventPP = next;
2883 return XML_ERROR_INVALID_TOKEN;
2884 case XML_TOK_PARTIAL:
2885 if (haveMore) {
2886 *nextPtr = s;
2887 return XML_ERROR_NONE;
2888 }
2889 return XML_ERROR_UNCLOSED_TOKEN;
2890 case XML_TOK_PARTIAL_CHAR:
2891 if (haveMore) {
2892 *nextPtr = s;
2893 return XML_ERROR_NONE;
2894 }
2895 return XML_ERROR_PARTIAL_CHAR;
2896 case XML_TOK_ENTITY_REF: {
2897 const XML_Char *name;
2898 ENTITY *entity;
2899 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2900 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2901 if (ch) {
2902 #if XML_GE == 1
2903 /* NOTE: We are replacing 4-6 characters original input for 1 character
2904 * so there is no amplification and hence recording without
2905 * protection. */
2906 accountingDiffTolerated(parser, tok, (char *)&ch,
2907 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2908 XML_ACCOUNT_ENTITY_EXPANSION);
2909 #endif /* XML_GE == 1 */
2910 if (parser->m_characterDataHandler)
2911 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2912 else if (parser->m_defaultHandler)
2913 reportDefault(parser, enc, s, next);
2914 break;
2915 }
2916 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2917 next - enc->minBytesPerChar);
2918 if (! name)
2919 return XML_ERROR_NO_MEMORY;
2920 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2921 poolDiscard(&dtd->pool);
2922 /* First, determine if a check for an existing declaration is needed;
2923 if yes, check that the entity exists, and that it is internal,
2924 otherwise call the skipped entity or default handler.
2925 */
2926 if (! dtd->hasParamEntityRefs || dtd->standalone) {
2927 if (! entity)
2928 return XML_ERROR_UNDEFINED_ENTITY;
2929 else if (! entity->is_internal)
2930 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2931 } else if (! entity) {
2932 if (parser->m_skippedEntityHandler)
2933 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2934 else if (parser->m_defaultHandler)
2935 reportDefault(parser, enc, s, next);
2936 break;
2937 }
2938 if (entity->open)
2939 return XML_ERROR_RECURSIVE_ENTITY_REF;
2940 if (entity->notation)
2941 return XML_ERROR_BINARY_ENTITY_REF;
2942 if (entity->textPtr) {
2943 enum XML_Error result;
2944 if (! parser->m_defaultExpandInternalEntities) {
2945 if (parser->m_skippedEntityHandler)
2946 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2947 0);
2948 else if (parser->m_defaultHandler)
2949 reportDefault(parser, enc, s, next);
2950 break;
2951 }
2952 result = processInternalEntity(parser, entity, XML_FALSE);
2953 if (result != XML_ERROR_NONE)
2954 return result;
2955 } else if (parser->m_externalEntityRefHandler) {
2956 const XML_Char *context;
2957 entity->open = XML_TRUE;
2958 context = getContext(parser);
2959 entity->open = XML_FALSE;
2960 if (! context)
2961 return XML_ERROR_NO_MEMORY;
2962 if (! parser->m_externalEntityRefHandler(
2963 parser->m_externalEntityRefHandlerArg, context, entity->base,
2964 entity->systemId, entity->publicId))
2965 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2966 poolDiscard(&parser->m_tempPool);
2967 } else if (parser->m_defaultHandler)
2968 reportDefault(parser, enc, s, next);
2969 break;
2970 }
2971 case XML_TOK_START_TAG_NO_ATTS:
2972 /* fall through */
2973 case XML_TOK_START_TAG_WITH_ATTS: {
2974 TAG *tag;
2975 enum XML_Error result;
2976 XML_Char *toPtr;
2977 if (parser->m_freeTagList) {
2978 tag = parser->m_freeTagList;
2979 parser->m_freeTagList = parser->m_freeTagList->parent;
2980 } else {
2981 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2982 if (! tag)
2983 return XML_ERROR_NO_MEMORY;
2984 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2985 if (! tag->buf) {
2986 FREE(parser, tag);
2987 return XML_ERROR_NO_MEMORY;
2988 }
2989 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2990 }
2991 tag->bindings = NULL;
2992 tag->parent = parser->m_tagStack;
2993 parser->m_tagStack = tag;
2994 tag->name.localPart = NULL;
2995 tag->name.prefix = NULL;
2996 tag->rawName = s + enc->minBytesPerChar;
2997 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2998 ++parser->m_tagLevel;
2999 {
3000 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3001 const char *fromPtr = tag->rawName;
3002 toPtr = (XML_Char *)tag->buf;
3003 for (;;) {
3004 int bufSize;
3005 int convLen;
3006 const enum XML_Convert_Result convert_res
3007 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3008 (ICHAR *)tag->bufEnd - 1);
3009 convLen = (int)(toPtr - (XML_Char *)tag->buf);
3010 if ((fromPtr >= rawNameEnd)
3011 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3012 tag->name.strLen = convLen;
3013 break;
3014 }
3015 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3016 {
3017 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3018 if (temp == NULL)
3019 return XML_ERROR_NO_MEMORY;
3020 tag->buf = temp;
3021 tag->bufEnd = temp + bufSize;
3022 toPtr = (XML_Char *)temp + convLen;
3023 }
3024 }
3025 }
3026 tag->name.str = (XML_Char *)tag->buf;
3027 *toPtr = XML_T('\0');
3028 result
3029 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3030 if (result)
3031 return result;
3032 if (parser->m_startElementHandler)
3033 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3034 (const XML_Char **)parser->m_atts);
3035 else if (parser->m_defaultHandler)
3036 reportDefault(parser, enc, s, next);
3037 poolClear(&parser->m_tempPool);
3038 break;
3039 }
3040 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3041 /* fall through */
3042 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3043 const char *rawName = s + enc->minBytesPerChar;
3044 enum XML_Error result;
3045 BINDING *bindings = NULL;
3046 XML_Bool noElmHandlers = XML_TRUE;
3047 TAG_NAME name;
3048 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3049 rawName + XmlNameLength(enc, rawName));
3050 if (! name.str)
3051 return XML_ERROR_NO_MEMORY;
3052 poolFinish(&parser->m_tempPool);
3053 result = storeAtts(parser, enc, s, &name, &bindings,
3054 XML_ACCOUNT_NONE /* token spans whole start tag */);
3055 if (result != XML_ERROR_NONE) {
3056 freeBindings(parser, bindings);
3057 return result;
3058 }
3059 poolFinish(&parser->m_tempPool);
3060 if (parser->m_startElementHandler) {
3061 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3062 (const XML_Char **)parser->m_atts);
3063 noElmHandlers = XML_FALSE;
3064 }
3065 if (parser->m_endElementHandler) {
3066 if (parser->m_startElementHandler)
3067 *eventPP = *eventEndPP;
3068 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3069 noElmHandlers = XML_FALSE;
3070 }
3071 if (noElmHandlers && parser->m_defaultHandler)
3072 reportDefault(parser, enc, s, next);
3073 poolClear(&parser->m_tempPool);
3074 freeBindings(parser, bindings);
3075 }
3076 if ((parser->m_tagLevel == 0)
3077 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3078 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3079 parser->m_processor = epilogProcessor;
3080 else
3081 return epilogProcessor(parser, next, end, nextPtr);
3082 }
3083 break;
3084 case XML_TOK_END_TAG:
3085 if (parser->m_tagLevel == startTagLevel)
3086 return XML_ERROR_ASYNC_ENTITY;
3087 else {
3088 int len;
3089 const char *rawName;
3090 TAG *tag = parser->m_tagStack;
3091 rawName = s + enc->minBytesPerChar * 2;
3092 len = XmlNameLength(enc, rawName);
3093 if (len != tag->rawNameLength
3094 || memcmp(tag->rawName, rawName, len) != 0) {
3095 *eventPP = rawName;
3096 return XML_ERROR_TAG_MISMATCH;
3097 }
3098 parser->m_tagStack = tag->parent;
3099 tag->parent = parser->m_freeTagList;
3100 parser->m_freeTagList = tag;
3101 --parser->m_tagLevel;
3102 if (parser->m_endElementHandler) {
3103 const XML_Char *localPart;
3104 const XML_Char *prefix;
3105 XML_Char *uri;
3106 localPart = tag->name.localPart;
3107 if (parser->m_ns && localPart) {
3108 /* localPart and prefix may have been overwritten in
3109 tag->name.str, since this points to the binding->uri
3110 buffer which gets reused; so we have to add them again
3111 */
3112 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3113 /* don't need to check for space - already done in storeAtts() */
3114 while (*localPart)
3115 *uri++ = *localPart++;
3116 prefix = tag->name.prefix;
3117 if (parser->m_ns_triplets && prefix) {
3118 *uri++ = parser->m_namespaceSeparator;
3119 while (*prefix)
3120 *uri++ = *prefix++;
3121 }
3122 *uri = XML_T('\0');
3123 }
3124 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3125 } else if (parser->m_defaultHandler)
3126 reportDefault(parser, enc, s, next);
3127 while (tag->bindings) {
3128 BINDING *b = tag->bindings;
3129 if (parser->m_endNamespaceDeclHandler)
3130 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3131 b->prefix->name);
3132 tag->bindings = tag->bindings->nextTagBinding;
3133 b->nextTagBinding = parser->m_freeBindingList;
3134 parser->m_freeBindingList = b;
3135 b->prefix->binding = b->prevPrefixBinding;
3136 }
3137 if ((parser->m_tagLevel == 0)
3138 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3139 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3140 parser->m_processor = epilogProcessor;
3141 else
3142 return epilogProcessor(parser, next, end, nextPtr);
3143 }
3144 }
3145 break;
3146 case XML_TOK_CHAR_REF: {
3147 int n = XmlCharRefNumber(enc, s);
3148 if (n < 0)
3149 return XML_ERROR_BAD_CHAR_REF;
3150 if (parser->m_characterDataHandler) {
3151 XML_Char buf[XML_ENCODE_MAX];
3152 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3153 XmlEncode(n, (ICHAR *)buf));
3154 } else if (parser->m_defaultHandler)
3155 reportDefault(parser, enc, s, next);
3156 } break;
3157 case XML_TOK_XML_DECL:
3158 return XML_ERROR_MISPLACED_XML_PI;
3159 case XML_TOK_DATA_NEWLINE:
3160 if (parser->m_characterDataHandler) {
3161 XML_Char c = 0xA;
3162 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3163 } else if (parser->m_defaultHandler)
3164 reportDefault(parser, enc, s, next);
3165 break;
3166 case XML_TOK_CDATA_SECT_OPEN: {
3167 enum XML_Error result;
3168 if (parser->m_startCdataSectionHandler)
3169 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3170 /* BEGIN disabled code */
3171 /* Suppose you doing a transformation on a document that involves
3172 changing only the character data. You set up a defaultHandler
3173 and a characterDataHandler. The defaultHandler simply copies
3174 characters through. The characterDataHandler does the
3175 transformation and writes the characters out escaping them as
3176 necessary. This case will fail to work if we leave out the
3177 following two lines (because & and < inside CDATA sections will
3178 be incorrectly escaped).
3179
3180 However, now we have a start/endCdataSectionHandler, so it seems
3181 easier to let the user deal with this.
3182 */
3183 else if ((0) && parser->m_characterDataHandler)
3184 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3185 0);
3186 /* END disabled code */
3187 else if (parser->m_defaultHandler)
3188 reportDefault(parser, enc, s, next);
3189 result
3190 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3191 if (result != XML_ERROR_NONE)
3192 return result;
3193 else if (! next) {
3194 parser->m_processor = cdataSectionProcessor;
3195 return result;
3196 }
3197 } break;
3198 case XML_TOK_TRAILING_RSQB:
3199 if (haveMore) {
3200 *nextPtr = s;
3201 return XML_ERROR_NONE;
3202 }
3203 if (parser->m_characterDataHandler) {
3204 if (MUST_CONVERT(enc, s)) {
3205 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3206 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3207 parser->m_characterDataHandler(
3208 parser->m_handlerArg, parser->m_dataBuf,
3209 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3210 } else
3211 parser->m_characterDataHandler(
3212 parser->m_handlerArg, (const XML_Char *)s,
3213 (int)((const XML_Char *)end - (const XML_Char *)s));
3214 } else if (parser->m_defaultHandler)
3215 reportDefault(parser, enc, s, end);
3216 /* We are at the end of the final buffer, should we check for
3217 XML_SUSPENDED, XML_FINISHED?
3218 */
3219 if (startTagLevel == 0) {
3220 *eventPP = end;
3221 return XML_ERROR_NO_ELEMENTS;
3222 }
3223 if (parser->m_tagLevel != startTagLevel) {
3224 *eventPP = end;
3225 return XML_ERROR_ASYNC_ENTITY;
3226 }
3227 *nextPtr = end;
3228 return XML_ERROR_NONE;
3229 case XML_TOK_DATA_CHARS: {
3230 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3231 if (charDataHandler) {
3232 if (MUST_CONVERT(enc, s)) {
3233 for (;;) {
3234 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3235 const enum XML_Convert_Result convert_res = XmlConvert(
3236 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3237 *eventEndPP = s;
3238 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3239 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3240 if ((convert_res == XML_CONVERT_COMPLETED)
3241 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3242 break;
3243 *eventPP = s;
3244 }
3245 } else
3246 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3247 (int)((const XML_Char *)next - (const XML_Char *)s));
3248 } else if (parser->m_defaultHandler)
3249 reportDefault(parser, enc, s, next);
3250 } break;
3251 case XML_TOK_PI:
3252 if (! reportProcessingInstruction(parser, enc, s, next))
3253 return XML_ERROR_NO_MEMORY;
3254 break;
3255 case XML_TOK_COMMENT:
3256 if (! reportComment(parser, enc, s, next))
3257 return XML_ERROR_NO_MEMORY;
3258 break;
3259 default:
3260 /* All of the tokens produced by XmlContentTok() have their own
3261 * explicit cases, so this default is not strictly necessary.
3262 * However it is a useful safety net, so we retain the code and
3263 * simply exclude it from the coverage tests.
3264 *
3265 * LCOV_EXCL_START
3266 */
3267 if (parser->m_defaultHandler)
3268 reportDefault(parser, enc, s, next);
3269 break;
3270 /* LCOV_EXCL_STOP */
3271 }
3272 *eventPP = s = next;
3273 switch (parser->m_parsingStatus.parsing) {
3274 case XML_SUSPENDED:
3275 *nextPtr = next;
3276 return XML_ERROR_NONE;
3277 case XML_FINISHED:
3278 return XML_ERROR_ABORTED;
3279 default:;
3280 }
3281 }
3282 /* not reached */
3283 }
3284
3285 /* This function does not call free() on the allocated memory, merely
3286 * moving it to the parser's m_freeBindingList where it can be freed or
3287 * reused as appropriate.
3288 */
3289 static void
freeBindings(XML_Parser parser,BINDING * bindings)3290 freeBindings(XML_Parser parser, BINDING *bindings) {
3291 while (bindings) {
3292 BINDING *b = bindings;
3293
3294 /* m_startNamespaceDeclHandler will have been called for this
3295 * binding in addBindings(), so call the end handler now.
3296 */
3297 if (parser->m_endNamespaceDeclHandler)
3298 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3299
3300 bindings = bindings->nextTagBinding;
3301 b->nextTagBinding = parser->m_freeBindingList;
3302 parser->m_freeBindingList = b;
3303 b->prefix->binding = b->prevPrefixBinding;
3304 }
3305 }
3306
3307 /* Precondition: all arguments must be non-NULL;
3308 Purpose:
3309 - normalize attributes
3310 - check attributes for well-formedness
3311 - generate namespace aware attribute names (URI, prefix)
3312 - build list of attributes for startElementHandler
3313 - default attributes
3314 - process namespace declarations (check and report them)
3315 - generate namespace aware element name (URI, prefix)
3316 */
3317 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3318 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3319 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3320 enum XML_Account account) {
3321 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3322 ELEMENT_TYPE *elementType;
3323 int nDefaultAtts;
3324 const XML_Char **appAtts; /* the attribute list for the application */
3325 int attIndex = 0;
3326 int prefixLen;
3327 int i;
3328 int n;
3329 XML_Char *uri;
3330 int nPrefixes = 0;
3331 BINDING *binding;
3332 const XML_Char *localPart;
3333
3334 /* lookup the element type name */
3335 elementType
3336 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3337 if (! elementType) {
3338 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3339 if (! name)
3340 return XML_ERROR_NO_MEMORY;
3341 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3342 sizeof(ELEMENT_TYPE));
3343 if (! elementType)
3344 return XML_ERROR_NO_MEMORY;
3345 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3346 return XML_ERROR_NO_MEMORY;
3347 }
3348 nDefaultAtts = elementType->nDefaultAtts;
3349
3350 /* get the attributes from the tokenizer */
3351 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3352
3353 /* Detect and prevent integer overflow */
3354 if (n > INT_MAX - nDefaultAtts) {
3355 return XML_ERROR_NO_MEMORY;
3356 }
3357
3358 if (n + nDefaultAtts > parser->m_attsSize) {
3359 int oldAttsSize = parser->m_attsSize;
3360 ATTRIBUTE *temp;
3361 #ifdef XML_ATTR_INFO
3362 XML_AttrInfo *temp2;
3363 #endif
3364
3365 /* Detect and prevent integer overflow */
3366 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3367 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3368 return XML_ERROR_NO_MEMORY;
3369 }
3370
3371 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3372
3373 /* Detect and prevent integer overflow.
3374 * The preprocessor guard addresses the "always false" warning
3375 * from -Wtype-limits on platforms where
3376 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3377 #if UINT_MAX >= SIZE_MAX
3378 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3379 parser->m_attsSize = oldAttsSize;
3380 return XML_ERROR_NO_MEMORY;
3381 }
3382 #endif
3383
3384 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3385 parser->m_attsSize * sizeof(ATTRIBUTE));
3386 if (temp == NULL) {
3387 parser->m_attsSize = oldAttsSize;
3388 return XML_ERROR_NO_MEMORY;
3389 }
3390 parser->m_atts = temp;
3391 #ifdef XML_ATTR_INFO
3392 /* Detect and prevent integer overflow.
3393 * The preprocessor guard addresses the "always false" warning
3394 * from -Wtype-limits on platforms where
3395 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3396 # if UINT_MAX >= SIZE_MAX
3397 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3398 parser->m_attsSize = oldAttsSize;
3399 return XML_ERROR_NO_MEMORY;
3400 }
3401 # endif
3402
3403 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3404 parser->m_attsSize * sizeof(XML_AttrInfo));
3405 if (temp2 == NULL) {
3406 parser->m_attsSize = oldAttsSize;
3407 return XML_ERROR_NO_MEMORY;
3408 }
3409 parser->m_attInfo = temp2;
3410 #endif
3411 if (n > oldAttsSize)
3412 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3413 }
3414
3415 appAtts = (const XML_Char **)parser->m_atts;
3416 for (i = 0; i < n; i++) {
3417 ATTRIBUTE *currAtt = &parser->m_atts[i];
3418 #ifdef XML_ATTR_INFO
3419 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3420 #endif
3421 /* add the name and value to the attribute list */
3422 ATTRIBUTE_ID *attId
3423 = getAttributeId(parser, enc, currAtt->name,
3424 currAtt->name + XmlNameLength(enc, currAtt->name));
3425 if (! attId)
3426 return XML_ERROR_NO_MEMORY;
3427 #ifdef XML_ATTR_INFO
3428 currAttInfo->nameStart
3429 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3430 currAttInfo->nameEnd
3431 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3432 currAttInfo->valueStart = parser->m_parseEndByteIndex
3433 - (parser->m_parseEndPtr - currAtt->valuePtr);
3434 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3435 - (parser->m_parseEndPtr - currAtt->valueEnd);
3436 #endif
3437 /* Detect duplicate attributes by their QNames. This does not work when
3438 namespace processing is turned on and different prefixes for the same
3439 namespace are used. For this case we have a check further down.
3440 */
3441 if ((attId->name)[-1]) {
3442 if (enc == parser->m_encoding)
3443 parser->m_eventPtr = parser->m_atts[i].name;
3444 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3445 }
3446 (attId->name)[-1] = 1;
3447 appAtts[attIndex++] = attId->name;
3448 if (! parser->m_atts[i].normalized) {
3449 enum XML_Error result;
3450 XML_Bool isCdata = XML_TRUE;
3451
3452 /* figure out whether declared as other than CDATA */
3453 if (attId->maybeTokenized) {
3454 int j;
3455 for (j = 0; j < nDefaultAtts; j++) {
3456 if (attId == elementType->defaultAtts[j].id) {
3457 isCdata = elementType->defaultAtts[j].isCdata;
3458 break;
3459 }
3460 }
3461 }
3462
3463 /* normalize the attribute value */
3464 result = storeAttributeValue(
3465 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3466 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3467 if (result)
3468 return result;
3469 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3470 poolFinish(&parser->m_tempPool);
3471 } else {
3472 /* the value did not need normalizing */
3473 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3474 parser->m_atts[i].valuePtr,
3475 parser->m_atts[i].valueEnd);
3476 if (appAtts[attIndex] == 0)
3477 return XML_ERROR_NO_MEMORY;
3478 poolFinish(&parser->m_tempPool);
3479 }
3480 /* handle prefixed attribute names */
3481 if (attId->prefix) {
3482 if (attId->xmlns) {
3483 /* deal with namespace declarations here */
3484 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3485 appAtts[attIndex], bindingsPtr);
3486 if (result)
3487 return result;
3488 --attIndex;
3489 } else {
3490 /* deal with other prefixed names later */
3491 attIndex++;
3492 nPrefixes++;
3493 (attId->name)[-1] = 2;
3494 }
3495 } else
3496 attIndex++;
3497 }
3498
3499 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3500 parser->m_nSpecifiedAtts = attIndex;
3501 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3502 for (i = 0; i < attIndex; i += 2)
3503 if (appAtts[i] == elementType->idAtt->name) {
3504 parser->m_idAttIndex = i;
3505 break;
3506 }
3507 } else
3508 parser->m_idAttIndex = -1;
3509
3510 /* do attribute defaulting */
3511 for (i = 0; i < nDefaultAtts; i++) {
3512 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3513 if (! (da->id->name)[-1] && da->value) {
3514 if (da->id->prefix) {
3515 if (da->id->xmlns) {
3516 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3517 da->value, bindingsPtr);
3518 if (result)
3519 return result;
3520 } else {
3521 (da->id->name)[-1] = 2;
3522 nPrefixes++;
3523 appAtts[attIndex++] = da->id->name;
3524 appAtts[attIndex++] = da->value;
3525 }
3526 } else {
3527 (da->id->name)[-1] = 1;
3528 appAtts[attIndex++] = da->id->name;
3529 appAtts[attIndex++] = da->value;
3530 }
3531 }
3532 }
3533 appAtts[attIndex] = 0;
3534
3535 /* expand prefixed attribute names, check for duplicates,
3536 and clear flags that say whether attributes were specified */
3537 i = 0;
3538 if (nPrefixes) {
3539 int j; /* hash table index */
3540 unsigned long version = parser->m_nsAttsVersion;
3541
3542 /* Detect and prevent invalid shift */
3543 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3544 return XML_ERROR_NO_MEMORY;
3545 }
3546
3547 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3548 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3549 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3550 if ((nPrefixes << 1)
3551 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3552 NS_ATT *temp;
3553 /* hash table size must also be a power of 2 and >= 8 */
3554 while (nPrefixes >> parser->m_nsAttsPower++)
3555 ;
3556 if (parser->m_nsAttsPower < 3)
3557 parser->m_nsAttsPower = 3;
3558
3559 /* Detect and prevent invalid shift */
3560 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3561 /* Restore actual size of memory in m_nsAtts */
3562 parser->m_nsAttsPower = oldNsAttsPower;
3563 return XML_ERROR_NO_MEMORY;
3564 }
3565
3566 nsAttsSize = 1u << parser->m_nsAttsPower;
3567
3568 /* Detect and prevent integer overflow.
3569 * The preprocessor guard addresses the "always false" warning
3570 * from -Wtype-limits on platforms where
3571 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3572 #if UINT_MAX >= SIZE_MAX
3573 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3574 /* Restore actual size of memory in m_nsAtts */
3575 parser->m_nsAttsPower = oldNsAttsPower;
3576 return XML_ERROR_NO_MEMORY;
3577 }
3578 #endif
3579
3580 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3581 nsAttsSize * sizeof(NS_ATT));
3582 if (! temp) {
3583 /* Restore actual size of memory in m_nsAtts */
3584 parser->m_nsAttsPower = oldNsAttsPower;
3585 return XML_ERROR_NO_MEMORY;
3586 }
3587 parser->m_nsAtts = temp;
3588 version = 0; /* force re-initialization of m_nsAtts hash table */
3589 }
3590 /* using a version flag saves us from initializing m_nsAtts every time */
3591 if (! version) { /* initialize version flags when version wraps around */
3592 version = INIT_ATTS_VERSION;
3593 for (j = nsAttsSize; j != 0;)
3594 parser->m_nsAtts[--j].version = version;
3595 }
3596 parser->m_nsAttsVersion = --version;
3597
3598 /* expand prefixed names and check for duplicates */
3599 for (; i < attIndex; i += 2) {
3600 const XML_Char *s = appAtts[i];
3601 if (s[-1] == 2) { /* prefixed */
3602 ATTRIBUTE_ID *id;
3603 const BINDING *b;
3604 unsigned long uriHash;
3605 struct siphash sip_state;
3606 struct sipkey sip_key;
3607
3608 copy_salt_to_sipkey(parser, &sip_key);
3609 sip24_init(&sip_state, &sip_key);
3610
3611 ((XML_Char *)s)[-1] = 0; /* clear flag */
3612 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3613 if (! id || ! id->prefix) {
3614 /* This code is walking through the appAtts array, dealing
3615 * with (in this case) a prefixed attribute name. To be in
3616 * the array, the attribute must have already been bound, so
3617 * has to have passed through the hash table lookup once
3618 * already. That implies that an entry for it already
3619 * exists, so the lookup above will return a pointer to
3620 * already allocated memory. There is no opportunaity for
3621 * the allocator to fail, so the condition above cannot be
3622 * fulfilled.
3623 *
3624 * Since it is difficult to be certain that the above
3625 * analysis is complete, we retain the test and merely
3626 * remove the code from coverage tests.
3627 */
3628 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3629 }
3630 b = id->prefix->binding;
3631 if (! b)
3632 return XML_ERROR_UNBOUND_PREFIX;
3633
3634 for (j = 0; j < b->uriLen; j++) {
3635 const XML_Char c = b->uri[j];
3636 if (! poolAppendChar(&parser->m_tempPool, c))
3637 return XML_ERROR_NO_MEMORY;
3638 }
3639
3640 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3641
3642 while (*s++ != XML_T(ASCII_COLON))
3643 ;
3644
3645 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3646
3647 do { /* copies null terminator */
3648 if (! poolAppendChar(&parser->m_tempPool, *s))
3649 return XML_ERROR_NO_MEMORY;
3650 } while (*s++);
3651
3652 uriHash = (unsigned long)sip24_final(&sip_state);
3653
3654 { /* Check hash table for duplicate of expanded name (uriName).
3655 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3656 */
3657 unsigned char step = 0;
3658 unsigned long mask = nsAttsSize - 1;
3659 j = uriHash & mask; /* index into hash table */
3660 while (parser->m_nsAtts[j].version == version) {
3661 /* for speed we compare stored hash values first */
3662 if (uriHash == parser->m_nsAtts[j].hash) {
3663 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3664 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3665 /* s1 is null terminated, but not s2 */
3666 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3667 ;
3668 if (*s1 == 0)
3669 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3670 }
3671 if (! step)
3672 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3673 j < step ? (j += nsAttsSize - step) : (j -= step);
3674 }
3675 }
3676
3677 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3678 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3679 s = b->prefix->name;
3680 do {
3681 if (! poolAppendChar(&parser->m_tempPool, *s))
3682 return XML_ERROR_NO_MEMORY;
3683 } while (*s++);
3684 }
3685
3686 /* store expanded name in attribute list */
3687 s = poolStart(&parser->m_tempPool);
3688 poolFinish(&parser->m_tempPool);
3689 appAtts[i] = s;
3690
3691 /* fill empty slot with new version, uriName and hash value */
3692 parser->m_nsAtts[j].version = version;
3693 parser->m_nsAtts[j].hash = uriHash;
3694 parser->m_nsAtts[j].uriName = s;
3695
3696 if (! --nPrefixes) {
3697 i += 2;
3698 break;
3699 }
3700 } else /* not prefixed */
3701 ((XML_Char *)s)[-1] = 0; /* clear flag */
3702 }
3703 }
3704 /* clear flags for the remaining attributes */
3705 for (; i < attIndex; i += 2)
3706 ((XML_Char *)(appAtts[i]))[-1] = 0;
3707 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3708 binding->attId->name[-1] = 0;
3709
3710 if (! parser->m_ns)
3711 return XML_ERROR_NONE;
3712
3713 /* expand the element type name */
3714 if (elementType->prefix) {
3715 binding = elementType->prefix->binding;
3716 if (! binding)
3717 return XML_ERROR_UNBOUND_PREFIX;
3718 localPart = tagNamePtr->str;
3719 while (*localPart++ != XML_T(ASCII_COLON))
3720 ;
3721 } else if (dtd->defaultPrefix.binding) {
3722 binding = dtd->defaultPrefix.binding;
3723 localPart = tagNamePtr->str;
3724 } else
3725 return XML_ERROR_NONE;
3726 prefixLen = 0;
3727 if (parser->m_ns_triplets && binding->prefix->name) {
3728 for (; binding->prefix->name[prefixLen++];)
3729 ; /* prefixLen includes null terminator */
3730 }
3731 tagNamePtr->localPart = localPart;
3732 tagNamePtr->uriLen = binding->uriLen;
3733 tagNamePtr->prefix = binding->prefix->name;
3734 tagNamePtr->prefixLen = prefixLen;
3735 for (i = 0; localPart[i++];)
3736 ; /* i includes null terminator */
3737
3738 /* Detect and prevent integer overflow */
3739 if (binding->uriLen > INT_MAX - prefixLen
3740 || i > INT_MAX - (binding->uriLen + prefixLen)) {
3741 return XML_ERROR_NO_MEMORY;
3742 }
3743
3744 n = i + binding->uriLen + prefixLen;
3745 if (n > binding->uriAlloc) {
3746 TAG *p;
3747
3748 /* Detect and prevent integer overflow */
3749 if (n > INT_MAX - EXPAND_SPARE) {
3750 return XML_ERROR_NO_MEMORY;
3751 }
3752 /* Detect and prevent integer overflow.
3753 * The preprocessor guard addresses the "always false" warning
3754 * from -Wtype-limits on platforms where
3755 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3756 #if UINT_MAX >= SIZE_MAX
3757 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3758 return XML_ERROR_NO_MEMORY;
3759 }
3760 #endif
3761
3762 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3763 if (! uri)
3764 return XML_ERROR_NO_MEMORY;
3765 binding->uriAlloc = n + EXPAND_SPARE;
3766 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3767 for (p = parser->m_tagStack; p; p = p->parent)
3768 if (p->name.str == binding->uri)
3769 p->name.str = uri;
3770 FREE(parser, binding->uri);
3771 binding->uri = uri;
3772 }
3773 /* if m_namespaceSeparator != '\0' then uri includes it already */
3774 uri = binding->uri + binding->uriLen;
3775 memcpy(uri, localPart, i * sizeof(XML_Char));
3776 /* we always have a namespace separator between localPart and prefix */
3777 if (prefixLen) {
3778 uri += i - 1;
3779 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3780 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3781 }
3782 tagNamePtr->str = binding->uri;
3783 return XML_ERROR_NONE;
3784 }
3785
3786 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3787 is_rfc3986_uri_char(XML_Char candidate) {
3788 // For the RFC 3986 ANBF grammar see
3789 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3790
3791 switch (candidate) {
3792 // From rule "ALPHA" (uppercase half)
3793 case 'A':
3794 case 'B':
3795 case 'C':
3796 case 'D':
3797 case 'E':
3798 case 'F':
3799 case 'G':
3800 case 'H':
3801 case 'I':
3802 case 'J':
3803 case 'K':
3804 case 'L':
3805 case 'M':
3806 case 'N':
3807 case 'O':
3808 case 'P':
3809 case 'Q':
3810 case 'R':
3811 case 'S':
3812 case 'T':
3813 case 'U':
3814 case 'V':
3815 case 'W':
3816 case 'X':
3817 case 'Y':
3818 case 'Z':
3819
3820 // From rule "ALPHA" (lowercase half)
3821 case 'a':
3822 case 'b':
3823 case 'c':
3824 case 'd':
3825 case 'e':
3826 case 'f':
3827 case 'g':
3828 case 'h':
3829 case 'i':
3830 case 'j':
3831 case 'k':
3832 case 'l':
3833 case 'm':
3834 case 'n':
3835 case 'o':
3836 case 'p':
3837 case 'q':
3838 case 'r':
3839 case 's':
3840 case 't':
3841 case 'u':
3842 case 'v':
3843 case 'w':
3844 case 'x':
3845 case 'y':
3846 case 'z':
3847
3848 // From rule "DIGIT"
3849 case '0':
3850 case '1':
3851 case '2':
3852 case '3':
3853 case '4':
3854 case '5':
3855 case '6':
3856 case '7':
3857 case '8':
3858 case '9':
3859
3860 // From rule "pct-encoded"
3861 case '%':
3862
3863 // From rule "unreserved"
3864 case '-':
3865 case '.':
3866 case '_':
3867 case '~':
3868
3869 // From rule "gen-delims"
3870 case ':':
3871 case '/':
3872 case '?':
3873 case '#':
3874 case '[':
3875 case ']':
3876 case '@':
3877
3878 // From rule "sub-delims"
3879 case '!':
3880 case '$':
3881 case '&':
3882 case '\'':
3883 case '(':
3884 case ')':
3885 case '*':
3886 case '+':
3887 case ',':
3888 case ';':
3889 case '=':
3890 return XML_TRUE;
3891
3892 default:
3893 return XML_FALSE;
3894 }
3895 }
3896
3897 /* addBinding() overwrites the value of prefix->binding without checking.
3898 Therefore one must keep track of the old value outside of addBinding().
3899 */
3900 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3901 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3902 const XML_Char *uri, BINDING **bindingsPtr) {
3903 // "http://www.w3.org/XML/1998/namespace"
3904 static const XML_Char xmlNamespace[]
3905 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3906 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3907 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3908 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3909 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3910 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3911 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3912 ASCII_e, '\0'};
3913 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3914 // "http://www.w3.org/2000/xmlns/"
3915 static const XML_Char xmlnsNamespace[]
3916 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3917 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3918 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3919 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3920 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3921 static const int xmlnsLen
3922 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3923
3924 XML_Bool mustBeXML = XML_FALSE;
3925 XML_Bool isXML = XML_TRUE;
3926 XML_Bool isXMLNS = XML_TRUE;
3927
3928 BINDING *b;
3929 int len;
3930
3931 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3932 if (*uri == XML_T('\0') && prefix->name)
3933 return XML_ERROR_UNDECLARING_PREFIX;
3934
3935 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3936 && prefix->name[1] == XML_T(ASCII_m)
3937 && prefix->name[2] == XML_T(ASCII_l)) {
3938 /* Not allowed to bind xmlns */
3939 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3940 && prefix->name[5] == XML_T('\0'))
3941 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3942
3943 if (prefix->name[3] == XML_T('\0'))
3944 mustBeXML = XML_TRUE;
3945 }
3946
3947 for (len = 0; uri[len]; len++) {
3948 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3949 isXML = XML_FALSE;
3950
3951 if (! mustBeXML && isXMLNS
3952 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3953 isXMLNS = XML_FALSE;
3954
3955 // NOTE: While Expat does not validate namespace URIs against RFC 3986
3956 // today (and is not REQUIRED to do so with regard to the XML 1.0
3957 // namespaces specification) we have to at least make sure, that
3958 // the application on top of Expat (that is likely splitting expanded
3959 // element names ("qualified names") of form
3960 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3961 // in its element handler code) cannot be confused by an attacker
3962 // putting additional namespace separator characters into namespace
3963 // declarations. That would be ambiguous and not to be expected.
3964 //
3965 // While the HTML API docs of function XML_ParserCreateNS have been
3966 // advising against use of a namespace separator character that can
3967 // appear in a URI for >20 years now, some widespread applications
3968 // are using URI characters (':' (colon) in particular) for a
3969 // namespace separator, in practice. To keep these applications
3970 // functional, we only reject namespaces URIs containing the
3971 // application-chosen namespace separator if the chosen separator
3972 // is a non-URI character with regard to RFC 3986.
3973 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3974 && ! is_rfc3986_uri_char(uri[len])) {
3975 return XML_ERROR_SYNTAX;
3976 }
3977 }
3978 isXML = isXML && len == xmlLen;
3979 isXMLNS = isXMLNS && len == xmlnsLen;
3980
3981 if (mustBeXML != isXML)
3982 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3983 : XML_ERROR_RESERVED_NAMESPACE_URI;
3984
3985 if (isXMLNS)
3986 return XML_ERROR_RESERVED_NAMESPACE_URI;
3987
3988 if (parser->m_namespaceSeparator)
3989 len++;
3990 if (parser->m_freeBindingList) {
3991 b = parser->m_freeBindingList;
3992 if (len > b->uriAlloc) {
3993 /* Detect and prevent integer overflow */
3994 if (len > INT_MAX - EXPAND_SPARE) {
3995 return XML_ERROR_NO_MEMORY;
3996 }
3997
3998 /* Detect and prevent integer overflow.
3999 * The preprocessor guard addresses the "always false" warning
4000 * from -Wtype-limits on platforms where
4001 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4002 #if UINT_MAX >= SIZE_MAX
4003 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4004 return XML_ERROR_NO_MEMORY;
4005 }
4006 #endif
4007
4008 XML_Char *temp = (XML_Char *)REALLOC(
4009 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4010 if (temp == NULL)
4011 return XML_ERROR_NO_MEMORY;
4012 b->uri = temp;
4013 b->uriAlloc = len + EXPAND_SPARE;
4014 }
4015 parser->m_freeBindingList = b->nextTagBinding;
4016 } else {
4017 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4018 if (! b)
4019 return XML_ERROR_NO_MEMORY;
4020
4021 /* Detect and prevent integer overflow */
4022 if (len > INT_MAX - EXPAND_SPARE) {
4023 return XML_ERROR_NO_MEMORY;
4024 }
4025 /* Detect and prevent integer overflow.
4026 * The preprocessor guard addresses the "always false" warning
4027 * from -Wtype-limits on platforms where
4028 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4029 #if UINT_MAX >= SIZE_MAX
4030 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4031 return XML_ERROR_NO_MEMORY;
4032 }
4033 #endif
4034
4035 b->uri
4036 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4037 if (! b->uri) {
4038 FREE(parser, b);
4039 return XML_ERROR_NO_MEMORY;
4040 }
4041 b->uriAlloc = len + EXPAND_SPARE;
4042 }
4043 b->uriLen = len;
4044 memcpy(b->uri, uri, len * sizeof(XML_Char));
4045 if (parser->m_namespaceSeparator)
4046 b->uri[len - 1] = parser->m_namespaceSeparator;
4047 b->prefix = prefix;
4048 b->attId = attId;
4049 b->prevPrefixBinding = prefix->binding;
4050 /* NULL binding when default namespace undeclared */
4051 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4052 prefix->binding = NULL;
4053 else
4054 prefix->binding = b;
4055 b->nextTagBinding = *bindingsPtr;
4056 *bindingsPtr = b;
4057 /* if attId == NULL then we are not starting a namespace scope */
4058 if (attId && parser->m_startNamespaceDeclHandler)
4059 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4060 prefix->binding ? uri : 0);
4061 return XML_ERROR_NONE;
4062 }
4063
4064 /* The idea here is to avoid using stack for each CDATA section when
4065 the whole file is parsed with one call.
4066 */
4067 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4068 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4069 const char **endPtr) {
4070 enum XML_Error result = doCdataSection(
4071 parser, parser->m_encoding, &start, end, endPtr,
4072 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4073 if (result != XML_ERROR_NONE)
4074 return result;
4075 if (start) {
4076 if (parser->m_parentParser) { /* we are parsing an external entity */
4077 parser->m_processor = externalEntityContentProcessor;
4078 return externalEntityContentProcessor(parser, start, end, endPtr);
4079 } else {
4080 parser->m_processor = contentProcessor;
4081 return contentProcessor(parser, start, end, endPtr);
4082 }
4083 }
4084 return result;
4085 }
4086
/* Tokenizes the inside of a CDATA section beginning at *startPtr and
   reports its pieces to the application's handlers.

   startPtr gets set to non-null if the section is closed, and to null if
   the section is not yet closed.

   Parameters:
     parser   - the parser; supplies the handlers, the data buffer and the
                parsing status
     enc      - encoding used for tokenizing and conversion; it also
                selects which pair of event pointers is updated (see below)
     startPtr - in: start of the data to process; out: see above
     end      - end of the available input
     nextPtr  - out: position at which processing stopped (set on section
                close, on suspension, and on an incomplete token when
                haveMore is true)
     haveMore - if true, an incomplete token at the end of the input is not
                an error: XML_ERROR_NONE is returned and *nextPtr points to
                the start of that token
     account  - forwarded to accountingDiffTolerated() when XML_GE == 1;
                unused otherwise
*/
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
               const char *end, const char **nextPtr, XML_Bool haveMore,
               enum XML_Account account) {
  const char *s = *startPtr;
  const char **eventPP;
  const char **eventEndPP;
  /* Event positions are tracked in the parser itself when tokenizing with
     the parser's own encoding, and in parser->m_openInternalEntities
     otherwise */
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    *eventPP = s;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;
  *startPtr = NULL; /* section counts as "not closed" until proven otherwise */

  /* One token per iteration; every path out of the loop is a return */
  for (;;) {
    const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
    int tok = XmlCdataSectionTok(enc, s, end, &next);
#if XML_GE == 1
    if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#else
    UNUSED_P(account);
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_CDATA_SECT_CLOSE:
      /* End of the section: notify the application, then tell the caller
         where to continue via *startPtr and *nextPtr */
      if (parser->m_endCdataSectionHandler)
        parser->m_endCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* see comment under XML_TOK_CDATA_SECT_OPEN */
      else if ((0) && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      *startPtr = next;
      *nextPtr = next;
      if (parser->m_parsingStatus.parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
      else
        return XML_ERROR_NONE;
    case XML_TOK_DATA_NEWLINE:
      /* A newline token is reported as the single character 0xA */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_DATA_CHARS: {
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert through parser->m_dataBuf and call the handler once
             per converted piece, until XmlConvert() reports that it
             completed or that the remaining input is incomplete */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = next;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            *eventPP = s;
          }
        } else
          /* No conversion needed: hand the input bytes over directly */
          charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
                          (int)((const XML_Char *)next - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      /* Incomplete character: wait for more input if there will be any */
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_PARTIAL:
    case XML_TOK_NONE:
      /* Input ended inside the section: wait for more input if there
         will be any, otherwise the section was never closed */
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_CDATA_SECTION;
    default:
      /* Every token returned by XmlCdataSectionTok() has its own
       * explicit case, so this default case will never be executed.
       * We retain it as a safety net and exclude it from the coverage
       * statistics.
       *
       * LCOV_EXCL_START
       */
      *eventPP = next;
      return XML_ERROR_UNEXPECTED_STATE;
      /* LCOV_EXCL_STOP */
    }

    /* Advance past the token just handled, then check the parsing status
       (the handlers called above receive the parser's handler argument;
       presumably they may have suspended or aborted parsing -- confirm
       against XML_StopParser) */
    *eventPP = s = next;
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    default:;
    }
  }
  /* not reached */
}
4208
4209 #ifdef XML_DTD
4210
4211 /* The idea here is to avoid using stack for each IGNORE section when
4212 the whole file is parsed with one call.
4213 */
4214 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4215 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4216 const char **endPtr) {
4217 enum XML_Error result
4218 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4219 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4220 if (result != XML_ERROR_NONE)
4221 return result;
4222 if (start) {
4223 parser->m_processor = prologProcessor;
4224 return prologProcessor(parser, start, end, endPtr);
4225 }
4226 return result;
4227 }
4228
4229 /* startPtr gets set to non-null is the section is closed, and to null
4230 if the section is not yet closed.
4231 */
4232 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4233 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4234 const char *end, const char **nextPtr, XML_Bool haveMore) {
4235 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4236 int tok;
4237 const char *s = *startPtr;
4238 const char **eventPP;
4239 const char **eventEndPP;
4240 if (enc == parser->m_encoding) {
4241 eventPP = &parser->m_eventPtr;
4242 *eventPP = s;
4243 eventEndPP = &parser->m_eventEndPtr;
4244 } else {
4245 /* It's not entirely clear, but it seems the following two lines
4246 * of code cannot be executed. The only occasions on which 'enc'
4247 * is not 'encoding' are when this function is called
4248 * from the internal entity processing, and IGNORE sections are an
4249 * error in internal entities.
4250 *
4251 * Since it really isn't clear that this is true, we keep the code
4252 * and just remove it from our coverage tests.
4253 *
4254 * LCOV_EXCL_START
4255 */
4256 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4257 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4258 /* LCOV_EXCL_STOP */
4259 }
4260 *eventPP = s;
4261 *startPtr = NULL;
4262 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4263 # if XML_GE == 1
4264 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4265 XML_ACCOUNT_DIRECT)) {
4266 accountingOnAbort(parser);
4267 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4268 }
4269 # endif
4270 *eventEndPP = next;
4271 switch (tok) {
4272 case XML_TOK_IGNORE_SECT:
4273 if (parser->m_defaultHandler)
4274 reportDefault(parser, enc, s, next);
4275 *startPtr = next;
4276 *nextPtr = next;
4277 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4278 return XML_ERROR_ABORTED;
4279 else
4280 return XML_ERROR_NONE;
4281 case XML_TOK_INVALID:
4282 *eventPP = next;
4283 return XML_ERROR_INVALID_TOKEN;
4284 case XML_TOK_PARTIAL_CHAR:
4285 if (haveMore) {
4286 *nextPtr = s;
4287 return XML_ERROR_NONE;
4288 }
4289 return XML_ERROR_PARTIAL_CHAR;
4290 case XML_TOK_PARTIAL:
4291 case XML_TOK_NONE:
4292 if (haveMore) {
4293 *nextPtr = s;
4294 return XML_ERROR_NONE;
4295 }
4296 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4297 default:
4298 /* All of the tokens that XmlIgnoreSectionTok() returns have
4299 * explicit cases to handle them, so this default case is never
4300 * executed. We keep it as a safety net anyway, and remove it
4301 * from our test coverage statistics.
4302 *
4303 * LCOV_EXCL_START
4304 */
4305 *eventPP = next;
4306 return XML_ERROR_UNEXPECTED_STATE;
4307 /* LCOV_EXCL_STOP */
4308 }
4309 /* not reached */
4310 }
4311
4312 #endif /* XML_DTD */
4313
4314 static enum XML_Error
initializeEncoding(XML_Parser parser)4315 initializeEncoding(XML_Parser parser) {
4316 const char *s;
4317 #ifdef XML_UNICODE
4318 char encodingBuf[128];
4319 /* See comments about `protocolEncodingName` in parserInit() */
4320 if (! parser->m_protocolEncodingName)
4321 s = NULL;
4322 else {
4323 int i;
4324 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4325 if (i == sizeof(encodingBuf) - 1
4326 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4327 encodingBuf[0] = '\0';
4328 break;
4329 }
4330 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4331 }
4332 encodingBuf[i] = '\0';
4333 s = encodingBuf;
4334 }
4335 #else
4336 s = parser->m_protocolEncodingName;
4337 #endif
4338 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4339 &parser->m_initEncoding, &parser->m_encoding, s))
4340 return XML_ERROR_NONE;
4341 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4342 }
4343
/* Handles an XML declaration or text declaration spanning [s, next).

   Parameters:
     parser              - the parser
     isGeneralTextEntity - non-zero selects XML_ERROR_TEXT_DECL instead of
                           XML_ERROR_XML_DECL as the error for a malformed
                           declaration, and suppresses the standalone
                           handling
     s, next             - start and end of the declaration in the input

   The declaration is parsed, reported to the XML declaration handler (or,
   failing that, to the default handler), and - provided no protocol
   encoding name is set on the parser - the declared encoding is put into
   effect or passed on to handleUnknownEncoding().

   Returns XML_ERROR_NONE on success, otherwise one of the error codes
   returned below.
*/
static enum XML_Error
processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
               const char *next) {
  const char *encodingName = NULL;
  const XML_Char *storedEncName = NULL;
  const ENCODING *newEncoding = NULL;
  const char *version = NULL;
  const char *versionend = NULL;
  const XML_Char *storedversion = NULL;
  int standalone = -1; /* only the value 1 is acted upon in this function */

#if XML_GE == 1
  if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
                                XML_ACCOUNT_DIRECT)) {
    accountingOnAbort(parser);
    return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
  }
#endif

  /* Parse the declaration; the results come back through the pointers to
     the locals declared above */
  if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
          isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
          &version, &versionend, &encodingName, &newEncoding, &standalone)) {
    if (isGeneralTextEntity)
      return XML_ERROR_TEXT_DECL;
    else
      return XML_ERROR_XML_DECL;
  }
  if (! isGeneralTextEntity && standalone == 1) {
    parser->m_dtd->standalone = XML_TRUE;
#ifdef XML_DTD
    /* A standalone document turns "unless standalone" parameter entity
       parsing into "never" */
    if (parser->m_paramEntityParsing
        == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
      parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif /* XML_DTD */
  }
  if (parser->m_xmlDeclHandler) {
    /* Copy encoding name and version into m_temp2Pool so that they can be
       handed to the handler as XML_Char strings */
    if (encodingName != NULL) {
      storedEncName = poolStoreString(
          &parser->m_temp2Pool, parser->m_encoding, encodingName,
          encodingName + XmlNameLength(parser->m_encoding, encodingName));
      if (! storedEncName)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_temp2Pool);
    }
    if (version) {
      /* NOTE(review): the stored version ends one minimum-size character
         before versionend -- presumably versionend points just past the
         closing quote; confirm against XmlParseXmlDecl */
      storedversion
          = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
                            versionend - parser->m_encoding->minBytesPerChar);
      if (! storedversion)
        return XML_ERROR_NO_MEMORY;
    }
    parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
                             standalone);
  } else if (parser->m_defaultHandler)
    reportDefault(parser, parser->m_encoding, s, next);
  /* The declared encoding is only acted upon when no protocol encoding
     name is set */
  if (parser->m_protocolEncodingName == NULL) {
    if (newEncoding) {
      /* Check that the specified encoding does not conflict with what
       * the parser has already deduced.  Do we have the same number
       * of bytes in the smallest representation of a character?  If
       * this is UTF-16, is it the same endianness?
       */
      if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
          || (newEncoding->minBytesPerChar == 2
              && newEncoding != parser->m_encoding)) {
        parser->m_eventPtr = encodingName;
        return XML_ERROR_INCORRECT_ENCODING;
      }
      parser->m_encoding = newEncoding;
    } else if (encodingName) {
      /* An encoding name was declared but no matching ENCODING came back
         from the declaration parser: defer to handleUnknownEncoding() */
      enum XML_Error result;
      if (! storedEncName) {
        /* not stored above because no XML declaration handler is set */
        storedEncName = poolStoreString(
            &parser->m_temp2Pool, parser->m_encoding, encodingName,
            encodingName + XmlNameLength(parser->m_encoding, encodingName));
        if (! storedEncName)
          return XML_ERROR_NO_MEMORY;
      }
      result = handleUnknownEncoding(parser, storedEncName);
      poolClear(&parser->m_temp2Pool);
      if (result == XML_ERROR_UNKNOWN_ENCODING)
        parser->m_eventPtr = encodingName;
      return result;
    }
  }

  /* Release the temporary copies made for the declaration handler */
  if (storedEncName || storedversion)
    poolClear(&parser->m_temp2Pool);

  return XML_ERROR_NONE;
}
4435
4436 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4437 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4438 if (parser->m_unknownEncodingHandler) {
4439 XML_Encoding info;
4440 int i;
4441 for (i = 0; i < 256; i++)
4442 info.map[i] = -1;
4443 info.convert = NULL;
4444 info.data = NULL;
4445 info.release = NULL;
4446 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4447 encodingName, &info)) {
4448 ENCODING *enc;
4449 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4450 if (! parser->m_unknownEncodingMem) {
4451 if (info.release)
4452 info.release(info.data);
4453 return XML_ERROR_NO_MEMORY;
4454 }
4455 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4456 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4457 if (enc) {
4458 parser->m_unknownEncodingData = info.data;
4459 parser->m_unknownEncodingRelease = info.release;
4460 parser->m_encoding = enc;
4461 return XML_ERROR_NONE;
4462 }
4463 }
4464 if (info.release != NULL)
4465 info.release(info.data);
4466 }
4467 return XML_ERROR_UNKNOWN_ENCODING;
4468 }
4469
4470 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4471 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4472 const char **nextPtr) {
4473 enum XML_Error result = initializeEncoding(parser);
4474 if (result != XML_ERROR_NONE)
4475 return result;
4476 parser->m_processor = prologProcessor;
4477 return prologProcessor(parser, s, end, nextPtr);
4478 }
4479
4480 #ifdef XML_DTD
4481
4482 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4483 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4484 const char **nextPtr) {
4485 enum XML_Error result = initializeEncoding(parser);
4486 if (result != XML_ERROR_NONE)
4487 return result;
4488
4489 /* we know now that XML_Parse(Buffer) has been called,
4490 so we consider the external parameter entity read */
4491 parser->m_dtd->paramEntityRead = XML_TRUE;
4492
4493 if (parser->m_prologState.inEntityValue) {
4494 parser->m_processor = entityValueInitProcessor;
4495 return entityValueInitProcessor(parser, s, end, nextPtr);
4496 } else {
4497 parser->m_processor = externalParEntProcessor;
4498 return externalParEntProcessor(parser, s, end, nextPtr);
4499 }
4500 }
4501
4502 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4503 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4504 const char **nextPtr) {
4505 int tok;
4506 const char *start = s;
4507 const char *next = start;
4508 parser->m_eventPtr = start;
4509
4510 for (;;) {
4511 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4512 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4513 - storeEntityValue
4514 - processXmlDecl
4515 */
4516 parser->m_eventEndPtr = next;
4517 if (tok <= 0) {
4518 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4519 *nextPtr = s;
4520 return XML_ERROR_NONE;
4521 }
4522 switch (tok) {
4523 case XML_TOK_INVALID:
4524 return XML_ERROR_INVALID_TOKEN;
4525 case XML_TOK_PARTIAL:
4526 return XML_ERROR_UNCLOSED_TOKEN;
4527 case XML_TOK_PARTIAL_CHAR:
4528 return XML_ERROR_PARTIAL_CHAR;
4529 case XML_TOK_NONE: /* start == end */
4530 default:
4531 break;
4532 }
4533 /* found end of entity value - can store it now */
4534 return storeEntityValue(parser, parser->m_encoding, s, end,
4535 XML_ACCOUNT_DIRECT);
4536 } else if (tok == XML_TOK_XML_DECL) {
4537 enum XML_Error result;
4538 result = processXmlDecl(parser, 0, start, next);
4539 if (result != XML_ERROR_NONE)
4540 return result;
4541 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4542 * that to happen, a parameter entity parsing handler must have attempted
4543 * to suspend the parser, which fails and raises an error. The parser can
4544 * be aborted, but can't be suspended.
4545 */
4546 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4547 return XML_ERROR_ABORTED;
4548 *nextPtr = next;
4549 /* stop scanning for text declaration - we found one */
4550 parser->m_processor = entityValueProcessor;
4551 return entityValueProcessor(parser, next, end, nextPtr);
4552 }
4553 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4554 must move s and nextPtr forward to consume the BOM.
4555
4556 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4557 would leave the BOM in the buffer and return. On the next call to this
4558 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4559 is not valid to have multiple BOMs.
4560 */
4561 else if (tok == XML_TOK_BOM) {
4562 # if XML_GE == 1
4563 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4564 XML_ACCOUNT_DIRECT)) {
4565 accountingOnAbort(parser);
4566 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4567 }
4568 # endif
4569
4570 *nextPtr = next;
4571 s = next;
4572 }
4573 /* If we get this token, we have the start of what might be a
4574 normal tag, but not a declaration (i.e. it doesn't begin with
4575 "<!"). In a DTD context, that isn't legal.
4576 */
4577 else if (tok == XML_TOK_INSTANCE_START) {
4578 *nextPtr = next;
4579 return XML_ERROR_SYNTAX;
4580 }
4581 start = next;
4582 parser->m_eventPtr = start;
4583 }
4584 }
4585
4586 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4587 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4588 const char **nextPtr) {
4589 const char *next = s;
4590 int tok;
4591
4592 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4593 if (tok <= 0) {
4594 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4595 *nextPtr = s;
4596 return XML_ERROR_NONE;
4597 }
4598 switch (tok) {
4599 case XML_TOK_INVALID:
4600 return XML_ERROR_INVALID_TOKEN;
4601 case XML_TOK_PARTIAL:
4602 return XML_ERROR_UNCLOSED_TOKEN;
4603 case XML_TOK_PARTIAL_CHAR:
4604 return XML_ERROR_PARTIAL_CHAR;
4605 case XML_TOK_NONE: /* start == end */
4606 default:
4607 break;
4608 }
4609 }
4610 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4611 However, when parsing an external subset, doProlog will not accept a BOM
4612 as valid, and report a syntax error, so we have to skip the BOM, and
4613 account for the BOM bytes.
4614 */
4615 else if (tok == XML_TOK_BOM) {
4616 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4617 XML_ACCOUNT_DIRECT)) {
4618 accountingOnAbort(parser);
4619 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4620 }
4621
4622 s = next;
4623 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4624 }
4625
4626 parser->m_processor = prologProcessor;
4627 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4628 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4629 XML_ACCOUNT_DIRECT);
4630 }
4631
4632 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4633 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4634 const char **nextPtr) {
4635 const char *start = s;
4636 const char *next = s;
4637 const ENCODING *enc = parser->m_encoding;
4638 int tok;
4639
4640 for (;;) {
4641 tok = XmlPrologTok(enc, start, end, &next);
4642 /* Note: These bytes are accounted later in:
4643 - storeEntityValue
4644 */
4645 if (tok <= 0) {
4646 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4647 *nextPtr = s;
4648 return XML_ERROR_NONE;
4649 }
4650 switch (tok) {
4651 case XML_TOK_INVALID:
4652 return XML_ERROR_INVALID_TOKEN;
4653 case XML_TOK_PARTIAL:
4654 return XML_ERROR_UNCLOSED_TOKEN;
4655 case XML_TOK_PARTIAL_CHAR:
4656 return XML_ERROR_PARTIAL_CHAR;
4657 case XML_TOK_NONE: /* start == end */
4658 default:
4659 break;
4660 }
4661 /* found end of entity value - can store it now */
4662 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4663 }
4664 start = next;
4665 }
4666 }
4667
4668 #endif /* XML_DTD */
4669
4670 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4671 prologProcessor(XML_Parser parser, const char *s, const char *end,
4672 const char **nextPtr) {
4673 const char *next = s;
4674 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4675 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4676 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4677 XML_ACCOUNT_DIRECT);
4678 }
4679
4680 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4681 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4682 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4683 XML_Bool allowClosingDoctype, enum XML_Account account) {
4684 #ifdef XML_DTD
4685 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4686 #endif /* XML_DTD */
4687 static const XML_Char atypeCDATA[]
4688 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4689 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4690 static const XML_Char atypeIDREF[]
4691 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4692 static const XML_Char atypeIDREFS[]
4693 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4694 static const XML_Char atypeENTITY[]
4695 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4696 static const XML_Char atypeENTITIES[]
4697 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4698 ASCII_I, ASCII_E, ASCII_S, '\0'};
4699 static const XML_Char atypeNMTOKEN[]
4700 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4701 static const XML_Char atypeNMTOKENS[]
4702 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4703 ASCII_E, ASCII_N, ASCII_S, '\0'};
4704 static const XML_Char notationPrefix[]
4705 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4706 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4707 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4708 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4709
4710 #ifndef XML_DTD
4711 UNUSED_P(account);
4712 #endif
4713
4714 /* save one level of indirection */
4715 DTD *const dtd = parser->m_dtd;
4716
4717 const char **eventPP;
4718 const char **eventEndPP;
4719 enum XML_Content_Quant quant;
4720
4721 if (enc == parser->m_encoding) {
4722 eventPP = &parser->m_eventPtr;
4723 eventEndPP = &parser->m_eventEndPtr;
4724 } else {
4725 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4726 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4727 }
4728
4729 for (;;) {
4730 int role;
4731 XML_Bool handleDefault = XML_TRUE;
4732 *eventPP = s;
4733 *eventEndPP = next;
4734 if (tok <= 0) {
4735 if (haveMore && tok != XML_TOK_INVALID) {
4736 *nextPtr = s;
4737 return XML_ERROR_NONE;
4738 }
4739 switch (tok) {
4740 case XML_TOK_INVALID:
4741 *eventPP = next;
4742 return XML_ERROR_INVALID_TOKEN;
4743 case XML_TOK_PARTIAL:
4744 return XML_ERROR_UNCLOSED_TOKEN;
4745 case XML_TOK_PARTIAL_CHAR:
4746 return XML_ERROR_PARTIAL_CHAR;
4747 case -XML_TOK_PROLOG_S:
4748 tok = -tok;
4749 break;
4750 case XML_TOK_NONE:
4751 #ifdef XML_DTD
4752 /* for internal PE NOT referenced between declarations */
4753 if (enc != parser->m_encoding
4754 && ! parser->m_openInternalEntities->betweenDecl) {
4755 *nextPtr = s;
4756 return XML_ERROR_NONE;
4757 }
4758 /* WFC: PE Between Declarations - must check that PE contains
4759 complete markup, not only for external PEs, but also for
4760 internal PEs if the reference occurs between declarations.
4761 */
4762 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4763 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4764 == XML_ROLE_ERROR)
4765 return XML_ERROR_INCOMPLETE_PE;
4766 *nextPtr = s;
4767 return XML_ERROR_NONE;
4768 }
4769 #endif /* XML_DTD */
4770 return XML_ERROR_NO_ELEMENTS;
4771 default:
4772 tok = -tok;
4773 next = end;
4774 break;
4775 }
4776 }
4777 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4778 #if XML_GE == 1
4779 switch (role) {
4780 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4781 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4782 # ifdef XML_DTD
4783 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4784 # endif
4785 break;
4786 default:
4787 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4788 accountingOnAbort(parser);
4789 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4790 }
4791 }
4792 #endif
4793 switch (role) {
4794 case XML_ROLE_XML_DECL: {
4795 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4796 if (result != XML_ERROR_NONE)
4797 return result;
4798 enc = parser->m_encoding;
4799 handleDefault = XML_FALSE;
4800 } break;
4801 case XML_ROLE_DOCTYPE_NAME:
4802 if (parser->m_startDoctypeDeclHandler) {
4803 parser->m_doctypeName
4804 = poolStoreString(&parser->m_tempPool, enc, s, next);
4805 if (! parser->m_doctypeName)
4806 return XML_ERROR_NO_MEMORY;
4807 poolFinish(&parser->m_tempPool);
4808 parser->m_doctypePubid = NULL;
4809 handleDefault = XML_FALSE;
4810 }
4811 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4812 break;
4813 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4814 if (parser->m_startDoctypeDeclHandler) {
4815 parser->m_startDoctypeDeclHandler(
4816 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4817 parser->m_doctypePubid, 1);
4818 parser->m_doctypeName = NULL;
4819 poolClear(&parser->m_tempPool);
4820 handleDefault = XML_FALSE;
4821 }
4822 break;
4823 #ifdef XML_DTD
4824 case XML_ROLE_TEXT_DECL: {
4825 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4826 if (result != XML_ERROR_NONE)
4827 return result;
4828 enc = parser->m_encoding;
4829 handleDefault = XML_FALSE;
4830 } break;
4831 #endif /* XML_DTD */
4832 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4833 #ifdef XML_DTD
4834 parser->m_useForeignDTD = XML_FALSE;
4835 parser->m_declEntity = (ENTITY *)lookup(
4836 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4837 if (! parser->m_declEntity)
4838 return XML_ERROR_NO_MEMORY;
4839 #endif /* XML_DTD */
4840 dtd->hasParamEntityRefs = XML_TRUE;
4841 if (parser->m_startDoctypeDeclHandler) {
4842 XML_Char *pubId;
4843 if (! XmlIsPublicId(enc, s, next, eventPP))
4844 return XML_ERROR_PUBLICID;
4845 pubId = poolStoreString(&parser->m_tempPool, enc,
4846 s + enc->minBytesPerChar,
4847 next - enc->minBytesPerChar);
4848 if (! pubId)
4849 return XML_ERROR_NO_MEMORY;
4850 normalizePublicId(pubId);
4851 poolFinish(&parser->m_tempPool);
4852 parser->m_doctypePubid = pubId;
4853 handleDefault = XML_FALSE;
4854 goto alreadyChecked;
4855 }
4856 /* fall through */
4857 case XML_ROLE_ENTITY_PUBLIC_ID:
4858 if (! XmlIsPublicId(enc, s, next, eventPP))
4859 return XML_ERROR_PUBLICID;
4860 alreadyChecked:
4861 if (dtd->keepProcessing && parser->m_declEntity) {
4862 XML_Char *tem
4863 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4864 next - enc->minBytesPerChar);
4865 if (! tem)
4866 return XML_ERROR_NO_MEMORY;
4867 normalizePublicId(tem);
4868 parser->m_declEntity->publicId = tem;
4869 poolFinish(&dtd->pool);
4870 /* Don't suppress the default handler if we fell through from
4871 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4872 */
4873 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4874 handleDefault = XML_FALSE;
4875 }
4876 break;
4877 case XML_ROLE_DOCTYPE_CLOSE:
4878 if (allowClosingDoctype != XML_TRUE) {
4879 /* Must not close doctype from within expanded parameter entities */
4880 return XML_ERROR_INVALID_TOKEN;
4881 }
4882
4883 if (parser->m_doctypeName) {
4884 parser->m_startDoctypeDeclHandler(
4885 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4886 parser->m_doctypePubid, 0);
4887 poolClear(&parser->m_tempPool);
4888 handleDefault = XML_FALSE;
4889 }
4890 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4891 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4892 was not set, indicating an external subset
4893 */
4894 #ifdef XML_DTD
4895 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4896 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4897 dtd->hasParamEntityRefs = XML_TRUE;
4898 if (parser->m_paramEntityParsing
4899 && parser->m_externalEntityRefHandler) {
4900 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4901 externalSubsetName, sizeof(ENTITY));
4902 if (! entity) {
4903 /* The external subset name "#" will have already been
4904 * inserted into the hash table at the start of the
4905 * external entity parsing, so no allocation will happen
4906 * and lookup() cannot fail.
4907 */
4908 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4909 }
4910 if (parser->m_useForeignDTD)
4911 entity->base = parser->m_curBase;
4912 dtd->paramEntityRead = XML_FALSE;
4913 if (! parser->m_externalEntityRefHandler(
4914 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4915 entity->systemId, entity->publicId))
4916 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4917 if (dtd->paramEntityRead) {
4918 if (! dtd->standalone && parser->m_notStandaloneHandler
4919 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4920 return XML_ERROR_NOT_STANDALONE;
4921 }
4922 /* if we didn't read the foreign DTD then this means that there
4923 is no external subset and we must reset dtd->hasParamEntityRefs
4924 */
4925 else if (! parser->m_doctypeSysid)
4926 dtd->hasParamEntityRefs = hadParamEntityRefs;
4927 /* end of DTD - no need to update dtd->keepProcessing */
4928 }
4929 parser->m_useForeignDTD = XML_FALSE;
4930 }
4931 #endif /* XML_DTD */
4932 if (parser->m_endDoctypeDeclHandler) {
4933 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4934 handleDefault = XML_FALSE;
4935 }
4936 break;
4937 case XML_ROLE_INSTANCE_START:
4938 #ifdef XML_DTD
4939 /* if there is no DOCTYPE declaration then now is the
4940 last chance to read the foreign DTD
4941 */
4942 if (parser->m_useForeignDTD) {
4943 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4944 dtd->hasParamEntityRefs = XML_TRUE;
4945 if (parser->m_paramEntityParsing
4946 && parser->m_externalEntityRefHandler) {
4947 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4948 externalSubsetName, sizeof(ENTITY));
4949 if (! entity)
4950 return XML_ERROR_NO_MEMORY;
4951 entity->base = parser->m_curBase;
4952 dtd->paramEntityRead = XML_FALSE;
4953 if (! parser->m_externalEntityRefHandler(
4954 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4955 entity->systemId, entity->publicId))
4956 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4957 if (dtd->paramEntityRead) {
4958 if (! dtd->standalone && parser->m_notStandaloneHandler
4959 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4960 return XML_ERROR_NOT_STANDALONE;
4961 }
4962 /* if we didn't read the foreign DTD then this means that there
4963 is no external subset and we must reset dtd->hasParamEntityRefs
4964 */
4965 else
4966 dtd->hasParamEntityRefs = hadParamEntityRefs;
4967 /* end of DTD - no need to update dtd->keepProcessing */
4968 }
4969 }
4970 #endif /* XML_DTD */
4971 parser->m_processor = contentProcessor;
4972 return contentProcessor(parser, s, end, nextPtr);
4973 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4974 parser->m_declElementType = getElementType(parser, enc, s, next);
4975 if (! parser->m_declElementType)
4976 return XML_ERROR_NO_MEMORY;
4977 goto checkAttListDeclHandler;
4978 case XML_ROLE_ATTRIBUTE_NAME:
4979 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4980 if (! parser->m_declAttributeId)
4981 return XML_ERROR_NO_MEMORY;
4982 parser->m_declAttributeIsCdata = XML_FALSE;
4983 parser->m_declAttributeType = NULL;
4984 parser->m_declAttributeIsId = XML_FALSE;
4985 goto checkAttListDeclHandler;
4986 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4987 parser->m_declAttributeIsCdata = XML_TRUE;
4988 parser->m_declAttributeType = atypeCDATA;
4989 goto checkAttListDeclHandler;
4990 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4991 parser->m_declAttributeIsId = XML_TRUE;
4992 parser->m_declAttributeType = atypeID;
4993 goto checkAttListDeclHandler;
4994 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4995 parser->m_declAttributeType = atypeIDREF;
4996 goto checkAttListDeclHandler;
4997 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4998 parser->m_declAttributeType = atypeIDREFS;
4999 goto checkAttListDeclHandler;
5000 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5001 parser->m_declAttributeType = atypeENTITY;
5002 goto checkAttListDeclHandler;
5003 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5004 parser->m_declAttributeType = atypeENTITIES;
5005 goto checkAttListDeclHandler;
5006 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5007 parser->m_declAttributeType = atypeNMTOKEN;
5008 goto checkAttListDeclHandler;
5009 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5010 parser->m_declAttributeType = atypeNMTOKENS;
5011 checkAttListDeclHandler:
5012 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5013 handleDefault = XML_FALSE;
5014 break;
5015 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5016 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5017 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5018 const XML_Char *prefix;
5019 if (parser->m_declAttributeType) {
5020 prefix = enumValueSep;
5021 } else {
5022 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5023 : enumValueStart);
5024 }
5025 if (! poolAppendString(&parser->m_tempPool, prefix))
5026 return XML_ERROR_NO_MEMORY;
5027 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5028 return XML_ERROR_NO_MEMORY;
5029 parser->m_declAttributeType = parser->m_tempPool.start;
5030 handleDefault = XML_FALSE;
5031 }
5032 break;
5033 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5034 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5035 if (dtd->keepProcessing) {
5036 if (! defineAttribute(parser->m_declElementType,
5037 parser->m_declAttributeId,
5038 parser->m_declAttributeIsCdata,
5039 parser->m_declAttributeIsId, 0, parser))
5040 return XML_ERROR_NO_MEMORY;
5041 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5042 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5043 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5044 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5045 /* Enumerated or Notation type */
5046 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5047 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5048 return XML_ERROR_NO_MEMORY;
5049 parser->m_declAttributeType = parser->m_tempPool.start;
5050 poolFinish(&parser->m_tempPool);
5051 }
5052 *eventEndPP = s;
5053 parser->m_attlistDeclHandler(
5054 parser->m_handlerArg, parser->m_declElementType->name,
5055 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5056 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5057 handleDefault = XML_FALSE;
5058 }
5059 }
5060 poolClear(&parser->m_tempPool);
5061 break;
5062 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5063 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5064 if (dtd->keepProcessing) {
5065 const XML_Char *attVal;
5066 enum XML_Error result = storeAttributeValue(
5067 parser, enc, parser->m_declAttributeIsCdata,
5068 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5069 XML_ACCOUNT_NONE);
5070 if (result)
5071 return result;
5072 attVal = poolStart(&dtd->pool);
5073 poolFinish(&dtd->pool);
5074 /* ID attributes aren't allowed to have a default */
5075 if (! defineAttribute(
5076 parser->m_declElementType, parser->m_declAttributeId,
5077 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5078 return XML_ERROR_NO_MEMORY;
5079 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5080 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5081 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5082 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5083 /* Enumerated or Notation type */
5084 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5085 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5086 return XML_ERROR_NO_MEMORY;
5087 parser->m_declAttributeType = parser->m_tempPool.start;
5088 poolFinish(&parser->m_tempPool);
5089 }
5090 *eventEndPP = s;
5091 parser->m_attlistDeclHandler(
5092 parser->m_handlerArg, parser->m_declElementType->name,
5093 parser->m_declAttributeId->name, parser->m_declAttributeType,
5094 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5095 poolClear(&parser->m_tempPool);
5096 handleDefault = XML_FALSE;
5097 }
5098 }
5099 break;
5100 case XML_ROLE_ENTITY_VALUE:
5101 if (dtd->keepProcessing) {
5102 #if XML_GE == 1
5103 // This will store the given replacement text in
5104 // parser->m_declEntity->textPtr.
5105 enum XML_Error result
5106 = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5107 next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5108 if (parser->m_declEntity) {
5109 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5110 parser->m_declEntity->textLen
5111 = (int)(poolLength(&dtd->entityValuePool));
5112 poolFinish(&dtd->entityValuePool);
5113 if (parser->m_entityDeclHandler) {
5114 *eventEndPP = s;
5115 parser->m_entityDeclHandler(
5116 parser->m_handlerArg, parser->m_declEntity->name,
5117 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5118 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5119 handleDefault = XML_FALSE;
5120 }
5121 } else
5122 poolDiscard(&dtd->entityValuePool);
5123 if (result != XML_ERROR_NONE)
5124 return result;
5125 #else
5126 // This will store "&entity123;" in parser->m_declEntity->textPtr
5127 // to end up as "&entity123;" in the handler.
5128 if (parser->m_declEntity != NULL) {
5129 const enum XML_Error result
5130 = storeSelfEntityValue(parser, parser->m_declEntity);
5131 if (result != XML_ERROR_NONE)
5132 return result;
5133
5134 if (parser->m_entityDeclHandler) {
5135 *eventEndPP = s;
5136 parser->m_entityDeclHandler(
5137 parser->m_handlerArg, parser->m_declEntity->name,
5138 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5139 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5140 handleDefault = XML_FALSE;
5141 }
5142 }
5143 #endif
5144 }
5145 break;
5146 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5147 #ifdef XML_DTD
5148 parser->m_useForeignDTD = XML_FALSE;
5149 #endif /* XML_DTD */
5150 dtd->hasParamEntityRefs = XML_TRUE;
5151 if (parser->m_startDoctypeDeclHandler) {
5152 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5153 s + enc->minBytesPerChar,
5154 next - enc->minBytesPerChar);
5155 if (parser->m_doctypeSysid == NULL)
5156 return XML_ERROR_NO_MEMORY;
5157 poolFinish(&parser->m_tempPool);
5158 handleDefault = XML_FALSE;
5159 }
5160 #ifdef XML_DTD
5161 else
5162 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5163 for the case where no parser->m_startDoctypeDeclHandler is set */
5164 parser->m_doctypeSysid = externalSubsetName;
5165 #endif /* XML_DTD */
5166 if (! dtd->standalone
5167 #ifdef XML_DTD
5168 && ! parser->m_paramEntityParsing
5169 #endif /* XML_DTD */
5170 && parser->m_notStandaloneHandler
5171 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5172 return XML_ERROR_NOT_STANDALONE;
5173 #ifndef XML_DTD
5174 break;
5175 #else /* XML_DTD */
5176 if (! parser->m_declEntity) {
5177 parser->m_declEntity = (ENTITY *)lookup(
5178 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5179 if (! parser->m_declEntity)
5180 return XML_ERROR_NO_MEMORY;
5181 parser->m_declEntity->publicId = NULL;
5182 }
5183 #endif /* XML_DTD */
5184 /* fall through */
5185 case XML_ROLE_ENTITY_SYSTEM_ID:
5186 if (dtd->keepProcessing && parser->m_declEntity) {
5187 parser->m_declEntity->systemId
5188 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5189 next - enc->minBytesPerChar);
5190 if (! parser->m_declEntity->systemId)
5191 return XML_ERROR_NO_MEMORY;
5192 parser->m_declEntity->base = parser->m_curBase;
5193 poolFinish(&dtd->pool);
5194 /* Don't suppress the default handler if we fell through from
5195 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5196 */
5197 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5198 handleDefault = XML_FALSE;
5199 }
5200 break;
5201 case XML_ROLE_ENTITY_COMPLETE:
5202 #if XML_GE == 0
5203 // This will store "&entity123;" in entity->textPtr
5204 // to end up as "&entity123;" in the handler.
5205 if (parser->m_declEntity != NULL) {
5206 const enum XML_Error result
5207 = storeSelfEntityValue(parser, parser->m_declEntity);
5208 if (result != XML_ERROR_NONE)
5209 return result;
5210 }
5211 #endif
5212 if (dtd->keepProcessing && parser->m_declEntity
5213 && parser->m_entityDeclHandler) {
5214 *eventEndPP = s;
5215 parser->m_entityDeclHandler(
5216 parser->m_handlerArg, parser->m_declEntity->name,
5217 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5218 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5219 handleDefault = XML_FALSE;
5220 }
5221 break;
5222 case XML_ROLE_ENTITY_NOTATION_NAME:
5223 if (dtd->keepProcessing && parser->m_declEntity) {
5224 parser->m_declEntity->notation
5225 = poolStoreString(&dtd->pool, enc, s, next);
5226 if (! parser->m_declEntity->notation)
5227 return XML_ERROR_NO_MEMORY;
5228 poolFinish(&dtd->pool);
5229 if (parser->m_unparsedEntityDeclHandler) {
5230 *eventEndPP = s;
5231 parser->m_unparsedEntityDeclHandler(
5232 parser->m_handlerArg, parser->m_declEntity->name,
5233 parser->m_declEntity->base, parser->m_declEntity->systemId,
5234 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5235 handleDefault = XML_FALSE;
5236 } else if (parser->m_entityDeclHandler) {
5237 *eventEndPP = s;
5238 parser->m_entityDeclHandler(
5239 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5240 parser->m_declEntity->base, parser->m_declEntity->systemId,
5241 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5242 handleDefault = XML_FALSE;
5243 }
5244 }
5245 break;
5246 case XML_ROLE_GENERAL_ENTITY_NAME: {
5247 if (XmlPredefinedEntityName(enc, s, next)) {
5248 parser->m_declEntity = NULL;
5249 break;
5250 }
5251 if (dtd->keepProcessing) {
5252 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5253 if (! name)
5254 return XML_ERROR_NO_MEMORY;
5255 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5256 name, sizeof(ENTITY));
5257 if (! parser->m_declEntity)
5258 return XML_ERROR_NO_MEMORY;
5259 if (parser->m_declEntity->name != name) {
5260 poolDiscard(&dtd->pool);
5261 parser->m_declEntity = NULL;
5262 } else {
5263 poolFinish(&dtd->pool);
5264 parser->m_declEntity->publicId = NULL;
5265 parser->m_declEntity->is_param = XML_FALSE;
5266 /* if we have a parent parser or are reading an internal parameter
5267 entity, then the entity declaration is not considered "internal"
5268 */
5269 parser->m_declEntity->is_internal
5270 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5271 if (parser->m_entityDeclHandler)
5272 handleDefault = XML_FALSE;
5273 }
5274 } else {
5275 poolDiscard(&dtd->pool);
5276 parser->m_declEntity = NULL;
5277 }
5278 } break;
5279 case XML_ROLE_PARAM_ENTITY_NAME:
5280 #ifdef XML_DTD
5281 if (dtd->keepProcessing) {
5282 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5283 if (! name)
5284 return XML_ERROR_NO_MEMORY;
5285 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5286 name, sizeof(ENTITY));
5287 if (! parser->m_declEntity)
5288 return XML_ERROR_NO_MEMORY;
5289 if (parser->m_declEntity->name != name) {
5290 poolDiscard(&dtd->pool);
5291 parser->m_declEntity = NULL;
5292 } else {
5293 poolFinish(&dtd->pool);
5294 parser->m_declEntity->publicId = NULL;
5295 parser->m_declEntity->is_param = XML_TRUE;
5296 /* if we have a parent parser or are reading an internal parameter
5297 entity, then the entity declaration is not considered "internal"
5298 */
5299 parser->m_declEntity->is_internal
5300 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5301 if (parser->m_entityDeclHandler)
5302 handleDefault = XML_FALSE;
5303 }
5304 } else {
5305 poolDiscard(&dtd->pool);
5306 parser->m_declEntity = NULL;
5307 }
5308 #else /* not XML_DTD */
5309 parser->m_declEntity = NULL;
5310 #endif /* XML_DTD */
5311 break;
5312 case XML_ROLE_NOTATION_NAME:
5313 parser->m_declNotationPublicId = NULL;
5314 parser->m_declNotationName = NULL;
5315 if (parser->m_notationDeclHandler) {
5316 parser->m_declNotationName
5317 = poolStoreString(&parser->m_tempPool, enc, s, next);
5318 if (! parser->m_declNotationName)
5319 return XML_ERROR_NO_MEMORY;
5320 poolFinish(&parser->m_tempPool);
5321 handleDefault = XML_FALSE;
5322 }
5323 break;
5324 case XML_ROLE_NOTATION_PUBLIC_ID:
5325 if (! XmlIsPublicId(enc, s, next, eventPP))
5326 return XML_ERROR_PUBLICID;
5327 if (parser
5328 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5329 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5330 s + enc->minBytesPerChar,
5331 next - enc->minBytesPerChar);
5332 if (! tem)
5333 return XML_ERROR_NO_MEMORY;
5334 normalizePublicId(tem);
5335 parser->m_declNotationPublicId = tem;
5336 poolFinish(&parser->m_tempPool);
5337 handleDefault = XML_FALSE;
5338 }
5339 break;
5340 case XML_ROLE_NOTATION_SYSTEM_ID:
5341 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5342 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5343 s + enc->minBytesPerChar,
5344 next - enc->minBytesPerChar);
5345 if (! systemId)
5346 return XML_ERROR_NO_MEMORY;
5347 *eventEndPP = s;
5348 parser->m_notationDeclHandler(
5349 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5350 systemId, parser->m_declNotationPublicId);
5351 handleDefault = XML_FALSE;
5352 }
5353 poolClear(&parser->m_tempPool);
5354 break;
5355 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5356 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5357 *eventEndPP = s;
5358 parser->m_notationDeclHandler(
5359 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5360 0, parser->m_declNotationPublicId);
5361 handleDefault = XML_FALSE;
5362 }
5363 poolClear(&parser->m_tempPool);
5364 break;
5365 case XML_ROLE_ERROR:
5366 switch (tok) {
5367 case XML_TOK_PARAM_ENTITY_REF:
5368 /* PE references in internal subset are
5369 not allowed within declarations. */
5370 return XML_ERROR_PARAM_ENTITY_REF;
5371 case XML_TOK_XML_DECL:
5372 return XML_ERROR_MISPLACED_XML_PI;
5373 default:
5374 return XML_ERROR_SYNTAX;
5375 }
5376 #ifdef XML_DTD
5377 case XML_ROLE_IGNORE_SECT: {
5378 enum XML_Error result;
5379 if (parser->m_defaultHandler)
5380 reportDefault(parser, enc, s, next);
5381 handleDefault = XML_FALSE;
5382 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5383 if (result != XML_ERROR_NONE)
5384 return result;
5385 else if (! next) {
5386 parser->m_processor = ignoreSectionProcessor;
5387 return result;
5388 }
5389 } break;
5390 #endif /* XML_DTD */
5391 case XML_ROLE_GROUP_OPEN:
5392 if (parser->m_prologState.level >= parser->m_groupSize) {
5393 if (parser->m_groupSize) {
5394 {
5395 /* Detect and prevent integer overflow */
5396 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5397 return XML_ERROR_NO_MEMORY;
5398 }
5399
5400 char *const new_connector = (char *)REALLOC(
5401 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5402 if (new_connector == NULL) {
5403 parser->m_groupSize /= 2;
5404 return XML_ERROR_NO_MEMORY;
5405 }
5406 parser->m_groupConnector = new_connector;
5407 }
5408
5409 if (dtd->scaffIndex) {
5410 /* Detect and prevent integer overflow.
5411 * The preprocessor guard addresses the "always false" warning
5412 * from -Wtype-limits on platforms where
5413 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5414 #if UINT_MAX >= SIZE_MAX
5415 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5416 return XML_ERROR_NO_MEMORY;
5417 }
5418 #endif
5419
5420 int *const new_scaff_index = (int *)REALLOC(
5421 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5422 if (new_scaff_index == NULL)
5423 return XML_ERROR_NO_MEMORY;
5424 dtd->scaffIndex = new_scaff_index;
5425 }
5426 } else {
5427 parser->m_groupConnector
5428 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5429 if (! parser->m_groupConnector) {
5430 parser->m_groupSize = 0;
5431 return XML_ERROR_NO_MEMORY;
5432 }
5433 }
5434 }
5435 parser->m_groupConnector[parser->m_prologState.level] = 0;
5436 if (dtd->in_eldecl) {
5437 int myindex = nextScaffoldPart(parser);
5438 if (myindex < 0)
5439 return XML_ERROR_NO_MEMORY;
5440 assert(dtd->scaffIndex != NULL);
5441 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5442 dtd->scaffLevel++;
5443 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5444 if (parser->m_elementDeclHandler)
5445 handleDefault = XML_FALSE;
5446 }
5447 break;
5448 case XML_ROLE_GROUP_SEQUENCE:
5449 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5450 return XML_ERROR_SYNTAX;
5451 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5452 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5453 handleDefault = XML_FALSE;
5454 break;
5455 case XML_ROLE_GROUP_CHOICE:
5456 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5457 return XML_ERROR_SYNTAX;
5458 if (dtd->in_eldecl
5459 && ! parser->m_groupConnector[parser->m_prologState.level]
5460 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5461 != XML_CTYPE_MIXED)) {
5462 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5463 = XML_CTYPE_CHOICE;
5464 if (parser->m_elementDeclHandler)
5465 handleDefault = XML_FALSE;
5466 }
5467 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5468 break;
5469 case XML_ROLE_PARAM_ENTITY_REF:
5470 #ifdef XML_DTD
5471 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5472 dtd->hasParamEntityRefs = XML_TRUE;
5473 if (! parser->m_paramEntityParsing)
5474 dtd->keepProcessing = dtd->standalone;
5475 else {
5476 const XML_Char *name;
5477 ENTITY *entity;
5478 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5479 next - enc->minBytesPerChar);
5480 if (! name)
5481 return XML_ERROR_NO_MEMORY;
5482 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5483 poolDiscard(&dtd->pool);
5484 /* first, determine if a check for an existing declaration is needed;
5485 if yes, check that the entity exists, and that it is internal,
5486 otherwise call the skipped entity handler
5487 */
5488 if (parser->m_prologState.documentEntity
5489 && (dtd->standalone ? ! parser->m_openInternalEntities
5490 : ! dtd->hasParamEntityRefs)) {
5491 if (! entity)
5492 return XML_ERROR_UNDEFINED_ENTITY;
5493 else if (! entity->is_internal) {
5494 /* It's hard to exhaustively search the code to be sure,
5495 * but there doesn't seem to be a way of executing the
5496 * following line. There are two cases:
5497 *
5498 * If 'standalone' is false, the DTD must have no
5499 * parameter entities or we wouldn't have passed the outer
5500 * 'if' statement. That means the only entity in the hash
5501 * table is the external subset name "#" which cannot be
5502 * given as a parameter entity name in XML syntax, so the
5503 * lookup must have returned NULL and we don't even reach
5504 * the test for an internal entity.
5505 *
5506 * If 'standalone' is true, it does not seem to be
5507 * possible to create entities taking this code path that
5508 * are not internal entities, so fail the test above.
5509 *
5510 * Because this analysis is very uncertain, the code is
5511 * being left in place and merely removed from the
5512 * coverage test statistics.
5513 */
5514 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5515 }
5516 } else if (! entity) {
5517 dtd->keepProcessing = dtd->standalone;
5518 /* cannot report skipped entities in declarations */
5519 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5520 && parser->m_skippedEntityHandler) {
5521 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5522 handleDefault = XML_FALSE;
5523 }
5524 break;
5525 }
5526 if (entity->open)
5527 return XML_ERROR_RECURSIVE_ENTITY_REF;
5528 if (entity->textPtr) {
5529 enum XML_Error result;
5530 XML_Bool betweenDecl
5531 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5532 result = processInternalEntity(parser, entity, betweenDecl);
5533 if (result != XML_ERROR_NONE)
5534 return result;
5535 handleDefault = XML_FALSE;
5536 break;
5537 }
5538 if (parser->m_externalEntityRefHandler) {
5539 dtd->paramEntityRead = XML_FALSE;
5540 entity->open = XML_TRUE;
5541 entityTrackingOnOpen(parser, entity, __LINE__);
5542 if (! parser->m_externalEntityRefHandler(
5543 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5544 entity->systemId, entity->publicId)) {
5545 entityTrackingOnClose(parser, entity, __LINE__);
5546 entity->open = XML_FALSE;
5547 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5548 }
5549 entityTrackingOnClose(parser, entity, __LINE__);
5550 entity->open = XML_FALSE;
5551 handleDefault = XML_FALSE;
5552 if (! dtd->paramEntityRead) {
5553 dtd->keepProcessing = dtd->standalone;
5554 break;
5555 }
5556 } else {
5557 dtd->keepProcessing = dtd->standalone;
5558 break;
5559 }
5560 }
5561 #endif /* XML_DTD */
5562 if (! dtd->standalone && parser->m_notStandaloneHandler
5563 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5564 return XML_ERROR_NOT_STANDALONE;
5565 break;
5566
5567 /* Element declaration stuff */
5568
5569 case XML_ROLE_ELEMENT_NAME:
5570 if (parser->m_elementDeclHandler) {
5571 parser->m_declElementType = getElementType(parser, enc, s, next);
5572 if (! parser->m_declElementType)
5573 return XML_ERROR_NO_MEMORY;
5574 dtd->scaffLevel = 0;
5575 dtd->scaffCount = 0;
5576 dtd->in_eldecl = XML_TRUE;
5577 handleDefault = XML_FALSE;
5578 }
5579 break;
5580
5581 case XML_ROLE_CONTENT_ANY:
5582 case XML_ROLE_CONTENT_EMPTY:
5583 if (dtd->in_eldecl) {
5584 if (parser->m_elementDeclHandler) {
5585 XML_Content *content
5586 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5587 if (! content)
5588 return XML_ERROR_NO_MEMORY;
5589 content->quant = XML_CQUANT_NONE;
5590 content->name = NULL;
5591 content->numchildren = 0;
5592 content->children = NULL;
5593 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5594 : XML_CTYPE_EMPTY);
5595 *eventEndPP = s;
5596 parser->m_elementDeclHandler(
5597 parser->m_handlerArg, parser->m_declElementType->name, content);
5598 handleDefault = XML_FALSE;
5599 }
5600 dtd->in_eldecl = XML_FALSE;
5601 }
5602 break;
5603
5604 case XML_ROLE_CONTENT_PCDATA:
5605 if (dtd->in_eldecl) {
5606 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5607 = XML_CTYPE_MIXED;
5608 if (parser->m_elementDeclHandler)
5609 handleDefault = XML_FALSE;
5610 }
5611 break;
5612
5613 case XML_ROLE_CONTENT_ELEMENT:
5614 quant = XML_CQUANT_NONE;
5615 goto elementContent;
5616 case XML_ROLE_CONTENT_ELEMENT_OPT:
5617 quant = XML_CQUANT_OPT;
5618 goto elementContent;
5619 case XML_ROLE_CONTENT_ELEMENT_REP:
5620 quant = XML_CQUANT_REP;
5621 goto elementContent;
5622 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5623 quant = XML_CQUANT_PLUS;
5624 elementContent:
5625 if (dtd->in_eldecl) {
5626 ELEMENT_TYPE *el;
5627 const XML_Char *name;
5628 size_t nameLen;
5629 const char *nxt
5630 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5631 int myindex = nextScaffoldPart(parser);
5632 if (myindex < 0)
5633 return XML_ERROR_NO_MEMORY;
5634 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5635 dtd->scaffold[myindex].quant = quant;
5636 el = getElementType(parser, enc, s, nxt);
5637 if (! el)
5638 return XML_ERROR_NO_MEMORY;
5639 name = el->name;
5640 dtd->scaffold[myindex].name = name;
5641 nameLen = 0;
5642 for (; name[nameLen++];)
5643 ;
5644
5645 /* Detect and prevent integer overflow */
5646 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5647 return XML_ERROR_NO_MEMORY;
5648 }
5649
5650 dtd->contentStringLen += (unsigned)nameLen;
5651 if (parser->m_elementDeclHandler)
5652 handleDefault = XML_FALSE;
5653 }
5654 break;
5655
5656 case XML_ROLE_GROUP_CLOSE:
5657 quant = XML_CQUANT_NONE;
5658 goto closeGroup;
5659 case XML_ROLE_GROUP_CLOSE_OPT:
5660 quant = XML_CQUANT_OPT;
5661 goto closeGroup;
5662 case XML_ROLE_GROUP_CLOSE_REP:
5663 quant = XML_CQUANT_REP;
5664 goto closeGroup;
5665 case XML_ROLE_GROUP_CLOSE_PLUS:
5666 quant = XML_CQUANT_PLUS;
5667 closeGroup:
5668 if (dtd->in_eldecl) {
5669 if (parser->m_elementDeclHandler)
5670 handleDefault = XML_FALSE;
5671 dtd->scaffLevel--;
5672 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5673 if (dtd->scaffLevel == 0) {
5674 if (! handleDefault) {
5675 XML_Content *model = build_model(parser);
5676 if (! model)
5677 return XML_ERROR_NO_MEMORY;
5678 *eventEndPP = s;
5679 parser->m_elementDeclHandler(
5680 parser->m_handlerArg, parser->m_declElementType->name, model);
5681 }
5682 dtd->in_eldecl = XML_FALSE;
5683 dtd->contentStringLen = 0;
5684 }
5685 }
5686 break;
5687 /* End element declaration stuff */
5688
5689 case XML_ROLE_PI:
5690 if (! reportProcessingInstruction(parser, enc, s, next))
5691 return XML_ERROR_NO_MEMORY;
5692 handleDefault = XML_FALSE;
5693 break;
5694 case XML_ROLE_COMMENT:
5695 if (! reportComment(parser, enc, s, next))
5696 return XML_ERROR_NO_MEMORY;
5697 handleDefault = XML_FALSE;
5698 break;
5699 case XML_ROLE_NONE:
5700 switch (tok) {
5701 case XML_TOK_BOM:
5702 handleDefault = XML_FALSE;
5703 break;
5704 }
5705 break;
5706 case XML_ROLE_DOCTYPE_NONE:
5707 if (parser->m_startDoctypeDeclHandler)
5708 handleDefault = XML_FALSE;
5709 break;
5710 case XML_ROLE_ENTITY_NONE:
5711 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5712 handleDefault = XML_FALSE;
5713 break;
5714 case XML_ROLE_NOTATION_NONE:
5715 if (parser->m_notationDeclHandler)
5716 handleDefault = XML_FALSE;
5717 break;
5718 case XML_ROLE_ATTLIST_NONE:
5719 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5720 handleDefault = XML_FALSE;
5721 break;
5722 case XML_ROLE_ELEMENT_NONE:
5723 if (parser->m_elementDeclHandler)
5724 handleDefault = XML_FALSE;
5725 break;
5726 } /* end of big switch */
5727
5728 if (handleDefault && parser->m_defaultHandler)
5729 reportDefault(parser, enc, s, next);
5730
5731 switch (parser->m_parsingStatus.parsing) {
5732 case XML_SUSPENDED:
5733 *nextPtr = next;
5734 return XML_ERROR_NONE;
5735 case XML_FINISHED:
5736 return XML_ERROR_ABORTED;
5737 default:
5738 s = next;
5739 tok = XmlPrologTok(enc, s, end, &next);
5740 }
5741 }
5742 /* not reached */
5743 }
5744
5745 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5746 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5747 const char **nextPtr) {
5748 parser->m_processor = epilogProcessor;
5749 parser->m_eventPtr = s;
5750 for (;;) {
5751 const char *next = NULL;
5752 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5753 #if XML_GE == 1
5754 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5755 XML_ACCOUNT_DIRECT)) {
5756 accountingOnAbort(parser);
5757 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5758 }
5759 #endif
5760 parser->m_eventEndPtr = next;
5761 switch (tok) {
5762 /* report partial linebreak - it might be the last token */
5763 case -XML_TOK_PROLOG_S:
5764 if (parser->m_defaultHandler) {
5765 reportDefault(parser, parser->m_encoding, s, next);
5766 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5767 return XML_ERROR_ABORTED;
5768 }
5769 *nextPtr = next;
5770 return XML_ERROR_NONE;
5771 case XML_TOK_NONE:
5772 *nextPtr = s;
5773 return XML_ERROR_NONE;
5774 case XML_TOK_PROLOG_S:
5775 if (parser->m_defaultHandler)
5776 reportDefault(parser, parser->m_encoding, s, next);
5777 break;
5778 case XML_TOK_PI:
5779 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5780 return XML_ERROR_NO_MEMORY;
5781 break;
5782 case XML_TOK_COMMENT:
5783 if (! reportComment(parser, parser->m_encoding, s, next))
5784 return XML_ERROR_NO_MEMORY;
5785 break;
5786 case XML_TOK_INVALID:
5787 parser->m_eventPtr = next;
5788 return XML_ERROR_INVALID_TOKEN;
5789 case XML_TOK_PARTIAL:
5790 if (! parser->m_parsingStatus.finalBuffer) {
5791 *nextPtr = s;
5792 return XML_ERROR_NONE;
5793 }
5794 return XML_ERROR_UNCLOSED_TOKEN;
5795 case XML_TOK_PARTIAL_CHAR:
5796 if (! parser->m_parsingStatus.finalBuffer) {
5797 *nextPtr = s;
5798 return XML_ERROR_NONE;
5799 }
5800 return XML_ERROR_PARTIAL_CHAR;
5801 default:
5802 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5803 }
5804 parser->m_eventPtr = s = next;
5805 switch (parser->m_parsingStatus.parsing) {
5806 case XML_SUSPENDED:
5807 *nextPtr = next;
5808 return XML_ERROR_NONE;
5809 case XML_FINISHED:
5810 return XML_ERROR_ABORTED;
5811 default:;
5812 }
5813 }
5814 }
5815
5816 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5817 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5818 const char *textStart, *textEnd;
5819 const char *next;
5820 enum XML_Error result;
5821 OPEN_INTERNAL_ENTITY *openEntity;
5822
5823 if (parser->m_freeInternalEntities) {
5824 openEntity = parser->m_freeInternalEntities;
5825 parser->m_freeInternalEntities = openEntity->next;
5826 } else {
5827 openEntity
5828 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5829 if (! openEntity)
5830 return XML_ERROR_NO_MEMORY;
5831 }
5832 entity->open = XML_TRUE;
5833 #if XML_GE == 1
5834 entityTrackingOnOpen(parser, entity, __LINE__);
5835 #endif
5836 entity->processed = 0;
5837 openEntity->next = parser->m_openInternalEntities;
5838 parser->m_openInternalEntities = openEntity;
5839 openEntity->entity = entity;
5840 openEntity->startTagLevel = parser->m_tagLevel;
5841 openEntity->betweenDecl = betweenDecl;
5842 openEntity->internalEventPtr = NULL;
5843 openEntity->internalEventEndPtr = NULL;
5844 textStart = (const char *)entity->textPtr;
5845 textEnd = (const char *)(entity->textPtr + entity->textLen);
5846 /* Set a safe default value in case 'next' does not get set */
5847 next = textStart;
5848
5849 #ifdef XML_DTD
5850 if (entity->is_param) {
5851 int tok
5852 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5853 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5854 tok, next, &next, XML_FALSE, XML_FALSE,
5855 XML_ACCOUNT_ENTITY_EXPANSION);
5856 } else
5857 #endif /* XML_DTD */
5858 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5859 textStart, textEnd, &next, XML_FALSE,
5860 XML_ACCOUNT_ENTITY_EXPANSION);
5861
5862 if (result == XML_ERROR_NONE) {
5863 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5864 entity->processed = (int)(next - textStart);
5865 parser->m_processor = internalEntityProcessor;
5866 } else if (parser->m_openInternalEntities->entity == entity) {
5867 #if XML_GE == 1
5868 entityTrackingOnClose(parser, entity, __LINE__);
5869 #endif /* XML_GE == 1 */
5870 entity->open = XML_FALSE;
5871 parser->m_openInternalEntities = openEntity->next;
5872 /* put openEntity back in list of free instances */
5873 openEntity->next = parser->m_freeInternalEntities;
5874 parser->m_freeInternalEntities = openEntity;
5875 }
5876 }
5877 return result;
5878 }
5879
5880 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5881 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5882 const char **nextPtr) {
5883 ENTITY *entity;
5884 const char *textStart, *textEnd;
5885 const char *next;
5886 enum XML_Error result;
5887 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5888 if (! openEntity)
5889 return XML_ERROR_UNEXPECTED_STATE;
5890
5891 entity = openEntity->entity;
5892 textStart = ((const char *)entity->textPtr) + entity->processed;
5893 textEnd = (const char *)(entity->textPtr + entity->textLen);
5894 /* Set a safe default value in case 'next' does not get set */
5895 next = textStart;
5896
5897 #ifdef XML_DTD
5898 if (entity->is_param) {
5899 int tok
5900 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5901 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5902 tok, next, &next, XML_FALSE, XML_TRUE,
5903 XML_ACCOUNT_ENTITY_EXPANSION);
5904 } else
5905 #endif /* XML_DTD */
5906 result = doContent(parser, openEntity->startTagLevel,
5907 parser->m_internalEncoding, textStart, textEnd, &next,
5908 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5909
5910 if (result != XML_ERROR_NONE)
5911 return result;
5912
5913 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5914 entity->processed = (int)(next - (const char *)entity->textPtr);
5915 return result;
5916 }
5917
5918 #if XML_GE == 1
5919 entityTrackingOnClose(parser, entity, __LINE__);
5920 #endif
5921 entity->open = XML_FALSE;
5922 parser->m_openInternalEntities = openEntity->next;
5923 /* put openEntity back in list of free instances */
5924 openEntity->next = parser->m_freeInternalEntities;
5925 parser->m_freeInternalEntities = openEntity;
5926
5927 // If there are more open entities we want to stop right here and have the
5928 // upcoming call to XML_ResumeParser continue with entity content, or it would
5929 // be ignored altogether.
5930 if (parser->m_openInternalEntities != NULL
5931 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5932 return XML_ERROR_NONE;
5933 }
5934
5935 #ifdef XML_DTD
5936 if (entity->is_param) {
5937 int tok;
5938 parser->m_processor = prologProcessor;
5939 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5940 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5941 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5942 XML_ACCOUNT_DIRECT);
5943 } else
5944 #endif /* XML_DTD */
5945 {
5946 parser->m_processor = contentProcessor;
5947 /* see externalEntityContentProcessor vs contentProcessor */
5948 result = doContent(parser, parser->m_parentParser ? 1 : 0,
5949 parser->m_encoding, s, end, nextPtr,
5950 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5951 XML_ACCOUNT_DIRECT);
5952 if (result == XML_ERROR_NONE) {
5953 if (! storeRawNames(parser))
5954 return XML_ERROR_NO_MEMORY;
5955 }
5956 return result;
5957 }
5958 }
5959
5960 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5961 errorProcessor(XML_Parser parser, const char *s, const char *end,
5962 const char **nextPtr) {
5963 UNUSED_P(s);
5964 UNUSED_P(end);
5965 UNUSED_P(nextPtr);
5966 return parser->m_errorCode;
5967 }
5968
5969 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5970 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5971 const char *ptr, const char *end, STRING_POOL *pool,
5972 enum XML_Account account) {
5973 enum XML_Error result
5974 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5975 if (result)
5976 return result;
5977 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5978 poolChop(pool);
5979 if (! poolAppendChar(pool, XML_T('\0')))
5980 return XML_ERROR_NO_MEMORY;
5981 return XML_ERROR_NONE;
5982 }
5983
5984 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5985 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5986 const char *ptr, const char *end, STRING_POOL *pool,
5987 enum XML_Account account) {
5988 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5989 #ifndef XML_DTD
5990 UNUSED_P(account);
5991 #endif
5992
5993 for (;;) {
5994 const char *next
5995 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5996 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5997 #if XML_GE == 1
5998 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5999 accountingOnAbort(parser);
6000 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6001 }
6002 #endif
6003 switch (tok) {
6004 case XML_TOK_NONE:
6005 return XML_ERROR_NONE;
6006 case XML_TOK_INVALID:
6007 if (enc == parser->m_encoding)
6008 parser->m_eventPtr = next;
6009 return XML_ERROR_INVALID_TOKEN;
6010 case XML_TOK_PARTIAL:
6011 if (enc == parser->m_encoding)
6012 parser->m_eventPtr = ptr;
6013 return XML_ERROR_INVALID_TOKEN;
6014 case XML_TOK_CHAR_REF: {
6015 XML_Char buf[XML_ENCODE_MAX];
6016 int i;
6017 int n = XmlCharRefNumber(enc, ptr);
6018 if (n < 0) {
6019 if (enc == parser->m_encoding)
6020 parser->m_eventPtr = ptr;
6021 return XML_ERROR_BAD_CHAR_REF;
6022 }
6023 if (! isCdata && n == 0x20 /* space */
6024 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6025 break;
6026 n = XmlEncode(n, (ICHAR *)buf);
6027 /* The XmlEncode() functions can never return 0 here. That
6028 * error return happens if the code point passed in is either
6029 * negative or greater than or equal to 0x110000. The
6030 * XmlCharRefNumber() functions will all return a number
6031 * strictly less than 0x110000 or a negative value if an error
6032 * occurred. The negative value is intercepted above, so
6033 * XmlEncode() is never passed a value it might return an
6034 * error for.
6035 */
6036 for (i = 0; i < n; i++) {
6037 if (! poolAppendChar(pool, buf[i]))
6038 return XML_ERROR_NO_MEMORY;
6039 }
6040 } break;
6041 case XML_TOK_DATA_CHARS:
6042 if (! poolAppend(pool, enc, ptr, next))
6043 return XML_ERROR_NO_MEMORY;
6044 break;
6045 case XML_TOK_TRAILING_CR:
6046 next = ptr + enc->minBytesPerChar;
6047 /* fall through */
6048 case XML_TOK_ATTRIBUTE_VALUE_S:
6049 case XML_TOK_DATA_NEWLINE:
6050 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6051 break;
6052 if (! poolAppendChar(pool, 0x20))
6053 return XML_ERROR_NO_MEMORY;
6054 break;
6055 case XML_TOK_ENTITY_REF: {
6056 const XML_Char *name;
6057 ENTITY *entity;
6058 char checkEntityDecl;
6059 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6060 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6061 if (ch) {
6062 #if XML_GE == 1
6063 /* NOTE: We are replacing 4-6 characters original input for 1 character
6064 * so there is no amplification and hence recording without
6065 * protection. */
6066 accountingDiffTolerated(parser, tok, (char *)&ch,
6067 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6068 XML_ACCOUNT_ENTITY_EXPANSION);
6069 #endif /* XML_GE == 1 */
6070 if (! poolAppendChar(pool, ch))
6071 return XML_ERROR_NO_MEMORY;
6072 break;
6073 }
6074 name = poolStoreString(&parser->m_temp2Pool, enc,
6075 ptr + enc->minBytesPerChar,
6076 next - enc->minBytesPerChar);
6077 if (! name)
6078 return XML_ERROR_NO_MEMORY;
6079 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6080 poolDiscard(&parser->m_temp2Pool);
6081 /* First, determine if a check for an existing declaration is needed;
6082 if yes, check that the entity exists, and that it is internal.
6083 */
6084 if (pool == &dtd->pool) /* are we called from prolog? */
6085 checkEntityDecl =
6086 #ifdef XML_DTD
6087 parser->m_prologState.documentEntity &&
6088 #endif /* XML_DTD */
6089 (dtd->standalone ? ! parser->m_openInternalEntities
6090 : ! dtd->hasParamEntityRefs);
6091 else /* if (pool == &parser->m_tempPool): we are called from content */
6092 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6093 if (checkEntityDecl) {
6094 if (! entity)
6095 return XML_ERROR_UNDEFINED_ENTITY;
6096 else if (! entity->is_internal)
6097 return XML_ERROR_ENTITY_DECLARED_IN_PE;
6098 } else if (! entity) {
6099 /* Cannot report skipped entity here - see comments on
6100 parser->m_skippedEntityHandler.
6101 if (parser->m_skippedEntityHandler)
6102 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6103 */
6104 /* Cannot call the default handler because this would be
6105 out of sync with the call to the startElementHandler.
6106 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6107 reportDefault(parser, enc, ptr, next);
6108 */
6109 break;
6110 }
6111 if (entity->open) {
6112 if (enc == parser->m_encoding) {
6113 /* It does not appear that this line can be executed.
6114 *
6115 * The "if (entity->open)" check catches recursive entity
6116 * definitions. In order to be called with an open
6117 * entity, it must have gone through this code before and
6118 * been through the recursive call to
6119 * appendAttributeValue() some lines below. That call
6120 * sets the local encoding ("enc") to the parser's
6121 * internal encoding (internal_utf8 or internal_utf16),
6122 * which can never be the same as the principle encoding.
6123 * It doesn't appear there is another code path that gets
6124 * here with entity->open being TRUE.
6125 *
6126 * Since it is not certain that this logic is watertight,
6127 * we keep the line and merely exclude it from coverage
6128 * tests.
6129 */
6130 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6131 }
6132 return XML_ERROR_RECURSIVE_ENTITY_REF;
6133 }
6134 if (entity->notation) {
6135 if (enc == parser->m_encoding)
6136 parser->m_eventPtr = ptr;
6137 return XML_ERROR_BINARY_ENTITY_REF;
6138 }
6139 if (! entity->textPtr) {
6140 if (enc == parser->m_encoding)
6141 parser->m_eventPtr = ptr;
6142 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6143 } else {
6144 enum XML_Error result;
6145 const XML_Char *textEnd = entity->textPtr + entity->textLen;
6146 entity->open = XML_TRUE;
6147 #if XML_GE == 1
6148 entityTrackingOnOpen(parser, entity, __LINE__);
6149 #endif
6150 result = appendAttributeValue(parser, parser->m_internalEncoding,
6151 isCdata, (const char *)entity->textPtr,
6152 (const char *)textEnd, pool,
6153 XML_ACCOUNT_ENTITY_EXPANSION);
6154 #if XML_GE == 1
6155 entityTrackingOnClose(parser, entity, __LINE__);
6156 #endif
6157 entity->open = XML_FALSE;
6158 if (result)
6159 return result;
6160 }
6161 } break;
6162 default:
6163 /* The only token returned by XmlAttributeValueTok() that does
6164 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6165 * Getting that would require an entity name to contain an
6166 * incomplete XML character (e.g. \xE2\x82); however previous
6167 * tokenisers will have already recognised and rejected such
6168 * names before XmlAttributeValueTok() gets a look-in. This
6169 * default case should be retained as a safety net, but the code
6170 * excluded from coverage tests.
6171 *
6172 * LCOV_EXCL_START
6173 */
6174 if (enc == parser->m_encoding)
6175 parser->m_eventPtr = ptr;
6176 return XML_ERROR_UNEXPECTED_STATE;
6177 /* LCOV_EXCL_STOP */
6178 }
6179 ptr = next;
6180 }
6181 /* not reached */
6182 }
6183
6184 #if XML_GE == 1
6185 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6186 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6187 const char *entityTextPtr, const char *entityTextEnd,
6188 enum XML_Account account) {
6189 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6190 STRING_POOL *pool = &(dtd->entityValuePool);
6191 enum XML_Error result = XML_ERROR_NONE;
6192 # ifdef XML_DTD
6193 int oldInEntityValue = parser->m_prologState.inEntityValue;
6194 parser->m_prologState.inEntityValue = 1;
6195 # else
6196 UNUSED_P(account);
6197 # endif /* XML_DTD */
6198 /* never return Null for the value argument in EntityDeclHandler,
6199 since this would indicate an external entity; therefore we
6200 have to make sure that entityValuePool.start is not null */
6201 if (! pool->blocks) {
6202 if (! poolGrow(pool))
6203 return XML_ERROR_NO_MEMORY;
6204 }
6205
6206 for (;;) {
6207 const char *next
6208 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6209 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6210
6211 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6212 account)) {
6213 accountingOnAbort(parser);
6214 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6215 goto endEntityValue;
6216 }
6217
6218 switch (tok) {
6219 case XML_TOK_PARAM_ENTITY_REF:
6220 # ifdef XML_DTD
6221 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6222 const XML_Char *name;
6223 ENTITY *entity;
6224 name = poolStoreString(&parser->m_tempPool, enc,
6225 entityTextPtr + enc->minBytesPerChar,
6226 next - enc->minBytesPerChar);
6227 if (! name) {
6228 result = XML_ERROR_NO_MEMORY;
6229 goto endEntityValue;
6230 }
6231 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6232 poolDiscard(&parser->m_tempPool);
6233 if (! entity) {
6234 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6235 /* cannot report skipped entity here - see comments on
6236 parser->m_skippedEntityHandler
6237 if (parser->m_skippedEntityHandler)
6238 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6239 */
6240 dtd->keepProcessing = dtd->standalone;
6241 goto endEntityValue;
6242 }
6243 if (entity->open || (entity == parser->m_declEntity)) {
6244 if (enc == parser->m_encoding)
6245 parser->m_eventPtr = entityTextPtr;
6246 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6247 goto endEntityValue;
6248 }
6249 if (entity->systemId) {
6250 if (parser->m_externalEntityRefHandler) {
6251 dtd->paramEntityRead = XML_FALSE;
6252 entity->open = XML_TRUE;
6253 entityTrackingOnOpen(parser, entity, __LINE__);
6254 if (! parser->m_externalEntityRefHandler(
6255 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6256 entity->systemId, entity->publicId)) {
6257 entityTrackingOnClose(parser, entity, __LINE__);
6258 entity->open = XML_FALSE;
6259 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6260 goto endEntityValue;
6261 }
6262 entityTrackingOnClose(parser, entity, __LINE__);
6263 entity->open = XML_FALSE;
6264 if (! dtd->paramEntityRead)
6265 dtd->keepProcessing = dtd->standalone;
6266 } else
6267 dtd->keepProcessing = dtd->standalone;
6268 } else {
6269 entity->open = XML_TRUE;
6270 entityTrackingOnOpen(parser, entity, __LINE__);
6271 result = storeEntityValue(
6272 parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6273 (const char *)(entity->textPtr + entity->textLen),
6274 XML_ACCOUNT_ENTITY_EXPANSION);
6275 entityTrackingOnClose(parser, entity, __LINE__);
6276 entity->open = XML_FALSE;
6277 if (result)
6278 goto endEntityValue;
6279 }
6280 break;
6281 }
6282 # endif /* XML_DTD */
6283 /* In the internal subset, PE references are not legal
6284 within markup declarations, e.g entity values in this case. */
6285 parser->m_eventPtr = entityTextPtr;
6286 result = XML_ERROR_PARAM_ENTITY_REF;
6287 goto endEntityValue;
6288 case XML_TOK_NONE:
6289 result = XML_ERROR_NONE;
6290 goto endEntityValue;
6291 case XML_TOK_ENTITY_REF:
6292 case XML_TOK_DATA_CHARS:
6293 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6294 result = XML_ERROR_NO_MEMORY;
6295 goto endEntityValue;
6296 }
6297 break;
6298 case XML_TOK_TRAILING_CR:
6299 next = entityTextPtr + enc->minBytesPerChar;
6300 /* fall through */
6301 case XML_TOK_DATA_NEWLINE:
6302 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6303 result = XML_ERROR_NO_MEMORY;
6304 goto endEntityValue;
6305 }
6306 *(pool->ptr)++ = 0xA;
6307 break;
6308 case XML_TOK_CHAR_REF: {
6309 XML_Char buf[XML_ENCODE_MAX];
6310 int i;
6311 int n = XmlCharRefNumber(enc, entityTextPtr);
6312 if (n < 0) {
6313 if (enc == parser->m_encoding)
6314 parser->m_eventPtr = entityTextPtr;
6315 result = XML_ERROR_BAD_CHAR_REF;
6316 goto endEntityValue;
6317 }
6318 n = XmlEncode(n, (ICHAR *)buf);
6319 /* The XmlEncode() functions can never return 0 here. That
6320 * error return happens if the code point passed in is either
6321 * negative or greater than or equal to 0x110000. The
6322 * XmlCharRefNumber() functions will all return a number
6323 * strictly less than 0x110000 or a negative value if an error
6324 * occurred. The negative value is intercepted above, so
6325 * XmlEncode() is never passed a value it might return an
6326 * error for.
6327 */
6328 for (i = 0; i < n; i++) {
6329 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6330 result = XML_ERROR_NO_MEMORY;
6331 goto endEntityValue;
6332 }
6333 *(pool->ptr)++ = buf[i];
6334 }
6335 } break;
6336 case XML_TOK_PARTIAL:
6337 if (enc == parser->m_encoding)
6338 parser->m_eventPtr = entityTextPtr;
6339 result = XML_ERROR_INVALID_TOKEN;
6340 goto endEntityValue;
6341 case XML_TOK_INVALID:
6342 if (enc == parser->m_encoding)
6343 parser->m_eventPtr = next;
6344 result = XML_ERROR_INVALID_TOKEN;
6345 goto endEntityValue;
6346 default:
6347 /* This default case should be unnecessary -- all the tokens
6348 * that XmlEntityValueTok() can return have their own explicit
6349 * cases -- but should be retained for safety. We do however
6350 * exclude it from the coverage statistics.
6351 *
6352 * LCOV_EXCL_START
6353 */
6354 if (enc == parser->m_encoding)
6355 parser->m_eventPtr = entityTextPtr;
6356 result = XML_ERROR_UNEXPECTED_STATE;
6357 goto endEntityValue;
6358 /* LCOV_EXCL_STOP */
6359 }
6360 entityTextPtr = next;
6361 }
6362 endEntityValue:
6363 # ifdef XML_DTD
6364 parser->m_prologState.inEntityValue = oldInEntityValue;
6365 # endif /* XML_DTD */
6366 return result;
6367 }
6368
6369 #else /* XML_GE == 0 */
6370
6371 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6372 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6373 // This will store "&entity123;" in entity->textPtr
6374 // to end up as "&entity123;" in the handler.
6375 const char *const entity_start = "&";
6376 const char *const entity_end = ";";
6377
6378 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6379 if (! poolAppendString(pool, entity_start)
6380 || ! poolAppendString(pool, entity->name)
6381 || ! poolAppendString(pool, entity_end)) {
6382 poolDiscard(pool);
6383 return XML_ERROR_NO_MEMORY;
6384 }
6385
6386 entity->textPtr = poolStart(pool);
6387 entity->textLen = (int)(poolLength(pool));
6388 poolFinish(pool);
6389
6390 return XML_ERROR_NONE;
6391 }
6392
6393 #endif /* XML_GE == 0 */
6394
6395 static void FASTCALL
normalizeLines(XML_Char * s)6396 normalizeLines(XML_Char *s) {
6397 XML_Char *p;
6398 for (;; s++) {
6399 if (*s == XML_T('\0'))
6400 return;
6401 if (*s == 0xD)
6402 break;
6403 }
6404 p = s;
6405 do {
6406 if (*s == 0xD) {
6407 *p++ = 0xA;
6408 if (*++s == 0xA)
6409 s++;
6410 } else
6411 *p++ = *s++;
6412 } while (*s);
6413 *p = XML_T('\0');
6414 }
6415
6416 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6417 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6418 const char *start, const char *end) {
6419 const XML_Char *target;
6420 XML_Char *data;
6421 const char *tem;
6422 if (! parser->m_processingInstructionHandler) {
6423 if (parser->m_defaultHandler)
6424 reportDefault(parser, enc, start, end);
6425 return 1;
6426 }
6427 start += enc->minBytesPerChar * 2;
6428 tem = start + XmlNameLength(enc, start);
6429 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6430 if (! target)
6431 return 0;
6432 poolFinish(&parser->m_tempPool);
6433 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6434 end - enc->minBytesPerChar * 2);
6435 if (! data)
6436 return 0;
6437 normalizeLines(data);
6438 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6439 poolClear(&parser->m_tempPool);
6440 return 1;
6441 }
6442
6443 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6444 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6445 const char *end) {
6446 XML_Char *data;
6447 if (! parser->m_commentHandler) {
6448 if (parser->m_defaultHandler)
6449 reportDefault(parser, enc, start, end);
6450 return 1;
6451 }
6452 data = poolStoreString(&parser->m_tempPool, enc,
6453 start + enc->minBytesPerChar * 4,
6454 end - enc->minBytesPerChar * 3);
6455 if (! data)
6456 return 0;
6457 normalizeLines(data);
6458 parser->m_commentHandler(parser->m_handlerArg, data);
6459 poolClear(&parser->m_tempPool);
6460 return 1;
6461 }
6462
6463 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6464 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6465 const char *end) {
6466 if (MUST_CONVERT(enc, s)) {
6467 enum XML_Convert_Result convert_res;
6468 const char **eventPP;
6469 const char **eventEndPP;
6470 if (enc == parser->m_encoding) {
6471 eventPP = &parser->m_eventPtr;
6472 eventEndPP = &parser->m_eventEndPtr;
6473 } else {
6474 /* To get here, two things must be true; the parser must be
6475 * using a character encoding that is not the same as the
6476 * encoding passed in, and the encoding passed in must need
6477 * conversion to the internal format (UTF-8 unless XML_UNICODE
6478 * is defined). The only occasions on which the encoding passed
6479 * in is not the same as the parser's encoding are when it is
6480 * the internal encoding (e.g. a previously defined parameter
6481 * entity, already converted to internal format). This by
6482 * definition doesn't need conversion, so the whole branch never
6483 * gets executed.
6484 *
6485 * For safety's sake we don't delete these lines and merely
6486 * exclude them from coverage statistics.
6487 *
6488 * LCOV_EXCL_START
6489 */
6490 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6491 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6492 /* LCOV_EXCL_STOP */
6493 }
6494 do {
6495 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6496 convert_res
6497 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6498 *eventEndPP = s;
6499 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6500 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6501 *eventPP = s;
6502 } while ((convert_res != XML_CONVERT_COMPLETED)
6503 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6504 } else
6505 parser->m_defaultHandler(
6506 parser->m_handlerArg, (const XML_Char *)s,
6507 (int)((const XML_Char *)end - (const XML_Char *)s));
6508 }
6509
6510 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6511 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6512 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6513 DEFAULT_ATTRIBUTE *att;
6514 if (value || isId) {
6515 /* The handling of default attributes gets messed up if we have
6516 a default which duplicates a non-default. */
6517 int i;
6518 for (i = 0; i < type->nDefaultAtts; i++)
6519 if (attId == type->defaultAtts[i].id)
6520 return 1;
6521 if (isId && ! type->idAtt && ! attId->xmlns)
6522 type->idAtt = attId;
6523 }
6524 if (type->nDefaultAtts == type->allocDefaultAtts) {
6525 if (type->allocDefaultAtts == 0) {
6526 type->allocDefaultAtts = 8;
6527 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6528 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6529 if (! type->defaultAtts) {
6530 type->allocDefaultAtts = 0;
6531 return 0;
6532 }
6533 } else {
6534 DEFAULT_ATTRIBUTE *temp;
6535
6536 /* Detect and prevent integer overflow */
6537 if (type->allocDefaultAtts > INT_MAX / 2) {
6538 return 0;
6539 }
6540
6541 int count = type->allocDefaultAtts * 2;
6542
6543 /* Detect and prevent integer overflow.
6544 * The preprocessor guard addresses the "always false" warning
6545 * from -Wtype-limits on platforms where
6546 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6547 #if UINT_MAX >= SIZE_MAX
6548 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6549 return 0;
6550 }
6551 #endif
6552
6553 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6554 (count * sizeof(DEFAULT_ATTRIBUTE)));
6555 if (temp == NULL)
6556 return 0;
6557 type->allocDefaultAtts = count;
6558 type->defaultAtts = temp;
6559 }
6560 }
6561 att = type->defaultAtts + type->nDefaultAtts;
6562 att->id = attId;
6563 att->value = value;
6564 att->isCdata = isCdata;
6565 if (! isCdata)
6566 attId->maybeTokenized = XML_TRUE;
6567 type->nDefaultAtts += 1;
6568 return 1;
6569 }
6570
6571 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6572 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6573 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6574 const XML_Char *name;
6575 for (name = elementType->name; *name; name++) {
6576 if (*name == XML_T(ASCII_COLON)) {
6577 PREFIX *prefix;
6578 const XML_Char *s;
6579 for (s = elementType->name; s != name; s++) {
6580 if (! poolAppendChar(&dtd->pool, *s))
6581 return 0;
6582 }
6583 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6584 return 0;
6585 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6586 sizeof(PREFIX));
6587 if (! prefix)
6588 return 0;
6589 if (prefix->name == poolStart(&dtd->pool))
6590 poolFinish(&dtd->pool);
6591 else
6592 poolDiscard(&dtd->pool);
6593 elementType->prefix = prefix;
6594 break;
6595 }
6596 }
6597 return 1;
6598 }
6599
6600 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6601 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6602 const char *end) {
6603 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6604 ATTRIBUTE_ID *id;
6605 const XML_Char *name;
6606 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6607 return NULL;
6608 name = poolStoreString(&dtd->pool, enc, start, end);
6609 if (! name)
6610 return NULL;
6611 /* skip quotation mark - its storage will be reused (like in name[-1]) */
6612 ++name;
6613 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6614 sizeof(ATTRIBUTE_ID));
6615 if (! id)
6616 return NULL;
6617 if (id->name != name)
6618 poolDiscard(&dtd->pool);
6619 else {
6620 poolFinish(&dtd->pool);
6621 if (! parser->m_ns)
6622 ;
6623 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6624 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6625 && name[4] == XML_T(ASCII_s)
6626 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6627 if (name[5] == XML_T('\0'))
6628 id->prefix = &dtd->defaultPrefix;
6629 else
6630 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6631 sizeof(PREFIX));
6632 id->xmlns = XML_TRUE;
6633 } else {
6634 int i;
6635 for (i = 0; name[i]; i++) {
6636 /* attributes without prefix are *not* in the default namespace */
6637 if (name[i] == XML_T(ASCII_COLON)) {
6638 int j;
6639 for (j = 0; j < i; j++) {
6640 if (! poolAppendChar(&dtd->pool, name[j]))
6641 return NULL;
6642 }
6643 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6644 return NULL;
6645 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6646 poolStart(&dtd->pool), sizeof(PREFIX));
6647 if (! id->prefix)
6648 return NULL;
6649 if (id->prefix->name == poolStart(&dtd->pool))
6650 poolFinish(&dtd->pool);
6651 else
6652 poolDiscard(&dtd->pool);
6653 break;
6654 }
6655 }
6656 }
6657 }
6658 return id;
6659 }
6660
6661 #define CONTEXT_SEP XML_T(ASCII_FF)
6662
6663 static const XML_Char *
getContext(XML_Parser parser)6664 getContext(XML_Parser parser) {
6665 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6666 HASH_TABLE_ITER iter;
6667 XML_Bool needSep = XML_FALSE;
6668
6669 if (dtd->defaultPrefix.binding) {
6670 int i;
6671 int len;
6672 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6673 return NULL;
6674 len = dtd->defaultPrefix.binding->uriLen;
6675 if (parser->m_namespaceSeparator)
6676 len--;
6677 for (i = 0; i < len; i++) {
6678 if (! poolAppendChar(&parser->m_tempPool,
6679 dtd->defaultPrefix.binding->uri[i])) {
6680 /* Because of memory caching, I don't believe this line can be
6681 * executed.
6682 *
6683 * This is part of a loop copying the default prefix binding
6684 * URI into the parser's temporary string pool. Previously,
6685 * that URI was copied into the same string pool, with a
6686 * terminating NUL character, as part of setContext(). When
6687 * the pool was cleared, that leaves a block definitely big
6688 * enough to hold the URI on the free block list of the pool.
6689 * The URI copy in getContext() therefore cannot run out of
6690 * memory.
6691 *
6692 * If the pool is used between the setContext() and
6693 * getContext() calls, the worst it can do is leave a bigger
6694 * block on the front of the free list. Given that this is
6695 * all somewhat inobvious and program logic can be changed, we
6696 * don't delete the line but we do exclude it from the test
6697 * coverage statistics.
6698 */
6699 return NULL; /* LCOV_EXCL_LINE */
6700 }
6701 }
6702 needSep = XML_TRUE;
6703 }
6704
6705 hashTableIterInit(&iter, &(dtd->prefixes));
6706 for (;;) {
6707 int i;
6708 int len;
6709 const XML_Char *s;
6710 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6711 if (! prefix)
6712 break;
6713 if (! prefix->binding) {
6714 /* This test appears to be (justifiable) paranoia. There does
6715 * not seem to be a way of injecting a prefix without a binding
6716 * that doesn't get errored long before this function is called.
6717 * The test should remain for safety's sake, so we instead
6718 * exclude the following line from the coverage statistics.
6719 */
6720 continue; /* LCOV_EXCL_LINE */
6721 }
6722 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6723 return NULL;
6724 for (s = prefix->name; *s; s++)
6725 if (! poolAppendChar(&parser->m_tempPool, *s))
6726 return NULL;
6727 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6728 return NULL;
6729 len = prefix->binding->uriLen;
6730 if (parser->m_namespaceSeparator)
6731 len--;
6732 for (i = 0; i < len; i++)
6733 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6734 return NULL;
6735 needSep = XML_TRUE;
6736 }
6737
6738 hashTableIterInit(&iter, &(dtd->generalEntities));
6739 for (;;) {
6740 const XML_Char *s;
6741 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6742 if (! e)
6743 break;
6744 if (! e->open)
6745 continue;
6746 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6747 return NULL;
6748 for (s = e->name; *s; s++)
6749 if (! poolAppendChar(&parser->m_tempPool, *s))
6750 return 0;
6751 needSep = XML_TRUE;
6752 }
6753
6754 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6755 return NULL;
6756 return parser->m_tempPool.start;
6757 }
6758
/* Apply a context string to the parser.
 *
 * `context` is a list of items separated by CONTEXT_SEP.  An item without
 * an equals sign names a general entity; if the DTD knows it, it is marked
 * open.  An item of the form "<prefix>=<uri>" binds the prefix (the
 * default prefix when the part before the equals sign is empty) to the URI
 * through addBinding(), adding the binding to m_inheritedBindings.
 *
 * Returns XML_TRUE on success.  Returns XML_FALSE if `context` is NULL, if
 * the temporary pool or the prefix table cannot be extended, or if
 * addBinding() reports an error.
 */
static XML_Bool
setContext(XML_Parser parser, const XML_Char *context) {
  if (context == NULL) {
    return XML_FALSE;
  }

  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  /* `s` scans the current item; `context` is moved to the start of the
     next item each time one has been consumed. */
  const XML_Char *s = context;

  while (*context != XML_T('\0')) {
    if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
      /* End of an item without '=': the pool holds an entity name. */
      ENTITY *e;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      e = (ENTITY *)lookup(parser, &dtd->generalEntities,
                           poolStart(&parser->m_tempPool), 0);
      if (e)
        e->open = XML_TRUE;
      if (*s != XML_T('\0'))
        s++;
      context = s;
      poolDiscard(&parser->m_tempPool);
    } else if (*s == XML_T(ASCII_EQUALS)) {
      /* The pool holds the prefix; the URI follows the '='. */
      PREFIX *prefix;
      if (poolLength(&parser->m_tempPool) == 0)
        prefix = &dtd->defaultPrefix;
      else {
        if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
          return XML_FALSE;
        prefix
            = (PREFIX *)lookup(parser, &dtd->prefixes,
                               poolStart(&parser->m_tempPool), sizeof(PREFIX));
        if (! prefix)
          return XML_FALSE;
        /* The entry's name still points into the temporary pool, which is
           discarded just below; give it a copy in the DTD pool instead. */
        if (prefix->name == poolStart(&parser->m_tempPool)) {
          prefix->name = poolCopyString(&dtd->pool, prefix->name);
          if (! prefix->name)
            return XML_FALSE;
        }
        poolDiscard(&parser->m_tempPool);
      }
      /* Collect the URI up to the next separator or the end. */
      for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
           context++)
        if (! poolAppendChar(&parser->m_tempPool, *context))
          return XML_FALSE;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
                     &parser->m_inheritedBindings)
          != XML_ERROR_NONE)
        return XML_FALSE;
      poolDiscard(&parser->m_tempPool);
      if (*context != XML_T('\0'))
        ++context;
      s = context;
    } else {
      /* Ordinary character: part of an entity name or of a prefix. */
      if (! poolAppendChar(&parser->m_tempPool, *s))
        return XML_FALSE;
      s++;
    }
  }
  return XML_TRUE;
}
6822
6823 static void FASTCALL
normalizePublicId(XML_Char * publicId)6824 normalizePublicId(XML_Char *publicId) {
6825 XML_Char *p = publicId;
6826 XML_Char *s;
6827 for (s = publicId; *s; s++) {
6828 switch (*s) {
6829 case 0x20:
6830 case 0xD:
6831 case 0xA:
6832 if (p != publicId && p[-1] != 0x20)
6833 *p++ = 0x20;
6834 break;
6835 default:
6836 *p++ = *s;
6837 }
6838 }
6839 if (p != publicId && p[-1] == 0x20)
6840 --p;
6841 *p = XML_T('\0');
6842 }
6843
6844 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6845 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6846 DTD *p = ms->malloc_fcn(sizeof(DTD));
6847 if (p == NULL)
6848 return p;
6849 poolInit(&(p->pool), ms);
6850 poolInit(&(p->entityValuePool), ms);
6851 hashTableInit(&(p->generalEntities), ms);
6852 hashTableInit(&(p->elementTypes), ms);
6853 hashTableInit(&(p->attributeIds), ms);
6854 hashTableInit(&(p->prefixes), ms);
6855 #ifdef XML_DTD
6856 p->paramEntityRead = XML_FALSE;
6857 hashTableInit(&(p->paramEntities), ms);
6858 #endif /* XML_DTD */
6859 p->defaultPrefix.name = NULL;
6860 p->defaultPrefix.binding = NULL;
6861
6862 p->in_eldecl = XML_FALSE;
6863 p->scaffIndex = NULL;
6864 p->scaffold = NULL;
6865 p->scaffLevel = 0;
6866 p->scaffSize = 0;
6867 p->scaffCount = 0;
6868 p->contentStringLen = 0;
6869
6870 p->keepProcessing = XML_TRUE;
6871 p->hasParamEntityRefs = XML_FALSE;
6872 p->standalone = XML_FALSE;
6873 return p;
6874 }
6875
6876 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6877 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6878 HASH_TABLE_ITER iter;
6879 hashTableIterInit(&iter, &(p->elementTypes));
6880 for (;;) {
6881 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6882 if (! e)
6883 break;
6884 if (e->allocDefaultAtts != 0)
6885 ms->free_fcn(e->defaultAtts);
6886 }
6887 hashTableClear(&(p->generalEntities));
6888 #ifdef XML_DTD
6889 p->paramEntityRead = XML_FALSE;
6890 hashTableClear(&(p->paramEntities));
6891 #endif /* XML_DTD */
6892 hashTableClear(&(p->elementTypes));
6893 hashTableClear(&(p->attributeIds));
6894 hashTableClear(&(p->prefixes));
6895 poolClear(&(p->pool));
6896 poolClear(&(p->entityValuePool));
6897 p->defaultPrefix.name = NULL;
6898 p->defaultPrefix.binding = NULL;
6899
6900 p->in_eldecl = XML_FALSE;
6901
6902 ms->free_fcn(p->scaffIndex);
6903 p->scaffIndex = NULL;
6904 ms->free_fcn(p->scaffold);
6905 p->scaffold = NULL;
6906
6907 p->scaffLevel = 0;
6908 p->scaffSize = 0;
6909 p->scaffCount = 0;
6910 p->contentStringLen = 0;
6911
6912 p->keepProcessing = XML_TRUE;
6913 p->hasParamEntityRefs = XML_FALSE;
6914 p->standalone = XML_FALSE;
6915 }
6916
6917 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6918 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6919 HASH_TABLE_ITER iter;
6920 hashTableIterInit(&iter, &(p->elementTypes));
6921 for (;;) {
6922 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6923 if (! e)
6924 break;
6925 if (e->allocDefaultAtts != 0)
6926 ms->free_fcn(e->defaultAtts);
6927 }
6928 hashTableDestroy(&(p->generalEntities));
6929 #ifdef XML_DTD
6930 hashTableDestroy(&(p->paramEntities));
6931 #endif /* XML_DTD */
6932 hashTableDestroy(&(p->elementTypes));
6933 hashTableDestroy(&(p->attributeIds));
6934 hashTableDestroy(&(p->prefixes));
6935 poolDestroy(&(p->pool));
6936 poolDestroy(&(p->entityValuePool));
6937 if (isDocEntity) {
6938 ms->free_fcn(p->scaffIndex);
6939 ms->free_fcn(p->scaffold);
6940 }
6941 ms->free_fcn(p);
6942 }
6943
6944 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6945 The new DTD has already been initialized.
6946 */
6947 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6948 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6949 const XML_Memory_Handling_Suite *ms) {
6950 HASH_TABLE_ITER iter;
6951
6952 /* Copy the prefix table. */
6953
6954 hashTableIterInit(&iter, &(oldDtd->prefixes));
6955 for (;;) {
6956 const XML_Char *name;
6957 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6958 if (! oldP)
6959 break;
6960 name = poolCopyString(&(newDtd->pool), oldP->name);
6961 if (! name)
6962 return 0;
6963 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6964 return 0;
6965 }
6966
6967 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6968
6969 /* Copy the attribute id table. */
6970
6971 for (;;) {
6972 ATTRIBUTE_ID *newA;
6973 const XML_Char *name;
6974 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6975
6976 if (! oldA)
6977 break;
6978 /* Remember to allocate the scratch byte before the name. */
6979 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6980 return 0;
6981 name = poolCopyString(&(newDtd->pool), oldA->name);
6982 if (! name)
6983 return 0;
6984 ++name;
6985 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6986 sizeof(ATTRIBUTE_ID));
6987 if (! newA)
6988 return 0;
6989 newA->maybeTokenized = oldA->maybeTokenized;
6990 if (oldA->prefix) {
6991 newA->xmlns = oldA->xmlns;
6992 if (oldA->prefix == &oldDtd->defaultPrefix)
6993 newA->prefix = &newDtd->defaultPrefix;
6994 else
6995 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6996 oldA->prefix->name, 0);
6997 }
6998 }
6999
7000 /* Copy the element type table. */
7001
7002 hashTableIterInit(&iter, &(oldDtd->elementTypes));
7003
7004 for (;;) {
7005 int i;
7006 ELEMENT_TYPE *newE;
7007 const XML_Char *name;
7008 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7009 if (! oldE)
7010 break;
7011 name = poolCopyString(&(newDtd->pool), oldE->name);
7012 if (! name)
7013 return 0;
7014 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7015 sizeof(ELEMENT_TYPE));
7016 if (! newE)
7017 return 0;
7018 if (oldE->nDefaultAtts) {
7019 newE->defaultAtts
7020 = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7021 if (! newE->defaultAtts) {
7022 return 0;
7023 }
7024 }
7025 if (oldE->idAtt)
7026 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7027 oldE->idAtt->name, 0);
7028 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7029 if (oldE->prefix)
7030 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7031 oldE->prefix->name, 0);
7032 for (i = 0; i < newE->nDefaultAtts; i++) {
7033 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7034 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7035 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7036 if (oldE->defaultAtts[i].value) {
7037 newE->defaultAtts[i].value
7038 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7039 if (! newE->defaultAtts[i].value)
7040 return 0;
7041 } else
7042 newE->defaultAtts[i].value = NULL;
7043 }
7044 }
7045
7046 /* Copy the entity tables. */
7047 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7048 &(oldDtd->generalEntities)))
7049 return 0;
7050
7051 #ifdef XML_DTD
7052 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7053 &(oldDtd->paramEntities)))
7054 return 0;
7055 newDtd->paramEntityRead = oldDtd->paramEntityRead;
7056 #endif /* XML_DTD */
7057
7058 newDtd->keepProcessing = oldDtd->keepProcessing;
7059 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7060 newDtd->standalone = oldDtd->standalone;
7061
7062 /* Don't want deep copying for scaffolding */
7063 newDtd->in_eldecl = oldDtd->in_eldecl;
7064 newDtd->scaffold = oldDtd->scaffold;
7065 newDtd->contentStringLen = oldDtd->contentStringLen;
7066 newDtd->scaffSize = oldDtd->scaffSize;
7067 newDtd->scaffLevel = oldDtd->scaffLevel;
7068 newDtd->scaffIndex = oldDtd->scaffIndex;
7069
7070 return 1;
7071 } /* End dtdCopy */
7072
7073 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7074 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7075 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7076 HASH_TABLE_ITER iter;
7077 const XML_Char *cachedOldBase = NULL;
7078 const XML_Char *cachedNewBase = NULL;
7079
7080 hashTableIterInit(&iter, oldTable);
7081
7082 for (;;) {
7083 ENTITY *newE;
7084 const XML_Char *name;
7085 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7086 if (! oldE)
7087 break;
7088 name = poolCopyString(newPool, oldE->name);
7089 if (! name)
7090 return 0;
7091 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7092 if (! newE)
7093 return 0;
7094 if (oldE->systemId) {
7095 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7096 if (! tem)
7097 return 0;
7098 newE->systemId = tem;
7099 if (oldE->base) {
7100 if (oldE->base == cachedOldBase)
7101 newE->base = cachedNewBase;
7102 else {
7103 cachedOldBase = oldE->base;
7104 tem = poolCopyString(newPool, cachedOldBase);
7105 if (! tem)
7106 return 0;
7107 cachedNewBase = newE->base = tem;
7108 }
7109 }
7110 if (oldE->publicId) {
7111 tem = poolCopyString(newPool, oldE->publicId);
7112 if (! tem)
7113 return 0;
7114 newE->publicId = tem;
7115 }
7116 } else {
7117 const XML_Char *tem
7118 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7119 if (! tem)
7120 return 0;
7121 newE->textPtr = tem;
7122 newE->textLen = oldE->textLen;
7123 }
7124 if (oldE->notation) {
7125 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7126 if (! tem)
7127 return 0;
7128 newE->notation = tem;
7129 }
7130 newE->is_param = oldE->is_param;
7131 newE->is_internal = oldE->is_internal;
7132 }
7133 return 1;
7134 }
7135
7136 #define INIT_POWER 6
7137
7138 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7139 keyeq(KEY s1, KEY s2) {
7140 for (; *s1 == *s2; s1++, s2++)
7141 if (*s1 == 0)
7142 return XML_TRUE;
7143 return XML_FALSE;
7144 }
7145
7146 static size_t
keylen(KEY s)7147 keylen(KEY s) {
7148 size_t len = 0;
7149 for (; *s; s++, len++)
7150 ;
7151 return len;
7152 }
7153
7154 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7155 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7156 key->k[0] = 0;
7157 key->k[1] = get_hash_secret_salt(parser);
7158 }
7159
7160 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7161 hash(XML_Parser parser, KEY s) {
7162 struct siphash state;
7163 struct sipkey key;
7164 (void)sip24_valid;
7165 copy_salt_to_sipkey(parser, &key);
7166 sip24_init(&state, &key);
7167 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7168 return (unsigned long)sip24_final(&state);
7169 }
7170
7171 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)7172 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7173 size_t i;
7174 if (table->size == 0) {
7175 size_t tsize;
7176 if (! createSize)
7177 return NULL;
7178 table->power = INIT_POWER;
7179 /* table->size is a power of 2 */
7180 table->size = (size_t)1 << INIT_POWER;
7181 tsize = table->size * sizeof(NAMED *);
7182 table->v = table->mem->malloc_fcn(tsize);
7183 if (! table->v) {
7184 table->size = 0;
7185 return NULL;
7186 }
7187 memset(table->v, 0, tsize);
7188 i = hash(parser, name) & ((unsigned long)table->size - 1);
7189 } else {
7190 unsigned long h = hash(parser, name);
7191 unsigned long mask = (unsigned long)table->size - 1;
7192 unsigned char step = 0;
7193 i = h & mask;
7194 while (table->v[i]) {
7195 if (keyeq(name, table->v[i]->name))
7196 return table->v[i];
7197 if (! step)
7198 step = PROBE_STEP(h, mask, table->power);
7199 i < step ? (i += table->size - step) : (i -= step);
7200 }
7201 if (! createSize)
7202 return NULL;
7203
7204 /* check for overflow (table is half full) */
7205 if (table->used >> (table->power - 1)) {
7206 unsigned char newPower = table->power + 1;
7207
7208 /* Detect and prevent invalid shift */
7209 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7210 return NULL;
7211 }
7212
7213 size_t newSize = (size_t)1 << newPower;
7214 unsigned long newMask = (unsigned long)newSize - 1;
7215
7216 /* Detect and prevent integer overflow */
7217 if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
7218 return NULL;
7219 }
7220
7221 size_t tsize = newSize * sizeof(NAMED *);
7222 NAMED **newV = table->mem->malloc_fcn(tsize);
7223 if (! newV)
7224 return NULL;
7225 memset(newV, 0, tsize);
7226 for (i = 0; i < table->size; i++)
7227 if (table->v[i]) {
7228 unsigned long newHash = hash(parser, table->v[i]->name);
7229 size_t j = newHash & newMask;
7230 step = 0;
7231 while (newV[j]) {
7232 if (! step)
7233 step = PROBE_STEP(newHash, newMask, newPower);
7234 j < step ? (j += newSize - step) : (j -= step);
7235 }
7236 newV[j] = table->v[i];
7237 }
7238 table->mem->free_fcn(table->v);
7239 table->v = newV;
7240 table->power = newPower;
7241 table->size = newSize;
7242 i = h & newMask;
7243 step = 0;
7244 while (table->v[i]) {
7245 if (! step)
7246 step = PROBE_STEP(h, newMask, newPower);
7247 i < step ? (i += newSize - step) : (i -= step);
7248 }
7249 }
7250 }
7251 table->v[i] = table->mem->malloc_fcn(createSize);
7252 if (! table->v[i])
7253 return NULL;
7254 memset(table->v[i], 0, createSize);
7255 table->v[i]->name = name;
7256 (table->used)++;
7257 return table->v[i];
7258 }
7259
7260 static void FASTCALL
hashTableClear(HASH_TABLE * table)7261 hashTableClear(HASH_TABLE *table) {
7262 size_t i;
7263 for (i = 0; i < table->size; i++) {
7264 table->mem->free_fcn(table->v[i]);
7265 table->v[i] = NULL;
7266 }
7267 table->used = 0;
7268 }
7269
7270 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7271 hashTableDestroy(HASH_TABLE *table) {
7272 size_t i;
7273 for (i = 0; i < table->size; i++)
7274 table->mem->free_fcn(table->v[i]);
7275 table->mem->free_fcn(table->v);
7276 }
7277
7278 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7279 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7280 p->power = 0;
7281 p->size = 0;
7282 p->used = 0;
7283 p->v = NULL;
7284 p->mem = ms;
7285 }
7286
7287 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7288 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7289 iter->p = table->v;
7290 iter->end = iter->p ? iter->p + table->size : NULL;
7291 }
7292
7293 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7294 hashTableIterNext(HASH_TABLE_ITER *iter) {
7295 while (iter->p != iter->end) {
7296 NAMED *tem = *(iter->p)++;
7297 if (tem)
7298 return tem;
7299 }
7300 return NULL;
7301 }
7302
7303 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7304 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7305 pool->blocks = NULL;
7306 pool->freeBlocks = NULL;
7307 pool->start = NULL;
7308 pool->ptr = NULL;
7309 pool->end = NULL;
7310 pool->mem = ms;
7311 }
7312
7313 static void FASTCALL
poolClear(STRING_POOL * pool)7314 poolClear(STRING_POOL *pool) {
7315 if (! pool->freeBlocks)
7316 pool->freeBlocks = pool->blocks;
7317 else {
7318 BLOCK *p = pool->blocks;
7319 while (p) {
7320 BLOCK *tem = p->next;
7321 p->next = pool->freeBlocks;
7322 pool->freeBlocks = p;
7323 p = tem;
7324 }
7325 }
7326 pool->blocks = NULL;
7327 pool->start = NULL;
7328 pool->ptr = NULL;
7329 pool->end = NULL;
7330 }
7331
7332 static void FASTCALL
poolDestroy(STRING_POOL * pool)7333 poolDestroy(STRING_POOL *pool) {
7334 BLOCK *p = pool->blocks;
7335 while (p) {
7336 BLOCK *tem = p->next;
7337 pool->mem->free_fcn(p);
7338 p = tem;
7339 }
7340 p = pool->freeBlocks;
7341 while (p) {
7342 BLOCK *tem = p->next;
7343 pool->mem->free_fcn(p);
7344 p = tem;
7345 }
7346 }
7347
7348 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7349 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7350 const char *end) {
7351 if (! pool->ptr && ! poolGrow(pool))
7352 return NULL;
7353 for (;;) {
7354 const enum XML_Convert_Result convert_res = XmlConvert(
7355 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7356 if ((convert_res == XML_CONVERT_COMPLETED)
7357 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7358 break;
7359 if (! poolGrow(pool))
7360 return NULL;
7361 }
7362 return pool->start;
7363 }
7364
7365 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7366 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7367 do {
7368 if (! poolAppendChar(pool, *s))
7369 return NULL;
7370 } while (*s++);
7371 s = pool->start;
7372 poolFinish(pool);
7373 return s;
7374 }
7375
7376 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7377 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7378 if (! pool->ptr && ! poolGrow(pool)) {
7379 /* The following line is unreachable given the current usage of
7380 * poolCopyStringN(). Currently it is called from exactly one
7381 * place to copy the text of a simple general entity. By that
7382 * point, the name of the entity is already stored in the pool, so
7383 * pool->ptr cannot be NULL.
7384 *
7385 * If poolCopyStringN() is used elsewhere as it well might be,
7386 * this line may well become executable again. Regardless, this
7387 * sort of check shouldn't be removed lightly, so we just exclude
7388 * it from the coverage statistics.
7389 */
7390 return NULL; /* LCOV_EXCL_LINE */
7391 }
7392 for (; n > 0; --n, s++) {
7393 if (! poolAppendChar(pool, *s))
7394 return NULL;
7395 }
7396 s = pool->start;
7397 poolFinish(pool);
7398 return s;
7399 }
7400
7401 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7402 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7403 while (*s) {
7404 if (! poolAppendChar(pool, *s))
7405 return NULL;
7406 s++;
7407 }
7408 return pool->start;
7409 }
7410
7411 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7412 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7413 const char *end) {
7414 if (! poolAppend(pool, enc, ptr, end))
7415 return NULL;
7416 if (pool->ptr == pool->end && ! poolGrow(pool))
7417 return NULL;
7418 *(pool->ptr)++ = 0;
7419 return pool->start;
7420 }
7421
7422 static size_t
poolBytesToAllocateFor(int blockSize)7423 poolBytesToAllocateFor(int blockSize) {
7424 /* Unprotected math would be:
7425 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7426 **
7427 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7428 ** For a + b * c we check b * c in isolation first, so that addition of a
7429 ** on top has no chance of making us accept a small non-negative number
7430 */
7431 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7432
7433 if (blockSize <= 0)
7434 return 0;
7435
7436 if (blockSize > (int)(INT_MAX / stretch))
7437 return 0;
7438
7439 {
7440 const int stretchedBlockSize = blockSize * (int)stretch;
7441 const int bytesToAllocate
7442 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7443 if (bytesToAllocate < 0)
7444 return 0;
7445
7446 return (size_t)bytesToAllocate;
7447 }
7448 }
7449
7450 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)7451 poolGrow(STRING_POOL *pool) {
7452 if (pool->freeBlocks) {
7453 if (pool->start == 0) {
7454 pool->blocks = pool->freeBlocks;
7455 pool->freeBlocks = pool->freeBlocks->next;
7456 pool->blocks->next = NULL;
7457 pool->start = pool->blocks->s;
7458 pool->end = pool->start + pool->blocks->size;
7459 pool->ptr = pool->start;
7460 return XML_TRUE;
7461 }
7462 if (pool->end - pool->start < pool->freeBlocks->size) {
7463 BLOCK *tem = pool->freeBlocks->next;
7464 pool->freeBlocks->next = pool->blocks;
7465 pool->blocks = pool->freeBlocks;
7466 pool->freeBlocks = tem;
7467 memcpy(pool->blocks->s, pool->start,
7468 (pool->end - pool->start) * sizeof(XML_Char));
7469 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7470 pool->start = pool->blocks->s;
7471 pool->end = pool->start + pool->blocks->size;
7472 return XML_TRUE;
7473 }
7474 }
7475 if (pool->blocks && pool->start == pool->blocks->s) {
7476 BLOCK *temp;
7477 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7478 size_t bytesToAllocate;
7479
7480 /* NOTE: Needs to be calculated prior to calling `realloc`
7481 to avoid dangling pointers: */
7482 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7483
7484 if (blockSize < 0) {
7485 /* This condition traps a situation where either more than
7486 * INT_MAX/2 bytes have already been allocated. This isn't
7487 * readily testable, since it is unlikely that an average
7488 * machine will have that much memory, so we exclude it from the
7489 * coverage statistics.
7490 */
7491 return XML_FALSE; /* LCOV_EXCL_LINE */
7492 }
7493
7494 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7495 if (bytesToAllocate == 0)
7496 return XML_FALSE;
7497
7498 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7499 (unsigned)bytesToAllocate);
7500 if (temp == NULL)
7501 return XML_FALSE;
7502 pool->blocks = temp;
7503 pool->blocks->size = blockSize;
7504 pool->ptr = pool->blocks->s + offsetInsideBlock;
7505 pool->start = pool->blocks->s;
7506 pool->end = pool->start + blockSize;
7507 } else {
7508 BLOCK *tem;
7509 int blockSize = (int)(pool->end - pool->start);
7510 size_t bytesToAllocate;
7511
7512 if (blockSize < 0) {
7513 /* This condition traps a situation where either more than
7514 * INT_MAX bytes have already been allocated (which is prevented
7515 * by various pieces of program logic, not least this one, never
7516 * mind the unlikelihood of actually having that much memory) or
7517 * the pool control fields have been corrupted (which could
7518 * conceivably happen in an extremely buggy user handler
7519 * function). Either way it isn't readily testable, so we
7520 * exclude it from the coverage statistics.
7521 */
7522 return XML_FALSE; /* LCOV_EXCL_LINE */
7523 }
7524
7525 if (blockSize < INIT_BLOCK_SIZE)
7526 blockSize = INIT_BLOCK_SIZE;
7527 else {
7528 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7529 if ((int)((unsigned)blockSize * 2U) < 0) {
7530 return XML_FALSE;
7531 }
7532 blockSize *= 2;
7533 }
7534
7535 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7536 if (bytesToAllocate == 0)
7537 return XML_FALSE;
7538
7539 tem = pool->mem->malloc_fcn(bytesToAllocate);
7540 if (! tem)
7541 return XML_FALSE;
7542 tem->size = blockSize;
7543 tem->next = pool->blocks;
7544 pool->blocks = tem;
7545 if (pool->ptr != pool->start)
7546 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7547 pool->ptr = tem->s + (pool->ptr - pool->start);
7548 pool->start = tem->s;
7549 pool->end = tem->s + blockSize;
7550 }
7551 return XML_TRUE;
7552 }
7553
7554 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7555 nextScaffoldPart(XML_Parser parser) {
7556 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7557 CONTENT_SCAFFOLD *me;
7558 int next;
7559
7560 if (! dtd->scaffIndex) {
7561 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7562 if (! dtd->scaffIndex)
7563 return -1;
7564 dtd->scaffIndex[0] = 0;
7565 }
7566
7567 if (dtd->scaffCount >= dtd->scaffSize) {
7568 CONTENT_SCAFFOLD *temp;
7569 if (dtd->scaffold) {
7570 /* Detect and prevent integer overflow */
7571 if (dtd->scaffSize > UINT_MAX / 2u) {
7572 return -1;
7573 }
7574 /* Detect and prevent integer overflow.
7575 * The preprocessor guard addresses the "always false" warning
7576 * from -Wtype-limits on platforms where
7577 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7578 #if UINT_MAX >= SIZE_MAX
7579 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7580 return -1;
7581 }
7582 #endif
7583
7584 temp = (CONTENT_SCAFFOLD *)REALLOC(
7585 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7586 if (temp == NULL)
7587 return -1;
7588 dtd->scaffSize *= 2;
7589 } else {
7590 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7591 * sizeof(CONTENT_SCAFFOLD));
7592 if (temp == NULL)
7593 return -1;
7594 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7595 }
7596 dtd->scaffold = temp;
7597 }
7598 next = dtd->scaffCount++;
7599 me = &dtd->scaffold[next];
7600 if (dtd->scaffLevel) {
7601 CONTENT_SCAFFOLD *parent
7602 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7603 if (parent->lastchild) {
7604 dtd->scaffold[parent->lastchild].nextsib = next;
7605 }
7606 if (! parent->childcnt)
7607 parent->firstchild = next;
7608 parent->lastchild = next;
7609 parent->childcnt++;
7610 }
7611 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7612 return next;
7613 }
7614
7615 static XML_Content *
build_model(XML_Parser parser)7616 build_model(XML_Parser parser) {
7617 /* Function build_model transforms the existing parser->m_dtd->scaffold
7618 * array of CONTENT_SCAFFOLD tree nodes into a new array of
7619 * XML_Content tree nodes followed by a gapless list of zero-terminated
7620 * strings. */
7621 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7622 XML_Content *ret;
7623 XML_Char *str; /* the current string writing location */
7624
7625 /* Detect and prevent integer overflow.
7626 * The preprocessor guard addresses the "always false" warning
7627 * from -Wtype-limits on platforms where
7628 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7629 #if UINT_MAX >= SIZE_MAX
7630 if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7631 return NULL;
7632 }
7633 if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7634 return NULL;
7635 }
7636 #endif
7637 if (dtd->scaffCount * sizeof(XML_Content)
7638 > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7639 return NULL;
7640 }
7641
7642 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7643 + (dtd->contentStringLen * sizeof(XML_Char)));
7644
7645 ret = (XML_Content *)MALLOC(parser, allocsize);
7646 if (! ret)
7647 return NULL;
7648
7649 /* What follows is an iterative implementation (of what was previously done
7650 * recursively in a dedicated function called "build_node". The old recursive
7651 * build_node could be forced into stack exhaustion from input as small as a
7652 * few megabyte, and so that was a security issue. Hence, a function call
7653 * stack is avoided now by resolving recursion.)
7654 *
7655 * The iterative approach works as follows:
7656 *
7657 * - We have two writing pointers, both walking up the result array; one does
7658 * the work, the other creates "jobs" for its colleague to do, and leads
7659 * the way:
7660 *
7661 * - The faster one, pointer jobDest, always leads and writes "what job
7662 * to do" by the other, once they reach that place in the
7663 * array: leader "jobDest" stores the source node array index (relative
7664 * to array dtd->scaffold) in field "numchildren".
7665 *
7666 * - The slower one, pointer dest, looks at the value stored in the
7667 * "numchildren" field (which actually holds a source node array index
7668 * at that time) and puts the real data from dtd->scaffold in.
7669 *
7670 * - Before the loop starts, jobDest writes source array index 0
7671 * (where the root node is located) so that dest will have something to do
7672 * when it starts operation.
7673 *
7674 * - Whenever nodes with children are encountered, jobDest appends
7675 * them as new jobs, in order. As a result, tree node siblings are
7676 * adjacent in the resulting array, for example:
7677 *
7678 * [0] root, has two children
7679 * [1] first child of 0, has three children
7680 * [3] first child of 1, does not have children
7681 * [4] second child of 1, does not have children
7682 * [5] third child of 1, does not have children
7683 * [2] second child of 0, does not have children
7684 *
7685 * Or (the same data) presented in flat array view:
7686 *
7687 * [0] root, has two children
7688 *
7689 * [1] first child of 0, has three children
7690 * [2] second child of 0, does not have children
7691 *
7692 * [3] first child of 1, does not have children
7693 * [4] second child of 1, does not have children
7694 * [5] third child of 1, does not have children
7695 *
7696 * - The algorithm repeats until all target array indices have been processed.
7697 */
7698 XML_Content *dest = ret; /* tree node writing location, moves upwards */
7699 XML_Content *const destLimit = &ret[dtd->scaffCount];
7700 XML_Content *jobDest = ret; /* next free writing location in target array */
7701 str = (XML_Char *)&ret[dtd->scaffCount];
7702
7703 /* Add the starting job, the root node (index 0) of the source tree */
7704 (jobDest++)->numchildren = 0;
7705
7706 for (; dest < destLimit; dest++) {
7707 /* Retrieve source tree array index from job storage */
7708 const int src_node = (int)dest->numchildren;
7709
7710 /* Convert item */
7711 dest->type = dtd->scaffold[src_node].type;
7712 dest->quant = dtd->scaffold[src_node].quant;
7713 if (dest->type == XML_CTYPE_NAME) {
7714 const XML_Char *src;
7715 dest->name = str;
7716 src = dtd->scaffold[src_node].name;
7717 for (;;) {
7718 *str++ = *src;
7719 if (! *src)
7720 break;
7721 src++;
7722 }
7723 dest->numchildren = 0;
7724 dest->children = NULL;
7725 } else {
7726 unsigned int i;
7727 int cn;
7728 dest->name = NULL;
7729 dest->numchildren = dtd->scaffold[src_node].childcnt;
7730 dest->children = jobDest;
7731
7732 /* Append scaffold indices of children to array */
7733 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7734 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
7735 (jobDest++)->numchildren = (unsigned int)cn;
7736 }
7737 }
7738
7739 return ret;
7740 }
7741
7742 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7743 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7744 const char *end) {
7745 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7746 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7747 ELEMENT_TYPE *ret;
7748
7749 if (! name)
7750 return NULL;
7751 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7752 sizeof(ELEMENT_TYPE));
7753 if (! ret)
7754 return NULL;
7755 if (ret->name != name)
7756 poolDiscard(&dtd->pool);
7757 else {
7758 poolFinish(&dtd->pool);
7759 if (! setElementTypePrefix(parser, ret))
7760 return NULL;
7761 }
7762 return ret;
7763 }
7764
7765 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7766 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7767 size_t charsRequired = 0;
7768 XML_Char *result;
7769
7770 /* First determine how long the string is */
7771 while (s[charsRequired] != 0) {
7772 charsRequired++;
7773 }
7774 /* Include the terminator */
7775 charsRequired++;
7776
7777 /* Now allocate space for the copy */
7778 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7779 if (result == NULL)
7780 return NULL;
7781 /* Copy the original into place */
7782 memcpy(result, s, charsRequired * sizeof(XML_Char));
7783 return result;
7784 }
7785
7786 #if XML_GE == 1
7787
7788 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7789 accountingGetCurrentAmplification(XML_Parser rootParser) {
7790 // 1.........1.........12 => 22
7791 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
7792 const XmlBigCount countBytesOutput
7793 = rootParser->m_accounting.countBytesDirect
7794 + rootParser->m_accounting.countBytesIndirect;
7795 const float amplificationFactor
7796 = rootParser->m_accounting.countBytesDirect
7797 ? (countBytesOutput
7798 / (float)(rootParser->m_accounting.countBytesDirect))
7799 : ((lenOfShortestInclude
7800 + rootParser->m_accounting.countBytesIndirect)
7801 / (float)lenOfShortestInclude);
7802 assert(! rootParser->m_parentParser);
7803 return amplificationFactor;
7804 }
7805
7806 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7807 accountingReportStats(XML_Parser originParser, const char *epilog) {
7808 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7809 assert(! rootParser->m_parentParser);
7810
7811 if (rootParser->m_accounting.debugLevel == 0u) {
7812 return;
7813 }
7814
7815 const float amplificationFactor
7816 = accountingGetCurrentAmplification(rootParser);
7817 fprintf(stderr,
7818 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7819 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7820 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7821 rootParser->m_accounting.countBytesIndirect,
7822 (double)amplificationFactor, epilog);
7823 }
7824
7825 static void
accountingOnAbort(XML_Parser originParser)7826 accountingOnAbort(XML_Parser originParser) {
7827 accountingReportStats(originParser, " ABORTING\n");
7828 }
7829
7830 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7831 accountingReportDiff(XML_Parser rootParser,
7832 unsigned int levelsAwayFromRootParser, const char *before,
7833 const char *after, ptrdiff_t bytesMore, int source_line,
7834 enum XML_Account account) {
7835 assert(! rootParser->m_parentParser);
7836
7837 fprintf(stderr,
7838 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7839 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7840 levelsAwayFromRootParser, source_line, 10, "");
7841
7842 const char ellipis[] = "[..]";
7843 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7844 const unsigned int contextLength = 10;
7845
7846 /* Note: Performance is of no concern here */
7847 const char *walker = before;
7848 if ((rootParser->m_accounting.debugLevel >= 3u)
7849 || (after - before)
7850 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7851 for (; walker < after; walker++) {
7852 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7853 }
7854 } else {
7855 for (; walker < before + contextLength; walker++) {
7856 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7857 }
7858 fprintf(stderr, ellipis);
7859 walker = after - contextLength;
7860 for (; walker < after; walker++) {
7861 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7862 }
7863 }
7864 fprintf(stderr, "\"\n");
7865 }
7866
7867 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7868 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7869 const char *after, int source_line,
7870 enum XML_Account account) {
7871 /* Note: We need to check the token type *first* to be sure that
7872 * we can even access variable <after>, safely.
7873 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7874 switch (tok) {
7875 case XML_TOK_INVALID:
7876 case XML_TOK_PARTIAL:
7877 case XML_TOK_PARTIAL_CHAR:
7878 case XML_TOK_NONE:
7879 return XML_TRUE;
7880 }
7881
7882 if (account == XML_ACCOUNT_NONE)
7883 return XML_TRUE; /* because these bytes have been accounted for, already */
7884
7885 unsigned int levelsAwayFromRootParser;
7886 const XML_Parser rootParser
7887 = getRootParserOf(originParser, &levelsAwayFromRootParser);
7888 assert(! rootParser->m_parentParser);
7889
7890 const int isDirect
7891 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7892 const ptrdiff_t bytesMore = after - before;
7893
7894 XmlBigCount *const additionTarget
7895 = isDirect ? &rootParser->m_accounting.countBytesDirect
7896 : &rootParser->m_accounting.countBytesIndirect;
7897
7898 /* Detect and avoid integer overflow */
7899 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7900 return XML_FALSE;
7901 *additionTarget += bytesMore;
7902
7903 const XmlBigCount countBytesOutput
7904 = rootParser->m_accounting.countBytesDirect
7905 + rootParser->m_accounting.countBytesIndirect;
7906 const float amplificationFactor
7907 = accountingGetCurrentAmplification(rootParser);
7908 const XML_Bool tolerated
7909 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7910 || (amplificationFactor
7911 <= rootParser->m_accounting.maximumAmplificationFactor);
7912
7913 if (rootParser->m_accounting.debugLevel >= 2u) {
7914 accountingReportStats(rootParser, "");
7915 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7916 bytesMore, source_line, account);
7917 }
7918
7919 return tolerated;
7920 }
7921
7922 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7923 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7924 if (! parser)
7925 return 0;
7926 return parser->m_accounting.countBytesDirect;
7927 }
7928
7929 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7930 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7931 if (! parser)
7932 return 0;
7933 return parser->m_accounting.countBytesIndirect;
7934 }
7935
7936 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7937 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7938 const char *action, int sourceLine) {
7939 assert(! rootParser->m_parentParser);
7940 if (rootParser->m_entity_stats.debugLevel == 0u)
7941 return;
7942
7943 # if defined(XML_UNICODE)
7944 const char *const entityName = "[..]";
7945 # else
7946 const char *const entityName = entity->name;
7947 # endif
7948
7949 fprintf(
7950 stderr,
7951 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7952 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7953 rootParser->m_entity_stats.currentDepth,
7954 rootParser->m_entity_stats.maximumDepthSeen,
7955 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7956 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7957 sourceLine);
7958 }
7959
7960 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7961 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7962 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7963 assert(! rootParser->m_parentParser);
7964
7965 rootParser->m_entity_stats.countEverOpened++;
7966 rootParser->m_entity_stats.currentDepth++;
7967 if (rootParser->m_entity_stats.currentDepth
7968 > rootParser->m_entity_stats.maximumDepthSeen) {
7969 rootParser->m_entity_stats.maximumDepthSeen++;
7970 }
7971
7972 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7973 }
7974
7975 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7976 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7977 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7978 assert(! rootParser->m_parentParser);
7979
7980 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7981 rootParser->m_entity_stats.currentDepth--;
7982 }
7983
7984 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)7985 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7986 XML_Parser rootParser = parser;
7987 unsigned int stepsTakenUpwards = 0;
7988 while (rootParser->m_parentParser) {
7989 rootParser = rootParser->m_parentParser;
7990 stepsTakenUpwards++;
7991 }
7992 assert(! rootParser->m_parentParser);
7993 if (outLevelDiff != NULL) {
7994 *outLevelDiff = stepsTakenUpwards;
7995 }
7996 return rootParser;
7997 }
7998
7999 const char *
unsignedCharToPrintable(unsigned char c)8000 unsignedCharToPrintable(unsigned char c) {
8001 switch (c) {
8002 case 0:
8003 return "\\0";
8004 case 1:
8005 return "\\x1";
8006 case 2:
8007 return "\\x2";
8008 case 3:
8009 return "\\x3";
8010 case 4:
8011 return "\\x4";
8012 case 5:
8013 return "\\x5";
8014 case 6:
8015 return "\\x6";
8016 case 7:
8017 return "\\x7";
8018 case 8:
8019 return "\\x8";
8020 case 9:
8021 return "\\t";
8022 case 10:
8023 return "\\n";
8024 case 11:
8025 return "\\xB";
8026 case 12:
8027 return "\\xC";
8028 case 13:
8029 return "\\r";
8030 case 14:
8031 return "\\xE";
8032 case 15:
8033 return "\\xF";
8034 case 16:
8035 return "\\x10";
8036 case 17:
8037 return "\\x11";
8038 case 18:
8039 return "\\x12";
8040 case 19:
8041 return "\\x13";
8042 case 20:
8043 return "\\x14";
8044 case 21:
8045 return "\\x15";
8046 case 22:
8047 return "\\x16";
8048 case 23:
8049 return "\\x17";
8050 case 24:
8051 return "\\x18";
8052 case 25:
8053 return "\\x19";
8054 case 26:
8055 return "\\x1A";
8056 case 27:
8057 return "\\x1B";
8058 case 28:
8059 return "\\x1C";
8060 case 29:
8061 return "\\x1D";
8062 case 30:
8063 return "\\x1E";
8064 case 31:
8065 return "\\x1F";
8066 case 32:
8067 return " ";
8068 case 33:
8069 return "!";
8070 case 34:
8071 return "\\\"";
8072 case 35:
8073 return "#";
8074 case 36:
8075 return "$";
8076 case 37:
8077 return "%";
8078 case 38:
8079 return "&";
8080 case 39:
8081 return "'";
8082 case 40:
8083 return "(";
8084 case 41:
8085 return ")";
8086 case 42:
8087 return "*";
8088 case 43:
8089 return "+";
8090 case 44:
8091 return ",";
8092 case 45:
8093 return "-";
8094 case 46:
8095 return ".";
8096 case 47:
8097 return "/";
8098 case 48:
8099 return "0";
8100 case 49:
8101 return "1";
8102 case 50:
8103 return "2";
8104 case 51:
8105 return "3";
8106 case 52:
8107 return "4";
8108 case 53:
8109 return "5";
8110 case 54:
8111 return "6";
8112 case 55:
8113 return "7";
8114 case 56:
8115 return "8";
8116 case 57:
8117 return "9";
8118 case 58:
8119 return ":";
8120 case 59:
8121 return ";";
8122 case 60:
8123 return "<";
8124 case 61:
8125 return "=";
8126 case 62:
8127 return ">";
8128 case 63:
8129 return "?";
8130 case 64:
8131 return "@";
8132 case 65:
8133 return "A";
8134 case 66:
8135 return "B";
8136 case 67:
8137 return "C";
8138 case 68:
8139 return "D";
8140 case 69:
8141 return "E";
8142 case 70:
8143 return "F";
8144 case 71:
8145 return "G";
8146 case 72:
8147 return "H";
8148 case 73:
8149 return "I";
8150 case 74:
8151 return "J";
8152 case 75:
8153 return "K";
8154 case 76:
8155 return "L";
8156 case 77:
8157 return "M";
8158 case 78:
8159 return "N";
8160 case 79:
8161 return "O";
8162 case 80:
8163 return "P";
8164 case 81:
8165 return "Q";
8166 case 82:
8167 return "R";
8168 case 83:
8169 return "S";
8170 case 84:
8171 return "T";
8172 case 85:
8173 return "U";
8174 case 86:
8175 return "V";
8176 case 87:
8177 return "W";
8178 case 88:
8179 return "X";
8180 case 89:
8181 return "Y";
8182 case 90:
8183 return "Z";
8184 case 91:
8185 return "[";
8186 case 92:
8187 return "\\\\";
8188 case 93:
8189 return "]";
8190 case 94:
8191 return "^";
8192 case 95:
8193 return "_";
8194 case 96:
8195 return "`";
8196 case 97:
8197 return "a";
8198 case 98:
8199 return "b";
8200 case 99:
8201 return "c";
8202 case 100:
8203 return "d";
8204 case 101:
8205 return "e";
8206 case 102:
8207 return "f";
8208 case 103:
8209 return "g";
8210 case 104:
8211 return "h";
8212 case 105:
8213 return "i";
8214 case 106:
8215 return "j";
8216 case 107:
8217 return "k";
8218 case 108:
8219 return "l";
8220 case 109:
8221 return "m";
8222 case 110:
8223 return "n";
8224 case 111:
8225 return "o";
8226 case 112:
8227 return "p";
8228 case 113:
8229 return "q";
8230 case 114:
8231 return "r";
8232 case 115:
8233 return "s";
8234 case 116:
8235 return "t";
8236 case 117:
8237 return "u";
8238 case 118:
8239 return "v";
8240 case 119:
8241 return "w";
8242 case 120:
8243 return "x";
8244 case 121:
8245 return "y";
8246 case 122:
8247 return "z";
8248 case 123:
8249 return "{";
8250 case 124:
8251 return "|";
8252 case 125:
8253 return "}";
8254 case 126:
8255 return "~";
8256 case 127:
8257 return "\\x7F";
8258 case 128:
8259 return "\\x80";
8260 case 129:
8261 return "\\x81";
8262 case 130:
8263 return "\\x82";
8264 case 131:
8265 return "\\x83";
8266 case 132:
8267 return "\\x84";
8268 case 133:
8269 return "\\x85";
8270 case 134:
8271 return "\\x86";
8272 case 135:
8273 return "\\x87";
8274 case 136:
8275 return "\\x88";
8276 case 137:
8277 return "\\x89";
8278 case 138:
8279 return "\\x8A";
8280 case 139:
8281 return "\\x8B";
8282 case 140:
8283 return "\\x8C";
8284 case 141:
8285 return "\\x8D";
8286 case 142:
8287 return "\\x8E";
8288 case 143:
8289 return "\\x8F";
8290 case 144:
8291 return "\\x90";
8292 case 145:
8293 return "\\x91";
8294 case 146:
8295 return "\\x92";
8296 case 147:
8297 return "\\x93";
8298 case 148:
8299 return "\\x94";
8300 case 149:
8301 return "\\x95";
8302 case 150:
8303 return "\\x96";
8304 case 151:
8305 return "\\x97";
8306 case 152:
8307 return "\\x98";
8308 case 153:
8309 return "\\x99";
8310 case 154:
8311 return "\\x9A";
8312 case 155:
8313 return "\\x9B";
8314 case 156:
8315 return "\\x9C";
8316 case 157:
8317 return "\\x9D";
8318 case 158:
8319 return "\\x9E";
8320 case 159:
8321 return "\\x9F";
8322 case 160:
8323 return "\\xA0";
8324 case 161:
8325 return "\\xA1";
8326 case 162:
8327 return "\\xA2";
8328 case 163:
8329 return "\\xA3";
8330 case 164:
8331 return "\\xA4";
8332 case 165:
8333 return "\\xA5";
8334 case 166:
8335 return "\\xA6";
8336 case 167:
8337 return "\\xA7";
8338 case 168:
8339 return "\\xA8";
8340 case 169:
8341 return "\\xA9";
8342 case 170:
8343 return "\\xAA";
8344 case 171:
8345 return "\\xAB";
8346 case 172:
8347 return "\\xAC";
8348 case 173:
8349 return "\\xAD";
8350 case 174:
8351 return "\\xAE";
8352 case 175:
8353 return "\\xAF";
8354 case 176:
8355 return "\\xB0";
8356 case 177:
8357 return "\\xB1";
8358 case 178:
8359 return "\\xB2";
8360 case 179:
8361 return "\\xB3";
8362 case 180:
8363 return "\\xB4";
8364 case 181:
8365 return "\\xB5";
8366 case 182:
8367 return "\\xB6";
8368 case 183:
8369 return "\\xB7";
8370 case 184:
8371 return "\\xB8";
8372 case 185:
8373 return "\\xB9";
8374 case 186:
8375 return "\\xBA";
8376 case 187:
8377 return "\\xBB";
8378 case 188:
8379 return "\\xBC";
8380 case 189:
8381 return "\\xBD";
8382 case 190:
8383 return "\\xBE";
8384 case 191:
8385 return "\\xBF";
8386 case 192:
8387 return "\\xC0";
8388 case 193:
8389 return "\\xC1";
8390 case 194:
8391 return "\\xC2";
8392 case 195:
8393 return "\\xC3";
8394 case 196:
8395 return "\\xC4";
8396 case 197:
8397 return "\\xC5";
8398 case 198:
8399 return "\\xC6";
8400 case 199:
8401 return "\\xC7";
8402 case 200:
8403 return "\\xC8";
8404 case 201:
8405 return "\\xC9";
8406 case 202:
8407 return "\\xCA";
8408 case 203:
8409 return "\\xCB";
8410 case 204:
8411 return "\\xCC";
8412 case 205:
8413 return "\\xCD";
8414 case 206:
8415 return "\\xCE";
8416 case 207:
8417 return "\\xCF";
8418 case 208:
8419 return "\\xD0";
8420 case 209:
8421 return "\\xD1";
8422 case 210:
8423 return "\\xD2";
8424 case 211:
8425 return "\\xD3";
8426 case 212:
8427 return "\\xD4";
8428 case 213:
8429 return "\\xD5";
8430 case 214:
8431 return "\\xD6";
8432 case 215:
8433 return "\\xD7";
8434 case 216:
8435 return "\\xD8";
8436 case 217:
8437 return "\\xD9";
8438 case 218:
8439 return "\\xDA";
8440 case 219:
8441 return "\\xDB";
8442 case 220:
8443 return "\\xDC";
8444 case 221:
8445 return "\\xDD";
8446 case 222:
8447 return "\\xDE";
8448 case 223:
8449 return "\\xDF";
8450 case 224:
8451 return "\\xE0";
8452 case 225:
8453 return "\\xE1";
8454 case 226:
8455 return "\\xE2";
8456 case 227:
8457 return "\\xE3";
8458 case 228:
8459 return "\\xE4";
8460 case 229:
8461 return "\\xE5";
8462 case 230:
8463 return "\\xE6";
8464 case 231:
8465 return "\\xE7";
8466 case 232:
8467 return "\\xE8";
8468 case 233:
8469 return "\\xE9";
8470 case 234:
8471 return "\\xEA";
8472 case 235:
8473 return "\\xEB";
8474 case 236:
8475 return "\\xEC";
8476 case 237:
8477 return "\\xED";
8478 case 238:
8479 return "\\xEE";
8480 case 239:
8481 return "\\xEF";
8482 case 240:
8483 return "\\xF0";
8484 case 241:
8485 return "\\xF1";
8486 case 242:
8487 return "\\xF2";
8488 case 243:
8489 return "\\xF3";
8490 case 244:
8491 return "\\xF4";
8492 case 245:
8493 return "\\xF5";
8494 case 246:
8495 return "\\xF6";
8496 case 247:
8497 return "\\xF7";
8498 case 248:
8499 return "\\xF8";
8500 case 249:
8501 return "\\xF9";
8502 case 250:
8503 return "\\xFA";
8504 case 251:
8505 return "\\xFB";
8506 case 252:
8507 return "\\xFC";
8508 case 253:
8509 return "\\xFD";
8510 case 254:
8511 return "\\xFE";
8512 case 255:
8513 return "\\xFF";
8514 default:
8515 assert(0); /* never gets here */
8516 return "dead code";
8517 }
8518 assert(0); /* never gets here */
8519 }
8520
8521 #endif /* XML_GE == 1 */
8522
/* Reads a non-negative decimal debug level from an environment variable.

   variableName      - name of the environment variable to look up
   defaultDebugLevel - value returned when the variable is unset or its
                       content is not a valid debug level

   Returns the parsed value when the variable's entire content is accepted
   by strtoul() in base 10 and carries no minus sign; otherwise returns
   defaultDebugLevel.  Rejected are: the empty string, text without digits,
   trailing characters after the number, values that make strtoul() report
   an error (e.g. out of range), and negative numbers.

   Side effect: whenever the variable is set, errno is 0 on return.
*/
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const value = getenv(variableName);
  if (value == NULL) {
    return defaultDebugLevel;
  }

  /* strtoul() reports range errors only through errno, so clear it first */
  errno = 0;
  char *afterValue = NULL;
  const unsigned long debugLevel = strtoul(value, &afterValue, 10);
  if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
    errno = 0; /* do not leak our parse failure to the caller */
    return defaultDebugLevel;
  }

  /* strtoul() accepts a leading '-' and returns the negated value wrapped
     to unsigned (e.g. "-1" becomes ULONG_MAX) without setting errno.
     At this point the whole string was consumed as one number, so any '-'
     in it can only be that sign; treat it as invalid input. */
  for (const char *cursor = value; cursor != afterValue; cursor++) {
    if (*cursor == '-') {
      return defaultDebugLevel;
    }
  }

  return debugLevel;
}
8541