xref: /openbsd/lib/libexpat/lib/xmlparse.c (revision c033f770)
1 /* 2a14271ad4d35e82bde8ba210b4edb7998794bcbae54deab114046a300f9639a (2.6.2+)
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13    Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14    Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15    Copyright (c) 2016      Eric Rahm <erahm@mozilla.com>
16    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
17    Copyright (c) 2016      Gaurav <g.gupta@samsung.com>
18    Copyright (c) 2016      Thomas Beutlich <tc@tbeu.de>
19    Copyright (c) 2016      Gustavo Grieco <gustavo.grieco@imag.fr>
20    Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com>
21    Copyright (c) 2016      Ed Schouten <ed@nuxi.nl>
22    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23    Copyright (c) 2017      Václav Slavík <vaclav@slavik.io>
24    Copyright (c) 2017      Viktor Szakats <commit@vsz.me>
25    Copyright (c) 2017      Chanho Park <chanho61.park@samsung.com>
26    Copyright (c) 2017      Rolf Eike Beer <eike@sf-mail.de>
27    Copyright (c) 2017      Hans Wennborg <hans@chromium.org>
28    Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
29    Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
30    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
31    Copyright (c) 2018      Mariusz Zaborski <oshogbo@vexillium.org>
32    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
33    Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34    Copyright (c) 2019      Vadim Zeitlin <vadim@zeitlins.org>
35    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
36    Copyright (c) 2022      Samanta Navarro <ferivoz@riseup.net>
37    Copyright (c) 2022      Jeffrey Walton <noloader@gmail.com>
38    Copyright (c) 2022      Jann Horn <jannh@google.com>
39    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
40    Copyright (c) 2023      Owain Davies <owaind@bath.edu>
41    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42    Licensed under the MIT license:
43 
44    Permission is  hereby granted,  free of charge,  to any  person obtaining
45    a  copy  of  this  software   and  associated  documentation  files  (the
46    "Software"),  to  deal in  the  Software  without restriction,  including
47    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
48    distribute, sublicense, and/or sell copies of the Software, and to permit
49    persons  to whom  the Software  is  furnished to  do so,  subject to  the
50    following conditions:
51 
52    The above copyright  notice and this permission notice  shall be included
53    in all copies or substantial portions of the Software.
54 
55    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
56    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
57    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
58    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
59    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
60    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
61    USE OR OTHER DEALINGS IN THE SOFTWARE.
62 */
63 
64 #define XML_BUILDING_EXPAT 1
65 
66 #include "expat_config.h"
67 
68 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
69 #  error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
70 #endif
71 
72 #if defined(XML_DTD) && XML_GE == 0
73 #  error Either undefine XML_DTD or define XML_GE to 1.
74 #endif
75 
76 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2)           \
77     || (XML_CONTEXT_BYTES + 0 < 0)
78 #  error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
79 #endif
80 
81 #if defined(HAVE_SYSCALL_GETRANDOM)
82 #  if ! defined(_GNU_SOURCE)
83 #    define _GNU_SOURCE 1 /* syscall prototype */
84 #  endif
85 #endif
86 
87 #ifdef _WIN32
88 /* force stdlib to define rand_s() */
89 #  if ! defined(_CRT_RAND_S)
90 #    define _CRT_RAND_S
91 #  endif
92 #endif
93 
94 #include <stdbool.h>
95 #include <stddef.h>
96 #include <string.h> /* memset(), memcpy() */
97 #include <assert.h>
98 #include <limits.h> /* UINT_MAX */
99 #include <stdio.h>  /* fprintf */
100 #include <stdlib.h> /* getenv, rand_s */
101 #include <stdint.h> /* uintptr_t */
102 #include <math.h>   /* isnan */
103 
104 #ifdef _WIN32
105 #  define getpid GetCurrentProcessId
106 #else
107 #  include <sys/time.h>  /* gettimeofday() */
108 #  include <sys/types.h> /* getpid() */
109 #  include <unistd.h>    /* getpid() */
110 #  include <fcntl.h>     /* O_RDONLY */
111 #  include <errno.h>
112 #endif
113 
114 #ifdef _WIN32
115 #  include "winconfig.h"
116 #endif
117 
118 #include "ascii.h"
119 #include "expat.h"
120 #include "siphash.h"
121 
122 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
123 #  if defined(HAVE_GETRANDOM)
124 #    include <sys/random.h> /* getrandom */
125 #  else
126 #    include <unistd.h>      /* syscall */
127 #    include <sys/syscall.h> /* SYS_getrandom */
128 #  endif
129 #  if ! defined(GRND_NONBLOCK)
130 #    define GRND_NONBLOCK 0x0001
131 #  endif /* ! defined(GRND_NONBLOCK) */
132 #endif   /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
133 
134 #if defined(HAVE_LIBBSD)                                                       \
135     && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
136 #  include <bsd/stdlib.h>
137 #endif
138 
139 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
140 #  define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
141 #endif
142 
143 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM)             \
144     && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)            \
145     && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32)                         \
146     && ! defined(XML_POOR_ENTROPY)
147 #  error You do not have support for any sources of high quality entropy \
148     enabled.  For end user security, that is probably not what you want. \
149     \
150     Your options include: \
151       * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
152       * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
153       * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
154       * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
155       * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
156       * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
157       * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
158       * Windows >=Vista (rand_s): _WIN32. \
159     \
160     If you insist on not using any of these, bypass this error by defining \
161     XML_POOR_ENTROPY; you have been warned. \
162     \
163     If you have reasons to patch this detection code away or need changes \
164     to the build system, please open a bug.  Thank you!
165 #endif
166 
/* Bind the encoding-neutral names (XML_ENCODE_MAX, XmlConvert,
   XmlGetInternalEncoding[NS], XmlEncode) and the ICHAR type to their UTF-16
   variants when XML_UNICODE is defined, and to the UTF-8 variants otherwise.

   MUST_CONVERT(enc, s) is non-zero when input must go through XmlConvert:
   - UTF-16 build: the encoding is not UTF-16, or s sits at an odd address
     (low bit of the pointer value set);
   - UTF-8 build: the encoding is not UTF-8. */
167 #ifdef XML_UNICODE
168 #  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
169 #  define XmlConvert XmlUtf16Convert
170 #  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
171 #  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
172 #  define XmlEncode XmlUtf16Encode
173 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
174 typedef unsigned short ICHAR;
175 #else
176 #  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
177 #  define XmlConvert XmlUtf8Convert
178 #  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
179 #  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
180 #  define XmlEncode XmlUtf8Encode
181 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
182 typedef char ICHAR;
183 #endif
184 
/* Without XML_NS, alias every "...NS" entry point used in this file to its
   plain counterpart.  XmlGetInternalEncodingNS was already defined by the
   XML_UNICODE selection above in the file, hence the #undef before
   redefining it. */
185 #ifndef XML_NS
186
187 #  define XmlInitEncodingNS XmlInitEncoding
188 #  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
189 #  undef XmlGetInternalEncodingNS
190 #  define XmlGetInternalEncodingNS XmlGetInternalEncoding
191 #  define XmlParseXmlDeclNS XmlParseXmlDecl
192
193 #endif
194 
/* XML_T(x) adapts a literal to the configured XML_Char width and XML_L(x)
   adds the wide-literal prefix where needed:
   - XML_UNICODE + XML_UNICODE_WCHAR_T: cast to const wchar_t, L prefix;
   - XML_UNICODE only: cast to const unsigned short, no prefix;
   - otherwise: both macros expand to their argument unchanged.
   NOTE(review): the cast forms are not wrapped in parentheses and do not
   parenthesize x, so they are only safe for a single literal operand --
   confirm all call sites. */
195 #ifdef XML_UNICODE
196
197 #  ifdef XML_UNICODE_WCHAR_T
198 #    define XML_T(x) (const wchar_t) x
199 #    define XML_L(x) L##x
200 #  else
201 #    define XML_T(x) (const unsigned short)x
202 #    define XML_L(x) x
203 #  endif
204
205 #else
206
207 #  define XML_T(x) x
208 #  define XML_L(x) x
209
210 #endif
211 
212 /* Round up n to be a multiple of sz, where sz is a power of 2.
       Implemented by adding sz - 1 and masking off the low bits; sz is
       expanded twice. */
213 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
214
215 /* Do safe (NULL-aware) pointer arithmetic: yields p - q when both pointers
       are non-NULL and 0 otherwise.  Each argument is expanded twice. */
216 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
217
/* Smaller of a and b (b when they compare equal).  Each argument is expanded
   twice, so avoid operands with side effects. */
218 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
219 
220 #include "internal.h"
221 #include "xmltok.h"
222 #include "xmlrole.h"
223 
/* Hash-table key: a pointer to constant XML_Char data. */
224 typedef const XML_Char *KEY;
225
/* Minimal hash-table entry: just the key.  HASH_TABLE stores NAMED pointers.
   NOTE(review): lookup() takes a createSize, so entries are presumably
   larger structs whose first member is the name -- confirm in lookup(). */
226 typedef struct {
227   KEY name;
228 } NAMED;
230 typedef struct {
231   NAMED **v;
232   unsigned char power;
233   size_t size;
234   size_t used;
235   const XML_Memory_Handling_Suite *mem;
236 } HASH_TABLE;
237 
/* Forward declarations of hashing helpers (static, so defined in this
   translation unit; the definitions are outside this view). */
238 static size_t keylen(KEY s);
239
240 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
241 
242 /* For probing (after a collision) we need a step size relatively prime
243    to the hash table size, which is a power of 2. We use double-hashing,
244    since we can calculate a second hash value cheaply by taking those bits
245    of the first hash value that were discarded (masked out) when the table
246    index was calculated: index = hash & mask, where mask = table->size - 1.
247    We limit the maximum step size to table->size / 4 (mask >> 2) and make
248    it odd, since odd numbers are always relatively prime to a power of 2.

       SECOND_HASH keeps the hash bits above the mask, shifts them right by
       power - 1 and clamps the result with mask >> 2.  PROBE_STEP sets the
       low bit of that value and converts it to unsigned char.
249 */
250 #define SECOND_HASH(hash, mask, power)                                         \
251   ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
252 #define PROBE_STEP(hash, mask, power)                                          \
253   ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
254 
/* Cursor over a HASH_TABLE's slots, used with hashTableIterInit() and
   hashTableIterNext(). */
255 typedef struct {
256   NAMED **p;   /* presumably the current slot -- confirm in hashTableIterNext() */
257   NAMED **end; /* presumably one past the last slot -- confirm */
258 } HASH_TABLE_ITER;
259 
/* Initial sizes and sentinel values for parser-owned buffers.  The users of
   these constants are outside this view. */
260 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
261 #define INIT_DATA_BUF_SIZE 1024
262 #define INIT_ATTS_SIZE 16
263 #define INIT_ATTS_VERSION 0xFFFFFFFF
264 #define INIT_BLOCK_SIZE 1024
265 #define INIT_BUFFER_SIZE 1024
266
267 #define EXPAND_SPARE 24
268 
/* A namespace binding: associates a PREFIX with a URI.  Bindings are linked
   two ways: per tag (nextTagBinding) and per prefix (prevPrefixBinding). */
269 typedef struct binding {
270   struct prefix *prefix;
271   struct binding *nextTagBinding;
272   struct binding *prevPrefixBinding;
273   const struct attribute_id *attId;
274   XML_Char *uri;
275   int uriLen;   /* presumably the used length of uri -- TODO confirm */
276   int uriAlloc; /* presumably the allocated capacity of uri -- TODO confirm */
277 } BINDING;
278 
/* A namespace prefix and the BINDING it points to.  The first member is the
   name, matching the layout of NAMED. */
279 typedef struct prefix {
280   const XML_Char *name;
281   BINDING *binding;
282 } PREFIX;
283 
/* An element name in the API encoding, with pointers to its parts and the
   lengths that go with them. */
284 typedef struct {
285   const XML_Char *str;
286   const XML_Char *localPart;
287   const XML_Char *prefix;
288   int strLen;
289   int uriLen;
290   int prefixLen;
291 } TAG_NAME;
292 
293 /* TAG represents an open element.
294    The name of the element is stored in both the document and API
295    encodings.  The memory buffer 'buf' is a separately-allocated
296    memory area which stores the name.  During the XML_Parse()/
297    XML_ParseBuffer() when the element is open, the memory for the 'raw'
298    version of the name (in the document encoding) is shared with the
299    document buffer.  If the element is open across calls to
300    XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
301    contain the 'raw' name as well.
302
303    A parser reuses these structures, maintaining a list of allocated
304    TAG objects in a free list.
305 */
306 typedef struct tag {
307   struct tag *parent;  /* parent of this element */
308   const char *rawName; /* tagName in the original encoding */
309   int rawNameLength;
310   TAG_NAME name; /* tagName in the API encoding */
311   char *buf;     /* buffer for name components */
312   char *bufEnd;  /* end of the buffer */
313   BINDING *bindings;
314 } TAG;
315 
/* A declared entity.  The first member is the name, matching the layout of
   NAMED. */
316 typedef struct {
317   const XML_Char *name;
318   const XML_Char *textPtr;
319   int textLen;   /* length in XML_Chars */
320   int processed; /* # of processed bytes - when suspended */
321   const XML_Char *systemId;
322   const XML_Char *base;
323   const XML_Char *publicId;
324   const XML_Char *notation;
325   XML_Bool open;
326   XML_Bool is_param;
327   XML_Bool is_internal; /* true if declared in internal subset outside PE */
328 } ENTITY;
329 
/* One node of the content-model scaffold held in DTD.scaffold.  Child and
   sibling links are ints.
   NOTE(review): they look like indices into the scaffold array -- confirm in
   nextScaffoldPart()/build_model(). */
330 typedef struct {
331   enum XML_Content_Type type;
332   enum XML_Content_Quant quant;
333   const XML_Char *name;
334   int firstchild;
335   int lastchild;
336   int childcnt;
337   int nextsib;
338 } CONTENT_SCAFFOLD;
339
/* Presumably the initial scaffold capacity -- its user is outside this view. */
340 #define INIT_SCAFFOLD_ELEMENTS 32
341 
/* One storage block of a STRING_POOL, chained through next.
   NOTE(review): s[1] looks like the pre-C99 trailing-array idiom, with the
   block allocated larger than sizeof(BLOCK) -- confirm in poolGrow(). */
342 typedef struct block {
343   struct block *next;
344   int size;
345   XML_Char s[1];
346 } BLOCK;
347 
/* String pool.  As the pool* macros in this file show, [start, ptr) is the
   string currently being built (poolLength is ptr - start), poolFinish moves
   start up to ptr, poolDiscard resets ptr to start, and poolAppendChar calls
   poolGrow() when ptr reaches end. */
348 typedef struct {
349   BLOCK *blocks;
350   BLOCK *freeBlocks;
351   const XML_Char *end; /* limit of the current block's storage */
352   XML_Char *ptr;       /* next write position */
353   XML_Char *start;     /* start of the string under construction */
354   const XML_Memory_Handling_Suite *mem;
355 } STRING_POOL;
356 
357 /* The XML_Char before the name is used to determine whether
358    an attribute has been specified.
       The first member is the name, matching the layout of NAMED. */
359 typedef struct attribute_id {
360   XML_Char *name;
361   PREFIX *prefix;
362   XML_Bool maybeTokenized;
363   XML_Bool xmlns;
364 } ATTRIBUTE_ID;
365 
/* A default for one attribute: its id, whether it is CDATA, and its value.
   ELEMENT_TYPE.defaultAtts points to these. */
366 typedef struct {
367   const ATTRIBUTE_ID *id;
368   XML_Bool isCdata;
369   const XML_Char *value;
370 } DEFAULT_ATTRIBUTE;
371 
/* Entry type of the parser's m_nsAtts array.
   NOTE(review): version is presumably compared with m_nsAttsVersion to tell
   live entries from stale ones -- confirm in storeAtts(). */
372 typedef struct {
373   unsigned long version;
374   unsigned long hash;
375   const XML_Char *uriName;
376 } NS_ATT;
377 
/* A declared element type and its default attributes.  The first member is
   the name, matching the layout of NAMED. */
378 typedef struct {
379   const XML_Char *name;
380   PREFIX *prefix;
381   const ATTRIBUTE_ID *idAtt;
382   int nDefaultAtts;     /* presumably entries in use in defaultAtts -- confirm */
383   int allocDefaultAtts; /* presumably the allocated capacity -- confirm */
384   DEFAULT_ATTRIBUTE *defaultAtts;
385 } ELEMENT_TYPE;
386 
/* DTD state: hash tables for entities, element types, attribute ids and
   prefixes, two string pools, processing flags, and the scaffold used while
   building a content model.  paramEntityRead and paramEntities exist only
   when XML_DTD is defined. */
387 typedef struct {
388   HASH_TABLE generalEntities;
389   HASH_TABLE elementTypes;
390   HASH_TABLE attributeIds;
391   HASH_TABLE prefixes;
392   STRING_POOL pool;
393   STRING_POOL entityValuePool;
394   /* false once a parameter entity reference has been skipped */
395   XML_Bool keepProcessing;
396   /* true once an internal or external PE reference has been encountered;
397      this includes the reference to an external subset */
398   XML_Bool hasParamEntityRefs;
399   XML_Bool standalone;
400 #ifdef XML_DTD
401   /* indicates if external PE has been read */
402   XML_Bool paramEntityRead;
403   HASH_TABLE paramEntities;
404 #endif /* XML_DTD */
405   PREFIX defaultPrefix;
406   /* === scaffolding for building content model === */
407   XML_Bool in_eldecl;
408   CONTENT_SCAFFOLD *scaffold;
409   unsigned contentStringLen;
410   unsigned scaffSize;
411   unsigned scaffCount;
412   int scaffLevel;
413   int *scaffIndex;
414 } DTD;
415 
/* Node of a singly linked list (via next) of internal entities.  The parser
   struct keeps two such lists: m_openInternalEntities and
   m_freeInternalEntities. */
416 typedef struct open_internal_entity {
417   const char *internalEventPtr;
418   const char *internalEventEndPtr;
419   struct open_internal_entity *next;
420   ENTITY *entity;
421   int startTagLevel;
422   XML_Bool betweenDecl; /* WFC: PE Between Declarations */
423 } OPEN_INTERNAL_ENTITY;
424 
/* How a run of bytes is charged to the accounting counters; passed as the
   'account' argument of the do*/store* helpers declared in this file. */
425 enum XML_Account {
426   XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
427   XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
428                                    expansion */
429   XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
430 };
431 
/* Accounting and entity-tracking state, compiled only when general entities
   are enabled (XML_GE == 1).  Both structs are members of
   struct XML_ParserStruct (m_accounting, m_entity_stats). */
432 #if XML_GE == 1
433 typedef unsigned long long XmlBigCount;
/* Byte counters plus the limits they are checked against. */
434 typedef struct accounting {
435   XmlBigCount countBytesDirect;
436   XmlBigCount countBytesIndirect;
437   unsigned long debugLevel;
438   float maximumAmplificationFactor; // >=1.0
439   unsigned long long activationThresholdBytes;
440 } ACCOUNTING;
441
/* Counters describing entity opening and nesting depth. */
442 typedef struct entity_stats {
443   unsigned int countEverOpened;
444   unsigned int currentDepth;
445   unsigned int maximumDepthSeen;
446   unsigned long debugLevel;
447 } ENTITY_STATS;
448 #endif /* XML_GE == 1 */
449 
/* Processor is the function type shared by all parsing-stage routines: it
   takes the parser, the [start, end) input range and an out-pointer endPtr,
   and returns an XML_Error.  The parser stores a pointer to the active one
   in m_processor.  The declarations below are forward declarations of the
   processors; the XML_DTD-only ones are guarded accordingly. */
450 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
451                                          const char *end, const char **endPtr);
452
453 static Processor prologProcessor;
454 static Processor prologInitProcessor;
455 static Processor contentProcessor;
456 static Processor cdataSectionProcessor;
457 #ifdef XML_DTD
458 static Processor ignoreSectionProcessor;
459 static Processor externalParEntProcessor;
460 static Processor externalParEntInitProcessor;
461 static Processor entityValueProcessor;
462 static Processor entityValueInitProcessor;
463 #endif /* XML_DTD */
464 static Processor epilogProcessor;
465 static Processor errorProcessor;
466 static Processor externalEntityInitProcessor;
467 static Processor externalEntityInitProcessor2;
468 static Processor externalEntityInitProcessor3;
469 static Processor externalEntityContentProcessor;
470 static Processor internalEntityProcessor;
471 
/* Forward declarations: encoding setup and the core prolog / content /
   CDATA / ignore-section workers.  All are static, so they are defined in
   this translation unit (outside this view).  doIgnoreSection exists only
   when XML_DTD is defined. */
472 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
473                                             const XML_Char *encodingName);
474 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
475                                      const char *s, const char *next);
476 static enum XML_Error initializeEncoding(XML_Parser parser);
477 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
478                                const char *s, const char *end, int tok,
479                                const char *next, const char **nextPtr,
480                                XML_Bool haveMore, XML_Bool allowClosingDoctype,
481                                enum XML_Account account);
482 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
483                                             XML_Bool betweenDecl);
484 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
485                                 const ENCODING *enc, const char *start,
486                                 const char *end, const char **endPtr,
487                                 XML_Bool haveMore, enum XML_Account account);
488 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
489                                      const char **startPtr, const char *end,
490                                      const char **nextPtr, XML_Bool haveMore,
491                                      enum XML_Account account);
492 #ifdef XML_DTD
493 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
494                                       const char **startPtr, const char *end,
495                                       const char **nextPtr, XML_Bool haveMore);
496 #endif /* XML_DTD */
497 
/* Forward declarations: namespace bindings, attribute storage, entity value
   storage and handler-reporting helpers.  storeEntityValue is declared when
   XML_GE == 1; otherwise storeSelfEntityValue is declared instead. */
498 static void freeBindings(XML_Parser parser, BINDING *bindings);
499 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
500                                 const char *attStr, TAG_NAME *tagNamePtr,
501                                 BINDING **bindingsPtr,
502                                 enum XML_Account account);
503 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
504                                  const ATTRIBUTE_ID *attId, const XML_Char *uri,
505                                  BINDING **bindingsPtr);
506 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
507                            XML_Bool isCdata, XML_Bool isId,
508                            const XML_Char *value, XML_Parser parser);
509 static enum XML_Error storeAttributeValue(XML_Parser parser,
510                                           const ENCODING *enc, XML_Bool isCdata,
511                                           const char *ptr, const char *end,
512                                           STRING_POOL *pool,
513                                           enum XML_Account account);
514 static enum XML_Error appendAttributeValue(XML_Parser parser,
515                                            const ENCODING *enc,
516                                            XML_Bool isCdata, const char *ptr,
517                                            const char *end, STRING_POOL *pool,
518                                            enum XML_Account account);
519 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
520                                     const char *start, const char *end);
521 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
522 #if XML_GE == 1
523 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
524                                        const char *start, const char *end,
525                                        enum XML_Account account);
526 #else
527 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
528 #endif
529 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
530                                        const char *start, const char *end);
531 static int reportComment(XML_Parser parser, const ENCODING *enc,
532                          const char *start, const char *end);
533 static void reportDefault(XML_Parser parser, const ENCODING *enc,
534                           const char *start, const char *end);
535 
/* Forward declarations: context string get/set and public-id normalization
   (normalizePublicId takes a mutable string). */
536 static const XML_Char *getContext(XML_Parser parser);
537 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
538
539 static void FASTCALL normalizePublicId(XML_Char *s);
540 
/* Forward declarations: DTD lifecycle (create / reset / destroy / copy) and
   the hash-table API (lookup, init, clear, destroy, iteration). */
541 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
542 /* do not call if m_parentParser != NULL */
543 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
544 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
545                        const XML_Memory_Handling_Suite *ms);
546 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
547                    const XML_Memory_Handling_Suite *ms);
548 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
549                            STRING_POOL *newPool, const HASH_TABLE *oldTable);
550 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
551                      size_t createSize);
552 static void FASTCALL hashTableInit(HASH_TABLE *table,
553                                    const XML_Memory_Handling_Suite *ms);
554 static void FASTCALL hashTableClear(HASH_TABLE *table);
555 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
556 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
557                                        const HASH_TABLE *table);
558 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
559 
/* Forward declarations: STRING_POOL API.  poolGrow is the function that the
   poolAppendChar macro calls when the pool's ptr reaches end. */
560 static void FASTCALL poolInit(STRING_POOL *pool,
561                               const XML_Memory_Handling_Suite *ms);
562 static void FASTCALL poolClear(STRING_POOL *pool);
563 static void FASTCALL poolDestroy(STRING_POOL *pool);
564 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
565                             const char *ptr, const char *end);
566 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
567                                  const char *ptr, const char *end);
568 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
569 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
570                                                const XML_Char *s);
571 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
572                                        int n);
573 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
574                                                  const XML_Char *s);
575 
/* Forward declarations: content-model building, element type lookup, string
   copying, hash salt generation, and parser creation / initialization. */
576 static int FASTCALL nextScaffoldPart(XML_Parser parser);
577 static XML_Content *build_model(XML_Parser parser);
578 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
579                                     const char *ptr, const char *end);
580
581 static XML_Char *copyString(const XML_Char *s,
582                             const XML_Memory_Handling_Suite *memsuite);
583
584 static unsigned long generate_hash_secret_salt(XML_Parser parser);
585 static XML_Bool startParsing(XML_Parser parser);
586
587 static XML_Parser parserCreate(const XML_Char *encodingName,
588                                const XML_Memory_Handling_Suite *memsuite,
589                                const XML_Char *nameSep, DTD *dtd);
590
591 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
592 
/* Forward declarations: accounting and entity-tracking helpers (only when
   XML_GE == 1), followed by getDebugLevel, which is declared
   unconditionally. */
593 #if XML_GE == 1
594 static float accountingGetCurrentAmplification(XML_Parser rootParser);
595 static void accountingReportStats(XML_Parser originParser, const char *epilog);
596 static void accountingOnAbort(XML_Parser originParser);
597 static void accountingReportDiff(XML_Parser rootParser,
598                                  unsigned int levelsAwayFromRootParser,
599                                  const char *before, const char *after,
600                                  ptrdiff_t bytesMore, int source_line,
601                                  enum XML_Account account);
602 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
603                                         const char *before, const char *after,
604                                         int source_line,
605                                         enum XML_Account account);
606
607 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
608                                       const char *action, int sourceLine);
609 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
610                                  int sourceLine);
611 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
612                                   int sourceLine);
613
614 static XML_Parser getRootParserOf(XML_Parser parser,
615                                   unsigned int *outLevelDiff);
616 #endif /* XML_GE == 1 */
617
618 static unsigned long getDebugLevel(const char *variableName,
619                                    unsigned long defaultDebugLevel);
620 
/* Accessors for a STRING_POOL.  `pool` is a pointer to the pool and may be
   any pointer-valued expression: every expansion parenthesizes it.  The
   string under construction occupies [start, ptr).

   poolStart      - start of the string under construction
   poolLength     - number of XML_Chars appended so far (ptr - start)
   poolChop       - drop the last appended character (no bounds check)
   poolLastChar   - lvalue for the last appended character
   poolDiscard    - abandon the string under construction (ptr = start)
   poolFinish     - commit it; the next string starts at ptr
   poolAppendChar - append c, calling poolGrow() first when the current
                    block is full; yields 1 on success and 0 if poolGrow()
                    fails.  `pool` is expanded several times, so it must
                    not have side effects. */
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
631 
/* Default value for reparse deferral.  The object is const unless
   XML_TESTING is defined.
   g_bytesScanned is only defined when XML_TESTING is defined. */
632 #if ! defined(XML_TESTING)
633 const
634 #endif
635     XML_Bool g_reparseDeferralEnabledDefault
636     = XML_TRUE; // write ONLY in runtests.c
637 #if defined(XML_TESTING)
638 unsigned int g_bytesScanned = 0; // used for testing only
639 #endif
640 
/* Complete parser state: the parse buffer, user handlers, encoding state,
   the active Processor, event/position pointers, DTD and tag/binding lists,
   attribute storage, string pools and (when XML_GE == 1) accounting data.
   Members are accessed directly throughout this file; the MALLOC / REALLOC /
   FREE macros that follow the struct go through m_mem. */
641 struct XML_ParserStruct {
642   /* The first member must be m_userData so that the XML_GetUserData
643      macro works. */
644   void *m_userData;
645   void *m_handlerArg;
646
647   // How the four parse buffer pointers below relate in time and space:
648   //
649   //   m_buffer <= m_bufferPtr <= m_bufferEnd  <= m_bufferLim
650   //   |           |              |               |
651   //   <--parsed-->|              |               |
652   //               <---parsing--->|               |
653   //                              <--unoccupied-->|
654   //   <---------total-malloced/realloced-------->|
655
656   char *m_buffer; // malloc/realloc base pointer of parse buffer
657   const XML_Memory_Handling_Suite m_mem;
658   const char *m_bufferPtr; // first character to be parsed
659   char *m_bufferEnd;       // past last character to be parsed
660   const char *m_bufferLim; // allocated end of m_buffer
661
662   XML_Index m_parseEndByteIndex;
663   const char *m_parseEndPtr;
664   size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
665   XML_Bool m_reparseDeferralEnabled;
666   int m_lastBufferRequestSize;
667   XML_Char *m_dataBuf;
668   XML_Char *m_dataBufEnd;
      /* --- user-installed handlers --- */
669   XML_StartElementHandler m_startElementHandler;
670   XML_EndElementHandler m_endElementHandler;
671   XML_CharacterDataHandler m_characterDataHandler;
672   XML_ProcessingInstructionHandler m_processingInstructionHandler;
673   XML_CommentHandler m_commentHandler;
674   XML_StartCdataSectionHandler m_startCdataSectionHandler;
675   XML_EndCdataSectionHandler m_endCdataSectionHandler;
676   XML_DefaultHandler m_defaultHandler;
677   XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
678   XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
679   XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
680   XML_NotationDeclHandler m_notationDeclHandler;
681   XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
682   XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
683   XML_NotStandaloneHandler m_notStandaloneHandler;
684   XML_ExternalEntityRefHandler m_externalEntityRefHandler;
685   XML_Parser m_externalEntityRefHandlerArg;
686   XML_SkippedEntityHandler m_skippedEntityHandler;
687   XML_UnknownEncodingHandler m_unknownEncodingHandler;
688   XML_ElementDeclHandler m_elementDeclHandler;
689   XML_AttlistDeclHandler m_attlistDeclHandler;
690   XML_EntityDeclHandler m_entityDeclHandler;
691   XML_XmlDeclHandler m_xmlDeclHandler;
      /* --- encoding state --- */
692   const ENCODING *m_encoding;
693   INIT_ENCODING m_initEncoding;
694   const ENCODING *m_internalEncoding;
695   const XML_Char *m_protocolEncodingName;
696   XML_Bool m_ns;
697   XML_Bool m_ns_triplets;
698   void *m_unknownEncodingMem;
699   void *m_unknownEncodingData;
700   void *m_unknownEncodingHandlerData;
701   void(XMLCALL *m_unknownEncodingRelease)(void *);
      /* --- parsing stage, error and event position --- */
702   PROLOG_STATE m_prologState;
703   Processor *m_processor; /* currently active processor function */
704   enum XML_Error m_errorCode;
705   const char *m_eventPtr;
706   const char *m_eventEndPtr;
707   const char *m_positionPtr;
708   OPEN_INTERNAL_ENTITY *m_openInternalEntities;
709   OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
710   XML_Bool m_defaultExpandInternalEntities;
711   int m_tagLevel;
      /* --- declaration currently being processed --- */
712   ENTITY *m_declEntity;
713   const XML_Char *m_doctypeName;
714   const XML_Char *m_doctypeSysid;
715   const XML_Char *m_doctypePubid;
716   const XML_Char *m_declAttributeType;
717   const XML_Char *m_declNotationName;
718   const XML_Char *m_declNotationPublicId;
719   ELEMENT_TYPE *m_declElementType;
720   ATTRIBUTE_ID *m_declAttributeId;
721   XML_Bool m_declAttributeIsCdata;
722   XML_Bool m_declAttributeIsId;
      /* --- DTD, open tags, bindings and attribute storage --- */
723   DTD *m_dtd;
724   const XML_Char *m_curBase;
725   TAG *m_tagStack;
726   TAG *m_freeTagList;
727   BINDING *m_inheritedBindings;
728   BINDING *m_freeBindingList;
729   int m_attsSize;
730   int m_nSpecifiedAtts;
731   int m_idAttIndex;
732   ATTRIBUTE *m_atts;
733   NS_ATT *m_nsAtts;
734   unsigned long m_nsAttsVersion;
735   unsigned char m_nsAttsPower;
736 #ifdef XML_ATTR_INFO
737   XML_AttrInfo *m_attInfo;
738 #endif
739   POSITION m_position;
740   STRING_POOL m_tempPool;
741   STRING_POOL m_temp2Pool;
742   char *m_groupConnector;
743   unsigned int m_groupSize;
744   XML_Char m_namespaceSeparator;
745   XML_Parser m_parentParser;
746   XML_ParsingStatus m_parsingStatus;
747 #ifdef XML_DTD
748   XML_Bool m_isParamEntity;
749   XML_Bool m_useForeignDTD;
750   enum XML_ParamEntityParsing m_paramEntityParsing;
751 #endif
752   unsigned long m_hash_secret_salt;
753 #if XML_GE == 1
754   ACCOUNTING m_accounting;
755   ENTITY_STATS m_entity_stats;
756 #endif
757 };
758 
/* Allocation helpers that route every request through the memory handling
   suite stored in the parser (its m_mem member).  The parser argument is
   parenthesized so that any pointer expression (e.g. `&obj' or `p + 1'),
   not only a plain identifier, expands correctly. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
762 
763 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)764 XML_ParserCreate(const XML_Char *encodingName) {
765   return XML_ParserCreate_MM(encodingName, NULL, NULL);
766 }
767 
768 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)769 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
770   XML_Char tmp[2] = {nsSep, 0};
771   return XML_ParserCreate_MM(encodingName, NULL, tmp);
772 }
773 
774 // "xml=http://www.w3.org/XML/1998/namespace"
775 static const XML_Char implicitContext[]
776     = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
777        ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
778        ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
779        ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
780        ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
781        ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
782        ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
783        ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
784        '\0'};
785 
786 /* To avoid warnings about unused functions: */
787 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
788 
789 #  if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
790 
/* Obtain entropy on Linux 3.17+
 *
 * Fills target with count random bytes using getrandom (or the raw
 * syscall) in non-blocking mode.
 *
 * Returns 1 if all count bytes were written, 0 otherwise.
 *
 * errno is only consulted after a call that actually reported failure
 * (negative return), so a stale errno value from an earlier, unrelated
 * call can neither cause a spurious retry nor a spurious failure.
 * A short (partial) result is followed by another call for the rest.
 */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* Wide enough for both ssize_t (getrandom) and long (syscall) */
    const long bytesWrittenMore =
#    if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#    else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#    endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1; /* full count bytes written */
      continue;   /* partial progress, fetch the remainder */
    }

    /* Nothing delivered: only an interrupted call is worth retrying */
    if (bytesWrittenMore == 0 || errno != EINTR)
      return 0;
  }
}
818 
819 #  endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
820 
821 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
822 
/* Extract entropy from /dev/urandom
 *
 * Fills target with count bytes read from /dev/urandom.
 *
 * Returns 1 if all count bytes were read, 0 otherwise (including when the
 * device cannot be opened).
 *
 * errno is only consulted after read() actually reported failure, so a
 * stale errno cannot cause an endless retry loop or a spurious failure.
 * Short reads are continued; a zero-length read is treated as failure.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      /* No data, or a real error: only EINTR is worth retrying */
      break;
    }
  }

  close(fd);
  return success;
}
850 
851 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
852 
853 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
854 
855 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
856 
/* Fill target with count random bytes drawn from arc4random().
   Each 32-bit result is consumed least significant byte first; a trailing
   partial word uses only as many bytes as are still needed. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();
    const size_t missing = count - written;
    const size_t take = (missing < sizeof(word)) ? missing : sizeof(word);
    size_t k;

    for (k = 0; k < take; k++) {
      out[written + k] = (uint8_t)(word >> (k * 8));
    }
    written += take;
  }
}
872 
873 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
874 
875 #ifdef _WIN32
876 
877 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
878    as it didn't declare it in its header prior to version 5.3.0 of its
879    runtime package (mingwrt, containing stdlib.h).  The upstream fix
880    was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
881 #  if defined(__MINGW32__) && defined(__MINGW32_VERSION)                       \
882       && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
883 __declspec(dllimport) int rand_s(unsigned int *);
884 #  endif
885 
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Returns 1 once count bytes have been stored in target, or 0 as soon as
 * a rand_s() call reports an error (non-zero return).
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int word = 0;
    const size_t missing = count - written;
    const size_t take = (missing < sizeof(word)) ? missing : sizeof(word);
    size_t k;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    /* least significant byte first */
    for (k = 0; k < take; k++) {
      out[written + k] = (uint8_t)(word >> (k * 8));
    }
    written += take;
  }
  return 1; /* success */
}
909 
910 #endif /* _WIN32 */
911 
912 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
913 
/* Low-quality, time-based entropy for the fallback path.
   On Windows: the two halves of the current FILETIME XORed together.
   Elsewhere: the microseconds field of gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwLowDateTime ^ now.dwHighDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

#    if ! defined(NDEBUG)
  assert(rc == 0);
#    else
  (void)rc; /* only inspected by the assertion above */
#    endif /* ! defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
936 
937 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
938 
/* Pass-through helper: returns entropy unchanged.  When the debug level
   read for "EXPAT_ENTROPY_DEBUG" is at least 1, the value and the label
   naming its source are first reported on stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u)
    return entropy;

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, (unsigned long)sizeof(entropy));
  return entropy;
}
947 
948 static unsigned long
generate_hash_secret_salt(XML_Parser parser)949 generate_hash_secret_salt(XML_Parser parser) {
950   unsigned long entropy;
951   (void)parser;
952 
953   /* "Failproof" high quality providers: */
954 #if defined(HAVE_ARC4RANDOM_BUF)
955   arc4random_buf(&entropy, sizeof(entropy));
956   return ENTROPY_DEBUG("arc4random_buf", entropy);
957 #elif defined(HAVE_ARC4RANDOM)
958   writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
959   return ENTROPY_DEBUG("arc4random", entropy);
960 #else
961   /* Try high quality providers first .. */
962 #  ifdef _WIN32
963   if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
964     return ENTROPY_DEBUG("rand_s", entropy);
965   }
966 #  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
967   if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
968     return ENTROPY_DEBUG("getrandom", entropy);
969   }
970 #  endif
971 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
972   if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
973     return ENTROPY_DEBUG("/dev/urandom", entropy);
974   }
975 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
976   /* .. and self-made low quality for backup: */
977 
978   /* Process ID is 0 bits entropy if attacker has local access */
979   entropy = gather_time_entropy() ^ getpid();
980 
981   /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
982   if (sizeof(unsigned long) == 4) {
983     return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
984   } else {
985     return ENTROPY_DEBUG("fallback(8)",
986                          entropy * (unsigned long)2305843009213693951ULL);
987   }
988 #endif
989 }
990 
991 static unsigned long
get_hash_secret_salt(XML_Parser parser)992 get_hash_secret_salt(XML_Parser parser) {
993   if (parser->m_parentParser != NULL)
994     return get_hash_secret_salt(parser->m_parentParser);
995   return parser->m_hash_secret_salt;
996 }
997 
998 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)999 callProcessor(XML_Parser parser, const char *start, const char *end,
1000               const char **endPtr) {
1001   const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1002 
1003   if (parser->m_reparseDeferralEnabled
1004       && ! parser->m_parsingStatus.finalBuffer) {
1005     // Heuristic: don't try to parse a partial token again until the amount of
1006     // available data has increased significantly.
1007     const size_t had_before = parser->m_partialTokenBytesBefore;
1008     // ...but *do* try anyway if we're close to causing a reallocation.
1009     size_t available_buffer
1010         = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1011 #if XML_CONTEXT_BYTES > 0
1012     available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1013 #endif
1014     available_buffer
1015         += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1016     // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1017     const bool enough
1018         = (have_now >= 2 * had_before)
1019           || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1020 
1021     if (! enough) {
1022       *endPtr = start; // callers may expect this to be set
1023       return XML_ERROR_NONE;
1024     }
1025   }
1026 #if defined(XML_TESTING)
1027   g_bytesScanned += (unsigned)have_now;
1028 #endif
1029   const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
1030   if (ret == XML_ERROR_NONE) {
1031     // if we consumed nothing, remember what we had on this parse attempt.
1032     if (*endPtr == start) {
1033       parser->m_partialTokenBytesBefore = have_now;
1034     } else {
1035       parser->m_partialTokenBytesBefore = 0;
1036     }
1037   }
1038   return ret;
1039 }
1040 
1041 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1042 startParsing(XML_Parser parser) {
1043   /* hash functions must be initialized before setContext() is called */
1044   if (parser->m_hash_secret_salt == 0)
1045     parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1046   if (parser->m_ns) {
1047     /* implicit context only set for root parser, since child
1048        parsers (i.e. external entity parsers) will inherit it
1049     */
1050     return setContext(parser, implicitContext);
1051   }
1052   return XML_TRUE;
1053 }
1054 
1055 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1056 XML_ParserCreate_MM(const XML_Char *encodingName,
1057                     const XML_Memory_Handling_Suite *memsuite,
1058                     const XML_Char *nameSep) {
1059   return parserCreate(encodingName, memsuite, nameSep, NULL);
1060 }
1061 
1062 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)1063 parserCreate(const XML_Char *encodingName,
1064              const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1065              DTD *dtd) {
1066   XML_Parser parser;
1067 
1068   if (memsuite) {
1069     XML_Memory_Handling_Suite *mtemp;
1070     parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1071     if (parser != NULL) {
1072       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1073       mtemp->malloc_fcn = memsuite->malloc_fcn;
1074       mtemp->realloc_fcn = memsuite->realloc_fcn;
1075       mtemp->free_fcn = memsuite->free_fcn;
1076     }
1077   } else {
1078     XML_Memory_Handling_Suite *mtemp;
1079     parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1080     if (parser != NULL) {
1081       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1082       mtemp->malloc_fcn = malloc;
1083       mtemp->realloc_fcn = realloc;
1084       mtemp->free_fcn = free;
1085     }
1086   }
1087 
1088   if (! parser)
1089     return parser;
1090 
1091   parser->m_buffer = NULL;
1092   parser->m_bufferLim = NULL;
1093 
1094   parser->m_attsSize = INIT_ATTS_SIZE;
1095   parser->m_atts
1096       = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1097   if (parser->m_atts == NULL) {
1098     FREE(parser, parser);
1099     return NULL;
1100   }
1101 #ifdef XML_ATTR_INFO
1102   parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1103       parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1104   if (parser->m_attInfo == NULL) {
1105     FREE(parser, parser->m_atts);
1106     FREE(parser, parser);
1107     return NULL;
1108   }
1109 #endif
1110   parser->m_dataBuf
1111       = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1112   if (parser->m_dataBuf == NULL) {
1113     FREE(parser, parser->m_atts);
1114 #ifdef XML_ATTR_INFO
1115     FREE(parser, parser->m_attInfo);
1116 #endif
1117     FREE(parser, parser);
1118     return NULL;
1119   }
1120   parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1121 
1122   if (dtd)
1123     parser->m_dtd = dtd;
1124   else {
1125     parser->m_dtd = dtdCreate(&parser->m_mem);
1126     if (parser->m_dtd == NULL) {
1127       FREE(parser, parser->m_dataBuf);
1128       FREE(parser, parser->m_atts);
1129 #ifdef XML_ATTR_INFO
1130       FREE(parser, parser->m_attInfo);
1131 #endif
1132       FREE(parser, parser);
1133       return NULL;
1134     }
1135   }
1136 
1137   parser->m_freeBindingList = NULL;
1138   parser->m_freeTagList = NULL;
1139   parser->m_freeInternalEntities = NULL;
1140 
1141   parser->m_groupSize = 0;
1142   parser->m_groupConnector = NULL;
1143 
1144   parser->m_unknownEncodingHandler = NULL;
1145   parser->m_unknownEncodingHandlerData = NULL;
1146 
1147   parser->m_namespaceSeparator = ASCII_EXCL;
1148   parser->m_ns = XML_FALSE;
1149   parser->m_ns_triplets = XML_FALSE;
1150 
1151   parser->m_nsAtts = NULL;
1152   parser->m_nsAttsVersion = 0;
1153   parser->m_nsAttsPower = 0;
1154 
1155   parser->m_protocolEncodingName = NULL;
1156 
1157   poolInit(&parser->m_tempPool, &(parser->m_mem));
1158   poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1159   parserInit(parser, encodingName);
1160 
1161   if (encodingName && ! parser->m_protocolEncodingName) {
1162     if (dtd) {
1163       // We need to stop the upcoming call to XML_ParserFree from happily
1164       // destroying parser->m_dtd because the DTD is shared with the parent
1165       // parser and the only guard that keeps XML_ParserFree from destroying
1166       // parser->m_dtd is parser->m_isParamEntity but it will be set to
1167       // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1168       parser->m_dtd = NULL;
1169     }
1170     XML_ParserFree(parser);
1171     return NULL;
1172   }
1173 
1174   if (nameSep) {
1175     parser->m_ns = XML_TRUE;
1176     parser->m_internalEncoding = XmlGetInternalEncodingNS();
1177     parser->m_namespaceSeparator = *nameSep;
1178   } else {
1179     parser->m_internalEncoding = XmlGetInternalEncoding();
1180   }
1181 
1182   return parser;
1183 }
1184 
1185 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1186 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1187   parser->m_processor = prologInitProcessor;
1188   XmlPrologStateInit(&parser->m_prologState);
1189   if (encodingName != NULL) {
1190     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1191   }
1192   parser->m_curBase = NULL;
1193   XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1194   parser->m_userData = NULL;
1195   parser->m_handlerArg = NULL;
1196   parser->m_startElementHandler = NULL;
1197   parser->m_endElementHandler = NULL;
1198   parser->m_characterDataHandler = NULL;
1199   parser->m_processingInstructionHandler = NULL;
1200   parser->m_commentHandler = NULL;
1201   parser->m_startCdataSectionHandler = NULL;
1202   parser->m_endCdataSectionHandler = NULL;
1203   parser->m_defaultHandler = NULL;
1204   parser->m_startDoctypeDeclHandler = NULL;
1205   parser->m_endDoctypeDeclHandler = NULL;
1206   parser->m_unparsedEntityDeclHandler = NULL;
1207   parser->m_notationDeclHandler = NULL;
1208   parser->m_startNamespaceDeclHandler = NULL;
1209   parser->m_endNamespaceDeclHandler = NULL;
1210   parser->m_notStandaloneHandler = NULL;
1211   parser->m_externalEntityRefHandler = NULL;
1212   parser->m_externalEntityRefHandlerArg = parser;
1213   parser->m_skippedEntityHandler = NULL;
1214   parser->m_elementDeclHandler = NULL;
1215   parser->m_attlistDeclHandler = NULL;
1216   parser->m_entityDeclHandler = NULL;
1217   parser->m_xmlDeclHandler = NULL;
1218   parser->m_bufferPtr = parser->m_buffer;
1219   parser->m_bufferEnd = parser->m_buffer;
1220   parser->m_parseEndByteIndex = 0;
1221   parser->m_parseEndPtr = NULL;
1222   parser->m_partialTokenBytesBefore = 0;
1223   parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1224   parser->m_lastBufferRequestSize = 0;
1225   parser->m_declElementType = NULL;
1226   parser->m_declAttributeId = NULL;
1227   parser->m_declEntity = NULL;
1228   parser->m_doctypeName = NULL;
1229   parser->m_doctypeSysid = NULL;
1230   parser->m_doctypePubid = NULL;
1231   parser->m_declAttributeType = NULL;
1232   parser->m_declNotationName = NULL;
1233   parser->m_declNotationPublicId = NULL;
1234   parser->m_declAttributeIsCdata = XML_FALSE;
1235   parser->m_declAttributeIsId = XML_FALSE;
1236   memset(&parser->m_position, 0, sizeof(POSITION));
1237   parser->m_errorCode = XML_ERROR_NONE;
1238   parser->m_eventPtr = NULL;
1239   parser->m_eventEndPtr = NULL;
1240   parser->m_positionPtr = NULL;
1241   parser->m_openInternalEntities = NULL;
1242   parser->m_defaultExpandInternalEntities = XML_TRUE;
1243   parser->m_tagLevel = 0;
1244   parser->m_tagStack = NULL;
1245   parser->m_inheritedBindings = NULL;
1246   parser->m_nSpecifiedAtts = 0;
1247   parser->m_unknownEncodingMem = NULL;
1248   parser->m_unknownEncodingRelease = NULL;
1249   parser->m_unknownEncodingData = NULL;
1250   parser->m_parentParser = NULL;
1251   parser->m_parsingStatus.parsing = XML_INITIALIZED;
1252 #ifdef XML_DTD
1253   parser->m_isParamEntity = XML_FALSE;
1254   parser->m_useForeignDTD = XML_FALSE;
1255   parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1256 #endif
1257   parser->m_hash_secret_salt = 0;
1258 
1259 #if XML_GE == 1
1260   memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1261   parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1262   parser->m_accounting.maximumAmplificationFactor
1263       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1264   parser->m_accounting.activationThresholdBytes
1265       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1266 
1267   memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1268   parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1269 #endif
1270 }
1271 
1272 /* moves list of bindings to m_freeBindingList */
1273 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1274 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1275   while (bindings) {
1276     BINDING *b = bindings;
1277     bindings = bindings->nextTagBinding;
1278     b->nextTagBinding = parser->m_freeBindingList;
1279     parser->m_freeBindingList = b;
1280   }
1281 }
1282 
1283 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1284 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1285   TAG *tStk;
1286   OPEN_INTERNAL_ENTITY *openEntityList;
1287 
1288   if (parser == NULL)
1289     return XML_FALSE;
1290 
1291   if (parser->m_parentParser)
1292     return XML_FALSE;
1293   /* move m_tagStack to m_freeTagList */
1294   tStk = parser->m_tagStack;
1295   while (tStk) {
1296     TAG *tag = tStk;
1297     tStk = tStk->parent;
1298     tag->parent = parser->m_freeTagList;
1299     moveToFreeBindingList(parser, tag->bindings);
1300     tag->bindings = NULL;
1301     parser->m_freeTagList = tag;
1302   }
1303   /* move m_openInternalEntities to m_freeInternalEntities */
1304   openEntityList = parser->m_openInternalEntities;
1305   while (openEntityList) {
1306     OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1307     openEntityList = openEntity->next;
1308     openEntity->next = parser->m_freeInternalEntities;
1309     parser->m_freeInternalEntities = openEntity;
1310   }
1311   moveToFreeBindingList(parser, parser->m_inheritedBindings);
1312   FREE(parser, parser->m_unknownEncodingMem);
1313   if (parser->m_unknownEncodingRelease)
1314     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1315   poolClear(&parser->m_tempPool);
1316   poolClear(&parser->m_temp2Pool);
1317   FREE(parser, (void *)parser->m_protocolEncodingName);
1318   parser->m_protocolEncodingName = NULL;
1319   parserInit(parser, encodingName);
1320   dtdReset(parser->m_dtd, &parser->m_mem);
1321   return XML_TRUE;
1322 }
1323 
1324 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1325 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1326   if (parser == NULL)
1327     return XML_STATUS_ERROR;
1328   /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1329      XXX There's no way for the caller to determine which of the
1330      XXX possible error cases caused the XML_STATUS_ERROR return.
1331   */
1332   if (parser->m_parsingStatus.parsing == XML_PARSING
1333       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1334     return XML_STATUS_ERROR;
1335 
1336   /* Get rid of any previous encoding name */
1337   FREE(parser, (void *)parser->m_protocolEncodingName);
1338 
1339   if (encodingName == NULL)
1340     /* No new encoding name */
1341     parser->m_protocolEncodingName = NULL;
1342   else {
1343     /* Copy the new encoding name into allocated memory */
1344     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1345     if (! parser->m_protocolEncodingName)
1346       return XML_STATUS_ERROR;
1347   }
1348   return XML_STATUS_OK;
1349 }
1350 
1351 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1352 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1353                                const XML_Char *encodingName) {
1354   XML_Parser parser = oldParser;
1355   DTD *newDtd = NULL;
1356   DTD *oldDtd;
1357   XML_StartElementHandler oldStartElementHandler;
1358   XML_EndElementHandler oldEndElementHandler;
1359   XML_CharacterDataHandler oldCharacterDataHandler;
1360   XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1361   XML_CommentHandler oldCommentHandler;
1362   XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1363   XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1364   XML_DefaultHandler oldDefaultHandler;
1365   XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1366   XML_NotationDeclHandler oldNotationDeclHandler;
1367   XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1368   XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1369   XML_NotStandaloneHandler oldNotStandaloneHandler;
1370   XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1371   XML_SkippedEntityHandler oldSkippedEntityHandler;
1372   XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1373   XML_ElementDeclHandler oldElementDeclHandler;
1374   XML_AttlistDeclHandler oldAttlistDeclHandler;
1375   XML_EntityDeclHandler oldEntityDeclHandler;
1376   XML_XmlDeclHandler oldXmlDeclHandler;
1377   ELEMENT_TYPE *oldDeclElementType;
1378 
1379   void *oldUserData;
1380   void *oldHandlerArg;
1381   XML_Bool oldDefaultExpandInternalEntities;
1382   XML_Parser oldExternalEntityRefHandlerArg;
1383 #ifdef XML_DTD
1384   enum XML_ParamEntityParsing oldParamEntityParsing;
1385   int oldInEntityValue;
1386 #endif
1387   XML_Bool oldns_triplets;
1388   /* Note that the new parser shares the same hash secret as the old
1389      parser, so that dtdCopy and copyEntityTable can lookup values
1390      from hash tables associated with either parser without us having
1391      to worry which hash secrets each table has.
1392   */
1393   unsigned long oldhash_secret_salt;
1394   XML_Bool oldReparseDeferralEnabled;
1395 
1396   /* Validate the oldParser parameter before we pull everything out of it */
1397   if (oldParser == NULL)
1398     return NULL;
1399 
1400   /* Stash the original parser contents on the stack */
1401   oldDtd = parser->m_dtd;
1402   oldStartElementHandler = parser->m_startElementHandler;
1403   oldEndElementHandler = parser->m_endElementHandler;
1404   oldCharacterDataHandler = parser->m_characterDataHandler;
1405   oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1406   oldCommentHandler = parser->m_commentHandler;
1407   oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1408   oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1409   oldDefaultHandler = parser->m_defaultHandler;
1410   oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1411   oldNotationDeclHandler = parser->m_notationDeclHandler;
1412   oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1413   oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1414   oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1415   oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1416   oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1417   oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1418   oldElementDeclHandler = parser->m_elementDeclHandler;
1419   oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1420   oldEntityDeclHandler = parser->m_entityDeclHandler;
1421   oldXmlDeclHandler = parser->m_xmlDeclHandler;
1422   oldDeclElementType = parser->m_declElementType;
1423 
1424   oldUserData = parser->m_userData;
1425   oldHandlerArg = parser->m_handlerArg;
1426   oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1427   oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1428 #ifdef XML_DTD
1429   oldParamEntityParsing = parser->m_paramEntityParsing;
1430   oldInEntityValue = parser->m_prologState.inEntityValue;
1431 #endif
1432   oldns_triplets = parser->m_ns_triplets;
1433   /* Note that the new parser shares the same hash secret as the old
1434      parser, so that dtdCopy and copyEntityTable can lookup values
1435      from hash tables associated with either parser without us having
1436      to worry which hash secrets each table has.
1437   */
1438   oldhash_secret_salt = parser->m_hash_secret_salt;
1439   oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1440 
1441 #ifdef XML_DTD
1442   if (! context)
1443     newDtd = oldDtd;
1444 #endif /* XML_DTD */
1445 
1446   /* Note that the magical uses of the pre-processor to make field
1447      access look more like C++ require that `parser' be overwritten
1448      here.  This makes this function more painful to follow than it
1449      would be otherwise.
1450   */
1451   if (parser->m_ns) {
1452     XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1453     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1454   } else {
1455     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1456   }
1457 
1458   if (! parser)
1459     return NULL;
1460 
1461   parser->m_startElementHandler = oldStartElementHandler;
1462   parser->m_endElementHandler = oldEndElementHandler;
1463   parser->m_characterDataHandler = oldCharacterDataHandler;
1464   parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1465   parser->m_commentHandler = oldCommentHandler;
1466   parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1467   parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1468   parser->m_defaultHandler = oldDefaultHandler;
1469   parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1470   parser->m_notationDeclHandler = oldNotationDeclHandler;
1471   parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1472   parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1473   parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1474   parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1475   parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1476   parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1477   parser->m_elementDeclHandler = oldElementDeclHandler;
1478   parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1479   parser->m_entityDeclHandler = oldEntityDeclHandler;
1480   parser->m_xmlDeclHandler = oldXmlDeclHandler;
1481   parser->m_declElementType = oldDeclElementType;
1482   parser->m_userData = oldUserData;
1483   if (oldUserData == oldHandlerArg)
1484     parser->m_handlerArg = parser->m_userData;
1485   else
1486     parser->m_handlerArg = parser;
1487   if (oldExternalEntityRefHandlerArg != oldParser)
1488     parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1489   parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1490   parser->m_ns_triplets = oldns_triplets;
1491   parser->m_hash_secret_salt = oldhash_secret_salt;
1492   parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1493   parser->m_parentParser = oldParser;
1494 #ifdef XML_DTD
1495   parser->m_paramEntityParsing = oldParamEntityParsing;
1496   parser->m_prologState.inEntityValue = oldInEntityValue;
1497   if (context) {
1498 #endif /* XML_DTD */
1499     if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1500         || ! setContext(parser, context)) {
1501       XML_ParserFree(parser);
1502       return NULL;
1503     }
1504     parser->m_processor = externalEntityInitProcessor;
1505 #ifdef XML_DTD
1506   } else {
1507     /* The DTD instance referenced by parser->m_dtd is shared between the
1508        document's root parser and external PE parsers, therefore one does not
1509        need to call setContext. In addition, one also *must* not call
1510        setContext, because this would overwrite existing prefix->binding
1511        pointers in parser->m_dtd with ones that get destroyed with the external
1512        PE parser. This would leave those prefixes with dangling pointers.
1513     */
1514     parser->m_isParamEntity = XML_TRUE;
1515     XmlPrologStateInitExternalEntity(&parser->m_prologState);
1516     parser->m_processor = externalParEntInitProcessor;
1517   }
1518 #endif /* XML_DTD */
1519   return parser;
1520 }
1521 
1522 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1523 destroyBindings(BINDING *bindings, XML_Parser parser) {
1524   for (;;) {
1525     BINDING *b = bindings;
1526     if (! b)
1527       break;
1528     bindings = b->nextTagBinding;
1529     FREE(parser, b->uri);
1530     FREE(parser, b);
1531   }
1532 }
1533 
1534 void XMLCALL
XML_ParserFree(XML_Parser parser)1535 XML_ParserFree(XML_Parser parser) {
1536   TAG *tagList;
1537   OPEN_INTERNAL_ENTITY *entityList;
1538   if (parser == NULL)
1539     return;
1540   /* free m_tagStack and m_freeTagList */
1541   tagList = parser->m_tagStack;
1542   for (;;) {
1543     TAG *p;
1544     if (tagList == NULL) {
1545       if (parser->m_freeTagList == NULL)
1546         break;
1547       tagList = parser->m_freeTagList;
1548       parser->m_freeTagList = NULL;
1549     }
1550     p = tagList;
1551     tagList = tagList->parent;
1552     FREE(parser, p->buf);
1553     destroyBindings(p->bindings, parser);
1554     FREE(parser, p);
1555   }
1556   /* free m_openInternalEntities and m_freeInternalEntities */
1557   entityList = parser->m_openInternalEntities;
1558   for (;;) {
1559     OPEN_INTERNAL_ENTITY *openEntity;
1560     if (entityList == NULL) {
1561       if (parser->m_freeInternalEntities == NULL)
1562         break;
1563       entityList = parser->m_freeInternalEntities;
1564       parser->m_freeInternalEntities = NULL;
1565     }
1566     openEntity = entityList;
1567     entityList = entityList->next;
1568     FREE(parser, openEntity);
1569   }
1570 
1571   destroyBindings(parser->m_freeBindingList, parser);
1572   destroyBindings(parser->m_inheritedBindings, parser);
1573   poolDestroy(&parser->m_tempPool);
1574   poolDestroy(&parser->m_temp2Pool);
1575   FREE(parser, (void *)parser->m_protocolEncodingName);
1576 #ifdef XML_DTD
1577   /* external parameter entity parsers share the DTD structure
1578      parser->m_dtd with the root parser, so we must not destroy it
1579   */
1580   if (! parser->m_isParamEntity && parser->m_dtd)
1581 #else
1582   if (parser->m_dtd)
1583 #endif /* XML_DTD */
1584     dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1585                &parser->m_mem);
1586   FREE(parser, (void *)parser->m_atts);
1587 #ifdef XML_ATTR_INFO
1588   FREE(parser, (void *)parser->m_attInfo);
1589 #endif
1590   FREE(parser, parser->m_groupConnector);
1591   FREE(parser, parser->m_buffer);
1592   FREE(parser, parser->m_dataBuf);
1593   FREE(parser, parser->m_nsAtts);
1594   FREE(parser, parser->m_unknownEncodingMem);
1595   if (parser->m_unknownEncodingRelease)
1596     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1597   FREE(parser, parser);
1598 }
1599 
1600 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1601 XML_UseParserAsHandlerArg(XML_Parser parser) {
1602   if (parser != NULL)
1603     parser->m_handlerArg = parser;
1604 }
1605 
1606 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1607 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1608   if (parser == NULL)
1609     return XML_ERROR_INVALID_ARGUMENT;
1610 #ifdef XML_DTD
1611   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1612   if (parser->m_parsingStatus.parsing == XML_PARSING
1613       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1614     return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1615   parser->m_useForeignDTD = useDTD;
1616   return XML_ERROR_NONE;
1617 #else
1618   UNUSED_P(useDTD);
1619   return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1620 #endif
1621 }
1622 
1623 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1624 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1625   if (parser == NULL)
1626     return;
1627   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1628   if (parser->m_parsingStatus.parsing == XML_PARSING
1629       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1630     return;
1631   parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1632 }
1633 
1634 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1635 XML_SetUserData(XML_Parser parser, void *p) {
1636   if (parser == NULL)
1637     return;
1638   if (parser->m_handlerArg == parser->m_userData)
1639     parser->m_handlerArg = parser->m_userData = p;
1640   else
1641     parser->m_userData = p;
1642 }
1643 
1644 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1645 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1646   if (parser == NULL)
1647     return XML_STATUS_ERROR;
1648   if (p) {
1649     p = poolCopyString(&parser->m_dtd->pool, p);
1650     if (! p)
1651       return XML_STATUS_ERROR;
1652     parser->m_curBase = p;
1653   } else
1654     parser->m_curBase = NULL;
1655   return XML_STATUS_OK;
1656 }
1657 
1658 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1659 XML_GetBase(XML_Parser parser) {
1660   if (parser == NULL)
1661     return NULL;
1662   return parser->m_curBase;
1663 }
1664 
1665 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1666 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1667   if (parser == NULL)
1668     return -1;
1669   return parser->m_nSpecifiedAtts;
1670 }
1671 
1672 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1673 XML_GetIdAttributeIndex(XML_Parser parser) {
1674   if (parser == NULL)
1675     return -1;
1676   return parser->m_idAttIndex;
1677 }
1678 
#ifdef XML_ATTR_INFO
/* Returns the parser's attribute info array (m_attInfo), or NULL for a NULL
   parser.  Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1687 
1688 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1689 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1690                       XML_EndElementHandler end) {
1691   if (parser == NULL)
1692     return;
1693   parser->m_startElementHandler = start;
1694   parser->m_endElementHandler = end;
1695 }
1696 
1697 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1698 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1699   if (parser != NULL)
1700     parser->m_startElementHandler = start;
1701 }
1702 
1703 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1704 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1705   if (parser != NULL)
1706     parser->m_endElementHandler = end;
1707 }
1708 
1709 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1710 XML_SetCharacterDataHandler(XML_Parser parser,
1711                             XML_CharacterDataHandler handler) {
1712   if (parser != NULL)
1713     parser->m_characterDataHandler = handler;
1714 }
1715 
1716 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1717 XML_SetProcessingInstructionHandler(XML_Parser parser,
1718                                     XML_ProcessingInstructionHandler handler) {
1719   if (parser != NULL)
1720     parser->m_processingInstructionHandler = handler;
1721 }
1722 
1723 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1724 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1725   if (parser != NULL)
1726     parser->m_commentHandler = handler;
1727 }
1728 
1729 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1730 XML_SetCdataSectionHandler(XML_Parser parser,
1731                            XML_StartCdataSectionHandler start,
1732                            XML_EndCdataSectionHandler end) {
1733   if (parser == NULL)
1734     return;
1735   parser->m_startCdataSectionHandler = start;
1736   parser->m_endCdataSectionHandler = end;
1737 }
1738 
1739 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1740 XML_SetStartCdataSectionHandler(XML_Parser parser,
1741                                 XML_StartCdataSectionHandler start) {
1742   if (parser != NULL)
1743     parser->m_startCdataSectionHandler = start;
1744 }
1745 
1746 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1747 XML_SetEndCdataSectionHandler(XML_Parser parser,
1748                               XML_EndCdataSectionHandler end) {
1749   if (parser != NULL)
1750     parser->m_endCdataSectionHandler = end;
1751 }
1752 
1753 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1754 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1755   if (parser == NULL)
1756     return;
1757   parser->m_defaultHandler = handler;
1758   parser->m_defaultExpandInternalEntities = XML_FALSE;
1759 }
1760 
1761 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1762 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1763   if (parser == NULL)
1764     return;
1765   parser->m_defaultHandler = handler;
1766   parser->m_defaultExpandInternalEntities = XML_TRUE;
1767 }
1768 
1769 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1770 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1771                           XML_EndDoctypeDeclHandler end) {
1772   if (parser == NULL)
1773     return;
1774   parser->m_startDoctypeDeclHandler = start;
1775   parser->m_endDoctypeDeclHandler = end;
1776 }
1777 
1778 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1779 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1780                                XML_StartDoctypeDeclHandler start) {
1781   if (parser != NULL)
1782     parser->m_startDoctypeDeclHandler = start;
1783 }
1784 
1785 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1786 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1787   if (parser != NULL)
1788     parser->m_endDoctypeDeclHandler = end;
1789 }
1790 
1791 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1792 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1793                                  XML_UnparsedEntityDeclHandler handler) {
1794   if (parser != NULL)
1795     parser->m_unparsedEntityDeclHandler = handler;
1796 }
1797 
1798 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1799 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1800   if (parser != NULL)
1801     parser->m_notationDeclHandler = handler;
1802 }
1803 
1804 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1805 XML_SetNamespaceDeclHandler(XML_Parser parser,
1806                             XML_StartNamespaceDeclHandler start,
1807                             XML_EndNamespaceDeclHandler end) {
1808   if (parser == NULL)
1809     return;
1810   parser->m_startNamespaceDeclHandler = start;
1811   parser->m_endNamespaceDeclHandler = end;
1812 }
1813 
1814 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1815 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1816                                  XML_StartNamespaceDeclHandler start) {
1817   if (parser != NULL)
1818     parser->m_startNamespaceDeclHandler = start;
1819 }
1820 
1821 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1822 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1823                                XML_EndNamespaceDeclHandler end) {
1824   if (parser != NULL)
1825     parser->m_endNamespaceDeclHandler = end;
1826 }
1827 
1828 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1829 XML_SetNotStandaloneHandler(XML_Parser parser,
1830                             XML_NotStandaloneHandler handler) {
1831   if (parser != NULL)
1832     parser->m_notStandaloneHandler = handler;
1833 }
1834 
1835 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1836 XML_SetExternalEntityRefHandler(XML_Parser parser,
1837                                 XML_ExternalEntityRefHandler handler) {
1838   if (parser != NULL)
1839     parser->m_externalEntityRefHandler = handler;
1840 }
1841 
1842 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1843 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1844   if (parser == NULL)
1845     return;
1846   if (arg)
1847     parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1848   else
1849     parser->m_externalEntityRefHandlerArg = parser;
1850 }
1851 
1852 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1853 XML_SetSkippedEntityHandler(XML_Parser parser,
1854                             XML_SkippedEntityHandler handler) {
1855   if (parser != NULL)
1856     parser->m_skippedEntityHandler = handler;
1857 }
1858 
1859 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1860 XML_SetUnknownEncodingHandler(XML_Parser parser,
1861                               XML_UnknownEncodingHandler handler, void *data) {
1862   if (parser == NULL)
1863     return;
1864   parser->m_unknownEncodingHandler = handler;
1865   parser->m_unknownEncodingHandlerData = data;
1866 }
1867 
1868 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1869 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1870   if (parser != NULL)
1871     parser->m_elementDeclHandler = eldecl;
1872 }
1873 
1874 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1875 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1876   if (parser != NULL)
1877     parser->m_attlistDeclHandler = attdecl;
1878 }
1879 
1880 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1881 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1882   if (parser != NULL)
1883     parser->m_entityDeclHandler = handler;
1884 }
1885 
1886 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1887 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1888   if (parser != NULL)
1889     parser->m_xmlDeclHandler = handler;
1890 }
1891 
1892 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1893 XML_SetParamEntityParsing(XML_Parser parser,
1894                           enum XML_ParamEntityParsing peParsing) {
1895   if (parser == NULL)
1896     return 0;
1897   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1898   if (parser->m_parsingStatus.parsing == XML_PARSING
1899       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1900     return 0;
1901 #ifdef XML_DTD
1902   parser->m_paramEntityParsing = peParsing;
1903   return 1;
1904 #else
1905   return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1906 #endif
1907 }
1908 
1909 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1910 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1911   if (parser == NULL)
1912     return 0;
1913   if (parser->m_parentParser)
1914     return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1915   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1916   if (parser->m_parsingStatus.parsing == XML_PARSING
1917       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1918     return 0;
1919   parser->m_hash_secret_salt = hash_salt;
1920   return 1;
1921 }
1922 
1923 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1924 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1925   if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1926     if (parser != NULL)
1927       parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1928     return XML_STATUS_ERROR;
1929   }
1930   switch (parser->m_parsingStatus.parsing) {
1931   case XML_SUSPENDED:
1932     parser->m_errorCode = XML_ERROR_SUSPENDED;
1933     return XML_STATUS_ERROR;
1934   case XML_FINISHED:
1935     parser->m_errorCode = XML_ERROR_FINISHED;
1936     return XML_STATUS_ERROR;
1937   case XML_INITIALIZED:
1938     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1939       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1940       return XML_STATUS_ERROR;
1941     }
1942     /* fall through */
1943   default:
1944     parser->m_parsingStatus.parsing = XML_PARSING;
1945   }
1946 
1947 #if XML_CONTEXT_BYTES == 0
1948   if (parser->m_bufferPtr == parser->m_bufferEnd) {
1949     const char *end;
1950     int nLeftOver;
1951     enum XML_Status result;
1952     /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1953     if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1954       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1955       parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1956       parser->m_processor = errorProcessor;
1957       return XML_STATUS_ERROR;
1958     }
1959     // though this isn't a buffer request, we assume that `len` is the app's
1960     // preferred buffer fill size, and therefore save it here.
1961     parser->m_lastBufferRequestSize = len;
1962     parser->m_parseEndByteIndex += len;
1963     parser->m_positionPtr = s;
1964     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1965 
1966     parser->m_errorCode
1967         = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
1968 
1969     if (parser->m_errorCode != XML_ERROR_NONE) {
1970       parser->m_eventEndPtr = parser->m_eventPtr;
1971       parser->m_processor = errorProcessor;
1972       return XML_STATUS_ERROR;
1973     } else {
1974       switch (parser->m_parsingStatus.parsing) {
1975       case XML_SUSPENDED:
1976         result = XML_STATUS_SUSPENDED;
1977         break;
1978       case XML_INITIALIZED:
1979       case XML_PARSING:
1980         if (isFinal) {
1981           parser->m_parsingStatus.parsing = XML_FINISHED;
1982           return XML_STATUS_OK;
1983         }
1984       /* fall through */
1985       default:
1986         result = XML_STATUS_OK;
1987       }
1988     }
1989 
1990     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1991                       &parser->m_position);
1992     nLeftOver = s + len - end;
1993     if (nLeftOver) {
1994       // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
1995       // (and XML_ERROR_FINISHED) from XML_GetBuffer.
1996       const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
1997       parser->m_parsingStatus.parsing = XML_PARSING;
1998       void *const temp = XML_GetBuffer(parser, nLeftOver);
1999       parser->m_parsingStatus.parsing = originalStatus;
2000       // GetBuffer may have overwritten this, but we want to remember what the
2001       // app requested, not how many bytes were left over after parsing.
2002       parser->m_lastBufferRequestSize = len;
2003       if (temp == NULL) {
2004         // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2005         parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2006         parser->m_processor = errorProcessor;
2007         return XML_STATUS_ERROR;
2008       }
2009       // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2010       // don't have any data to preserve, and can copy straight into the start
2011       // of the buffer rather than the GetBuffer return pointer (which may be
2012       // pointing further into the allocated buffer).
2013       memcpy(parser->m_buffer, end, nLeftOver);
2014     }
2015     parser->m_bufferPtr = parser->m_buffer;
2016     parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2017     parser->m_positionPtr = parser->m_bufferPtr;
2018     parser->m_parseEndPtr = parser->m_bufferEnd;
2019     parser->m_eventPtr = parser->m_bufferPtr;
2020     parser->m_eventEndPtr = parser->m_bufferPtr;
2021     return result;
2022   }
2023 #endif /* XML_CONTEXT_BYTES == 0 */
2024   void *buff = XML_GetBuffer(parser, len);
2025   if (buff == NULL)
2026     return XML_STATUS_ERROR;
2027   if (len > 0) {
2028     assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2029     memcpy(buff, s, len);
2030   }
2031   return XML_ParseBuffer(parser, len, isFinal);
2032 }
2033 
2034 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2035 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2036   const char *start;
2037   enum XML_Status result = XML_STATUS_OK;
2038 
2039   if (parser == NULL)
2040     return XML_STATUS_ERROR;
2041   switch (parser->m_parsingStatus.parsing) {
2042   case XML_SUSPENDED:
2043     parser->m_errorCode = XML_ERROR_SUSPENDED;
2044     return XML_STATUS_ERROR;
2045   case XML_FINISHED:
2046     parser->m_errorCode = XML_ERROR_FINISHED;
2047     return XML_STATUS_ERROR;
2048   case XML_INITIALIZED:
2049     /* Has someone called XML_GetBuffer successfully before? */
2050     if (! parser->m_bufferPtr) {
2051       parser->m_errorCode = XML_ERROR_NO_BUFFER;
2052       return XML_STATUS_ERROR;
2053     }
2054 
2055     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2056       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2057       return XML_STATUS_ERROR;
2058     }
2059     /* fall through */
2060   default:
2061     parser->m_parsingStatus.parsing = XML_PARSING;
2062   }
2063 
2064   start = parser->m_bufferPtr;
2065   parser->m_positionPtr = start;
2066   parser->m_bufferEnd += len;
2067   parser->m_parseEndPtr = parser->m_bufferEnd;
2068   parser->m_parseEndByteIndex += len;
2069   parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2070 
2071   parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2072                                       &parser->m_bufferPtr);
2073 
2074   if (parser->m_errorCode != XML_ERROR_NONE) {
2075     parser->m_eventEndPtr = parser->m_eventPtr;
2076     parser->m_processor = errorProcessor;
2077     return XML_STATUS_ERROR;
2078   } else {
2079     switch (parser->m_parsingStatus.parsing) {
2080     case XML_SUSPENDED:
2081       result = XML_STATUS_SUSPENDED;
2082       break;
2083     case XML_INITIALIZED:
2084     case XML_PARSING:
2085       if (isFinal) {
2086         parser->m_parsingStatus.parsing = XML_FINISHED;
2087         return result;
2088       }
2089     default:; /* should not happen */
2090     }
2091   }
2092 
2093   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2094                     parser->m_bufferPtr, &parser->m_position);
2095   parser->m_positionPtr = parser->m_bufferPtr;
2096   return result;
2097 }
2098 
2099 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2100 XML_GetBuffer(XML_Parser parser, int len) {
2101   if (parser == NULL)
2102     return NULL;
2103   if (len < 0) {
2104     parser->m_errorCode = XML_ERROR_NO_MEMORY;
2105     return NULL;
2106   }
2107   switch (parser->m_parsingStatus.parsing) {
2108   case XML_SUSPENDED:
2109     parser->m_errorCode = XML_ERROR_SUSPENDED;
2110     return NULL;
2111   case XML_FINISHED:
2112     parser->m_errorCode = XML_ERROR_FINISHED;
2113     return NULL;
2114   default:;
2115   }
2116 
2117   // whether or not the request succeeds, `len` seems to be the app's preferred
2118   // buffer fill size; remember it.
2119   parser->m_lastBufferRequestSize = len;
2120   if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2121       || parser->m_buffer == NULL) {
2122 #if XML_CONTEXT_BYTES > 0
2123     int keep;
2124 #endif /* XML_CONTEXT_BYTES > 0 */
2125     /* Do not invoke signed arithmetic overflow: */
2126     int neededSize = (int)((unsigned)len
2127                            + (unsigned)EXPAT_SAFE_PTR_DIFF(
2128                                parser->m_bufferEnd, parser->m_bufferPtr));
2129     if (neededSize < 0) {
2130       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2131       return NULL;
2132     }
2133 #if XML_CONTEXT_BYTES > 0
2134     keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2135     if (keep > XML_CONTEXT_BYTES)
2136       keep = XML_CONTEXT_BYTES;
2137     /* Detect and prevent integer overflow */
2138     if (keep > INT_MAX - neededSize) {
2139       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2140       return NULL;
2141     }
2142     neededSize += keep;
2143 #endif /* XML_CONTEXT_BYTES > 0 */
2144     if (parser->m_buffer && parser->m_bufferPtr
2145         && neededSize
2146                <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2147 #if XML_CONTEXT_BYTES > 0
2148       if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2149         int offset
2150             = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2151               - keep;
2152         /* The buffer pointers cannot be NULL here; we have at least some bytes
2153          * in the buffer */
2154         memmove(parser->m_buffer, &parser->m_buffer[offset],
2155                 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2156         parser->m_bufferEnd -= offset;
2157         parser->m_bufferPtr -= offset;
2158       }
2159 #else
2160       memmove(parser->m_buffer, parser->m_bufferPtr,
2161               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2162       parser->m_bufferEnd
2163           = parser->m_buffer
2164             + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2165       parser->m_bufferPtr = parser->m_buffer;
2166 #endif /* XML_CONTEXT_BYTES > 0 */
2167     } else {
2168       char *newBuf;
2169       int bufferSize
2170           = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2171       if (bufferSize == 0)
2172         bufferSize = INIT_BUFFER_SIZE;
2173       do {
2174         /* Do not invoke signed arithmetic overflow: */
2175         bufferSize = (int)(2U * (unsigned)bufferSize);
2176       } while (bufferSize < neededSize && bufferSize > 0);
2177       if (bufferSize <= 0) {
2178         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2179         return NULL;
2180       }
2181       newBuf = (char *)MALLOC(parser, bufferSize);
2182       if (newBuf == 0) {
2183         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2184         return NULL;
2185       }
2186       parser->m_bufferLim = newBuf + bufferSize;
2187 #if XML_CONTEXT_BYTES > 0
2188       if (parser->m_bufferPtr) {
2189         memcpy(newBuf, &parser->m_bufferPtr[-keep],
2190                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2191                    + keep);
2192         FREE(parser, parser->m_buffer);
2193         parser->m_buffer = newBuf;
2194         parser->m_bufferEnd
2195             = parser->m_buffer
2196               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2197               + keep;
2198         parser->m_bufferPtr = parser->m_buffer + keep;
2199       } else {
2200         /* This must be a brand new buffer with no data in it yet */
2201         parser->m_bufferEnd = newBuf;
2202         parser->m_bufferPtr = parser->m_buffer = newBuf;
2203       }
2204 #else
2205       if (parser->m_bufferPtr) {
2206         memcpy(newBuf, parser->m_bufferPtr,
2207                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2208         FREE(parser, parser->m_buffer);
2209         parser->m_bufferEnd
2210             = newBuf
2211               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2212       } else {
2213         /* This must be a brand new buffer with no data in it yet */
2214         parser->m_bufferEnd = newBuf;
2215       }
2216       parser->m_bufferPtr = parser->m_buffer = newBuf;
2217 #endif /* XML_CONTEXT_BYTES > 0 */
2218     }
2219     parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2220     parser->m_positionPtr = NULL;
2221   }
2222   return parser->m_bufferEnd;
2223 }
2224 
2225 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2226 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2227   if (parser == NULL)
2228     return XML_STATUS_ERROR;
2229   switch (parser->m_parsingStatus.parsing) {
2230   case XML_SUSPENDED:
2231     if (resumable) {
2232       parser->m_errorCode = XML_ERROR_SUSPENDED;
2233       return XML_STATUS_ERROR;
2234     }
2235     parser->m_parsingStatus.parsing = XML_FINISHED;
2236     break;
2237   case XML_FINISHED:
2238     parser->m_errorCode = XML_ERROR_FINISHED;
2239     return XML_STATUS_ERROR;
2240   default:
2241     if (resumable) {
2242 #ifdef XML_DTD
2243       if (parser->m_isParamEntity) {
2244         parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2245         return XML_STATUS_ERROR;
2246       }
2247 #endif
2248       parser->m_parsingStatus.parsing = XML_SUSPENDED;
2249     } else
2250       parser->m_parsingStatus.parsing = XML_FINISHED;
2251   }
2252   return XML_STATUS_OK;
2253 }
2254 
2255 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2256 XML_ResumeParser(XML_Parser parser) {
2257   enum XML_Status result = XML_STATUS_OK;
2258 
2259   if (parser == NULL)
2260     return XML_STATUS_ERROR;
2261   if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2262     parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2263     return XML_STATUS_ERROR;
2264   }
2265   parser->m_parsingStatus.parsing = XML_PARSING;
2266 
2267   parser->m_errorCode = callProcessor(
2268       parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2269 
2270   if (parser->m_errorCode != XML_ERROR_NONE) {
2271     parser->m_eventEndPtr = parser->m_eventPtr;
2272     parser->m_processor = errorProcessor;
2273     return XML_STATUS_ERROR;
2274   } else {
2275     switch (parser->m_parsingStatus.parsing) {
2276     case XML_SUSPENDED:
2277       result = XML_STATUS_SUSPENDED;
2278       break;
2279     case XML_INITIALIZED:
2280     case XML_PARSING:
2281       if (parser->m_parsingStatus.finalBuffer) {
2282         parser->m_parsingStatus.parsing = XML_FINISHED;
2283         return result;
2284       }
2285     default:;
2286     }
2287   }
2288 
2289   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2290                     parser->m_bufferPtr, &parser->m_position);
2291   parser->m_positionPtr = parser->m_bufferPtr;
2292   return result;
2293 }
2294 
2295 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2296 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2297   if (parser == NULL)
2298     return;
2299   assert(status != NULL);
2300   *status = parser->m_parsingStatus;
2301 }
2302 
2303 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2304 XML_GetErrorCode(XML_Parser parser) {
2305   if (parser == NULL)
2306     return XML_ERROR_INVALID_ARGUMENT;
2307   return parser->m_errorCode;
2308 }
2309 
2310 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2311 XML_GetCurrentByteIndex(XML_Parser parser) {
2312   if (parser == NULL)
2313     return -1;
2314   if (parser->m_eventPtr)
2315     return (XML_Index)(parser->m_parseEndByteIndex
2316                        - (parser->m_parseEndPtr - parser->m_eventPtr));
2317   return -1;
2318 }
2319 
2320 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2321 XML_GetCurrentByteCount(XML_Parser parser) {
2322   if (parser == NULL)
2323     return 0;
2324   if (parser->m_eventEndPtr && parser->m_eventPtr)
2325     return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2326   return 0;
2327 }
2328 
2329 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2330 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2331 #if XML_CONTEXT_BYTES > 0
2332   if (parser == NULL)
2333     return NULL;
2334   if (parser->m_eventPtr && parser->m_buffer) {
2335     if (offset != NULL)
2336       *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2337     if (size != NULL)
2338       *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2339     return parser->m_buffer;
2340   }
2341 #else
2342   (void)parser;
2343   (void)offset;
2344   (void)size;
2345 #endif /* XML_CONTEXT_BYTES > 0 */
2346   return (const char *)0;
2347 }
2348 
2349 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2350 XML_GetCurrentLineNumber(XML_Parser parser) {
2351   if (parser == NULL)
2352     return 0;
2353   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2354     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2355                       parser->m_eventPtr, &parser->m_position);
2356     parser->m_positionPtr = parser->m_eventPtr;
2357   }
2358   return parser->m_position.lineNumber + 1;
2359 }
2360 
2361 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2362 XML_GetCurrentColumnNumber(XML_Parser parser) {
2363   if (parser == NULL)
2364     return 0;
2365   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2366     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2367                       parser->m_eventPtr, &parser->m_position);
2368     parser->m_positionPtr = parser->m_eventPtr;
2369   }
2370   return parser->m_position.columnNumber;
2371 }
2372 
2373 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2374 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2375   if (parser != NULL)
2376     FREE(parser, model);
2377 }
2378 
2379 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2380 XML_MemMalloc(XML_Parser parser, size_t size) {
2381   if (parser == NULL)
2382     return NULL;
2383   return MALLOC(parser, size);
2384 }
2385 
2386 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2387 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2388   if (parser == NULL)
2389     return NULL;
2390   return REALLOC(parser, ptr, size);
2391 }
2392 
2393 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2394 XML_MemFree(XML_Parser parser, void *ptr) {
2395   if (parser != NULL)
2396     FREE(parser, ptr);
2397 }
2398 
2399 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2400 XML_DefaultCurrent(XML_Parser parser) {
2401   if (parser == NULL)
2402     return;
2403   if (parser->m_defaultHandler) {
2404     if (parser->m_openInternalEntities)
2405       reportDefault(parser, parser->m_internalEncoding,
2406                     parser->m_openInternalEntities->internalEventPtr,
2407                     parser->m_openInternalEntities->internalEventEndPtr);
2408     else
2409       reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2410                     parser->m_eventEndPtr);
2411   }
2412 }
2413 
2414 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2415 XML_ErrorString(enum XML_Error code) {
2416   switch (code) {
2417   case XML_ERROR_NONE:
2418     return NULL;
2419   case XML_ERROR_NO_MEMORY:
2420     return XML_L("out of memory");
2421   case XML_ERROR_SYNTAX:
2422     return XML_L("syntax error");
2423   case XML_ERROR_NO_ELEMENTS:
2424     return XML_L("no element found");
2425   case XML_ERROR_INVALID_TOKEN:
2426     return XML_L("not well-formed (invalid token)");
2427   case XML_ERROR_UNCLOSED_TOKEN:
2428     return XML_L("unclosed token");
2429   case XML_ERROR_PARTIAL_CHAR:
2430     return XML_L("partial character");
2431   case XML_ERROR_TAG_MISMATCH:
2432     return XML_L("mismatched tag");
2433   case XML_ERROR_DUPLICATE_ATTRIBUTE:
2434     return XML_L("duplicate attribute");
2435   case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2436     return XML_L("junk after document element");
2437   case XML_ERROR_PARAM_ENTITY_REF:
2438     return XML_L("illegal parameter entity reference");
2439   case XML_ERROR_UNDEFINED_ENTITY:
2440     return XML_L("undefined entity");
2441   case XML_ERROR_RECURSIVE_ENTITY_REF:
2442     return XML_L("recursive entity reference");
2443   case XML_ERROR_ASYNC_ENTITY:
2444     return XML_L("asynchronous entity");
2445   case XML_ERROR_BAD_CHAR_REF:
2446     return XML_L("reference to invalid character number");
2447   case XML_ERROR_BINARY_ENTITY_REF:
2448     return XML_L("reference to binary entity");
2449   case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2450     return XML_L("reference to external entity in attribute");
2451   case XML_ERROR_MISPLACED_XML_PI:
2452     return XML_L("XML or text declaration not at start of entity");
2453   case XML_ERROR_UNKNOWN_ENCODING:
2454     return XML_L("unknown encoding");
2455   case XML_ERROR_INCORRECT_ENCODING:
2456     return XML_L("encoding specified in XML declaration is incorrect");
2457   case XML_ERROR_UNCLOSED_CDATA_SECTION:
2458     return XML_L("unclosed CDATA section");
2459   case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2460     return XML_L("error in processing external entity reference");
2461   case XML_ERROR_NOT_STANDALONE:
2462     return XML_L("document is not standalone");
2463   case XML_ERROR_UNEXPECTED_STATE:
2464     return XML_L("unexpected parser state - please send a bug report");
2465   case XML_ERROR_ENTITY_DECLARED_IN_PE:
2466     return XML_L("entity declared in parameter entity");
2467   case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2468     return XML_L("requested feature requires XML_DTD support in Expat");
2469   case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2470     return XML_L("cannot change setting once parsing has begun");
2471   /* Added in 1.95.7. */
2472   case XML_ERROR_UNBOUND_PREFIX:
2473     return XML_L("unbound prefix");
2474   /* Added in 1.95.8. */
2475   case XML_ERROR_UNDECLARING_PREFIX:
2476     return XML_L("must not undeclare prefix");
2477   case XML_ERROR_INCOMPLETE_PE:
2478     return XML_L("incomplete markup in parameter entity");
2479   case XML_ERROR_XML_DECL:
2480     return XML_L("XML declaration not well-formed");
2481   case XML_ERROR_TEXT_DECL:
2482     return XML_L("text declaration not well-formed");
2483   case XML_ERROR_PUBLICID:
2484     return XML_L("illegal character(s) in public id");
2485   case XML_ERROR_SUSPENDED:
2486     return XML_L("parser suspended");
2487   case XML_ERROR_NOT_SUSPENDED:
2488     return XML_L("parser not suspended");
2489   case XML_ERROR_ABORTED:
2490     return XML_L("parsing aborted");
2491   case XML_ERROR_FINISHED:
2492     return XML_L("parsing finished");
2493   case XML_ERROR_SUSPEND_PE:
2494     return XML_L("cannot suspend in external parameter entity");
2495   /* Added in 2.0.0. */
2496   case XML_ERROR_RESERVED_PREFIX_XML:
2497     return XML_L(
2498         "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2499   case XML_ERROR_RESERVED_PREFIX_XMLNS:
2500     return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2501   case XML_ERROR_RESERVED_NAMESPACE_URI:
2502     return XML_L(
2503         "prefix must not be bound to one of the reserved namespace names");
2504   /* Added in 2.2.5. */
2505   case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2506     return XML_L("invalid argument");
2507     /* Added in 2.3.0. */
2508   case XML_ERROR_NO_BUFFER:
2509     return XML_L(
2510         "a successful prior call to function XML_GetBuffer is required");
2511   /* Added in 2.4.0. */
2512   case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2513     return XML_L(
2514         "limit on input amplification factor (from DTD and entities) breached");
2515   }
2516   return NULL;
2517 }
2518 
2519 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2520 XML_ExpatVersion(void) {
2521   /* V1 is used to string-ize the version number. However, it would
2522      string-ize the actual version macro *names* unless we get them
2523      substituted before being passed to V1. CPP is defined to expand
2524      a macro, then rescan for more expansions. Thus, we use V2 to expand
2525      the version macros, then CPP will expand the resulting V1() macro
2526      with the correct numerals. */
2527   /* ### I'm assuming cpp is portable in this respect... */
2528 
2529 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2530 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2531 
2532   return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2533 
2534 #undef V1
2535 #undef V2
2536 }
2537 
2538 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2539 XML_ExpatVersionInfo(void) {
2540   XML_Expat_Version version;
2541 
2542   version.major = XML_MAJOR_VERSION;
2543   version.minor = XML_MINOR_VERSION;
2544   version.micro = XML_MICRO_VERSION;
2545 
2546   return version;
2547 }
2548 
2549 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2550 XML_GetFeatureList(void) {
2551   static const XML_Feature features[] = {
2552       {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2553        sizeof(XML_Char)},
2554       {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2555        sizeof(XML_LChar)},
2556 #ifdef XML_UNICODE
2557       {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2558 #endif
2559 #ifdef XML_UNICODE_WCHAR_T
2560       {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2561 #endif
2562 #ifdef XML_DTD
2563       {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2564 #endif
2565 #if XML_CONTEXT_BYTES > 0
2566       {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2567        XML_CONTEXT_BYTES},
2568 #endif
2569 #ifdef XML_MIN_SIZE
2570       {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2571 #endif
2572 #ifdef XML_NS
2573       {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2574 #endif
2575 #ifdef XML_LARGE_SIZE
2576       {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2577 #endif
2578 #ifdef XML_ATTR_INFO
2579       {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2580 #endif
2581 #if XML_GE == 1
2582       /* Added in Expat 2.4.0 for XML_DTD defined and
2583        * added in Expat 2.6.0 for XML_GE == 1. */
2584       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2585        XML_L("XML_BLAP_MAX_AMP"),
2586        (long int)
2587            EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2588       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2589        XML_L("XML_BLAP_ACT_THRES"),
2590        EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2591       /* Added in Expat 2.6.0. */
2592       {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2593 #endif
2594       {XML_FEATURE_END, NULL, 0}};
2595 
2596   return features;
2597 }
2598 
2599 #if XML_GE == 1
2600 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2601 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2602     XML_Parser parser, float maximumAmplificationFactor) {
2603   if ((parser == NULL) || (parser->m_parentParser != NULL)
2604       || isnan(maximumAmplificationFactor)
2605       || (maximumAmplificationFactor < 1.0f)) {
2606     return XML_FALSE;
2607   }
2608   parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2609   return XML_TRUE;
2610 }
2611 
2612 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2613 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2614     XML_Parser parser, unsigned long long activationThresholdBytes) {
2615   if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2616     return XML_FALSE;
2617   }
2618   parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2619   return XML_TRUE;
2620 }
2621 #endif /* XML_GE == 1 */
2622 
2623 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2624 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2625   if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2626     parser->m_reparseDeferralEnabled = enabled;
2627     return XML_TRUE;
2628   }
2629   return XML_FALSE;
2630 }
2631 
2632 /* Initially tag->rawName always points into the parse buffer;
2633    for those TAG instances opened while the current parse buffer was
2634    processed, and not yet closed, we need to store tag->rawName in a more
2635    permanent location, since the parse buffer is about to be discarded.
2636 */
2637 static XML_Bool
storeRawNames(XML_Parser parser)2638 storeRawNames(XML_Parser parser) {
2639   TAG *tag = parser->m_tagStack;
2640   while (tag) {
2641     int bufSize;
2642     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2643     size_t rawNameLen;
2644     char *rawNameBuf = tag->buf + nameLen;
2645     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
2646        at the first entry that has already been copied; everything
2647        below it in the stack is already been accounted for in a
2648        previous call to this function.
2649     */
2650     if (tag->rawName == rawNameBuf)
2651       break;
2652     /* For reuse purposes we need to ensure that the
2653        size of tag->buf is a multiple of sizeof(XML_Char).
2654     */
2655     rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2656     /* Detect and prevent integer overflow. */
2657     if (rawNameLen > (size_t)INT_MAX - nameLen)
2658       return XML_FALSE;
2659     bufSize = nameLen + (int)rawNameLen;
2660     if (bufSize > tag->bufEnd - tag->buf) {
2661       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2662       if (temp == NULL)
2663         return XML_FALSE;
2664       /* if tag->name.str points to tag->buf (only when namespace
2665          processing is off) then we have to update it
2666       */
2667       if (tag->name.str == (XML_Char *)tag->buf)
2668         tag->name.str = (XML_Char *)temp;
2669       /* if tag->name.localPart is set (when namespace processing is on)
2670          then update it as well, since it will always point into tag->buf
2671       */
2672       if (tag->name.localPart)
2673         tag->name.localPart
2674             = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2675       tag->buf = temp;
2676       tag->bufEnd = temp + bufSize;
2677       rawNameBuf = temp + nameLen;
2678     }
2679     memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2680     tag->rawName = rawNameBuf;
2681     tag = tag->parent;
2682   }
2683   return XML_TRUE;
2684 }
2685 
2686 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2687 contentProcessor(XML_Parser parser, const char *start, const char *end,
2688                  const char **endPtr) {
2689   enum XML_Error result = doContent(
2690       parser, 0, parser->m_encoding, start, end, endPtr,
2691       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2692   if (result == XML_ERROR_NONE) {
2693     if (! storeRawNames(parser))
2694       return XML_ERROR_NO_MEMORY;
2695   }
2696   return result;
2697 }
2698 
2699 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2700 externalEntityInitProcessor(XML_Parser parser, const char *start,
2701                             const char *end, const char **endPtr) {
2702   enum XML_Error result = initializeEncoding(parser);
2703   if (result != XML_ERROR_NONE)
2704     return result;
2705   parser->m_processor = externalEntityInitProcessor2;
2706   return externalEntityInitProcessor2(parser, start, end, endPtr);
2707 }
2708 
2709 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2710 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2711                              const char *end, const char **endPtr) {
2712   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2713   int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2714   switch (tok) {
2715   case XML_TOK_BOM:
2716 #if XML_GE == 1
2717     if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2718                                   XML_ACCOUNT_DIRECT)) {
2719       accountingOnAbort(parser);
2720       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2721     }
2722 #endif /* XML_GE == 1 */
2723 
2724     /* If we are at the end of the buffer, this would cause the next stage,
2725        i.e. externalEntityInitProcessor3, to pass control directly to
2726        doContent (by detecting XML_TOK_NONE) without processing any xml text
2727        declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2728     */
2729     if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2730       *endPtr = next;
2731       return XML_ERROR_NONE;
2732     }
2733     start = next;
2734     break;
2735   case XML_TOK_PARTIAL:
2736     if (! parser->m_parsingStatus.finalBuffer) {
2737       *endPtr = start;
2738       return XML_ERROR_NONE;
2739     }
2740     parser->m_eventPtr = start;
2741     return XML_ERROR_UNCLOSED_TOKEN;
2742   case XML_TOK_PARTIAL_CHAR:
2743     if (! parser->m_parsingStatus.finalBuffer) {
2744       *endPtr = start;
2745       return XML_ERROR_NONE;
2746     }
2747     parser->m_eventPtr = start;
2748     return XML_ERROR_PARTIAL_CHAR;
2749   }
2750   parser->m_processor = externalEntityInitProcessor3;
2751   return externalEntityInitProcessor3(parser, start, end, endPtr);
2752 }
2753 
2754 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2755 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2756                              const char *end, const char **endPtr) {
2757   int tok;
2758   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2759   parser->m_eventPtr = start;
2760   tok = XmlContentTok(parser->m_encoding, start, end, &next);
2761   /* Note: These bytes are accounted later in:
2762            - processXmlDecl
2763            - externalEntityContentProcessor
2764   */
2765   parser->m_eventEndPtr = next;
2766 
2767   switch (tok) {
2768   case XML_TOK_XML_DECL: {
2769     enum XML_Error result;
2770     result = processXmlDecl(parser, 1, start, next);
2771     if (result != XML_ERROR_NONE)
2772       return result;
2773     switch (parser->m_parsingStatus.parsing) {
2774     case XML_SUSPENDED:
2775       *endPtr = next;
2776       return XML_ERROR_NONE;
2777     case XML_FINISHED:
2778       return XML_ERROR_ABORTED;
2779     default:
2780       start = next;
2781     }
2782   } break;
2783   case XML_TOK_PARTIAL:
2784     if (! parser->m_parsingStatus.finalBuffer) {
2785       *endPtr = start;
2786       return XML_ERROR_NONE;
2787     }
2788     return XML_ERROR_UNCLOSED_TOKEN;
2789   case XML_TOK_PARTIAL_CHAR:
2790     if (! parser->m_parsingStatus.finalBuffer) {
2791       *endPtr = start;
2792       return XML_ERROR_NONE;
2793     }
2794     return XML_ERROR_PARTIAL_CHAR;
2795   }
2796   parser->m_processor = externalEntityContentProcessor;
2797   parser->m_tagLevel = 1;
2798   return externalEntityContentProcessor(parser, start, end, endPtr);
2799 }
2800 
2801 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2802 externalEntityContentProcessor(XML_Parser parser, const char *start,
2803                                const char *end, const char **endPtr) {
2804   enum XML_Error result
2805       = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2806                   (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2807                   XML_ACCOUNT_ENTITY_EXPANSION);
2808   if (result == XML_ERROR_NONE) {
2809     if (! storeRawNames(parser))
2810       return XML_ERROR_NO_MEMORY;
2811   }
2812   return result;
2813 }
2814 
2815 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2816 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2817           const char *s, const char *end, const char **nextPtr,
2818           XML_Bool haveMore, enum XML_Account account) {
2819   /* save one level of indirection */
2820   DTD *const dtd = parser->m_dtd;
2821 
2822   const char **eventPP;
2823   const char **eventEndPP;
2824   if (enc == parser->m_encoding) {
2825     eventPP = &parser->m_eventPtr;
2826     eventEndPP = &parser->m_eventEndPtr;
2827   } else {
2828     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2829     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2830   }
2831   *eventPP = s;
2832 
2833   for (;;) {
2834     const char *next = s; /* XmlContentTok doesn't always set the last arg */
2835     int tok = XmlContentTok(enc, s, end, &next);
2836 #if XML_GE == 1
2837     const char *accountAfter
2838         = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2839               ? (haveMore ? s /* i.e. 0 bytes */ : end)
2840               : next;
2841     if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2842                                   account)) {
2843       accountingOnAbort(parser);
2844       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2845     }
2846 #endif
2847     *eventEndPP = next;
2848     switch (tok) {
2849     case XML_TOK_TRAILING_CR:
2850       if (haveMore) {
2851         *nextPtr = s;
2852         return XML_ERROR_NONE;
2853       }
2854       *eventEndPP = end;
2855       if (parser->m_characterDataHandler) {
2856         XML_Char c = 0xA;
2857         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2858       } else if (parser->m_defaultHandler)
2859         reportDefault(parser, enc, s, end);
2860       /* We are at the end of the final buffer, should we check for
2861          XML_SUSPENDED, XML_FINISHED?
2862       */
2863       if (startTagLevel == 0)
2864         return XML_ERROR_NO_ELEMENTS;
2865       if (parser->m_tagLevel != startTagLevel)
2866         return XML_ERROR_ASYNC_ENTITY;
2867       *nextPtr = end;
2868       return XML_ERROR_NONE;
2869     case XML_TOK_NONE:
2870       if (haveMore) {
2871         *nextPtr = s;
2872         return XML_ERROR_NONE;
2873       }
2874       if (startTagLevel > 0) {
2875         if (parser->m_tagLevel != startTagLevel)
2876           return XML_ERROR_ASYNC_ENTITY;
2877         *nextPtr = s;
2878         return XML_ERROR_NONE;
2879       }
2880       return XML_ERROR_NO_ELEMENTS;
2881     case XML_TOK_INVALID:
2882       *eventPP = next;
2883       return XML_ERROR_INVALID_TOKEN;
2884     case XML_TOK_PARTIAL:
2885       if (haveMore) {
2886         *nextPtr = s;
2887         return XML_ERROR_NONE;
2888       }
2889       return XML_ERROR_UNCLOSED_TOKEN;
2890     case XML_TOK_PARTIAL_CHAR:
2891       if (haveMore) {
2892         *nextPtr = s;
2893         return XML_ERROR_NONE;
2894       }
2895       return XML_ERROR_PARTIAL_CHAR;
2896     case XML_TOK_ENTITY_REF: {
2897       const XML_Char *name;
2898       ENTITY *entity;
2899       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2900           enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2901       if (ch) {
2902 #if XML_GE == 1
2903         /* NOTE: We are replacing 4-6 characters original input for 1 character
2904          *       so there is no amplification and hence recording without
2905          *       protection. */
2906         accountingDiffTolerated(parser, tok, (char *)&ch,
2907                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2908                                 XML_ACCOUNT_ENTITY_EXPANSION);
2909 #endif /* XML_GE == 1 */
2910         if (parser->m_characterDataHandler)
2911           parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2912         else if (parser->m_defaultHandler)
2913           reportDefault(parser, enc, s, next);
2914         break;
2915       }
2916       name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2917                              next - enc->minBytesPerChar);
2918       if (! name)
2919         return XML_ERROR_NO_MEMORY;
2920       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2921       poolDiscard(&dtd->pool);
2922       /* First, determine if a check for an existing declaration is needed;
2923          if yes, check that the entity exists, and that it is internal,
2924          otherwise call the skipped entity or default handler.
2925       */
2926       if (! dtd->hasParamEntityRefs || dtd->standalone) {
2927         if (! entity)
2928           return XML_ERROR_UNDEFINED_ENTITY;
2929         else if (! entity->is_internal)
2930           return XML_ERROR_ENTITY_DECLARED_IN_PE;
2931       } else if (! entity) {
2932         if (parser->m_skippedEntityHandler)
2933           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2934         else if (parser->m_defaultHandler)
2935           reportDefault(parser, enc, s, next);
2936         break;
2937       }
2938       if (entity->open)
2939         return XML_ERROR_RECURSIVE_ENTITY_REF;
2940       if (entity->notation)
2941         return XML_ERROR_BINARY_ENTITY_REF;
2942       if (entity->textPtr) {
2943         enum XML_Error result;
2944         if (! parser->m_defaultExpandInternalEntities) {
2945           if (parser->m_skippedEntityHandler)
2946             parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2947                                            0);
2948           else if (parser->m_defaultHandler)
2949             reportDefault(parser, enc, s, next);
2950           break;
2951         }
2952         result = processInternalEntity(parser, entity, XML_FALSE);
2953         if (result != XML_ERROR_NONE)
2954           return result;
2955       } else if (parser->m_externalEntityRefHandler) {
2956         const XML_Char *context;
2957         entity->open = XML_TRUE;
2958         context = getContext(parser);
2959         entity->open = XML_FALSE;
2960         if (! context)
2961           return XML_ERROR_NO_MEMORY;
2962         if (! parser->m_externalEntityRefHandler(
2963                 parser->m_externalEntityRefHandlerArg, context, entity->base,
2964                 entity->systemId, entity->publicId))
2965           return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2966         poolDiscard(&parser->m_tempPool);
2967       } else if (parser->m_defaultHandler)
2968         reportDefault(parser, enc, s, next);
2969       break;
2970     }
2971     case XML_TOK_START_TAG_NO_ATTS:
2972       /* fall through */
2973     case XML_TOK_START_TAG_WITH_ATTS: {
2974       TAG *tag;
2975       enum XML_Error result;
2976       XML_Char *toPtr;
2977       if (parser->m_freeTagList) {
2978         tag = parser->m_freeTagList;
2979         parser->m_freeTagList = parser->m_freeTagList->parent;
2980       } else {
2981         tag = (TAG *)MALLOC(parser, sizeof(TAG));
2982         if (! tag)
2983           return XML_ERROR_NO_MEMORY;
2984         tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2985         if (! tag->buf) {
2986           FREE(parser, tag);
2987           return XML_ERROR_NO_MEMORY;
2988         }
2989         tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2990       }
2991       tag->bindings = NULL;
2992       tag->parent = parser->m_tagStack;
2993       parser->m_tagStack = tag;
2994       tag->name.localPart = NULL;
2995       tag->name.prefix = NULL;
2996       tag->rawName = s + enc->minBytesPerChar;
2997       tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2998       ++parser->m_tagLevel;
2999       {
3000         const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3001         const char *fromPtr = tag->rawName;
3002         toPtr = (XML_Char *)tag->buf;
3003         for (;;) {
3004           int bufSize;
3005           int convLen;
3006           const enum XML_Convert_Result convert_res
3007               = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3008                            (ICHAR *)tag->bufEnd - 1);
3009           convLen = (int)(toPtr - (XML_Char *)tag->buf);
3010           if ((fromPtr >= rawNameEnd)
3011               || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3012             tag->name.strLen = convLen;
3013             break;
3014           }
3015           bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3016           {
3017             char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3018             if (temp == NULL)
3019               return XML_ERROR_NO_MEMORY;
3020             tag->buf = temp;
3021             tag->bufEnd = temp + bufSize;
3022             toPtr = (XML_Char *)temp + convLen;
3023           }
3024         }
3025       }
3026       tag->name.str = (XML_Char *)tag->buf;
3027       *toPtr = XML_T('\0');
3028       result
3029           = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3030       if (result)
3031         return result;
3032       if (parser->m_startElementHandler)
3033         parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3034                                       (const XML_Char **)parser->m_atts);
3035       else if (parser->m_defaultHandler)
3036         reportDefault(parser, enc, s, next);
3037       poolClear(&parser->m_tempPool);
3038       break;
3039     }
3040     case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3041       /* fall through */
3042     case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3043       const char *rawName = s + enc->minBytesPerChar;
3044       enum XML_Error result;
3045       BINDING *bindings = NULL;
3046       XML_Bool noElmHandlers = XML_TRUE;
3047       TAG_NAME name;
3048       name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3049                                  rawName + XmlNameLength(enc, rawName));
3050       if (! name.str)
3051         return XML_ERROR_NO_MEMORY;
3052       poolFinish(&parser->m_tempPool);
3053       result = storeAtts(parser, enc, s, &name, &bindings,
3054                          XML_ACCOUNT_NONE /* token spans whole start tag */);
3055       if (result != XML_ERROR_NONE) {
3056         freeBindings(parser, bindings);
3057         return result;
3058       }
3059       poolFinish(&parser->m_tempPool);
3060       if (parser->m_startElementHandler) {
3061         parser->m_startElementHandler(parser->m_handlerArg, name.str,
3062                                       (const XML_Char **)parser->m_atts);
3063         noElmHandlers = XML_FALSE;
3064       }
3065       if (parser->m_endElementHandler) {
3066         if (parser->m_startElementHandler)
3067           *eventPP = *eventEndPP;
3068         parser->m_endElementHandler(parser->m_handlerArg, name.str);
3069         noElmHandlers = XML_FALSE;
3070       }
3071       if (noElmHandlers && parser->m_defaultHandler)
3072         reportDefault(parser, enc, s, next);
3073       poolClear(&parser->m_tempPool);
3074       freeBindings(parser, bindings);
3075     }
3076       if ((parser->m_tagLevel == 0)
3077           && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3078         if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3079           parser->m_processor = epilogProcessor;
3080         else
3081           return epilogProcessor(parser, next, end, nextPtr);
3082       }
3083       break;
3084     case XML_TOK_END_TAG:
3085       if (parser->m_tagLevel == startTagLevel)
3086         return XML_ERROR_ASYNC_ENTITY;
3087       else {
3088         int len;
3089         const char *rawName;
3090         TAG *tag = parser->m_tagStack;
3091         rawName = s + enc->minBytesPerChar * 2;
3092         len = XmlNameLength(enc, rawName);
3093         if (len != tag->rawNameLength
3094             || memcmp(tag->rawName, rawName, len) != 0) {
3095           *eventPP = rawName;
3096           return XML_ERROR_TAG_MISMATCH;
3097         }
3098         parser->m_tagStack = tag->parent;
3099         tag->parent = parser->m_freeTagList;
3100         parser->m_freeTagList = tag;
3101         --parser->m_tagLevel;
3102         if (parser->m_endElementHandler) {
3103           const XML_Char *localPart;
3104           const XML_Char *prefix;
3105           XML_Char *uri;
3106           localPart = tag->name.localPart;
3107           if (parser->m_ns && localPart) {
3108             /* localPart and prefix may have been overwritten in
3109                tag->name.str, since this points to the binding->uri
3110                buffer which gets reused; so we have to add them again
3111             */
3112             uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3113             /* don't need to check for space - already done in storeAtts() */
3114             while (*localPart)
3115               *uri++ = *localPart++;
3116             prefix = tag->name.prefix;
3117             if (parser->m_ns_triplets && prefix) {
3118               *uri++ = parser->m_namespaceSeparator;
3119               while (*prefix)
3120                 *uri++ = *prefix++;
3121             }
3122             *uri = XML_T('\0');
3123           }
3124           parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3125         } else if (parser->m_defaultHandler)
3126           reportDefault(parser, enc, s, next);
3127         while (tag->bindings) {
3128           BINDING *b = tag->bindings;
3129           if (parser->m_endNamespaceDeclHandler)
3130             parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3131                                               b->prefix->name);
3132           tag->bindings = tag->bindings->nextTagBinding;
3133           b->nextTagBinding = parser->m_freeBindingList;
3134           parser->m_freeBindingList = b;
3135           b->prefix->binding = b->prevPrefixBinding;
3136         }
3137         if ((parser->m_tagLevel == 0)
3138             && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3139           if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3140             parser->m_processor = epilogProcessor;
3141           else
3142             return epilogProcessor(parser, next, end, nextPtr);
3143         }
3144       }
3145       break;
3146     case XML_TOK_CHAR_REF: {
3147       int n = XmlCharRefNumber(enc, s);
3148       if (n < 0)
3149         return XML_ERROR_BAD_CHAR_REF;
3150       if (parser->m_characterDataHandler) {
3151         XML_Char buf[XML_ENCODE_MAX];
3152         parser->m_characterDataHandler(parser->m_handlerArg, buf,
3153                                        XmlEncode(n, (ICHAR *)buf));
3154       } else if (parser->m_defaultHandler)
3155         reportDefault(parser, enc, s, next);
3156     } break;
3157     case XML_TOK_XML_DECL:
3158       return XML_ERROR_MISPLACED_XML_PI;
3159     case XML_TOK_DATA_NEWLINE:
3160       if (parser->m_characterDataHandler) {
3161         XML_Char c = 0xA;
3162         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3163       } else if (parser->m_defaultHandler)
3164         reportDefault(parser, enc, s, next);
3165       break;
3166     case XML_TOK_CDATA_SECT_OPEN: {
3167       enum XML_Error result;
3168       if (parser->m_startCdataSectionHandler)
3169         parser->m_startCdataSectionHandler(parser->m_handlerArg);
3170       /* BEGIN disabled code */
3171       /* Suppose you doing a transformation on a document that involves
3172          changing only the character data.  You set up a defaultHandler
3173          and a characterDataHandler.  The defaultHandler simply copies
3174          characters through.  The characterDataHandler does the
3175          transformation and writes the characters out escaping them as
3176          necessary.  This case will fail to work if we leave out the
3177          following two lines (because & and < inside CDATA sections will
3178          be incorrectly escaped).
3179 
3180          However, now we have a start/endCdataSectionHandler, so it seems
3181          easier to let the user deal with this.
3182       */
3183       else if ((0) && parser->m_characterDataHandler)
3184         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3185                                        0);
3186       /* END disabled code */
3187       else if (parser->m_defaultHandler)
3188         reportDefault(parser, enc, s, next);
3189       result
3190           = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3191       if (result != XML_ERROR_NONE)
3192         return result;
3193       else if (! next) {
3194         parser->m_processor = cdataSectionProcessor;
3195         return result;
3196       }
3197     } break;
3198     case XML_TOK_TRAILING_RSQB:
3199       if (haveMore) {
3200         *nextPtr = s;
3201         return XML_ERROR_NONE;
3202       }
3203       if (parser->m_characterDataHandler) {
3204         if (MUST_CONVERT(enc, s)) {
3205           ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3206           XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3207           parser->m_characterDataHandler(
3208               parser->m_handlerArg, parser->m_dataBuf,
3209               (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3210         } else
3211           parser->m_characterDataHandler(
3212               parser->m_handlerArg, (const XML_Char *)s,
3213               (int)((const XML_Char *)end - (const XML_Char *)s));
3214       } else if (parser->m_defaultHandler)
3215         reportDefault(parser, enc, s, end);
3216       /* We are at the end of the final buffer, should we check for
3217          XML_SUSPENDED, XML_FINISHED?
3218       */
3219       if (startTagLevel == 0) {
3220         *eventPP = end;
3221         return XML_ERROR_NO_ELEMENTS;
3222       }
3223       if (parser->m_tagLevel != startTagLevel) {
3224         *eventPP = end;
3225         return XML_ERROR_ASYNC_ENTITY;
3226       }
3227       *nextPtr = end;
3228       return XML_ERROR_NONE;
3229     case XML_TOK_DATA_CHARS: {
3230       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3231       if (charDataHandler) {
3232         if (MUST_CONVERT(enc, s)) {
3233           for (;;) {
3234             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3235             const enum XML_Convert_Result convert_res = XmlConvert(
3236                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3237             *eventEndPP = s;
3238             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3239                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3240             if ((convert_res == XML_CONVERT_COMPLETED)
3241                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3242               break;
3243             *eventPP = s;
3244           }
3245         } else
3246           charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3247                           (int)((const XML_Char *)next - (const XML_Char *)s));
3248       } else if (parser->m_defaultHandler)
3249         reportDefault(parser, enc, s, next);
3250     } break;
3251     case XML_TOK_PI:
3252       if (! reportProcessingInstruction(parser, enc, s, next))
3253         return XML_ERROR_NO_MEMORY;
3254       break;
3255     case XML_TOK_COMMENT:
3256       if (! reportComment(parser, enc, s, next))
3257         return XML_ERROR_NO_MEMORY;
3258       break;
3259     default:
3260       /* All of the tokens produced by XmlContentTok() have their own
3261        * explicit cases, so this default is not strictly necessary.
3262        * However it is a useful safety net, so we retain the code and
3263        * simply exclude it from the coverage tests.
3264        *
3265        * LCOV_EXCL_START
3266        */
3267       if (parser->m_defaultHandler)
3268         reportDefault(parser, enc, s, next);
3269       break;
3270       /* LCOV_EXCL_STOP */
3271     }
3272     *eventPP = s = next;
3273     switch (parser->m_parsingStatus.parsing) {
3274     case XML_SUSPENDED:
3275       *nextPtr = next;
3276       return XML_ERROR_NONE;
3277     case XML_FINISHED:
3278       return XML_ERROR_ABORTED;
3279     default:;
3280     }
3281   }
3282   /* not reached */
3283 }
3284 
3285 /* This function does not call free() on the allocated memory, merely
3286  * moving it to the parser's m_freeBindingList where it can be freed or
3287  * reused as appropriate.
3288  */
3289 static void
freeBindings(XML_Parser parser,BINDING * bindings)3290 freeBindings(XML_Parser parser, BINDING *bindings) {
3291   while (bindings) {
3292     BINDING *b = bindings;
3293 
3294     /* m_startNamespaceDeclHandler will have been called for this
3295      * binding in addBindings(), so call the end handler now.
3296      */
3297     if (parser->m_endNamespaceDeclHandler)
3298       parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3299 
3300     bindings = bindings->nextTagBinding;
3301     b->nextTagBinding = parser->m_freeBindingList;
3302     parser->m_freeBindingList = b;
3303     b->prefix->binding = b->prevPrefixBinding;
3304   }
3305 }
3306 
3307 /* Precondition: all arguments must be non-NULL;
3308    Purpose:
3309    - normalize attributes
3310    - check attributes for well-formedness
3311    - generate namespace aware attribute names (URI, prefix)
3312    - build list of attributes for startElementHandler
3313    - default attributes
3314    - process namespace declarations (check and report them)
3315    - generate namespace aware element name (URI, prefix)
3316 */
3317 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3318 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3319           TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3320           enum XML_Account account) {
3321   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3322   ELEMENT_TYPE *elementType;
3323   int nDefaultAtts;
3324   const XML_Char **appAtts; /* the attribute list for the application */
3325   int attIndex = 0;
3326   int prefixLen;
3327   int i;
3328   int n;
3329   XML_Char *uri;
3330   int nPrefixes = 0;
3331   BINDING *binding;
3332   const XML_Char *localPart;
3333 
3334   /* lookup the element type name */
3335   elementType
3336       = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3337   if (! elementType) {
3338     const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3339     if (! name)
3340       return XML_ERROR_NO_MEMORY;
3341     elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3342                                          sizeof(ELEMENT_TYPE));
3343     if (! elementType)
3344       return XML_ERROR_NO_MEMORY;
3345     if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3346       return XML_ERROR_NO_MEMORY;
3347   }
3348   nDefaultAtts = elementType->nDefaultAtts;
3349 
3350   /* get the attributes from the tokenizer */
3351   n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3352 
3353   /* Detect and prevent integer overflow */
3354   if (n > INT_MAX - nDefaultAtts) {
3355     return XML_ERROR_NO_MEMORY;
3356   }
3357 
3358   if (n + nDefaultAtts > parser->m_attsSize) {
3359     int oldAttsSize = parser->m_attsSize;
3360     ATTRIBUTE *temp;
3361 #ifdef XML_ATTR_INFO
3362     XML_AttrInfo *temp2;
3363 #endif
3364 
3365     /* Detect and prevent integer overflow */
3366     if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3367         || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3368       return XML_ERROR_NO_MEMORY;
3369     }
3370 
3371     parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3372 
3373     /* Detect and prevent integer overflow.
3374      * The preprocessor guard addresses the "always false" warning
3375      * from -Wtype-limits on platforms where
3376      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3377 #if UINT_MAX >= SIZE_MAX
3378     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3379       parser->m_attsSize = oldAttsSize;
3380       return XML_ERROR_NO_MEMORY;
3381     }
3382 #endif
3383 
3384     temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3385                                 parser->m_attsSize * sizeof(ATTRIBUTE));
3386     if (temp == NULL) {
3387       parser->m_attsSize = oldAttsSize;
3388       return XML_ERROR_NO_MEMORY;
3389     }
3390     parser->m_atts = temp;
3391 #ifdef XML_ATTR_INFO
3392     /* Detect and prevent integer overflow.
3393      * The preprocessor guard addresses the "always false" warning
3394      * from -Wtype-limits on platforms where
3395      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3396 #  if UINT_MAX >= SIZE_MAX
3397     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3398       parser->m_attsSize = oldAttsSize;
3399       return XML_ERROR_NO_MEMORY;
3400     }
3401 #  endif
3402 
3403     temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3404                                     parser->m_attsSize * sizeof(XML_AttrInfo));
3405     if (temp2 == NULL) {
3406       parser->m_attsSize = oldAttsSize;
3407       return XML_ERROR_NO_MEMORY;
3408     }
3409     parser->m_attInfo = temp2;
3410 #endif
3411     if (n > oldAttsSize)
3412       XmlGetAttributes(enc, attStr, n, parser->m_atts);
3413   }
3414 
3415   appAtts = (const XML_Char **)parser->m_atts;
3416   for (i = 0; i < n; i++) {
3417     ATTRIBUTE *currAtt = &parser->m_atts[i];
3418 #ifdef XML_ATTR_INFO
3419     XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3420 #endif
3421     /* add the name and value to the attribute list */
3422     ATTRIBUTE_ID *attId
3423         = getAttributeId(parser, enc, currAtt->name,
3424                          currAtt->name + XmlNameLength(enc, currAtt->name));
3425     if (! attId)
3426       return XML_ERROR_NO_MEMORY;
3427 #ifdef XML_ATTR_INFO
3428     currAttInfo->nameStart
3429         = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3430     currAttInfo->nameEnd
3431         = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3432     currAttInfo->valueStart = parser->m_parseEndByteIndex
3433                               - (parser->m_parseEndPtr - currAtt->valuePtr);
3434     currAttInfo->valueEnd = parser->m_parseEndByteIndex
3435                             - (parser->m_parseEndPtr - currAtt->valueEnd);
3436 #endif
3437     /* Detect duplicate attributes by their QNames. This does not work when
3438        namespace processing is turned on and different prefixes for the same
3439        namespace are used. For this case we have a check further down.
3440     */
3441     if ((attId->name)[-1]) {
3442       if (enc == parser->m_encoding)
3443         parser->m_eventPtr = parser->m_atts[i].name;
3444       return XML_ERROR_DUPLICATE_ATTRIBUTE;
3445     }
3446     (attId->name)[-1] = 1;
3447     appAtts[attIndex++] = attId->name;
3448     if (! parser->m_atts[i].normalized) {
3449       enum XML_Error result;
3450       XML_Bool isCdata = XML_TRUE;
3451 
3452       /* figure out whether declared as other than CDATA */
3453       if (attId->maybeTokenized) {
3454         int j;
3455         for (j = 0; j < nDefaultAtts; j++) {
3456           if (attId == elementType->defaultAtts[j].id) {
3457             isCdata = elementType->defaultAtts[j].isCdata;
3458             break;
3459           }
3460         }
3461       }
3462 
3463       /* normalize the attribute value */
3464       result = storeAttributeValue(
3465           parser, enc, isCdata, parser->m_atts[i].valuePtr,
3466           parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3467       if (result)
3468         return result;
3469       appAtts[attIndex] = poolStart(&parser->m_tempPool);
3470       poolFinish(&parser->m_tempPool);
3471     } else {
3472       /* the value did not need normalizing */
3473       appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3474                                           parser->m_atts[i].valuePtr,
3475                                           parser->m_atts[i].valueEnd);
3476       if (appAtts[attIndex] == 0)
3477         return XML_ERROR_NO_MEMORY;
3478       poolFinish(&parser->m_tempPool);
3479     }
3480     /* handle prefixed attribute names */
3481     if (attId->prefix) {
3482       if (attId->xmlns) {
3483         /* deal with namespace declarations here */
3484         enum XML_Error result = addBinding(parser, attId->prefix, attId,
3485                                            appAtts[attIndex], bindingsPtr);
3486         if (result)
3487           return result;
3488         --attIndex;
3489       } else {
3490         /* deal with other prefixed names later */
3491         attIndex++;
3492         nPrefixes++;
3493         (attId->name)[-1] = 2;
3494       }
3495     } else
3496       attIndex++;
3497   }
3498 
3499   /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3500   parser->m_nSpecifiedAtts = attIndex;
3501   if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3502     for (i = 0; i < attIndex; i += 2)
3503       if (appAtts[i] == elementType->idAtt->name) {
3504         parser->m_idAttIndex = i;
3505         break;
3506       }
3507   } else
3508     parser->m_idAttIndex = -1;
3509 
3510   /* do attribute defaulting */
3511   for (i = 0; i < nDefaultAtts; i++) {
3512     const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3513     if (! (da->id->name)[-1] && da->value) {
3514       if (da->id->prefix) {
3515         if (da->id->xmlns) {
3516           enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3517                                              da->value, bindingsPtr);
3518           if (result)
3519             return result;
3520         } else {
3521           (da->id->name)[-1] = 2;
3522           nPrefixes++;
3523           appAtts[attIndex++] = da->id->name;
3524           appAtts[attIndex++] = da->value;
3525         }
3526       } else {
3527         (da->id->name)[-1] = 1;
3528         appAtts[attIndex++] = da->id->name;
3529         appAtts[attIndex++] = da->value;
3530       }
3531     }
3532   }
3533   appAtts[attIndex] = 0;
3534 
3535   /* expand prefixed attribute names, check for duplicates,
3536      and clear flags that say whether attributes were specified */
3537   i = 0;
3538   if (nPrefixes) {
3539     int j; /* hash table index */
3540     unsigned long version = parser->m_nsAttsVersion;
3541 
3542     /* Detect and prevent invalid shift */
3543     if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3544       return XML_ERROR_NO_MEMORY;
3545     }
3546 
3547     unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3548     unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3549     /* size of hash table must be at least 2 * (# of prefixed attributes) */
3550     if ((nPrefixes << 1)
3551         >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3552       NS_ATT *temp;
3553       /* hash table size must also be a power of 2 and >= 8 */
3554       while (nPrefixes >> parser->m_nsAttsPower++)
3555         ;
3556       if (parser->m_nsAttsPower < 3)
3557         parser->m_nsAttsPower = 3;
3558 
3559       /* Detect and prevent invalid shift */
3560       if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3561         /* Restore actual size of memory in m_nsAtts */
3562         parser->m_nsAttsPower = oldNsAttsPower;
3563         return XML_ERROR_NO_MEMORY;
3564       }
3565 
3566       nsAttsSize = 1u << parser->m_nsAttsPower;
3567 
3568       /* Detect and prevent integer overflow.
3569        * The preprocessor guard addresses the "always false" warning
3570        * from -Wtype-limits on platforms where
3571        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3572 #if UINT_MAX >= SIZE_MAX
3573       if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3574         /* Restore actual size of memory in m_nsAtts */
3575         parser->m_nsAttsPower = oldNsAttsPower;
3576         return XML_ERROR_NO_MEMORY;
3577       }
3578 #endif
3579 
3580       temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3581                                nsAttsSize * sizeof(NS_ATT));
3582       if (! temp) {
3583         /* Restore actual size of memory in m_nsAtts */
3584         parser->m_nsAttsPower = oldNsAttsPower;
3585         return XML_ERROR_NO_MEMORY;
3586       }
3587       parser->m_nsAtts = temp;
3588       version = 0; /* force re-initialization of m_nsAtts hash table */
3589     }
3590     /* using a version flag saves us from initializing m_nsAtts every time */
3591     if (! version) { /* initialize version flags when version wraps around */
3592       version = INIT_ATTS_VERSION;
3593       for (j = nsAttsSize; j != 0;)
3594         parser->m_nsAtts[--j].version = version;
3595     }
3596     parser->m_nsAttsVersion = --version;
3597 
3598     /* expand prefixed names and check for duplicates */
3599     for (; i < attIndex; i += 2) {
3600       const XML_Char *s = appAtts[i];
3601       if (s[-1] == 2) { /* prefixed */
3602         ATTRIBUTE_ID *id;
3603         const BINDING *b;
3604         unsigned long uriHash;
3605         struct siphash sip_state;
3606         struct sipkey sip_key;
3607 
3608         copy_salt_to_sipkey(parser, &sip_key);
3609         sip24_init(&sip_state, &sip_key);
3610 
3611         ((XML_Char *)s)[-1] = 0; /* clear flag */
3612         id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3613         if (! id || ! id->prefix) {
3614           /* This code is walking through the appAtts array, dealing
3615            * with (in this case) a prefixed attribute name.  To be in
3616            * the array, the attribute must have already been bound, so
3617            * has to have passed through the hash table lookup once
3618            * already.  That implies that an entry for it already
3619            * exists, so the lookup above will return a pointer to
3620            * already allocated memory.  There is no opportunaity for
3621            * the allocator to fail, so the condition above cannot be
3622            * fulfilled.
3623            *
3624            * Since it is difficult to be certain that the above
3625            * analysis is complete, we retain the test and merely
3626            * remove the code from coverage tests.
3627            */
3628           return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3629         }
3630         b = id->prefix->binding;
3631         if (! b)
3632           return XML_ERROR_UNBOUND_PREFIX;
3633 
3634         for (j = 0; j < b->uriLen; j++) {
3635           const XML_Char c = b->uri[j];
3636           if (! poolAppendChar(&parser->m_tempPool, c))
3637             return XML_ERROR_NO_MEMORY;
3638         }
3639 
3640         sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3641 
3642         while (*s++ != XML_T(ASCII_COLON))
3643           ;
3644 
3645         sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3646 
3647         do { /* copies null terminator */
3648           if (! poolAppendChar(&parser->m_tempPool, *s))
3649             return XML_ERROR_NO_MEMORY;
3650         } while (*s++);
3651 
3652         uriHash = (unsigned long)sip24_final(&sip_state);
3653 
3654         { /* Check hash table for duplicate of expanded name (uriName).
3655              Derived from code in lookup(parser, HASH_TABLE *table, ...).
3656           */
3657           unsigned char step = 0;
3658           unsigned long mask = nsAttsSize - 1;
3659           j = uriHash & mask; /* index into hash table */
3660           while (parser->m_nsAtts[j].version == version) {
3661             /* for speed we compare stored hash values first */
3662             if (uriHash == parser->m_nsAtts[j].hash) {
3663               const XML_Char *s1 = poolStart(&parser->m_tempPool);
3664               const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3665               /* s1 is null terminated, but not s2 */
3666               for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3667                 ;
3668               if (*s1 == 0)
3669                 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3670             }
3671             if (! step)
3672               step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3673             j < step ? (j += nsAttsSize - step) : (j -= step);
3674           }
3675         }
3676 
3677         if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3678           parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3679           s = b->prefix->name;
3680           do {
3681             if (! poolAppendChar(&parser->m_tempPool, *s))
3682               return XML_ERROR_NO_MEMORY;
3683           } while (*s++);
3684         }
3685 
3686         /* store expanded name in attribute list */
3687         s = poolStart(&parser->m_tempPool);
3688         poolFinish(&parser->m_tempPool);
3689         appAtts[i] = s;
3690 
3691         /* fill empty slot with new version, uriName and hash value */
3692         parser->m_nsAtts[j].version = version;
3693         parser->m_nsAtts[j].hash = uriHash;
3694         parser->m_nsAtts[j].uriName = s;
3695 
3696         if (! --nPrefixes) {
3697           i += 2;
3698           break;
3699         }
3700       } else                     /* not prefixed */
3701         ((XML_Char *)s)[-1] = 0; /* clear flag */
3702     }
3703   }
3704   /* clear flags for the remaining attributes */
3705   for (; i < attIndex; i += 2)
3706     ((XML_Char *)(appAtts[i]))[-1] = 0;
3707   for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3708     binding->attId->name[-1] = 0;
3709 
3710   if (! parser->m_ns)
3711     return XML_ERROR_NONE;
3712 
3713   /* expand the element type name */
3714   if (elementType->prefix) {
3715     binding = elementType->prefix->binding;
3716     if (! binding)
3717       return XML_ERROR_UNBOUND_PREFIX;
3718     localPart = tagNamePtr->str;
3719     while (*localPart++ != XML_T(ASCII_COLON))
3720       ;
3721   } else if (dtd->defaultPrefix.binding) {
3722     binding = dtd->defaultPrefix.binding;
3723     localPart = tagNamePtr->str;
3724   } else
3725     return XML_ERROR_NONE;
3726   prefixLen = 0;
3727   if (parser->m_ns_triplets && binding->prefix->name) {
3728     for (; binding->prefix->name[prefixLen++];)
3729       ; /* prefixLen includes null terminator */
3730   }
3731   tagNamePtr->localPart = localPart;
3732   tagNamePtr->uriLen = binding->uriLen;
3733   tagNamePtr->prefix = binding->prefix->name;
3734   tagNamePtr->prefixLen = prefixLen;
3735   for (i = 0; localPart[i++];)
3736     ; /* i includes null terminator */
3737 
3738   /* Detect and prevent integer overflow */
3739   if (binding->uriLen > INT_MAX - prefixLen
3740       || i > INT_MAX - (binding->uriLen + prefixLen)) {
3741     return XML_ERROR_NO_MEMORY;
3742   }
3743 
3744   n = i + binding->uriLen + prefixLen;
3745   if (n > binding->uriAlloc) {
3746     TAG *p;
3747 
3748     /* Detect and prevent integer overflow */
3749     if (n > INT_MAX - EXPAND_SPARE) {
3750       return XML_ERROR_NO_MEMORY;
3751     }
3752     /* Detect and prevent integer overflow.
3753      * The preprocessor guard addresses the "always false" warning
3754      * from -Wtype-limits on platforms where
3755      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3756 #if UINT_MAX >= SIZE_MAX
3757     if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3758       return XML_ERROR_NO_MEMORY;
3759     }
3760 #endif
3761 
3762     uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3763     if (! uri)
3764       return XML_ERROR_NO_MEMORY;
3765     binding->uriAlloc = n + EXPAND_SPARE;
3766     memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3767     for (p = parser->m_tagStack; p; p = p->parent)
3768       if (p->name.str == binding->uri)
3769         p->name.str = uri;
3770     FREE(parser, binding->uri);
3771     binding->uri = uri;
3772   }
3773   /* if m_namespaceSeparator != '\0' then uri includes it already */
3774   uri = binding->uri + binding->uriLen;
3775   memcpy(uri, localPart, i * sizeof(XML_Char));
3776   /* we always have a namespace separator between localPart and prefix */
3777   if (prefixLen) {
3778     uri += i - 1;
3779     *uri = parser->m_namespaceSeparator; /* replace null terminator */
3780     memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3781   }
3782   tagNamePtr->str = binding->uri;
3783   return XML_ERROR_NONE;
3784 }
3785 
3786 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3787 is_rfc3986_uri_char(XML_Char candidate) {
3788   // For the RFC 3986 ANBF grammar see
3789   // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3790 
3791   switch (candidate) {
3792   // From rule "ALPHA" (uppercase half)
3793   case 'A':
3794   case 'B':
3795   case 'C':
3796   case 'D':
3797   case 'E':
3798   case 'F':
3799   case 'G':
3800   case 'H':
3801   case 'I':
3802   case 'J':
3803   case 'K':
3804   case 'L':
3805   case 'M':
3806   case 'N':
3807   case 'O':
3808   case 'P':
3809   case 'Q':
3810   case 'R':
3811   case 'S':
3812   case 'T':
3813   case 'U':
3814   case 'V':
3815   case 'W':
3816   case 'X':
3817   case 'Y':
3818   case 'Z':
3819 
3820   // From rule "ALPHA" (lowercase half)
3821   case 'a':
3822   case 'b':
3823   case 'c':
3824   case 'd':
3825   case 'e':
3826   case 'f':
3827   case 'g':
3828   case 'h':
3829   case 'i':
3830   case 'j':
3831   case 'k':
3832   case 'l':
3833   case 'm':
3834   case 'n':
3835   case 'o':
3836   case 'p':
3837   case 'q':
3838   case 'r':
3839   case 's':
3840   case 't':
3841   case 'u':
3842   case 'v':
3843   case 'w':
3844   case 'x':
3845   case 'y':
3846   case 'z':
3847 
3848   // From rule "DIGIT"
3849   case '0':
3850   case '1':
3851   case '2':
3852   case '3':
3853   case '4':
3854   case '5':
3855   case '6':
3856   case '7':
3857   case '8':
3858   case '9':
3859 
3860   // From rule "pct-encoded"
3861   case '%':
3862 
3863   // From rule "unreserved"
3864   case '-':
3865   case '.':
3866   case '_':
3867   case '~':
3868 
3869   // From rule "gen-delims"
3870   case ':':
3871   case '/':
3872   case '?':
3873   case '#':
3874   case '[':
3875   case ']':
3876   case '@':
3877 
3878   // From rule "sub-delims"
3879   case '!':
3880   case '$':
3881   case '&':
3882   case '\'':
3883   case '(':
3884   case ')':
3885   case '*':
3886   case '+':
3887   case ',':
3888   case ';':
3889   case '=':
3890     return XML_TRUE;
3891 
3892   default:
3893     return XML_FALSE;
3894   }
3895 }
3896 
3897 /* addBinding() overwrites the value of prefix->binding without checking.
3898    Therefore one must keep track of the old value outside of addBinding().
3899 */
3900 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3901 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3902            const XML_Char *uri, BINDING **bindingsPtr) {
3903   // "http://www.w3.org/XML/1998/namespace"
3904   static const XML_Char xmlNamespace[]
3905       = {ASCII_h,      ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,
3906          ASCII_SLASH,  ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,
3907          ASCII_PERIOD, ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,
3908          ASCII_r,      ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,
3909          ASCII_L,      ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,
3910          ASCII_8,      ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,
3911          ASCII_e,      ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,
3912          ASCII_e,      '\0'};
3913   static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3914   // "http://www.w3.org/2000/xmlns/"
3915   static const XML_Char xmlnsNamespace[]
3916       = {ASCII_h,     ASCII_t,      ASCII_t, ASCII_p, ASCII_COLON,  ASCII_SLASH,
3917          ASCII_SLASH, ASCII_w,      ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3918          ASCII_3,     ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,      ASCII_SLASH,
3919          ASCII_2,     ASCII_0,      ASCII_0, ASCII_0, ASCII_SLASH,  ASCII_x,
3920          ASCII_m,     ASCII_l,      ASCII_n, ASCII_s, ASCII_SLASH,  '\0'};
3921   static const int xmlnsLen
3922       = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3923 
3924   XML_Bool mustBeXML = XML_FALSE;
3925   XML_Bool isXML = XML_TRUE;
3926   XML_Bool isXMLNS = XML_TRUE;
3927 
3928   BINDING *b;
3929   int len;
3930 
3931   /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3932   if (*uri == XML_T('\0') && prefix->name)
3933     return XML_ERROR_UNDECLARING_PREFIX;
3934 
3935   if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3936       && prefix->name[1] == XML_T(ASCII_m)
3937       && prefix->name[2] == XML_T(ASCII_l)) {
3938     /* Not allowed to bind xmlns */
3939     if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3940         && prefix->name[5] == XML_T('\0'))
3941       return XML_ERROR_RESERVED_PREFIX_XMLNS;
3942 
3943     if (prefix->name[3] == XML_T('\0'))
3944       mustBeXML = XML_TRUE;
3945   }
3946 
3947   for (len = 0; uri[len]; len++) {
3948     if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3949       isXML = XML_FALSE;
3950 
3951     if (! mustBeXML && isXMLNS
3952         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3953       isXMLNS = XML_FALSE;
3954 
3955     // NOTE: While Expat does not validate namespace URIs against RFC 3986
3956     //       today (and is not REQUIRED to do so with regard to the XML 1.0
3957     //       namespaces specification) we have to at least make sure, that
3958     //       the application on top of Expat (that is likely splitting expanded
3959     //       element names ("qualified names") of form
3960     //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3961     //       in its element handler code) cannot be confused by an attacker
3962     //       putting additional namespace separator characters into namespace
3963     //       declarations.  That would be ambiguous and not to be expected.
3964     //
3965     //       While the HTML API docs of function XML_ParserCreateNS have been
3966     //       advising against use of a namespace separator character that can
3967     //       appear in a URI for >20 years now, some widespread applications
3968     //       are using URI characters (':' (colon) in particular) for a
3969     //       namespace separator, in practice.  To keep these applications
3970     //       functional, we only reject namespaces URIs containing the
3971     //       application-chosen namespace separator if the chosen separator
3972     //       is a non-URI character with regard to RFC 3986.
3973     if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3974         && ! is_rfc3986_uri_char(uri[len])) {
3975       return XML_ERROR_SYNTAX;
3976     }
3977   }
3978   isXML = isXML && len == xmlLen;
3979   isXMLNS = isXMLNS && len == xmlnsLen;
3980 
3981   if (mustBeXML != isXML)
3982     return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3983                      : XML_ERROR_RESERVED_NAMESPACE_URI;
3984 
3985   if (isXMLNS)
3986     return XML_ERROR_RESERVED_NAMESPACE_URI;
3987 
3988   if (parser->m_namespaceSeparator)
3989     len++;
3990   if (parser->m_freeBindingList) {
3991     b = parser->m_freeBindingList;
3992     if (len > b->uriAlloc) {
3993       /* Detect and prevent integer overflow */
3994       if (len > INT_MAX - EXPAND_SPARE) {
3995         return XML_ERROR_NO_MEMORY;
3996       }
3997 
3998       /* Detect and prevent integer overflow.
3999        * The preprocessor guard addresses the "always false" warning
4000        * from -Wtype-limits on platforms where
4001        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4002 #if UINT_MAX >= SIZE_MAX
4003       if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4004         return XML_ERROR_NO_MEMORY;
4005       }
4006 #endif
4007 
4008       XML_Char *temp = (XML_Char *)REALLOC(
4009           parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4010       if (temp == NULL)
4011         return XML_ERROR_NO_MEMORY;
4012       b->uri = temp;
4013       b->uriAlloc = len + EXPAND_SPARE;
4014     }
4015     parser->m_freeBindingList = b->nextTagBinding;
4016   } else {
4017     b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4018     if (! b)
4019       return XML_ERROR_NO_MEMORY;
4020 
4021     /* Detect and prevent integer overflow */
4022     if (len > INT_MAX - EXPAND_SPARE) {
4023       return XML_ERROR_NO_MEMORY;
4024     }
4025     /* Detect and prevent integer overflow.
4026      * The preprocessor guard addresses the "always false" warning
4027      * from -Wtype-limits on platforms where
4028      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4029 #if UINT_MAX >= SIZE_MAX
4030     if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4031       return XML_ERROR_NO_MEMORY;
4032     }
4033 #endif
4034 
4035     b->uri
4036         = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4037     if (! b->uri) {
4038       FREE(parser, b);
4039       return XML_ERROR_NO_MEMORY;
4040     }
4041     b->uriAlloc = len + EXPAND_SPARE;
4042   }
4043   b->uriLen = len;
4044   memcpy(b->uri, uri, len * sizeof(XML_Char));
4045   if (parser->m_namespaceSeparator)
4046     b->uri[len - 1] = parser->m_namespaceSeparator;
4047   b->prefix = prefix;
4048   b->attId = attId;
4049   b->prevPrefixBinding = prefix->binding;
4050   /* NULL binding when default namespace undeclared */
4051   if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4052     prefix->binding = NULL;
4053   else
4054     prefix->binding = b;
4055   b->nextTagBinding = *bindingsPtr;
4056   *bindingsPtr = b;
4057   /* if attId == NULL then we are not starting a namespace scope */
4058   if (attId && parser->m_startNamespaceDeclHandler)
4059     parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4060                                         prefix->binding ? uri : 0);
4061   return XML_ERROR_NONE;
4062 }
4063 
4064 /* The idea here is to avoid using stack for each CDATA section when
4065    the whole file is parsed with one call.
4066 */
4067 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4068 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4069                       const char **endPtr) {
4070   enum XML_Error result = doCdataSection(
4071       parser, parser->m_encoding, &start, end, endPtr,
4072       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4073   if (result != XML_ERROR_NONE)
4074     return result;
4075   if (start) {
4076     if (parser->m_parentParser) { /* we are parsing an external entity */
4077       parser->m_processor = externalEntityContentProcessor;
4078       return externalEntityContentProcessor(parser, start, end, endPtr);
4079     } else {
4080       parser->m_processor = contentProcessor;
4081       return contentProcessor(parser, start, end, endPtr);
4082     }
4083   }
4084   return result;
4085 }
4086 
4087 /* startPtr gets set to non-null if the section is closed, and to null if
4088    the section is not yet closed.
4089 */
4090 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)4091 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4092                const char *end, const char **nextPtr, XML_Bool haveMore,
4093                enum XML_Account account) {
4094   const char *s = *startPtr;
4095   const char **eventPP;
4096   const char **eventEndPP;
4097   if (enc == parser->m_encoding) {
4098     eventPP = &parser->m_eventPtr;
4099     *eventPP = s;
4100     eventEndPP = &parser->m_eventEndPtr;
4101   } else {
4102     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4103     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4104   }
4105   *eventPP = s;
4106   *startPtr = NULL;
4107 
4108   for (;;) {
4109     const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4110     int tok = XmlCdataSectionTok(enc, s, end, &next);
4111 #if XML_GE == 1
4112     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4113       accountingOnAbort(parser);
4114       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4115     }
4116 #else
4117     UNUSED_P(account);
4118 #endif
4119     *eventEndPP = next;
4120     switch (tok) {
4121     case XML_TOK_CDATA_SECT_CLOSE:
4122       if (parser->m_endCdataSectionHandler)
4123         parser->m_endCdataSectionHandler(parser->m_handlerArg);
4124       /* BEGIN disabled code */
4125       /* see comment under XML_TOK_CDATA_SECT_OPEN */
4126       else if ((0) && parser->m_characterDataHandler)
4127         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4128                                        0);
4129       /* END disabled code */
4130       else if (parser->m_defaultHandler)
4131         reportDefault(parser, enc, s, next);
4132       *startPtr = next;
4133       *nextPtr = next;
4134       if (parser->m_parsingStatus.parsing == XML_FINISHED)
4135         return XML_ERROR_ABORTED;
4136       else
4137         return XML_ERROR_NONE;
4138     case XML_TOK_DATA_NEWLINE:
4139       if (parser->m_characterDataHandler) {
4140         XML_Char c = 0xA;
4141         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4142       } else if (parser->m_defaultHandler)
4143         reportDefault(parser, enc, s, next);
4144       break;
4145     case XML_TOK_DATA_CHARS: {
4146       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4147       if (charDataHandler) {
4148         if (MUST_CONVERT(enc, s)) {
4149           for (;;) {
4150             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4151             const enum XML_Convert_Result convert_res = XmlConvert(
4152                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4153             *eventEndPP = next;
4154             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4155                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4156             if ((convert_res == XML_CONVERT_COMPLETED)
4157                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4158               break;
4159             *eventPP = s;
4160           }
4161         } else
4162           charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4163                           (int)((const XML_Char *)next - (const XML_Char *)s));
4164       } else if (parser->m_defaultHandler)
4165         reportDefault(parser, enc, s, next);
4166     } break;
4167     case XML_TOK_INVALID:
4168       *eventPP = next;
4169       return XML_ERROR_INVALID_TOKEN;
4170     case XML_TOK_PARTIAL_CHAR:
4171       if (haveMore) {
4172         *nextPtr = s;
4173         return XML_ERROR_NONE;
4174       }
4175       return XML_ERROR_PARTIAL_CHAR;
4176     case XML_TOK_PARTIAL:
4177     case XML_TOK_NONE:
4178       if (haveMore) {
4179         *nextPtr = s;
4180         return XML_ERROR_NONE;
4181       }
4182       return XML_ERROR_UNCLOSED_CDATA_SECTION;
4183     default:
4184       /* Every token returned by XmlCdataSectionTok() has its own
4185        * explicit case, so this default case will never be executed.
4186        * We retain it as a safety net and exclude it from the coverage
4187        * statistics.
4188        *
4189        * LCOV_EXCL_START
4190        */
4191       *eventPP = next;
4192       return XML_ERROR_UNEXPECTED_STATE;
4193       /* LCOV_EXCL_STOP */
4194     }
4195 
4196     *eventPP = s = next;
4197     switch (parser->m_parsingStatus.parsing) {
4198     case XML_SUSPENDED:
4199       *nextPtr = next;
4200       return XML_ERROR_NONE;
4201     case XML_FINISHED:
4202       return XML_ERROR_ABORTED;
4203     default:;
4204     }
4205   }
4206   /* not reached */
4207 }
4208 
4209 #ifdef XML_DTD
4210 
4211 /* The idea here is to avoid using stack for each IGNORE section when
4212    the whole file is parsed with one call.
4213 */
4214 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4215 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4216                        const char **endPtr) {
4217   enum XML_Error result
4218       = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4219                         (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4220   if (result != XML_ERROR_NONE)
4221     return result;
4222   if (start) {
4223     parser->m_processor = prologProcessor;
4224     return prologProcessor(parser, start, end, endPtr);
4225   }
4226   return result;
4227 }
4228 
4229 /* startPtr gets set to non-null is the section is closed, and to null
4230    if the section is not yet closed.
4231 */
4232 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4233 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4234                 const char *end, const char **nextPtr, XML_Bool haveMore) {
4235   const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4236   int tok;
4237   const char *s = *startPtr;
4238   const char **eventPP;
4239   const char **eventEndPP;
4240   if (enc == parser->m_encoding) {
4241     eventPP = &parser->m_eventPtr;
4242     *eventPP = s;
4243     eventEndPP = &parser->m_eventEndPtr;
4244   } else {
4245     /* It's not entirely clear, but it seems the following two lines
4246      * of code cannot be executed.  The only occasions on which 'enc'
4247      * is not 'encoding' are when this function is called
4248      * from the internal entity processing, and IGNORE sections are an
4249      * error in internal entities.
4250      *
4251      * Since it really isn't clear that this is true, we keep the code
4252      * and just remove it from our coverage tests.
4253      *
4254      * LCOV_EXCL_START
4255      */
4256     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4257     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4258     /* LCOV_EXCL_STOP */
4259   }
4260   *eventPP = s;
4261   *startPtr = NULL;
4262   tok = XmlIgnoreSectionTok(enc, s, end, &next);
4263 #  if XML_GE == 1
4264   if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4265                                 XML_ACCOUNT_DIRECT)) {
4266     accountingOnAbort(parser);
4267     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4268   }
4269 #  endif
4270   *eventEndPP = next;
4271   switch (tok) {
4272   case XML_TOK_IGNORE_SECT:
4273     if (parser->m_defaultHandler)
4274       reportDefault(parser, enc, s, next);
4275     *startPtr = next;
4276     *nextPtr = next;
4277     if (parser->m_parsingStatus.parsing == XML_FINISHED)
4278       return XML_ERROR_ABORTED;
4279     else
4280       return XML_ERROR_NONE;
4281   case XML_TOK_INVALID:
4282     *eventPP = next;
4283     return XML_ERROR_INVALID_TOKEN;
4284   case XML_TOK_PARTIAL_CHAR:
4285     if (haveMore) {
4286       *nextPtr = s;
4287       return XML_ERROR_NONE;
4288     }
4289     return XML_ERROR_PARTIAL_CHAR;
4290   case XML_TOK_PARTIAL:
4291   case XML_TOK_NONE:
4292     if (haveMore) {
4293       *nextPtr = s;
4294       return XML_ERROR_NONE;
4295     }
4296     return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4297   default:
4298     /* All of the tokens that XmlIgnoreSectionTok() returns have
4299      * explicit cases to handle them, so this default case is never
4300      * executed.  We keep it as a safety net anyway, and remove it
4301      * from our test coverage statistics.
4302      *
4303      * LCOV_EXCL_START
4304      */
4305     *eventPP = next;
4306     return XML_ERROR_UNEXPECTED_STATE;
4307     /* LCOV_EXCL_STOP */
4308   }
4309   /* not reached */
4310 }
4311 
4312 #endif /* XML_DTD */
4313 
4314 static enum XML_Error
initializeEncoding(XML_Parser parser)4315 initializeEncoding(XML_Parser parser) {
4316   const char *s;
4317 #ifdef XML_UNICODE
4318   char encodingBuf[128];
4319   /* See comments about `protocolEncodingName` in parserInit() */
4320   if (! parser->m_protocolEncodingName)
4321     s = NULL;
4322   else {
4323     int i;
4324     for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4325       if (i == sizeof(encodingBuf) - 1
4326           || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4327         encodingBuf[0] = '\0';
4328         break;
4329       }
4330       encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4331     }
4332     encodingBuf[i] = '\0';
4333     s = encodingBuf;
4334   }
4335 #else
4336   s = parser->m_protocolEncodingName;
4337 #endif
4338   if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4339           &parser->m_initEncoding, &parser->m_encoding, s))
4340     return XML_ERROR_NONE;
4341   return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4342 }
4343 
4344 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4345 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4346                const char *next) {
4347   const char *encodingName = NULL;
4348   const XML_Char *storedEncName = NULL;
4349   const ENCODING *newEncoding = NULL;
4350   const char *version = NULL;
4351   const char *versionend = NULL;
4352   const XML_Char *storedversion = NULL;
4353   int standalone = -1;
4354 
4355 #if XML_GE == 1
4356   if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4357                                 XML_ACCOUNT_DIRECT)) {
4358     accountingOnAbort(parser);
4359     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4360   }
4361 #endif
4362 
4363   if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4364           isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4365           &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4366     if (isGeneralTextEntity)
4367       return XML_ERROR_TEXT_DECL;
4368     else
4369       return XML_ERROR_XML_DECL;
4370   }
4371   if (! isGeneralTextEntity && standalone == 1) {
4372     parser->m_dtd->standalone = XML_TRUE;
4373 #ifdef XML_DTD
4374     if (parser->m_paramEntityParsing
4375         == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4376       parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4377 #endif /* XML_DTD */
4378   }
4379   if (parser->m_xmlDeclHandler) {
4380     if (encodingName != NULL) {
4381       storedEncName = poolStoreString(
4382           &parser->m_temp2Pool, parser->m_encoding, encodingName,
4383           encodingName + XmlNameLength(parser->m_encoding, encodingName));
4384       if (! storedEncName)
4385         return XML_ERROR_NO_MEMORY;
4386       poolFinish(&parser->m_temp2Pool);
4387     }
4388     if (version) {
4389       storedversion
4390           = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4391                             versionend - parser->m_encoding->minBytesPerChar);
4392       if (! storedversion)
4393         return XML_ERROR_NO_MEMORY;
4394     }
4395     parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4396                              standalone);
4397   } else if (parser->m_defaultHandler)
4398     reportDefault(parser, parser->m_encoding, s, next);
4399   if (parser->m_protocolEncodingName == NULL) {
4400     if (newEncoding) {
4401       /* Check that the specified encoding does not conflict with what
4402        * the parser has already deduced.  Do we have the same number
4403        * of bytes in the smallest representation of a character?  If
4404        * this is UTF-16, is it the same endianness?
4405        */
4406       if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4407           || (newEncoding->minBytesPerChar == 2
4408               && newEncoding != parser->m_encoding)) {
4409         parser->m_eventPtr = encodingName;
4410         return XML_ERROR_INCORRECT_ENCODING;
4411       }
4412       parser->m_encoding = newEncoding;
4413     } else if (encodingName) {
4414       enum XML_Error result;
4415       if (! storedEncName) {
4416         storedEncName = poolStoreString(
4417             &parser->m_temp2Pool, parser->m_encoding, encodingName,
4418             encodingName + XmlNameLength(parser->m_encoding, encodingName));
4419         if (! storedEncName)
4420           return XML_ERROR_NO_MEMORY;
4421       }
4422       result = handleUnknownEncoding(parser, storedEncName);
4423       poolClear(&parser->m_temp2Pool);
4424       if (result == XML_ERROR_UNKNOWN_ENCODING)
4425         parser->m_eventPtr = encodingName;
4426       return result;
4427     }
4428   }
4429 
4430   if (storedEncName || storedversion)
4431     poolClear(&parser->m_temp2Pool);
4432 
4433   return XML_ERROR_NONE;
4434 }
4435 
4436 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4437 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4438   if (parser->m_unknownEncodingHandler) {
4439     XML_Encoding info;
4440     int i;
4441     for (i = 0; i < 256; i++)
4442       info.map[i] = -1;
4443     info.convert = NULL;
4444     info.data = NULL;
4445     info.release = NULL;
4446     if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4447                                          encodingName, &info)) {
4448       ENCODING *enc;
4449       parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4450       if (! parser->m_unknownEncodingMem) {
4451         if (info.release)
4452           info.release(info.data);
4453         return XML_ERROR_NO_MEMORY;
4454       }
4455       enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4456           parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4457       if (enc) {
4458         parser->m_unknownEncodingData = info.data;
4459         parser->m_unknownEncodingRelease = info.release;
4460         parser->m_encoding = enc;
4461         return XML_ERROR_NONE;
4462       }
4463     }
4464     if (info.release != NULL)
4465       info.release(info.data);
4466   }
4467   return XML_ERROR_UNKNOWN_ENCODING;
4468 }
4469 
4470 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4471 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4472                     const char **nextPtr) {
4473   enum XML_Error result = initializeEncoding(parser);
4474   if (result != XML_ERROR_NONE)
4475     return result;
4476   parser->m_processor = prologProcessor;
4477   return prologProcessor(parser, s, end, nextPtr);
4478 }
4479 
4480 #ifdef XML_DTD
4481 
4482 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4483 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4484                             const char **nextPtr) {
4485   enum XML_Error result = initializeEncoding(parser);
4486   if (result != XML_ERROR_NONE)
4487     return result;
4488 
4489   /* we know now that XML_Parse(Buffer) has been called,
4490      so we consider the external parameter entity read */
4491   parser->m_dtd->paramEntityRead = XML_TRUE;
4492 
4493   if (parser->m_prologState.inEntityValue) {
4494     parser->m_processor = entityValueInitProcessor;
4495     return entityValueInitProcessor(parser, s, end, nextPtr);
4496   } else {
4497     parser->m_processor = externalParEntProcessor;
4498     return externalParEntProcessor(parser, s, end, nextPtr);
4499   }
4500 }
4501 
4502 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4503 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4504                          const char **nextPtr) {
4505   int tok;
4506   const char *start = s;
4507   const char *next = start;
4508   parser->m_eventPtr = start;
4509 
4510   for (;;) {
4511     tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4512     /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4513              - storeEntityValue
4514              - processXmlDecl
4515     */
4516     parser->m_eventEndPtr = next;
4517     if (tok <= 0) {
4518       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4519         *nextPtr = s;
4520         return XML_ERROR_NONE;
4521       }
4522       switch (tok) {
4523       case XML_TOK_INVALID:
4524         return XML_ERROR_INVALID_TOKEN;
4525       case XML_TOK_PARTIAL:
4526         return XML_ERROR_UNCLOSED_TOKEN;
4527       case XML_TOK_PARTIAL_CHAR:
4528         return XML_ERROR_PARTIAL_CHAR;
4529       case XML_TOK_NONE: /* start == end */
4530       default:
4531         break;
4532       }
4533       /* found end of entity value - can store it now */
4534       return storeEntityValue(parser, parser->m_encoding, s, end,
4535                               XML_ACCOUNT_DIRECT);
4536     } else if (tok == XML_TOK_XML_DECL) {
4537       enum XML_Error result;
4538       result = processXmlDecl(parser, 0, start, next);
4539       if (result != XML_ERROR_NONE)
4540         return result;
4541       /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED.  For
4542        * that to happen, a parameter entity parsing handler must have attempted
4543        * to suspend the parser, which fails and raises an error.  The parser can
4544        * be aborted, but can't be suspended.
4545        */
4546       if (parser->m_parsingStatus.parsing == XML_FINISHED)
4547         return XML_ERROR_ABORTED;
4548       *nextPtr = next;
4549       /* stop scanning for text declaration - we found one */
4550       parser->m_processor = entityValueProcessor;
4551       return entityValueProcessor(parser, next, end, nextPtr);
4552     }
4553     /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4554        must move s and nextPtr forward to consume the BOM.
4555 
4556        If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4557        would leave the BOM in the buffer and return. On the next call to this
4558        function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4559        is not valid to have multiple BOMs.
4560     */
4561     else if (tok == XML_TOK_BOM) {
4562 #  if XML_GE == 1
4563       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4564                                     XML_ACCOUNT_DIRECT)) {
4565         accountingOnAbort(parser);
4566         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4567       }
4568 #  endif
4569 
4570       *nextPtr = next;
4571       s = next;
4572     }
4573     /* If we get this token, we have the start of what might be a
4574        normal tag, but not a declaration (i.e. it doesn't begin with
4575        "<!").  In a DTD context, that isn't legal.
4576     */
4577     else if (tok == XML_TOK_INSTANCE_START) {
4578       *nextPtr = next;
4579       return XML_ERROR_SYNTAX;
4580     }
4581     start = next;
4582     parser->m_eventPtr = start;
4583   }
4584 }
4585 
4586 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4587 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4588                         const char **nextPtr) {
4589   const char *next = s;
4590   int tok;
4591 
4592   tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4593   if (tok <= 0) {
4594     if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4595       *nextPtr = s;
4596       return XML_ERROR_NONE;
4597     }
4598     switch (tok) {
4599     case XML_TOK_INVALID:
4600       return XML_ERROR_INVALID_TOKEN;
4601     case XML_TOK_PARTIAL:
4602       return XML_ERROR_UNCLOSED_TOKEN;
4603     case XML_TOK_PARTIAL_CHAR:
4604       return XML_ERROR_PARTIAL_CHAR;
4605     case XML_TOK_NONE: /* start == end */
4606     default:
4607       break;
4608     }
4609   }
4610   /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4611      However, when parsing an external subset, doProlog will not accept a BOM
4612      as valid, and report a syntax error, so we have to skip the BOM, and
4613      account for the BOM bytes.
4614   */
4615   else if (tok == XML_TOK_BOM) {
4616     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4617                                   XML_ACCOUNT_DIRECT)) {
4618       accountingOnAbort(parser);
4619       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4620     }
4621 
4622     s = next;
4623     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4624   }
4625 
4626   parser->m_processor = prologProcessor;
4627   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4628                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4629                   XML_ACCOUNT_DIRECT);
4630 }
4631 
4632 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4633 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4634                      const char **nextPtr) {
4635   const char *start = s;
4636   const char *next = s;
4637   const ENCODING *enc = parser->m_encoding;
4638   int tok;
4639 
4640   for (;;) {
4641     tok = XmlPrologTok(enc, start, end, &next);
4642     /* Note: These bytes are accounted later in:
4643              - storeEntityValue
4644     */
4645     if (tok <= 0) {
4646       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4647         *nextPtr = s;
4648         return XML_ERROR_NONE;
4649       }
4650       switch (tok) {
4651       case XML_TOK_INVALID:
4652         return XML_ERROR_INVALID_TOKEN;
4653       case XML_TOK_PARTIAL:
4654         return XML_ERROR_UNCLOSED_TOKEN;
4655       case XML_TOK_PARTIAL_CHAR:
4656         return XML_ERROR_PARTIAL_CHAR;
4657       case XML_TOK_NONE: /* start == end */
4658       default:
4659         break;
4660       }
4661       /* found end of entity value - can store it now */
4662       return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4663     }
4664     start = next;
4665   }
4666 }
4667 
4668 #endif /* XML_DTD */
4669 
4670 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4671 prologProcessor(XML_Parser parser, const char *s, const char *end,
4672                 const char **nextPtr) {
4673   const char *next = s;
4674   int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4675   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4676                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4677                   XML_ACCOUNT_DIRECT);
4678 }
4679 
4680 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4681 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4682          int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4683          XML_Bool allowClosingDoctype, enum XML_Account account) {
4684 #ifdef XML_DTD
4685   static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4686 #endif /* XML_DTD */
4687   static const XML_Char atypeCDATA[]
4688       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4689   static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4690   static const XML_Char atypeIDREF[]
4691       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4692   static const XML_Char atypeIDREFS[]
4693       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4694   static const XML_Char atypeENTITY[]
4695       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4696   static const XML_Char atypeENTITIES[]
4697       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4698          ASCII_I, ASCII_E, ASCII_S, '\0'};
4699   static const XML_Char atypeNMTOKEN[]
4700       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4701   static const XML_Char atypeNMTOKENS[]
4702       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4703          ASCII_E, ASCII_N, ASCII_S, '\0'};
4704   static const XML_Char notationPrefix[]
4705       = {ASCII_N, ASCII_O, ASCII_T, ASCII_A,      ASCII_T,
4706          ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4707   static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4708   static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4709 
4710 #ifndef XML_DTD
4711   UNUSED_P(account);
4712 #endif
4713 
4714   /* save one level of indirection */
4715   DTD *const dtd = parser->m_dtd;
4716 
4717   const char **eventPP;
4718   const char **eventEndPP;
4719   enum XML_Content_Quant quant;
4720 
4721   if (enc == parser->m_encoding) {
4722     eventPP = &parser->m_eventPtr;
4723     eventEndPP = &parser->m_eventEndPtr;
4724   } else {
4725     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4726     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4727   }
4728 
4729   for (;;) {
4730     int role;
4731     XML_Bool handleDefault = XML_TRUE;
4732     *eventPP = s;
4733     *eventEndPP = next;
4734     if (tok <= 0) {
4735       if (haveMore && tok != XML_TOK_INVALID) {
4736         *nextPtr = s;
4737         return XML_ERROR_NONE;
4738       }
4739       switch (tok) {
4740       case XML_TOK_INVALID:
4741         *eventPP = next;
4742         return XML_ERROR_INVALID_TOKEN;
4743       case XML_TOK_PARTIAL:
4744         return XML_ERROR_UNCLOSED_TOKEN;
4745       case XML_TOK_PARTIAL_CHAR:
4746         return XML_ERROR_PARTIAL_CHAR;
4747       case -XML_TOK_PROLOG_S:
4748         tok = -tok;
4749         break;
4750       case XML_TOK_NONE:
4751 #ifdef XML_DTD
4752         /* for internal PE NOT referenced between declarations */
4753         if (enc != parser->m_encoding
4754             && ! parser->m_openInternalEntities->betweenDecl) {
4755           *nextPtr = s;
4756           return XML_ERROR_NONE;
4757         }
4758         /* WFC: PE Between Declarations - must check that PE contains
4759            complete markup, not only for external PEs, but also for
4760            internal PEs if the reference occurs between declarations.
4761         */
4762         if (parser->m_isParamEntity || enc != parser->m_encoding) {
4763           if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4764               == XML_ROLE_ERROR)
4765             return XML_ERROR_INCOMPLETE_PE;
4766           *nextPtr = s;
4767           return XML_ERROR_NONE;
4768         }
4769 #endif /* XML_DTD */
4770         return XML_ERROR_NO_ELEMENTS;
4771       default:
4772         tok = -tok;
4773         next = end;
4774         break;
4775       }
4776     }
4777     role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4778 #if XML_GE == 1
4779     switch (role) {
4780     case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4781     case XML_ROLE_XML_DECL:       // bytes accounted in processXmlDecl
4782 #  ifdef XML_DTD
4783     case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4784 #  endif
4785       break;
4786     default:
4787       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4788         accountingOnAbort(parser);
4789         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4790       }
4791     }
4792 #endif
4793     switch (role) {
4794     case XML_ROLE_XML_DECL: {
4795       enum XML_Error result = processXmlDecl(parser, 0, s, next);
4796       if (result != XML_ERROR_NONE)
4797         return result;
4798       enc = parser->m_encoding;
4799       handleDefault = XML_FALSE;
4800     } break;
4801     case XML_ROLE_DOCTYPE_NAME:
4802       if (parser->m_startDoctypeDeclHandler) {
4803         parser->m_doctypeName
4804             = poolStoreString(&parser->m_tempPool, enc, s, next);
4805         if (! parser->m_doctypeName)
4806           return XML_ERROR_NO_MEMORY;
4807         poolFinish(&parser->m_tempPool);
4808         parser->m_doctypePubid = NULL;
4809         handleDefault = XML_FALSE;
4810       }
4811       parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4812       break;
4813     case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4814       if (parser->m_startDoctypeDeclHandler) {
4815         parser->m_startDoctypeDeclHandler(
4816             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4817             parser->m_doctypePubid, 1);
4818         parser->m_doctypeName = NULL;
4819         poolClear(&parser->m_tempPool);
4820         handleDefault = XML_FALSE;
4821       }
4822       break;
4823 #ifdef XML_DTD
4824     case XML_ROLE_TEXT_DECL: {
4825       enum XML_Error result = processXmlDecl(parser, 1, s, next);
4826       if (result != XML_ERROR_NONE)
4827         return result;
4828       enc = parser->m_encoding;
4829       handleDefault = XML_FALSE;
4830     } break;
4831 #endif /* XML_DTD */
4832     case XML_ROLE_DOCTYPE_PUBLIC_ID:
4833 #ifdef XML_DTD
4834       parser->m_useForeignDTD = XML_FALSE;
4835       parser->m_declEntity = (ENTITY *)lookup(
4836           parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4837       if (! parser->m_declEntity)
4838         return XML_ERROR_NO_MEMORY;
4839 #endif /* XML_DTD */
4840       dtd->hasParamEntityRefs = XML_TRUE;
4841       if (parser->m_startDoctypeDeclHandler) {
4842         XML_Char *pubId;
4843         if (! XmlIsPublicId(enc, s, next, eventPP))
4844           return XML_ERROR_PUBLICID;
4845         pubId = poolStoreString(&parser->m_tempPool, enc,
4846                                 s + enc->minBytesPerChar,
4847                                 next - enc->minBytesPerChar);
4848         if (! pubId)
4849           return XML_ERROR_NO_MEMORY;
4850         normalizePublicId(pubId);
4851         poolFinish(&parser->m_tempPool);
4852         parser->m_doctypePubid = pubId;
4853         handleDefault = XML_FALSE;
4854         goto alreadyChecked;
4855       }
4856       /* fall through */
4857     case XML_ROLE_ENTITY_PUBLIC_ID:
4858       if (! XmlIsPublicId(enc, s, next, eventPP))
4859         return XML_ERROR_PUBLICID;
4860     alreadyChecked:
4861       if (dtd->keepProcessing && parser->m_declEntity) {
4862         XML_Char *tem
4863             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4864                               next - enc->minBytesPerChar);
4865         if (! tem)
4866           return XML_ERROR_NO_MEMORY;
4867         normalizePublicId(tem);
4868         parser->m_declEntity->publicId = tem;
4869         poolFinish(&dtd->pool);
4870         /* Don't suppress the default handler if we fell through from
4871          * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4872          */
4873         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4874           handleDefault = XML_FALSE;
4875       }
4876       break;
4877     case XML_ROLE_DOCTYPE_CLOSE:
4878       if (allowClosingDoctype != XML_TRUE) {
4879         /* Must not close doctype from within expanded parameter entities */
4880         return XML_ERROR_INVALID_TOKEN;
4881       }
4882 
4883       if (parser->m_doctypeName) {
4884         parser->m_startDoctypeDeclHandler(
4885             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4886             parser->m_doctypePubid, 0);
4887         poolClear(&parser->m_tempPool);
4888         handleDefault = XML_FALSE;
4889       }
4890       /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4891          XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4892          was not set, indicating an external subset
4893       */
4894 #ifdef XML_DTD
4895       if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4896         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4897         dtd->hasParamEntityRefs = XML_TRUE;
4898         if (parser->m_paramEntityParsing
4899             && parser->m_externalEntityRefHandler) {
4900           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4901                                             externalSubsetName, sizeof(ENTITY));
4902           if (! entity) {
4903             /* The external subset name "#" will have already been
4904              * inserted into the hash table at the start of the
4905              * external entity parsing, so no allocation will happen
4906              * and lookup() cannot fail.
4907              */
4908             return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4909           }
4910           if (parser->m_useForeignDTD)
4911             entity->base = parser->m_curBase;
4912           dtd->paramEntityRead = XML_FALSE;
4913           if (! parser->m_externalEntityRefHandler(
4914                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4915                   entity->systemId, entity->publicId))
4916             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4917           if (dtd->paramEntityRead) {
4918             if (! dtd->standalone && parser->m_notStandaloneHandler
4919                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4920               return XML_ERROR_NOT_STANDALONE;
4921           }
4922           /* if we didn't read the foreign DTD then this means that there
4923              is no external subset and we must reset dtd->hasParamEntityRefs
4924           */
4925           else if (! parser->m_doctypeSysid)
4926             dtd->hasParamEntityRefs = hadParamEntityRefs;
4927           /* end of DTD - no need to update dtd->keepProcessing */
4928         }
4929         parser->m_useForeignDTD = XML_FALSE;
4930       }
4931 #endif /* XML_DTD */
4932       if (parser->m_endDoctypeDeclHandler) {
4933         parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4934         handleDefault = XML_FALSE;
4935       }
4936       break;
4937     case XML_ROLE_INSTANCE_START:
4938 #ifdef XML_DTD
4939       /* if there is no DOCTYPE declaration then now is the
4940          last chance to read the foreign DTD
4941       */
4942       if (parser->m_useForeignDTD) {
4943         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4944         dtd->hasParamEntityRefs = XML_TRUE;
4945         if (parser->m_paramEntityParsing
4946             && parser->m_externalEntityRefHandler) {
4947           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4948                                             externalSubsetName, sizeof(ENTITY));
4949           if (! entity)
4950             return XML_ERROR_NO_MEMORY;
4951           entity->base = parser->m_curBase;
4952           dtd->paramEntityRead = XML_FALSE;
4953           if (! parser->m_externalEntityRefHandler(
4954                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4955                   entity->systemId, entity->publicId))
4956             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4957           if (dtd->paramEntityRead) {
4958             if (! dtd->standalone && parser->m_notStandaloneHandler
4959                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4960               return XML_ERROR_NOT_STANDALONE;
4961           }
4962           /* if we didn't read the foreign DTD then this means that there
4963              is no external subset and we must reset dtd->hasParamEntityRefs
4964           */
4965           else
4966             dtd->hasParamEntityRefs = hadParamEntityRefs;
4967           /* end of DTD - no need to update dtd->keepProcessing */
4968         }
4969       }
4970 #endif /* XML_DTD */
4971       parser->m_processor = contentProcessor;
4972       return contentProcessor(parser, s, end, nextPtr);
4973     case XML_ROLE_ATTLIST_ELEMENT_NAME:
4974       parser->m_declElementType = getElementType(parser, enc, s, next);
4975       if (! parser->m_declElementType)
4976         return XML_ERROR_NO_MEMORY;
4977       goto checkAttListDeclHandler;
4978     case XML_ROLE_ATTRIBUTE_NAME:
4979       parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4980       if (! parser->m_declAttributeId)
4981         return XML_ERROR_NO_MEMORY;
4982       parser->m_declAttributeIsCdata = XML_FALSE;
4983       parser->m_declAttributeType = NULL;
4984       parser->m_declAttributeIsId = XML_FALSE;
4985       goto checkAttListDeclHandler;
4986     case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4987       parser->m_declAttributeIsCdata = XML_TRUE;
4988       parser->m_declAttributeType = atypeCDATA;
4989       goto checkAttListDeclHandler;
4990     case XML_ROLE_ATTRIBUTE_TYPE_ID:
4991       parser->m_declAttributeIsId = XML_TRUE;
4992       parser->m_declAttributeType = atypeID;
4993       goto checkAttListDeclHandler;
4994     case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4995       parser->m_declAttributeType = atypeIDREF;
4996       goto checkAttListDeclHandler;
4997     case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4998       parser->m_declAttributeType = atypeIDREFS;
4999       goto checkAttListDeclHandler;
5000     case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5001       parser->m_declAttributeType = atypeENTITY;
5002       goto checkAttListDeclHandler;
5003     case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5004       parser->m_declAttributeType = atypeENTITIES;
5005       goto checkAttListDeclHandler;
5006     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5007       parser->m_declAttributeType = atypeNMTOKEN;
5008       goto checkAttListDeclHandler;
5009     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5010       parser->m_declAttributeType = atypeNMTOKENS;
5011     checkAttListDeclHandler:
5012       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5013         handleDefault = XML_FALSE;
5014       break;
5015     case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5016     case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5017       if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5018         const XML_Char *prefix;
5019         if (parser->m_declAttributeType) {
5020           prefix = enumValueSep;
5021         } else {
5022           prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5023                                                               : enumValueStart);
5024         }
5025         if (! poolAppendString(&parser->m_tempPool, prefix))
5026           return XML_ERROR_NO_MEMORY;
5027         if (! poolAppend(&parser->m_tempPool, enc, s, next))
5028           return XML_ERROR_NO_MEMORY;
5029         parser->m_declAttributeType = parser->m_tempPool.start;
5030         handleDefault = XML_FALSE;
5031       }
5032       break;
5033     case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5034     case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5035       if (dtd->keepProcessing) {
5036         if (! defineAttribute(parser->m_declElementType,
5037                               parser->m_declAttributeId,
5038                               parser->m_declAttributeIsCdata,
5039                               parser->m_declAttributeIsId, 0, parser))
5040           return XML_ERROR_NO_MEMORY;
5041         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5042           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5043               || (*parser->m_declAttributeType == XML_T(ASCII_N)
5044                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5045             /* Enumerated or Notation type */
5046             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5047                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5048               return XML_ERROR_NO_MEMORY;
5049             parser->m_declAttributeType = parser->m_tempPool.start;
5050             poolFinish(&parser->m_tempPool);
5051           }
5052           *eventEndPP = s;
5053           parser->m_attlistDeclHandler(
5054               parser->m_handlerArg, parser->m_declElementType->name,
5055               parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5056               role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5057           handleDefault = XML_FALSE;
5058         }
5059       }
5060       poolClear(&parser->m_tempPool);
5061       break;
5062     case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5063     case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5064       if (dtd->keepProcessing) {
5065         const XML_Char *attVal;
5066         enum XML_Error result = storeAttributeValue(
5067             parser, enc, parser->m_declAttributeIsCdata,
5068             s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5069             XML_ACCOUNT_NONE);
5070         if (result)
5071           return result;
5072         attVal = poolStart(&dtd->pool);
5073         poolFinish(&dtd->pool);
5074         /* ID attributes aren't allowed to have a default */
5075         if (! defineAttribute(
5076                 parser->m_declElementType, parser->m_declAttributeId,
5077                 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5078           return XML_ERROR_NO_MEMORY;
5079         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5080           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5081               || (*parser->m_declAttributeType == XML_T(ASCII_N)
5082                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5083             /* Enumerated or Notation type */
5084             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5085                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5086               return XML_ERROR_NO_MEMORY;
5087             parser->m_declAttributeType = parser->m_tempPool.start;
5088             poolFinish(&parser->m_tempPool);
5089           }
5090           *eventEndPP = s;
5091           parser->m_attlistDeclHandler(
5092               parser->m_handlerArg, parser->m_declElementType->name,
5093               parser->m_declAttributeId->name, parser->m_declAttributeType,
5094               attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5095           poolClear(&parser->m_tempPool);
5096           handleDefault = XML_FALSE;
5097         }
5098       }
5099       break;
5100     case XML_ROLE_ENTITY_VALUE:
5101       if (dtd->keepProcessing) {
5102 #if XML_GE == 1
5103         // This will store the given replacement text in
5104         // parser->m_declEntity->textPtr.
5105         enum XML_Error result
5106             = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5107                                next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5108         if (parser->m_declEntity) {
5109           parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5110           parser->m_declEntity->textLen
5111               = (int)(poolLength(&dtd->entityValuePool));
5112           poolFinish(&dtd->entityValuePool);
5113           if (parser->m_entityDeclHandler) {
5114             *eventEndPP = s;
5115             parser->m_entityDeclHandler(
5116                 parser->m_handlerArg, parser->m_declEntity->name,
5117                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5118                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5119             handleDefault = XML_FALSE;
5120           }
5121         } else
5122           poolDiscard(&dtd->entityValuePool);
5123         if (result != XML_ERROR_NONE)
5124           return result;
5125 #else
5126         // This will store "&amp;entity123;" in parser->m_declEntity->textPtr
5127         // to end up as "&entity123;" in the handler.
5128         if (parser->m_declEntity != NULL) {
5129           const enum XML_Error result
5130               = storeSelfEntityValue(parser, parser->m_declEntity);
5131           if (result != XML_ERROR_NONE)
5132             return result;
5133 
5134           if (parser->m_entityDeclHandler) {
5135             *eventEndPP = s;
5136             parser->m_entityDeclHandler(
5137                 parser->m_handlerArg, parser->m_declEntity->name,
5138                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5139                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5140             handleDefault = XML_FALSE;
5141           }
5142         }
5143 #endif
5144       }
5145       break;
5146     case XML_ROLE_DOCTYPE_SYSTEM_ID:
5147 #ifdef XML_DTD
5148       parser->m_useForeignDTD = XML_FALSE;
5149 #endif /* XML_DTD */
5150       dtd->hasParamEntityRefs = XML_TRUE;
5151       if (parser->m_startDoctypeDeclHandler) {
5152         parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5153                                                  s + enc->minBytesPerChar,
5154                                                  next - enc->minBytesPerChar);
5155         if (parser->m_doctypeSysid == NULL)
5156           return XML_ERROR_NO_MEMORY;
5157         poolFinish(&parser->m_tempPool);
5158         handleDefault = XML_FALSE;
5159       }
5160 #ifdef XML_DTD
5161       else
5162         /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5163            for the case where no parser->m_startDoctypeDeclHandler is set */
5164         parser->m_doctypeSysid = externalSubsetName;
5165 #endif /* XML_DTD */
5166       if (! dtd->standalone
5167 #ifdef XML_DTD
5168           && ! parser->m_paramEntityParsing
5169 #endif /* XML_DTD */
5170           && parser->m_notStandaloneHandler
5171           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5172         return XML_ERROR_NOT_STANDALONE;
5173 #ifndef XML_DTD
5174       break;
5175 #else  /* XML_DTD */
5176       if (! parser->m_declEntity) {
5177         parser->m_declEntity = (ENTITY *)lookup(
5178             parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5179         if (! parser->m_declEntity)
5180           return XML_ERROR_NO_MEMORY;
5181         parser->m_declEntity->publicId = NULL;
5182       }
5183 #endif /* XML_DTD */
5184       /* fall through */
5185     case XML_ROLE_ENTITY_SYSTEM_ID:
5186       if (dtd->keepProcessing && parser->m_declEntity) {
5187         parser->m_declEntity->systemId
5188             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5189                               next - enc->minBytesPerChar);
5190         if (! parser->m_declEntity->systemId)
5191           return XML_ERROR_NO_MEMORY;
5192         parser->m_declEntity->base = parser->m_curBase;
5193         poolFinish(&dtd->pool);
5194         /* Don't suppress the default handler if we fell through from
5195          * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5196          */
5197         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5198           handleDefault = XML_FALSE;
5199       }
5200       break;
5201     case XML_ROLE_ENTITY_COMPLETE:
5202 #if XML_GE == 0
5203       // This will store "&amp;entity123;" in entity->textPtr
5204       // to end up as "&entity123;" in the handler.
5205       if (parser->m_declEntity != NULL) {
5206         const enum XML_Error result
5207             = storeSelfEntityValue(parser, parser->m_declEntity);
5208         if (result != XML_ERROR_NONE)
5209           return result;
5210       }
5211 #endif
5212       if (dtd->keepProcessing && parser->m_declEntity
5213           && parser->m_entityDeclHandler) {
5214         *eventEndPP = s;
5215         parser->m_entityDeclHandler(
5216             parser->m_handlerArg, parser->m_declEntity->name,
5217             parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5218             parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5219         handleDefault = XML_FALSE;
5220       }
5221       break;
5222     case XML_ROLE_ENTITY_NOTATION_NAME:
5223       if (dtd->keepProcessing && parser->m_declEntity) {
5224         parser->m_declEntity->notation
5225             = poolStoreString(&dtd->pool, enc, s, next);
5226         if (! parser->m_declEntity->notation)
5227           return XML_ERROR_NO_MEMORY;
5228         poolFinish(&dtd->pool);
5229         if (parser->m_unparsedEntityDeclHandler) {
5230           *eventEndPP = s;
5231           parser->m_unparsedEntityDeclHandler(
5232               parser->m_handlerArg, parser->m_declEntity->name,
5233               parser->m_declEntity->base, parser->m_declEntity->systemId,
5234               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5235           handleDefault = XML_FALSE;
5236         } else if (parser->m_entityDeclHandler) {
5237           *eventEndPP = s;
5238           parser->m_entityDeclHandler(
5239               parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5240               parser->m_declEntity->base, parser->m_declEntity->systemId,
5241               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5242           handleDefault = XML_FALSE;
5243         }
5244       }
5245       break;
5246     case XML_ROLE_GENERAL_ENTITY_NAME: {
5247       if (XmlPredefinedEntityName(enc, s, next)) {
5248         parser->m_declEntity = NULL;
5249         break;
5250       }
5251       if (dtd->keepProcessing) {
5252         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5253         if (! name)
5254           return XML_ERROR_NO_MEMORY;
5255         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5256                                                 name, sizeof(ENTITY));
5257         if (! parser->m_declEntity)
5258           return XML_ERROR_NO_MEMORY;
5259         if (parser->m_declEntity->name != name) {
5260           poolDiscard(&dtd->pool);
5261           parser->m_declEntity = NULL;
5262         } else {
5263           poolFinish(&dtd->pool);
5264           parser->m_declEntity->publicId = NULL;
5265           parser->m_declEntity->is_param = XML_FALSE;
5266           /* if we have a parent parser or are reading an internal parameter
5267              entity, then the entity declaration is not considered "internal"
5268           */
5269           parser->m_declEntity->is_internal
5270               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5271           if (parser->m_entityDeclHandler)
5272             handleDefault = XML_FALSE;
5273         }
5274       } else {
5275         poolDiscard(&dtd->pool);
5276         parser->m_declEntity = NULL;
5277       }
5278     } break;
5279     case XML_ROLE_PARAM_ENTITY_NAME:
5280 #ifdef XML_DTD
5281       if (dtd->keepProcessing) {
5282         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5283         if (! name)
5284           return XML_ERROR_NO_MEMORY;
5285         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5286                                                 name, sizeof(ENTITY));
5287         if (! parser->m_declEntity)
5288           return XML_ERROR_NO_MEMORY;
5289         if (parser->m_declEntity->name != name) {
5290           poolDiscard(&dtd->pool);
5291           parser->m_declEntity = NULL;
5292         } else {
5293           poolFinish(&dtd->pool);
5294           parser->m_declEntity->publicId = NULL;
5295           parser->m_declEntity->is_param = XML_TRUE;
5296           /* if we have a parent parser or are reading an internal parameter
5297              entity, then the entity declaration is not considered "internal"
5298           */
5299           parser->m_declEntity->is_internal
5300               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5301           if (parser->m_entityDeclHandler)
5302             handleDefault = XML_FALSE;
5303         }
5304       } else {
5305         poolDiscard(&dtd->pool);
5306         parser->m_declEntity = NULL;
5307       }
5308 #else  /* not XML_DTD */
5309       parser->m_declEntity = NULL;
5310 #endif /* XML_DTD */
5311       break;
5312     case XML_ROLE_NOTATION_NAME:
5313       parser->m_declNotationPublicId = NULL;
5314       parser->m_declNotationName = NULL;
5315       if (parser->m_notationDeclHandler) {
5316         parser->m_declNotationName
5317             = poolStoreString(&parser->m_tempPool, enc, s, next);
5318         if (! parser->m_declNotationName)
5319           return XML_ERROR_NO_MEMORY;
5320         poolFinish(&parser->m_tempPool);
5321         handleDefault = XML_FALSE;
5322       }
5323       break;
5324     case XML_ROLE_NOTATION_PUBLIC_ID:
5325       if (! XmlIsPublicId(enc, s, next, eventPP))
5326         return XML_ERROR_PUBLICID;
5327       if (parser
5328               ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5329         XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5330                                         s + enc->minBytesPerChar,
5331                                         next - enc->minBytesPerChar);
5332         if (! tem)
5333           return XML_ERROR_NO_MEMORY;
5334         normalizePublicId(tem);
5335         parser->m_declNotationPublicId = tem;
5336         poolFinish(&parser->m_tempPool);
5337         handleDefault = XML_FALSE;
5338       }
5339       break;
5340     case XML_ROLE_NOTATION_SYSTEM_ID:
5341       if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5342         const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5343                                                    s + enc->minBytesPerChar,
5344                                                    next - enc->minBytesPerChar);
5345         if (! systemId)
5346           return XML_ERROR_NO_MEMORY;
5347         *eventEndPP = s;
5348         parser->m_notationDeclHandler(
5349             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5350             systemId, parser->m_declNotationPublicId);
5351         handleDefault = XML_FALSE;
5352       }
5353       poolClear(&parser->m_tempPool);
5354       break;
5355     case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5356       if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5357         *eventEndPP = s;
5358         parser->m_notationDeclHandler(
5359             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5360             0, parser->m_declNotationPublicId);
5361         handleDefault = XML_FALSE;
5362       }
5363       poolClear(&parser->m_tempPool);
5364       break;
5365     case XML_ROLE_ERROR:
5366       switch (tok) {
5367       case XML_TOK_PARAM_ENTITY_REF:
5368         /* PE references in internal subset are
5369            not allowed within declarations. */
5370         return XML_ERROR_PARAM_ENTITY_REF;
5371       case XML_TOK_XML_DECL:
5372         return XML_ERROR_MISPLACED_XML_PI;
5373       default:
5374         return XML_ERROR_SYNTAX;
5375       }
5376 #ifdef XML_DTD
5377     case XML_ROLE_IGNORE_SECT: {
5378       enum XML_Error result;
5379       if (parser->m_defaultHandler)
5380         reportDefault(parser, enc, s, next);
5381       handleDefault = XML_FALSE;
5382       result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5383       if (result != XML_ERROR_NONE)
5384         return result;
5385       else if (! next) {
5386         parser->m_processor = ignoreSectionProcessor;
5387         return result;
5388       }
5389     } break;
5390 #endif /* XML_DTD */
5391     case XML_ROLE_GROUP_OPEN:
5392       if (parser->m_prologState.level >= parser->m_groupSize) {
5393         if (parser->m_groupSize) {
5394           {
5395             /* Detect and prevent integer overflow */
5396             if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5397               return XML_ERROR_NO_MEMORY;
5398             }
5399 
5400             char *const new_connector = (char *)REALLOC(
5401                 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5402             if (new_connector == NULL) {
5403               parser->m_groupSize /= 2;
5404               return XML_ERROR_NO_MEMORY;
5405             }
5406             parser->m_groupConnector = new_connector;
5407           }
5408 
5409           if (dtd->scaffIndex) {
5410             /* Detect and prevent integer overflow.
5411              * The preprocessor guard addresses the "always false" warning
5412              * from -Wtype-limits on platforms where
5413              * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5414 #if UINT_MAX >= SIZE_MAX
5415             if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5416               return XML_ERROR_NO_MEMORY;
5417             }
5418 #endif
5419 
5420             int *const new_scaff_index = (int *)REALLOC(
5421                 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5422             if (new_scaff_index == NULL)
5423               return XML_ERROR_NO_MEMORY;
5424             dtd->scaffIndex = new_scaff_index;
5425           }
5426         } else {
5427           parser->m_groupConnector
5428               = (char *)MALLOC(parser, parser->m_groupSize = 32);
5429           if (! parser->m_groupConnector) {
5430             parser->m_groupSize = 0;
5431             return XML_ERROR_NO_MEMORY;
5432           }
5433         }
5434       }
5435       parser->m_groupConnector[parser->m_prologState.level] = 0;
5436       if (dtd->in_eldecl) {
5437         int myindex = nextScaffoldPart(parser);
5438         if (myindex < 0)
5439           return XML_ERROR_NO_MEMORY;
5440         assert(dtd->scaffIndex != NULL);
5441         dtd->scaffIndex[dtd->scaffLevel] = myindex;
5442         dtd->scaffLevel++;
5443         dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5444         if (parser->m_elementDeclHandler)
5445           handleDefault = XML_FALSE;
5446       }
5447       break;
5448     case XML_ROLE_GROUP_SEQUENCE:
5449       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5450         return XML_ERROR_SYNTAX;
5451       parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5452       if (dtd->in_eldecl && parser->m_elementDeclHandler)
5453         handleDefault = XML_FALSE;
5454       break;
5455     case XML_ROLE_GROUP_CHOICE:
5456       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5457         return XML_ERROR_SYNTAX;
5458       if (dtd->in_eldecl
5459           && ! parser->m_groupConnector[parser->m_prologState.level]
5460           && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5461               != XML_CTYPE_MIXED)) {
5462         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5463             = XML_CTYPE_CHOICE;
5464         if (parser->m_elementDeclHandler)
5465           handleDefault = XML_FALSE;
5466       }
5467       parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5468       break;
5469     case XML_ROLE_PARAM_ENTITY_REF:
5470 #ifdef XML_DTD
5471     case XML_ROLE_INNER_PARAM_ENTITY_REF:
5472       dtd->hasParamEntityRefs = XML_TRUE;
5473       if (! parser->m_paramEntityParsing)
5474         dtd->keepProcessing = dtd->standalone;
5475       else {
5476         const XML_Char *name;
5477         ENTITY *entity;
5478         name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5479                                next - enc->minBytesPerChar);
5480         if (! name)
5481           return XML_ERROR_NO_MEMORY;
5482         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5483         poolDiscard(&dtd->pool);
5484         /* first, determine if a check for an existing declaration is needed;
5485            if yes, check that the entity exists, and that it is internal,
5486            otherwise call the skipped entity handler
5487         */
5488         if (parser->m_prologState.documentEntity
5489             && (dtd->standalone ? ! parser->m_openInternalEntities
5490                                 : ! dtd->hasParamEntityRefs)) {
5491           if (! entity)
5492             return XML_ERROR_UNDEFINED_ENTITY;
5493           else if (! entity->is_internal) {
5494             /* It's hard to exhaustively search the code to be sure,
5495              * but there doesn't seem to be a way of executing the
5496              * following line.  There are two cases:
5497              *
5498              * If 'standalone' is false, the DTD must have no
5499              * parameter entities or we wouldn't have passed the outer
5500              * 'if' statement.  That means the only entity in the hash
5501              * table is the external subset name "#" which cannot be
5502              * given as a parameter entity name in XML syntax, so the
5503              * lookup must have returned NULL and we don't even reach
5504              * the test for an internal entity.
5505              *
5506              * If 'standalone' is true, it does not seem to be
5507              * possible to create entities taking this code path that
5508              * are not internal entities, so fail the test above.
5509              *
5510              * Because this analysis is very uncertain, the code is
5511              * being left in place and merely removed from the
5512              * coverage test statistics.
5513              */
5514             return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5515           }
5516         } else if (! entity) {
5517           dtd->keepProcessing = dtd->standalone;
5518           /* cannot report skipped entities in declarations */
5519           if ((role == XML_ROLE_PARAM_ENTITY_REF)
5520               && parser->m_skippedEntityHandler) {
5521             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5522             handleDefault = XML_FALSE;
5523           }
5524           break;
5525         }
5526         if (entity->open)
5527           return XML_ERROR_RECURSIVE_ENTITY_REF;
5528         if (entity->textPtr) {
5529           enum XML_Error result;
5530           XML_Bool betweenDecl
5531               = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5532           result = processInternalEntity(parser, entity, betweenDecl);
5533           if (result != XML_ERROR_NONE)
5534             return result;
5535           handleDefault = XML_FALSE;
5536           break;
5537         }
5538         if (parser->m_externalEntityRefHandler) {
5539           dtd->paramEntityRead = XML_FALSE;
5540           entity->open = XML_TRUE;
5541           entityTrackingOnOpen(parser, entity, __LINE__);
5542           if (! parser->m_externalEntityRefHandler(
5543                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
5544                   entity->systemId, entity->publicId)) {
5545             entityTrackingOnClose(parser, entity, __LINE__);
5546             entity->open = XML_FALSE;
5547             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5548           }
5549           entityTrackingOnClose(parser, entity, __LINE__);
5550           entity->open = XML_FALSE;
5551           handleDefault = XML_FALSE;
5552           if (! dtd->paramEntityRead) {
5553             dtd->keepProcessing = dtd->standalone;
5554             break;
5555           }
5556         } else {
5557           dtd->keepProcessing = dtd->standalone;
5558           break;
5559         }
5560       }
5561 #endif /* XML_DTD */
5562       if (! dtd->standalone && parser->m_notStandaloneHandler
5563           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5564         return XML_ERROR_NOT_STANDALONE;
5565       break;
5566 
5567       /* Element declaration stuff */
5568 
5569     case XML_ROLE_ELEMENT_NAME:
5570       if (parser->m_elementDeclHandler) {
5571         parser->m_declElementType = getElementType(parser, enc, s, next);
5572         if (! parser->m_declElementType)
5573           return XML_ERROR_NO_MEMORY;
5574         dtd->scaffLevel = 0;
5575         dtd->scaffCount = 0;
5576         dtd->in_eldecl = XML_TRUE;
5577         handleDefault = XML_FALSE;
5578       }
5579       break;
5580 
5581     case XML_ROLE_CONTENT_ANY:
5582     case XML_ROLE_CONTENT_EMPTY:
5583       if (dtd->in_eldecl) {
5584         if (parser->m_elementDeclHandler) {
5585           XML_Content *content
5586               = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5587           if (! content)
5588             return XML_ERROR_NO_MEMORY;
5589           content->quant = XML_CQUANT_NONE;
5590           content->name = NULL;
5591           content->numchildren = 0;
5592           content->children = NULL;
5593           content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5594                                                           : XML_CTYPE_EMPTY);
5595           *eventEndPP = s;
5596           parser->m_elementDeclHandler(
5597               parser->m_handlerArg, parser->m_declElementType->name, content);
5598           handleDefault = XML_FALSE;
5599         }
5600         dtd->in_eldecl = XML_FALSE;
5601       }
5602       break;
5603 
5604     case XML_ROLE_CONTENT_PCDATA:
5605       if (dtd->in_eldecl) {
5606         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5607             = XML_CTYPE_MIXED;
5608         if (parser->m_elementDeclHandler)
5609           handleDefault = XML_FALSE;
5610       }
5611       break;
5612 
5613     case XML_ROLE_CONTENT_ELEMENT:
5614       quant = XML_CQUANT_NONE;
5615       goto elementContent;
5616     case XML_ROLE_CONTENT_ELEMENT_OPT:
5617       quant = XML_CQUANT_OPT;
5618       goto elementContent;
5619     case XML_ROLE_CONTENT_ELEMENT_REP:
5620       quant = XML_CQUANT_REP;
5621       goto elementContent;
5622     case XML_ROLE_CONTENT_ELEMENT_PLUS:
5623       quant = XML_CQUANT_PLUS;
5624     elementContent:
5625       if (dtd->in_eldecl) {
5626         ELEMENT_TYPE *el;
5627         const XML_Char *name;
5628         size_t nameLen;
5629         const char *nxt
5630             = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5631         int myindex = nextScaffoldPart(parser);
5632         if (myindex < 0)
5633           return XML_ERROR_NO_MEMORY;
5634         dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5635         dtd->scaffold[myindex].quant = quant;
5636         el = getElementType(parser, enc, s, nxt);
5637         if (! el)
5638           return XML_ERROR_NO_MEMORY;
5639         name = el->name;
5640         dtd->scaffold[myindex].name = name;
5641         nameLen = 0;
5642         for (; name[nameLen++];)
5643           ;
5644 
5645         /* Detect and prevent integer overflow */
5646         if (nameLen > UINT_MAX - dtd->contentStringLen) {
5647           return XML_ERROR_NO_MEMORY;
5648         }
5649 
5650         dtd->contentStringLen += (unsigned)nameLen;
5651         if (parser->m_elementDeclHandler)
5652           handleDefault = XML_FALSE;
5653       }
5654       break;
5655 
5656     case XML_ROLE_GROUP_CLOSE:
5657       quant = XML_CQUANT_NONE;
5658       goto closeGroup;
5659     case XML_ROLE_GROUP_CLOSE_OPT:
5660       quant = XML_CQUANT_OPT;
5661       goto closeGroup;
5662     case XML_ROLE_GROUP_CLOSE_REP:
5663       quant = XML_CQUANT_REP;
5664       goto closeGroup;
5665     case XML_ROLE_GROUP_CLOSE_PLUS:
5666       quant = XML_CQUANT_PLUS;
5667     closeGroup:
5668       if (dtd->in_eldecl) {
5669         if (parser->m_elementDeclHandler)
5670           handleDefault = XML_FALSE;
5671         dtd->scaffLevel--;
5672         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5673         if (dtd->scaffLevel == 0) {
5674           if (! handleDefault) {
5675             XML_Content *model = build_model(parser);
5676             if (! model)
5677               return XML_ERROR_NO_MEMORY;
5678             *eventEndPP = s;
5679             parser->m_elementDeclHandler(
5680                 parser->m_handlerArg, parser->m_declElementType->name, model);
5681           }
5682           dtd->in_eldecl = XML_FALSE;
5683           dtd->contentStringLen = 0;
5684         }
5685       }
5686       break;
5687       /* End element declaration stuff */
5688 
5689     case XML_ROLE_PI:
5690       if (! reportProcessingInstruction(parser, enc, s, next))
5691         return XML_ERROR_NO_MEMORY;
5692       handleDefault = XML_FALSE;
5693       break;
5694     case XML_ROLE_COMMENT:
5695       if (! reportComment(parser, enc, s, next))
5696         return XML_ERROR_NO_MEMORY;
5697       handleDefault = XML_FALSE;
5698       break;
5699     case XML_ROLE_NONE:
5700       switch (tok) {
5701       case XML_TOK_BOM:
5702         handleDefault = XML_FALSE;
5703         break;
5704       }
5705       break;
5706     case XML_ROLE_DOCTYPE_NONE:
5707       if (parser->m_startDoctypeDeclHandler)
5708         handleDefault = XML_FALSE;
5709       break;
5710     case XML_ROLE_ENTITY_NONE:
5711       if (dtd->keepProcessing && parser->m_entityDeclHandler)
5712         handleDefault = XML_FALSE;
5713       break;
5714     case XML_ROLE_NOTATION_NONE:
5715       if (parser->m_notationDeclHandler)
5716         handleDefault = XML_FALSE;
5717       break;
5718     case XML_ROLE_ATTLIST_NONE:
5719       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5720         handleDefault = XML_FALSE;
5721       break;
5722     case XML_ROLE_ELEMENT_NONE:
5723       if (parser->m_elementDeclHandler)
5724         handleDefault = XML_FALSE;
5725       break;
5726     } /* end of big switch */
5727 
5728     if (handleDefault && parser->m_defaultHandler)
5729       reportDefault(parser, enc, s, next);
5730 
5731     switch (parser->m_parsingStatus.parsing) {
5732     case XML_SUSPENDED:
5733       *nextPtr = next;
5734       return XML_ERROR_NONE;
5735     case XML_FINISHED:
5736       return XML_ERROR_ABORTED;
5737     default:
5738       s = next;
5739       tok = XmlPrologTok(enc, s, end, &next);
5740     }
5741   }
5742   /* not reached */
5743 }
5744 
5745 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5746 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5747                 const char **nextPtr) {
5748   parser->m_processor = epilogProcessor;
5749   parser->m_eventPtr = s;
5750   for (;;) {
5751     const char *next = NULL;
5752     int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5753 #if XML_GE == 1
5754     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5755                                   XML_ACCOUNT_DIRECT)) {
5756       accountingOnAbort(parser);
5757       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5758     }
5759 #endif
5760     parser->m_eventEndPtr = next;
5761     switch (tok) {
5762     /* report partial linebreak - it might be the last token */
5763     case -XML_TOK_PROLOG_S:
5764       if (parser->m_defaultHandler) {
5765         reportDefault(parser, parser->m_encoding, s, next);
5766         if (parser->m_parsingStatus.parsing == XML_FINISHED)
5767           return XML_ERROR_ABORTED;
5768       }
5769       *nextPtr = next;
5770       return XML_ERROR_NONE;
5771     case XML_TOK_NONE:
5772       *nextPtr = s;
5773       return XML_ERROR_NONE;
5774     case XML_TOK_PROLOG_S:
5775       if (parser->m_defaultHandler)
5776         reportDefault(parser, parser->m_encoding, s, next);
5777       break;
5778     case XML_TOK_PI:
5779       if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5780         return XML_ERROR_NO_MEMORY;
5781       break;
5782     case XML_TOK_COMMENT:
5783       if (! reportComment(parser, parser->m_encoding, s, next))
5784         return XML_ERROR_NO_MEMORY;
5785       break;
5786     case XML_TOK_INVALID:
5787       parser->m_eventPtr = next;
5788       return XML_ERROR_INVALID_TOKEN;
5789     case XML_TOK_PARTIAL:
5790       if (! parser->m_parsingStatus.finalBuffer) {
5791         *nextPtr = s;
5792         return XML_ERROR_NONE;
5793       }
5794       return XML_ERROR_UNCLOSED_TOKEN;
5795     case XML_TOK_PARTIAL_CHAR:
5796       if (! parser->m_parsingStatus.finalBuffer) {
5797         *nextPtr = s;
5798         return XML_ERROR_NONE;
5799       }
5800       return XML_ERROR_PARTIAL_CHAR;
5801     default:
5802       return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5803     }
5804     parser->m_eventPtr = s = next;
5805     switch (parser->m_parsingStatus.parsing) {
5806     case XML_SUSPENDED:
5807       *nextPtr = next;
5808       return XML_ERROR_NONE;
5809     case XML_FINISHED:
5810       return XML_ERROR_ABORTED;
5811     default:;
5812     }
5813   }
5814 }
5815 
5816 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5817 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5818   const char *textStart, *textEnd;
5819   const char *next;
5820   enum XML_Error result;
5821   OPEN_INTERNAL_ENTITY *openEntity;
5822 
5823   if (parser->m_freeInternalEntities) {
5824     openEntity = parser->m_freeInternalEntities;
5825     parser->m_freeInternalEntities = openEntity->next;
5826   } else {
5827     openEntity
5828         = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5829     if (! openEntity)
5830       return XML_ERROR_NO_MEMORY;
5831   }
5832   entity->open = XML_TRUE;
5833 #if XML_GE == 1
5834   entityTrackingOnOpen(parser, entity, __LINE__);
5835 #endif
5836   entity->processed = 0;
5837   openEntity->next = parser->m_openInternalEntities;
5838   parser->m_openInternalEntities = openEntity;
5839   openEntity->entity = entity;
5840   openEntity->startTagLevel = parser->m_tagLevel;
5841   openEntity->betweenDecl = betweenDecl;
5842   openEntity->internalEventPtr = NULL;
5843   openEntity->internalEventEndPtr = NULL;
5844   textStart = (const char *)entity->textPtr;
5845   textEnd = (const char *)(entity->textPtr + entity->textLen);
5846   /* Set a safe default value in case 'next' does not get set */
5847   next = textStart;
5848 
5849 #ifdef XML_DTD
5850   if (entity->is_param) {
5851     int tok
5852         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5853     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5854                       tok, next, &next, XML_FALSE, XML_FALSE,
5855                       XML_ACCOUNT_ENTITY_EXPANSION);
5856   } else
5857 #endif /* XML_DTD */
5858     result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5859                        textStart, textEnd, &next, XML_FALSE,
5860                        XML_ACCOUNT_ENTITY_EXPANSION);
5861 
5862   if (result == XML_ERROR_NONE) {
5863     if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5864       entity->processed = (int)(next - textStart);
5865       parser->m_processor = internalEntityProcessor;
5866     } else if (parser->m_openInternalEntities->entity == entity) {
5867 #if XML_GE == 1
5868       entityTrackingOnClose(parser, entity, __LINE__);
5869 #endif /* XML_GE == 1 */
5870       entity->open = XML_FALSE;
5871       parser->m_openInternalEntities = openEntity->next;
5872       /* put openEntity back in list of free instances */
5873       openEntity->next = parser->m_freeInternalEntities;
5874       parser->m_freeInternalEntities = openEntity;
5875     }
5876   }
5877   return result;
5878 }
5879 
5880 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5881 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5882                         const char **nextPtr) {
5883   ENTITY *entity;
5884   const char *textStart, *textEnd;
5885   const char *next;
5886   enum XML_Error result;
5887   OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5888   if (! openEntity)
5889     return XML_ERROR_UNEXPECTED_STATE;
5890 
5891   entity = openEntity->entity;
5892   textStart = ((const char *)entity->textPtr) + entity->processed;
5893   textEnd = (const char *)(entity->textPtr + entity->textLen);
5894   /* Set a safe default value in case 'next' does not get set */
5895   next = textStart;
5896 
5897 #ifdef XML_DTD
5898   if (entity->is_param) {
5899     int tok
5900         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5901     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5902                       tok, next, &next, XML_FALSE, XML_TRUE,
5903                       XML_ACCOUNT_ENTITY_EXPANSION);
5904   } else
5905 #endif /* XML_DTD */
5906     result = doContent(parser, openEntity->startTagLevel,
5907                        parser->m_internalEncoding, textStart, textEnd, &next,
5908                        XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5909 
5910   if (result != XML_ERROR_NONE)
5911     return result;
5912 
5913   if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5914     entity->processed = (int)(next - (const char *)entity->textPtr);
5915     return result;
5916   }
5917 
5918 #if XML_GE == 1
5919   entityTrackingOnClose(parser, entity, __LINE__);
5920 #endif
5921   entity->open = XML_FALSE;
5922   parser->m_openInternalEntities = openEntity->next;
5923   /* put openEntity back in list of free instances */
5924   openEntity->next = parser->m_freeInternalEntities;
5925   parser->m_freeInternalEntities = openEntity;
5926 
5927   // If there are more open entities we want to stop right here and have the
5928   // upcoming call to XML_ResumeParser continue with entity content, or it would
5929   // be ignored altogether.
5930   if (parser->m_openInternalEntities != NULL
5931       && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5932     return XML_ERROR_NONE;
5933   }
5934 
5935 #ifdef XML_DTD
5936   if (entity->is_param) {
5937     int tok;
5938     parser->m_processor = prologProcessor;
5939     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5940     return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5941                     (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5942                     XML_ACCOUNT_DIRECT);
5943   } else
5944 #endif /* XML_DTD */
5945   {
5946     parser->m_processor = contentProcessor;
5947     /* see externalEntityContentProcessor vs contentProcessor */
5948     result = doContent(parser, parser->m_parentParser ? 1 : 0,
5949                        parser->m_encoding, s, end, nextPtr,
5950                        (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5951                        XML_ACCOUNT_DIRECT);
5952     if (result == XML_ERROR_NONE) {
5953       if (! storeRawNames(parser))
5954         return XML_ERROR_NO_MEMORY;
5955     }
5956     return result;
5957   }
5958 }
5959 
5960 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5961 errorProcessor(XML_Parser parser, const char *s, const char *end,
5962                const char **nextPtr) {
5963   UNUSED_P(s);
5964   UNUSED_P(end);
5965   UNUSED_P(nextPtr);
5966   return parser->m_errorCode;
5967 }
5968 
5969 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5970 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5971                     const char *ptr, const char *end, STRING_POOL *pool,
5972                     enum XML_Account account) {
5973   enum XML_Error result
5974       = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5975   if (result)
5976     return result;
5977   if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5978     poolChop(pool);
5979   if (! poolAppendChar(pool, XML_T('\0')))
5980     return XML_ERROR_NO_MEMORY;
5981   return XML_ERROR_NONE;
5982 }
5983 
5984 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5985 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5986                      const char *ptr, const char *end, STRING_POOL *pool,
5987                      enum XML_Account account) {
5988   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5989 #ifndef XML_DTD
5990   UNUSED_P(account);
5991 #endif
5992 
5993   for (;;) {
5994     const char *next
5995         = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5996     int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5997 #if XML_GE == 1
5998     if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5999       accountingOnAbort(parser);
6000       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6001     }
6002 #endif
6003     switch (tok) {
6004     case XML_TOK_NONE:
6005       return XML_ERROR_NONE;
6006     case XML_TOK_INVALID:
6007       if (enc == parser->m_encoding)
6008         parser->m_eventPtr = next;
6009       return XML_ERROR_INVALID_TOKEN;
6010     case XML_TOK_PARTIAL:
6011       if (enc == parser->m_encoding)
6012         parser->m_eventPtr = ptr;
6013       return XML_ERROR_INVALID_TOKEN;
6014     case XML_TOK_CHAR_REF: {
6015       XML_Char buf[XML_ENCODE_MAX];
6016       int i;
6017       int n = XmlCharRefNumber(enc, ptr);
6018       if (n < 0) {
6019         if (enc == parser->m_encoding)
6020           parser->m_eventPtr = ptr;
6021         return XML_ERROR_BAD_CHAR_REF;
6022       }
6023       if (! isCdata && n == 0x20 /* space */
6024           && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6025         break;
6026       n = XmlEncode(n, (ICHAR *)buf);
6027       /* The XmlEncode() functions can never return 0 here.  That
6028        * error return happens if the code point passed in is either
6029        * negative or greater than or equal to 0x110000.  The
6030        * XmlCharRefNumber() functions will all return a number
6031        * strictly less than 0x110000 or a negative value if an error
6032        * occurred.  The negative value is intercepted above, so
6033        * XmlEncode() is never passed a value it might return an
6034        * error for.
6035        */
6036       for (i = 0; i < n; i++) {
6037         if (! poolAppendChar(pool, buf[i]))
6038           return XML_ERROR_NO_MEMORY;
6039       }
6040     } break;
6041     case XML_TOK_DATA_CHARS:
6042       if (! poolAppend(pool, enc, ptr, next))
6043         return XML_ERROR_NO_MEMORY;
6044       break;
6045     case XML_TOK_TRAILING_CR:
6046       next = ptr + enc->minBytesPerChar;
6047       /* fall through */
6048     case XML_TOK_ATTRIBUTE_VALUE_S:
6049     case XML_TOK_DATA_NEWLINE:
6050       if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6051         break;
6052       if (! poolAppendChar(pool, 0x20))
6053         return XML_ERROR_NO_MEMORY;
6054       break;
6055     case XML_TOK_ENTITY_REF: {
6056       const XML_Char *name;
6057       ENTITY *entity;
6058       char checkEntityDecl;
6059       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6060           enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6061       if (ch) {
6062 #if XML_GE == 1
6063         /* NOTE: We are replacing 4-6 characters original input for 1 character
6064          *       so there is no amplification and hence recording without
6065          *       protection. */
6066         accountingDiffTolerated(parser, tok, (char *)&ch,
6067                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6068                                 XML_ACCOUNT_ENTITY_EXPANSION);
6069 #endif /* XML_GE == 1 */
6070         if (! poolAppendChar(pool, ch))
6071           return XML_ERROR_NO_MEMORY;
6072         break;
6073       }
6074       name = poolStoreString(&parser->m_temp2Pool, enc,
6075                              ptr + enc->minBytesPerChar,
6076                              next - enc->minBytesPerChar);
6077       if (! name)
6078         return XML_ERROR_NO_MEMORY;
6079       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6080       poolDiscard(&parser->m_temp2Pool);
6081       /* First, determine if a check for an existing declaration is needed;
6082          if yes, check that the entity exists, and that it is internal.
6083       */
6084       if (pool == &dtd->pool) /* are we called from prolog? */
6085         checkEntityDecl =
6086 #ifdef XML_DTD
6087             parser->m_prologState.documentEntity &&
6088 #endif /* XML_DTD */
6089             (dtd->standalone ? ! parser->m_openInternalEntities
6090                              : ! dtd->hasParamEntityRefs);
6091       else /* if (pool == &parser->m_tempPool): we are called from content */
6092         checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6093       if (checkEntityDecl) {
6094         if (! entity)
6095           return XML_ERROR_UNDEFINED_ENTITY;
6096         else if (! entity->is_internal)
6097           return XML_ERROR_ENTITY_DECLARED_IN_PE;
6098       } else if (! entity) {
6099         /* Cannot report skipped entity here - see comments on
6100            parser->m_skippedEntityHandler.
6101         if (parser->m_skippedEntityHandler)
6102           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6103         */
6104         /* Cannot call the default handler because this would be
6105            out of sync with the call to the startElementHandler.
6106         if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6107           reportDefault(parser, enc, ptr, next);
6108         */
6109         break;
6110       }
6111       if (entity->open) {
6112         if (enc == parser->m_encoding) {
6113           /* It does not appear that this line can be executed.
6114            *
6115            * The "if (entity->open)" check catches recursive entity
6116            * definitions.  In order to be called with an open
6117            * entity, it must have gone through this code before and
6118            * been through the recursive call to
6119            * appendAttributeValue() some lines below.  That call
6120            * sets the local encoding ("enc") to the parser's
6121            * internal encoding (internal_utf8 or internal_utf16),
6122            * which can never be the same as the principal encoding.
6123            * It doesn't appear there is another code path that gets
6124            * here with entity->open being TRUE.
6125            *
6126            * Since it is not certain that this logic is watertight,
6127            * we keep the line and merely exclude it from coverage
6128            * tests.
6129            */
6130           parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6131         }
6132         return XML_ERROR_RECURSIVE_ENTITY_REF;
6133       }
6134       if (entity->notation) {
6135         if (enc == parser->m_encoding)
6136           parser->m_eventPtr = ptr;
6137         return XML_ERROR_BINARY_ENTITY_REF;
6138       }
6139       if (! entity->textPtr) {
6140         if (enc == parser->m_encoding)
6141           parser->m_eventPtr = ptr;
6142         return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6143       } else {
6144         enum XML_Error result;
6145         const XML_Char *textEnd = entity->textPtr + entity->textLen;
6146         entity->open = XML_TRUE;
6147 #if XML_GE == 1
6148         entityTrackingOnOpen(parser, entity, __LINE__);
6149 #endif
6150         result = appendAttributeValue(parser, parser->m_internalEncoding,
6151                                       isCdata, (const char *)entity->textPtr,
6152                                       (const char *)textEnd, pool,
6153                                       XML_ACCOUNT_ENTITY_EXPANSION);
6154 #if XML_GE == 1
6155         entityTrackingOnClose(parser, entity, __LINE__);
6156 #endif
6157         entity->open = XML_FALSE;
6158         if (result)
6159           return result;
6160       }
6161     } break;
6162     default:
6163       /* The only token returned by XmlAttributeValueTok() that does
6164        * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6165        * Getting that would require an entity name to contain an
6166        * incomplete XML character (e.g. \xE2\x82); however previous
6167        * tokenisers will have already recognised and rejected such
6168        * names before XmlAttributeValueTok() gets a look-in.  This
6169        * default case should be retained as a safety net, but the code
6170        * excluded from coverage tests.
6171        *
6172        * LCOV_EXCL_START
6173        */
6174       if (enc == parser->m_encoding)
6175         parser->m_eventPtr = ptr;
6176       return XML_ERROR_UNEXPECTED_STATE;
6177       /* LCOV_EXCL_STOP */
6178     }
6179     ptr = next;
6180   }
6181   /* not reached */
6182 }
6183 
6184 #if XML_GE == 1
6185 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6186 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6187                  const char *entityTextPtr, const char *entityTextEnd,
6188                  enum XML_Account account) {
6189   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6190   STRING_POOL *pool = &(dtd->entityValuePool);
6191   enum XML_Error result = XML_ERROR_NONE;
6192 #  ifdef XML_DTD
6193   int oldInEntityValue = parser->m_prologState.inEntityValue;
6194   parser->m_prologState.inEntityValue = 1;
6195 #  else
6196   UNUSED_P(account);
6197 #  endif /* XML_DTD */
6198   /* never return Null for the value argument in EntityDeclHandler,
6199      since this would indicate an external entity; therefore we
6200      have to make sure that entityValuePool.start is not null */
6201   if (! pool->blocks) {
6202     if (! poolGrow(pool))
6203       return XML_ERROR_NO_MEMORY;
6204   }
6205 
6206   for (;;) {
6207     const char *next
6208         = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6209     int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6210 
6211     if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6212                                   account)) {
6213       accountingOnAbort(parser);
6214       result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6215       goto endEntityValue;
6216     }
6217 
6218     switch (tok) {
6219     case XML_TOK_PARAM_ENTITY_REF:
6220 #  ifdef XML_DTD
6221       if (parser->m_isParamEntity || enc != parser->m_encoding) {
6222         const XML_Char *name;
6223         ENTITY *entity;
6224         name = poolStoreString(&parser->m_tempPool, enc,
6225                                entityTextPtr + enc->minBytesPerChar,
6226                                next - enc->minBytesPerChar);
6227         if (! name) {
6228           result = XML_ERROR_NO_MEMORY;
6229           goto endEntityValue;
6230         }
6231         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6232         poolDiscard(&parser->m_tempPool);
6233         if (! entity) {
6234           /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6235           /* cannot report skipped entity here - see comments on
6236              parser->m_skippedEntityHandler
6237           if (parser->m_skippedEntityHandler)
6238             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6239           */
6240           dtd->keepProcessing = dtd->standalone;
6241           goto endEntityValue;
6242         }
6243         if (entity->open || (entity == parser->m_declEntity)) {
6244           if (enc == parser->m_encoding)
6245             parser->m_eventPtr = entityTextPtr;
6246           result = XML_ERROR_RECURSIVE_ENTITY_REF;
6247           goto endEntityValue;
6248         }
6249         if (entity->systemId) {
6250           if (parser->m_externalEntityRefHandler) {
6251             dtd->paramEntityRead = XML_FALSE;
6252             entity->open = XML_TRUE;
6253             entityTrackingOnOpen(parser, entity, __LINE__);
6254             if (! parser->m_externalEntityRefHandler(
6255                     parser->m_externalEntityRefHandlerArg, 0, entity->base,
6256                     entity->systemId, entity->publicId)) {
6257               entityTrackingOnClose(parser, entity, __LINE__);
6258               entity->open = XML_FALSE;
6259               result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6260               goto endEntityValue;
6261             }
6262             entityTrackingOnClose(parser, entity, __LINE__);
6263             entity->open = XML_FALSE;
6264             if (! dtd->paramEntityRead)
6265               dtd->keepProcessing = dtd->standalone;
6266           } else
6267             dtd->keepProcessing = dtd->standalone;
6268         } else {
6269           entity->open = XML_TRUE;
6270           entityTrackingOnOpen(parser, entity, __LINE__);
6271           result = storeEntityValue(
6272               parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6273               (const char *)(entity->textPtr + entity->textLen),
6274               XML_ACCOUNT_ENTITY_EXPANSION);
6275           entityTrackingOnClose(parser, entity, __LINE__);
6276           entity->open = XML_FALSE;
6277           if (result)
6278             goto endEntityValue;
6279         }
6280         break;
6281       }
6282 #  endif /* XML_DTD */
6283       /* In the internal subset, PE references are not legal
6284          within markup declarations, e.g entity values in this case. */
6285       parser->m_eventPtr = entityTextPtr;
6286       result = XML_ERROR_PARAM_ENTITY_REF;
6287       goto endEntityValue;
6288     case XML_TOK_NONE:
6289       result = XML_ERROR_NONE;
6290       goto endEntityValue;
6291     case XML_TOK_ENTITY_REF:
6292     case XML_TOK_DATA_CHARS:
6293       if (! poolAppend(pool, enc, entityTextPtr, next)) {
6294         result = XML_ERROR_NO_MEMORY;
6295         goto endEntityValue;
6296       }
6297       break;
6298     case XML_TOK_TRAILING_CR:
6299       next = entityTextPtr + enc->minBytesPerChar;
6300       /* fall through */
6301     case XML_TOK_DATA_NEWLINE:
6302       if (pool->end == pool->ptr && ! poolGrow(pool)) {
6303         result = XML_ERROR_NO_MEMORY;
6304         goto endEntityValue;
6305       }
6306       *(pool->ptr)++ = 0xA;
6307       break;
6308     case XML_TOK_CHAR_REF: {
6309       XML_Char buf[XML_ENCODE_MAX];
6310       int i;
6311       int n = XmlCharRefNumber(enc, entityTextPtr);
6312       if (n < 0) {
6313         if (enc == parser->m_encoding)
6314           parser->m_eventPtr = entityTextPtr;
6315         result = XML_ERROR_BAD_CHAR_REF;
6316         goto endEntityValue;
6317       }
6318       n = XmlEncode(n, (ICHAR *)buf);
6319       /* The XmlEncode() functions can never return 0 here.  That
6320        * error return happens if the code point passed in is either
6321        * negative or greater than or equal to 0x110000.  The
6322        * XmlCharRefNumber() functions will all return a number
6323        * strictly less than 0x110000 or a negative value if an error
6324        * occurred.  The negative value is intercepted above, so
6325        * XmlEncode() is never passed a value it might return an
6326        * error for.
6327        */
6328       for (i = 0; i < n; i++) {
6329         if (pool->end == pool->ptr && ! poolGrow(pool)) {
6330           result = XML_ERROR_NO_MEMORY;
6331           goto endEntityValue;
6332         }
6333         *(pool->ptr)++ = buf[i];
6334       }
6335     } break;
6336     case XML_TOK_PARTIAL:
6337       if (enc == parser->m_encoding)
6338         parser->m_eventPtr = entityTextPtr;
6339       result = XML_ERROR_INVALID_TOKEN;
6340       goto endEntityValue;
6341     case XML_TOK_INVALID:
6342       if (enc == parser->m_encoding)
6343         parser->m_eventPtr = next;
6344       result = XML_ERROR_INVALID_TOKEN;
6345       goto endEntityValue;
6346     default:
6347       /* This default case should be unnecessary -- all the tokens
6348        * that XmlEntityValueTok() can return have their own explicit
6349        * cases -- but should be retained for safety.  We do however
6350        * exclude it from the coverage statistics.
6351        *
6352        * LCOV_EXCL_START
6353        */
6354       if (enc == parser->m_encoding)
6355         parser->m_eventPtr = entityTextPtr;
6356       result = XML_ERROR_UNEXPECTED_STATE;
6357       goto endEntityValue;
6358       /* LCOV_EXCL_STOP */
6359     }
6360     entityTextPtr = next;
6361   }
6362 endEntityValue:
6363 #  ifdef XML_DTD
6364   parser->m_prologState.inEntityValue = oldInEntityValue;
6365 #  endif /* XML_DTD */
6366   return result;
6367 }
6368 
6369 #else /* XML_GE == 0 */
6370 
6371 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6372 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6373   // This will store "&amp;entity123;" in entity->textPtr
6374   // to end up as "&entity123;" in the handler.
6375   const char *const entity_start = "&amp;";
6376   const char *const entity_end = ";";
6377 
6378   STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6379   if (! poolAppendString(pool, entity_start)
6380       || ! poolAppendString(pool, entity->name)
6381       || ! poolAppendString(pool, entity_end)) {
6382     poolDiscard(pool);
6383     return XML_ERROR_NO_MEMORY;
6384   }
6385 
6386   entity->textPtr = poolStart(pool);
6387   entity->textLen = (int)(poolLength(pool));
6388   poolFinish(pool);
6389 
6390   return XML_ERROR_NONE;
6391 }
6392 
6393 #endif /* XML_GE == 0 */
6394 
6395 static void FASTCALL
normalizeLines(XML_Char * s)6396 normalizeLines(XML_Char *s) {
6397   XML_Char *p;
6398   for (;; s++) {
6399     if (*s == XML_T('\0'))
6400       return;
6401     if (*s == 0xD)
6402       break;
6403   }
6404   p = s;
6405   do {
6406     if (*s == 0xD) {
6407       *p++ = 0xA;
6408       if (*++s == 0xA)
6409         s++;
6410     } else
6411       *p++ = *s++;
6412   } while (*s);
6413   *p = XML_T('\0');
6414 }
6415 
6416 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6417 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6418                             const char *start, const char *end) {
6419   const XML_Char *target;
6420   XML_Char *data;
6421   const char *tem;
6422   if (! parser->m_processingInstructionHandler) {
6423     if (parser->m_defaultHandler)
6424       reportDefault(parser, enc, start, end);
6425     return 1;
6426   }
6427   start += enc->minBytesPerChar * 2;
6428   tem = start + XmlNameLength(enc, start);
6429   target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6430   if (! target)
6431     return 0;
6432   poolFinish(&parser->m_tempPool);
6433   data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6434                          end - enc->minBytesPerChar * 2);
6435   if (! data)
6436     return 0;
6437   normalizeLines(data);
6438   parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6439   poolClear(&parser->m_tempPool);
6440   return 1;
6441 }
6442 
6443 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6444 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6445               const char *end) {
6446   XML_Char *data;
6447   if (! parser->m_commentHandler) {
6448     if (parser->m_defaultHandler)
6449       reportDefault(parser, enc, start, end);
6450     return 1;
6451   }
6452   data = poolStoreString(&parser->m_tempPool, enc,
6453                          start + enc->minBytesPerChar * 4,
6454                          end - enc->minBytesPerChar * 3);
6455   if (! data)
6456     return 0;
6457   normalizeLines(data);
6458   parser->m_commentHandler(parser->m_handlerArg, data);
6459   poolClear(&parser->m_tempPool);
6460   return 1;
6461 }
6462 
6463 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6464 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6465               const char *end) {
6466   if (MUST_CONVERT(enc, s)) {
6467     enum XML_Convert_Result convert_res;
6468     const char **eventPP;
6469     const char **eventEndPP;
6470     if (enc == parser->m_encoding) {
6471       eventPP = &parser->m_eventPtr;
6472       eventEndPP = &parser->m_eventEndPtr;
6473     } else {
6474       /* To get here, two things must be true; the parser must be
6475        * using a character encoding that is not the same as the
6476        * encoding passed in, and the encoding passed in must need
6477        * conversion to the internal format (UTF-8 unless XML_UNICODE
6478        * is defined).  The only occasions on which the encoding passed
6479        * in is not the same as the parser's encoding are when it is
6480        * the internal encoding (e.g. a previously defined parameter
6481        * entity, already converted to internal format).  This by
6482        * definition doesn't need conversion, so the whole branch never
6483        * gets executed.
6484        *
6485        * For safety's sake we don't delete these lines and merely
6486        * exclude them from coverage statistics.
6487        *
6488        * LCOV_EXCL_START
6489        */
6490       eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6491       eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6492       /* LCOV_EXCL_STOP */
6493     }
6494     do {
6495       ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6496       convert_res
6497           = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6498       *eventEndPP = s;
6499       parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6500                                (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6501       *eventPP = s;
6502     } while ((convert_res != XML_CONVERT_COMPLETED)
6503              && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6504   } else
6505     parser->m_defaultHandler(
6506         parser->m_handlerArg, (const XML_Char *)s,
6507         (int)((const XML_Char *)end - (const XML_Char *)s));
6508 }
6509 
6510 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6511 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6512                 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6513   DEFAULT_ATTRIBUTE *att;
6514   if (value || isId) {
6515     /* The handling of default attributes gets messed up if we have
6516        a default which duplicates a non-default. */
6517     int i;
6518     for (i = 0; i < type->nDefaultAtts; i++)
6519       if (attId == type->defaultAtts[i].id)
6520         return 1;
6521     if (isId && ! type->idAtt && ! attId->xmlns)
6522       type->idAtt = attId;
6523   }
6524   if (type->nDefaultAtts == type->allocDefaultAtts) {
6525     if (type->allocDefaultAtts == 0) {
6526       type->allocDefaultAtts = 8;
6527       type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6528           parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6529       if (! type->defaultAtts) {
6530         type->allocDefaultAtts = 0;
6531         return 0;
6532       }
6533     } else {
6534       DEFAULT_ATTRIBUTE *temp;
6535 
6536       /* Detect and prevent integer overflow */
6537       if (type->allocDefaultAtts > INT_MAX / 2) {
6538         return 0;
6539       }
6540 
6541       int count = type->allocDefaultAtts * 2;
6542 
6543       /* Detect and prevent integer overflow.
6544        * The preprocessor guard addresses the "always false" warning
6545        * from -Wtype-limits on platforms where
6546        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6547 #if UINT_MAX >= SIZE_MAX
6548       if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6549         return 0;
6550       }
6551 #endif
6552 
6553       temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6554                                           (count * sizeof(DEFAULT_ATTRIBUTE)));
6555       if (temp == NULL)
6556         return 0;
6557       type->allocDefaultAtts = count;
6558       type->defaultAtts = temp;
6559     }
6560   }
6561   att = type->defaultAtts + type->nDefaultAtts;
6562   att->id = attId;
6563   att->value = value;
6564   att->isCdata = isCdata;
6565   if (! isCdata)
6566     attId->maybeTokenized = XML_TRUE;
6567   type->nDefaultAtts += 1;
6568   return 1;
6569 }
6570 
6571 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6572 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6573   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6574   const XML_Char *name;
6575   for (name = elementType->name; *name; name++) {
6576     if (*name == XML_T(ASCII_COLON)) {
6577       PREFIX *prefix;
6578       const XML_Char *s;
6579       for (s = elementType->name; s != name; s++) {
6580         if (! poolAppendChar(&dtd->pool, *s))
6581           return 0;
6582       }
6583       if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6584         return 0;
6585       prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6586                                 sizeof(PREFIX));
6587       if (! prefix)
6588         return 0;
6589       if (prefix->name == poolStart(&dtd->pool))
6590         poolFinish(&dtd->pool);
6591       else
6592         poolDiscard(&dtd->pool);
6593       elementType->prefix = prefix;
6594       break;
6595     }
6596   }
6597   return 1;
6598 }
6599 
6600 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6601 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6602                const char *end) {
6603   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6604   ATTRIBUTE_ID *id;
6605   const XML_Char *name;
6606   if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6607     return NULL;
6608   name = poolStoreString(&dtd->pool, enc, start, end);
6609   if (! name)
6610     return NULL;
6611   /* skip quotation mark - its storage will be reused (like in name[-1]) */
6612   ++name;
6613   id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6614                               sizeof(ATTRIBUTE_ID));
6615   if (! id)
6616     return NULL;
6617   if (id->name != name)
6618     poolDiscard(&dtd->pool);
6619   else {
6620     poolFinish(&dtd->pool);
6621     if (! parser->m_ns)
6622       ;
6623     else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6624              && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6625              && name[4] == XML_T(ASCII_s)
6626              && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6627       if (name[5] == XML_T('\0'))
6628         id->prefix = &dtd->defaultPrefix;
6629       else
6630         id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6631                                       sizeof(PREFIX));
6632       id->xmlns = XML_TRUE;
6633     } else {
6634       int i;
6635       for (i = 0; name[i]; i++) {
6636         /* attributes without prefix are *not* in the default namespace */
6637         if (name[i] == XML_T(ASCII_COLON)) {
6638           int j;
6639           for (j = 0; j < i; j++) {
6640             if (! poolAppendChar(&dtd->pool, name[j]))
6641               return NULL;
6642           }
6643           if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6644             return NULL;
6645           id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6646                                         poolStart(&dtd->pool), sizeof(PREFIX));
6647           if (! id->prefix)
6648             return NULL;
6649           if (id->prefix->name == poolStart(&dtd->pool))
6650             poolFinish(&dtd->pool);
6651           else
6652             poolDiscard(&dtd->pool);
6653           break;
6654         }
6655       }
6656     }
6657   }
6658   return id;
6659 }
6660 
6661 #define CONTEXT_SEP XML_T(ASCII_FF)
6662 
6663 static const XML_Char *
getContext(XML_Parser parser)6664 getContext(XML_Parser parser) {
6665   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6666   HASH_TABLE_ITER iter;
6667   XML_Bool needSep = XML_FALSE;
6668 
6669   if (dtd->defaultPrefix.binding) {
6670     int i;
6671     int len;
6672     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6673       return NULL;
6674     len = dtd->defaultPrefix.binding->uriLen;
6675     if (parser->m_namespaceSeparator)
6676       len--;
6677     for (i = 0; i < len; i++) {
6678       if (! poolAppendChar(&parser->m_tempPool,
6679                            dtd->defaultPrefix.binding->uri[i])) {
6680         /* Because of memory caching, I don't believe this line can be
6681          * executed.
6682          *
6683          * This is part of a loop copying the default prefix binding
6684          * URI into the parser's temporary string pool.  Previously,
6685          * that URI was copied into the same string pool, with a
6686          * terminating NUL character, as part of setContext().  When
6687          * the pool was cleared, that leaves a block definitely big
6688          * enough to hold the URI on the free block list of the pool.
6689          * The URI copy in getContext() therefore cannot run out of
6690          * memory.
6691          *
6692          * If the pool is used between the setContext() and
6693          * getContext() calls, the worst it can do is leave a bigger
6694          * block on the front of the free list.  Given that this is
6695          * all somewhat inobvious and program logic can be changed, we
6696          * don't delete the line but we do exclude it from the test
6697          * coverage statistics.
6698          */
6699         return NULL; /* LCOV_EXCL_LINE */
6700       }
6701     }
6702     needSep = XML_TRUE;
6703   }
6704 
6705   hashTableIterInit(&iter, &(dtd->prefixes));
6706   for (;;) {
6707     int i;
6708     int len;
6709     const XML_Char *s;
6710     PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6711     if (! prefix)
6712       break;
6713     if (! prefix->binding) {
6714       /* This test appears to be (justifiable) paranoia.  There does
6715        * not seem to be a way of injecting a prefix without a binding
6716        * that doesn't get errored long before this function is called.
6717        * The test should remain for safety's sake, so we instead
6718        * exclude the following line from the coverage statistics.
6719        */
6720       continue; /* LCOV_EXCL_LINE */
6721     }
6722     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6723       return NULL;
6724     for (s = prefix->name; *s; s++)
6725       if (! poolAppendChar(&parser->m_tempPool, *s))
6726         return NULL;
6727     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6728       return NULL;
6729     len = prefix->binding->uriLen;
6730     if (parser->m_namespaceSeparator)
6731       len--;
6732     for (i = 0; i < len; i++)
6733       if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6734         return NULL;
6735     needSep = XML_TRUE;
6736   }
6737 
6738   hashTableIterInit(&iter, &(dtd->generalEntities));
6739   for (;;) {
6740     const XML_Char *s;
6741     ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6742     if (! e)
6743       break;
6744     if (! e->open)
6745       continue;
6746     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6747       return NULL;
6748     for (s = e->name; *s; s++)
6749       if (! poolAppendChar(&parser->m_tempPool, *s))
6750         return 0;
6751     needSep = XML_TRUE;
6752   }
6753 
6754   if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6755     return NULL;
6756   return parser->m_tempPool.start;
6757 }
6758 
6759 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)6760 setContext(XML_Parser parser, const XML_Char *context) {
6761   if (context == NULL) {
6762     return XML_FALSE;
6763   }
6764 
6765   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6766   const XML_Char *s = context;
6767 
6768   while (*context != XML_T('\0')) {
6769     if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6770       ENTITY *e;
6771       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6772         return XML_FALSE;
6773       e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6774                            poolStart(&parser->m_tempPool), 0);
6775       if (e)
6776         e->open = XML_TRUE;
6777       if (*s != XML_T('\0'))
6778         s++;
6779       context = s;
6780       poolDiscard(&parser->m_tempPool);
6781     } else if (*s == XML_T(ASCII_EQUALS)) {
6782       PREFIX *prefix;
6783       if (poolLength(&parser->m_tempPool) == 0)
6784         prefix = &dtd->defaultPrefix;
6785       else {
6786         if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6787           return XML_FALSE;
6788         prefix
6789             = (PREFIX *)lookup(parser, &dtd->prefixes,
6790                                poolStart(&parser->m_tempPool), sizeof(PREFIX));
6791         if (! prefix)
6792           return XML_FALSE;
6793         if (prefix->name == poolStart(&parser->m_tempPool)) {
6794           prefix->name = poolCopyString(&dtd->pool, prefix->name);
6795           if (! prefix->name)
6796             return XML_FALSE;
6797         }
6798         poolDiscard(&parser->m_tempPool);
6799       }
6800       for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6801            context++)
6802         if (! poolAppendChar(&parser->m_tempPool, *context))
6803           return XML_FALSE;
6804       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6805         return XML_FALSE;
6806       if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6807                      &parser->m_inheritedBindings)
6808           != XML_ERROR_NONE)
6809         return XML_FALSE;
6810       poolDiscard(&parser->m_tempPool);
6811       if (*context != XML_T('\0'))
6812         ++context;
6813       s = context;
6814     } else {
6815       if (! poolAppendChar(&parser->m_tempPool, *s))
6816         return XML_FALSE;
6817       s++;
6818     }
6819   }
6820   return XML_TRUE;
6821 }
6822 
6823 static void FASTCALL
normalizePublicId(XML_Char * publicId)6824 normalizePublicId(XML_Char *publicId) {
6825   XML_Char *p = publicId;
6826   XML_Char *s;
6827   for (s = publicId; *s; s++) {
6828     switch (*s) {
6829     case 0x20:
6830     case 0xD:
6831     case 0xA:
6832       if (p != publicId && p[-1] != 0x20)
6833         *p++ = 0x20;
6834       break;
6835     default:
6836       *p++ = *s;
6837     }
6838   }
6839   if (p != publicId && p[-1] == 0x20)
6840     --p;
6841   *p = XML_T('\0');
6842 }
6843 
6844 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6845 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6846   DTD *p = ms->malloc_fcn(sizeof(DTD));
6847   if (p == NULL)
6848     return p;
6849   poolInit(&(p->pool), ms);
6850   poolInit(&(p->entityValuePool), ms);
6851   hashTableInit(&(p->generalEntities), ms);
6852   hashTableInit(&(p->elementTypes), ms);
6853   hashTableInit(&(p->attributeIds), ms);
6854   hashTableInit(&(p->prefixes), ms);
6855 #ifdef XML_DTD
6856   p->paramEntityRead = XML_FALSE;
6857   hashTableInit(&(p->paramEntities), ms);
6858 #endif /* XML_DTD */
6859   p->defaultPrefix.name = NULL;
6860   p->defaultPrefix.binding = NULL;
6861 
6862   p->in_eldecl = XML_FALSE;
6863   p->scaffIndex = NULL;
6864   p->scaffold = NULL;
6865   p->scaffLevel = 0;
6866   p->scaffSize = 0;
6867   p->scaffCount = 0;
6868   p->contentStringLen = 0;
6869 
6870   p->keepProcessing = XML_TRUE;
6871   p->hasParamEntityRefs = XML_FALSE;
6872   p->standalone = XML_FALSE;
6873   return p;
6874 }
6875 
6876 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6877 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6878   HASH_TABLE_ITER iter;
6879   hashTableIterInit(&iter, &(p->elementTypes));
6880   for (;;) {
6881     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6882     if (! e)
6883       break;
6884     if (e->allocDefaultAtts != 0)
6885       ms->free_fcn(e->defaultAtts);
6886   }
6887   hashTableClear(&(p->generalEntities));
6888 #ifdef XML_DTD
6889   p->paramEntityRead = XML_FALSE;
6890   hashTableClear(&(p->paramEntities));
6891 #endif /* XML_DTD */
6892   hashTableClear(&(p->elementTypes));
6893   hashTableClear(&(p->attributeIds));
6894   hashTableClear(&(p->prefixes));
6895   poolClear(&(p->pool));
6896   poolClear(&(p->entityValuePool));
6897   p->defaultPrefix.name = NULL;
6898   p->defaultPrefix.binding = NULL;
6899 
6900   p->in_eldecl = XML_FALSE;
6901 
6902   ms->free_fcn(p->scaffIndex);
6903   p->scaffIndex = NULL;
6904   ms->free_fcn(p->scaffold);
6905   p->scaffold = NULL;
6906 
6907   p->scaffLevel = 0;
6908   p->scaffSize = 0;
6909   p->scaffCount = 0;
6910   p->contentStringLen = 0;
6911 
6912   p->keepProcessing = XML_TRUE;
6913   p->hasParamEntityRefs = XML_FALSE;
6914   p->standalone = XML_FALSE;
6915 }
6916 
6917 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6918 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6919   HASH_TABLE_ITER iter;
6920   hashTableIterInit(&iter, &(p->elementTypes));
6921   for (;;) {
6922     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6923     if (! e)
6924       break;
6925     if (e->allocDefaultAtts != 0)
6926       ms->free_fcn(e->defaultAtts);
6927   }
6928   hashTableDestroy(&(p->generalEntities));
6929 #ifdef XML_DTD
6930   hashTableDestroy(&(p->paramEntities));
6931 #endif /* XML_DTD */
6932   hashTableDestroy(&(p->elementTypes));
6933   hashTableDestroy(&(p->attributeIds));
6934   hashTableDestroy(&(p->prefixes));
6935   poolDestroy(&(p->pool));
6936   poolDestroy(&(p->entityValuePool));
6937   if (isDocEntity) {
6938     ms->free_fcn(p->scaffIndex);
6939     ms->free_fcn(p->scaffold);
6940   }
6941   ms->free_fcn(p);
6942 }
6943 
6944 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6945    The new DTD has already been initialized.
6946 */
6947 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6948 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6949         const XML_Memory_Handling_Suite *ms) {
6950   HASH_TABLE_ITER iter;
6951 
6952   /* Copy the prefix table. */
6953 
6954   hashTableIterInit(&iter, &(oldDtd->prefixes));
6955   for (;;) {
6956     const XML_Char *name;
6957     const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6958     if (! oldP)
6959       break;
6960     name = poolCopyString(&(newDtd->pool), oldP->name);
6961     if (! name)
6962       return 0;
6963     if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6964       return 0;
6965   }
6966 
6967   hashTableIterInit(&iter, &(oldDtd->attributeIds));
6968 
6969   /* Copy the attribute id table. */
6970 
6971   for (;;) {
6972     ATTRIBUTE_ID *newA;
6973     const XML_Char *name;
6974     const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6975 
6976     if (! oldA)
6977       break;
6978     /* Remember to allocate the scratch byte before the name. */
6979     if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6980       return 0;
6981     name = poolCopyString(&(newDtd->pool), oldA->name);
6982     if (! name)
6983       return 0;
6984     ++name;
6985     newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6986                                   sizeof(ATTRIBUTE_ID));
6987     if (! newA)
6988       return 0;
6989     newA->maybeTokenized = oldA->maybeTokenized;
6990     if (oldA->prefix) {
6991       newA->xmlns = oldA->xmlns;
6992       if (oldA->prefix == &oldDtd->defaultPrefix)
6993         newA->prefix = &newDtd->defaultPrefix;
6994       else
6995         newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6996                                         oldA->prefix->name, 0);
6997     }
6998   }
6999 
7000   /* Copy the element type table. */
7001 
7002   hashTableIterInit(&iter, &(oldDtd->elementTypes));
7003 
7004   for (;;) {
7005     int i;
7006     ELEMENT_TYPE *newE;
7007     const XML_Char *name;
7008     const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7009     if (! oldE)
7010       break;
7011     name = poolCopyString(&(newDtd->pool), oldE->name);
7012     if (! name)
7013       return 0;
7014     newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7015                                   sizeof(ELEMENT_TYPE));
7016     if (! newE)
7017       return 0;
7018     if (oldE->nDefaultAtts) {
7019       newE->defaultAtts
7020           = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7021       if (! newE->defaultAtts) {
7022         return 0;
7023       }
7024     }
7025     if (oldE->idAtt)
7026       newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7027                                            oldE->idAtt->name, 0);
7028     newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7029     if (oldE->prefix)
7030       newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7031                                       oldE->prefix->name, 0);
7032     for (i = 0; i < newE->nDefaultAtts; i++) {
7033       newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7034           oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7035       newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7036       if (oldE->defaultAtts[i].value) {
7037         newE->defaultAtts[i].value
7038             = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7039         if (! newE->defaultAtts[i].value)
7040           return 0;
7041       } else
7042         newE->defaultAtts[i].value = NULL;
7043     }
7044   }
7045 
7046   /* Copy the entity tables. */
7047   if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7048                         &(oldDtd->generalEntities)))
7049     return 0;
7050 
7051 #ifdef XML_DTD
7052   if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7053                         &(oldDtd->paramEntities)))
7054     return 0;
7055   newDtd->paramEntityRead = oldDtd->paramEntityRead;
7056 #endif /* XML_DTD */
7057 
7058   newDtd->keepProcessing = oldDtd->keepProcessing;
7059   newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7060   newDtd->standalone = oldDtd->standalone;
7061 
7062   /* Don't want deep copying for scaffolding */
7063   newDtd->in_eldecl = oldDtd->in_eldecl;
7064   newDtd->scaffold = oldDtd->scaffold;
7065   newDtd->contentStringLen = oldDtd->contentStringLen;
7066   newDtd->scaffSize = oldDtd->scaffSize;
7067   newDtd->scaffLevel = oldDtd->scaffLevel;
7068   newDtd->scaffIndex = oldDtd->scaffIndex;
7069 
7070   return 1;
7071 } /* End dtdCopy */
7072 
7073 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7074 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7075                 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7076   HASH_TABLE_ITER iter;
7077   const XML_Char *cachedOldBase = NULL;
7078   const XML_Char *cachedNewBase = NULL;
7079 
7080   hashTableIterInit(&iter, oldTable);
7081 
7082   for (;;) {
7083     ENTITY *newE;
7084     const XML_Char *name;
7085     const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7086     if (! oldE)
7087       break;
7088     name = poolCopyString(newPool, oldE->name);
7089     if (! name)
7090       return 0;
7091     newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7092     if (! newE)
7093       return 0;
7094     if (oldE->systemId) {
7095       const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7096       if (! tem)
7097         return 0;
7098       newE->systemId = tem;
7099       if (oldE->base) {
7100         if (oldE->base == cachedOldBase)
7101           newE->base = cachedNewBase;
7102         else {
7103           cachedOldBase = oldE->base;
7104           tem = poolCopyString(newPool, cachedOldBase);
7105           if (! tem)
7106             return 0;
7107           cachedNewBase = newE->base = tem;
7108         }
7109       }
7110       if (oldE->publicId) {
7111         tem = poolCopyString(newPool, oldE->publicId);
7112         if (! tem)
7113           return 0;
7114         newE->publicId = tem;
7115       }
7116     } else {
7117       const XML_Char *tem
7118           = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7119       if (! tem)
7120         return 0;
7121       newE->textPtr = tem;
7122       newE->textLen = oldE->textLen;
7123     }
7124     if (oldE->notation) {
7125       const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7126       if (! tem)
7127         return 0;
7128       newE->notation = tem;
7129     }
7130     newE->is_param = oldE->is_param;
7131     newE->is_internal = oldE->is_internal;
7132   }
7133   return 1;
7134 }
7135 
/* log2 of the number of slots a hash table receives on its first
   insertion: lookup() sets table->size to (size_t)1 << INIT_POWER. */
#define INIT_POWER 6
7137 
7138 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7139 keyeq(KEY s1, KEY s2) {
7140   for (; *s1 == *s2; s1++, s2++)
7141     if (*s1 == 0)
7142       return XML_TRUE;
7143   return XML_FALSE;
7144 }
7145 
7146 static size_t
keylen(KEY s)7147 keylen(KEY s) {
7148   size_t len = 0;
7149   for (; *s; s++, len++)
7150     ;
7151   return len;
7152 }
7153 
7154 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7155 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7156   key->k[0] = 0;
7157   key->k[1] = get_hash_secret_salt(parser);
7158 }
7159 
7160 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7161 hash(XML_Parser parser, KEY s) {
7162   struct siphash state;
7163   struct sipkey key;
7164   (void)sip24_valid;
7165   copy_salt_to_sipkey(parser, &key);
7166   sip24_init(&state, &key);
7167   sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7168   return (unsigned long)sip24_final(&state);
7169 }
7170 
/* Find the entry called name in table.
 *
 * If it exists it is returned.  If it does not exist and createSize is
 * zero, NULL is returned (pure find).  If it does not exist and
 * createSize is non-zero, a new entry of createSize bytes is allocated,
 * zero-filled, given `name` as its name and inserted.  The name pointer
 * is stored as passed in, not copied, so the caller must keep the string
 * alive for as long as the entry exists.
 *
 * Returns NULL on allocation failure or when the table cannot grow any
 * further.  parser is only used for hashing (see hash()).
 */
static NAMED *
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
  size_t i;
  if (table->size == 0) {
    /* Empty table: nothing can be found; allocate the slot array lazily
       on the first insertion. */
    size_t tsize;
    if (! createSize)
      return NULL;
    table->power = INIT_POWER;
    /* table->size is a power of 2 */
    table->size = (size_t)1 << INIT_POWER;
    tsize = table->size * sizeof(NAMED *);
    table->v = table->mem->malloc_fcn(tsize);
    if (! table->v) {
      /* Roll back so the table still reads as empty. */
      table->size = 0;
      return NULL;
    }
    memset(table->v, 0, tsize);
    i = hash(parser, name) & ((unsigned long)table->size - 1);
  } else {
    unsigned long h = hash(parser, name);
    unsigned long mask = (unsigned long)table->size - 1;
    unsigned char step = 0;
    i = h & mask;
    /* Probe until the key or an empty slot is found.  The probe step is
       computed lazily on the first collision; the index moves downwards
       by step and wraps around at the start of the array. */
    while (table->v[i]) {
      if (keyeq(name, table->v[i]->name))
        return table->v[i];
      if (! step)
        step = PROBE_STEP(h, mask, table->power);
      i < step ? (i += table->size - step) : (i -= step);
    }
    if (! createSize)
      return NULL;

    /* check for overflow (table is half full) */
    if (table->used >> (table->power - 1)) {
      unsigned char newPower = table->power + 1;

      /* Detect and prevent invalid shift */
      if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
        return NULL;
      }

      size_t newSize = (size_t)1 << newPower;
      unsigned long newMask = (unsigned long)newSize - 1;

      /* Detect and prevent integer overflow */
      if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
        return NULL;
      }

      size_t tsize = newSize * sizeof(NAMED *);
      NAMED **newV = table->mem->malloc_fcn(tsize);
      if (! newV)
        return NULL;
      memset(newV, 0, tsize);
      /* Re-insert every existing entry into the doubled slot array. */
      for (i = 0; i < table->size; i++)
        if (table->v[i]) {
          unsigned long newHash = hash(parser, table->v[i]->name);
          size_t j = newHash & newMask;
          step = 0;
          while (newV[j]) {
            if (! step)
              step = PROBE_STEP(newHash, newMask, newPower);
            j < step ? (j += newSize - step) : (j -= step);
          }
          newV[j] = table->v[i];
        }
      table->mem->free_fcn(table->v);
      table->v = newV;
      table->power = newPower;
      table->size = newSize;
      /* The slot found before growing is stale; locate a free slot for
         the new key in the enlarged array. */
      i = h & newMask;
      step = 0;
      while (table->v[i]) {
        if (! step)
          step = PROBE_STEP(h, newMask, newPower);
        i < step ? (i += newSize - step) : (i -= step);
      }
    }
  }
  /* Slot i is free at this point: create the new entry there. */
  table->v[i] = table->mem->malloc_fcn(createSize);
  if (! table->v[i])
    return NULL;
  memset(table->v[i], 0, createSize);
  table->v[i]->name = name;
  (table->used)++;
  return table->v[i];
}
7259 
7260 static void FASTCALL
hashTableClear(HASH_TABLE * table)7261 hashTableClear(HASH_TABLE *table) {
7262   size_t i;
7263   for (i = 0; i < table->size; i++) {
7264     table->mem->free_fcn(table->v[i]);
7265     table->v[i] = NULL;
7266   }
7267   table->used = 0;
7268 }
7269 
7270 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7271 hashTableDestroy(HASH_TABLE *table) {
7272   size_t i;
7273   for (i = 0; i < table->size; i++)
7274     table->mem->free_fcn(table->v[i]);
7275   table->mem->free_fcn(table->v);
7276 }
7277 
7278 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7279 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7280   p->power = 0;
7281   p->size = 0;
7282   p->used = 0;
7283   p->v = NULL;
7284   p->mem = ms;
7285 }
7286 
7287 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7288 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7289   iter->p = table->v;
7290   iter->end = iter->p ? iter->p + table->size : NULL;
7291 }
7292 
7293 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7294 hashTableIterNext(HASH_TABLE_ITER *iter) {
7295   while (iter->p != iter->end) {
7296     NAMED *tem = *(iter->p)++;
7297     if (tem)
7298       return tem;
7299   }
7300   return NULL;
7301 }
7302 
7303 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7304 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7305   pool->blocks = NULL;
7306   pool->freeBlocks = NULL;
7307   pool->start = NULL;
7308   pool->ptr = NULL;
7309   pool->end = NULL;
7310   pool->mem = ms;
7311 }
7312 
7313 static void FASTCALL
poolClear(STRING_POOL * pool)7314 poolClear(STRING_POOL *pool) {
7315   if (! pool->freeBlocks)
7316     pool->freeBlocks = pool->blocks;
7317   else {
7318     BLOCK *p = pool->blocks;
7319     while (p) {
7320       BLOCK *tem = p->next;
7321       p->next = pool->freeBlocks;
7322       pool->freeBlocks = p;
7323       p = tem;
7324     }
7325   }
7326   pool->blocks = NULL;
7327   pool->start = NULL;
7328   pool->ptr = NULL;
7329   pool->end = NULL;
7330 }
7331 
7332 static void FASTCALL
poolDestroy(STRING_POOL * pool)7333 poolDestroy(STRING_POOL *pool) {
7334   BLOCK *p = pool->blocks;
7335   while (p) {
7336     BLOCK *tem = p->next;
7337     pool->mem->free_fcn(p);
7338     p = tem;
7339   }
7340   p = pool->freeBlocks;
7341   while (p) {
7342     BLOCK *tem = p->next;
7343     pool->mem->free_fcn(p);
7344     p = tem;
7345   }
7346 }
7347 
7348 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7349 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7350            const char *end) {
7351   if (! pool->ptr && ! poolGrow(pool))
7352     return NULL;
7353   for (;;) {
7354     const enum XML_Convert_Result convert_res = XmlConvert(
7355         enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7356     if ((convert_res == XML_CONVERT_COMPLETED)
7357         || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7358       break;
7359     if (! poolGrow(pool))
7360       return NULL;
7361   }
7362   return pool->start;
7363 }
7364 
7365 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7366 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7367   do {
7368     if (! poolAppendChar(pool, *s))
7369       return NULL;
7370   } while (*s++);
7371   s = pool->start;
7372   poolFinish(pool);
7373   return s;
7374 }
7375 
7376 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7377 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7378   if (! pool->ptr && ! poolGrow(pool)) {
7379     /* The following line is unreachable given the current usage of
7380      * poolCopyStringN().  Currently it is called from exactly one
7381      * place to copy the text of a simple general entity.  By that
7382      * point, the name of the entity is already stored in the pool, so
7383      * pool->ptr cannot be NULL.
7384      *
7385      * If poolCopyStringN() is used elsewhere as it well might be,
7386      * this line may well become executable again.  Regardless, this
7387      * sort of check shouldn't be removed lightly, so we just exclude
7388      * it from the coverage statistics.
7389      */
7390     return NULL; /* LCOV_EXCL_LINE */
7391   }
7392   for (; n > 0; --n, s++) {
7393     if (! poolAppendChar(pool, *s))
7394       return NULL;
7395   }
7396   s = pool->start;
7397   poolFinish(pool);
7398   return s;
7399 }
7400 
7401 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7402 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7403   while (*s) {
7404     if (! poolAppendChar(pool, *s))
7405       return NULL;
7406     s++;
7407   }
7408   return pool->start;
7409 }
7410 
7411 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7412 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7413                 const char *end) {
7414   if (! poolAppend(pool, enc, ptr, end))
7415     return NULL;
7416   if (pool->ptr == pool->end && ! poolGrow(pool))
7417     return NULL;
7418   *(pool->ptr)++ = 0;
7419   return pool->start;
7420 }
7421 
7422 static size_t
poolBytesToAllocateFor(int blockSize)7423 poolBytesToAllocateFor(int blockSize) {
7424   /* Unprotected math would be:
7425   ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7426   **
7427   ** Detect overflow, avoiding _signed_ overflow undefined behavior
7428   ** For a + b * c we check b * c in isolation first, so that addition of a
7429   ** on top has no chance of making us accept a small non-negative number
7430   */
7431   const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7432 
7433   if (blockSize <= 0)
7434     return 0;
7435 
7436   if (blockSize > (int)(INT_MAX / stretch))
7437     return 0;
7438 
7439   {
7440     const int stretchedBlockSize = blockSize * (int)stretch;
7441     const int bytesToAllocate
7442         = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7443     if (bytesToAllocate < 0)
7444       return 0;
7445 
7446     return (size_t)bytesToAllocate;
7447   }
7448 }
7449 
/* Make room for more characters in the string under construction.
 *
 * Strategies, tried in this order:
 *  1. If there is a free block and no current block, take the first
 *     free block as is.
 *  2. If the first free block is larger than the current block, move
 *     it to the head of the used list and copy the current contents
 *     into it.
 *  3. If the string under construction begins at the start of the
 *     head block, realloc that block to twice its capacity.
 *  4. Otherwise allocate a new block (at least INIT_BLOCK_SIZE
 *     characters, else twice the current capacity) and copy the
 *     partial string into it.
 *
 * pool->start, pool->ptr and pool->end are updated to refer to the new
 * storage.  Returns XML_TRUE on success, XML_FALSE on allocation failure
 * or when the required size would overflow.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == 0) {
      /* Nothing in progress: reuse the first free block directly. */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* The first free block is bigger than the current one: unlink it
         from the free list, make it the head of the used list and move
         the current contents over. */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* The string under construction begins at the start of the head
       block, so that block can be enlarged in place with realloc. */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where more than
       * INT_MAX/2 bytes have already been allocated.  This isn't
       * readily testable, since it is unlikely that an average
       * machine will have that much memory, so we exclude it from the
       * coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
                                           (unsigned)bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE;
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    /* The string under construction does not begin at the start of the
       head block (or there is no block yet): allocate a fresh block and
       copy the partial string into it. */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = pool->mem->malloc_fcn(bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Carry over whatever has been written of the current string. */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
7553 
7554 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7555 nextScaffoldPart(XML_Parser parser) {
7556   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7557   CONTENT_SCAFFOLD *me;
7558   int next;
7559 
7560   if (! dtd->scaffIndex) {
7561     dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7562     if (! dtd->scaffIndex)
7563       return -1;
7564     dtd->scaffIndex[0] = 0;
7565   }
7566 
7567   if (dtd->scaffCount >= dtd->scaffSize) {
7568     CONTENT_SCAFFOLD *temp;
7569     if (dtd->scaffold) {
7570       /* Detect and prevent integer overflow */
7571       if (dtd->scaffSize > UINT_MAX / 2u) {
7572         return -1;
7573       }
7574       /* Detect and prevent integer overflow.
7575        * The preprocessor guard addresses the "always false" warning
7576        * from -Wtype-limits on platforms where
7577        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7578 #if UINT_MAX >= SIZE_MAX
7579       if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7580         return -1;
7581       }
7582 #endif
7583 
7584       temp = (CONTENT_SCAFFOLD *)REALLOC(
7585           parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7586       if (temp == NULL)
7587         return -1;
7588       dtd->scaffSize *= 2;
7589     } else {
7590       temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7591                                                     * sizeof(CONTENT_SCAFFOLD));
7592       if (temp == NULL)
7593         return -1;
7594       dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7595     }
7596     dtd->scaffold = temp;
7597   }
7598   next = dtd->scaffCount++;
7599   me = &dtd->scaffold[next];
7600   if (dtd->scaffLevel) {
7601     CONTENT_SCAFFOLD *parent
7602         = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7603     if (parent->lastchild) {
7604       dtd->scaffold[parent->lastchild].nextsib = next;
7605     }
7606     if (! parent->childcnt)
7607       parent->firstchild = next;
7608     parent->lastchild = next;
7609     parent->childcnt++;
7610   }
7611   me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7612   return next;
7613 }
7614 
/* Build the XML_Content tree for the content model currently held in
 * parser->m_dtd->scaffold.
 *
 * Returns a single block obtained through MALLOC(parser, ...) that holds
 * all tree nodes followed by all name strings, or NULL when the required
 * size would overflow size_t or the allocation fails.
 */
static XML_Content *
build_model(XML_Parser parser) {
  /* Function build_model transforms the existing parser->m_dtd->scaffold
   * array of CONTENT_SCAFFOLD tree nodes into a new array of
   * XML_Content tree nodes followed by a gapless list of zero-terminated
   * strings. */
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *ret;
  XML_Char *str; /* the current string writing location */

  /* Detect and prevent integer overflow.
   * The preprocessor guard addresses the "always false" warning
   * from -Wtype-limits on platforms where
   * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
  if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
    return NULL;
  }
  if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
    return NULL;
  }
#endif
  /* The sum of both parts must fit into size_t as well. */
  if (dtd->scaffCount * sizeof(XML_Content)
      > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
    return NULL;
  }

  const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
                            + (dtd->contentStringLen * sizeof(XML_Char)));

  ret = (XML_Content *)MALLOC(parser, allocsize);
  if (! ret)
    return NULL;

  /* What follows is an iterative implementation of what was previously done
   * recursively in a dedicated function called "build_node".  (The old
   * recursive build_node could be forced into stack exhaustion from input as
   * small as a few megabytes, and so that was a security issue.  Hence, a
   * function call stack is avoided now by resolving recursion.)
   *
   * The iterative approach works as follows:
   *
   * - We have two writing pointers, both walking up the result array; one does
   *   the work, the other creates "jobs" for its colleague to do, and leads
   *   the way:
   *
   *   - The faster one, pointer jobDest, always leads and writes "what job
   *     to do" by the other, once they reach that place in the
   *     array: leader "jobDest" stores the source node array index (relative
   *     to array dtd->scaffold) in field "numchildren".
   *
   *   - The slower one, pointer dest, looks at the value stored in the
   *     "numchildren" field (which actually holds a source node array index
   *     at that time) and puts the real data from dtd->scaffold in.
   *
   * - Before the loop starts, jobDest writes source array index 0
   *   (where the root node is located) so that dest will have something to do
   *   when it starts operation.
   *
   * - Whenever nodes with children are encountered, jobDest appends
   *   them as new jobs, in order.  As a result, tree node siblings are
   *   adjacent in the resulting array, for example:
   *
   *     [0] root, has two children
   *       [1] first child of 0, has three children
   *         [3] first child of 1, does not have children
   *         [4] second child of 1, does not have children
   *         [5] third child of 1, does not have children
   *       [2] second child of 0, does not have children
   *
   *   Or (the same data) presented in flat array view:
   *
   *     [0] root, has two children
   *
   *     [1] first child of 0, has three children
   *     [2] second child of 0, does not have children
   *
   *     [3] first child of 1, does not have children
   *     [4] second child of 1, does not have children
   *     [5] third child of 1, does not have children
   *
   * - The algorithm repeats until all target array indices have been processed.
   */
  XML_Content *dest = ret; /* tree node writing location, moves upwards */
  XML_Content *const destLimit = &ret[dtd->scaffCount];
  XML_Content *jobDest = ret; /* next free writing location in target array */
  /* The name strings are stored directly behind the last tree node. */
  str = (XML_Char *)&ret[dtd->scaffCount];

  /* Add the starting job, the root node (index 0) of the source tree  */
  (jobDest++)->numchildren = 0;

  for (; dest < destLimit; dest++) {
    /* Retrieve source tree array index from job storage */
    const int src_node = (int)dest->numchildren;

    /* Convert item */
    dest->type = dtd->scaffold[src_node].type;
    dest->quant = dtd->scaffold[src_node].quant;
    if (dest->type == XML_CTYPE_NAME) {
      /* Name node: copy the name, terminator included, into the string
         area; a name node has no children. */
      const XML_Char *src;
      dest->name = str;
      src = dtd->scaffold[src_node].name;
      for (;;) {
        *str++ = *src;
        if (! *src)
          break;
        src++;
      }
      dest->numchildren = 0;
      dest->children = NULL;
    } else {
      unsigned int i;
      int cn;
      dest->name = NULL;
      dest->numchildren = dtd->scaffold[src_node].childcnt;
      dest->children = jobDest;

      /* Append scaffold indices of children to array */
      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
        (jobDest++)->numchildren = (unsigned int)cn;
    }
  }

  return ret;
}
7741 
7742 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7743 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7744                const char *end) {
7745   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7746   const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7747   ELEMENT_TYPE *ret;
7748 
7749   if (! name)
7750     return NULL;
7751   ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7752                                sizeof(ELEMENT_TYPE));
7753   if (! ret)
7754     return NULL;
7755   if (ret->name != name)
7756     poolDiscard(&dtd->pool);
7757   else {
7758     poolFinish(&dtd->pool);
7759     if (! setElementTypePrefix(parser, ret))
7760       return NULL;
7761   }
7762   return ret;
7763 }
7764 
7765 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7766 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7767   size_t charsRequired = 0;
7768   XML_Char *result;
7769 
7770   /* First determine how long the string is */
7771   while (s[charsRequired] != 0) {
7772     charsRequired++;
7773   }
7774   /* Include the terminator */
7775   charsRequired++;
7776 
7777   /* Now allocate space for the copy */
7778   result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7779   if (result == NULL)
7780     return NULL;
7781   /* Copy the original into place */
7782   memcpy(result, s, charsRequired * sizeof(XML_Char));
7783   return result;
7784 }
7785 
7786 #if XML_GE == 1
7787 
7788 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7789 accountingGetCurrentAmplification(XML_Parser rootParser) {
7790   //                                          1.........1.........12 => 22
7791   const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
7792   const XmlBigCount countBytesOutput
7793       = rootParser->m_accounting.countBytesDirect
7794         + rootParser->m_accounting.countBytesIndirect;
7795   const float amplificationFactor
7796       = rootParser->m_accounting.countBytesDirect
7797             ? (countBytesOutput
7798                / (float)(rootParser->m_accounting.countBytesDirect))
7799             : ((lenOfShortestInclude
7800                 + rootParser->m_accounting.countBytesIndirect)
7801                / (float)lenOfShortestInclude);
7802   assert(! rootParser->m_parentParser);
7803   return amplificationFactor;
7804 }
7805 
7806 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7807 accountingReportStats(XML_Parser originParser, const char *epilog) {
7808   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7809   assert(! rootParser->m_parentParser);
7810 
7811   if (rootParser->m_accounting.debugLevel == 0u) {
7812     return;
7813   }
7814 
7815   const float amplificationFactor
7816       = accountingGetCurrentAmplification(rootParser);
7817   fprintf(stderr,
7818           "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7819               "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7820           (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7821           rootParser->m_accounting.countBytesIndirect,
7822           (double)amplificationFactor, epilog);
7823 }
7824 
7825 static void
accountingOnAbort(XML_Parser originParser)7826 accountingOnAbort(XML_Parser originParser) {
7827   accountingReportStats(originParser, " ABORTING\n");
7828 }
7829 
7830 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7831 accountingReportDiff(XML_Parser rootParser,
7832                      unsigned int levelsAwayFromRootParser, const char *before,
7833                      const char *after, ptrdiff_t bytesMore, int source_line,
7834                      enum XML_Account account) {
7835   assert(! rootParser->m_parentParser);
7836 
7837   fprintf(stderr,
7838           " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7839           bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7840           levelsAwayFromRootParser, source_line, 10, "");
7841 
7842   const char ellipis[] = "[..]";
7843   const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7844   const unsigned int contextLength = 10;
7845 
7846   /* Note: Performance is of no concern here */
7847   const char *walker = before;
7848   if ((rootParser->m_accounting.debugLevel >= 3u)
7849       || (after - before)
7850              <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7851     for (; walker < after; walker++) {
7852       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7853     }
7854   } else {
7855     for (; walker < before + contextLength; walker++) {
7856       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7857     }
7858     fprintf(stderr, ellipis);
7859     walker = after - contextLength;
7860     for (; walker < after; walker++) {
7861       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7862     }
7863   }
7864   fprintf(stderr, "\"\n");
7865 }
7866 
7867 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7868 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7869                         const char *after, int source_line,
7870                         enum XML_Account account) {
7871   /* Note: We need to check the token type *first* to be sure that
7872    *       we can even access variable <after>, safely.
7873    *       E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7874   switch (tok) {
7875   case XML_TOK_INVALID:
7876   case XML_TOK_PARTIAL:
7877   case XML_TOK_PARTIAL_CHAR:
7878   case XML_TOK_NONE:
7879     return XML_TRUE;
7880   }
7881 
7882   if (account == XML_ACCOUNT_NONE)
7883     return XML_TRUE; /* because these bytes have been accounted for, already */
7884 
7885   unsigned int levelsAwayFromRootParser;
7886   const XML_Parser rootParser
7887       = getRootParserOf(originParser, &levelsAwayFromRootParser);
7888   assert(! rootParser->m_parentParser);
7889 
7890   const int isDirect
7891       = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7892   const ptrdiff_t bytesMore = after - before;
7893 
7894   XmlBigCount *const additionTarget
7895       = isDirect ? &rootParser->m_accounting.countBytesDirect
7896                  : &rootParser->m_accounting.countBytesIndirect;
7897 
7898   /* Detect and avoid integer overflow */
7899   if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7900     return XML_FALSE;
7901   *additionTarget += bytesMore;
7902 
7903   const XmlBigCount countBytesOutput
7904       = rootParser->m_accounting.countBytesDirect
7905         + rootParser->m_accounting.countBytesIndirect;
7906   const float amplificationFactor
7907       = accountingGetCurrentAmplification(rootParser);
7908   const XML_Bool tolerated
7909       = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7910         || (amplificationFactor
7911             <= rootParser->m_accounting.maximumAmplificationFactor);
7912 
7913   if (rootParser->m_accounting.debugLevel >= 2u) {
7914     accountingReportStats(rootParser, "");
7915     accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7916                          bytesMore, source_line, account);
7917   }
7918 
7919   return tolerated;
7920 }
7921 
7922 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7923 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7924   if (! parser)
7925     return 0;
7926   return parser->m_accounting.countBytesDirect;
7927 }
7928 
7929 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7930 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7931   if (! parser)
7932     return 0;
7933   return parser->m_accounting.countBytesIndirect;
7934 }
7935 
7936 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7937 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7938                           const char *action, int sourceLine) {
7939   assert(! rootParser->m_parentParser);
7940   if (rootParser->m_entity_stats.debugLevel == 0u)
7941     return;
7942 
7943 #  if defined(XML_UNICODE)
7944   const char *const entityName = "[..]";
7945 #  else
7946   const char *const entityName = entity->name;
7947 #  endif
7948 
7949   fprintf(
7950       stderr,
7951       "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7952       (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7953       rootParser->m_entity_stats.currentDepth,
7954       rootParser->m_entity_stats.maximumDepthSeen,
7955       (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7956       entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7957       sourceLine);
7958 }
7959 
7960 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7961 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7962   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7963   assert(! rootParser->m_parentParser);
7964 
7965   rootParser->m_entity_stats.countEverOpened++;
7966   rootParser->m_entity_stats.currentDepth++;
7967   if (rootParser->m_entity_stats.currentDepth
7968       > rootParser->m_entity_stats.maximumDepthSeen) {
7969     rootParser->m_entity_stats.maximumDepthSeen++;
7970   }
7971 
7972   entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7973 }
7974 
7975 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7976 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7977   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7978   assert(! rootParser->m_parentParser);
7979 
7980   entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7981   rootParser->m_entity_stats.currentDepth--;
7982 }
7983 
7984 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)7985 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7986   XML_Parser rootParser = parser;
7987   unsigned int stepsTakenUpwards = 0;
7988   while (rootParser->m_parentParser) {
7989     rootParser = rootParser->m_parentParser;
7990     stepsTakenUpwards++;
7991   }
7992   assert(! rootParser->m_parentParser);
7993   if (outLevelDiff != NULL) {
7994     *outLevelDiff = stepsTakenUpwards;
7995   }
7996   return rootParser;
7997 }
7998 
/* Maps a byte value to a constant, printable, zero-terminated string.

   Printable ASCII characters map to themselves, except that the double
   quote and the backslash are preceded by a backslash.  NUL, tab, line feed
   and carriage return map to "\0", "\t", "\n" and "\r".  Every other value
   maps to "\x" followed by its upper-case hexadecimal value without leading
   zeros (e.g. "\x1", "\x1F", "\xFF").

   The returned string has static storage duration and must not be
   modified or freed. */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printable[256] = {
      /* 0x00 - 0x0F */
      "\\0", "\\x1", "\\x2", "\\x3", "\\x4", "\\x5", "\\x6", "\\x7",
      "\\x8", "\\t", "\\n", "\\xB", "\\xC", "\\r", "\\xE", "\\xF",
      /* 0x10 - 0x1F */
      "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
      "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /* 0x20 - 0x2F */
      " ", "!", "\\\"", "#", "$", "%", "&", "'",
      "(", ")", "*", "+", ",", "-", ".", "/",
      /* 0x30 - 0x3F */
      "0", "1", "2", "3", "4", "5", "6", "7",
      "8", "9", ":", ";", "<", "=", ">", "?",
      /* 0x40 - 0x4F */
      "@", "A", "B", "C", "D", "E", "F", "G",
      "H", "I", "J", "K", "L", "M", "N", "O",
      /* 0x50 - 0x5F */
      "P", "Q", "R", "S", "T", "U", "V", "W",
      "X", "Y", "Z", "[", "\\\\", "]", "^", "_",
      /* 0x60 - 0x6F */
      "`", "a", "b", "c", "d", "e", "f", "g",
      "h", "i", "j", "k", "l", "m", "n", "o",
      /* 0x70 - 0x7F */
      "p", "q", "r", "s", "t", "u", "v", "w",
      "x", "y", "z", "{", "|", "}", "~", "\\x7F",
      /* 0x80 - 0x8F */
      "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
      "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 0x90 - 0x9F */
      "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
      "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 0xA0 - 0xAF */
      "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 0xB0 - 0xBF */
      "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 0xC0 - 0xCF */
      "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 0xD0 - 0xDF */
      "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 0xE0 - 0xEF */
      "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 0xF0 - 0xFF */
      "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };
  const unsigned int index = c;

  /* Only reachable where unsigned char can hold values above 255 */
  if (index > 255u) {
    assert(0); /* never gets here */
    return "dead code";
  }
  return printable[index];
}
8520 
8521 #endif /* XML_GE == 1 */
8522 
/* Reads a debug level from the environment variable named variableName.

   The value must be a complete base-10 number as accepted by strtoul, with
   nothing following it.  If the variable is not set, is empty, has trailing
   characters, or makes strtoul set errno, defaultDebugLevel is returned
   instead.

   errno is reset to 0 before the conversion and again after a rejected
   value; it is left untouched when the variable is not set. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  char *end = NULL;
  unsigned long parsed;

  if (text == NULL) {
    return defaultDebugLevel;
  }

  errno = 0;
  parsed = strtoul(text, &end, 10);

  /* Accept only if the conversion succeeded, consumed at least one
     character, and stopped exactly at the end of the string */
  if (! ((errno == 0) && (end != text) && (end[0] == '\0'))) {
    errno = 0;
    return defaultDebugLevel;
  }

  return parsed;
}
8541