xref: /freebsd/contrib/expat/lib/xmlparse.c (revision 4543ef51)
1 /* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+)
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13    Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14    Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15    Copyright (c) 2016      Eric Rahm <erahm@mozilla.com>
16    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
17    Copyright (c) 2016      Gaurav <g.gupta@samsung.com>
18    Copyright (c) 2016      Thomas Beutlich <tc@tbeu.de>
19    Copyright (c) 2016      Gustavo Grieco <gustavo.grieco@imag.fr>
20    Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com>
21    Copyright (c) 2016      Ed Schouten <ed@nuxi.nl>
22    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23    Copyright (c) 2017      Václav Slavík <vaclav@slavik.io>
24    Copyright (c) 2017      Viktor Szakats <commit@vsz.me>
25    Copyright (c) 2017      Chanho Park <chanho61.park@samsung.com>
26    Copyright (c) 2017      Rolf Eike Beer <eike@sf-mail.de>
27    Copyright (c) 2017      Hans Wennborg <hans@chromium.org>
28    Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
29    Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
30    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
31    Copyright (c) 2018      Mariusz Zaborski <oshogbo@vexillium.org>
32    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
33    Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34    Copyright (c) 2019      Vadim Zeitlin <vadim@zeitlins.org>
35    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
36    Copyright (c) 2022      Samanta Navarro <ferivoz@riseup.net>
37    Copyright (c) 2022      Jeffrey Walton <noloader@gmail.com>
38    Copyright (c) 2022      Jann Horn <jannh@google.com>
39    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
40    Copyright (c) 2023      Owain Davies <owaind@bath.edu>
41    Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
42    Licensed under the MIT license:
43 
44    Permission is  hereby granted,  free of charge,  to any  person obtaining
45    a  copy  of  this  software   and  associated  documentation  files  (the
46    "Software"),  to  deal in  the  Software  without restriction,  including
47    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
48    distribute, sublicense, and/or sell copies of the Software, and to permit
49    persons  to whom  the Software  is  furnished to  do so,  subject to  the
50    following conditions:
51 
52    The above copyright  notice and this permission notice  shall be included
53    in all copies or substantial portions of the Software.
54 
55    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
56    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
57    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
58    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
59    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
60    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
61    USE OR OTHER DEALINGS IN THE SOFTWARE.
62 */
63 
64 #define XML_BUILDING_EXPAT 1
65 
66 #include "expat_config.h"
67 
68 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
69 #  error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
70 #endif
71 
72 #if defined(XML_DTD) && XML_GE == 0
73 #  error Either undefine XML_DTD or define XML_GE to 1.
74 #endif
75 
76 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2)           \
77     || (XML_CONTEXT_BYTES + 0 < 0)
78 #  error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
79 #endif
80 
81 #if defined(HAVE_SYSCALL_GETRANDOM)
82 #  if ! defined(_GNU_SOURCE)
83 #    define _GNU_SOURCE 1 /* syscall prototype */
84 #  endif
85 #endif
86 
87 #ifdef _WIN32
88 /* force stdlib to define rand_s() */
89 #  if ! defined(_CRT_RAND_S)
90 #    define _CRT_RAND_S
91 #  endif
92 #endif
93 
94 #include <stdbool.h>
95 #include <stddef.h>
96 #include <string.h> /* memset(), memcpy() */
97 #include <assert.h>
98 #include <limits.h> /* UINT_MAX */
99 #include <stdio.h>  /* fprintf */
100 #include <stdlib.h> /* getenv, rand_s */
101 #include <stdint.h> /* uintptr_t */
102 #include <math.h>   /* isnan */
103 
104 #ifdef _WIN32
105 #  define getpid GetCurrentProcessId
106 #else
107 #  include <sys/time.h>  /* gettimeofday() */
108 #  include <sys/types.h> /* getpid() */
109 #  include <unistd.h>    /* getpid() */
110 #  include <fcntl.h>     /* O_RDONLY */
111 #  include <errno.h>
112 #endif
113 
114 #ifdef _WIN32
115 #  include "winconfig.h"
116 #endif
117 
118 #include "ascii.h"
119 #include "expat.h"
120 #include "siphash.h"
121 
122 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
123 #  if defined(HAVE_GETRANDOM)
124 #    include <sys/random.h> /* getrandom */
125 #  else
126 #    include <unistd.h>      /* syscall */
127 #    include <sys/syscall.h> /* SYS_getrandom */
128 #  endif
129 #  if ! defined(GRND_NONBLOCK)
130 #    define GRND_NONBLOCK 0x0001
131 #  endif /* defined(GRND_NONBLOCK) */
132 #endif   /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
133 
134 #if defined(HAVE_LIBBSD)                                                       \
135     && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
136 #  include <bsd/stdlib.h>
137 #endif
138 
139 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
140 #  define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
141 #endif
142 
143 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM)             \
144     && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)            \
145     && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32)                         \
146     && ! defined(XML_POOR_ENTROPY)
147 #  error You do not have support for any sources of high quality entropy \
148     enabled.  For end user security, that is probably not what you want. \
149     \
150     Your options include: \
151       * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
152       * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
153       * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
154       * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
155       * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
156       * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
157       * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
158       * Windows >=Vista (rand_s): _WIN32. \
159     \
160     If insist on not using any of these, bypass this error by defining \
161     XML_POOR_ENTROPY; you have been warned. \
162     \
163     If you have reasons to patch this detection code away or need changes \
164     to the build system, please open a bug.  Thank you!
165 #endif
166 
167 #ifdef XML_UNICODE
168 #  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
169 #  define XmlConvert XmlUtf16Convert
170 #  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
171 #  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
172 #  define XmlEncode XmlUtf16Encode
173 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
174 typedef unsigned short ICHAR;
175 #else
176 #  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
177 #  define XmlConvert XmlUtf8Convert
178 #  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
179 #  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
180 #  define XmlEncode XmlUtf8Encode
181 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
182 typedef char ICHAR;
183 #endif
184 
185 #ifndef XML_NS
186 
187 #  define XmlInitEncodingNS XmlInitEncoding
188 #  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
189 #  undef XmlGetInternalEncodingNS
190 #  define XmlGetInternalEncodingNS XmlGetInternalEncoding
191 #  define XmlParseXmlDeclNS XmlParseXmlDecl
192 
193 #endif
194 
195 #ifdef XML_UNICODE
196 
197 #  ifdef XML_UNICODE_WCHAR_T
198 #    define XML_T(x) (const wchar_t) x
199 #    define XML_L(x) L##x
200 #  else
201 #    define XML_T(x) (const unsigned short)x
202 #    define XML_L(x) x
203 #  endif
204 
205 #else
206 
207 #  define XML_T(x) x
208 #  define XML_L(x) x
209 
210 #endif
211 
/* Round up n to be a multiple of sz, where sz is a power of 2.
   Note: sz is evaluated twice, so pass a side-effect-free expression. */
#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))

/* Do safe (NULL-aware) pointer arithmetic: yields (p - q) when both
   pointers are non-NULL, and 0 otherwise.  Each argument may be
   evaluated twice. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)

/* The smaller of a and b.  The selected argument is evaluated twice. */
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
219 
220 #include "internal.h"
221 #include "xmltok.h"
222 #include "xmlrole.h"
223 
224 typedef const XML_Char *KEY;
225 
226 typedef struct {
227   KEY name;
228 } NAMED;
229 
230 typedef struct {
231   NAMED **v;
232   unsigned char power;
233   size_t size;
234   size_t used;
235   const XML_Memory_Handling_Suite *mem;
236 } HASH_TABLE;
237 
238 static size_t keylen(KEY s);
239 
240 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
241 
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
/* The result is narrowed to unsigned char; the "| 1" keeps it odd. */
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
254 
255 typedef struct {
256   NAMED **p;
257   NAMED **end;
258 } HASH_TABLE_ITER;
259 
/* Initial sizes and related tuning constants. */
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
268 
269 typedef struct binding {
270   struct prefix *prefix;
271   struct binding *nextTagBinding;
272   struct binding *prevPrefixBinding;
273   const struct attribute_id *attId;
274   XML_Char *uri;
275   int uriLen;
276   int uriAlloc;
277 } BINDING;
278 
279 typedef struct prefix {
280   const XML_Char *name;
281   BINDING *binding;
282 } PREFIX;
283 
284 typedef struct {
285   const XML_Char *str;
286   const XML_Char *localPart;
287   const XML_Char *prefix;
288   int strLen;
289   int uriLen;
290   int prefixLen;
291 } TAG_NAME;
292 
293 /* TAG represents an open element.
294    The name of the element is stored in both the document and API
295    encodings.  The memory buffer 'buf' is a separately-allocated
296    memory area which stores the name.  During the XML_Parse()/
297    XMLParseBuffer() when the element is open, the memory for the 'raw'
298    version of the name (in the document encoding) is shared with the
299    document buffer.  If the element is open across calls to
300    XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
301    contain the 'raw' name as well.
302 
303    A parser reuses these structures, maintaining a list of allocated
304    TAG objects in a free list.
305 */
306 typedef struct tag {
307   struct tag *parent;  /* parent of this element */
308   const char *rawName; /* tagName in the original encoding */
309   int rawNameLength;
310   TAG_NAME name; /* tagName in the API encoding */
311   char *buf;     /* buffer for name components */
312   char *bufEnd;  /* end of the buffer */
313   BINDING *bindings;
314 } TAG;
315 
316 typedef struct {
317   const XML_Char *name;
318   const XML_Char *textPtr;
319   int textLen;   /* length in XML_Chars */
320   int processed; /* # of processed bytes - when suspended */
321   const XML_Char *systemId;
322   const XML_Char *base;
323   const XML_Char *publicId;
324   const XML_Char *notation;
325   XML_Bool open;
326   XML_Bool is_param;
327   XML_Bool is_internal; /* true if declared in internal subset outside PE */
328 } ENTITY;
329 
330 typedef struct {
331   enum XML_Content_Type type;
332   enum XML_Content_Quant quant;
333   const XML_Char *name;
334   int firstchild;
335   int lastchild;
336   int childcnt;
337   int nextsib;
338 } CONTENT_SCAFFOLD;
339 
340 #define INIT_SCAFFOLD_ELEMENTS 32
341 
342 typedef struct block {
343   struct block *next;
344   int size;
345   XML_Char s[1];
346 } BLOCK;
347 
348 typedef struct {
349   BLOCK *blocks;
350   BLOCK *freeBlocks;
351   const XML_Char *end;
352   XML_Char *ptr;
353   XML_Char *start;
354   const XML_Memory_Handling_Suite *mem;
355 } STRING_POOL;
356 
357 /* The XML_Char before the name is used to determine whether
358    an attribute has been specified. */
359 typedef struct attribute_id {
360   XML_Char *name;
361   PREFIX *prefix;
362   XML_Bool maybeTokenized;
363   XML_Bool xmlns;
364 } ATTRIBUTE_ID;
365 
366 typedef struct {
367   const ATTRIBUTE_ID *id;
368   XML_Bool isCdata;
369   const XML_Char *value;
370 } DEFAULT_ATTRIBUTE;
371 
372 typedef struct {
373   unsigned long version;
374   unsigned long hash;
375   const XML_Char *uriName;
376 } NS_ATT;
377 
378 typedef struct {
379   const XML_Char *name;
380   PREFIX *prefix;
381   const ATTRIBUTE_ID *idAtt;
382   int nDefaultAtts;
383   int allocDefaultAtts;
384   DEFAULT_ATTRIBUTE *defaultAtts;
385 } ELEMENT_TYPE;
386 
387 typedef struct {
388   HASH_TABLE generalEntities;
389   HASH_TABLE elementTypes;
390   HASH_TABLE attributeIds;
391   HASH_TABLE prefixes;
392   STRING_POOL pool;
393   STRING_POOL entityValuePool;
394   /* false once a parameter entity reference has been skipped */
395   XML_Bool keepProcessing;
396   /* true once an internal or external PE reference has been encountered;
397      this includes the reference to an external subset */
398   XML_Bool hasParamEntityRefs;
399   XML_Bool standalone;
400 #ifdef XML_DTD
401   /* indicates if external PE has been read */
402   XML_Bool paramEntityRead;
403   HASH_TABLE paramEntities;
404 #endif /* XML_DTD */
405   PREFIX defaultPrefix;
406   /* === scaffolding for building content model === */
407   XML_Bool in_eldecl;
408   CONTENT_SCAFFOLD *scaffold;
409   unsigned contentStringLen;
410   unsigned scaffSize;
411   unsigned scaffCount;
412   int scaffLevel;
413   int *scaffIndex;
414 } DTD;
415 
416 typedef struct open_internal_entity {
417   const char *internalEventPtr;
418   const char *internalEventEndPtr;
419   struct open_internal_entity *next;
420   ENTITY *entity;
421   int startTagLevel;
422   XML_Bool betweenDecl; /* WFC: PE Between Declarations */
423 } OPEN_INTERNAL_ENTITY;
424 
/* How a span of input bytes is to be counted by the accounting code. */
enum XML_Account {
  XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
  XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
                                   expansion */
  XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
};
431 
432 #if XML_GE == 1
/* Counter type for byte counts. */
typedef unsigned long long XmlBigCount;

/* Accounting state: direct and indirect byte counters plus the
   configured debug level, maximum amplification factor and activation
   threshold. */
typedef struct accounting {
  XmlBigCount countBytesDirect;
  XmlBigCount countBytesIndirect;
  unsigned long debugLevel;
  float maximumAmplificationFactor; // >=1.0
  unsigned long long activationThresholdBytes;
} ACCOUNTING;
441 
/* Entity tracking statistics: number of entities ever opened, current
   and maximum depth, and the configured debug level. */
typedef struct entity_stats {
  unsigned int countEverOpened;
  unsigned int currentDepth;
  unsigned int maximumDepthSeen;
  unsigned long debugLevel;
} ENTITY_STATS;
448 #endif /* XML_GE == 1 */
449 
450 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
451                                          const char *end, const char **endPtr);
452 
453 static Processor prologProcessor;
454 static Processor prologInitProcessor;
455 static Processor contentProcessor;
456 static Processor cdataSectionProcessor;
457 #ifdef XML_DTD
458 static Processor ignoreSectionProcessor;
459 static Processor externalParEntProcessor;
460 static Processor externalParEntInitProcessor;
461 static Processor entityValueProcessor;
462 static Processor entityValueInitProcessor;
463 #endif /* XML_DTD */
464 static Processor epilogProcessor;
465 static Processor errorProcessor;
466 static Processor externalEntityInitProcessor;
467 static Processor externalEntityInitProcessor2;
468 static Processor externalEntityInitProcessor3;
469 static Processor externalEntityContentProcessor;
470 static Processor internalEntityProcessor;
471 
472 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
473                                             const XML_Char *encodingName);
474 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
475                                      const char *s, const char *next);
476 static enum XML_Error initializeEncoding(XML_Parser parser);
477 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
478                                const char *s, const char *end, int tok,
479                                const char *next, const char **nextPtr,
480                                XML_Bool haveMore, XML_Bool allowClosingDoctype,
481                                enum XML_Account account);
482 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
483                                             XML_Bool betweenDecl);
484 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
485                                 const ENCODING *enc, const char *start,
486                                 const char *end, const char **endPtr,
487                                 XML_Bool haveMore, enum XML_Account account);
488 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
489                                      const char **startPtr, const char *end,
490                                      const char **nextPtr, XML_Bool haveMore,
491                                      enum XML_Account account);
492 #ifdef XML_DTD
493 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
494                                       const char **startPtr, const char *end,
495                                       const char **nextPtr, XML_Bool haveMore);
496 #endif /* XML_DTD */
497 
498 static void freeBindings(XML_Parser parser, BINDING *bindings);
499 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
500                                 const char *attStr, TAG_NAME *tagNamePtr,
501                                 BINDING **bindingsPtr,
502                                 enum XML_Account account);
503 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
504                                  const ATTRIBUTE_ID *attId, const XML_Char *uri,
505                                  BINDING **bindingsPtr);
506 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
507                            XML_Bool isCdata, XML_Bool isId,
508                            const XML_Char *value, XML_Parser parser);
509 static enum XML_Error storeAttributeValue(XML_Parser parser,
510                                           const ENCODING *enc, XML_Bool isCdata,
511                                           const char *ptr, const char *end,
512                                           STRING_POOL *pool,
513                                           enum XML_Account account);
514 static enum XML_Error appendAttributeValue(XML_Parser parser,
515                                            const ENCODING *enc,
516                                            XML_Bool isCdata, const char *ptr,
517                                            const char *end, STRING_POOL *pool,
518                                            enum XML_Account account);
519 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
520                                     const char *start, const char *end);
521 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
522 #if XML_GE == 1
523 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
524                                        const char *start, const char *end,
525                                        enum XML_Account account);
526 #else
527 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
528 #endif
529 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
530                                        const char *start, const char *end);
531 static int reportComment(XML_Parser parser, const ENCODING *enc,
532                          const char *start, const char *end);
533 static void reportDefault(XML_Parser parser, const ENCODING *enc,
534                           const char *start, const char *end);
535 
536 static const XML_Char *getContext(XML_Parser parser);
537 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
538 
539 static void FASTCALL normalizePublicId(XML_Char *s);
540 
541 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
542 /* do not call if m_parentParser != NULL */
543 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
544 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
545                        const XML_Memory_Handling_Suite *ms);
546 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
547                    const XML_Memory_Handling_Suite *ms);
548 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
549                            STRING_POOL *newPool, const HASH_TABLE *oldTable);
550 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
551                      size_t createSize);
552 static void FASTCALL hashTableInit(HASH_TABLE *table,
553                                    const XML_Memory_Handling_Suite *ms);
554 static void FASTCALL hashTableClear(HASH_TABLE *table);
555 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
556 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
557                                        const HASH_TABLE *table);
558 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
559 
560 static void FASTCALL poolInit(STRING_POOL *pool,
561                               const XML_Memory_Handling_Suite *ms);
562 static void FASTCALL poolClear(STRING_POOL *pool);
563 static void FASTCALL poolDestroy(STRING_POOL *pool);
564 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
565                             const char *ptr, const char *end);
566 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
567                                  const char *ptr, const char *end);
568 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
569 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
570                                                const XML_Char *s);
571 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
572                                        int n);
573 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
574                                                  const XML_Char *s);
575 
576 static int FASTCALL nextScaffoldPart(XML_Parser parser);
577 static XML_Content *build_model(XML_Parser parser);
578 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
579                                     const char *ptr, const char *end);
580 
581 static XML_Char *copyString(const XML_Char *s,
582                             const XML_Memory_Handling_Suite *memsuite);
583 
584 static unsigned long generate_hash_secret_salt(XML_Parser parser);
585 static XML_Bool startParsing(XML_Parser parser);
586 
587 static XML_Parser parserCreate(const XML_Char *encodingName,
588                                const XML_Memory_Handling_Suite *memsuite,
589                                const XML_Char *nameSep, DTD *dtd);
590 
591 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
592 
593 #if XML_GE == 1
594 static float accountingGetCurrentAmplification(XML_Parser rootParser);
595 static void accountingReportStats(XML_Parser originParser, const char *epilog);
596 static void accountingOnAbort(XML_Parser originParser);
597 static void accountingReportDiff(XML_Parser rootParser,
598                                  unsigned int levelsAwayFromRootParser,
599                                  const char *before, const char *after,
600                                  ptrdiff_t bytesMore, int source_line,
601                                  enum XML_Account account);
602 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
603                                         const char *before, const char *after,
604                                         int source_line,
605                                         enum XML_Account account);
606 
607 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
608                                       const char *action, int sourceLine);
609 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
610                                  int sourceLine);
611 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
612                                   int sourceLine);
613 
614 static XML_Parser getRootParserOf(XML_Parser parser,
615                                   unsigned int *outLevelDiff);
616 #endif /* XML_GE == 1 */
617 
618 static unsigned long getDebugLevel(const char *variableName,
619                                    unsigned long defaultDebugLevel);
620 
/* Accessors for STRING_POOL.  'pool' is a pointer expression; it is
   fully parenthesized in every expansion so that arguments such as
   &somePool work, and it may be evaluated more than once. */
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last character by moving ptr back by one. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the string being built: reset ptr to start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Keep the string being built: advance start to ptr. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, growing the pool via poolGrow() when ptr has reached end.
   Evaluates to 0 if growing fails, 1 otherwise. */
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
631 
632 XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
633 unsigned int g_parseAttempts = 0;                    // used for testing only
634 
635 struct XML_ParserStruct {
636   /* The first member must be m_userData so that the XML_GetUserData
637      macro works. */
638   void *m_userData;
639   void *m_handlerArg;
640 
641   // How the four parse buffer pointers below relate in time and space:
642   //
643   //   m_buffer <= m_bufferPtr <= m_bufferEnd  <= m_bufferLim
644   //   |           |              |               |
645   //   <--parsed-->|              |               |
646   //               <---parsing--->|               |
647   //                              <--unoccupied-->|
648   //   <---------total-malloced/realloced-------->|
649 
650   char *m_buffer; // malloc/realloc base pointer of parse buffer
651   const XML_Memory_Handling_Suite m_mem;
652   const char *m_bufferPtr; // first character to be parsed
653   char *m_bufferEnd;       // past last character to be parsed
654   const char *m_bufferLim; // allocated end of m_buffer
655 
656   XML_Index m_parseEndByteIndex;
657   const char *m_parseEndPtr;
658   size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
659   XML_Bool m_reparseDeferralEnabled;
660   int m_lastBufferRequestSize;
661   XML_Char *m_dataBuf;
662   XML_Char *m_dataBufEnd;
663   XML_StartElementHandler m_startElementHandler;
664   XML_EndElementHandler m_endElementHandler;
665   XML_CharacterDataHandler m_characterDataHandler;
666   XML_ProcessingInstructionHandler m_processingInstructionHandler;
667   XML_CommentHandler m_commentHandler;
668   XML_StartCdataSectionHandler m_startCdataSectionHandler;
669   XML_EndCdataSectionHandler m_endCdataSectionHandler;
670   XML_DefaultHandler m_defaultHandler;
671   XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
672   XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
673   XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
674   XML_NotationDeclHandler m_notationDeclHandler;
675   XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
676   XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
677   XML_NotStandaloneHandler m_notStandaloneHandler;
678   XML_ExternalEntityRefHandler m_externalEntityRefHandler;
679   XML_Parser m_externalEntityRefHandlerArg;
680   XML_SkippedEntityHandler m_skippedEntityHandler;
681   XML_UnknownEncodingHandler m_unknownEncodingHandler;
682   XML_ElementDeclHandler m_elementDeclHandler;
683   XML_AttlistDeclHandler m_attlistDeclHandler;
684   XML_EntityDeclHandler m_entityDeclHandler;
685   XML_XmlDeclHandler m_xmlDeclHandler;
686   const ENCODING *m_encoding;
687   INIT_ENCODING m_initEncoding;
688   const ENCODING *m_internalEncoding;
689   const XML_Char *m_protocolEncodingName;
690   XML_Bool m_ns;
691   XML_Bool m_ns_triplets;
692   void *m_unknownEncodingMem;
693   void *m_unknownEncodingData;
694   void *m_unknownEncodingHandlerData;
695   void(XMLCALL *m_unknownEncodingRelease)(void *);
696   PROLOG_STATE m_prologState;
697   Processor *m_processor;
698   enum XML_Error m_errorCode;
699   const char *m_eventPtr;
700   const char *m_eventEndPtr;
701   const char *m_positionPtr;
702   OPEN_INTERNAL_ENTITY *m_openInternalEntities;
703   OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
704   XML_Bool m_defaultExpandInternalEntities;
705   int m_tagLevel;
706   ENTITY *m_declEntity;
707   const XML_Char *m_doctypeName;
708   const XML_Char *m_doctypeSysid;
709   const XML_Char *m_doctypePubid;
710   const XML_Char *m_declAttributeType;
711   const XML_Char *m_declNotationName;
712   const XML_Char *m_declNotationPublicId;
713   ELEMENT_TYPE *m_declElementType;
714   ATTRIBUTE_ID *m_declAttributeId;
715   XML_Bool m_declAttributeIsCdata;
716   XML_Bool m_declAttributeIsId;
717   DTD *m_dtd;
718   const XML_Char *m_curBase;
719   TAG *m_tagStack;
720   TAG *m_freeTagList;
721   BINDING *m_inheritedBindings;
722   BINDING *m_freeBindingList;
723   int m_attsSize;
724   int m_nSpecifiedAtts;
725   int m_idAttIndex;
726   ATTRIBUTE *m_atts;
727   NS_ATT *m_nsAtts;
728   unsigned long m_nsAttsVersion;
729   unsigned char m_nsAttsPower;
730 #ifdef XML_ATTR_INFO
731   XML_AttrInfo *m_attInfo;
732 #endif
733   POSITION m_position;
734   STRING_POOL m_tempPool;
735   STRING_POOL m_temp2Pool;
736   char *m_groupConnector;
737   unsigned int m_groupSize;
738   XML_Char m_namespaceSeparator;
739   XML_Parser m_parentParser;
740   XML_ParsingStatus m_parsingStatus;
741 #ifdef XML_DTD
742   XML_Bool m_isParamEntity;
743   XML_Bool m_useForeignDTD;
744   enum XML_ParamEntityParsing m_paramEntityParsing;
745 #endif
746   unsigned long m_hash_secret_salt;
747 #if XML_GE == 1
748   ACCOUNTING m_accounting;
749   ENTITY_STATS m_entity_stats;
750 #endif
751 };
752 
/* Allocation shorthands that dispatch through the memory-handling function
   pointers stored in the parser's m_mem member.

   The parser argument is parenthesized in the expansion so that any
   pointer-valued expression (e.g. `&obj` or `*pp`), not just a plain
   identifier, can be passed without the `->` binding to the wrong operand. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
756 
757 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)758 XML_ParserCreate(const XML_Char *encodingName) {
759   return XML_ParserCreate_MM(encodingName, NULL, NULL);
760 }
761 
762 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)763 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
764   XML_Char tmp[2] = {nsSep, 0};
765   return XML_ParserCreate_MM(encodingName, NULL, tmp);
766 }
767 
768 // "xml=http://www.w3.org/XML/1998/namespace"
769 static const XML_Char implicitContext[]
770     = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
771        ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
772        ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
773        ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
774        ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
775        ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
776        ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
777        ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
778        '\0'};
779 
780 /* To avoid warnings about unused functions: */
781 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
782 
783 #  if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
784 
/* Obtain entropy on Linux 3.17+ through getrandom() in non-blocking mode.
 *
 * Fills `target` with `count` random bytes.
 *
 * Returns 1 when all `count` bytes were written, 0 otherwise.  The call is
 * retried after a short (positive) result and after a failure with
 * errno == EINTR; any other failure, or a zero-byte result, gives up.
 * errno is only consulted when the call actually reported failure, so a
 * stale EINTR left behind by unrelated code can neither cause a bogus retry
 * nor an endless loop.  A request for zero bytes is trivially satisfied.
 */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const int bytesWrittenMore =
#    if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#    else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#    endif

    if (bytesWrittenMore > 0) {
      /* Progress was made; loop again for whatever is still missing. */
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if ((bytesWrittenMore < 0) && (errno == EINTR)) {
      continue; /* interrupted before any byte was delivered: retry */
    } else {
      return 0; /* hard failure (e.g. EAGAIN) or no progress */
    }
  }

  return 1; /* full count bytes written */
}
812 
813 #  endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
814 
815 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
816 
/* Extract entropy from /dev/urandom.
 *
 * Fills `target` with `count` bytes read from the device.
 *
 * Returns 1 when all `count` bytes were read, 0 otherwise (the device could
 * not be opened, read() hit end-of-file, or read() failed with something
 * other than EINTR).  Short reads are continued, and errno is only looked at
 * after read() reported failure, so a stale EINTR from unrelated code has no
 * effect.  A request for zero bytes succeeds without touching the device.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  int success = 1;
  int fd;

  if (count == 0) {
    return 1;
  }

  fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      /* Possibly a short read; keep going for the remainder. */
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if ((bytesWrittenMore < 0) && (errno == EINTR)) {
      continue; /* interrupted before any data arrived: retry */
    } else {
      success = 0; /* end-of-file or hard error */
      break;
    }
  }

  close(fd);
  return success;
}
844 
845 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
846 
847 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
848 
849 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
850 
/* Fill `target` with `count` bytes taken from successive arc4random()
   results.  Each 32-bit result is consumed least-significant byte first;
   the last result is only partially used when `count` is not a multiple of
   its size. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();
    size_t byteIdx;

    for (byteIdx = 0; (byteIdx < sizeof(word)) && (written < count);
         byteIdx++) {
      out[written++] = (uint8_t)(word >> (byteIdx * 8));
    }
  }
}
866 
867 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
868 
869 #ifdef _WIN32
870 
871 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
872    as it didn't declare it in its header prior to version 5.3.0 of its
873    runtime package (mingwrt, containing stdlib.h).  The upstream fix
874    was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
875 #  if defined(__MINGW32__) && defined(__MINGW32_VERSION)                       \
876       && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
877 __declspec(dllimport) int rand_s(unsigned int *);
878 #  endif
879 
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills `target` with `count` bytes, consuming each rand_s() result
 * least-significant byte first.  Returns 1 on success and 0 as soon as
 * rand_s() reports an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int word = 0;
    size_t byteIdx;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    for (byteIdx = 0; (byteIdx < sizeof(word)) && (written < count);
         byteIdx++) {
      out[written++] = (uint8_t)(word >> (byteIdx * 8));
    }
  }
  return 1; /* success */
}
903 
904 #endif /* _WIN32 */
905 
906 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
907 
/* Low-quality, time-based entropy for the fallback path.
   Windows: XOR of the two halves of the current system FILETIME.
   Elsewhere: the microseconds field reported by gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* In NDEBUG builds the assert compiles away; the cast keeps `rc` from
     being reported as unused in that case. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
930 
931 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
932 
/* Pass-through helper: returns `entropy` unchanged.  When the debug level
   obtained for "EXPAT_ENTROPY_DEBUG" is at least 1, it first prints the
   provider `label`, the value in zero-padded hex and its size in bytes to
   stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u)
    return entropy;

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
941 
942 static unsigned long
generate_hash_secret_salt(XML_Parser parser)943 generate_hash_secret_salt(XML_Parser parser) {
944   unsigned long entropy;
945   (void)parser;
946 
947   /* "Failproof" high quality providers: */
948 #if defined(HAVE_ARC4RANDOM_BUF)
949   arc4random_buf(&entropy, sizeof(entropy));
950   return ENTROPY_DEBUG("arc4random_buf", entropy);
951 #elif defined(HAVE_ARC4RANDOM)
952   writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
953   return ENTROPY_DEBUG("arc4random", entropy);
954 #else
955   /* Try high quality providers first .. */
956 #  ifdef _WIN32
957   if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
958     return ENTROPY_DEBUG("rand_s", entropy);
959   }
960 #  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
961   if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
962     return ENTROPY_DEBUG("getrandom", entropy);
963   }
964 #  endif
965 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
966   if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
967     return ENTROPY_DEBUG("/dev/urandom", entropy);
968   }
969 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
970   /* .. and self-made low quality for backup: */
971 
972   /* Process ID is 0 bits entropy if attacker has local access */
973   entropy = gather_time_entropy() ^ getpid();
974 
975   /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
976   if (sizeof(unsigned long) == 4) {
977     return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
978   } else {
979     return ENTROPY_DEBUG("fallback(8)",
980                          entropy * (unsigned long)2305843009213693951ULL);
981   }
982 #endif
983 }
984 
985 static unsigned long
get_hash_secret_salt(XML_Parser parser)986 get_hash_secret_salt(XML_Parser parser) {
987   if (parser->m_parentParser != NULL)
988     return get_hash_secret_salt(parser->m_parentParser);
989   return parser->m_hash_secret_salt;
990 }
991 
992 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)993 callProcessor(XML_Parser parser, const char *start, const char *end,
994               const char **endPtr) {
995   const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
996 
997   if (parser->m_reparseDeferralEnabled
998       && ! parser->m_parsingStatus.finalBuffer) {
999     // Heuristic: don't try to parse a partial token again until the amount of
1000     // available data has increased significantly.
1001     const size_t had_before = parser->m_partialTokenBytesBefore;
1002     // ...but *do* try anyway if we're close to causing a reallocation.
1003     size_t available_buffer
1004         = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1005 #if XML_CONTEXT_BYTES > 0
1006     available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1007 #endif
1008     available_buffer
1009         += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1010     // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1011     const bool enough
1012         = (have_now >= 2 * had_before)
1013           || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1014 
1015     if (! enough) {
1016       *endPtr = start; // callers may expect this to be set
1017       return XML_ERROR_NONE;
1018     }
1019   }
1020   g_parseAttempts += 1;
1021   const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
1022   if (ret == XML_ERROR_NONE) {
1023     // if we consumed nothing, remember what we had on this parse attempt.
1024     if (*endPtr == start) {
1025       parser->m_partialTokenBytesBefore = have_now;
1026     } else {
1027       parser->m_partialTokenBytesBefore = 0;
1028     }
1029   }
1030   return ret;
1031 }
1032 
1033 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1034 startParsing(XML_Parser parser) {
1035   /* hash functions must be initialized before setContext() is called */
1036   if (parser->m_hash_secret_salt == 0)
1037     parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1038   if (parser->m_ns) {
1039     /* implicit context only set for root parser, since child
1040        parsers (i.e. external entity parsers) will inherit it
1041     */
1042     return setContext(parser, implicitContext);
1043   }
1044   return XML_TRUE;
1045 }
1046 
1047 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1048 XML_ParserCreate_MM(const XML_Char *encodingName,
1049                     const XML_Memory_Handling_Suite *memsuite,
1050                     const XML_Char *nameSep) {
1051   return parserCreate(encodingName, memsuite, nameSep, NULL);
1052 }
1053 
1054 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)1055 parserCreate(const XML_Char *encodingName,
1056              const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1057              DTD *dtd) {
1058   XML_Parser parser;
1059 
1060   if (memsuite) {
1061     XML_Memory_Handling_Suite *mtemp;
1062     parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1063     if (parser != NULL) {
1064       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1065       mtemp->malloc_fcn = memsuite->malloc_fcn;
1066       mtemp->realloc_fcn = memsuite->realloc_fcn;
1067       mtemp->free_fcn = memsuite->free_fcn;
1068     }
1069   } else {
1070     XML_Memory_Handling_Suite *mtemp;
1071     parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1072     if (parser != NULL) {
1073       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1074       mtemp->malloc_fcn = malloc;
1075       mtemp->realloc_fcn = realloc;
1076       mtemp->free_fcn = free;
1077     }
1078   }
1079 
1080   if (! parser)
1081     return parser;
1082 
1083   parser->m_buffer = NULL;
1084   parser->m_bufferLim = NULL;
1085 
1086   parser->m_attsSize = INIT_ATTS_SIZE;
1087   parser->m_atts
1088       = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1089   if (parser->m_atts == NULL) {
1090     FREE(parser, parser);
1091     return NULL;
1092   }
1093 #ifdef XML_ATTR_INFO
1094   parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1095       parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1096   if (parser->m_attInfo == NULL) {
1097     FREE(parser, parser->m_atts);
1098     FREE(parser, parser);
1099     return NULL;
1100   }
1101 #endif
1102   parser->m_dataBuf
1103       = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1104   if (parser->m_dataBuf == NULL) {
1105     FREE(parser, parser->m_atts);
1106 #ifdef XML_ATTR_INFO
1107     FREE(parser, parser->m_attInfo);
1108 #endif
1109     FREE(parser, parser);
1110     return NULL;
1111   }
1112   parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1113 
1114   if (dtd)
1115     parser->m_dtd = dtd;
1116   else {
1117     parser->m_dtd = dtdCreate(&parser->m_mem);
1118     if (parser->m_dtd == NULL) {
1119       FREE(parser, parser->m_dataBuf);
1120       FREE(parser, parser->m_atts);
1121 #ifdef XML_ATTR_INFO
1122       FREE(parser, parser->m_attInfo);
1123 #endif
1124       FREE(parser, parser);
1125       return NULL;
1126     }
1127   }
1128 
1129   parser->m_freeBindingList = NULL;
1130   parser->m_freeTagList = NULL;
1131   parser->m_freeInternalEntities = NULL;
1132 
1133   parser->m_groupSize = 0;
1134   parser->m_groupConnector = NULL;
1135 
1136   parser->m_unknownEncodingHandler = NULL;
1137   parser->m_unknownEncodingHandlerData = NULL;
1138 
1139   parser->m_namespaceSeparator = ASCII_EXCL;
1140   parser->m_ns = XML_FALSE;
1141   parser->m_ns_triplets = XML_FALSE;
1142 
1143   parser->m_nsAtts = NULL;
1144   parser->m_nsAttsVersion = 0;
1145   parser->m_nsAttsPower = 0;
1146 
1147   parser->m_protocolEncodingName = NULL;
1148 
1149   poolInit(&parser->m_tempPool, &(parser->m_mem));
1150   poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1151   parserInit(parser, encodingName);
1152 
1153   if (encodingName && ! parser->m_protocolEncodingName) {
1154     if (dtd) {
1155       // We need to stop the upcoming call to XML_ParserFree from happily
1156       // destroying parser->m_dtd because the DTD is shared with the parent
1157       // parser and the only guard that keeps XML_ParserFree from destroying
1158       // parser->m_dtd is parser->m_isParamEntity but it will be set to
1159       // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1160       parser->m_dtd = NULL;
1161     }
1162     XML_ParserFree(parser);
1163     return NULL;
1164   }
1165 
1166   if (nameSep) {
1167     parser->m_ns = XML_TRUE;
1168     parser->m_internalEncoding = XmlGetInternalEncodingNS();
1169     parser->m_namespaceSeparator = *nameSep;
1170   } else {
1171     parser->m_internalEncoding = XmlGetInternalEncoding();
1172   }
1173 
1174   return parser;
1175 }
1176 
1177 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1178 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1179   parser->m_processor = prologInitProcessor;
1180   XmlPrologStateInit(&parser->m_prologState);
1181   if (encodingName != NULL) {
1182     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1183   }
1184   parser->m_curBase = NULL;
1185   XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1186   parser->m_userData = NULL;
1187   parser->m_handlerArg = NULL;
1188   parser->m_startElementHandler = NULL;
1189   parser->m_endElementHandler = NULL;
1190   parser->m_characterDataHandler = NULL;
1191   parser->m_processingInstructionHandler = NULL;
1192   parser->m_commentHandler = NULL;
1193   parser->m_startCdataSectionHandler = NULL;
1194   parser->m_endCdataSectionHandler = NULL;
1195   parser->m_defaultHandler = NULL;
1196   parser->m_startDoctypeDeclHandler = NULL;
1197   parser->m_endDoctypeDeclHandler = NULL;
1198   parser->m_unparsedEntityDeclHandler = NULL;
1199   parser->m_notationDeclHandler = NULL;
1200   parser->m_startNamespaceDeclHandler = NULL;
1201   parser->m_endNamespaceDeclHandler = NULL;
1202   parser->m_notStandaloneHandler = NULL;
1203   parser->m_externalEntityRefHandler = NULL;
1204   parser->m_externalEntityRefHandlerArg = parser;
1205   parser->m_skippedEntityHandler = NULL;
1206   parser->m_elementDeclHandler = NULL;
1207   parser->m_attlistDeclHandler = NULL;
1208   parser->m_entityDeclHandler = NULL;
1209   parser->m_xmlDeclHandler = NULL;
1210   parser->m_bufferPtr = parser->m_buffer;
1211   parser->m_bufferEnd = parser->m_buffer;
1212   parser->m_parseEndByteIndex = 0;
1213   parser->m_parseEndPtr = NULL;
1214   parser->m_partialTokenBytesBefore = 0;
1215   parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1216   parser->m_lastBufferRequestSize = 0;
1217   parser->m_declElementType = NULL;
1218   parser->m_declAttributeId = NULL;
1219   parser->m_declEntity = NULL;
1220   parser->m_doctypeName = NULL;
1221   parser->m_doctypeSysid = NULL;
1222   parser->m_doctypePubid = NULL;
1223   parser->m_declAttributeType = NULL;
1224   parser->m_declNotationName = NULL;
1225   parser->m_declNotationPublicId = NULL;
1226   parser->m_declAttributeIsCdata = XML_FALSE;
1227   parser->m_declAttributeIsId = XML_FALSE;
1228   memset(&parser->m_position, 0, sizeof(POSITION));
1229   parser->m_errorCode = XML_ERROR_NONE;
1230   parser->m_eventPtr = NULL;
1231   parser->m_eventEndPtr = NULL;
1232   parser->m_positionPtr = NULL;
1233   parser->m_openInternalEntities = NULL;
1234   parser->m_defaultExpandInternalEntities = XML_TRUE;
1235   parser->m_tagLevel = 0;
1236   parser->m_tagStack = NULL;
1237   parser->m_inheritedBindings = NULL;
1238   parser->m_nSpecifiedAtts = 0;
1239   parser->m_unknownEncodingMem = NULL;
1240   parser->m_unknownEncodingRelease = NULL;
1241   parser->m_unknownEncodingData = NULL;
1242   parser->m_parentParser = NULL;
1243   parser->m_parsingStatus.parsing = XML_INITIALIZED;
1244 #ifdef XML_DTD
1245   parser->m_isParamEntity = XML_FALSE;
1246   parser->m_useForeignDTD = XML_FALSE;
1247   parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1248 #endif
1249   parser->m_hash_secret_salt = 0;
1250 
1251 #if XML_GE == 1
1252   memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1253   parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1254   parser->m_accounting.maximumAmplificationFactor
1255       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1256   parser->m_accounting.activationThresholdBytes
1257       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1258 
1259   memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1260   parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1261 #endif
1262 }
1263 
1264 /* moves list of bindings to m_freeBindingList */
1265 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1266 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1267   while (bindings) {
1268     BINDING *b = bindings;
1269     bindings = bindings->nextTagBinding;
1270     b->nextTagBinding = parser->m_freeBindingList;
1271     parser->m_freeBindingList = b;
1272   }
1273 }
1274 
1275 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1276 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1277   TAG *tStk;
1278   OPEN_INTERNAL_ENTITY *openEntityList;
1279 
1280   if (parser == NULL)
1281     return XML_FALSE;
1282 
1283   if (parser->m_parentParser)
1284     return XML_FALSE;
1285   /* move m_tagStack to m_freeTagList */
1286   tStk = parser->m_tagStack;
1287   while (tStk) {
1288     TAG *tag = tStk;
1289     tStk = tStk->parent;
1290     tag->parent = parser->m_freeTagList;
1291     moveToFreeBindingList(parser, tag->bindings);
1292     tag->bindings = NULL;
1293     parser->m_freeTagList = tag;
1294   }
1295   /* move m_openInternalEntities to m_freeInternalEntities */
1296   openEntityList = parser->m_openInternalEntities;
1297   while (openEntityList) {
1298     OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1299     openEntityList = openEntity->next;
1300     openEntity->next = parser->m_freeInternalEntities;
1301     parser->m_freeInternalEntities = openEntity;
1302   }
1303   moveToFreeBindingList(parser, parser->m_inheritedBindings);
1304   FREE(parser, parser->m_unknownEncodingMem);
1305   if (parser->m_unknownEncodingRelease)
1306     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1307   poolClear(&parser->m_tempPool);
1308   poolClear(&parser->m_temp2Pool);
1309   FREE(parser, (void *)parser->m_protocolEncodingName);
1310   parser->m_protocolEncodingName = NULL;
1311   parserInit(parser, encodingName);
1312   dtdReset(parser->m_dtd, &parser->m_mem);
1313   return XML_TRUE;
1314 }
1315 
1316 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1317 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1318   if (parser == NULL)
1319     return XML_STATUS_ERROR;
1320   /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1321      XXX There's no way for the caller to determine which of the
1322      XXX possible error cases caused the XML_STATUS_ERROR return.
1323   */
1324   if (parser->m_parsingStatus.parsing == XML_PARSING
1325       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1326     return XML_STATUS_ERROR;
1327 
1328   /* Get rid of any previous encoding name */
1329   FREE(parser, (void *)parser->m_protocolEncodingName);
1330 
1331   if (encodingName == NULL)
1332     /* No new encoding name */
1333     parser->m_protocolEncodingName = NULL;
1334   else {
1335     /* Copy the new encoding name into allocated memory */
1336     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1337     if (! parser->m_protocolEncodingName)
1338       return XML_STATUS_ERROR;
1339   }
1340   return XML_STATUS_OK;
1341 }
1342 
1343 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1344 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1345                                const XML_Char *encodingName) {
1346   XML_Parser parser = oldParser;
1347   DTD *newDtd = NULL;
1348   DTD *oldDtd;
1349   XML_StartElementHandler oldStartElementHandler;
1350   XML_EndElementHandler oldEndElementHandler;
1351   XML_CharacterDataHandler oldCharacterDataHandler;
1352   XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1353   XML_CommentHandler oldCommentHandler;
1354   XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1355   XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1356   XML_DefaultHandler oldDefaultHandler;
1357   XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1358   XML_NotationDeclHandler oldNotationDeclHandler;
1359   XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1360   XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1361   XML_NotStandaloneHandler oldNotStandaloneHandler;
1362   XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1363   XML_SkippedEntityHandler oldSkippedEntityHandler;
1364   XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1365   XML_ElementDeclHandler oldElementDeclHandler;
1366   XML_AttlistDeclHandler oldAttlistDeclHandler;
1367   XML_EntityDeclHandler oldEntityDeclHandler;
1368   XML_XmlDeclHandler oldXmlDeclHandler;
1369   ELEMENT_TYPE *oldDeclElementType;
1370 
1371   void *oldUserData;
1372   void *oldHandlerArg;
1373   XML_Bool oldDefaultExpandInternalEntities;
1374   XML_Parser oldExternalEntityRefHandlerArg;
1375 #ifdef XML_DTD
1376   enum XML_ParamEntityParsing oldParamEntityParsing;
1377   int oldInEntityValue;
1378 #endif
1379   XML_Bool oldns_triplets;
1380   /* Note that the new parser shares the same hash secret as the old
1381      parser, so that dtdCopy and copyEntityTable can lookup values
1382      from hash tables associated with either parser without us having
1383      to worry which hash secrets each table has.
1384   */
1385   unsigned long oldhash_secret_salt;
1386   XML_Bool oldReparseDeferralEnabled;
1387 
1388   /* Validate the oldParser parameter before we pull everything out of it */
1389   if (oldParser == NULL)
1390     return NULL;
1391 
1392   /* Stash the original parser contents on the stack */
1393   oldDtd = parser->m_dtd;
1394   oldStartElementHandler = parser->m_startElementHandler;
1395   oldEndElementHandler = parser->m_endElementHandler;
1396   oldCharacterDataHandler = parser->m_characterDataHandler;
1397   oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1398   oldCommentHandler = parser->m_commentHandler;
1399   oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1400   oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1401   oldDefaultHandler = parser->m_defaultHandler;
1402   oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1403   oldNotationDeclHandler = parser->m_notationDeclHandler;
1404   oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1405   oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1406   oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1407   oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1408   oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1409   oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1410   oldElementDeclHandler = parser->m_elementDeclHandler;
1411   oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1412   oldEntityDeclHandler = parser->m_entityDeclHandler;
1413   oldXmlDeclHandler = parser->m_xmlDeclHandler;
1414   oldDeclElementType = parser->m_declElementType;
1415 
1416   oldUserData = parser->m_userData;
1417   oldHandlerArg = parser->m_handlerArg;
1418   oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1419   oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1420 #ifdef XML_DTD
1421   oldParamEntityParsing = parser->m_paramEntityParsing;
1422   oldInEntityValue = parser->m_prologState.inEntityValue;
1423 #endif
1424   oldns_triplets = parser->m_ns_triplets;
1425   /* Note that the new parser shares the same hash secret as the old
1426      parser, so that dtdCopy and copyEntityTable can lookup values
1427      from hash tables associated with either parser without us having
1428      to worry which hash secrets each table has.
1429   */
1430   oldhash_secret_salt = parser->m_hash_secret_salt;
1431   oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1432 
1433 #ifdef XML_DTD
1434   if (! context)
1435     newDtd = oldDtd;
1436 #endif /* XML_DTD */
1437 
1438   /* Note that the magical uses of the pre-processor to make field
1439      access look more like C++ require that `parser' be overwritten
1440      here.  This makes this function more painful to follow than it
1441      would be otherwise.
1442   */
1443   if (parser->m_ns) {
1444     XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1445     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1446   } else {
1447     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1448   }
1449 
1450   if (! parser)
1451     return NULL;
1452 
1453   parser->m_startElementHandler = oldStartElementHandler;
1454   parser->m_endElementHandler = oldEndElementHandler;
1455   parser->m_characterDataHandler = oldCharacterDataHandler;
1456   parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1457   parser->m_commentHandler = oldCommentHandler;
1458   parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1459   parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1460   parser->m_defaultHandler = oldDefaultHandler;
1461   parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1462   parser->m_notationDeclHandler = oldNotationDeclHandler;
1463   parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1464   parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1465   parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1466   parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1467   parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1468   parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1469   parser->m_elementDeclHandler = oldElementDeclHandler;
1470   parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1471   parser->m_entityDeclHandler = oldEntityDeclHandler;
1472   parser->m_xmlDeclHandler = oldXmlDeclHandler;
1473   parser->m_declElementType = oldDeclElementType;
1474   parser->m_userData = oldUserData;
1475   if (oldUserData == oldHandlerArg)
1476     parser->m_handlerArg = parser->m_userData;
1477   else
1478     parser->m_handlerArg = parser;
1479   if (oldExternalEntityRefHandlerArg != oldParser)
1480     parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1481   parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1482   parser->m_ns_triplets = oldns_triplets;
1483   parser->m_hash_secret_salt = oldhash_secret_salt;
1484   parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1485   parser->m_parentParser = oldParser;
1486 #ifdef XML_DTD
1487   parser->m_paramEntityParsing = oldParamEntityParsing;
1488   parser->m_prologState.inEntityValue = oldInEntityValue;
1489   if (context) {
1490 #endif /* XML_DTD */
1491     if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1492         || ! setContext(parser, context)) {
1493       XML_ParserFree(parser);
1494       return NULL;
1495     }
1496     parser->m_processor = externalEntityInitProcessor;
1497 #ifdef XML_DTD
1498   } else {
1499     /* The DTD instance referenced by parser->m_dtd is shared between the
1500        document's root parser and external PE parsers, therefore one does not
1501        need to call setContext. In addition, one also *must* not call
1502        setContext, because this would overwrite existing prefix->binding
1503        pointers in parser->m_dtd with ones that get destroyed with the external
1504        PE parser. This would leave those prefixes with dangling pointers.
1505     */
1506     parser->m_isParamEntity = XML_TRUE;
1507     XmlPrologStateInitExternalEntity(&parser->m_prologState);
1508     parser->m_processor = externalParEntInitProcessor;
1509   }
1510 #endif /* XML_DTD */
1511   return parser;
1512 }
1513 
1514 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1515 destroyBindings(BINDING *bindings, XML_Parser parser) {
1516   for (;;) {
1517     BINDING *b = bindings;
1518     if (! b)
1519       break;
1520     bindings = b->nextTagBinding;
1521     FREE(parser, b->uri);
1522     FREE(parser, b);
1523   }
1524 }
1525 
1526 void XMLCALL
XML_ParserFree(XML_Parser parser)1527 XML_ParserFree(XML_Parser parser) {
1528   TAG *tagList;
1529   OPEN_INTERNAL_ENTITY *entityList;
1530   if (parser == NULL)
1531     return;
1532   /* free m_tagStack and m_freeTagList */
1533   tagList = parser->m_tagStack;
1534   for (;;) {
1535     TAG *p;
1536     if (tagList == NULL) {
1537       if (parser->m_freeTagList == NULL)
1538         break;
1539       tagList = parser->m_freeTagList;
1540       parser->m_freeTagList = NULL;
1541     }
1542     p = tagList;
1543     tagList = tagList->parent;
1544     FREE(parser, p->buf);
1545     destroyBindings(p->bindings, parser);
1546     FREE(parser, p);
1547   }
1548   /* free m_openInternalEntities and m_freeInternalEntities */
1549   entityList = parser->m_openInternalEntities;
1550   for (;;) {
1551     OPEN_INTERNAL_ENTITY *openEntity;
1552     if (entityList == NULL) {
1553       if (parser->m_freeInternalEntities == NULL)
1554         break;
1555       entityList = parser->m_freeInternalEntities;
1556       parser->m_freeInternalEntities = NULL;
1557     }
1558     openEntity = entityList;
1559     entityList = entityList->next;
1560     FREE(parser, openEntity);
1561   }
1562 
1563   destroyBindings(parser->m_freeBindingList, parser);
1564   destroyBindings(parser->m_inheritedBindings, parser);
1565   poolDestroy(&parser->m_tempPool);
1566   poolDestroy(&parser->m_temp2Pool);
1567   FREE(parser, (void *)parser->m_protocolEncodingName);
1568 #ifdef XML_DTD
1569   /* external parameter entity parsers share the DTD structure
1570      parser->m_dtd with the root parser, so we must not destroy it
1571   */
1572   if (! parser->m_isParamEntity && parser->m_dtd)
1573 #else
1574   if (parser->m_dtd)
1575 #endif /* XML_DTD */
1576     dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1577                &parser->m_mem);
1578   FREE(parser, (void *)parser->m_atts);
1579 #ifdef XML_ATTR_INFO
1580   FREE(parser, (void *)parser->m_attInfo);
1581 #endif
1582   FREE(parser, parser->m_groupConnector);
1583   FREE(parser, parser->m_buffer);
1584   FREE(parser, parser->m_dataBuf);
1585   FREE(parser, parser->m_nsAtts);
1586   FREE(parser, parser->m_unknownEncodingMem);
1587   if (parser->m_unknownEncodingRelease)
1588     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1589   FREE(parser, parser);
1590 }
1591 
1592 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1593 XML_UseParserAsHandlerArg(XML_Parser parser) {
1594   if (parser != NULL)
1595     parser->m_handlerArg = parser;
1596 }
1597 
1598 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1599 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1600   if (parser == NULL)
1601     return XML_ERROR_INVALID_ARGUMENT;
1602 #ifdef XML_DTD
1603   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1604   if (parser->m_parsingStatus.parsing == XML_PARSING
1605       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1606     return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1607   parser->m_useForeignDTD = useDTD;
1608   return XML_ERROR_NONE;
1609 #else
1610   UNUSED_P(useDTD);
1611   return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1612 #endif
1613 }
1614 
1615 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1616 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1617   if (parser == NULL)
1618     return;
1619   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1620   if (parser->m_parsingStatus.parsing == XML_PARSING
1621       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1622     return;
1623   parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1624 }
1625 
1626 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1627 XML_SetUserData(XML_Parser parser, void *p) {
1628   if (parser == NULL)
1629     return;
1630   if (parser->m_handlerArg == parser->m_userData)
1631     parser->m_handlerArg = parser->m_userData = p;
1632   else
1633     parser->m_userData = p;
1634 }
1635 
1636 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1637 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1638   if (parser == NULL)
1639     return XML_STATUS_ERROR;
1640   if (p) {
1641     p = poolCopyString(&parser->m_dtd->pool, p);
1642     if (! p)
1643       return XML_STATUS_ERROR;
1644     parser->m_curBase = p;
1645   } else
1646     parser->m_curBase = NULL;
1647   return XML_STATUS_OK;
1648 }
1649 
1650 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1651 XML_GetBase(XML_Parser parser) {
1652   if (parser == NULL)
1653     return NULL;
1654   return parser->m_curBase;
1655 }
1656 
1657 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1658 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1659   if (parser == NULL)
1660     return -1;
1661   return parser->m_nSpecifiedAtts;
1662 }
1663 
1664 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1665 XML_GetIdAttributeIndex(XML_Parser parser) {
1666   if (parser == NULL)
1667     return -1;
1668   return parser->m_idAttIndex;
1669 }
1670 
#ifdef XML_ATTR_INFO
/* Returns the parser's attribute info array (m_attInfo), or NULL for a NULL
   parser. Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1679 
1680 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1681 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1682                       XML_EndElementHandler end) {
1683   if (parser == NULL)
1684     return;
1685   parser->m_startElementHandler = start;
1686   parser->m_endElementHandler = end;
1687 }
1688 
1689 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1690 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1691   if (parser != NULL)
1692     parser->m_startElementHandler = start;
1693 }
1694 
1695 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1696 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1697   if (parser != NULL)
1698     parser->m_endElementHandler = end;
1699 }
1700 
1701 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1702 XML_SetCharacterDataHandler(XML_Parser parser,
1703                             XML_CharacterDataHandler handler) {
1704   if (parser != NULL)
1705     parser->m_characterDataHandler = handler;
1706 }
1707 
1708 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1709 XML_SetProcessingInstructionHandler(XML_Parser parser,
1710                                     XML_ProcessingInstructionHandler handler) {
1711   if (parser != NULL)
1712     parser->m_processingInstructionHandler = handler;
1713 }
1714 
1715 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1716 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1717   if (parser != NULL)
1718     parser->m_commentHandler = handler;
1719 }
1720 
1721 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1722 XML_SetCdataSectionHandler(XML_Parser parser,
1723                            XML_StartCdataSectionHandler start,
1724                            XML_EndCdataSectionHandler end) {
1725   if (parser == NULL)
1726     return;
1727   parser->m_startCdataSectionHandler = start;
1728   parser->m_endCdataSectionHandler = end;
1729 }
1730 
1731 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1732 XML_SetStartCdataSectionHandler(XML_Parser parser,
1733                                 XML_StartCdataSectionHandler start) {
1734   if (parser != NULL)
1735     parser->m_startCdataSectionHandler = start;
1736 }
1737 
1738 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1739 XML_SetEndCdataSectionHandler(XML_Parser parser,
1740                               XML_EndCdataSectionHandler end) {
1741   if (parser != NULL)
1742     parser->m_endCdataSectionHandler = end;
1743 }
1744 
1745 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1746 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1747   if (parser == NULL)
1748     return;
1749   parser->m_defaultHandler = handler;
1750   parser->m_defaultExpandInternalEntities = XML_FALSE;
1751 }
1752 
1753 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1754 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1755   if (parser == NULL)
1756     return;
1757   parser->m_defaultHandler = handler;
1758   parser->m_defaultExpandInternalEntities = XML_TRUE;
1759 }
1760 
1761 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1762 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1763                           XML_EndDoctypeDeclHandler end) {
1764   if (parser == NULL)
1765     return;
1766   parser->m_startDoctypeDeclHandler = start;
1767   parser->m_endDoctypeDeclHandler = end;
1768 }
1769 
1770 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1771 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1772                                XML_StartDoctypeDeclHandler start) {
1773   if (parser != NULL)
1774     parser->m_startDoctypeDeclHandler = start;
1775 }
1776 
1777 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1778 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1779   if (parser != NULL)
1780     parser->m_endDoctypeDeclHandler = end;
1781 }
1782 
1783 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1784 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1785                                  XML_UnparsedEntityDeclHandler handler) {
1786   if (parser != NULL)
1787     parser->m_unparsedEntityDeclHandler = handler;
1788 }
1789 
1790 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1791 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1792   if (parser != NULL)
1793     parser->m_notationDeclHandler = handler;
1794 }
1795 
1796 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1797 XML_SetNamespaceDeclHandler(XML_Parser parser,
1798                             XML_StartNamespaceDeclHandler start,
1799                             XML_EndNamespaceDeclHandler end) {
1800   if (parser == NULL)
1801     return;
1802   parser->m_startNamespaceDeclHandler = start;
1803   parser->m_endNamespaceDeclHandler = end;
1804 }
1805 
1806 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1807 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1808                                  XML_StartNamespaceDeclHandler start) {
1809   if (parser != NULL)
1810     parser->m_startNamespaceDeclHandler = start;
1811 }
1812 
1813 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1814 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1815                                XML_EndNamespaceDeclHandler end) {
1816   if (parser != NULL)
1817     parser->m_endNamespaceDeclHandler = end;
1818 }
1819 
1820 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1821 XML_SetNotStandaloneHandler(XML_Parser parser,
1822                             XML_NotStandaloneHandler handler) {
1823   if (parser != NULL)
1824     parser->m_notStandaloneHandler = handler;
1825 }
1826 
1827 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1828 XML_SetExternalEntityRefHandler(XML_Parser parser,
1829                                 XML_ExternalEntityRefHandler handler) {
1830   if (parser != NULL)
1831     parser->m_externalEntityRefHandler = handler;
1832 }
1833 
1834 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1835 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1836   if (parser == NULL)
1837     return;
1838   if (arg)
1839     parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1840   else
1841     parser->m_externalEntityRefHandlerArg = parser;
1842 }
1843 
1844 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1845 XML_SetSkippedEntityHandler(XML_Parser parser,
1846                             XML_SkippedEntityHandler handler) {
1847   if (parser != NULL)
1848     parser->m_skippedEntityHandler = handler;
1849 }
1850 
1851 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1852 XML_SetUnknownEncodingHandler(XML_Parser parser,
1853                               XML_UnknownEncodingHandler handler, void *data) {
1854   if (parser == NULL)
1855     return;
1856   parser->m_unknownEncodingHandler = handler;
1857   parser->m_unknownEncodingHandlerData = data;
1858 }
1859 
1860 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1861 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1862   if (parser != NULL)
1863     parser->m_elementDeclHandler = eldecl;
1864 }
1865 
1866 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1867 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1868   if (parser != NULL)
1869     parser->m_attlistDeclHandler = attdecl;
1870 }
1871 
1872 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1873 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1874   if (parser != NULL)
1875     parser->m_entityDeclHandler = handler;
1876 }
1877 
1878 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1879 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1880   if (parser != NULL)
1881     parser->m_xmlDeclHandler = handler;
1882 }
1883 
1884 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1885 XML_SetParamEntityParsing(XML_Parser parser,
1886                           enum XML_ParamEntityParsing peParsing) {
1887   if (parser == NULL)
1888     return 0;
1889   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1890   if (parser->m_parsingStatus.parsing == XML_PARSING
1891       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1892     return 0;
1893 #ifdef XML_DTD
1894   parser->m_paramEntityParsing = peParsing;
1895   return 1;
1896 #else
1897   return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1898 #endif
1899 }
1900 
1901 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1902 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1903   if (parser == NULL)
1904     return 0;
1905   if (parser->m_parentParser)
1906     return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1907   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1908   if (parser->m_parsingStatus.parsing == XML_PARSING
1909       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1910     return 0;
1911   parser->m_hash_secret_salt = hash_salt;
1912   return 1;
1913 }
1914 
1915 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1916 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1917   if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1918     if (parser != NULL)
1919       parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1920     return XML_STATUS_ERROR;
1921   }
1922   switch (parser->m_parsingStatus.parsing) {
1923   case XML_SUSPENDED:
1924     parser->m_errorCode = XML_ERROR_SUSPENDED;
1925     return XML_STATUS_ERROR;
1926   case XML_FINISHED:
1927     parser->m_errorCode = XML_ERROR_FINISHED;
1928     return XML_STATUS_ERROR;
1929   case XML_INITIALIZED:
1930     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1931       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1932       return XML_STATUS_ERROR;
1933     }
1934     /* fall through */
1935   default:
1936     parser->m_parsingStatus.parsing = XML_PARSING;
1937   }
1938 
1939 #if XML_CONTEXT_BYTES == 0
1940   if (parser->m_bufferPtr == parser->m_bufferEnd) {
1941     const char *end;
1942     int nLeftOver;
1943     enum XML_Status result;
1944     /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1945     if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1946       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1947       parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1948       parser->m_processor = errorProcessor;
1949       return XML_STATUS_ERROR;
1950     }
1951     // though this isn't a buffer request, we assume that `len` is the app's
1952     // preferred buffer fill size, and therefore save it here.
1953     parser->m_lastBufferRequestSize = len;
1954     parser->m_parseEndByteIndex += len;
1955     parser->m_positionPtr = s;
1956     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1957 
1958     parser->m_errorCode
1959         = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
1960 
1961     if (parser->m_errorCode != XML_ERROR_NONE) {
1962       parser->m_eventEndPtr = parser->m_eventPtr;
1963       parser->m_processor = errorProcessor;
1964       return XML_STATUS_ERROR;
1965     } else {
1966       switch (parser->m_parsingStatus.parsing) {
1967       case XML_SUSPENDED:
1968         result = XML_STATUS_SUSPENDED;
1969         break;
1970       case XML_INITIALIZED:
1971       case XML_PARSING:
1972         if (isFinal) {
1973           parser->m_parsingStatus.parsing = XML_FINISHED;
1974           return XML_STATUS_OK;
1975         }
1976       /* fall through */
1977       default:
1978         result = XML_STATUS_OK;
1979       }
1980     }
1981 
1982     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1983                       &parser->m_position);
1984     nLeftOver = s + len - end;
1985     if (nLeftOver) {
1986       // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
1987       // (and XML_ERROR_FINISHED) from XML_GetBuffer.
1988       const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
1989       parser->m_parsingStatus.parsing = XML_PARSING;
1990       void *const temp = XML_GetBuffer(parser, nLeftOver);
1991       parser->m_parsingStatus.parsing = originalStatus;
1992       // GetBuffer may have overwritten this, but we want to remember what the
1993       // app requested, not how many bytes were left over after parsing.
1994       parser->m_lastBufferRequestSize = len;
1995       if (temp == NULL) {
1996         // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
1997         parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1998         parser->m_processor = errorProcessor;
1999         return XML_STATUS_ERROR;
2000       }
2001       // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2002       // don't have any data to preserve, and can copy straight into the start
2003       // of the buffer rather than the GetBuffer return pointer (which may be
2004       // pointing further into the allocated buffer).
2005       memcpy(parser->m_buffer, end, nLeftOver);
2006     }
2007     parser->m_bufferPtr = parser->m_buffer;
2008     parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2009     parser->m_positionPtr = parser->m_bufferPtr;
2010     parser->m_parseEndPtr = parser->m_bufferEnd;
2011     parser->m_eventPtr = parser->m_bufferPtr;
2012     parser->m_eventEndPtr = parser->m_bufferPtr;
2013     return result;
2014   }
2015 #endif /* XML_CONTEXT_BYTES == 0 */
2016   void *buff = XML_GetBuffer(parser, len);
2017   if (buff == NULL)
2018     return XML_STATUS_ERROR;
2019   if (len > 0) {
2020     assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2021     memcpy(buff, s, len);
2022   }
2023   return XML_ParseBuffer(parser, len, isFinal);
2024 }
2025 
2026 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2027 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2028   const char *start;
2029   enum XML_Status result = XML_STATUS_OK;
2030 
2031   if (parser == NULL)
2032     return XML_STATUS_ERROR;
2033   switch (parser->m_parsingStatus.parsing) {
2034   case XML_SUSPENDED:
2035     parser->m_errorCode = XML_ERROR_SUSPENDED;
2036     return XML_STATUS_ERROR;
2037   case XML_FINISHED:
2038     parser->m_errorCode = XML_ERROR_FINISHED;
2039     return XML_STATUS_ERROR;
2040   case XML_INITIALIZED:
2041     /* Has someone called XML_GetBuffer successfully before? */
2042     if (! parser->m_bufferPtr) {
2043       parser->m_errorCode = XML_ERROR_NO_BUFFER;
2044       return XML_STATUS_ERROR;
2045     }
2046 
2047     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2048       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2049       return XML_STATUS_ERROR;
2050     }
2051     /* fall through */
2052   default:
2053     parser->m_parsingStatus.parsing = XML_PARSING;
2054   }
2055 
2056   start = parser->m_bufferPtr;
2057   parser->m_positionPtr = start;
2058   parser->m_bufferEnd += len;
2059   parser->m_parseEndPtr = parser->m_bufferEnd;
2060   parser->m_parseEndByteIndex += len;
2061   parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2062 
2063   parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2064                                       &parser->m_bufferPtr);
2065 
2066   if (parser->m_errorCode != XML_ERROR_NONE) {
2067     parser->m_eventEndPtr = parser->m_eventPtr;
2068     parser->m_processor = errorProcessor;
2069     return XML_STATUS_ERROR;
2070   } else {
2071     switch (parser->m_parsingStatus.parsing) {
2072     case XML_SUSPENDED:
2073       result = XML_STATUS_SUSPENDED;
2074       break;
2075     case XML_INITIALIZED:
2076     case XML_PARSING:
2077       if (isFinal) {
2078         parser->m_parsingStatus.parsing = XML_FINISHED;
2079         return result;
2080       }
2081     default:; /* should not happen */
2082     }
2083   }
2084 
2085   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2086                     parser->m_bufferPtr, &parser->m_position);
2087   parser->m_positionPtr = parser->m_bufferPtr;
2088   return result;
2089 }
2090 
2091 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2092 XML_GetBuffer(XML_Parser parser, int len) {
2093   if (parser == NULL)
2094     return NULL;
2095   if (len < 0) {
2096     parser->m_errorCode = XML_ERROR_NO_MEMORY;
2097     return NULL;
2098   }
2099   switch (parser->m_parsingStatus.parsing) {
2100   case XML_SUSPENDED:
2101     parser->m_errorCode = XML_ERROR_SUSPENDED;
2102     return NULL;
2103   case XML_FINISHED:
2104     parser->m_errorCode = XML_ERROR_FINISHED;
2105     return NULL;
2106   default:;
2107   }
2108 
2109   // whether or not the request succeeds, `len` seems to be the app's preferred
2110   // buffer fill size; remember it.
2111   parser->m_lastBufferRequestSize = len;
2112   if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2113       || parser->m_buffer == NULL) {
2114 #if XML_CONTEXT_BYTES > 0
2115     int keep;
2116 #endif /* XML_CONTEXT_BYTES > 0 */
2117     /* Do not invoke signed arithmetic overflow: */
2118     int neededSize = (int)((unsigned)len
2119                            + (unsigned)EXPAT_SAFE_PTR_DIFF(
2120                                parser->m_bufferEnd, parser->m_bufferPtr));
2121     if (neededSize < 0) {
2122       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2123       return NULL;
2124     }
2125 #if XML_CONTEXT_BYTES > 0
2126     keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2127     if (keep > XML_CONTEXT_BYTES)
2128       keep = XML_CONTEXT_BYTES;
2129     /* Detect and prevent integer overflow */
2130     if (keep > INT_MAX - neededSize) {
2131       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2132       return NULL;
2133     }
2134     neededSize += keep;
2135 #endif /* XML_CONTEXT_BYTES > 0 */
2136     if (parser->m_buffer && parser->m_bufferPtr
2137         && neededSize
2138                <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2139 #if XML_CONTEXT_BYTES > 0
2140       if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2141         int offset
2142             = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2143               - keep;
2144         /* The buffer pointers cannot be NULL here; we have at least some bytes
2145          * in the buffer */
2146         memmove(parser->m_buffer, &parser->m_buffer[offset],
2147                 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2148         parser->m_bufferEnd -= offset;
2149         parser->m_bufferPtr -= offset;
2150       }
2151 #else
2152       memmove(parser->m_buffer, parser->m_bufferPtr,
2153               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2154       parser->m_bufferEnd
2155           = parser->m_buffer
2156             + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2157       parser->m_bufferPtr = parser->m_buffer;
2158 #endif /* XML_CONTEXT_BYTES > 0 */
2159     } else {
2160       char *newBuf;
2161       int bufferSize
2162           = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2163       if (bufferSize == 0)
2164         bufferSize = INIT_BUFFER_SIZE;
2165       do {
2166         /* Do not invoke signed arithmetic overflow: */
2167         bufferSize = (int)(2U * (unsigned)bufferSize);
2168       } while (bufferSize < neededSize && bufferSize > 0);
2169       if (bufferSize <= 0) {
2170         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2171         return NULL;
2172       }
2173       newBuf = (char *)MALLOC(parser, bufferSize);
2174       if (newBuf == 0) {
2175         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2176         return NULL;
2177       }
2178       parser->m_bufferLim = newBuf + bufferSize;
2179 #if XML_CONTEXT_BYTES > 0
2180       if (parser->m_bufferPtr) {
2181         memcpy(newBuf, &parser->m_bufferPtr[-keep],
2182                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2183                    + keep);
2184         FREE(parser, parser->m_buffer);
2185         parser->m_buffer = newBuf;
2186         parser->m_bufferEnd
2187             = parser->m_buffer
2188               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2189               + keep;
2190         parser->m_bufferPtr = parser->m_buffer + keep;
2191       } else {
2192         /* This must be a brand new buffer with no data in it yet */
2193         parser->m_bufferEnd = newBuf;
2194         parser->m_bufferPtr = parser->m_buffer = newBuf;
2195       }
2196 #else
2197       if (parser->m_bufferPtr) {
2198         memcpy(newBuf, parser->m_bufferPtr,
2199                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2200         FREE(parser, parser->m_buffer);
2201         parser->m_bufferEnd
2202             = newBuf
2203               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2204       } else {
2205         /* This must be a brand new buffer with no data in it yet */
2206         parser->m_bufferEnd = newBuf;
2207       }
2208       parser->m_bufferPtr = parser->m_buffer = newBuf;
2209 #endif /* XML_CONTEXT_BYTES > 0 */
2210     }
2211     parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2212     parser->m_positionPtr = NULL;
2213   }
2214   return parser->m_bufferEnd;
2215 }
2216 
2217 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2218 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2219   if (parser == NULL)
2220     return XML_STATUS_ERROR;
2221   switch (parser->m_parsingStatus.parsing) {
2222   case XML_SUSPENDED:
2223     if (resumable) {
2224       parser->m_errorCode = XML_ERROR_SUSPENDED;
2225       return XML_STATUS_ERROR;
2226     }
2227     parser->m_parsingStatus.parsing = XML_FINISHED;
2228     break;
2229   case XML_FINISHED:
2230     parser->m_errorCode = XML_ERROR_FINISHED;
2231     return XML_STATUS_ERROR;
2232   default:
2233     if (resumable) {
2234 #ifdef XML_DTD
2235       if (parser->m_isParamEntity) {
2236         parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2237         return XML_STATUS_ERROR;
2238       }
2239 #endif
2240       parser->m_parsingStatus.parsing = XML_SUSPENDED;
2241     } else
2242       parser->m_parsingStatus.parsing = XML_FINISHED;
2243   }
2244   return XML_STATUS_OK;
2245 }
2246 
2247 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2248 XML_ResumeParser(XML_Parser parser) {
2249   enum XML_Status result = XML_STATUS_OK;
2250 
2251   if (parser == NULL)
2252     return XML_STATUS_ERROR;
2253   if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2254     parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2255     return XML_STATUS_ERROR;
2256   }
2257   parser->m_parsingStatus.parsing = XML_PARSING;
2258 
2259   parser->m_errorCode = callProcessor(
2260       parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2261 
2262   if (parser->m_errorCode != XML_ERROR_NONE) {
2263     parser->m_eventEndPtr = parser->m_eventPtr;
2264     parser->m_processor = errorProcessor;
2265     return XML_STATUS_ERROR;
2266   } else {
2267     switch (parser->m_parsingStatus.parsing) {
2268     case XML_SUSPENDED:
2269       result = XML_STATUS_SUSPENDED;
2270       break;
2271     case XML_INITIALIZED:
2272     case XML_PARSING:
2273       if (parser->m_parsingStatus.finalBuffer) {
2274         parser->m_parsingStatus.parsing = XML_FINISHED;
2275         return result;
2276       }
2277     default:;
2278     }
2279   }
2280 
2281   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2282                     parser->m_bufferPtr, &parser->m_position);
2283   parser->m_positionPtr = parser->m_bufferPtr;
2284   return result;
2285 }
2286 
2287 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2288 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2289   if (parser == NULL)
2290     return;
2291   assert(status != NULL);
2292   *status = parser->m_parsingStatus;
2293 }
2294 
2295 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2296 XML_GetErrorCode(XML_Parser parser) {
2297   if (parser == NULL)
2298     return XML_ERROR_INVALID_ARGUMENT;
2299   return parser->m_errorCode;
2300 }
2301 
2302 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2303 XML_GetCurrentByteIndex(XML_Parser parser) {
2304   if (parser == NULL)
2305     return -1;
2306   if (parser->m_eventPtr)
2307     return (XML_Index)(parser->m_parseEndByteIndex
2308                        - (parser->m_parseEndPtr - parser->m_eventPtr));
2309   return -1;
2310 }
2311 
2312 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2313 XML_GetCurrentByteCount(XML_Parser parser) {
2314   if (parser == NULL)
2315     return 0;
2316   if (parser->m_eventEndPtr && parser->m_eventPtr)
2317     return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2318   return 0;
2319 }
2320 
2321 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2322 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2323 #if XML_CONTEXT_BYTES > 0
2324   if (parser == NULL)
2325     return NULL;
2326   if (parser->m_eventPtr && parser->m_buffer) {
2327     if (offset != NULL)
2328       *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2329     if (size != NULL)
2330       *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2331     return parser->m_buffer;
2332   }
2333 #else
2334   (void)parser;
2335   (void)offset;
2336   (void)size;
2337 #endif /* XML_CONTEXT_BYTES > 0 */
2338   return (const char *)0;
2339 }
2340 
2341 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2342 XML_GetCurrentLineNumber(XML_Parser parser) {
2343   if (parser == NULL)
2344     return 0;
2345   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2346     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2347                       parser->m_eventPtr, &parser->m_position);
2348     parser->m_positionPtr = parser->m_eventPtr;
2349   }
2350   return parser->m_position.lineNumber + 1;
2351 }
2352 
2353 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2354 XML_GetCurrentColumnNumber(XML_Parser parser) {
2355   if (parser == NULL)
2356     return 0;
2357   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2358     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2359                       parser->m_eventPtr, &parser->m_position);
2360     parser->m_positionPtr = parser->m_eventPtr;
2361   }
2362   return parser->m_position.columnNumber;
2363 }
2364 
2365 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2366 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2367   if (parser != NULL)
2368     FREE(parser, model);
2369 }
2370 
2371 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2372 XML_MemMalloc(XML_Parser parser, size_t size) {
2373   if (parser == NULL)
2374     return NULL;
2375   return MALLOC(parser, size);
2376 }
2377 
2378 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2379 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2380   if (parser == NULL)
2381     return NULL;
2382   return REALLOC(parser, ptr, size);
2383 }
2384 
2385 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2386 XML_MemFree(XML_Parser parser, void *ptr) {
2387   if (parser != NULL)
2388     FREE(parser, ptr);
2389 }
2390 
2391 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2392 XML_DefaultCurrent(XML_Parser parser) {
2393   if (parser == NULL)
2394     return;
2395   if (parser->m_defaultHandler) {
2396     if (parser->m_openInternalEntities)
2397       reportDefault(parser, parser->m_internalEncoding,
2398                     parser->m_openInternalEntities->internalEventPtr,
2399                     parser->m_openInternalEntities->internalEventEndPtr);
2400     else
2401       reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2402                     parser->m_eventEndPtr);
2403   }
2404 }
2405 
2406 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2407 XML_ErrorString(enum XML_Error code) {
2408   switch (code) {
2409   case XML_ERROR_NONE:
2410     return NULL;
2411   case XML_ERROR_NO_MEMORY:
2412     return XML_L("out of memory");
2413   case XML_ERROR_SYNTAX:
2414     return XML_L("syntax error");
2415   case XML_ERROR_NO_ELEMENTS:
2416     return XML_L("no element found");
2417   case XML_ERROR_INVALID_TOKEN:
2418     return XML_L("not well-formed (invalid token)");
2419   case XML_ERROR_UNCLOSED_TOKEN:
2420     return XML_L("unclosed token");
2421   case XML_ERROR_PARTIAL_CHAR:
2422     return XML_L("partial character");
2423   case XML_ERROR_TAG_MISMATCH:
2424     return XML_L("mismatched tag");
2425   case XML_ERROR_DUPLICATE_ATTRIBUTE:
2426     return XML_L("duplicate attribute");
2427   case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2428     return XML_L("junk after document element");
2429   case XML_ERROR_PARAM_ENTITY_REF:
2430     return XML_L("illegal parameter entity reference");
2431   case XML_ERROR_UNDEFINED_ENTITY:
2432     return XML_L("undefined entity");
2433   case XML_ERROR_RECURSIVE_ENTITY_REF:
2434     return XML_L("recursive entity reference");
2435   case XML_ERROR_ASYNC_ENTITY:
2436     return XML_L("asynchronous entity");
2437   case XML_ERROR_BAD_CHAR_REF:
2438     return XML_L("reference to invalid character number");
2439   case XML_ERROR_BINARY_ENTITY_REF:
2440     return XML_L("reference to binary entity");
2441   case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2442     return XML_L("reference to external entity in attribute");
2443   case XML_ERROR_MISPLACED_XML_PI:
2444     return XML_L("XML or text declaration not at start of entity");
2445   case XML_ERROR_UNKNOWN_ENCODING:
2446     return XML_L("unknown encoding");
2447   case XML_ERROR_INCORRECT_ENCODING:
2448     return XML_L("encoding specified in XML declaration is incorrect");
2449   case XML_ERROR_UNCLOSED_CDATA_SECTION:
2450     return XML_L("unclosed CDATA section");
2451   case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2452     return XML_L("error in processing external entity reference");
2453   case XML_ERROR_NOT_STANDALONE:
2454     return XML_L("document is not standalone");
2455   case XML_ERROR_UNEXPECTED_STATE:
2456     return XML_L("unexpected parser state - please send a bug report");
2457   case XML_ERROR_ENTITY_DECLARED_IN_PE:
2458     return XML_L("entity declared in parameter entity");
2459   case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2460     return XML_L("requested feature requires XML_DTD support in Expat");
2461   case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2462     return XML_L("cannot change setting once parsing has begun");
2463   /* Added in 1.95.7. */
2464   case XML_ERROR_UNBOUND_PREFIX:
2465     return XML_L("unbound prefix");
2466   /* Added in 1.95.8. */
2467   case XML_ERROR_UNDECLARING_PREFIX:
2468     return XML_L("must not undeclare prefix");
2469   case XML_ERROR_INCOMPLETE_PE:
2470     return XML_L("incomplete markup in parameter entity");
2471   case XML_ERROR_XML_DECL:
2472     return XML_L("XML declaration not well-formed");
2473   case XML_ERROR_TEXT_DECL:
2474     return XML_L("text declaration not well-formed");
2475   case XML_ERROR_PUBLICID:
2476     return XML_L("illegal character(s) in public id");
2477   case XML_ERROR_SUSPENDED:
2478     return XML_L("parser suspended");
2479   case XML_ERROR_NOT_SUSPENDED:
2480     return XML_L("parser not suspended");
2481   case XML_ERROR_ABORTED:
2482     return XML_L("parsing aborted");
2483   case XML_ERROR_FINISHED:
2484     return XML_L("parsing finished");
2485   case XML_ERROR_SUSPEND_PE:
2486     return XML_L("cannot suspend in external parameter entity");
2487   /* Added in 2.0.0. */
2488   case XML_ERROR_RESERVED_PREFIX_XML:
2489     return XML_L(
2490         "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2491   case XML_ERROR_RESERVED_PREFIX_XMLNS:
2492     return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2493   case XML_ERROR_RESERVED_NAMESPACE_URI:
2494     return XML_L(
2495         "prefix must not be bound to one of the reserved namespace names");
2496   /* Added in 2.2.5. */
2497   case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2498     return XML_L("invalid argument");
2499     /* Added in 2.3.0. */
2500   case XML_ERROR_NO_BUFFER:
2501     return XML_L(
2502         "a successful prior call to function XML_GetBuffer is required");
2503   /* Added in 2.4.0. */
2504   case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2505     return XML_L(
2506         "limit on input amplification factor (from DTD and entities) breached");
2507   }
2508   return NULL;
2509 }
2510 
2511 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2512 XML_ExpatVersion(void) {
2513   /* V1 is used to string-ize the version number. However, it would
2514      string-ize the actual version macro *names* unless we get them
2515      substituted before being passed to V1. CPP is defined to expand
2516      a macro, then rescan for more expansions. Thus, we use V2 to expand
2517      the version macros, then CPP will expand the resulting V1() macro
2518      with the correct numerals. */
2519   /* ### I'm assuming cpp is portable in this respect... */
2520 
2521 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2522 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2523 
2524   return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2525 
2526 #undef V1
2527 #undef V2
2528 }
2529 
2530 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2531 XML_ExpatVersionInfo(void) {
2532   XML_Expat_Version version;
2533 
2534   version.major = XML_MAJOR_VERSION;
2535   version.minor = XML_MINOR_VERSION;
2536   version.micro = XML_MICRO_VERSION;
2537 
2538   return version;
2539 }
2540 
2541 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2542 XML_GetFeatureList(void) {
2543   static const XML_Feature features[] = {
2544       {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2545        sizeof(XML_Char)},
2546       {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2547        sizeof(XML_LChar)},
2548 #ifdef XML_UNICODE
2549       {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2550 #endif
2551 #ifdef XML_UNICODE_WCHAR_T
2552       {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2553 #endif
2554 #ifdef XML_DTD
2555       {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2556 #endif
2557 #if XML_CONTEXT_BYTES > 0
2558       {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2559        XML_CONTEXT_BYTES},
2560 #endif
2561 #ifdef XML_MIN_SIZE
2562       {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2563 #endif
2564 #ifdef XML_NS
2565       {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2566 #endif
2567 #ifdef XML_LARGE_SIZE
2568       {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2569 #endif
2570 #ifdef XML_ATTR_INFO
2571       {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2572 #endif
2573 #if XML_GE == 1
2574       /* Added in Expat 2.4.0 for XML_DTD defined and
2575        * added in Expat 2.6.0 for XML_GE == 1. */
2576       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2577        XML_L("XML_BLAP_MAX_AMP"),
2578        (long int)
2579            EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2580       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2581        XML_L("XML_BLAP_ACT_THRES"),
2582        EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2583       /* Added in Expat 2.6.0. */
2584       {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2585 #endif
2586       {XML_FEATURE_END, NULL, 0}};
2587 
2588   return features;
2589 }
2590 
2591 #if XML_GE == 1
2592 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2593 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2594     XML_Parser parser, float maximumAmplificationFactor) {
2595   if ((parser == NULL) || (parser->m_parentParser != NULL)
2596       || isnan(maximumAmplificationFactor)
2597       || (maximumAmplificationFactor < 1.0f)) {
2598     return XML_FALSE;
2599   }
2600   parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2601   return XML_TRUE;
2602 }
2603 
2604 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2605 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2606     XML_Parser parser, unsigned long long activationThresholdBytes) {
2607   if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2608     return XML_FALSE;
2609   }
2610   parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2611   return XML_TRUE;
2612 }
2613 #endif /* XML_GE == 1 */
2614 
2615 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2616 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2617   if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2618     parser->m_reparseDeferralEnabled = enabled;
2619     return XML_TRUE;
2620   }
2621   return XML_FALSE;
2622 }
2623 
2624 /* Initially tag->rawName always points into the parse buffer;
2625    for those TAG instances opened while the current parse buffer was
2626    processed, and not yet closed, we need to store tag->rawName in a more
2627    permanent location, since the parse buffer is about to be discarded.
2628 */
2629 static XML_Bool
storeRawNames(XML_Parser parser)2630 storeRawNames(XML_Parser parser) {
2631   TAG *tag = parser->m_tagStack;
2632   while (tag) {
2633     int bufSize;
2634     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2635     size_t rawNameLen;
2636     char *rawNameBuf = tag->buf + nameLen;
2637     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
2638        at the first entry that has already been copied; everything
2639        below it in the stack is already been accounted for in a
2640        previous call to this function.
2641     */
2642     if (tag->rawName == rawNameBuf)
2643       break;
2644     /* For reuse purposes we need to ensure that the
2645        size of tag->buf is a multiple of sizeof(XML_Char).
2646     */
2647     rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2648     /* Detect and prevent integer overflow. */
2649     if (rawNameLen > (size_t)INT_MAX - nameLen)
2650       return XML_FALSE;
2651     bufSize = nameLen + (int)rawNameLen;
2652     if (bufSize > tag->bufEnd - tag->buf) {
2653       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2654       if (temp == NULL)
2655         return XML_FALSE;
2656       /* if tag->name.str points to tag->buf (only when namespace
2657          processing is off) then we have to update it
2658       */
2659       if (tag->name.str == (XML_Char *)tag->buf)
2660         tag->name.str = (XML_Char *)temp;
2661       /* if tag->name.localPart is set (when namespace processing is on)
2662          then update it as well, since it will always point into tag->buf
2663       */
2664       if (tag->name.localPart)
2665         tag->name.localPart
2666             = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2667       tag->buf = temp;
2668       tag->bufEnd = temp + bufSize;
2669       rawNameBuf = temp + nameLen;
2670     }
2671     memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2672     tag->rawName = rawNameBuf;
2673     tag = tag->parent;
2674   }
2675   return XML_TRUE;
2676 }
2677 
2678 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2679 contentProcessor(XML_Parser parser, const char *start, const char *end,
2680                  const char **endPtr) {
2681   enum XML_Error result = doContent(
2682       parser, 0, parser->m_encoding, start, end, endPtr,
2683       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2684   if (result == XML_ERROR_NONE) {
2685     if (! storeRawNames(parser))
2686       return XML_ERROR_NO_MEMORY;
2687   }
2688   return result;
2689 }
2690 
2691 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2692 externalEntityInitProcessor(XML_Parser parser, const char *start,
2693                             const char *end, const char **endPtr) {
2694   enum XML_Error result = initializeEncoding(parser);
2695   if (result != XML_ERROR_NONE)
2696     return result;
2697   parser->m_processor = externalEntityInitProcessor2;
2698   return externalEntityInitProcessor2(parser, start, end, endPtr);
2699 }
2700 
2701 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2702 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2703                              const char *end, const char **endPtr) {
2704   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2705   int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2706   switch (tok) {
2707   case XML_TOK_BOM:
2708 #if XML_GE == 1
2709     if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2710                                   XML_ACCOUNT_DIRECT)) {
2711       accountingOnAbort(parser);
2712       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2713     }
2714 #endif /* XML_GE == 1 */
2715 
2716     /* If we are at the end of the buffer, this would cause the next stage,
2717        i.e. externalEntityInitProcessor3, to pass control directly to
2718        doContent (by detecting XML_TOK_NONE) without processing any xml text
2719        declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2720     */
2721     if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2722       *endPtr = next;
2723       return XML_ERROR_NONE;
2724     }
2725     start = next;
2726     break;
2727   case XML_TOK_PARTIAL:
2728     if (! parser->m_parsingStatus.finalBuffer) {
2729       *endPtr = start;
2730       return XML_ERROR_NONE;
2731     }
2732     parser->m_eventPtr = start;
2733     return XML_ERROR_UNCLOSED_TOKEN;
2734   case XML_TOK_PARTIAL_CHAR:
2735     if (! parser->m_parsingStatus.finalBuffer) {
2736       *endPtr = start;
2737       return XML_ERROR_NONE;
2738     }
2739     parser->m_eventPtr = start;
2740     return XML_ERROR_PARTIAL_CHAR;
2741   }
2742   parser->m_processor = externalEntityInitProcessor3;
2743   return externalEntityInitProcessor3(parser, start, end, endPtr);
2744 }
2745 
2746 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2747 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2748                              const char *end, const char **endPtr) {
2749   int tok;
2750   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2751   parser->m_eventPtr = start;
2752   tok = XmlContentTok(parser->m_encoding, start, end, &next);
2753   /* Note: These bytes are accounted later in:
2754            - processXmlDecl
2755            - externalEntityContentProcessor
2756   */
2757   parser->m_eventEndPtr = next;
2758 
2759   switch (tok) {
2760   case XML_TOK_XML_DECL: {
2761     enum XML_Error result;
2762     result = processXmlDecl(parser, 1, start, next);
2763     if (result != XML_ERROR_NONE)
2764       return result;
2765     switch (parser->m_parsingStatus.parsing) {
2766     case XML_SUSPENDED:
2767       *endPtr = next;
2768       return XML_ERROR_NONE;
2769     case XML_FINISHED:
2770       return XML_ERROR_ABORTED;
2771     default:
2772       start = next;
2773     }
2774   } break;
2775   case XML_TOK_PARTIAL:
2776     if (! parser->m_parsingStatus.finalBuffer) {
2777       *endPtr = start;
2778       return XML_ERROR_NONE;
2779     }
2780     return XML_ERROR_UNCLOSED_TOKEN;
2781   case XML_TOK_PARTIAL_CHAR:
2782     if (! parser->m_parsingStatus.finalBuffer) {
2783       *endPtr = start;
2784       return XML_ERROR_NONE;
2785     }
2786     return XML_ERROR_PARTIAL_CHAR;
2787   }
2788   parser->m_processor = externalEntityContentProcessor;
2789   parser->m_tagLevel = 1;
2790   return externalEntityContentProcessor(parser, start, end, endPtr);
2791 }
2792 
2793 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2794 externalEntityContentProcessor(XML_Parser parser, const char *start,
2795                                const char *end, const char **endPtr) {
2796   enum XML_Error result
2797       = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2798                   (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2799                   XML_ACCOUNT_ENTITY_EXPANSION);
2800   if (result == XML_ERROR_NONE) {
2801     if (! storeRawNames(parser))
2802       return XML_ERROR_NO_MEMORY;
2803   }
2804   return result;
2805 }
2806 
2807 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2808 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2809           const char *s, const char *end, const char **nextPtr,
2810           XML_Bool haveMore, enum XML_Account account) {
2811   /* save one level of indirection */
2812   DTD *const dtd = parser->m_dtd;
2813 
2814   const char **eventPP;
2815   const char **eventEndPP;
2816   if (enc == parser->m_encoding) {
2817     eventPP = &parser->m_eventPtr;
2818     eventEndPP = &parser->m_eventEndPtr;
2819   } else {
2820     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2821     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2822   }
2823   *eventPP = s;
2824 
2825   for (;;) {
2826     const char *next = s; /* XmlContentTok doesn't always set the last arg */
2827     int tok = XmlContentTok(enc, s, end, &next);
2828 #if XML_GE == 1
2829     const char *accountAfter
2830         = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2831               ? (haveMore ? s /* i.e. 0 bytes */ : end)
2832               : next;
2833     if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2834                                   account)) {
2835       accountingOnAbort(parser);
2836       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2837     }
2838 #endif
2839     *eventEndPP = next;
2840     switch (tok) {
2841     case XML_TOK_TRAILING_CR:
2842       if (haveMore) {
2843         *nextPtr = s;
2844         return XML_ERROR_NONE;
2845       }
2846       *eventEndPP = end;
2847       if (parser->m_characterDataHandler) {
2848         XML_Char c = 0xA;
2849         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2850       } else if (parser->m_defaultHandler)
2851         reportDefault(parser, enc, s, end);
2852       /* We are at the end of the final buffer, should we check for
2853          XML_SUSPENDED, XML_FINISHED?
2854       */
2855       if (startTagLevel == 0)
2856         return XML_ERROR_NO_ELEMENTS;
2857       if (parser->m_tagLevel != startTagLevel)
2858         return XML_ERROR_ASYNC_ENTITY;
2859       *nextPtr = end;
2860       return XML_ERROR_NONE;
2861     case XML_TOK_NONE:
2862       if (haveMore) {
2863         *nextPtr = s;
2864         return XML_ERROR_NONE;
2865       }
2866       if (startTagLevel > 0) {
2867         if (parser->m_tagLevel != startTagLevel)
2868           return XML_ERROR_ASYNC_ENTITY;
2869         *nextPtr = s;
2870         return XML_ERROR_NONE;
2871       }
2872       return XML_ERROR_NO_ELEMENTS;
2873     case XML_TOK_INVALID:
2874       *eventPP = next;
2875       return XML_ERROR_INVALID_TOKEN;
2876     case XML_TOK_PARTIAL:
2877       if (haveMore) {
2878         *nextPtr = s;
2879         return XML_ERROR_NONE;
2880       }
2881       return XML_ERROR_UNCLOSED_TOKEN;
2882     case XML_TOK_PARTIAL_CHAR:
2883       if (haveMore) {
2884         *nextPtr = s;
2885         return XML_ERROR_NONE;
2886       }
2887       return XML_ERROR_PARTIAL_CHAR;
2888     case XML_TOK_ENTITY_REF: {
2889       const XML_Char *name;
2890       ENTITY *entity;
2891       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2892           enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2893       if (ch) {
2894 #if XML_GE == 1
2895         /* NOTE: We are replacing 4-6 characters original input for 1 character
2896          *       so there is no amplification and hence recording without
2897          *       protection. */
2898         accountingDiffTolerated(parser, tok, (char *)&ch,
2899                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2900                                 XML_ACCOUNT_ENTITY_EXPANSION);
2901 #endif /* XML_GE == 1 */
2902         if (parser->m_characterDataHandler)
2903           parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2904         else if (parser->m_defaultHandler)
2905           reportDefault(parser, enc, s, next);
2906         break;
2907       }
2908       name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2909                              next - enc->minBytesPerChar);
2910       if (! name)
2911         return XML_ERROR_NO_MEMORY;
2912       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2913       poolDiscard(&dtd->pool);
2914       /* First, determine if a check for an existing declaration is needed;
2915          if yes, check that the entity exists, and that it is internal,
2916          otherwise call the skipped entity or default handler.
2917       */
2918       if (! dtd->hasParamEntityRefs || dtd->standalone) {
2919         if (! entity)
2920           return XML_ERROR_UNDEFINED_ENTITY;
2921         else if (! entity->is_internal)
2922           return XML_ERROR_ENTITY_DECLARED_IN_PE;
2923       } else if (! entity) {
2924         if (parser->m_skippedEntityHandler)
2925           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2926         else if (parser->m_defaultHandler)
2927           reportDefault(parser, enc, s, next);
2928         break;
2929       }
2930       if (entity->open)
2931         return XML_ERROR_RECURSIVE_ENTITY_REF;
2932       if (entity->notation)
2933         return XML_ERROR_BINARY_ENTITY_REF;
2934       if (entity->textPtr) {
2935         enum XML_Error result;
2936         if (! parser->m_defaultExpandInternalEntities) {
2937           if (parser->m_skippedEntityHandler)
2938             parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2939                                            0);
2940           else if (parser->m_defaultHandler)
2941             reportDefault(parser, enc, s, next);
2942           break;
2943         }
2944         result = processInternalEntity(parser, entity, XML_FALSE);
2945         if (result != XML_ERROR_NONE)
2946           return result;
2947       } else if (parser->m_externalEntityRefHandler) {
2948         const XML_Char *context;
2949         entity->open = XML_TRUE;
2950         context = getContext(parser);
2951         entity->open = XML_FALSE;
2952         if (! context)
2953           return XML_ERROR_NO_MEMORY;
2954         if (! parser->m_externalEntityRefHandler(
2955                 parser->m_externalEntityRefHandlerArg, context, entity->base,
2956                 entity->systemId, entity->publicId))
2957           return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2958         poolDiscard(&parser->m_tempPool);
2959       } else if (parser->m_defaultHandler)
2960         reportDefault(parser, enc, s, next);
2961       break;
2962     }
2963     case XML_TOK_START_TAG_NO_ATTS:
2964       /* fall through */
2965     case XML_TOK_START_TAG_WITH_ATTS: {
2966       TAG *tag;
2967       enum XML_Error result;
2968       XML_Char *toPtr;
2969       if (parser->m_freeTagList) {
2970         tag = parser->m_freeTagList;
2971         parser->m_freeTagList = parser->m_freeTagList->parent;
2972       } else {
2973         tag = (TAG *)MALLOC(parser, sizeof(TAG));
2974         if (! tag)
2975           return XML_ERROR_NO_MEMORY;
2976         tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2977         if (! tag->buf) {
2978           FREE(parser, tag);
2979           return XML_ERROR_NO_MEMORY;
2980         }
2981         tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2982       }
2983       tag->bindings = NULL;
2984       tag->parent = parser->m_tagStack;
2985       parser->m_tagStack = tag;
2986       tag->name.localPart = NULL;
2987       tag->name.prefix = NULL;
2988       tag->rawName = s + enc->minBytesPerChar;
2989       tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2990       ++parser->m_tagLevel;
2991       {
2992         const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2993         const char *fromPtr = tag->rawName;
2994         toPtr = (XML_Char *)tag->buf;
2995         for (;;) {
2996           int bufSize;
2997           int convLen;
2998           const enum XML_Convert_Result convert_res
2999               = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3000                            (ICHAR *)tag->bufEnd - 1);
3001           convLen = (int)(toPtr - (XML_Char *)tag->buf);
3002           if ((fromPtr >= rawNameEnd)
3003               || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3004             tag->name.strLen = convLen;
3005             break;
3006           }
3007           bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3008           {
3009             char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3010             if (temp == NULL)
3011               return XML_ERROR_NO_MEMORY;
3012             tag->buf = temp;
3013             tag->bufEnd = temp + bufSize;
3014             toPtr = (XML_Char *)temp + convLen;
3015           }
3016         }
3017       }
3018       tag->name.str = (XML_Char *)tag->buf;
3019       *toPtr = XML_T('\0');
3020       result
3021           = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3022       if (result)
3023         return result;
3024       if (parser->m_startElementHandler)
3025         parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3026                                       (const XML_Char **)parser->m_atts);
3027       else if (parser->m_defaultHandler)
3028         reportDefault(parser, enc, s, next);
3029       poolClear(&parser->m_tempPool);
3030       break;
3031     }
3032     case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3033       /* fall through */
3034     case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3035       const char *rawName = s + enc->minBytesPerChar;
3036       enum XML_Error result;
3037       BINDING *bindings = NULL;
3038       XML_Bool noElmHandlers = XML_TRUE;
3039       TAG_NAME name;
3040       name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3041                                  rawName + XmlNameLength(enc, rawName));
3042       if (! name.str)
3043         return XML_ERROR_NO_MEMORY;
3044       poolFinish(&parser->m_tempPool);
3045       result = storeAtts(parser, enc, s, &name, &bindings,
3046                          XML_ACCOUNT_NONE /* token spans whole start tag */);
3047       if (result != XML_ERROR_NONE) {
3048         freeBindings(parser, bindings);
3049         return result;
3050       }
3051       poolFinish(&parser->m_tempPool);
3052       if (parser->m_startElementHandler) {
3053         parser->m_startElementHandler(parser->m_handlerArg, name.str,
3054                                       (const XML_Char **)parser->m_atts);
3055         noElmHandlers = XML_FALSE;
3056       }
3057       if (parser->m_endElementHandler) {
3058         if (parser->m_startElementHandler)
3059           *eventPP = *eventEndPP;
3060         parser->m_endElementHandler(parser->m_handlerArg, name.str);
3061         noElmHandlers = XML_FALSE;
3062       }
3063       if (noElmHandlers && parser->m_defaultHandler)
3064         reportDefault(parser, enc, s, next);
3065       poolClear(&parser->m_tempPool);
3066       freeBindings(parser, bindings);
3067     }
3068       if ((parser->m_tagLevel == 0)
3069           && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3070         if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3071           parser->m_processor = epilogProcessor;
3072         else
3073           return epilogProcessor(parser, next, end, nextPtr);
3074       }
3075       break;
3076     case XML_TOK_END_TAG:
3077       if (parser->m_tagLevel == startTagLevel)
3078         return XML_ERROR_ASYNC_ENTITY;
3079       else {
3080         int len;
3081         const char *rawName;
3082         TAG *tag = parser->m_tagStack;
3083         rawName = s + enc->minBytesPerChar * 2;
3084         len = XmlNameLength(enc, rawName);
3085         if (len != tag->rawNameLength
3086             || memcmp(tag->rawName, rawName, len) != 0) {
3087           *eventPP = rawName;
3088           return XML_ERROR_TAG_MISMATCH;
3089         }
3090         parser->m_tagStack = tag->parent;
3091         tag->parent = parser->m_freeTagList;
3092         parser->m_freeTagList = tag;
3093         --parser->m_tagLevel;
3094         if (parser->m_endElementHandler) {
3095           const XML_Char *localPart;
3096           const XML_Char *prefix;
3097           XML_Char *uri;
3098           localPart = tag->name.localPart;
3099           if (parser->m_ns && localPart) {
3100             /* localPart and prefix may have been overwritten in
3101                tag->name.str, since this points to the binding->uri
3102                buffer which gets reused; so we have to add them again
3103             */
3104             uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3105             /* don't need to check for space - already done in storeAtts() */
3106             while (*localPart)
3107               *uri++ = *localPart++;
3108             prefix = tag->name.prefix;
3109             if (parser->m_ns_triplets && prefix) {
3110               *uri++ = parser->m_namespaceSeparator;
3111               while (*prefix)
3112                 *uri++ = *prefix++;
3113             }
3114             *uri = XML_T('\0');
3115           }
3116           parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3117         } else if (parser->m_defaultHandler)
3118           reportDefault(parser, enc, s, next);
3119         while (tag->bindings) {
3120           BINDING *b = tag->bindings;
3121           if (parser->m_endNamespaceDeclHandler)
3122             parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3123                                               b->prefix->name);
3124           tag->bindings = tag->bindings->nextTagBinding;
3125           b->nextTagBinding = parser->m_freeBindingList;
3126           parser->m_freeBindingList = b;
3127           b->prefix->binding = b->prevPrefixBinding;
3128         }
3129         if ((parser->m_tagLevel == 0)
3130             && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3131           if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3132             parser->m_processor = epilogProcessor;
3133           else
3134             return epilogProcessor(parser, next, end, nextPtr);
3135         }
3136       }
3137       break;
3138     case XML_TOK_CHAR_REF: {
3139       int n = XmlCharRefNumber(enc, s);
3140       if (n < 0)
3141         return XML_ERROR_BAD_CHAR_REF;
3142       if (parser->m_characterDataHandler) {
3143         XML_Char buf[XML_ENCODE_MAX];
3144         parser->m_characterDataHandler(parser->m_handlerArg, buf,
3145                                        XmlEncode(n, (ICHAR *)buf));
3146       } else if (parser->m_defaultHandler)
3147         reportDefault(parser, enc, s, next);
3148     } break;
3149     case XML_TOK_XML_DECL:
3150       return XML_ERROR_MISPLACED_XML_PI;
3151     case XML_TOK_DATA_NEWLINE:
3152       if (parser->m_characterDataHandler) {
3153         XML_Char c = 0xA;
3154         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3155       } else if (parser->m_defaultHandler)
3156         reportDefault(parser, enc, s, next);
3157       break;
3158     case XML_TOK_CDATA_SECT_OPEN: {
3159       enum XML_Error result;
3160       if (parser->m_startCdataSectionHandler)
3161         parser->m_startCdataSectionHandler(parser->m_handlerArg);
3162       /* BEGIN disabled code */
3163       /* Suppose you doing a transformation on a document that involves
3164          changing only the character data.  You set up a defaultHandler
3165          and a characterDataHandler.  The defaultHandler simply copies
3166          characters through.  The characterDataHandler does the
3167          transformation and writes the characters out escaping them as
3168          necessary.  This case will fail to work if we leave out the
3169          following two lines (because & and < inside CDATA sections will
3170          be incorrectly escaped).
3171 
3172          However, now we have a start/endCdataSectionHandler, so it seems
3173          easier to let the user deal with this.
3174       */
3175       else if ((0) && parser->m_characterDataHandler)
3176         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3177                                        0);
3178       /* END disabled code */
3179       else if (parser->m_defaultHandler)
3180         reportDefault(parser, enc, s, next);
3181       result
3182           = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3183       if (result != XML_ERROR_NONE)
3184         return result;
3185       else if (! next) {
3186         parser->m_processor = cdataSectionProcessor;
3187         return result;
3188       }
3189     } break;
3190     case XML_TOK_TRAILING_RSQB:
3191       if (haveMore) {
3192         *nextPtr = s;
3193         return XML_ERROR_NONE;
3194       }
3195       if (parser->m_characterDataHandler) {
3196         if (MUST_CONVERT(enc, s)) {
3197           ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3198           XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3199           parser->m_characterDataHandler(
3200               parser->m_handlerArg, parser->m_dataBuf,
3201               (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3202         } else
3203           parser->m_characterDataHandler(
3204               parser->m_handlerArg, (const XML_Char *)s,
3205               (int)((const XML_Char *)end - (const XML_Char *)s));
3206       } else if (parser->m_defaultHandler)
3207         reportDefault(parser, enc, s, end);
3208       /* We are at the end of the final buffer, should we check for
3209          XML_SUSPENDED, XML_FINISHED?
3210       */
3211       if (startTagLevel == 0) {
3212         *eventPP = end;
3213         return XML_ERROR_NO_ELEMENTS;
3214       }
3215       if (parser->m_tagLevel != startTagLevel) {
3216         *eventPP = end;
3217         return XML_ERROR_ASYNC_ENTITY;
3218       }
3219       *nextPtr = end;
3220       return XML_ERROR_NONE;
3221     case XML_TOK_DATA_CHARS: {
3222       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3223       if (charDataHandler) {
3224         if (MUST_CONVERT(enc, s)) {
3225           for (;;) {
3226             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3227             const enum XML_Convert_Result convert_res = XmlConvert(
3228                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3229             *eventEndPP = s;
3230             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3231                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3232             if ((convert_res == XML_CONVERT_COMPLETED)
3233                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3234               break;
3235             *eventPP = s;
3236           }
3237         } else
3238           charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3239                           (int)((const XML_Char *)next - (const XML_Char *)s));
3240       } else if (parser->m_defaultHandler)
3241         reportDefault(parser, enc, s, next);
3242     } break;
3243     case XML_TOK_PI:
3244       if (! reportProcessingInstruction(parser, enc, s, next))
3245         return XML_ERROR_NO_MEMORY;
3246       break;
3247     case XML_TOK_COMMENT:
3248       if (! reportComment(parser, enc, s, next))
3249         return XML_ERROR_NO_MEMORY;
3250       break;
3251     default:
3252       /* All of the tokens produced by XmlContentTok() have their own
3253        * explicit cases, so this default is not strictly necessary.
3254        * However it is a useful safety net, so we retain the code and
3255        * simply exclude it from the coverage tests.
3256        *
3257        * LCOV_EXCL_START
3258        */
3259       if (parser->m_defaultHandler)
3260         reportDefault(parser, enc, s, next);
3261       break;
3262       /* LCOV_EXCL_STOP */
3263     }
3264     *eventPP = s = next;
3265     switch (parser->m_parsingStatus.parsing) {
3266     case XML_SUSPENDED:
3267       *nextPtr = next;
3268       return XML_ERROR_NONE;
3269     case XML_FINISHED:
3270       return XML_ERROR_ABORTED;
3271     default:;
3272     }
3273   }
3274   /* not reached */
3275 }
3276 
3277 /* This function does not call free() on the allocated memory, merely
3278  * moving it to the parser's m_freeBindingList where it can be freed or
3279  * reused as appropriate.
3280  */
3281 static void
freeBindings(XML_Parser parser,BINDING * bindings)3282 freeBindings(XML_Parser parser, BINDING *bindings) {
3283   while (bindings) {
3284     BINDING *b = bindings;
3285 
3286     /* m_startNamespaceDeclHandler will have been called for this
3287      * binding in addBindings(), so call the end handler now.
3288      */
3289     if (parser->m_endNamespaceDeclHandler)
3290       parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3291 
3292     bindings = bindings->nextTagBinding;
3293     b->nextTagBinding = parser->m_freeBindingList;
3294     parser->m_freeBindingList = b;
3295     b->prefix->binding = b->prevPrefixBinding;
3296   }
3297 }
3298 
/* Precondition: all arguments must be non-NULL;
   Purpose:
   - normalize attributes
   - check attributes for well-formedness
   - generate namespace aware attribute names (URI, prefix)
   - build list of attributes for startElementHandler
   - default attributes
   - process namespace declarations (check and report them)
   - generate namespace aware element name (URI, prefix)

   Parameters:
   - parser:      parser whose m_atts, m_attInfo, m_nsAtts and m_tempPool
                  are used (and grown where necessary)
   - enc:         encoding handed to the tokenizer helpers
   - attStr:      tag text handed to XmlGetAttributes()
   - tagNamePtr:  in: ->str is the element name used for the element type
                  lookup; out: when the name gets expanded for namespace
                  processing, ->str, ->localPart, ->uriLen, ->prefix and
                  ->prefixLen are updated
   - bindingsPtr: list head passed on to addBinding() for every namespace
                  declaration (specified or defaulted)
   - account:     forwarded unchanged to storeAttributeValue()

   Returns XML_ERROR_NONE on success.  Failures reported directly are
   XML_ERROR_NO_MEMORY, XML_ERROR_DUPLICATE_ATTRIBUTE and
   XML_ERROR_UNBOUND_PREFIX; any non-zero result of storeAttributeValue()
   or addBinding() is passed through as is.

   Bookkeeping note: the byte just before an attribute name (name[-1])
   serves as a per-attribute flag while this function runs:
   0 = not seen yet, 1 = present (specified or defaulted),
   2 = present and prefixed, i.e. still needs namespace expansion.
   The flags are cleared again before a successful return; the early
   error returns leave them as they are.
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
          TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
          enum XML_Account account) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  ELEMENT_TYPE *elementType;
  int nDefaultAtts;
  const XML_Char **appAtts; /* the attribute list for the application */
  int attIndex = 0;
  int prefixLen;
  int i;
  int n;
  XML_Char *uri;
  int nPrefixes = 0;
  BINDING *binding;
  const XML_Char *localPart;

  /* lookup the element type name */
  elementType
      = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
  if (! elementType) {
    /* unknown so far: register it under a copy of the name that lives
       in the DTD's own pool */
    const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
    if (! name)
      return XML_ERROR_NO_MEMORY;
    elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
                                         sizeof(ELEMENT_TYPE));
    if (! elementType)
      return XML_ERROR_NO_MEMORY;
    if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
      return XML_ERROR_NO_MEMORY;
  }
  nDefaultAtts = elementType->nDefaultAtts;

  /* get the attributes from the tokenizer */
  n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);

  /* Detect and prevent integer overflow */
  if (n > INT_MAX - nDefaultAtts) {
    return XML_ERROR_NO_MEMORY;
  }

  /* the array has to hold the specified attributes plus every attribute
     that might get defaulted; grow it if it is too small */
  if (n + nDefaultAtts > parser->m_attsSize) {
    int oldAttsSize = parser->m_attsSize;
    ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
    XML_AttrInfo *temp2;
#endif

    /* Detect and prevent integer overflow */
    if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
        || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
      return XML_ERROR_NO_MEMORY;
    }

    parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;

    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#endif

    temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
                                parser->m_attsSize * sizeof(ATTRIBUTE));
    if (temp == NULL) {
      /* m_atts is untouched; put the recorded size back in line with it */
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_atts = temp;
#ifdef XML_ATTR_INFO
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#  if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#  endif

    temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
                                    parser->m_attsSize * sizeof(XML_AttrInfo));
    if (temp2 == NULL) {
      /* m_atts has already grown at this point; restoring the old size
         merely understates its capacity */
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_attInfo = temp2;
#endif
    /* more attributes than the old array had slots for: ask the tokenizer
       again now that there is room (presumably the first call could only
       record the first oldAttsSize of them) */
    if (n > oldAttsSize)
      XmlGetAttributes(enc, attStr, n, parser->m_atts);
  }

  /* appAtts is the very same memory as m_atts, viewed as an array of
     string pointers; name/value pairs are written into it at attIndex
     while the ATTRIBUTE records are read at index i */
  appAtts = (const XML_Char **)parser->m_atts;
  for (i = 0; i < n; i++) {
    ATTRIBUTE *currAtt = &parser->m_atts[i];
#ifdef XML_ATTR_INFO
    XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
#endif
    /* add the name and value to the attribute list */
    ATTRIBUTE_ID *attId
        = getAttributeId(parser, enc, currAtt->name,
                         currAtt->name + XmlNameLength(enc, currAtt->name));
    if (! attId)
      return XML_ERROR_NO_MEMORY;
#ifdef XML_ATTR_INFO
    /* translate the pointers into the parse buffer into byte offsets,
       measured back from m_parseEndByteIndex / m_parseEndPtr */
    currAttInfo->nameStart
        = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
    currAttInfo->nameEnd
        = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
    currAttInfo->valueStart = parser->m_parseEndByteIndex
                              - (parser->m_parseEndPtr - currAtt->valuePtr);
    currAttInfo->valueEnd = parser->m_parseEndByteIndex
                            - (parser->m_parseEndPtr - currAtt->valueEnd);
#endif
    /* Detect duplicate attributes by their QNames. This does not work when
       namespace processing is turned on and different prefixes for the same
       namespace are used. For this case we have a check further down.
    */
    if ((attId->name)[-1]) {
      /* flag already set: this name occurred earlier in the same tag */
      if (enc == parser->m_encoding)
        parser->m_eventPtr = parser->m_atts[i].name;
      return XML_ERROR_DUPLICATE_ATTRIBUTE;
    }
    (attId->name)[-1] = 1;
    appAtts[attIndex++] = attId->name;
    if (! parser->m_atts[i].normalized) {
      enum XML_Error result;
      XML_Bool isCdata = XML_TRUE;

      /* figure out whether declared as other than CDATA */
      if (attId->maybeTokenized) {
        int j;
        for (j = 0; j < nDefaultAtts; j++) {
          if (attId == elementType->defaultAtts[j].id) {
            isCdata = elementType->defaultAtts[j].isCdata;
            break;
          }
        }
      }

      /* normalize the attribute value */
      result = storeAttributeValue(
          parser, enc, isCdata, parser->m_atts[i].valuePtr,
          parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
      if (result)
        return result;
      appAtts[attIndex] = poolStart(&parser->m_tempPool);
      poolFinish(&parser->m_tempPool);
    } else {
      /* the value did not need normalizing */
      appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
                                          parser->m_atts[i].valuePtr,
                                          parser->m_atts[i].valueEnd);
      if (appAtts[attIndex] == 0)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
    }
    /* handle prefixed attribute names */
    if (attId->prefix) {
      if (attId->xmlns) {
        /* deal with namespace declarations here */
        enum XML_Error result = addBinding(parser, attId->prefix, attId,
                                           appAtts[attIndex], bindingsPtr);
        if (result)
          return result;
        /* step back over the name slot so that the declaration does not
           show up in the application's list; the next attribute reuses
           the pair */
        --attIndex;
      } else {
        /* deal with other prefixed names later */
        attIndex++;
        nPrefixes++;
        (attId->name)[-1] = 2;
      }
    } else
      attIndex++;
  }

  /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
  parser->m_nSpecifiedAtts = attIndex;
  if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
    /* the ID attribute was specified: find its name slot (even indices) */
    for (i = 0; i < attIndex; i += 2)
      if (appAtts[i] == elementType->idAtt->name) {
        parser->m_idAttIndex = i;
        break;
      }
  } else
    parser->m_idAttIndex = -1;

  /* do attribute defaulting */
  for (i = 0; i < nDefaultAtts; i++) {
    const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
    /* only attributes that were not specified (flag still 0) and that
       actually have a default value are added */
    if (! (da->id->name)[-1] && da->value) {
      if (da->id->prefix) {
        if (da->id->xmlns) {
          /* defaulted namespace declaration: bound, but not listed */
          enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
                                             da->value, bindingsPtr);
          if (result)
            return result;
        } else {
          (da->id->name)[-1] = 2;
          nPrefixes++;
          appAtts[attIndex++] = da->id->name;
          appAtts[attIndex++] = da->value;
        }
      } else {
        (da->id->name)[-1] = 1;
        appAtts[attIndex++] = da->id->name;
        appAtts[attIndex++] = da->value;
      }
    }
  }
  /* terminate the name/value list */
  appAtts[attIndex] = 0;

  /* expand prefixed attribute names, check for duplicates,
     and clear flags that say whether attributes were specified */
  i = 0;
  if (nPrefixes) {
    int j; /* hash table index */
    unsigned long version = parser->m_nsAttsVersion;

    /* Detect and prevent invalid shift */
    if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
      return XML_ERROR_NO_MEMORY;
    }

    unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
    unsigned char oldNsAttsPower = parser->m_nsAttsPower;
    /* size of hash table must be at least 2 * (# of prefixed attributes) */
    if ((nPrefixes << 1)
        >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
      NS_ATT *temp;
      /* hash table size must also be a power of 2 and >= 8 */
      while (nPrefixes >> parser->m_nsAttsPower++)
        ;
      if (parser->m_nsAttsPower < 3)
        parser->m_nsAttsPower = 3;

      /* Detect and prevent invalid shift */
      if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }

      nsAttsSize = 1u << parser->m_nsAttsPower;

      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
#endif

      temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
                               nsAttsSize * sizeof(NS_ATT));
      if (! temp) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
      parser->m_nsAtts = temp;
      version = 0; /* force re-initialization of m_nsAtts hash table */
    }
    /* using a version flag saves us from initializing m_nsAtts every time */
    if (! version) { /* initialize version flags when version wraps around */
      version = INIT_ATTS_VERSION;
      for (j = nsAttsSize; j != 0;)
        parser->m_nsAtts[--j].version = version;
    }
    /* a slot only counts as occupied while its version equals the current
       one, so stepping the version down empties the whole table at once */
    parser->m_nsAttsVersion = --version;

    /* expand prefixed names and check for duplicates */
    for (; i < attIndex; i += 2) {
      const XML_Char *s = appAtts[i];
      if (s[-1] == 2) { /* prefixed */
        ATTRIBUTE_ID *id;
        const BINDING *b;
        unsigned long uriHash;
        struct siphash sip_state;
        struct sipkey sip_key;

        copy_salt_to_sipkey(parser, &sip_key);
        sip24_init(&sip_state, &sip_key);

        ((XML_Char *)s)[-1] = 0; /* clear flag */
        id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
        if (! id || ! id->prefix) {
          /* This code is walking through the appAtts array, dealing
           * with (in this case) a prefixed attribute name.  To be in
           * the array, the attribute must have already been bound, so
           * has to have passed through the hash table lookup once
           * already.  That implies that an entry for it already
           * exists, so the lookup above will return a pointer to
           * already allocated memory.  There is no opportunity for
           * the allocator to fail, so the condition above cannot be
           * fulfilled.
           *
           * Since it is difficult to be certain that the above
           * analysis is complete, we retain the test and merely
           * remove the code from coverage tests.
           */
          return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
        }
        b = id->prefix->binding;
        if (! b)
          return XML_ERROR_UNBOUND_PREFIX;

        /* build the expanded name in the temp pool: first the URI ... */
        for (j = 0; j < b->uriLen; j++) {
          const XML_Char c = b->uri[j];
          if (! poolAppendChar(&parser->m_tempPool, c))
            return XML_ERROR_NO_MEMORY;
        }

        sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));

        /* ... then skip the "prefix:" part of the qualified name ... */
        while (*s++ != XML_T(ASCII_COLON))
          ;

        sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));

        /* ... and append the local part */
        do { /* copies null terminator */
          if (! poolAppendChar(&parser->m_tempPool, *s))
            return XML_ERROR_NO_MEMORY;
        } while (*s++);

        uriHash = (unsigned long)sip24_final(&sip_state);

        { /* Check hash table for duplicate of expanded name (uriName).
             Derived from code in lookup(parser, HASH_TABLE *table, ...).
          */
          unsigned char step = 0;
          unsigned long mask = nsAttsSize - 1;
          j = uriHash & mask; /* index into hash table */
          while (parser->m_nsAtts[j].version == version) {
            /* for speed we compare stored hash values first */
            if (uriHash == parser->m_nsAtts[j].hash) {
              const XML_Char *s1 = poolStart(&parser->m_tempPool);
              const XML_Char *s2 = parser->m_nsAtts[j].uriName;
              /* s1 is null terminated, but not s2 */
              for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
                ;
              if (*s1 == 0)
                return XML_ERROR_DUPLICATE_ATTRIBUTE;
            }
            /* occupied by a different name: probe the next slot, moving
               downwards by step and wrapping around at the bottom */
            if (! step)
              step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
            j < step ? (j += nsAttsSize - step) : (j -= step);
          }
        }

        if (parser->m_ns_triplets) { /* append namespace separator and prefix */
          /* the separator overwrites the null terminator that was copied
             along with the local part */
          parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
          s = b->prefix->name;
          do {
            if (! poolAppendChar(&parser->m_tempPool, *s))
              return XML_ERROR_NO_MEMORY;
          } while (*s++);
        }

        /* store expanded name in attribute list */
        s = poolStart(&parser->m_tempPool);
        poolFinish(&parser->m_tempPool);
        appAtts[i] = s;

        /* fill empty slot with new version, uriName and hash value */
        parser->m_nsAtts[j].version = version;
        parser->m_nsAtts[j].hash = uriHash;
        parser->m_nsAtts[j].uriName = s;

        /* all prefixed names handled: leave the rest to the plain
           flag-clearing loop below, which continues from i */
        if (! --nPrefixes) {
          i += 2;
          break;
        }
      } else                     /* not prefixed */
        ((XML_Char *)s)[-1] = 0; /* clear flag */
    }
  }
  /* clear flags for the remaining attributes */
  for (; i < attIndex; i += 2)
    ((XML_Char *)(appAtts[i]))[-1] = 0;
  /* namespace declarations are not in appAtts, so their flags are
     cleared by walking the bindings list instead */
  for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
    binding->attId->name[-1] = 0;

  /* without namespace processing the element name stays as it is */
  if (! parser->m_ns)
    return XML_ERROR_NONE;

  /* expand the element type name */
  if (elementType->prefix) {
    binding = elementType->prefix->binding;
    if (! binding)
      return XML_ERROR_UNBOUND_PREFIX;
    /* the local part starts right after the colon */
    localPart = tagNamePtr->str;
    while (*localPart++ != XML_T(ASCII_COLON))
      ;
  } else if (dtd->defaultPrefix.binding) {
    binding = dtd->defaultPrefix.binding;
    localPart = tagNamePtr->str;
  } else
    return XML_ERROR_NONE;
  prefixLen = 0;
  if (parser->m_ns_triplets && binding->prefix->name) {
    for (; binding->prefix->name[prefixLen++];)
      ; /* prefixLen includes null terminator */
  }
  tagNamePtr->localPart = localPart;
  tagNamePtr->uriLen = binding->uriLen;
  tagNamePtr->prefix = binding->prefix->name;
  tagNamePtr->prefixLen = prefixLen;
  for (i = 0; localPart[i++];)
    ; /* i includes null terminator */

  /* Detect and prevent integer overflow */
  if (binding->uriLen > INT_MAX - prefixLen
      || i > INT_MAX - (binding->uriLen + prefixLen)) {
    return XML_ERROR_NO_MEMORY;
  }

  /* n is the room needed in the binding's uri buffer for
     URI + local part + (optionally) prefix */
  n = i + binding->uriLen + prefixLen;
  if (n > binding->uriAlloc) {
    TAG *p;

    /* Detect and prevent integer overflow */
    if (n > INT_MAX - EXPAND_SPARE) {
      return XML_ERROR_NO_MEMORY;
    }
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
      return XML_ERROR_NO_MEMORY;
    }
#endif

    uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
    if (! uri)
      return XML_ERROR_NO_MEMORY;
    binding->uriAlloc = n + EXPAND_SPARE;
    memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
    /* open tags whose name points at the old buffer must follow the move
       before that buffer is freed */
    for (p = parser->m_tagStack; p; p = p->parent)
      if (p->name.str == binding->uri)
        p->name.str = uri;
    FREE(parser, binding->uri);
    binding->uri = uri;
  }
  /* if m_namespaceSeparator != '\0' then uri includes it already */
  uri = binding->uri + binding->uriLen;
  memcpy(uri, localPart, i * sizeof(XML_Char));
  /* we always have a namespace separator between localPart and prefix */
  if (prefixLen) {
    uri += i - 1;
    *uri = parser->m_namespaceSeparator; /* replace null terminator */
    memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
  }
  /* the expanded name lives in (and is owned by) the binding's buffer */
  tagNamePtr->str = binding->uri;
  return XML_ERROR_NONE;
}
3777 
3778 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3779 is_rfc3986_uri_char(XML_Char candidate) {
3780   // For the RFC 3986 ANBF grammar see
3781   // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3782 
3783   switch (candidate) {
3784   // From rule "ALPHA" (uppercase half)
3785   case 'A':
3786   case 'B':
3787   case 'C':
3788   case 'D':
3789   case 'E':
3790   case 'F':
3791   case 'G':
3792   case 'H':
3793   case 'I':
3794   case 'J':
3795   case 'K':
3796   case 'L':
3797   case 'M':
3798   case 'N':
3799   case 'O':
3800   case 'P':
3801   case 'Q':
3802   case 'R':
3803   case 'S':
3804   case 'T':
3805   case 'U':
3806   case 'V':
3807   case 'W':
3808   case 'X':
3809   case 'Y':
3810   case 'Z':
3811 
3812   // From rule "ALPHA" (lowercase half)
3813   case 'a':
3814   case 'b':
3815   case 'c':
3816   case 'd':
3817   case 'e':
3818   case 'f':
3819   case 'g':
3820   case 'h':
3821   case 'i':
3822   case 'j':
3823   case 'k':
3824   case 'l':
3825   case 'm':
3826   case 'n':
3827   case 'o':
3828   case 'p':
3829   case 'q':
3830   case 'r':
3831   case 's':
3832   case 't':
3833   case 'u':
3834   case 'v':
3835   case 'w':
3836   case 'x':
3837   case 'y':
3838   case 'z':
3839 
3840   // From rule "DIGIT"
3841   case '0':
3842   case '1':
3843   case '2':
3844   case '3':
3845   case '4':
3846   case '5':
3847   case '6':
3848   case '7':
3849   case '8':
3850   case '9':
3851 
3852   // From rule "pct-encoded"
3853   case '%':
3854 
3855   // From rule "unreserved"
3856   case '-':
3857   case '.':
3858   case '_':
3859   case '~':
3860 
3861   // From rule "gen-delims"
3862   case ':':
3863   case '/':
3864   case '?':
3865   case '#':
3866   case '[':
3867   case ']':
3868   case '@':
3869 
3870   // From rule "sub-delims"
3871   case '!':
3872   case '$':
3873   case '&':
3874   case '\'':
3875   case '(':
3876   case ')':
3877   case '*':
3878   case '+':
3879   case ',':
3880   case ';':
3881   case '=':
3882     return XML_TRUE;
3883 
3884   default:
3885     return XML_FALSE;
3886   }
3887 }
3888 
3889 /* addBinding() overwrites the value of prefix->binding without checking.
3890    Therefore one must keep track of the old value outside of addBinding().
3891 */
3892 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3893 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3894            const XML_Char *uri, BINDING **bindingsPtr) {
3895   // "http://www.w3.org/XML/1998/namespace"
3896   static const XML_Char xmlNamespace[]
3897       = {ASCII_h,      ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,
3898          ASCII_SLASH,  ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,
3899          ASCII_PERIOD, ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,
3900          ASCII_r,      ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,
3901          ASCII_L,      ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,
3902          ASCII_8,      ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,
3903          ASCII_e,      ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,
3904          ASCII_e,      '\0'};
3905   static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3906   // "http://www.w3.org/2000/xmlns/"
3907   static const XML_Char xmlnsNamespace[]
3908       = {ASCII_h,     ASCII_t,      ASCII_t, ASCII_p, ASCII_COLON,  ASCII_SLASH,
3909          ASCII_SLASH, ASCII_w,      ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3910          ASCII_3,     ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,      ASCII_SLASH,
3911          ASCII_2,     ASCII_0,      ASCII_0, ASCII_0, ASCII_SLASH,  ASCII_x,
3912          ASCII_m,     ASCII_l,      ASCII_n, ASCII_s, ASCII_SLASH,  '\0'};
3913   static const int xmlnsLen
3914       = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3915 
3916   XML_Bool mustBeXML = XML_FALSE;
3917   XML_Bool isXML = XML_TRUE;
3918   XML_Bool isXMLNS = XML_TRUE;
3919 
3920   BINDING *b;
3921   int len;
3922 
3923   /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3924   if (*uri == XML_T('\0') && prefix->name)
3925     return XML_ERROR_UNDECLARING_PREFIX;
3926 
3927   if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3928       && prefix->name[1] == XML_T(ASCII_m)
3929       && prefix->name[2] == XML_T(ASCII_l)) {
3930     /* Not allowed to bind xmlns */
3931     if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3932         && prefix->name[5] == XML_T('\0'))
3933       return XML_ERROR_RESERVED_PREFIX_XMLNS;
3934 
3935     if (prefix->name[3] == XML_T('\0'))
3936       mustBeXML = XML_TRUE;
3937   }
3938 
3939   for (len = 0; uri[len]; len++) {
3940     if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3941       isXML = XML_FALSE;
3942 
3943     if (! mustBeXML && isXMLNS
3944         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3945       isXMLNS = XML_FALSE;
3946 
3947     // NOTE: While Expat does not validate namespace URIs against RFC 3986
3948     //       today (and is not REQUIRED to do so with regard to the XML 1.0
3949     //       namespaces specification) we have to at least make sure, that
3950     //       the application on top of Expat (that is likely splitting expanded
3951     //       element names ("qualified names") of form
3952     //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3953     //       in its element handler code) cannot be confused by an attacker
3954     //       putting additional namespace separator characters into namespace
3955     //       declarations.  That would be ambiguous and not to be expected.
3956     //
3957     //       While the HTML API docs of function XML_ParserCreateNS have been
3958     //       advising against use of a namespace separator character that can
3959     //       appear in a URI for >20 years now, some widespread applications
3960     //       are using URI characters (':' (colon) in particular) for a
3961     //       namespace separator, in practice.  To keep these applications
3962     //       functional, we only reject namespaces URIs containing the
3963     //       application-chosen namespace separator if the chosen separator
3964     //       is a non-URI character with regard to RFC 3986.
3965     if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3966         && ! is_rfc3986_uri_char(uri[len])) {
3967       return XML_ERROR_SYNTAX;
3968     }
3969   }
3970   isXML = isXML && len == xmlLen;
3971   isXMLNS = isXMLNS && len == xmlnsLen;
3972 
3973   if (mustBeXML != isXML)
3974     return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3975                      : XML_ERROR_RESERVED_NAMESPACE_URI;
3976 
3977   if (isXMLNS)
3978     return XML_ERROR_RESERVED_NAMESPACE_URI;
3979 
3980   if (parser->m_namespaceSeparator)
3981     len++;
3982   if (parser->m_freeBindingList) {
3983     b = parser->m_freeBindingList;
3984     if (len > b->uriAlloc) {
3985       /* Detect and prevent integer overflow */
3986       if (len > INT_MAX - EXPAND_SPARE) {
3987         return XML_ERROR_NO_MEMORY;
3988       }
3989 
3990       /* Detect and prevent integer overflow.
3991        * The preprocessor guard addresses the "always false" warning
3992        * from -Wtype-limits on platforms where
3993        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3994 #if UINT_MAX >= SIZE_MAX
3995       if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3996         return XML_ERROR_NO_MEMORY;
3997       }
3998 #endif
3999 
4000       XML_Char *temp = (XML_Char *)REALLOC(
4001           parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4002       if (temp == NULL)
4003         return XML_ERROR_NO_MEMORY;
4004       b->uri = temp;
4005       b->uriAlloc = len + EXPAND_SPARE;
4006     }
4007     parser->m_freeBindingList = b->nextTagBinding;
4008   } else {
4009     b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4010     if (! b)
4011       return XML_ERROR_NO_MEMORY;
4012 
4013     /* Detect and prevent integer overflow */
4014     if (len > INT_MAX - EXPAND_SPARE) {
4015       return XML_ERROR_NO_MEMORY;
4016     }
4017     /* Detect and prevent integer overflow.
4018      * The preprocessor guard addresses the "always false" warning
4019      * from -Wtype-limits on platforms where
4020      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4021 #if UINT_MAX >= SIZE_MAX
4022     if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4023       return XML_ERROR_NO_MEMORY;
4024     }
4025 #endif
4026 
4027     b->uri
4028         = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4029     if (! b->uri) {
4030       FREE(parser, b);
4031       return XML_ERROR_NO_MEMORY;
4032     }
4033     b->uriAlloc = len + EXPAND_SPARE;
4034   }
4035   b->uriLen = len;
4036   memcpy(b->uri, uri, len * sizeof(XML_Char));
4037   if (parser->m_namespaceSeparator)
4038     b->uri[len - 1] = parser->m_namespaceSeparator;
4039   b->prefix = prefix;
4040   b->attId = attId;
4041   b->prevPrefixBinding = prefix->binding;
4042   /* NULL binding when default namespace undeclared */
4043   if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4044     prefix->binding = NULL;
4045   else
4046     prefix->binding = b;
4047   b->nextTagBinding = *bindingsPtr;
4048   *bindingsPtr = b;
4049   /* if attId == NULL then we are not starting a namespace scope */
4050   if (attId && parser->m_startNamespaceDeclHandler)
4051     parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4052                                         prefix->binding ? uri : 0);
4053   return XML_ERROR_NONE;
4054 }
4055 
4056 /* The idea here is to avoid using stack for each CDATA section when
4057    the whole file is parsed with one call.
4058 */
4059 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4060 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4061                       const char **endPtr) {
4062   enum XML_Error result = doCdataSection(
4063       parser, parser->m_encoding, &start, end, endPtr,
4064       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4065   if (result != XML_ERROR_NONE)
4066     return result;
4067   if (start) {
4068     if (parser->m_parentParser) { /* we are parsing an external entity */
4069       parser->m_processor = externalEntityContentProcessor;
4070       return externalEntityContentProcessor(parser, start, end, endPtr);
4071     } else {
4072       parser->m_processor = contentProcessor;
4073       return contentProcessor(parser, start, end, endPtr);
4074     }
4075   }
4076   return result;
4077 }
4078 
4079 /* startPtr gets set to non-null if the section is closed, and to null if
4080    the section is not yet closed.
4081 */
4082 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)4083 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4084                const char *end, const char **nextPtr, XML_Bool haveMore,
4085                enum XML_Account account) {
4086   const char *s = *startPtr;
4087   const char **eventPP;
4088   const char **eventEndPP;
4089   if (enc == parser->m_encoding) {
4090     eventPP = &parser->m_eventPtr;
4091     *eventPP = s;
4092     eventEndPP = &parser->m_eventEndPtr;
4093   } else {
4094     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4095     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4096   }
4097   *eventPP = s;
4098   *startPtr = NULL;
4099 
4100   for (;;) {
4101     const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4102     int tok = XmlCdataSectionTok(enc, s, end, &next);
4103 #if XML_GE == 1
4104     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4105       accountingOnAbort(parser);
4106       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4107     }
4108 #else
4109     UNUSED_P(account);
4110 #endif
4111     *eventEndPP = next;
4112     switch (tok) {
4113     case XML_TOK_CDATA_SECT_CLOSE:
4114       if (parser->m_endCdataSectionHandler)
4115         parser->m_endCdataSectionHandler(parser->m_handlerArg);
4116       /* BEGIN disabled code */
4117       /* see comment under XML_TOK_CDATA_SECT_OPEN */
4118       else if ((0) && parser->m_characterDataHandler)
4119         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4120                                        0);
4121       /* END disabled code */
4122       else if (parser->m_defaultHandler)
4123         reportDefault(parser, enc, s, next);
4124       *startPtr = next;
4125       *nextPtr = next;
4126       if (parser->m_parsingStatus.parsing == XML_FINISHED)
4127         return XML_ERROR_ABORTED;
4128       else
4129         return XML_ERROR_NONE;
4130     case XML_TOK_DATA_NEWLINE:
4131       if (parser->m_characterDataHandler) {
4132         XML_Char c = 0xA;
4133         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4134       } else if (parser->m_defaultHandler)
4135         reportDefault(parser, enc, s, next);
4136       break;
4137     case XML_TOK_DATA_CHARS: {
4138       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4139       if (charDataHandler) {
4140         if (MUST_CONVERT(enc, s)) {
4141           for (;;) {
4142             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4143             const enum XML_Convert_Result convert_res = XmlConvert(
4144                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4145             *eventEndPP = next;
4146             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4147                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4148             if ((convert_res == XML_CONVERT_COMPLETED)
4149                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4150               break;
4151             *eventPP = s;
4152           }
4153         } else
4154           charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4155                           (int)((const XML_Char *)next - (const XML_Char *)s));
4156       } else if (parser->m_defaultHandler)
4157         reportDefault(parser, enc, s, next);
4158     } break;
4159     case XML_TOK_INVALID:
4160       *eventPP = next;
4161       return XML_ERROR_INVALID_TOKEN;
4162     case XML_TOK_PARTIAL_CHAR:
4163       if (haveMore) {
4164         *nextPtr = s;
4165         return XML_ERROR_NONE;
4166       }
4167       return XML_ERROR_PARTIAL_CHAR;
4168     case XML_TOK_PARTIAL:
4169     case XML_TOK_NONE:
4170       if (haveMore) {
4171         *nextPtr = s;
4172         return XML_ERROR_NONE;
4173       }
4174       return XML_ERROR_UNCLOSED_CDATA_SECTION;
4175     default:
4176       /* Every token returned by XmlCdataSectionTok() has its own
4177        * explicit case, so this default case will never be executed.
4178        * We retain it as a safety net and exclude it from the coverage
4179        * statistics.
4180        *
4181        * LCOV_EXCL_START
4182        */
4183       *eventPP = next;
4184       return XML_ERROR_UNEXPECTED_STATE;
4185       /* LCOV_EXCL_STOP */
4186     }
4187 
4188     *eventPP = s = next;
4189     switch (parser->m_parsingStatus.parsing) {
4190     case XML_SUSPENDED:
4191       *nextPtr = next;
4192       return XML_ERROR_NONE;
4193     case XML_FINISHED:
4194       return XML_ERROR_ABORTED;
4195     default:;
4196     }
4197   }
4198   /* not reached */
4199 }
4200 
4201 #ifdef XML_DTD
4202 
4203 /* The idea here is to avoid using stack for each IGNORE section when
4204    the whole file is parsed with one call.
4205 */
4206 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4207 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4208                        const char **endPtr) {
4209   enum XML_Error result
4210       = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4211                         (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4212   if (result != XML_ERROR_NONE)
4213     return result;
4214   if (start) {
4215     parser->m_processor = prologProcessor;
4216     return prologProcessor(parser, start, end, endPtr);
4217   }
4218   return result;
4219 }
4220 
4221 /* startPtr gets set to non-null is the section is closed, and to null
4222    if the section is not yet closed.
4223 */
4224 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4225 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4226                 const char *end, const char **nextPtr, XML_Bool haveMore) {
4227   const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4228   int tok;
4229   const char *s = *startPtr;
4230   const char **eventPP;
4231   const char **eventEndPP;
4232   if (enc == parser->m_encoding) {
4233     eventPP = &parser->m_eventPtr;
4234     *eventPP = s;
4235     eventEndPP = &parser->m_eventEndPtr;
4236   } else {
4237     /* It's not entirely clear, but it seems the following two lines
4238      * of code cannot be executed.  The only occasions on which 'enc'
4239      * is not 'encoding' are when this function is called
4240      * from the internal entity processing, and IGNORE sections are an
4241      * error in internal entities.
4242      *
4243      * Since it really isn't clear that this is true, we keep the code
4244      * and just remove it from our coverage tests.
4245      *
4246      * LCOV_EXCL_START
4247      */
4248     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4249     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4250     /* LCOV_EXCL_STOP */
4251   }
4252   *eventPP = s;
4253   *startPtr = NULL;
4254   tok = XmlIgnoreSectionTok(enc, s, end, &next);
4255 #  if XML_GE == 1
4256   if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4257                                 XML_ACCOUNT_DIRECT)) {
4258     accountingOnAbort(parser);
4259     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4260   }
4261 #  endif
4262   *eventEndPP = next;
4263   switch (tok) {
4264   case XML_TOK_IGNORE_SECT:
4265     if (parser->m_defaultHandler)
4266       reportDefault(parser, enc, s, next);
4267     *startPtr = next;
4268     *nextPtr = next;
4269     if (parser->m_parsingStatus.parsing == XML_FINISHED)
4270       return XML_ERROR_ABORTED;
4271     else
4272       return XML_ERROR_NONE;
4273   case XML_TOK_INVALID:
4274     *eventPP = next;
4275     return XML_ERROR_INVALID_TOKEN;
4276   case XML_TOK_PARTIAL_CHAR:
4277     if (haveMore) {
4278       *nextPtr = s;
4279       return XML_ERROR_NONE;
4280     }
4281     return XML_ERROR_PARTIAL_CHAR;
4282   case XML_TOK_PARTIAL:
4283   case XML_TOK_NONE:
4284     if (haveMore) {
4285       *nextPtr = s;
4286       return XML_ERROR_NONE;
4287     }
4288     return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4289   default:
4290     /* All of the tokens that XmlIgnoreSectionTok() returns have
4291      * explicit cases to handle them, so this default case is never
4292      * executed.  We keep it as a safety net anyway, and remove it
4293      * from our test coverage statistics.
4294      *
4295      * LCOV_EXCL_START
4296      */
4297     *eventPP = next;
4298     return XML_ERROR_UNEXPECTED_STATE;
4299     /* LCOV_EXCL_STOP */
4300   }
4301   /* not reached */
4302 }
4303 
4304 #endif /* XML_DTD */
4305 
4306 static enum XML_Error
initializeEncoding(XML_Parser parser)4307 initializeEncoding(XML_Parser parser) {
4308   const char *s;
4309 #ifdef XML_UNICODE
4310   char encodingBuf[128];
4311   /* See comments about `protocolEncodingName` in parserInit() */
4312   if (! parser->m_protocolEncodingName)
4313     s = NULL;
4314   else {
4315     int i;
4316     for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4317       if (i == sizeof(encodingBuf) - 1
4318           || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4319         encodingBuf[0] = '\0';
4320         break;
4321       }
4322       encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4323     }
4324     encodingBuf[i] = '\0';
4325     s = encodingBuf;
4326   }
4327 #else
4328   s = parser->m_protocolEncodingName;
4329 #endif
4330   if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4331           &parser->m_initEncoding, &parser->m_encoding, s))
4332     return XML_ERROR_NONE;
4333   return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4334 }
4335 
4336 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4337 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4338                const char *next) {
4339   const char *encodingName = NULL;
4340   const XML_Char *storedEncName = NULL;
4341   const ENCODING *newEncoding = NULL;
4342   const char *version = NULL;
4343   const char *versionend = NULL;
4344   const XML_Char *storedversion = NULL;
4345   int standalone = -1;
4346 
4347 #if XML_GE == 1
4348   if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4349                                 XML_ACCOUNT_DIRECT)) {
4350     accountingOnAbort(parser);
4351     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4352   }
4353 #endif
4354 
4355   if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4356           isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4357           &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4358     if (isGeneralTextEntity)
4359       return XML_ERROR_TEXT_DECL;
4360     else
4361       return XML_ERROR_XML_DECL;
4362   }
4363   if (! isGeneralTextEntity && standalone == 1) {
4364     parser->m_dtd->standalone = XML_TRUE;
4365 #ifdef XML_DTD
4366     if (parser->m_paramEntityParsing
4367         == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4368       parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4369 #endif /* XML_DTD */
4370   }
4371   if (parser->m_xmlDeclHandler) {
4372     if (encodingName != NULL) {
4373       storedEncName = poolStoreString(
4374           &parser->m_temp2Pool, parser->m_encoding, encodingName,
4375           encodingName + XmlNameLength(parser->m_encoding, encodingName));
4376       if (! storedEncName)
4377         return XML_ERROR_NO_MEMORY;
4378       poolFinish(&parser->m_temp2Pool);
4379     }
4380     if (version) {
4381       storedversion
4382           = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4383                             versionend - parser->m_encoding->minBytesPerChar);
4384       if (! storedversion)
4385         return XML_ERROR_NO_MEMORY;
4386     }
4387     parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4388                              standalone);
4389   } else if (parser->m_defaultHandler)
4390     reportDefault(parser, parser->m_encoding, s, next);
4391   if (parser->m_protocolEncodingName == NULL) {
4392     if (newEncoding) {
4393       /* Check that the specified encoding does not conflict with what
4394        * the parser has already deduced.  Do we have the same number
4395        * of bytes in the smallest representation of a character?  If
4396        * this is UTF-16, is it the same endianness?
4397        */
4398       if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4399           || (newEncoding->minBytesPerChar == 2
4400               && newEncoding != parser->m_encoding)) {
4401         parser->m_eventPtr = encodingName;
4402         return XML_ERROR_INCORRECT_ENCODING;
4403       }
4404       parser->m_encoding = newEncoding;
4405     } else if (encodingName) {
4406       enum XML_Error result;
4407       if (! storedEncName) {
4408         storedEncName = poolStoreString(
4409             &parser->m_temp2Pool, parser->m_encoding, encodingName,
4410             encodingName + XmlNameLength(parser->m_encoding, encodingName));
4411         if (! storedEncName)
4412           return XML_ERROR_NO_MEMORY;
4413       }
4414       result = handleUnknownEncoding(parser, storedEncName);
4415       poolClear(&parser->m_temp2Pool);
4416       if (result == XML_ERROR_UNKNOWN_ENCODING)
4417         parser->m_eventPtr = encodingName;
4418       return result;
4419     }
4420   }
4421 
4422   if (storedEncName || storedversion)
4423     poolClear(&parser->m_temp2Pool);
4424 
4425   return XML_ERROR_NONE;
4426 }
4427 
4428 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4429 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4430   if (parser->m_unknownEncodingHandler) {
4431     XML_Encoding info;
4432     int i;
4433     for (i = 0; i < 256; i++)
4434       info.map[i] = -1;
4435     info.convert = NULL;
4436     info.data = NULL;
4437     info.release = NULL;
4438     if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4439                                          encodingName, &info)) {
4440       ENCODING *enc;
4441       parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4442       if (! parser->m_unknownEncodingMem) {
4443         if (info.release)
4444           info.release(info.data);
4445         return XML_ERROR_NO_MEMORY;
4446       }
4447       enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4448           parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4449       if (enc) {
4450         parser->m_unknownEncodingData = info.data;
4451         parser->m_unknownEncodingRelease = info.release;
4452         parser->m_encoding = enc;
4453         return XML_ERROR_NONE;
4454       }
4455     }
4456     if (info.release != NULL)
4457       info.release(info.data);
4458   }
4459   return XML_ERROR_UNKNOWN_ENCODING;
4460 }
4461 
4462 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4463 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4464                     const char **nextPtr) {
4465   enum XML_Error result = initializeEncoding(parser);
4466   if (result != XML_ERROR_NONE)
4467     return result;
4468   parser->m_processor = prologProcessor;
4469   return prologProcessor(parser, s, end, nextPtr);
4470 }
4471 
4472 #ifdef XML_DTD
4473 
4474 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4475 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4476                             const char **nextPtr) {
4477   enum XML_Error result = initializeEncoding(parser);
4478   if (result != XML_ERROR_NONE)
4479     return result;
4480 
4481   /* we know now that XML_Parse(Buffer) has been called,
4482      so we consider the external parameter entity read */
4483   parser->m_dtd->paramEntityRead = XML_TRUE;
4484 
4485   if (parser->m_prologState.inEntityValue) {
4486     parser->m_processor = entityValueInitProcessor;
4487     return entityValueInitProcessor(parser, s, end, nextPtr);
4488   } else {
4489     parser->m_processor = externalParEntProcessor;
4490     return externalParEntProcessor(parser, s, end, nextPtr);
4491   }
4492 }
4493 
4494 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4495 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4496                          const char **nextPtr) {
4497   int tok;
4498   const char *start = s;
4499   const char *next = start;
4500   parser->m_eventPtr = start;
4501 
4502   for (;;) {
4503     tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4504     /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4505              - storeEntityValue
4506              - processXmlDecl
4507     */
4508     parser->m_eventEndPtr = next;
4509     if (tok <= 0) {
4510       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4511         *nextPtr = s;
4512         return XML_ERROR_NONE;
4513       }
4514       switch (tok) {
4515       case XML_TOK_INVALID:
4516         return XML_ERROR_INVALID_TOKEN;
4517       case XML_TOK_PARTIAL:
4518         return XML_ERROR_UNCLOSED_TOKEN;
4519       case XML_TOK_PARTIAL_CHAR:
4520         return XML_ERROR_PARTIAL_CHAR;
4521       case XML_TOK_NONE: /* start == end */
4522       default:
4523         break;
4524       }
4525       /* found end of entity value - can store it now */
4526       return storeEntityValue(parser, parser->m_encoding, s, end,
4527                               XML_ACCOUNT_DIRECT);
4528     } else if (tok == XML_TOK_XML_DECL) {
4529       enum XML_Error result;
4530       result = processXmlDecl(parser, 0, start, next);
4531       if (result != XML_ERROR_NONE)
4532         return result;
4533       /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED.  For
4534        * that to happen, a parameter entity parsing handler must have attempted
4535        * to suspend the parser, which fails and raises an error.  The parser can
4536        * be aborted, but can't be suspended.
4537        */
4538       if (parser->m_parsingStatus.parsing == XML_FINISHED)
4539         return XML_ERROR_ABORTED;
4540       *nextPtr = next;
4541       /* stop scanning for text declaration - we found one */
4542       parser->m_processor = entityValueProcessor;
4543       return entityValueProcessor(parser, next, end, nextPtr);
4544     }
4545     /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4546        must move s and nextPtr forward to consume the BOM.
4547 
4548        If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4549        would leave the BOM in the buffer and return. On the next call to this
4550        function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4551        is not valid to have multiple BOMs.
4552     */
4553     else if (tok == XML_TOK_BOM) {
4554 #  if XML_GE == 1
4555       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4556                                     XML_ACCOUNT_DIRECT)) {
4557         accountingOnAbort(parser);
4558         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4559       }
4560 #  endif
4561 
4562       *nextPtr = next;
4563       s = next;
4564     }
4565     /* If we get this token, we have the start of what might be a
4566        normal tag, but not a declaration (i.e. it doesn't begin with
4567        "<!").  In a DTD context, that isn't legal.
4568     */
4569     else if (tok == XML_TOK_INSTANCE_START) {
4570       *nextPtr = next;
4571       return XML_ERROR_SYNTAX;
4572     }
4573     start = next;
4574     parser->m_eventPtr = start;
4575   }
4576 }
4577 
4578 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4579 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4580                         const char **nextPtr) {
4581   const char *next = s;
4582   int tok;
4583 
4584   tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4585   if (tok <= 0) {
4586     if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4587       *nextPtr = s;
4588       return XML_ERROR_NONE;
4589     }
4590     switch (tok) {
4591     case XML_TOK_INVALID:
4592       return XML_ERROR_INVALID_TOKEN;
4593     case XML_TOK_PARTIAL:
4594       return XML_ERROR_UNCLOSED_TOKEN;
4595     case XML_TOK_PARTIAL_CHAR:
4596       return XML_ERROR_PARTIAL_CHAR;
4597     case XML_TOK_NONE: /* start == end */
4598     default:
4599       break;
4600     }
4601   }
4602   /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4603      However, when parsing an external subset, doProlog will not accept a BOM
4604      as valid, and report a syntax error, so we have to skip the BOM, and
4605      account for the BOM bytes.
4606   */
4607   else if (tok == XML_TOK_BOM) {
4608     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4609                                   XML_ACCOUNT_DIRECT)) {
4610       accountingOnAbort(parser);
4611       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4612     }
4613 
4614     s = next;
4615     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4616   }
4617 
4618   parser->m_processor = prologProcessor;
4619   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4620                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4621                   XML_ACCOUNT_DIRECT);
4622 }
4623 
4624 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4625 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4626                      const char **nextPtr) {
4627   const char *start = s;
4628   const char *next = s;
4629   const ENCODING *enc = parser->m_encoding;
4630   int tok;
4631 
4632   for (;;) {
4633     tok = XmlPrologTok(enc, start, end, &next);
4634     /* Note: These bytes are accounted later in:
4635              - storeEntityValue
4636     */
4637     if (tok <= 0) {
4638       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4639         *nextPtr = s;
4640         return XML_ERROR_NONE;
4641       }
4642       switch (tok) {
4643       case XML_TOK_INVALID:
4644         return XML_ERROR_INVALID_TOKEN;
4645       case XML_TOK_PARTIAL:
4646         return XML_ERROR_UNCLOSED_TOKEN;
4647       case XML_TOK_PARTIAL_CHAR:
4648         return XML_ERROR_PARTIAL_CHAR;
4649       case XML_TOK_NONE: /* start == end */
4650       default:
4651         break;
4652       }
4653       /* found end of entity value - can store it now */
4654       return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4655     }
4656     start = next;
4657   }
4658 }
4659 
4660 #endif /* XML_DTD */
4661 
4662 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4663 prologProcessor(XML_Parser parser, const char *s, const char *end,
4664                 const char **nextPtr) {
4665   const char *next = s;
4666   int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4667   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4668                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4669                   XML_ACCOUNT_DIRECT);
4670 }
4671 
4672 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4673 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4674          int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4675          XML_Bool allowClosingDoctype, enum XML_Account account) {
4676 #ifdef XML_DTD
4677   static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4678 #endif /* XML_DTD */
4679   static const XML_Char atypeCDATA[]
4680       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4681   static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4682   static const XML_Char atypeIDREF[]
4683       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4684   static const XML_Char atypeIDREFS[]
4685       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4686   static const XML_Char atypeENTITY[]
4687       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4688   static const XML_Char atypeENTITIES[]
4689       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4690          ASCII_I, ASCII_E, ASCII_S, '\0'};
4691   static const XML_Char atypeNMTOKEN[]
4692       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4693   static const XML_Char atypeNMTOKENS[]
4694       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4695          ASCII_E, ASCII_N, ASCII_S, '\0'};
4696   static const XML_Char notationPrefix[]
4697       = {ASCII_N, ASCII_O, ASCII_T, ASCII_A,      ASCII_T,
4698          ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4699   static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4700   static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4701 
4702 #ifndef XML_DTD
4703   UNUSED_P(account);
4704 #endif
4705 
4706   /* save one level of indirection */
4707   DTD *const dtd = parser->m_dtd;
4708 
4709   const char **eventPP;
4710   const char **eventEndPP;
4711   enum XML_Content_Quant quant;
4712 
4713   if (enc == parser->m_encoding) {
4714     eventPP = &parser->m_eventPtr;
4715     eventEndPP = &parser->m_eventEndPtr;
4716   } else {
4717     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4718     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4719   }
4720 
4721   for (;;) {
4722     int role;
4723     XML_Bool handleDefault = XML_TRUE;
4724     *eventPP = s;
4725     *eventEndPP = next;
4726     if (tok <= 0) {
4727       if (haveMore && tok != XML_TOK_INVALID) {
4728         *nextPtr = s;
4729         return XML_ERROR_NONE;
4730       }
4731       switch (tok) {
4732       case XML_TOK_INVALID:
4733         *eventPP = next;
4734         return XML_ERROR_INVALID_TOKEN;
4735       case XML_TOK_PARTIAL:
4736         return XML_ERROR_UNCLOSED_TOKEN;
4737       case XML_TOK_PARTIAL_CHAR:
4738         return XML_ERROR_PARTIAL_CHAR;
4739       case -XML_TOK_PROLOG_S:
4740         tok = -tok;
4741         break;
4742       case XML_TOK_NONE:
4743 #ifdef XML_DTD
4744         /* for internal PE NOT referenced between declarations */
4745         if (enc != parser->m_encoding
4746             && ! parser->m_openInternalEntities->betweenDecl) {
4747           *nextPtr = s;
4748           return XML_ERROR_NONE;
4749         }
4750         /* WFC: PE Between Declarations - must check that PE contains
4751            complete markup, not only for external PEs, but also for
4752            internal PEs if the reference occurs between declarations.
4753         */
4754         if (parser->m_isParamEntity || enc != parser->m_encoding) {
4755           if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4756               == XML_ROLE_ERROR)
4757             return XML_ERROR_INCOMPLETE_PE;
4758           *nextPtr = s;
4759           return XML_ERROR_NONE;
4760         }
4761 #endif /* XML_DTD */
4762         return XML_ERROR_NO_ELEMENTS;
4763       default:
4764         tok = -tok;
4765         next = end;
4766         break;
4767       }
4768     }
4769     role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4770 #if XML_GE == 1
4771     switch (role) {
4772     case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4773     case XML_ROLE_XML_DECL:       // bytes accounted in processXmlDecl
4774 #  ifdef XML_DTD
4775     case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4776 #  endif
4777       break;
4778     default:
4779       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4780         accountingOnAbort(parser);
4781         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4782       }
4783     }
4784 #endif
4785     switch (role) {
4786     case XML_ROLE_XML_DECL: {
4787       enum XML_Error result = processXmlDecl(parser, 0, s, next);
4788       if (result != XML_ERROR_NONE)
4789         return result;
4790       enc = parser->m_encoding;
4791       handleDefault = XML_FALSE;
4792     } break;
4793     case XML_ROLE_DOCTYPE_NAME:
4794       if (parser->m_startDoctypeDeclHandler) {
4795         parser->m_doctypeName
4796             = poolStoreString(&parser->m_tempPool, enc, s, next);
4797         if (! parser->m_doctypeName)
4798           return XML_ERROR_NO_MEMORY;
4799         poolFinish(&parser->m_tempPool);
4800         parser->m_doctypePubid = NULL;
4801         handleDefault = XML_FALSE;
4802       }
4803       parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4804       break;
4805     case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4806       if (parser->m_startDoctypeDeclHandler) {
4807         parser->m_startDoctypeDeclHandler(
4808             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4809             parser->m_doctypePubid, 1);
4810         parser->m_doctypeName = NULL;
4811         poolClear(&parser->m_tempPool);
4812         handleDefault = XML_FALSE;
4813       }
4814       break;
4815 #ifdef XML_DTD
4816     case XML_ROLE_TEXT_DECL: {
4817       enum XML_Error result = processXmlDecl(parser, 1, s, next);
4818       if (result != XML_ERROR_NONE)
4819         return result;
4820       enc = parser->m_encoding;
4821       handleDefault = XML_FALSE;
4822     } break;
4823 #endif /* XML_DTD */
4824     case XML_ROLE_DOCTYPE_PUBLIC_ID:
4825 #ifdef XML_DTD
4826       parser->m_useForeignDTD = XML_FALSE;
4827       parser->m_declEntity = (ENTITY *)lookup(
4828           parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4829       if (! parser->m_declEntity)
4830         return XML_ERROR_NO_MEMORY;
4831 #endif /* XML_DTD */
4832       dtd->hasParamEntityRefs = XML_TRUE;
4833       if (parser->m_startDoctypeDeclHandler) {
4834         XML_Char *pubId;
4835         if (! XmlIsPublicId(enc, s, next, eventPP))
4836           return XML_ERROR_PUBLICID;
4837         pubId = poolStoreString(&parser->m_tempPool, enc,
4838                                 s + enc->minBytesPerChar,
4839                                 next - enc->minBytesPerChar);
4840         if (! pubId)
4841           return XML_ERROR_NO_MEMORY;
4842         normalizePublicId(pubId);
4843         poolFinish(&parser->m_tempPool);
4844         parser->m_doctypePubid = pubId;
4845         handleDefault = XML_FALSE;
4846         goto alreadyChecked;
4847       }
4848       /* fall through */
4849     case XML_ROLE_ENTITY_PUBLIC_ID:
4850       if (! XmlIsPublicId(enc, s, next, eventPP))
4851         return XML_ERROR_PUBLICID;
4852     alreadyChecked:
4853       if (dtd->keepProcessing && parser->m_declEntity) {
4854         XML_Char *tem
4855             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4856                               next - enc->minBytesPerChar);
4857         if (! tem)
4858           return XML_ERROR_NO_MEMORY;
4859         normalizePublicId(tem);
4860         parser->m_declEntity->publicId = tem;
4861         poolFinish(&dtd->pool);
4862         /* Don't suppress the default handler if we fell through from
4863          * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4864          */
4865         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4866           handleDefault = XML_FALSE;
4867       }
4868       break;
4869     case XML_ROLE_DOCTYPE_CLOSE:
4870       if (allowClosingDoctype != XML_TRUE) {
4871         /* Must not close doctype from within expanded parameter entities */
4872         return XML_ERROR_INVALID_TOKEN;
4873       }
4874 
4875       if (parser->m_doctypeName) {
4876         parser->m_startDoctypeDeclHandler(
4877             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4878             parser->m_doctypePubid, 0);
4879         poolClear(&parser->m_tempPool);
4880         handleDefault = XML_FALSE;
4881       }
4882       /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4883          XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4884          was not set, indicating an external subset
4885       */
4886 #ifdef XML_DTD
4887       if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4888         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4889         dtd->hasParamEntityRefs = XML_TRUE;
4890         if (parser->m_paramEntityParsing
4891             && parser->m_externalEntityRefHandler) {
4892           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4893                                             externalSubsetName, sizeof(ENTITY));
4894           if (! entity) {
4895             /* The external subset name "#" will have already been
4896              * inserted into the hash table at the start of the
4897              * external entity parsing, so no allocation will happen
4898              * and lookup() cannot fail.
4899              */
4900             return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4901           }
4902           if (parser->m_useForeignDTD)
4903             entity->base = parser->m_curBase;
4904           dtd->paramEntityRead = XML_FALSE;
4905           if (! parser->m_externalEntityRefHandler(
4906                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4907                   entity->systemId, entity->publicId))
4908             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4909           if (dtd->paramEntityRead) {
4910             if (! dtd->standalone && parser->m_notStandaloneHandler
4911                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4912               return XML_ERROR_NOT_STANDALONE;
4913           }
4914           /* if we didn't read the foreign DTD then this means that there
4915              is no external subset and we must reset dtd->hasParamEntityRefs
4916           */
4917           else if (! parser->m_doctypeSysid)
4918             dtd->hasParamEntityRefs = hadParamEntityRefs;
4919           /* end of DTD - no need to update dtd->keepProcessing */
4920         }
4921         parser->m_useForeignDTD = XML_FALSE;
4922       }
4923 #endif /* XML_DTD */
4924       if (parser->m_endDoctypeDeclHandler) {
4925         parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4926         handleDefault = XML_FALSE;
4927       }
4928       break;
4929     case XML_ROLE_INSTANCE_START:
4930 #ifdef XML_DTD
4931       /* if there is no DOCTYPE declaration then now is the
4932          last chance to read the foreign DTD
4933       */
4934       if (parser->m_useForeignDTD) {
4935         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4936         dtd->hasParamEntityRefs = XML_TRUE;
4937         if (parser->m_paramEntityParsing
4938             && parser->m_externalEntityRefHandler) {
4939           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4940                                             externalSubsetName, sizeof(ENTITY));
4941           if (! entity)
4942             return XML_ERROR_NO_MEMORY;
4943           entity->base = parser->m_curBase;
4944           dtd->paramEntityRead = XML_FALSE;
4945           if (! parser->m_externalEntityRefHandler(
4946                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4947                   entity->systemId, entity->publicId))
4948             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4949           if (dtd->paramEntityRead) {
4950             if (! dtd->standalone && parser->m_notStandaloneHandler
4951                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4952               return XML_ERROR_NOT_STANDALONE;
4953           }
4954           /* if we didn't read the foreign DTD then this means that there
4955              is no external subset and we must reset dtd->hasParamEntityRefs
4956           */
4957           else
4958             dtd->hasParamEntityRefs = hadParamEntityRefs;
4959           /* end of DTD - no need to update dtd->keepProcessing */
4960         }
4961       }
4962 #endif /* XML_DTD */
4963       parser->m_processor = contentProcessor;
4964       return contentProcessor(parser, s, end, nextPtr);
4965     case XML_ROLE_ATTLIST_ELEMENT_NAME:
4966       parser->m_declElementType = getElementType(parser, enc, s, next);
4967       if (! parser->m_declElementType)
4968         return XML_ERROR_NO_MEMORY;
4969       goto checkAttListDeclHandler;
4970     case XML_ROLE_ATTRIBUTE_NAME:
4971       parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4972       if (! parser->m_declAttributeId)
4973         return XML_ERROR_NO_MEMORY;
4974       parser->m_declAttributeIsCdata = XML_FALSE;
4975       parser->m_declAttributeType = NULL;
4976       parser->m_declAttributeIsId = XML_FALSE;
4977       goto checkAttListDeclHandler;
4978     case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4979       parser->m_declAttributeIsCdata = XML_TRUE;
4980       parser->m_declAttributeType = atypeCDATA;
4981       goto checkAttListDeclHandler;
4982     case XML_ROLE_ATTRIBUTE_TYPE_ID:
4983       parser->m_declAttributeIsId = XML_TRUE;
4984       parser->m_declAttributeType = atypeID;
4985       goto checkAttListDeclHandler;
4986     case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4987       parser->m_declAttributeType = atypeIDREF;
4988       goto checkAttListDeclHandler;
4989     case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4990       parser->m_declAttributeType = atypeIDREFS;
4991       goto checkAttListDeclHandler;
4992     case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4993       parser->m_declAttributeType = atypeENTITY;
4994       goto checkAttListDeclHandler;
4995     case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4996       parser->m_declAttributeType = atypeENTITIES;
4997       goto checkAttListDeclHandler;
4998     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4999       parser->m_declAttributeType = atypeNMTOKEN;
5000       goto checkAttListDeclHandler;
5001     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5002       parser->m_declAttributeType = atypeNMTOKENS;
5003     checkAttListDeclHandler:
5004       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5005         handleDefault = XML_FALSE;
5006       break;
5007     case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5008     case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5009       if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5010         const XML_Char *prefix;
5011         if (parser->m_declAttributeType) {
5012           prefix = enumValueSep;
5013         } else {
5014           prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5015                                                               : enumValueStart);
5016         }
5017         if (! poolAppendString(&parser->m_tempPool, prefix))
5018           return XML_ERROR_NO_MEMORY;
5019         if (! poolAppend(&parser->m_tempPool, enc, s, next))
5020           return XML_ERROR_NO_MEMORY;
5021         parser->m_declAttributeType = parser->m_tempPool.start;
5022         handleDefault = XML_FALSE;
5023       }
5024       break;
5025     case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5026     case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5027       if (dtd->keepProcessing) {
5028         if (! defineAttribute(parser->m_declElementType,
5029                               parser->m_declAttributeId,
5030                               parser->m_declAttributeIsCdata,
5031                               parser->m_declAttributeIsId, 0, parser))
5032           return XML_ERROR_NO_MEMORY;
5033         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5034           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5035               || (*parser->m_declAttributeType == XML_T(ASCII_N)
5036                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5037             /* Enumerated or Notation type */
5038             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5039                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5040               return XML_ERROR_NO_MEMORY;
5041             parser->m_declAttributeType = parser->m_tempPool.start;
5042             poolFinish(&parser->m_tempPool);
5043           }
5044           *eventEndPP = s;
5045           parser->m_attlistDeclHandler(
5046               parser->m_handlerArg, parser->m_declElementType->name,
5047               parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5048               role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5049           handleDefault = XML_FALSE;
5050         }
5051       }
5052       poolClear(&parser->m_tempPool);
5053       break;
5054     case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5055     case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5056       if (dtd->keepProcessing) {
5057         const XML_Char *attVal;
5058         enum XML_Error result = storeAttributeValue(
5059             parser, enc, parser->m_declAttributeIsCdata,
5060             s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5061             XML_ACCOUNT_NONE);
5062         if (result)
5063           return result;
5064         attVal = poolStart(&dtd->pool);
5065         poolFinish(&dtd->pool);
5066         /* ID attributes aren't allowed to have a default */
5067         if (! defineAttribute(
5068                 parser->m_declElementType, parser->m_declAttributeId,
5069                 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5070           return XML_ERROR_NO_MEMORY;
5071         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5072           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5073               || (*parser->m_declAttributeType == XML_T(ASCII_N)
5074                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5075             /* Enumerated or Notation type */
5076             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5077                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5078               return XML_ERROR_NO_MEMORY;
5079             parser->m_declAttributeType = parser->m_tempPool.start;
5080             poolFinish(&parser->m_tempPool);
5081           }
5082           *eventEndPP = s;
5083           parser->m_attlistDeclHandler(
5084               parser->m_handlerArg, parser->m_declElementType->name,
5085               parser->m_declAttributeId->name, parser->m_declAttributeType,
5086               attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5087           poolClear(&parser->m_tempPool);
5088           handleDefault = XML_FALSE;
5089         }
5090       }
5091       break;
5092     case XML_ROLE_ENTITY_VALUE:
5093       if (dtd->keepProcessing) {
5094 #if XML_GE == 1
5095         // This will store the given replacement text in
5096         // parser->m_declEntity->textPtr.
5097         enum XML_Error result
5098             = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5099                                next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5100         if (parser->m_declEntity) {
5101           parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5102           parser->m_declEntity->textLen
5103               = (int)(poolLength(&dtd->entityValuePool));
5104           poolFinish(&dtd->entityValuePool);
5105           if (parser->m_entityDeclHandler) {
5106             *eventEndPP = s;
5107             parser->m_entityDeclHandler(
5108                 parser->m_handlerArg, parser->m_declEntity->name,
5109                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5110                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5111             handleDefault = XML_FALSE;
5112           }
5113         } else
5114           poolDiscard(&dtd->entityValuePool);
5115         if (result != XML_ERROR_NONE)
5116           return result;
5117 #else
5118         // This will store "&amp;entity123;" in parser->m_declEntity->textPtr
5119         // to end up as "&entity123;" in the handler.
5120         if (parser->m_declEntity != NULL) {
5121           const enum XML_Error result
5122               = storeSelfEntityValue(parser, parser->m_declEntity);
5123           if (result != XML_ERROR_NONE)
5124             return result;
5125 
5126           if (parser->m_entityDeclHandler) {
5127             *eventEndPP = s;
5128             parser->m_entityDeclHandler(
5129                 parser->m_handlerArg, parser->m_declEntity->name,
5130                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5131                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5132             handleDefault = XML_FALSE;
5133           }
5134         }
5135 #endif
5136       }
5137       break;
5138     case XML_ROLE_DOCTYPE_SYSTEM_ID:
5139 #ifdef XML_DTD
5140       parser->m_useForeignDTD = XML_FALSE;
5141 #endif /* XML_DTD */
5142       dtd->hasParamEntityRefs = XML_TRUE;
5143       if (parser->m_startDoctypeDeclHandler) {
5144         parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5145                                                  s + enc->minBytesPerChar,
5146                                                  next - enc->minBytesPerChar);
5147         if (parser->m_doctypeSysid == NULL)
5148           return XML_ERROR_NO_MEMORY;
5149         poolFinish(&parser->m_tempPool);
5150         handleDefault = XML_FALSE;
5151       }
5152 #ifdef XML_DTD
5153       else
5154         /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5155            for the case where no parser->m_startDoctypeDeclHandler is set */
5156         parser->m_doctypeSysid = externalSubsetName;
5157 #endif /* XML_DTD */
5158       if (! dtd->standalone
5159 #ifdef XML_DTD
5160           && ! parser->m_paramEntityParsing
5161 #endif /* XML_DTD */
5162           && parser->m_notStandaloneHandler
5163           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5164         return XML_ERROR_NOT_STANDALONE;
5165 #ifndef XML_DTD
5166       break;
5167 #else  /* XML_DTD */
5168       if (! parser->m_declEntity) {
5169         parser->m_declEntity = (ENTITY *)lookup(
5170             parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5171         if (! parser->m_declEntity)
5172           return XML_ERROR_NO_MEMORY;
5173         parser->m_declEntity->publicId = NULL;
5174       }
5175 #endif /* XML_DTD */
5176       /* fall through */
5177     case XML_ROLE_ENTITY_SYSTEM_ID:
5178       if (dtd->keepProcessing && parser->m_declEntity) {
5179         parser->m_declEntity->systemId
5180             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5181                               next - enc->minBytesPerChar);
5182         if (! parser->m_declEntity->systemId)
5183           return XML_ERROR_NO_MEMORY;
5184         parser->m_declEntity->base = parser->m_curBase;
5185         poolFinish(&dtd->pool);
5186         /* Don't suppress the default handler if we fell through from
5187          * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5188          */
5189         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5190           handleDefault = XML_FALSE;
5191       }
5192       break;
5193     case XML_ROLE_ENTITY_COMPLETE:
5194 #if XML_GE == 0
5195       // This will store "&amp;entity123;" in entity->textPtr
5196       // to end up as "&entity123;" in the handler.
5197       if (parser->m_declEntity != NULL) {
5198         const enum XML_Error result
5199             = storeSelfEntityValue(parser, parser->m_declEntity);
5200         if (result != XML_ERROR_NONE)
5201           return result;
5202       }
5203 #endif
5204       if (dtd->keepProcessing && parser->m_declEntity
5205           && parser->m_entityDeclHandler) {
5206         *eventEndPP = s;
5207         parser->m_entityDeclHandler(
5208             parser->m_handlerArg, parser->m_declEntity->name,
5209             parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5210             parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5211         handleDefault = XML_FALSE;
5212       }
5213       break;
5214     case XML_ROLE_ENTITY_NOTATION_NAME:
5215       if (dtd->keepProcessing && parser->m_declEntity) {
5216         parser->m_declEntity->notation
5217             = poolStoreString(&dtd->pool, enc, s, next);
5218         if (! parser->m_declEntity->notation)
5219           return XML_ERROR_NO_MEMORY;
5220         poolFinish(&dtd->pool);
5221         if (parser->m_unparsedEntityDeclHandler) {
5222           *eventEndPP = s;
5223           parser->m_unparsedEntityDeclHandler(
5224               parser->m_handlerArg, parser->m_declEntity->name,
5225               parser->m_declEntity->base, parser->m_declEntity->systemId,
5226               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5227           handleDefault = XML_FALSE;
5228         } else if (parser->m_entityDeclHandler) {
5229           *eventEndPP = s;
5230           parser->m_entityDeclHandler(
5231               parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5232               parser->m_declEntity->base, parser->m_declEntity->systemId,
5233               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5234           handleDefault = XML_FALSE;
5235         }
5236       }
5237       break;
5238     case XML_ROLE_GENERAL_ENTITY_NAME: {
5239       if (XmlPredefinedEntityName(enc, s, next)) {
5240         parser->m_declEntity = NULL;
5241         break;
5242       }
5243       if (dtd->keepProcessing) {
5244         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5245         if (! name)
5246           return XML_ERROR_NO_MEMORY;
5247         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5248                                                 name, sizeof(ENTITY));
5249         if (! parser->m_declEntity)
5250           return XML_ERROR_NO_MEMORY;
5251         if (parser->m_declEntity->name != name) {
5252           poolDiscard(&dtd->pool);
5253           parser->m_declEntity = NULL;
5254         } else {
5255           poolFinish(&dtd->pool);
5256           parser->m_declEntity->publicId = NULL;
5257           parser->m_declEntity->is_param = XML_FALSE;
5258           /* if we have a parent parser or are reading an internal parameter
5259              entity, then the entity declaration is not considered "internal"
5260           */
5261           parser->m_declEntity->is_internal
5262               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5263           if (parser->m_entityDeclHandler)
5264             handleDefault = XML_FALSE;
5265         }
5266       } else {
5267         poolDiscard(&dtd->pool);
5268         parser->m_declEntity = NULL;
5269       }
5270     } break;
5271     case XML_ROLE_PARAM_ENTITY_NAME:
5272 #ifdef XML_DTD
5273       if (dtd->keepProcessing) {
5274         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5275         if (! name)
5276           return XML_ERROR_NO_MEMORY;
5277         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5278                                                 name, sizeof(ENTITY));
5279         if (! parser->m_declEntity)
5280           return XML_ERROR_NO_MEMORY;
5281         if (parser->m_declEntity->name != name) {
5282           poolDiscard(&dtd->pool);
5283           parser->m_declEntity = NULL;
5284         } else {
5285           poolFinish(&dtd->pool);
5286           parser->m_declEntity->publicId = NULL;
5287           parser->m_declEntity->is_param = XML_TRUE;
5288           /* if we have a parent parser or are reading an internal parameter
5289              entity, then the entity declaration is not considered "internal"
5290           */
5291           parser->m_declEntity->is_internal
5292               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5293           if (parser->m_entityDeclHandler)
5294             handleDefault = XML_FALSE;
5295         }
5296       } else {
5297         poolDiscard(&dtd->pool);
5298         parser->m_declEntity = NULL;
5299       }
5300 #else  /* not XML_DTD */
5301       parser->m_declEntity = NULL;
5302 #endif /* XML_DTD */
5303       break;
5304     case XML_ROLE_NOTATION_NAME:
5305       parser->m_declNotationPublicId = NULL;
5306       parser->m_declNotationName = NULL;
5307       if (parser->m_notationDeclHandler) {
5308         parser->m_declNotationName
5309             = poolStoreString(&parser->m_tempPool, enc, s, next);
5310         if (! parser->m_declNotationName)
5311           return XML_ERROR_NO_MEMORY;
5312         poolFinish(&parser->m_tempPool);
5313         handleDefault = XML_FALSE;
5314       }
5315       break;
5316     case XML_ROLE_NOTATION_PUBLIC_ID:
5317       if (! XmlIsPublicId(enc, s, next, eventPP))
5318         return XML_ERROR_PUBLICID;
5319       if (parser
5320               ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5321         XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5322                                         s + enc->minBytesPerChar,
5323                                         next - enc->minBytesPerChar);
5324         if (! tem)
5325           return XML_ERROR_NO_MEMORY;
5326         normalizePublicId(tem);
5327         parser->m_declNotationPublicId = tem;
5328         poolFinish(&parser->m_tempPool);
5329         handleDefault = XML_FALSE;
5330       }
5331       break;
5332     case XML_ROLE_NOTATION_SYSTEM_ID:
5333       if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5334         const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5335                                                    s + enc->minBytesPerChar,
5336                                                    next - enc->minBytesPerChar);
5337         if (! systemId)
5338           return XML_ERROR_NO_MEMORY;
5339         *eventEndPP = s;
5340         parser->m_notationDeclHandler(
5341             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5342             systemId, parser->m_declNotationPublicId);
5343         handleDefault = XML_FALSE;
5344       }
5345       poolClear(&parser->m_tempPool);
5346       break;
5347     case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5348       if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5349         *eventEndPP = s;
5350         parser->m_notationDeclHandler(
5351             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5352             0, parser->m_declNotationPublicId);
5353         handleDefault = XML_FALSE;
5354       }
5355       poolClear(&parser->m_tempPool);
5356       break;
5357     case XML_ROLE_ERROR:
5358       switch (tok) {
5359       case XML_TOK_PARAM_ENTITY_REF:
5360         /* PE references in internal subset are
5361            not allowed within declarations. */
5362         return XML_ERROR_PARAM_ENTITY_REF;
5363       case XML_TOK_XML_DECL:
5364         return XML_ERROR_MISPLACED_XML_PI;
5365       default:
5366         return XML_ERROR_SYNTAX;
5367       }
5368 #ifdef XML_DTD
5369     case XML_ROLE_IGNORE_SECT: {
5370       enum XML_Error result;
5371       if (parser->m_defaultHandler)
5372         reportDefault(parser, enc, s, next);
5373       handleDefault = XML_FALSE;
5374       result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5375       if (result != XML_ERROR_NONE)
5376         return result;
5377       else if (! next) {
5378         parser->m_processor = ignoreSectionProcessor;
5379         return result;
5380       }
5381     } break;
5382 #endif /* XML_DTD */
5383     case XML_ROLE_GROUP_OPEN:
5384       if (parser->m_prologState.level >= parser->m_groupSize) {
5385         if (parser->m_groupSize) {
5386           {
5387             /* Detect and prevent integer overflow */
5388             if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5389               return XML_ERROR_NO_MEMORY;
5390             }
5391 
5392             char *const new_connector = (char *)REALLOC(
5393                 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5394             if (new_connector == NULL) {
5395               parser->m_groupSize /= 2;
5396               return XML_ERROR_NO_MEMORY;
5397             }
5398             parser->m_groupConnector = new_connector;
5399           }
5400 
5401           if (dtd->scaffIndex) {
5402             /* Detect and prevent integer overflow.
5403              * The preprocessor guard addresses the "always false" warning
5404              * from -Wtype-limits on platforms where
5405              * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5406 #if UINT_MAX >= SIZE_MAX
5407             if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5408               return XML_ERROR_NO_MEMORY;
5409             }
5410 #endif
5411 
5412             int *const new_scaff_index = (int *)REALLOC(
5413                 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5414             if (new_scaff_index == NULL)
5415               return XML_ERROR_NO_MEMORY;
5416             dtd->scaffIndex = new_scaff_index;
5417           }
5418         } else {
5419           parser->m_groupConnector
5420               = (char *)MALLOC(parser, parser->m_groupSize = 32);
5421           if (! parser->m_groupConnector) {
5422             parser->m_groupSize = 0;
5423             return XML_ERROR_NO_MEMORY;
5424           }
5425         }
5426       }
5427       parser->m_groupConnector[parser->m_prologState.level] = 0;
5428       if (dtd->in_eldecl) {
5429         int myindex = nextScaffoldPart(parser);
5430         if (myindex < 0)
5431           return XML_ERROR_NO_MEMORY;
5432         assert(dtd->scaffIndex != NULL);
5433         dtd->scaffIndex[dtd->scaffLevel] = myindex;
5434         dtd->scaffLevel++;
5435         dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5436         if (parser->m_elementDeclHandler)
5437           handleDefault = XML_FALSE;
5438       }
5439       break;
5440     case XML_ROLE_GROUP_SEQUENCE:
5441       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5442         return XML_ERROR_SYNTAX;
5443       parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5444       if (dtd->in_eldecl && parser->m_elementDeclHandler)
5445         handleDefault = XML_FALSE;
5446       break;
5447     case XML_ROLE_GROUP_CHOICE:
5448       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5449         return XML_ERROR_SYNTAX;
5450       if (dtd->in_eldecl
5451           && ! parser->m_groupConnector[parser->m_prologState.level]
5452           && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5453               != XML_CTYPE_MIXED)) {
5454         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5455             = XML_CTYPE_CHOICE;
5456         if (parser->m_elementDeclHandler)
5457           handleDefault = XML_FALSE;
5458       }
5459       parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5460       break;
5461     case XML_ROLE_PARAM_ENTITY_REF:
5462 #ifdef XML_DTD
5463     case XML_ROLE_INNER_PARAM_ENTITY_REF:
5464       dtd->hasParamEntityRefs = XML_TRUE;
5465       if (! parser->m_paramEntityParsing)
5466         dtd->keepProcessing = dtd->standalone;
5467       else {
5468         const XML_Char *name;
5469         ENTITY *entity;
5470         name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5471                                next - enc->minBytesPerChar);
5472         if (! name)
5473           return XML_ERROR_NO_MEMORY;
5474         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5475         poolDiscard(&dtd->pool);
5476         /* first, determine if a check for an existing declaration is needed;
5477            if yes, check that the entity exists, and that it is internal,
5478            otherwise call the skipped entity handler
5479         */
5480         if (parser->m_prologState.documentEntity
5481             && (dtd->standalone ? ! parser->m_openInternalEntities
5482                                 : ! dtd->hasParamEntityRefs)) {
5483           if (! entity)
5484             return XML_ERROR_UNDEFINED_ENTITY;
5485           else if (! entity->is_internal) {
5486             /* It's hard to exhaustively search the code to be sure,
5487              * but there doesn't seem to be a way of executing the
5488              * following line.  There are two cases:
5489              *
5490              * If 'standalone' is false, the DTD must have no
5491              * parameter entities or we wouldn't have passed the outer
5492              * 'if' statement.  That means the only entity in the hash
5493              * table is the external subset name "#" which cannot be
5494              * given as a parameter entity name in XML syntax, so the
5495              * lookup must have returned NULL and we don't even reach
5496              * the test for an internal entity.
5497              *
5498              * If 'standalone' is true, it does not seem to be
5499              * possible to create entities taking this code path that
5500              * are not internal entities, so fail the test above.
5501              *
5502              * Because this analysis is very uncertain, the code is
5503              * being left in place and merely removed from the
5504              * coverage test statistics.
5505              */
5506             return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5507           }
5508         } else if (! entity) {
5509           dtd->keepProcessing = dtd->standalone;
5510           /* cannot report skipped entities in declarations */
5511           if ((role == XML_ROLE_PARAM_ENTITY_REF)
5512               && parser->m_skippedEntityHandler) {
5513             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5514             handleDefault = XML_FALSE;
5515           }
5516           break;
5517         }
5518         if (entity->open)
5519           return XML_ERROR_RECURSIVE_ENTITY_REF;
5520         if (entity->textPtr) {
5521           enum XML_Error result;
5522           XML_Bool betweenDecl
5523               = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5524           result = processInternalEntity(parser, entity, betweenDecl);
5525           if (result != XML_ERROR_NONE)
5526             return result;
5527           handleDefault = XML_FALSE;
5528           break;
5529         }
5530         if (parser->m_externalEntityRefHandler) {
5531           dtd->paramEntityRead = XML_FALSE;
5532           entity->open = XML_TRUE;
5533           entityTrackingOnOpen(parser, entity, __LINE__);
5534           if (! parser->m_externalEntityRefHandler(
5535                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
5536                   entity->systemId, entity->publicId)) {
5537             entityTrackingOnClose(parser, entity, __LINE__);
5538             entity->open = XML_FALSE;
5539             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5540           }
5541           entityTrackingOnClose(parser, entity, __LINE__);
5542           entity->open = XML_FALSE;
5543           handleDefault = XML_FALSE;
5544           if (! dtd->paramEntityRead) {
5545             dtd->keepProcessing = dtd->standalone;
5546             break;
5547           }
5548         } else {
5549           dtd->keepProcessing = dtd->standalone;
5550           break;
5551         }
5552       }
5553 #endif /* XML_DTD */
5554       if (! dtd->standalone && parser->m_notStandaloneHandler
5555           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5556         return XML_ERROR_NOT_STANDALONE;
5557       break;
5558 
5559       /* Element declaration stuff */
5560 
5561     case XML_ROLE_ELEMENT_NAME:
5562       if (parser->m_elementDeclHandler) {
5563         parser->m_declElementType = getElementType(parser, enc, s, next);
5564         if (! parser->m_declElementType)
5565           return XML_ERROR_NO_MEMORY;
5566         dtd->scaffLevel = 0;
5567         dtd->scaffCount = 0;
5568         dtd->in_eldecl = XML_TRUE;
5569         handleDefault = XML_FALSE;
5570       }
5571       break;
5572 
5573     case XML_ROLE_CONTENT_ANY:
5574     case XML_ROLE_CONTENT_EMPTY:
5575       if (dtd->in_eldecl) {
5576         if (parser->m_elementDeclHandler) {
5577           XML_Content *content
5578               = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5579           if (! content)
5580             return XML_ERROR_NO_MEMORY;
5581           content->quant = XML_CQUANT_NONE;
5582           content->name = NULL;
5583           content->numchildren = 0;
5584           content->children = NULL;
5585           content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5586                                                           : XML_CTYPE_EMPTY);
5587           *eventEndPP = s;
5588           parser->m_elementDeclHandler(
5589               parser->m_handlerArg, parser->m_declElementType->name, content);
5590           handleDefault = XML_FALSE;
5591         }
5592         dtd->in_eldecl = XML_FALSE;
5593       }
5594       break;
5595 
5596     case XML_ROLE_CONTENT_PCDATA:
5597       if (dtd->in_eldecl) {
5598         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5599             = XML_CTYPE_MIXED;
5600         if (parser->m_elementDeclHandler)
5601           handleDefault = XML_FALSE;
5602       }
5603       break;
5604 
5605     case XML_ROLE_CONTENT_ELEMENT:
5606       quant = XML_CQUANT_NONE;
5607       goto elementContent;
5608     case XML_ROLE_CONTENT_ELEMENT_OPT:
5609       quant = XML_CQUANT_OPT;
5610       goto elementContent;
5611     case XML_ROLE_CONTENT_ELEMENT_REP:
5612       quant = XML_CQUANT_REP;
5613       goto elementContent;
5614     case XML_ROLE_CONTENT_ELEMENT_PLUS:
5615       quant = XML_CQUANT_PLUS;
5616     elementContent:
5617       if (dtd->in_eldecl) {
5618         ELEMENT_TYPE *el;
5619         const XML_Char *name;
5620         size_t nameLen;
5621         const char *nxt
5622             = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5623         int myindex = nextScaffoldPart(parser);
5624         if (myindex < 0)
5625           return XML_ERROR_NO_MEMORY;
5626         dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5627         dtd->scaffold[myindex].quant = quant;
5628         el = getElementType(parser, enc, s, nxt);
5629         if (! el)
5630           return XML_ERROR_NO_MEMORY;
5631         name = el->name;
5632         dtd->scaffold[myindex].name = name;
5633         nameLen = 0;
5634         for (; name[nameLen++];)
5635           ;
5636 
5637         /* Detect and prevent integer overflow */
5638         if (nameLen > UINT_MAX - dtd->contentStringLen) {
5639           return XML_ERROR_NO_MEMORY;
5640         }
5641 
5642         dtd->contentStringLen += (unsigned)nameLen;
5643         if (parser->m_elementDeclHandler)
5644           handleDefault = XML_FALSE;
5645       }
5646       break;
5647 
5648     case XML_ROLE_GROUP_CLOSE:
5649       quant = XML_CQUANT_NONE;
5650       goto closeGroup;
5651     case XML_ROLE_GROUP_CLOSE_OPT:
5652       quant = XML_CQUANT_OPT;
5653       goto closeGroup;
5654     case XML_ROLE_GROUP_CLOSE_REP:
5655       quant = XML_CQUANT_REP;
5656       goto closeGroup;
5657     case XML_ROLE_GROUP_CLOSE_PLUS:
5658       quant = XML_CQUANT_PLUS;
5659     closeGroup:
5660       if (dtd->in_eldecl) {
5661         if (parser->m_elementDeclHandler)
5662           handleDefault = XML_FALSE;
5663         dtd->scaffLevel--;
5664         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5665         if (dtd->scaffLevel == 0) {
5666           if (! handleDefault) {
5667             XML_Content *model = build_model(parser);
5668             if (! model)
5669               return XML_ERROR_NO_MEMORY;
5670             *eventEndPP = s;
5671             parser->m_elementDeclHandler(
5672                 parser->m_handlerArg, parser->m_declElementType->name, model);
5673           }
5674           dtd->in_eldecl = XML_FALSE;
5675           dtd->contentStringLen = 0;
5676         }
5677       }
5678       break;
5679       /* End element declaration stuff */
5680 
5681     case XML_ROLE_PI:
5682       if (! reportProcessingInstruction(parser, enc, s, next))
5683         return XML_ERROR_NO_MEMORY;
5684       handleDefault = XML_FALSE;
5685       break;
5686     case XML_ROLE_COMMENT:
5687       if (! reportComment(parser, enc, s, next))
5688         return XML_ERROR_NO_MEMORY;
5689       handleDefault = XML_FALSE;
5690       break;
5691     case XML_ROLE_NONE:
5692       switch (tok) {
5693       case XML_TOK_BOM:
5694         handleDefault = XML_FALSE;
5695         break;
5696       }
5697       break;
5698     case XML_ROLE_DOCTYPE_NONE:
5699       if (parser->m_startDoctypeDeclHandler)
5700         handleDefault = XML_FALSE;
5701       break;
5702     case XML_ROLE_ENTITY_NONE:
5703       if (dtd->keepProcessing && parser->m_entityDeclHandler)
5704         handleDefault = XML_FALSE;
5705       break;
5706     case XML_ROLE_NOTATION_NONE:
5707       if (parser->m_notationDeclHandler)
5708         handleDefault = XML_FALSE;
5709       break;
5710     case XML_ROLE_ATTLIST_NONE:
5711       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5712         handleDefault = XML_FALSE;
5713       break;
5714     case XML_ROLE_ELEMENT_NONE:
5715       if (parser->m_elementDeclHandler)
5716         handleDefault = XML_FALSE;
5717       break;
5718     } /* end of big switch */
5719 
5720     if (handleDefault && parser->m_defaultHandler)
5721       reportDefault(parser, enc, s, next);
5722 
5723     switch (parser->m_parsingStatus.parsing) {
5724     case XML_SUSPENDED:
5725       *nextPtr = next;
5726       return XML_ERROR_NONE;
5727     case XML_FINISHED:
5728       return XML_ERROR_ABORTED;
5729     default:
5730       s = next;
5731       tok = XmlPrologTok(enc, s, end, &next);
5732     }
5733   }
5734   /* not reached */
5735 }
5736 
5737 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5738 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5739                 const char **nextPtr) {
5740   parser->m_processor = epilogProcessor;
5741   parser->m_eventPtr = s;
5742   for (;;) {
5743     const char *next = NULL;
5744     int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5745 #if XML_GE == 1
5746     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5747                                   XML_ACCOUNT_DIRECT)) {
5748       accountingOnAbort(parser);
5749       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5750     }
5751 #endif
5752     parser->m_eventEndPtr = next;
5753     switch (tok) {
5754     /* report partial linebreak - it might be the last token */
5755     case -XML_TOK_PROLOG_S:
5756       if (parser->m_defaultHandler) {
5757         reportDefault(parser, parser->m_encoding, s, next);
5758         if (parser->m_parsingStatus.parsing == XML_FINISHED)
5759           return XML_ERROR_ABORTED;
5760       }
5761       *nextPtr = next;
5762       return XML_ERROR_NONE;
5763     case XML_TOK_NONE:
5764       *nextPtr = s;
5765       return XML_ERROR_NONE;
5766     case XML_TOK_PROLOG_S:
5767       if (parser->m_defaultHandler)
5768         reportDefault(parser, parser->m_encoding, s, next);
5769       break;
5770     case XML_TOK_PI:
5771       if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5772         return XML_ERROR_NO_MEMORY;
5773       break;
5774     case XML_TOK_COMMENT:
5775       if (! reportComment(parser, parser->m_encoding, s, next))
5776         return XML_ERROR_NO_MEMORY;
5777       break;
5778     case XML_TOK_INVALID:
5779       parser->m_eventPtr = next;
5780       return XML_ERROR_INVALID_TOKEN;
5781     case XML_TOK_PARTIAL:
5782       if (! parser->m_parsingStatus.finalBuffer) {
5783         *nextPtr = s;
5784         return XML_ERROR_NONE;
5785       }
5786       return XML_ERROR_UNCLOSED_TOKEN;
5787     case XML_TOK_PARTIAL_CHAR:
5788       if (! parser->m_parsingStatus.finalBuffer) {
5789         *nextPtr = s;
5790         return XML_ERROR_NONE;
5791       }
5792       return XML_ERROR_PARTIAL_CHAR;
5793     default:
5794       return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5795     }
5796     parser->m_eventPtr = s = next;
5797     switch (parser->m_parsingStatus.parsing) {
5798     case XML_SUSPENDED:
5799       *nextPtr = next;
5800       return XML_ERROR_NONE;
5801     case XML_FINISHED:
5802       return XML_ERROR_ABORTED;
5803     default:;
5804     }
5805   }
5806 }
5807 
5808 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5809 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5810   const char *textStart, *textEnd;
5811   const char *next;
5812   enum XML_Error result;
5813   OPEN_INTERNAL_ENTITY *openEntity;
5814 
5815   if (parser->m_freeInternalEntities) {
5816     openEntity = parser->m_freeInternalEntities;
5817     parser->m_freeInternalEntities = openEntity->next;
5818   } else {
5819     openEntity
5820         = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5821     if (! openEntity)
5822       return XML_ERROR_NO_MEMORY;
5823   }
5824   entity->open = XML_TRUE;
5825 #if XML_GE == 1
5826   entityTrackingOnOpen(parser, entity, __LINE__);
5827 #endif
5828   entity->processed = 0;
5829   openEntity->next = parser->m_openInternalEntities;
5830   parser->m_openInternalEntities = openEntity;
5831   openEntity->entity = entity;
5832   openEntity->startTagLevel = parser->m_tagLevel;
5833   openEntity->betweenDecl = betweenDecl;
5834   openEntity->internalEventPtr = NULL;
5835   openEntity->internalEventEndPtr = NULL;
5836   textStart = (const char *)entity->textPtr;
5837   textEnd = (const char *)(entity->textPtr + entity->textLen);
5838   /* Set a safe default value in case 'next' does not get set */
5839   next = textStart;
5840 
5841 #ifdef XML_DTD
5842   if (entity->is_param) {
5843     int tok
5844         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5845     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5846                       tok, next, &next, XML_FALSE, XML_FALSE,
5847                       XML_ACCOUNT_ENTITY_EXPANSION);
5848   } else
5849 #endif /* XML_DTD */
5850     result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5851                        textStart, textEnd, &next, XML_FALSE,
5852                        XML_ACCOUNT_ENTITY_EXPANSION);
5853 
5854   if (result == XML_ERROR_NONE) {
5855     if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5856       entity->processed = (int)(next - textStart);
5857       parser->m_processor = internalEntityProcessor;
5858     } else if (parser->m_openInternalEntities->entity == entity) {
5859 #if XML_GE == 1
5860       entityTrackingOnClose(parser, entity, __LINE__);
5861 #endif /* XML_GE == 1 */
5862       entity->open = XML_FALSE;
5863       parser->m_openInternalEntities = openEntity->next;
5864       /* put openEntity back in list of free instances */
5865       openEntity->next = parser->m_freeInternalEntities;
5866       parser->m_freeInternalEntities = openEntity;
5867     }
5868   }
5869   return result;
5870 }
5871 
5872 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5873 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5874                         const char **nextPtr) {
5875   ENTITY *entity;
5876   const char *textStart, *textEnd;
5877   const char *next;
5878   enum XML_Error result;
5879   OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5880   if (! openEntity)
5881     return XML_ERROR_UNEXPECTED_STATE;
5882 
5883   entity = openEntity->entity;
5884   textStart = ((const char *)entity->textPtr) + entity->processed;
5885   textEnd = (const char *)(entity->textPtr + entity->textLen);
5886   /* Set a safe default value in case 'next' does not get set */
5887   next = textStart;
5888 
5889 #ifdef XML_DTD
5890   if (entity->is_param) {
5891     int tok
5892         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5893     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5894                       tok, next, &next, XML_FALSE, XML_TRUE,
5895                       XML_ACCOUNT_ENTITY_EXPANSION);
5896   } else
5897 #endif /* XML_DTD */
5898     result = doContent(parser, openEntity->startTagLevel,
5899                        parser->m_internalEncoding, textStart, textEnd, &next,
5900                        XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5901 
5902   if (result != XML_ERROR_NONE)
5903     return result;
5904 
5905   if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5906     entity->processed = (int)(next - (const char *)entity->textPtr);
5907     return result;
5908   }
5909 
5910 #if XML_GE == 1
5911   entityTrackingOnClose(parser, entity, __LINE__);
5912 #endif
5913   entity->open = XML_FALSE;
5914   parser->m_openInternalEntities = openEntity->next;
5915   /* put openEntity back in list of free instances */
5916   openEntity->next = parser->m_freeInternalEntities;
5917   parser->m_freeInternalEntities = openEntity;
5918 
5919   // If there are more open entities we want to stop right here and have the
5920   // upcoming call to XML_ResumeParser continue with entity content, or it would
5921   // be ignored altogether.
5922   if (parser->m_openInternalEntities != NULL
5923       && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5924     return XML_ERROR_NONE;
5925   }
5926 
5927 #ifdef XML_DTD
5928   if (entity->is_param) {
5929     int tok;
5930     parser->m_processor = prologProcessor;
5931     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5932     return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5933                     (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5934                     XML_ACCOUNT_DIRECT);
5935   } else
5936 #endif /* XML_DTD */
5937   {
5938     parser->m_processor = contentProcessor;
5939     /* see externalEntityContentProcessor vs contentProcessor */
5940     result = doContent(parser, parser->m_parentParser ? 1 : 0,
5941                        parser->m_encoding, s, end, nextPtr,
5942                        (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5943                        XML_ACCOUNT_DIRECT);
5944     if (result == XML_ERROR_NONE) {
5945       if (! storeRawNames(parser))
5946         return XML_ERROR_NO_MEMORY;
5947     }
5948     return result;
5949   }
5950 }
5951 
5952 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5953 errorProcessor(XML_Parser parser, const char *s, const char *end,
5954                const char **nextPtr) {
5955   UNUSED_P(s);
5956   UNUSED_P(end);
5957   UNUSED_P(nextPtr);
5958   return parser->m_errorCode;
5959 }
5960 
5961 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5962 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5963                     const char *ptr, const char *end, STRING_POOL *pool,
5964                     enum XML_Account account) {
5965   enum XML_Error result
5966       = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5967   if (result)
5968     return result;
5969   if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5970     poolChop(pool);
5971   if (! poolAppendChar(pool, XML_T('\0')))
5972     return XML_ERROR_NO_MEMORY;
5973   return XML_ERROR_NONE;
5974 }
5975 
5976 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5977 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5978                      const char *ptr, const char *end, STRING_POOL *pool,
5979                      enum XML_Account account) {
5980   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5981 #ifndef XML_DTD
5982   UNUSED_P(account);
5983 #endif
5984 
5985   for (;;) {
5986     const char *next
5987         = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5988     int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5989 #if XML_GE == 1
5990     if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5991       accountingOnAbort(parser);
5992       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5993     }
5994 #endif
5995     switch (tok) {
5996     case XML_TOK_NONE:
5997       return XML_ERROR_NONE;
5998     case XML_TOK_INVALID:
5999       if (enc == parser->m_encoding)
6000         parser->m_eventPtr = next;
6001       return XML_ERROR_INVALID_TOKEN;
6002     case XML_TOK_PARTIAL:
6003       if (enc == parser->m_encoding)
6004         parser->m_eventPtr = ptr;
6005       return XML_ERROR_INVALID_TOKEN;
6006     case XML_TOK_CHAR_REF: {
6007       XML_Char buf[XML_ENCODE_MAX];
6008       int i;
6009       int n = XmlCharRefNumber(enc, ptr);
6010       if (n < 0) {
6011         if (enc == parser->m_encoding)
6012           parser->m_eventPtr = ptr;
6013         return XML_ERROR_BAD_CHAR_REF;
6014       }
6015       if (! isCdata && n == 0x20 /* space */
6016           && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6017         break;
6018       n = XmlEncode(n, (ICHAR *)buf);
6019       /* The XmlEncode() functions can never return 0 here.  That
6020        * error return happens if the code point passed in is either
6021        * negative or greater than or equal to 0x110000.  The
6022        * XmlCharRefNumber() functions will all return a number
6023        * strictly less than 0x110000 or a negative value if an error
6024        * occurred.  The negative value is intercepted above, so
6025        * XmlEncode() is never passed a value it might return an
6026        * error for.
6027        */
6028       for (i = 0; i < n; i++) {
6029         if (! poolAppendChar(pool, buf[i]))
6030           return XML_ERROR_NO_MEMORY;
6031       }
6032     } break;
6033     case XML_TOK_DATA_CHARS:
6034       if (! poolAppend(pool, enc, ptr, next))
6035         return XML_ERROR_NO_MEMORY;
6036       break;
6037     case XML_TOK_TRAILING_CR:
6038       next = ptr + enc->minBytesPerChar;
6039       /* fall through */
6040     case XML_TOK_ATTRIBUTE_VALUE_S:
6041     case XML_TOK_DATA_NEWLINE:
6042       if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6043         break;
6044       if (! poolAppendChar(pool, 0x20))
6045         return XML_ERROR_NO_MEMORY;
6046       break;
6047     case XML_TOK_ENTITY_REF: {
6048       const XML_Char *name;
6049       ENTITY *entity;
6050       char checkEntityDecl;
6051       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6052           enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6053       if (ch) {
6054 #if XML_GE == 1
6055         /* NOTE: We are replacing 4-6 characters original input for 1 character
6056          *       so there is no amplification and hence recording without
6057          *       protection. */
6058         accountingDiffTolerated(parser, tok, (char *)&ch,
6059                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6060                                 XML_ACCOUNT_ENTITY_EXPANSION);
6061 #endif /* XML_GE == 1 */
6062         if (! poolAppendChar(pool, ch))
6063           return XML_ERROR_NO_MEMORY;
6064         break;
6065       }
6066       name = poolStoreString(&parser->m_temp2Pool, enc,
6067                              ptr + enc->minBytesPerChar,
6068                              next - enc->minBytesPerChar);
6069       if (! name)
6070         return XML_ERROR_NO_MEMORY;
6071       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6072       poolDiscard(&parser->m_temp2Pool);
6073       /* First, determine if a check for an existing declaration is needed;
6074          if yes, check that the entity exists, and that it is internal.
6075       */
6076       if (pool == &dtd->pool) /* are we called from prolog? */
6077         checkEntityDecl =
6078 #ifdef XML_DTD
6079             parser->m_prologState.documentEntity &&
6080 #endif /* XML_DTD */
6081             (dtd->standalone ? ! parser->m_openInternalEntities
6082                              : ! dtd->hasParamEntityRefs);
6083       else /* if (pool == &parser->m_tempPool): we are called from content */
6084         checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6085       if (checkEntityDecl) {
6086         if (! entity)
6087           return XML_ERROR_UNDEFINED_ENTITY;
6088         else if (! entity->is_internal)
6089           return XML_ERROR_ENTITY_DECLARED_IN_PE;
6090       } else if (! entity) {
6091         /* Cannot report skipped entity here - see comments on
6092            parser->m_skippedEntityHandler.
6093         if (parser->m_skippedEntityHandler)
6094           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6095         */
6096         /* Cannot call the default handler because this would be
6097            out of sync with the call to the startElementHandler.
6098         if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6099           reportDefault(parser, enc, ptr, next);
6100         */
6101         break;
6102       }
6103       if (entity->open) {
6104         if (enc == parser->m_encoding) {
6105           /* It does not appear that this line can be executed.
6106            *
6107            * The "if (entity->open)" check catches recursive entity
6108            * definitions.  In order to be called with an open
6109            * entity, it must have gone through this code before and
6110            * been through the recursive call to
6111            * appendAttributeValue() some lines below.  That call
6112            * sets the local encoding ("enc") to the parser's
6113            * internal encoding (internal_utf8 or internal_utf16),
6114            * which can never be the same as the principal encoding.
6115            * It doesn't appear there is another code path that gets
6116            * here with entity->open being TRUE.
6117            *
6118            * Since it is not certain that this logic is watertight,
6119            * we keep the line and merely exclude it from coverage
6120            * tests.
6121            */
6122           parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6123         }
6124         return XML_ERROR_RECURSIVE_ENTITY_REF;
6125       }
6126       if (entity->notation) {
6127         if (enc == parser->m_encoding)
6128           parser->m_eventPtr = ptr;
6129         return XML_ERROR_BINARY_ENTITY_REF;
6130       }
6131       if (! entity->textPtr) {
6132         if (enc == parser->m_encoding)
6133           parser->m_eventPtr = ptr;
6134         return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6135       } else {
6136         enum XML_Error result;
6137         const XML_Char *textEnd = entity->textPtr + entity->textLen;
6138         entity->open = XML_TRUE;
6139 #if XML_GE == 1
6140         entityTrackingOnOpen(parser, entity, __LINE__);
6141 #endif
6142         result = appendAttributeValue(parser, parser->m_internalEncoding,
6143                                       isCdata, (const char *)entity->textPtr,
6144                                       (const char *)textEnd, pool,
6145                                       XML_ACCOUNT_ENTITY_EXPANSION);
6146 #if XML_GE == 1
6147         entityTrackingOnClose(parser, entity, __LINE__);
6148 #endif
6149         entity->open = XML_FALSE;
6150         if (result)
6151           return result;
6152       }
6153     } break;
6154     default:
6155       /* The only token returned by XmlAttributeValueTok() that does
6156        * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6157        * Getting that would require an entity name to contain an
6158        * incomplete XML character (e.g. \xE2\x82); however previous
6159        * tokenisers will have already recognised and rejected such
6160        * names before XmlAttributeValueTok() gets a look-in.  This
6161        * default case should be retained as a safety net, but the code
6162        * excluded from coverage tests.
6163        *
6164        * LCOV_EXCL_START
6165        */
6166       if (enc == parser->m_encoding)
6167         parser->m_eventPtr = ptr;
6168       return XML_ERROR_UNEXPECTED_STATE;
6169       /* LCOV_EXCL_STOP */
6170     }
6171     ptr = next;
6172   }
6173   /* not reached */
6174 }
6175 
6176 #if XML_GE == 1
6177 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6178 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6179                  const char *entityTextPtr, const char *entityTextEnd,
6180                  enum XML_Account account) {
6181   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6182   STRING_POOL *pool = &(dtd->entityValuePool);
6183   enum XML_Error result = XML_ERROR_NONE;
6184 #  ifdef XML_DTD
6185   int oldInEntityValue = parser->m_prologState.inEntityValue;
6186   parser->m_prologState.inEntityValue = 1;
6187 #  else
6188   UNUSED_P(account);
6189 #  endif /* XML_DTD */
6190   /* never return Null for the value argument in EntityDeclHandler,
6191      since this would indicate an external entity; therefore we
6192      have to make sure that entityValuePool.start is not null */
6193   if (! pool->blocks) {
6194     if (! poolGrow(pool))
6195       return XML_ERROR_NO_MEMORY;
6196   }
6197 
6198   for (;;) {
6199     const char *next
6200         = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6201     int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6202 
6203     if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6204                                   account)) {
6205       accountingOnAbort(parser);
6206       result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6207       goto endEntityValue;
6208     }
6209 
6210     switch (tok) {
6211     case XML_TOK_PARAM_ENTITY_REF:
6212 #  ifdef XML_DTD
6213       if (parser->m_isParamEntity || enc != parser->m_encoding) {
6214         const XML_Char *name;
6215         ENTITY *entity;
6216         name = poolStoreString(&parser->m_tempPool, enc,
6217                                entityTextPtr + enc->minBytesPerChar,
6218                                next - enc->minBytesPerChar);
6219         if (! name) {
6220           result = XML_ERROR_NO_MEMORY;
6221           goto endEntityValue;
6222         }
6223         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6224         poolDiscard(&parser->m_tempPool);
6225         if (! entity) {
6226           /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6227           /* cannot report skipped entity here - see comments on
6228              parser->m_skippedEntityHandler
6229           if (parser->m_skippedEntityHandler)
6230             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6231           */
6232           dtd->keepProcessing = dtd->standalone;
6233           goto endEntityValue;
6234         }
6235         if (entity->open) {
6236           if (enc == parser->m_encoding)
6237             parser->m_eventPtr = entityTextPtr;
6238           result = XML_ERROR_RECURSIVE_ENTITY_REF;
6239           goto endEntityValue;
6240         }
6241         if (entity->systemId) {
6242           if (parser->m_externalEntityRefHandler) {
6243             dtd->paramEntityRead = XML_FALSE;
6244             entity->open = XML_TRUE;
6245             entityTrackingOnOpen(parser, entity, __LINE__);
6246             if (! parser->m_externalEntityRefHandler(
6247                     parser->m_externalEntityRefHandlerArg, 0, entity->base,
6248                     entity->systemId, entity->publicId)) {
6249               entityTrackingOnClose(parser, entity, __LINE__);
6250               entity->open = XML_FALSE;
6251               result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6252               goto endEntityValue;
6253             }
6254             entityTrackingOnClose(parser, entity, __LINE__);
6255             entity->open = XML_FALSE;
6256             if (! dtd->paramEntityRead)
6257               dtd->keepProcessing = dtd->standalone;
6258           } else
6259             dtd->keepProcessing = dtd->standalone;
6260         } else {
6261           entity->open = XML_TRUE;
6262           entityTrackingOnOpen(parser, entity, __LINE__);
6263           result = storeEntityValue(
6264               parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6265               (const char *)(entity->textPtr + entity->textLen),
6266               XML_ACCOUNT_ENTITY_EXPANSION);
6267           entityTrackingOnClose(parser, entity, __LINE__);
6268           entity->open = XML_FALSE;
6269           if (result)
6270             goto endEntityValue;
6271         }
6272         break;
6273       }
6274 #  endif /* XML_DTD */
6275       /* In the internal subset, PE references are not legal
6276          within markup declarations, e.g entity values in this case. */
6277       parser->m_eventPtr = entityTextPtr;
6278       result = XML_ERROR_PARAM_ENTITY_REF;
6279       goto endEntityValue;
6280     case XML_TOK_NONE:
6281       result = XML_ERROR_NONE;
6282       goto endEntityValue;
6283     case XML_TOK_ENTITY_REF:
6284     case XML_TOK_DATA_CHARS:
6285       if (! poolAppend(pool, enc, entityTextPtr, next)) {
6286         result = XML_ERROR_NO_MEMORY;
6287         goto endEntityValue;
6288       }
6289       break;
6290     case XML_TOK_TRAILING_CR:
6291       next = entityTextPtr + enc->minBytesPerChar;
6292       /* fall through */
6293     case XML_TOK_DATA_NEWLINE:
6294       if (pool->end == pool->ptr && ! poolGrow(pool)) {
6295         result = XML_ERROR_NO_MEMORY;
6296         goto endEntityValue;
6297       }
6298       *(pool->ptr)++ = 0xA;
6299       break;
6300     case XML_TOK_CHAR_REF: {
6301       XML_Char buf[XML_ENCODE_MAX];
6302       int i;
6303       int n = XmlCharRefNumber(enc, entityTextPtr);
6304       if (n < 0) {
6305         if (enc == parser->m_encoding)
6306           parser->m_eventPtr = entityTextPtr;
6307         result = XML_ERROR_BAD_CHAR_REF;
6308         goto endEntityValue;
6309       }
6310       n = XmlEncode(n, (ICHAR *)buf);
6311       /* The XmlEncode() functions can never return 0 here.  That
6312        * error return happens if the code point passed in is either
6313        * negative or greater than or equal to 0x110000.  The
6314        * XmlCharRefNumber() functions will all return a number
6315        * strictly less than 0x110000 or a negative value if an error
6316        * occurred.  The negative value is intercepted above, so
6317        * XmlEncode() is never passed a value it might return an
6318        * error for.
6319        */
6320       for (i = 0; i < n; i++) {
6321         if (pool->end == pool->ptr && ! poolGrow(pool)) {
6322           result = XML_ERROR_NO_MEMORY;
6323           goto endEntityValue;
6324         }
6325         *(pool->ptr)++ = buf[i];
6326       }
6327     } break;
6328     case XML_TOK_PARTIAL:
6329       if (enc == parser->m_encoding)
6330         parser->m_eventPtr = entityTextPtr;
6331       result = XML_ERROR_INVALID_TOKEN;
6332       goto endEntityValue;
6333     case XML_TOK_INVALID:
6334       if (enc == parser->m_encoding)
6335         parser->m_eventPtr = next;
6336       result = XML_ERROR_INVALID_TOKEN;
6337       goto endEntityValue;
6338     default:
6339       /* This default case should be unnecessary -- all the tokens
6340        * that XmlEntityValueTok() can return have their own explicit
6341        * cases -- but should be retained for safety.  We do however
6342        * exclude it from the coverage statistics.
6343        *
6344        * LCOV_EXCL_START
6345        */
6346       if (enc == parser->m_encoding)
6347         parser->m_eventPtr = entityTextPtr;
6348       result = XML_ERROR_UNEXPECTED_STATE;
6349       goto endEntityValue;
6350       /* LCOV_EXCL_STOP */
6351     }
6352     entityTextPtr = next;
6353   }
6354 endEntityValue:
6355 #  ifdef XML_DTD
6356   parser->m_prologState.inEntityValue = oldInEntityValue;
6357 #  endif /* XML_DTD */
6358   return result;
6359 }
6360 
6361 #else /* XML_GE == 0 */
6362 
6363 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6364 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6365   // This will store "&amp;entity123;" in entity->textPtr
6366   // to end up as "&entity123;" in the handler.
6367   const char *const entity_start = "&amp;";
6368   const char *const entity_end = ";";
6369 
6370   STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6371   if (! poolAppendString(pool, entity_start)
6372       || ! poolAppendString(pool, entity->name)
6373       || ! poolAppendString(pool, entity_end)) {
6374     poolDiscard(pool);
6375     return XML_ERROR_NO_MEMORY;
6376   }
6377 
6378   entity->textPtr = poolStart(pool);
6379   entity->textLen = (int)(poolLength(pool));
6380   poolFinish(pool);
6381 
6382   return XML_ERROR_NONE;
6383 }
6384 
6385 #endif /* XML_GE == 0 */
6386 
6387 static void FASTCALL
normalizeLines(XML_Char * s)6388 normalizeLines(XML_Char *s) {
6389   XML_Char *p;
6390   for (;; s++) {
6391     if (*s == XML_T('\0'))
6392       return;
6393     if (*s == 0xD)
6394       break;
6395   }
6396   p = s;
6397   do {
6398     if (*s == 0xD) {
6399       *p++ = 0xA;
6400       if (*++s == 0xA)
6401         s++;
6402     } else
6403       *p++ = *s++;
6404   } while (*s);
6405   *p = XML_T('\0');
6406 }
6407 
6408 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6409 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6410                             const char *start, const char *end) {
6411   const XML_Char *target;
6412   XML_Char *data;
6413   const char *tem;
6414   if (! parser->m_processingInstructionHandler) {
6415     if (parser->m_defaultHandler)
6416       reportDefault(parser, enc, start, end);
6417     return 1;
6418   }
6419   start += enc->minBytesPerChar * 2;
6420   tem = start + XmlNameLength(enc, start);
6421   target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6422   if (! target)
6423     return 0;
6424   poolFinish(&parser->m_tempPool);
6425   data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6426                          end - enc->minBytesPerChar * 2);
6427   if (! data)
6428     return 0;
6429   normalizeLines(data);
6430   parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6431   poolClear(&parser->m_tempPool);
6432   return 1;
6433 }
6434 
6435 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6436 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6437               const char *end) {
6438   XML_Char *data;
6439   if (! parser->m_commentHandler) {
6440     if (parser->m_defaultHandler)
6441       reportDefault(parser, enc, start, end);
6442     return 1;
6443   }
6444   data = poolStoreString(&parser->m_tempPool, enc,
6445                          start + enc->minBytesPerChar * 4,
6446                          end - enc->minBytesPerChar * 3);
6447   if (! data)
6448     return 0;
6449   normalizeLines(data);
6450   parser->m_commentHandler(parser->m_handlerArg, data);
6451   poolClear(&parser->m_tempPool);
6452   return 1;
6453 }
6454 
6455 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6456 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6457               const char *end) {
6458   if (MUST_CONVERT(enc, s)) {
6459     enum XML_Convert_Result convert_res;
6460     const char **eventPP;
6461     const char **eventEndPP;
6462     if (enc == parser->m_encoding) {
6463       eventPP = &parser->m_eventPtr;
6464       eventEndPP = &parser->m_eventEndPtr;
6465     } else {
6466       /* To get here, two things must be true; the parser must be
6467        * using a character encoding that is not the same as the
6468        * encoding passed in, and the encoding passed in must need
6469        * conversion to the internal format (UTF-8 unless XML_UNICODE
6470        * is defined).  The only occasions on which the encoding passed
6471        * in is not the same as the parser's encoding are when it is
6472        * the internal encoding (e.g. a previously defined parameter
6473        * entity, already converted to internal format).  This by
6474        * definition doesn't need conversion, so the whole branch never
6475        * gets executed.
6476        *
6477        * For safety's sake we don't delete these lines and merely
6478        * exclude them from coverage statistics.
6479        *
6480        * LCOV_EXCL_START
6481        */
6482       eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6483       eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6484       /* LCOV_EXCL_STOP */
6485     }
6486     do {
6487       ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6488       convert_res
6489           = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6490       *eventEndPP = s;
6491       parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6492                                (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6493       *eventPP = s;
6494     } while ((convert_res != XML_CONVERT_COMPLETED)
6495              && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6496   } else
6497     parser->m_defaultHandler(
6498         parser->m_handlerArg, (const XML_Char *)s,
6499         (int)((const XML_Char *)end - (const XML_Char *)s));
6500 }
6501 
6502 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6503 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6504                 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6505   DEFAULT_ATTRIBUTE *att;
6506   if (value || isId) {
6507     /* The handling of default attributes gets messed up if we have
6508        a default which duplicates a non-default. */
6509     int i;
6510     for (i = 0; i < type->nDefaultAtts; i++)
6511       if (attId == type->defaultAtts[i].id)
6512         return 1;
6513     if (isId && ! type->idAtt && ! attId->xmlns)
6514       type->idAtt = attId;
6515   }
6516   if (type->nDefaultAtts == type->allocDefaultAtts) {
6517     if (type->allocDefaultAtts == 0) {
6518       type->allocDefaultAtts = 8;
6519       type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6520           parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6521       if (! type->defaultAtts) {
6522         type->allocDefaultAtts = 0;
6523         return 0;
6524       }
6525     } else {
6526       DEFAULT_ATTRIBUTE *temp;
6527 
6528       /* Detect and prevent integer overflow */
6529       if (type->allocDefaultAtts > INT_MAX / 2) {
6530         return 0;
6531       }
6532 
6533       int count = type->allocDefaultAtts * 2;
6534 
6535       /* Detect and prevent integer overflow.
6536        * The preprocessor guard addresses the "always false" warning
6537        * from -Wtype-limits on platforms where
6538        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6539 #if UINT_MAX >= SIZE_MAX
6540       if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6541         return 0;
6542       }
6543 #endif
6544 
6545       temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6546                                           (count * sizeof(DEFAULT_ATTRIBUTE)));
6547       if (temp == NULL)
6548         return 0;
6549       type->allocDefaultAtts = count;
6550       type->defaultAtts = temp;
6551     }
6552   }
6553   att = type->defaultAtts + type->nDefaultAtts;
6554   att->id = attId;
6555   att->value = value;
6556   att->isCdata = isCdata;
6557   if (! isCdata)
6558     attId->maybeTokenized = XML_TRUE;
6559   type->nDefaultAtts += 1;
6560   return 1;
6561 }
6562 
6563 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6564 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6565   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6566   const XML_Char *name;
6567   for (name = elementType->name; *name; name++) {
6568     if (*name == XML_T(ASCII_COLON)) {
6569       PREFIX *prefix;
6570       const XML_Char *s;
6571       for (s = elementType->name; s != name; s++) {
6572         if (! poolAppendChar(&dtd->pool, *s))
6573           return 0;
6574       }
6575       if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6576         return 0;
6577       prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6578                                 sizeof(PREFIX));
6579       if (! prefix)
6580         return 0;
6581       if (prefix->name == poolStart(&dtd->pool))
6582         poolFinish(&dtd->pool);
6583       else
6584         poolDiscard(&dtd->pool);
6585       elementType->prefix = prefix;
6586       break;
6587     }
6588   }
6589   return 1;
6590 }
6591 
6592 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6593 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6594                const char *end) {
6595   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6596   ATTRIBUTE_ID *id;
6597   const XML_Char *name;
6598   if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6599     return NULL;
6600   name = poolStoreString(&dtd->pool, enc, start, end);
6601   if (! name)
6602     return NULL;
6603   /* skip quotation mark - its storage will be reused (like in name[-1]) */
6604   ++name;
6605   id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6606                               sizeof(ATTRIBUTE_ID));
6607   if (! id)
6608     return NULL;
6609   if (id->name != name)
6610     poolDiscard(&dtd->pool);
6611   else {
6612     poolFinish(&dtd->pool);
6613     if (! parser->m_ns)
6614       ;
6615     else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6616              && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6617              && name[4] == XML_T(ASCII_s)
6618              && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6619       if (name[5] == XML_T('\0'))
6620         id->prefix = &dtd->defaultPrefix;
6621       else
6622         id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6623                                       sizeof(PREFIX));
6624       id->xmlns = XML_TRUE;
6625     } else {
6626       int i;
6627       for (i = 0; name[i]; i++) {
6628         /* attributes without prefix are *not* in the default namespace */
6629         if (name[i] == XML_T(ASCII_COLON)) {
6630           int j;
6631           for (j = 0; j < i; j++) {
6632             if (! poolAppendChar(&dtd->pool, name[j]))
6633               return NULL;
6634           }
6635           if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6636             return NULL;
6637           id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6638                                         poolStart(&dtd->pool), sizeof(PREFIX));
6639           if (! id->prefix)
6640             return NULL;
6641           if (id->prefix->name == poolStart(&dtd->pool))
6642             poolFinish(&dtd->pool);
6643           else
6644             poolDiscard(&dtd->pool);
6645           break;
6646         }
6647       }
6648     }
6649   }
6650   return id;
6651 }
6652 
6653 #define CONTEXT_SEP XML_T(ASCII_FF)
6654 
6655 static const XML_Char *
getContext(XML_Parser parser)6656 getContext(XML_Parser parser) {
6657   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6658   HASH_TABLE_ITER iter;
6659   XML_Bool needSep = XML_FALSE;
6660 
6661   if (dtd->defaultPrefix.binding) {
6662     int i;
6663     int len;
6664     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6665       return NULL;
6666     len = dtd->defaultPrefix.binding->uriLen;
6667     if (parser->m_namespaceSeparator)
6668       len--;
6669     for (i = 0; i < len; i++) {
6670       if (! poolAppendChar(&parser->m_tempPool,
6671                            dtd->defaultPrefix.binding->uri[i])) {
6672         /* Because of memory caching, I don't believe this line can be
6673          * executed.
6674          *
6675          * This is part of a loop copying the default prefix binding
6676          * URI into the parser's temporary string pool.  Previously,
6677          * that URI was copied into the same string pool, with a
6678          * terminating NUL character, as part of setContext().  When
6679          * the pool was cleared, that leaves a block definitely big
6680          * enough to hold the URI on the free block list of the pool.
6681          * The URI copy in getContext() therefore cannot run out of
6682          * memory.
6683          *
6684          * If the pool is used between the setContext() and
6685          * getContext() calls, the worst it can do is leave a bigger
6686          * block on the front of the free list.  Given that this is
6687          * all somewhat inobvious and program logic can be changed, we
6688          * don't delete the line but we do exclude it from the test
6689          * coverage statistics.
6690          */
6691         return NULL; /* LCOV_EXCL_LINE */
6692       }
6693     }
6694     needSep = XML_TRUE;
6695   }
6696 
6697   hashTableIterInit(&iter, &(dtd->prefixes));
6698   for (;;) {
6699     int i;
6700     int len;
6701     const XML_Char *s;
6702     PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6703     if (! prefix)
6704       break;
6705     if (! prefix->binding) {
6706       /* This test appears to be (justifiable) paranoia.  There does
6707        * not seem to be a way of injecting a prefix without a binding
6708        * that doesn't get errored long before this function is called.
6709        * The test should remain for safety's sake, so we instead
6710        * exclude the following line from the coverage statistics.
6711        */
6712       continue; /* LCOV_EXCL_LINE */
6713     }
6714     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6715       return NULL;
6716     for (s = prefix->name; *s; s++)
6717       if (! poolAppendChar(&parser->m_tempPool, *s))
6718         return NULL;
6719     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6720       return NULL;
6721     len = prefix->binding->uriLen;
6722     if (parser->m_namespaceSeparator)
6723       len--;
6724     for (i = 0; i < len; i++)
6725       if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6726         return NULL;
6727     needSep = XML_TRUE;
6728   }
6729 
6730   hashTableIterInit(&iter, &(dtd->generalEntities));
6731   for (;;) {
6732     const XML_Char *s;
6733     ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6734     if (! e)
6735       break;
6736     if (! e->open)
6737       continue;
6738     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6739       return NULL;
6740     for (s = e->name; *s; s++)
6741       if (! poolAppendChar(&parser->m_tempPool, *s))
6742         return 0;
6743     needSep = XML_TRUE;
6744   }
6745 
6746   if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6747     return NULL;
6748   return parser->m_tempPool.start;
6749 }
6750 
6751 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)6752 setContext(XML_Parser parser, const XML_Char *context) {
6753   if (context == NULL) {
6754     return XML_FALSE;
6755   }
6756 
6757   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6758   const XML_Char *s = context;
6759 
6760   while (*context != XML_T('\0')) {
6761     if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6762       ENTITY *e;
6763       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6764         return XML_FALSE;
6765       e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6766                            poolStart(&parser->m_tempPool), 0);
6767       if (e)
6768         e->open = XML_TRUE;
6769       if (*s != XML_T('\0'))
6770         s++;
6771       context = s;
6772       poolDiscard(&parser->m_tempPool);
6773     } else if (*s == XML_T(ASCII_EQUALS)) {
6774       PREFIX *prefix;
6775       if (poolLength(&parser->m_tempPool) == 0)
6776         prefix = &dtd->defaultPrefix;
6777       else {
6778         if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6779           return XML_FALSE;
6780         prefix
6781             = (PREFIX *)lookup(parser, &dtd->prefixes,
6782                                poolStart(&parser->m_tempPool), sizeof(PREFIX));
6783         if (! prefix)
6784           return XML_FALSE;
6785         if (prefix->name == poolStart(&parser->m_tempPool)) {
6786           prefix->name = poolCopyString(&dtd->pool, prefix->name);
6787           if (! prefix->name)
6788             return XML_FALSE;
6789         }
6790         poolDiscard(&parser->m_tempPool);
6791       }
6792       for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6793            context++)
6794         if (! poolAppendChar(&parser->m_tempPool, *context))
6795           return XML_FALSE;
6796       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6797         return XML_FALSE;
6798       if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6799                      &parser->m_inheritedBindings)
6800           != XML_ERROR_NONE)
6801         return XML_FALSE;
6802       poolDiscard(&parser->m_tempPool);
6803       if (*context != XML_T('\0'))
6804         ++context;
6805       s = context;
6806     } else {
6807       if (! poolAppendChar(&parser->m_tempPool, *s))
6808         return XML_FALSE;
6809       s++;
6810     }
6811   }
6812   return XML_TRUE;
6813 }
6814 
6815 static void FASTCALL
normalizePublicId(XML_Char * publicId)6816 normalizePublicId(XML_Char *publicId) {
6817   XML_Char *p = publicId;
6818   XML_Char *s;
6819   for (s = publicId; *s; s++) {
6820     switch (*s) {
6821     case 0x20:
6822     case 0xD:
6823     case 0xA:
6824       if (p != publicId && p[-1] != 0x20)
6825         *p++ = 0x20;
6826       break;
6827     default:
6828       *p++ = *s;
6829     }
6830   }
6831   if (p != publicId && p[-1] == 0x20)
6832     --p;
6833   *p = XML_T('\0');
6834 }
6835 
6836 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6837 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6838   DTD *p = ms->malloc_fcn(sizeof(DTD));
6839   if (p == NULL)
6840     return p;
6841   poolInit(&(p->pool), ms);
6842   poolInit(&(p->entityValuePool), ms);
6843   hashTableInit(&(p->generalEntities), ms);
6844   hashTableInit(&(p->elementTypes), ms);
6845   hashTableInit(&(p->attributeIds), ms);
6846   hashTableInit(&(p->prefixes), ms);
6847 #ifdef XML_DTD
6848   p->paramEntityRead = XML_FALSE;
6849   hashTableInit(&(p->paramEntities), ms);
6850 #endif /* XML_DTD */
6851   p->defaultPrefix.name = NULL;
6852   p->defaultPrefix.binding = NULL;
6853 
6854   p->in_eldecl = XML_FALSE;
6855   p->scaffIndex = NULL;
6856   p->scaffold = NULL;
6857   p->scaffLevel = 0;
6858   p->scaffSize = 0;
6859   p->scaffCount = 0;
6860   p->contentStringLen = 0;
6861 
6862   p->keepProcessing = XML_TRUE;
6863   p->hasParamEntityRefs = XML_FALSE;
6864   p->standalone = XML_FALSE;
6865   return p;
6866 }
6867 
6868 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6869 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6870   HASH_TABLE_ITER iter;
6871   hashTableIterInit(&iter, &(p->elementTypes));
6872   for (;;) {
6873     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6874     if (! e)
6875       break;
6876     if (e->allocDefaultAtts != 0)
6877       ms->free_fcn(e->defaultAtts);
6878   }
6879   hashTableClear(&(p->generalEntities));
6880 #ifdef XML_DTD
6881   p->paramEntityRead = XML_FALSE;
6882   hashTableClear(&(p->paramEntities));
6883 #endif /* XML_DTD */
6884   hashTableClear(&(p->elementTypes));
6885   hashTableClear(&(p->attributeIds));
6886   hashTableClear(&(p->prefixes));
6887   poolClear(&(p->pool));
6888   poolClear(&(p->entityValuePool));
6889   p->defaultPrefix.name = NULL;
6890   p->defaultPrefix.binding = NULL;
6891 
6892   p->in_eldecl = XML_FALSE;
6893 
6894   ms->free_fcn(p->scaffIndex);
6895   p->scaffIndex = NULL;
6896   ms->free_fcn(p->scaffold);
6897   p->scaffold = NULL;
6898 
6899   p->scaffLevel = 0;
6900   p->scaffSize = 0;
6901   p->scaffCount = 0;
6902   p->contentStringLen = 0;
6903 
6904   p->keepProcessing = XML_TRUE;
6905   p->hasParamEntityRefs = XML_FALSE;
6906   p->standalone = XML_FALSE;
6907 }
6908 
6909 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6910 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6911   HASH_TABLE_ITER iter;
6912   hashTableIterInit(&iter, &(p->elementTypes));
6913   for (;;) {
6914     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6915     if (! e)
6916       break;
6917     if (e->allocDefaultAtts != 0)
6918       ms->free_fcn(e->defaultAtts);
6919   }
6920   hashTableDestroy(&(p->generalEntities));
6921 #ifdef XML_DTD
6922   hashTableDestroy(&(p->paramEntities));
6923 #endif /* XML_DTD */
6924   hashTableDestroy(&(p->elementTypes));
6925   hashTableDestroy(&(p->attributeIds));
6926   hashTableDestroy(&(p->prefixes));
6927   poolDestroy(&(p->pool));
6928   poolDestroy(&(p->entityValuePool));
6929   if (isDocEntity) {
6930     ms->free_fcn(p->scaffIndex);
6931     ms->free_fcn(p->scaffold);
6932   }
6933   ms->free_fcn(p);
6934 }
6935 
6936 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6937    The new DTD has already been initialized.
6938 */
6939 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6940 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6941         const XML_Memory_Handling_Suite *ms) {
6942   HASH_TABLE_ITER iter;
6943 
6944   /* Copy the prefix table. */
6945 
6946   hashTableIterInit(&iter, &(oldDtd->prefixes));
6947   for (;;) {
6948     const XML_Char *name;
6949     const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6950     if (! oldP)
6951       break;
6952     name = poolCopyString(&(newDtd->pool), oldP->name);
6953     if (! name)
6954       return 0;
6955     if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6956       return 0;
6957   }
6958 
6959   hashTableIterInit(&iter, &(oldDtd->attributeIds));
6960 
6961   /* Copy the attribute id table. */
6962 
6963   for (;;) {
6964     ATTRIBUTE_ID *newA;
6965     const XML_Char *name;
6966     const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6967 
6968     if (! oldA)
6969       break;
6970     /* Remember to allocate the scratch byte before the name. */
6971     if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6972       return 0;
6973     name = poolCopyString(&(newDtd->pool), oldA->name);
6974     if (! name)
6975       return 0;
6976     ++name;
6977     newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6978                                   sizeof(ATTRIBUTE_ID));
6979     if (! newA)
6980       return 0;
6981     newA->maybeTokenized = oldA->maybeTokenized;
6982     if (oldA->prefix) {
6983       newA->xmlns = oldA->xmlns;
6984       if (oldA->prefix == &oldDtd->defaultPrefix)
6985         newA->prefix = &newDtd->defaultPrefix;
6986       else
6987         newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6988                                         oldA->prefix->name, 0);
6989     }
6990   }
6991 
6992   /* Copy the element type table. */
6993 
6994   hashTableIterInit(&iter, &(oldDtd->elementTypes));
6995 
6996   for (;;) {
6997     int i;
6998     ELEMENT_TYPE *newE;
6999     const XML_Char *name;
7000     const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7001     if (! oldE)
7002       break;
7003     name = poolCopyString(&(newDtd->pool), oldE->name);
7004     if (! name)
7005       return 0;
7006     newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7007                                   sizeof(ELEMENT_TYPE));
7008     if (! newE)
7009       return 0;
7010     if (oldE->nDefaultAtts) {
7011       newE->defaultAtts
7012           = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7013       if (! newE->defaultAtts) {
7014         return 0;
7015       }
7016     }
7017     if (oldE->idAtt)
7018       newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7019                                            oldE->idAtt->name, 0);
7020     newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7021     if (oldE->prefix)
7022       newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7023                                       oldE->prefix->name, 0);
7024     for (i = 0; i < newE->nDefaultAtts; i++) {
7025       newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7026           oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7027       newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7028       if (oldE->defaultAtts[i].value) {
7029         newE->defaultAtts[i].value
7030             = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7031         if (! newE->defaultAtts[i].value)
7032           return 0;
7033       } else
7034         newE->defaultAtts[i].value = NULL;
7035     }
7036   }
7037 
7038   /* Copy the entity tables. */
7039   if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7040                         &(oldDtd->generalEntities)))
7041     return 0;
7042 
7043 #ifdef XML_DTD
7044   if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7045                         &(oldDtd->paramEntities)))
7046     return 0;
7047   newDtd->paramEntityRead = oldDtd->paramEntityRead;
7048 #endif /* XML_DTD */
7049 
7050   newDtd->keepProcessing = oldDtd->keepProcessing;
7051   newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7052   newDtd->standalone = oldDtd->standalone;
7053 
7054   /* Don't want deep copying for scaffolding */
7055   newDtd->in_eldecl = oldDtd->in_eldecl;
7056   newDtd->scaffold = oldDtd->scaffold;
7057   newDtd->contentStringLen = oldDtd->contentStringLen;
7058   newDtd->scaffSize = oldDtd->scaffSize;
7059   newDtd->scaffLevel = oldDtd->scaffLevel;
7060   newDtd->scaffIndex = oldDtd->scaffIndex;
7061 
7062   return 1;
7063 } /* End dtdCopy */
7064 
7065 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7066 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7067                 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7068   HASH_TABLE_ITER iter;
7069   const XML_Char *cachedOldBase = NULL;
7070   const XML_Char *cachedNewBase = NULL;
7071 
7072   hashTableIterInit(&iter, oldTable);
7073 
7074   for (;;) {
7075     ENTITY *newE;
7076     const XML_Char *name;
7077     const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7078     if (! oldE)
7079       break;
7080     name = poolCopyString(newPool, oldE->name);
7081     if (! name)
7082       return 0;
7083     newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7084     if (! newE)
7085       return 0;
7086     if (oldE->systemId) {
7087       const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7088       if (! tem)
7089         return 0;
7090       newE->systemId = tem;
7091       if (oldE->base) {
7092         if (oldE->base == cachedOldBase)
7093           newE->base = cachedNewBase;
7094         else {
7095           cachedOldBase = oldE->base;
7096           tem = poolCopyString(newPool, cachedOldBase);
7097           if (! tem)
7098             return 0;
7099           cachedNewBase = newE->base = tem;
7100         }
7101       }
7102       if (oldE->publicId) {
7103         tem = poolCopyString(newPool, oldE->publicId);
7104         if (! tem)
7105           return 0;
7106         newE->publicId = tem;
7107       }
7108     } else {
7109       const XML_Char *tem
7110           = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7111       if (! tem)
7112         return 0;
7113       newE->textPtr = tem;
7114       newE->textLen = oldE->textLen;
7115     }
7116     if (oldE->notation) {
7117       const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7118       if (! tem)
7119         return 0;
7120       newE->notation = tem;
7121     }
7122     newE->is_param = oldE->is_param;
7123     newE->is_internal = oldE->is_internal;
7124   }
7125   return 1;
7126 }
7127 
7128 #define INIT_POWER 6
7129 
7130 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7131 keyeq(KEY s1, KEY s2) {
7132   for (; *s1 == *s2; s1++, s2++)
7133     if (*s1 == 0)
7134       return XML_TRUE;
7135   return XML_FALSE;
7136 }
7137 
7138 static size_t
keylen(KEY s)7139 keylen(KEY s) {
7140   size_t len = 0;
7141   for (; *s; s++, len++)
7142     ;
7143   return len;
7144 }
7145 
7146 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7147 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7148   key->k[0] = 0;
7149   key->k[1] = get_hash_secret_salt(parser);
7150 }
7151 
7152 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7153 hash(XML_Parser parser, KEY s) {
7154   struct siphash state;
7155   struct sipkey key;
7156   (void)sip24_valid;
7157   copy_salt_to_sipkey(parser, &key);
7158   sip24_init(&state, &key);
7159   sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7160   return (unsigned long)sip24_final(&state);
7161 }
7162 
7163 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)7164 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7165   size_t i;
7166   if (table->size == 0) {
7167     size_t tsize;
7168     if (! createSize)
7169       return NULL;
7170     table->power = INIT_POWER;
7171     /* table->size is a power of 2 */
7172     table->size = (size_t)1 << INIT_POWER;
7173     tsize = table->size * sizeof(NAMED *);
7174     table->v = table->mem->malloc_fcn(tsize);
7175     if (! table->v) {
7176       table->size = 0;
7177       return NULL;
7178     }
7179     memset(table->v, 0, tsize);
7180     i = hash(parser, name) & ((unsigned long)table->size - 1);
7181   } else {
7182     unsigned long h = hash(parser, name);
7183     unsigned long mask = (unsigned long)table->size - 1;
7184     unsigned char step = 0;
7185     i = h & mask;
7186     while (table->v[i]) {
7187       if (keyeq(name, table->v[i]->name))
7188         return table->v[i];
7189       if (! step)
7190         step = PROBE_STEP(h, mask, table->power);
7191       i < step ? (i += table->size - step) : (i -= step);
7192     }
7193     if (! createSize)
7194       return NULL;
7195 
7196     /* check for overflow (table is half full) */
7197     if (table->used >> (table->power - 1)) {
7198       unsigned char newPower = table->power + 1;
7199 
7200       /* Detect and prevent invalid shift */
7201       if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7202         return NULL;
7203       }
7204 
7205       size_t newSize = (size_t)1 << newPower;
7206       unsigned long newMask = (unsigned long)newSize - 1;
7207 
7208       /* Detect and prevent integer overflow */
7209       if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
7210         return NULL;
7211       }
7212 
7213       size_t tsize = newSize * sizeof(NAMED *);
7214       NAMED **newV = table->mem->malloc_fcn(tsize);
7215       if (! newV)
7216         return NULL;
7217       memset(newV, 0, tsize);
7218       for (i = 0; i < table->size; i++)
7219         if (table->v[i]) {
7220           unsigned long newHash = hash(parser, table->v[i]->name);
7221           size_t j = newHash & newMask;
7222           step = 0;
7223           while (newV[j]) {
7224             if (! step)
7225               step = PROBE_STEP(newHash, newMask, newPower);
7226             j < step ? (j += newSize - step) : (j -= step);
7227           }
7228           newV[j] = table->v[i];
7229         }
7230       table->mem->free_fcn(table->v);
7231       table->v = newV;
7232       table->power = newPower;
7233       table->size = newSize;
7234       i = h & newMask;
7235       step = 0;
7236       while (table->v[i]) {
7237         if (! step)
7238           step = PROBE_STEP(h, newMask, newPower);
7239         i < step ? (i += newSize - step) : (i -= step);
7240       }
7241     }
7242   }
7243   table->v[i] = table->mem->malloc_fcn(createSize);
7244   if (! table->v[i])
7245     return NULL;
7246   memset(table->v[i], 0, createSize);
7247   table->v[i]->name = name;
7248   (table->used)++;
7249   return table->v[i];
7250 }
7251 
7252 static void FASTCALL
hashTableClear(HASH_TABLE * table)7253 hashTableClear(HASH_TABLE *table) {
7254   size_t i;
7255   for (i = 0; i < table->size; i++) {
7256     table->mem->free_fcn(table->v[i]);
7257     table->v[i] = NULL;
7258   }
7259   table->used = 0;
7260 }
7261 
7262 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7263 hashTableDestroy(HASH_TABLE *table) {
7264   size_t i;
7265   for (i = 0; i < table->size; i++)
7266     table->mem->free_fcn(table->v[i]);
7267   table->mem->free_fcn(table->v);
7268 }
7269 
7270 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7271 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7272   p->power = 0;
7273   p->size = 0;
7274   p->used = 0;
7275   p->v = NULL;
7276   p->mem = ms;
7277 }
7278 
7279 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7280 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7281   iter->p = table->v;
7282   iter->end = iter->p ? iter->p + table->size : NULL;
7283 }
7284 
7285 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7286 hashTableIterNext(HASH_TABLE_ITER *iter) {
7287   while (iter->p != iter->end) {
7288     NAMED *tem = *(iter->p)++;
7289     if (tem)
7290       return tem;
7291   }
7292   return NULL;
7293 }
7294 
7295 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7296 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7297   pool->blocks = NULL;
7298   pool->freeBlocks = NULL;
7299   pool->start = NULL;
7300   pool->ptr = NULL;
7301   pool->end = NULL;
7302   pool->mem = ms;
7303 }
7304 
7305 static void FASTCALL
poolClear(STRING_POOL * pool)7306 poolClear(STRING_POOL *pool) {
7307   if (! pool->freeBlocks)
7308     pool->freeBlocks = pool->blocks;
7309   else {
7310     BLOCK *p = pool->blocks;
7311     while (p) {
7312       BLOCK *tem = p->next;
7313       p->next = pool->freeBlocks;
7314       pool->freeBlocks = p;
7315       p = tem;
7316     }
7317   }
7318   pool->blocks = NULL;
7319   pool->start = NULL;
7320   pool->ptr = NULL;
7321   pool->end = NULL;
7322 }
7323 
7324 static void FASTCALL
poolDestroy(STRING_POOL * pool)7325 poolDestroy(STRING_POOL *pool) {
7326   BLOCK *p = pool->blocks;
7327   while (p) {
7328     BLOCK *tem = p->next;
7329     pool->mem->free_fcn(p);
7330     p = tem;
7331   }
7332   p = pool->freeBlocks;
7333   while (p) {
7334     BLOCK *tem = p->next;
7335     pool->mem->free_fcn(p);
7336     p = tem;
7337   }
7338 }
7339 
7340 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7341 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7342            const char *end) {
7343   if (! pool->ptr && ! poolGrow(pool))
7344     return NULL;
7345   for (;;) {
7346     const enum XML_Convert_Result convert_res = XmlConvert(
7347         enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7348     if ((convert_res == XML_CONVERT_COMPLETED)
7349         || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7350       break;
7351     if (! poolGrow(pool))
7352       return NULL;
7353   }
7354   return pool->start;
7355 }
7356 
7357 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7358 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7359   do {
7360     if (! poolAppendChar(pool, *s))
7361       return NULL;
7362   } while (*s++);
7363   s = pool->start;
7364   poolFinish(pool);
7365   return s;
7366 }
7367 
7368 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7369 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7370   if (! pool->ptr && ! poolGrow(pool)) {
7371     /* The following line is unreachable given the current usage of
7372      * poolCopyStringN().  Currently it is called from exactly one
7373      * place to copy the text of a simple general entity.  By that
7374      * point, the name of the entity is already stored in the pool, so
7375      * pool->ptr cannot be NULL.
7376      *
7377      * If poolCopyStringN() is used elsewhere as it well might be,
7378      * this line may well become executable again.  Regardless, this
7379      * sort of check shouldn't be removed lightly, so we just exclude
7380      * it from the coverage statistics.
7381      */
7382     return NULL; /* LCOV_EXCL_LINE */
7383   }
7384   for (; n > 0; --n, s++) {
7385     if (! poolAppendChar(pool, *s))
7386       return NULL;
7387   }
7388   s = pool->start;
7389   poolFinish(pool);
7390   return s;
7391 }
7392 
7393 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7394 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7395   while (*s) {
7396     if (! poolAppendChar(pool, *s))
7397       return NULL;
7398     s++;
7399   }
7400   return pool->start;
7401 }
7402 
7403 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7404 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7405                 const char *end) {
7406   if (! poolAppend(pool, enc, ptr, end))
7407     return NULL;
7408   if (pool->ptr == pool->end && ! poolGrow(pool))
7409     return NULL;
7410   *(pool->ptr)++ = 0;
7411   return pool->start;
7412 }
7413 
7414 static size_t
poolBytesToAllocateFor(int blockSize)7415 poolBytesToAllocateFor(int blockSize) {
7416   /* Unprotected math would be:
7417   ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7418   **
7419   ** Detect overflow, avoiding _signed_ overflow undefined behavior
7420   ** For a + b * c we check b * c in isolation first, so that addition of a
7421   ** on top has no chance of making us accept a small non-negative number
7422   */
7423   const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7424 
7425   if (blockSize <= 0)
7426     return 0;
7427 
7428   if (blockSize > (int)(INT_MAX / stretch))
7429     return 0;
7430 
7431   {
7432     const int stretchedBlockSize = blockSize * (int)stretch;
7433     const int bytesToAllocate
7434         = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7435     if (bytesToAllocate < 0)
7436       return 0;
7437 
7438     return (size_t)bytesToAllocate;
7439   }
7440 }
7441 
7442 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)7443 poolGrow(STRING_POOL *pool) {
7444   if (pool->freeBlocks) {
7445     if (pool->start == 0) {
7446       pool->blocks = pool->freeBlocks;
7447       pool->freeBlocks = pool->freeBlocks->next;
7448       pool->blocks->next = NULL;
7449       pool->start = pool->blocks->s;
7450       pool->end = pool->start + pool->blocks->size;
7451       pool->ptr = pool->start;
7452       return XML_TRUE;
7453     }
7454     if (pool->end - pool->start < pool->freeBlocks->size) {
7455       BLOCK *tem = pool->freeBlocks->next;
7456       pool->freeBlocks->next = pool->blocks;
7457       pool->blocks = pool->freeBlocks;
7458       pool->freeBlocks = tem;
7459       memcpy(pool->blocks->s, pool->start,
7460              (pool->end - pool->start) * sizeof(XML_Char));
7461       pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7462       pool->start = pool->blocks->s;
7463       pool->end = pool->start + pool->blocks->size;
7464       return XML_TRUE;
7465     }
7466   }
7467   if (pool->blocks && pool->start == pool->blocks->s) {
7468     BLOCK *temp;
7469     int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7470     size_t bytesToAllocate;
7471 
7472     /* NOTE: Needs to be calculated prior to calling `realloc`
7473              to avoid dangling pointers: */
7474     const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7475 
7476     if (blockSize < 0) {
7477       /* This condition traps a situation where either more than
7478        * INT_MAX/2 bytes have already been allocated.  This isn't
7479        * readily testable, since it is unlikely that an average
7480        * machine will have that much memory, so we exclude it from the
7481        * coverage statistics.
7482        */
7483       return XML_FALSE; /* LCOV_EXCL_LINE */
7484     }
7485 
7486     bytesToAllocate = poolBytesToAllocateFor(blockSize);
7487     if (bytesToAllocate == 0)
7488       return XML_FALSE;
7489 
7490     temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7491                                            (unsigned)bytesToAllocate);
7492     if (temp == NULL)
7493       return XML_FALSE;
7494     pool->blocks = temp;
7495     pool->blocks->size = blockSize;
7496     pool->ptr = pool->blocks->s + offsetInsideBlock;
7497     pool->start = pool->blocks->s;
7498     pool->end = pool->start + blockSize;
7499   } else {
7500     BLOCK *tem;
7501     int blockSize = (int)(pool->end - pool->start);
7502     size_t bytesToAllocate;
7503 
7504     if (blockSize < 0) {
7505       /* This condition traps a situation where either more than
7506        * INT_MAX bytes have already been allocated (which is prevented
7507        * by various pieces of program logic, not least this one, never
7508        * mind the unlikelihood of actually having that much memory) or
7509        * the pool control fields have been corrupted (which could
7510        * conceivably happen in an extremely buggy user handler
7511        * function).  Either way it isn't readily testable, so we
7512        * exclude it from the coverage statistics.
7513        */
7514       return XML_FALSE; /* LCOV_EXCL_LINE */
7515     }
7516 
7517     if (blockSize < INIT_BLOCK_SIZE)
7518       blockSize = INIT_BLOCK_SIZE;
7519     else {
7520       /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7521       if ((int)((unsigned)blockSize * 2U) < 0) {
7522         return XML_FALSE;
7523       }
7524       blockSize *= 2;
7525     }
7526 
7527     bytesToAllocate = poolBytesToAllocateFor(blockSize);
7528     if (bytesToAllocate == 0)
7529       return XML_FALSE;
7530 
7531     tem = pool->mem->malloc_fcn(bytesToAllocate);
7532     if (! tem)
7533       return XML_FALSE;
7534     tem->size = blockSize;
7535     tem->next = pool->blocks;
7536     pool->blocks = tem;
7537     if (pool->ptr != pool->start)
7538       memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7539     pool->ptr = tem->s + (pool->ptr - pool->start);
7540     pool->start = tem->s;
7541     pool->end = tem->s + blockSize;
7542   }
7543   return XML_TRUE;
7544 }
7545 
7546 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7547 nextScaffoldPart(XML_Parser parser) {
7548   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7549   CONTENT_SCAFFOLD *me;
7550   int next;
7551 
7552   if (! dtd->scaffIndex) {
7553     dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7554     if (! dtd->scaffIndex)
7555       return -1;
7556     dtd->scaffIndex[0] = 0;
7557   }
7558 
7559   if (dtd->scaffCount >= dtd->scaffSize) {
7560     CONTENT_SCAFFOLD *temp;
7561     if (dtd->scaffold) {
7562       /* Detect and prevent integer overflow */
7563       if (dtd->scaffSize > UINT_MAX / 2u) {
7564         return -1;
7565       }
7566       /* Detect and prevent integer overflow.
7567        * The preprocessor guard addresses the "always false" warning
7568        * from -Wtype-limits on platforms where
7569        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7570 #if UINT_MAX >= SIZE_MAX
7571       if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7572         return -1;
7573       }
7574 #endif
7575 
7576       temp = (CONTENT_SCAFFOLD *)REALLOC(
7577           parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7578       if (temp == NULL)
7579         return -1;
7580       dtd->scaffSize *= 2;
7581     } else {
7582       temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7583                                                     * sizeof(CONTENT_SCAFFOLD));
7584       if (temp == NULL)
7585         return -1;
7586       dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7587     }
7588     dtd->scaffold = temp;
7589   }
7590   next = dtd->scaffCount++;
7591   me = &dtd->scaffold[next];
7592   if (dtd->scaffLevel) {
7593     CONTENT_SCAFFOLD *parent
7594         = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7595     if (parent->lastchild) {
7596       dtd->scaffold[parent->lastchild].nextsib = next;
7597     }
7598     if (! parent->childcnt)
7599       parent->firstchild = next;
7600     parent->lastchild = next;
7601     parent->childcnt++;
7602   }
7603   me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7604   return next;
7605 }
7606 
7607 static XML_Content *
build_model(XML_Parser parser)7608 build_model(XML_Parser parser) {
7609   /* Function build_model transforms the existing parser->m_dtd->scaffold
7610    * array of CONTENT_SCAFFOLD tree nodes into a new array of
7611    * XML_Content tree nodes followed by a gapless list of zero-terminated
7612    * strings. */
7613   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7614   XML_Content *ret;
7615   XML_Char *str; /* the current string writing location */
7616 
7617   /* Detect and prevent integer overflow.
7618    * The preprocessor guard addresses the "always false" warning
7619    * from -Wtype-limits on platforms where
7620    * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7621 #if UINT_MAX >= SIZE_MAX
7622   if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7623     return NULL;
7624   }
7625   if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7626     return NULL;
7627   }
7628 #endif
7629   if (dtd->scaffCount * sizeof(XML_Content)
7630       > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7631     return NULL;
7632   }
7633 
7634   const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7635                             + (dtd->contentStringLen * sizeof(XML_Char)));
7636 
7637   ret = (XML_Content *)MALLOC(parser, allocsize);
7638   if (! ret)
7639     return NULL;
7640 
7641   /* What follows is an iterative implementation (of what was previously done
7642    * recursively in a dedicated function called "build_node".  The old recursive
7643    * build_node could be forced into stack exhaustion from input as small as a
7644    * few megabyte, and so that was a security issue.  Hence, a function call
7645    * stack is avoided now by resolving recursion.)
7646    *
7647    * The iterative approach works as follows:
7648    *
7649    * - We have two writing pointers, both walking up the result array; one does
7650    *   the work, the other creates "jobs" for its colleague to do, and leads
7651    *   the way:
7652    *
7653    *   - The faster one, pointer jobDest, always leads and writes "what job
7654    *     to do" by the other, once they reach that place in the
7655    *     array: leader "jobDest" stores the source node array index (relative
7656    *     to array dtd->scaffold) in field "numchildren".
7657    *
7658    *   - The slower one, pointer dest, looks at the value stored in the
7659    *     "numchildren" field (which actually holds a source node array index
7660    *     at that time) and puts the real data from dtd->scaffold in.
7661    *
7662    * - Before the loop starts, jobDest writes source array index 0
7663    *   (where the root node is located) so that dest will have something to do
7664    *   when it starts operation.
7665    *
7666    * - Whenever nodes with children are encountered, jobDest appends
7667    *   them as new jobs, in order.  As a result, tree node siblings are
7668    *   adjacent in the resulting array, for example:
7669    *
7670    *     [0] root, has two children
7671    *       [1] first child of 0, has three children
7672    *         [3] first child of 1, does not have children
7673    *         [4] second child of 1, does not have children
7674    *         [5] third child of 1, does not have children
7675    *       [2] second child of 0, does not have children
7676    *
7677    *   Or (the same data) presented in flat array view:
7678    *
7679    *     [0] root, has two children
7680    *
7681    *     [1] first child of 0, has three children
7682    *     [2] second child of 0, does not have children
7683    *
7684    *     [3] first child of 1, does not have children
7685    *     [4] second child of 1, does not have children
7686    *     [5] third child of 1, does not have children
7687    *
7688    * - The algorithm repeats until all target array indices have been processed.
7689    */
7690   XML_Content *dest = ret; /* tree node writing location, moves upwards */
7691   XML_Content *const destLimit = &ret[dtd->scaffCount];
7692   XML_Content *jobDest = ret; /* next free writing location in target array */
7693   str = (XML_Char *)&ret[dtd->scaffCount];
7694 
7695   /* Add the starting job, the root node (index 0) of the source tree  */
7696   (jobDest++)->numchildren = 0;
7697 
7698   for (; dest < destLimit; dest++) {
7699     /* Retrieve source tree array index from job storage */
7700     const int src_node = (int)dest->numchildren;
7701 
7702     /* Convert item */
7703     dest->type = dtd->scaffold[src_node].type;
7704     dest->quant = dtd->scaffold[src_node].quant;
7705     if (dest->type == XML_CTYPE_NAME) {
7706       const XML_Char *src;
7707       dest->name = str;
7708       src = dtd->scaffold[src_node].name;
7709       for (;;) {
7710         *str++ = *src;
7711         if (! *src)
7712           break;
7713         src++;
7714       }
7715       dest->numchildren = 0;
7716       dest->children = NULL;
7717     } else {
7718       unsigned int i;
7719       int cn;
7720       dest->name = NULL;
7721       dest->numchildren = dtd->scaffold[src_node].childcnt;
7722       dest->children = jobDest;
7723 
7724       /* Append scaffold indices of children to array */
7725       for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7726            i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
7727         (jobDest++)->numchildren = (unsigned int)cn;
7728     }
7729   }
7730 
7731   return ret;
7732 }
7733 
7734 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7735 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7736                const char *end) {
7737   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7738   const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7739   ELEMENT_TYPE *ret;
7740 
7741   if (! name)
7742     return NULL;
7743   ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7744                                sizeof(ELEMENT_TYPE));
7745   if (! ret)
7746     return NULL;
7747   if (ret->name != name)
7748     poolDiscard(&dtd->pool);
7749   else {
7750     poolFinish(&dtd->pool);
7751     if (! setElementTypePrefix(parser, ret))
7752       return NULL;
7753   }
7754   return ret;
7755 }
7756 
7757 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7758 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7759   size_t charsRequired = 0;
7760   XML_Char *result;
7761 
7762   /* First determine how long the string is */
7763   while (s[charsRequired] != 0) {
7764     charsRequired++;
7765   }
7766   /* Include the terminator */
7767   charsRequired++;
7768 
7769   /* Now allocate space for the copy */
7770   result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7771   if (result == NULL)
7772     return NULL;
7773   /* Copy the original into place */
7774   memcpy(result, s, charsRequired * sizeof(XML_Char));
7775   return result;
7776 }
7777 
7778 #if XML_GE == 1
7779 
7780 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7781 accountingGetCurrentAmplification(XML_Parser rootParser) {
7782   const XmlBigCount countBytesOutput
7783       = rootParser->m_accounting.countBytesDirect
7784         + rootParser->m_accounting.countBytesIndirect;
7785   const float amplificationFactor
7786       = rootParser->m_accounting.countBytesDirect
7787             ? (countBytesOutput
7788                / (float)(rootParser->m_accounting.countBytesDirect))
7789             : 1.0f;
7790   assert(! rootParser->m_parentParser);
7791   return amplificationFactor;
7792 }
7793 
7794 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7795 accountingReportStats(XML_Parser originParser, const char *epilog) {
7796   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7797   assert(! rootParser->m_parentParser);
7798 
7799   if (rootParser->m_accounting.debugLevel == 0u) {
7800     return;
7801   }
7802 
7803   const float amplificationFactor
7804       = accountingGetCurrentAmplification(rootParser);
7805   fprintf(stderr,
7806           "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7807               "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7808           (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7809           rootParser->m_accounting.countBytesIndirect,
7810           (double)amplificationFactor, epilog);
7811 }
7812 
7813 static void
accountingOnAbort(XML_Parser originParser)7814 accountingOnAbort(XML_Parser originParser) {
7815   accountingReportStats(originParser, " ABORTING\n");
7816 }
7817 
7818 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7819 accountingReportDiff(XML_Parser rootParser,
7820                      unsigned int levelsAwayFromRootParser, const char *before,
7821                      const char *after, ptrdiff_t bytesMore, int source_line,
7822                      enum XML_Account account) {
7823   assert(! rootParser->m_parentParser);
7824 
7825   fprintf(stderr,
7826           " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7827           bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7828           levelsAwayFromRootParser, source_line, 10, "");
7829 
7830   const char ellipis[] = "[..]";
7831   const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7832   const unsigned int contextLength = 10;
7833 
7834   /* Note: Performance is of no concern here */
7835   const char *walker = before;
7836   if ((rootParser->m_accounting.debugLevel >= 3u)
7837       || (after - before)
7838              <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7839     for (; walker < after; walker++) {
7840       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7841     }
7842   } else {
7843     for (; walker < before + contextLength; walker++) {
7844       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7845     }
7846     fprintf(stderr, ellipis);
7847     walker = after - contextLength;
7848     for (; walker < after; walker++) {
7849       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7850     }
7851   }
7852   fprintf(stderr, "\"\n");
7853 }
7854 
7855 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7856 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7857                         const char *after, int source_line,
7858                         enum XML_Account account) {
7859   /* Note: We need to check the token type *first* to be sure that
7860    *       we can even access variable <after>, safely.
7861    *       E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7862   switch (tok) {
7863   case XML_TOK_INVALID:
7864   case XML_TOK_PARTIAL:
7865   case XML_TOK_PARTIAL_CHAR:
7866   case XML_TOK_NONE:
7867     return XML_TRUE;
7868   }
7869 
7870   if (account == XML_ACCOUNT_NONE)
7871     return XML_TRUE; /* because these bytes have been accounted for, already */
7872 
7873   unsigned int levelsAwayFromRootParser;
7874   const XML_Parser rootParser
7875       = getRootParserOf(originParser, &levelsAwayFromRootParser);
7876   assert(! rootParser->m_parentParser);
7877 
7878   const int isDirect
7879       = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7880   const ptrdiff_t bytesMore = after - before;
7881 
7882   XmlBigCount *const additionTarget
7883       = isDirect ? &rootParser->m_accounting.countBytesDirect
7884                  : &rootParser->m_accounting.countBytesIndirect;
7885 
7886   /* Detect and avoid integer overflow */
7887   if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7888     return XML_FALSE;
7889   *additionTarget += bytesMore;
7890 
7891   const XmlBigCount countBytesOutput
7892       = rootParser->m_accounting.countBytesDirect
7893         + rootParser->m_accounting.countBytesIndirect;
7894   const float amplificationFactor
7895       = accountingGetCurrentAmplification(rootParser);
7896   const XML_Bool tolerated
7897       = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7898         || (amplificationFactor
7899             <= rootParser->m_accounting.maximumAmplificationFactor);
7900 
7901   if (rootParser->m_accounting.debugLevel >= 2u) {
7902     accountingReportStats(rootParser, "");
7903     accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7904                          bytesMore, source_line, account);
7905   }
7906 
7907   return tolerated;
7908 }
7909 
7910 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7911 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7912   if (! parser)
7913     return 0;
7914   return parser->m_accounting.countBytesDirect;
7915 }
7916 
7917 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7918 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7919   if (! parser)
7920     return 0;
7921   return parser->m_accounting.countBytesIndirect;
7922 }
7923 
7924 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7925 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7926                           const char *action, int sourceLine) {
7927   assert(! rootParser->m_parentParser);
7928   if (rootParser->m_entity_stats.debugLevel == 0u)
7929     return;
7930 
7931 #  if defined(XML_UNICODE)
7932   const char *const entityName = "[..]";
7933 #  else
7934   const char *const entityName = entity->name;
7935 #  endif
7936 
7937   fprintf(
7938       stderr,
7939       "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7940       (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7941       rootParser->m_entity_stats.currentDepth,
7942       rootParser->m_entity_stats.maximumDepthSeen,
7943       (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7944       entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7945       sourceLine);
7946 }
7947 
7948 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7949 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7950   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7951   assert(! rootParser->m_parentParser);
7952 
7953   rootParser->m_entity_stats.countEverOpened++;
7954   rootParser->m_entity_stats.currentDepth++;
7955   if (rootParser->m_entity_stats.currentDepth
7956       > rootParser->m_entity_stats.maximumDepthSeen) {
7957     rootParser->m_entity_stats.maximumDepthSeen++;
7958   }
7959 
7960   entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7961 }
7962 
7963 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7964 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7965   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7966   assert(! rootParser->m_parentParser);
7967 
7968   entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7969   rootParser->m_entity_stats.currentDepth--;
7970 }
7971 
7972 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)7973 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7974   XML_Parser rootParser = parser;
7975   unsigned int stepsTakenUpwards = 0;
7976   while (rootParser->m_parentParser) {
7977     rootParser = rootParser->m_parentParser;
7978     stepsTakenUpwards++;
7979   }
7980   assert(! rootParser->m_parentParser);
7981   if (outLevelDiff != NULL) {
7982     *outLevelDiff = stepsTakenUpwards;
7983   }
7984   return rootParser;
7985 }
7986 
/* Maps a byte value to a printable, NUL-terminated representation for use
   in debug output.

   - 0, 9, 10 and 13 become the escapes \0, \t, \n and \r.
   - The double quote and the backslash are prefixed with a backslash.
   - All other bytes from 32 to 126 are returned as themselves.
   - Every remaining byte becomes \x followed by its upper-case hex value
     without zero padding (e.g. \x1, \xB, \x1F, \x7F, \xFF).

   The returned pointer refers to constant static storage; it must not be
   modified or freed. */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printable[256] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",
      /*   4 */ "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",
      /*  12 */ "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13",
      /*  20 */ "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B",
      /*  28 */ "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",
      /*  36 */ "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",
      /*  44 */ ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",
      /*  52 */ "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",
      /*  60 */ "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",
      /*  68 */ "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",
      /*  76 */ "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",
      /*  84 */ "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",
      /*  92 */ "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",
      /* 100 */ "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",
      /* 108 */ "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",
      /* 116 */ "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",
      /* 124 */ "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83",
      /* 132 */ "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B",
      /* 140 */ "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93",
      /* 148 */ "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B",
      /* 156 */ "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3",
      /* 164 */ "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB",
      /* 172 */ "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3",
      /* 180 */ "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB",
      /* 188 */ "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3",
      /* 196 */ "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB",
      /* 204 */ "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3",
      /* 212 */ "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB",
      /* 220 */ "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3",
      /* 228 */ "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB",
      /* 236 */ "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3",
      /* 244 */ "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB",
      /* 252 */ "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };

  /* The mask is a no-op for 8-bit bytes; it merely keeps the index inside
     the table on a platform where unsigned char is wider than that. */
  return printable[c & 0xFFu];
}
8508 
8509 #endif /* XML_GE == 1 */
8510 
/* Reads a debug level from the environment variable <variableName>.

   The value must be a base-10 number that strtoul() consumes completely.
   <defaultDebugLevel> is returned when the variable is unset, empty, has
   trailing characters, is out of range for unsigned long, or contains a
   minus sign.

   Side effect: when the variable is set and gets parsed, errno is cleared
   before parsing and cleared again if parsing fails. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const value = getenv(variableName);
  if (value == NULL) {
    return defaultDebugLevel;
  }

  /* strtoul() accepts a leading minus sign and negates the result in
     unsigned arithmetic, which would turn e.g. "-1" into ULONG_MAX.
     A level cannot be negative, so treat that like any other bad value.
     (A minus sign anywhere else would fail the checks below anyway.) */
  for (const char *cursor = value; *cursor != '\0'; cursor++) {
    if (*cursor == '-') {
      return defaultDebugLevel;
    }
  }

  errno = 0; /* so that a range error from strtoul() can be detected */
  char *afterValue = NULL;
  const unsigned long debugLevel = strtoul(value, &afterValue, 10);
  if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
    errno = 0; /* do not leak the parse failure to the caller */
    return defaultDebugLevel;
  }

  return debugLevel;
}
8529