1 /* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+)
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13    Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14    Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15    Copyright (c) 2016      Eric Rahm <erahm@mozilla.com>
16    Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
17    Copyright (c) 2016      Gaurav <g.gupta@samsung.com>
18    Copyright (c) 2016      Thomas Beutlich <tc@tbeu.de>
19    Copyright (c) 2016      Gustavo Grieco <gustavo.grieco@imag.fr>
20    Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com>
21    Copyright (c) 2016      Ed Schouten <ed@nuxi.nl>
22    Copyright (c) 2017-2018 Rhodri James <rhodri@wildebeest.org.uk>
23    Copyright (c) 2017      Václav Slavík <vaclav@slavik.io>
24    Copyright (c) 2017      Viktor Szakats <commit@vsz.me>
25    Copyright (c) 2017      Chanho Park <chanho61.park@samsung.com>
26    Copyright (c) 2017      Rolf Eike Beer <eike@sf-mail.de>
27    Copyright (c) 2017      Hans Wennborg <hans@chromium.org>
28    Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
29    Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
30    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
31    Copyright (c) 2018      Mariusz Zaborski <oshogbo@vexillium.org>
32    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
33    Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34    Copyright (c) 2019      Vadim Zeitlin <vadim@zeitlins.org>
35    Copyright (c) 2021      Dong-hee Na <donghee.na@python.org>
36    Copyright (c) 2022      Samanta Navarro <ferivoz@riseup.net>
37    Copyright (c) 2022      Jeffrey Walton <noloader@gmail.com>
38    Licensed under the MIT license:
39 
40    Permission is  hereby granted,  free of charge,  to any  person obtaining
41    a  copy  of  this  software   and  associated  documentation  files  (the
42    "Software"),  to  deal in  the  Software  without restriction,  including
43    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
44    distribute, sublicense, and/or sell copies of the Software, and to permit
45    persons  to whom  the Software  is  furnished to  do so,  subject to  the
46    following conditions:
47 
48    The above copyright  notice and this permission notice  shall be included
49    in all copies or substantial portions of the Software.
50 
51    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
52    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
53    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
54    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
55    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
56    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
57    USE OR OTHER DEALINGS IN THE SOFTWARE.
58 */
59 
60 #define XML_BUILDING_EXPAT 1
61 
62 #include <expat_config.h>
63 
64 #if ! defined(_GNU_SOURCE)
65 #  define _GNU_SOURCE 1 /* syscall prototype */
66 #endif
67 
68 #ifdef _WIN32
69 /* force stdlib to define rand_s() */
70 #  if ! defined(_CRT_RAND_S)
71 #    define _CRT_RAND_S
72 #  endif
73 #endif
74 
75 #include <stddef.h>
76 #include <string.h> /* memset(), memcpy() */
77 #include <assert.h>
78 #include <limits.h> /* UINT_MAX */
79 #include <stdio.h>  /* fprintf */
80 #include <stdlib.h> /* getenv, rand_s */
81 #include <stdint.h> /* uintptr_t */
82 #include <math.h>   /* isnan */
83 
84 #ifdef _WIN32
85 #  define getpid GetCurrentProcessId
86 #else
87 #  include <sys/time.h>  /* gettimeofday() */
88 #  include <sys/types.h> /* getpid() */
89 #  include <unistd.h>    /* getpid() */
90 #  include <fcntl.h>     /* O_RDONLY */
91 #  include <errno.h>
92 #endif
93 
94 #ifdef _WIN32
95 #  include "winconfig.h"
96 #endif
97 
98 #include "ascii.h"
99 #include "expat.h"
100 #include "siphash.h"
101 
102 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
103 #  if defined(HAVE_GETRANDOM)
104 #    include <sys/random.h> /* getrandom */
105 #  else
106 #    include <unistd.h>      /* syscall */
107 #    include <sys/syscall.h> /* SYS_getrandom */
108 #  endif
109 #  if ! defined(GRND_NONBLOCK)
110 #    define GRND_NONBLOCK 0x0001
111 #  endif /* defined(GRND_NONBLOCK) */
112 #endif   /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
113 
114 #if defined(HAVE_LIBBSD)                                                       \
115     && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
116 #  include <bsd/stdlib.h>
117 #endif
118 
119 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
120 #  define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
121 #endif
122 
123 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM)             \
124     && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)            \
125     && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32)                         \
126     && ! defined(XML_POOR_ENTROPY)
127 #  error You do not have support for any sources of high quality entropy \
128     enabled.  For end user security, that is probably not what you want. \
129     \
130     Your options include: \
131       * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
132       * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
133       * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
134       * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
135       * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
136       * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
137       * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
138       * Windows >=Vista (rand_s): _WIN32. \
139     \
    If you insist on not using any of these, bypass this error by defining \
141     XML_POOR_ENTROPY; you have been warned. \
142     \
143     If you have reasons to patch this detection code away or need changes \
144     to the build system, please open a bug.  Thank you!
145 #endif
146 
147 #ifdef XML_UNICODE
148 #  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
149 #  define XmlConvert XmlUtf16Convert
150 #  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
151 #  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
152 #  define XmlEncode XmlUtf16Encode
153 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
154 typedef unsigned short ICHAR;
155 #else
156 #  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
157 #  define XmlConvert XmlUtf8Convert
158 #  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
159 #  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
160 #  define XmlEncode XmlUtf8Encode
161 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
162 typedef char ICHAR;
163 #endif
164 
165 #ifndef XML_NS
166 
167 #  define XmlInitEncodingNS XmlInitEncoding
168 #  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
169 #  undef XmlGetInternalEncodingNS
170 #  define XmlGetInternalEncodingNS XmlGetInternalEncoding
171 #  define XmlParseXmlDeclNS XmlParseXmlDecl
172 
173 #endif
174 
175 #ifdef XML_UNICODE
176 
177 #  ifdef XML_UNICODE_WCHAR_T
178 #    define XML_T(x) (const wchar_t) x
179 #    define XML_L(x) L##x
180 #  else
181 #    define XML_T(x) (const unsigned short)x
182 #    define XML_L(x) x
183 #  endif
184 
185 #else
186 
187 #  define XML_T(x) x
188 #  define XML_L(x) x
189 
190 #endif
191 
/* Round up n to be a multiple of sz, where sz is a power of 2.
   sz is expanded twice, so it must be free of side effects. */
#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))

/* Do safe (NULL-aware) pointer arithmetic: yields p - q when both pointers
   are non-NULL and 0 otherwise.  p and q are each expanded twice, so they
   must be free of side effects. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
197 
198 #include "internal.h"
199 #include "xmltok.h"
200 #include "xmlrole.h"
201 
202 typedef const XML_Char *KEY;
203 
204 typedef struct {
205   KEY name;
206 } NAMED;
207 
/* Hash table of pointers to NAMED entries.  Collisions are resolved by
   probing; see the notes on SECOND_HASH/PROBE_STEP below. */
typedef struct {
  NAMED **v;           /* slot array of NAMED pointers */
  unsigned char power; /* presumably log2(size) -- TODO confirm in lookup() */
  size_t size;         /* table size; a power of 2 per the probing notes */
  size_t used;         /* presumably the number of occupied slots -- confirm */
  const XML_Memory_Handling_Suite *mem; /* presumably the suite passed to
                                           hashTableInit() -- confirm */
} HASH_TABLE;
215 
216 static size_t keylen(KEY s);
217 
218 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
219 
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.

   SECOND_HASH keeps the hash bits above the mask, shifts them right by
   (power - 1) and masks the result with (mask >> 2).
   PROBE_STEP sets the lowest bit of that value so the step is odd, then
   narrows the result to unsigned char.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
232 
233 typedef struct {
234   NAMED **p;
235   NAMED **end;
236 } HASH_TABLE_ITER;
237 
238 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
239 #define INIT_DATA_BUF_SIZE 1024
240 #define INIT_ATTS_SIZE 16
241 #define INIT_ATTS_VERSION 0xFFFFFFFF
242 #define INIT_BLOCK_SIZE 1024
243 #define INIT_BUFFER_SIZE 1024
244 
245 #define EXPAND_SPARE 24
246 
247 typedef struct binding {
248   struct prefix *prefix;
249   struct binding *nextTagBinding;
250   struct binding *prevPrefixBinding;
251   const struct attribute_id *attId;
252   XML_Char *uri;
253   int uriLen;
254   int uriAlloc;
255 } BINDING;
256 
257 typedef struct prefix {
258   const XML_Char *name;
259   BINDING *binding;
260 } PREFIX;
261 
262 typedef struct {
263   const XML_Char *str;
264   const XML_Char *localPart;
265   const XML_Char *prefix;
266   int strLen;
267   int uriLen;
268   int prefixLen;
269 } TAG_NAME;
270 
/* TAG represents an open element.
   The name of the element is stored in both the document and API
   encodings.  The memory buffer 'buf' is a separately-allocated
   memory area which stores the name.  During the XML_Parse()/
   XML_ParseBuffer() call when the element is open, the memory for the
   'raw' version of the name (in the document encoding) is shared with the
   document buffer.  If the element is open across calls to
   XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
   contain the 'raw' name as well.

   A parser re-uses these structures, maintaining a list of allocated
   TAG objects in a free list.
*/
typedef struct tag {
  struct tag *parent;  /* parent of this element */
  const char *rawName; /* tagName in the original encoding */
  int rawNameLength;   /* length of rawName (presumably in bytes -- confirm) */
  TAG_NAME name; /* tagName in the API encoding */
  char *buf;     /* buffer for name components */
  char *bufEnd;  /* end of the buffer */
  BINDING *bindings; /* list of namespace bindings (presumably those declared
                        on this element -- confirm) */
} TAG;
293 
294 typedef struct {
295   const XML_Char *name;
296   const XML_Char *textPtr;
297   int textLen;   /* length in XML_Chars */
298   int processed; /* # of processed bytes - when suspended */
299   const XML_Char *systemId;
300   const XML_Char *base;
301   const XML_Char *publicId;
302   const XML_Char *notation;
303   XML_Bool open;
304   XML_Bool is_param;
305   XML_Bool is_internal; /* true if declared in internal subset outside PE */
306 } ENTITY;
307 
308 typedef struct {
309   enum XML_Content_Type type;
310   enum XML_Content_Quant quant;
311   const XML_Char *name;
312   int firstchild;
313   int lastchild;
314   int childcnt;
315   int nextsib;
316 } CONTENT_SCAFFOLD;
317 
318 #define INIT_SCAFFOLD_ELEMENTS 32
319 
320 typedef struct block {
321   struct block *next;
322   int size;
323   XML_Char s[1];
324 } BLOCK;
325 
/* Pool in which strings are built up incrementally; it is manipulated
   through the pool*() macros and functions declared further down. */
typedef struct {
  BLOCK *blocks;       /* list of BLOCKs (presumably those in use -- confirm) */
  BLOCK *freeBlocks;   /* list of BLOCKs (presumably kept for reuse -- confirm) */
  const XML_Char *end; /* limit of the writable area: poolAppendChar() calls
                          poolGrow() once ptr has reached it */
  XML_Char *ptr;       /* next write position (what poolEnd() yields) */
  XML_Char *start;     /* start of the string under construction (what
                          poolStart() yields) */
  const XML_Memory_Handling_Suite *mem; /* presumably the suite passed to
                                           poolInit() -- confirm */
} STRING_POOL;
334 
335 /* The XML_Char before the name is used to determine whether
336    an attribute has been specified. */
337 typedef struct attribute_id {
338   XML_Char *name;
339   PREFIX *prefix;
340   XML_Bool maybeTokenized;
341   XML_Bool xmlns;
342 } ATTRIBUTE_ID;
343 
344 typedef struct {
345   const ATTRIBUTE_ID *id;
346   XML_Bool isCdata;
347   const XML_Char *value;
348 } DEFAULT_ATTRIBUTE;
349 
350 typedef struct {
351   unsigned long version;
352   unsigned long hash;
353   const XML_Char *uriName;
354 } NS_ATT;
355 
356 typedef struct {
357   const XML_Char *name;
358   PREFIX *prefix;
359   const ATTRIBUTE_ID *idAtt;
360   int nDefaultAtts;
361   int allocDefaultAtts;
362   DEFAULT_ATTRIBUTE *defaultAtts;
363 } ELEMENT_TYPE;
364 
365 typedef struct {
366   HASH_TABLE generalEntities;
367   HASH_TABLE elementTypes;
368   HASH_TABLE attributeIds;
369   HASH_TABLE prefixes;
370   STRING_POOL pool;
371   STRING_POOL entityValuePool;
372   /* false once a parameter entity reference has been skipped */
373   XML_Bool keepProcessing;
374   /* true once an internal or external PE reference has been encountered;
375      this includes the reference to an external subset */
376   XML_Bool hasParamEntityRefs;
377   XML_Bool standalone;
378 #ifdef XML_DTD
379   /* indicates if external PE has been read */
380   XML_Bool paramEntityRead;
381   HASH_TABLE paramEntities;
382 #endif /* XML_DTD */
383   PREFIX defaultPrefix;
384   /* === scaffolding for building content model === */
385   XML_Bool in_eldecl;
386   CONTENT_SCAFFOLD *scaffold;
387   unsigned contentStringLen;
388   unsigned scaffSize;
389   unsigned scaffCount;
390   int scaffLevel;
391   int *scaffIndex;
392 } DTD;
393 
394 typedef struct open_internal_entity {
395   const char *internalEventPtr;
396   const char *internalEventEndPtr;
397   struct open_internal_entity *next;
398   ENTITY *entity;
399   int startTagLevel;
400   XML_Bool betweenDecl; /* WFC: PE Between Declarations */
401 } OPEN_INTERNAL_ENTITY;
402 
403 enum XML_Account {
404   XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
405   XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
406                                    expansion */
407   XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
408 };
409 
410 #ifdef XML_DTD
411 typedef unsigned long long XmlBigCount;
412 typedef struct accounting {
413   XmlBigCount countBytesDirect;
414   XmlBigCount countBytesIndirect;
415   int debugLevel;
416   float maximumAmplificationFactor; // >=1.0
417   unsigned long long activationThresholdBytes;
418 } ACCOUNTING;
419 
420 typedef struct entity_stats {
421   unsigned int countEverOpened;
422   unsigned int currentDepth;
423   unsigned int maximumDepthSeen;
424   int debugLevel;
425 } ENTITY_STATS;
426 #endif /* XML_DTD */
427 
428 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
429                                          const char *end, const char **endPtr);
430 
431 static Processor prologProcessor;
432 static Processor prologInitProcessor;
433 static Processor contentProcessor;
434 static Processor cdataSectionProcessor;
435 #ifdef XML_DTD
436 static Processor ignoreSectionProcessor;
437 static Processor externalParEntProcessor;
438 static Processor externalParEntInitProcessor;
439 static Processor entityValueProcessor;
440 static Processor entityValueInitProcessor;
441 #endif /* XML_DTD */
442 static Processor epilogProcessor;
443 static Processor errorProcessor;
444 static Processor externalEntityInitProcessor;
445 static Processor externalEntityInitProcessor2;
446 static Processor externalEntityInitProcessor3;
447 static Processor externalEntityContentProcessor;
448 static Processor internalEntityProcessor;
449 
450 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
451                                             const XML_Char *encodingName);
452 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
453                                      const char *s, const char *next);
454 static enum XML_Error initializeEncoding(XML_Parser parser);
455 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
456                                const char *s, const char *end, int tok,
457                                const char *next, const char **nextPtr,
458                                XML_Bool haveMore, XML_Bool allowClosingDoctype,
459                                enum XML_Account account);
460 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
461                                             XML_Bool betweenDecl);
462 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
463                                 const ENCODING *enc, const char *start,
464                                 const char *end, const char **endPtr,
465                                 XML_Bool haveMore, enum XML_Account account);
466 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
467                                      const char **startPtr, const char *end,
468                                      const char **nextPtr, XML_Bool haveMore,
469                                      enum XML_Account account);
470 #ifdef XML_DTD
471 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
472                                       const char **startPtr, const char *end,
473                                       const char **nextPtr, XML_Bool haveMore);
474 #endif /* XML_DTD */
475 
476 static void freeBindings(XML_Parser parser, BINDING *bindings);
477 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
478                                 const char *s, TAG_NAME *tagNamePtr,
479                                 BINDING **bindingsPtr,
480                                 enum XML_Account account);
481 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
482                                  const ATTRIBUTE_ID *attId, const XML_Char *uri,
483                                  BINDING **bindingsPtr);
484 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
485                            XML_Bool isId, const XML_Char *dfltValue,
486                            XML_Parser parser);
487 static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
488                                           XML_Bool isCdata, const char *,
489                                           const char *, STRING_POOL *,
490                                           enum XML_Account account);
491 static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
492                                            XML_Bool isCdata, const char *,
493                                            const char *, STRING_POOL *,
494                                            enum XML_Account account);
495 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
496                                     const char *start, const char *end);
497 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
498 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
499                                        const char *start, const char *end,
500                                        enum XML_Account account);
501 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
502                                        const char *start, const char *end);
503 static int reportComment(XML_Parser parser, const ENCODING *enc,
504                          const char *start, const char *end);
505 static void reportDefault(XML_Parser parser, const ENCODING *enc,
506                           const char *start, const char *end);
507 
508 static const XML_Char *getContext(XML_Parser parser);
509 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
510 
511 static void FASTCALL normalizePublicId(XML_Char *s);
512 
513 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
514 /* do not call if m_parentParser != NULL */
515 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
516 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
517                        const XML_Memory_Handling_Suite *ms);
518 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
519                    const XML_Memory_Handling_Suite *ms);
520 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
521                            const HASH_TABLE *);
522 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
523                      size_t createSize);
524 static void FASTCALL hashTableInit(HASH_TABLE *,
525                                    const XML_Memory_Handling_Suite *ms);
526 static void FASTCALL hashTableClear(HASH_TABLE *);
527 static void FASTCALL hashTableDestroy(HASH_TABLE *);
528 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
529 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
530 
531 static void FASTCALL poolInit(STRING_POOL *,
532                               const XML_Memory_Handling_Suite *ms);
533 static void FASTCALL poolClear(STRING_POOL *);
534 static void FASTCALL poolDestroy(STRING_POOL *);
535 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
536                             const char *ptr, const char *end);
537 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
538                                  const char *ptr, const char *end);
539 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
540 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
541                                                const XML_Char *s);
542 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
543                                        int n);
544 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
545                                                  const XML_Char *s);
546 
547 static int FASTCALL nextScaffoldPart(XML_Parser parser);
548 static XML_Content *build_model(XML_Parser parser);
549 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
550                                     const char *ptr, const char *end);
551 
552 static XML_Char *copyString(const XML_Char *s,
553                             const XML_Memory_Handling_Suite *memsuite);
554 
555 static unsigned long generate_hash_secret_salt(XML_Parser parser);
556 static XML_Bool startParsing(XML_Parser parser);
557 
558 static XML_Parser parserCreate(const XML_Char *encodingName,
559                                const XML_Memory_Handling_Suite *memsuite,
560                                const XML_Char *nameSep, DTD *dtd);
561 
562 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
563 
564 #ifdef XML_DTD
565 static float accountingGetCurrentAmplification(XML_Parser rootParser);
566 static void accountingReportStats(XML_Parser originParser, const char *epilog);
567 static void accountingOnAbort(XML_Parser originParser);
568 static void accountingReportDiff(XML_Parser rootParser,
569                                  unsigned int levelsAwayFromRootParser,
570                                  const char *before, const char *after,
571                                  ptrdiff_t bytesMore, int source_line,
572                                  enum XML_Account account);
573 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
574                                         const char *before, const char *after,
575                                         int source_line,
576                                         enum XML_Account account);
577 
578 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
579                                       const char *action, int sourceLine);
580 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
581                                  int sourceLine);
582 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
583                                   int sourceLine);
584 
585 static XML_Parser getRootParserOf(XML_Parser parser,
586                                   unsigned int *outLevelDiff);
587 #endif /* XML_DTD */
588 
589 static unsigned long getDebugLevel(const char *variableName,
590                                    unsigned long defaultDebugLevel);
591 
/* Helpers operating on the string currently under construction in a
   STRING_POOL (the characters between pool->start and pool->ptr).

   poolStart      - first character of the current string
   poolEnd        - one past the last character written so far
   poolLength     - number of XML_Chars written so far
   poolChop       - drop the last character written
   poolLastChar   - the last character written (lvalue)
   poolDiscard    - throw away the current string (ptr back to start)
   poolFinish     - commit the current string; the next one starts at ptr
   poolAppendChar - append c, growing the pool via poolGrow() when full;
                    evaluates to 1 on success and 0 if growing failed

   The pool argument is expanded several times in most of these macros, so
   it must be free of side effects.  Every use of an argument is fully
   parenthesised so that any pointer expression (e.g. `pools + 1` or
   `&parser->m_tempPool`) can be passed safely. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
603 
604 struct XML_ParserStruct {
605   /* The first member must be m_userData so that the XML_GetUserData
606      macro works. */
607   void *m_userData;
608   void *m_handlerArg;
609   char *m_buffer;
610   const XML_Memory_Handling_Suite m_mem;
611   /* first character to be parsed */
612   const char *m_bufferPtr;
613   /* past last character to be parsed */
614   char *m_bufferEnd;
615   /* allocated end of m_buffer */
616   const char *m_bufferLim;
617   XML_Index m_parseEndByteIndex;
618   const char *m_parseEndPtr;
619   XML_Char *m_dataBuf;
620   XML_Char *m_dataBufEnd;
621   XML_StartElementHandler m_startElementHandler;
622   XML_EndElementHandler m_endElementHandler;
623   XML_CharacterDataHandler m_characterDataHandler;
624   XML_ProcessingInstructionHandler m_processingInstructionHandler;
625   XML_CommentHandler m_commentHandler;
626   XML_StartCdataSectionHandler m_startCdataSectionHandler;
627   XML_EndCdataSectionHandler m_endCdataSectionHandler;
628   XML_DefaultHandler m_defaultHandler;
629   XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
630   XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
631   XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
632   XML_NotationDeclHandler m_notationDeclHandler;
633   XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
634   XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
635   XML_NotStandaloneHandler m_notStandaloneHandler;
636   XML_ExternalEntityRefHandler m_externalEntityRefHandler;
637   XML_Parser m_externalEntityRefHandlerArg;
638   XML_SkippedEntityHandler m_skippedEntityHandler;
639   XML_UnknownEncodingHandler m_unknownEncodingHandler;
640   XML_ElementDeclHandler m_elementDeclHandler;
641   XML_AttlistDeclHandler m_attlistDeclHandler;
642   XML_EntityDeclHandler m_entityDeclHandler;
643   XML_XmlDeclHandler m_xmlDeclHandler;
644   const ENCODING *m_encoding;
645   INIT_ENCODING m_initEncoding;
646   const ENCODING *m_internalEncoding;
647   const XML_Char *m_protocolEncodingName;
648   XML_Bool m_ns;
649   XML_Bool m_ns_triplets;
650   void *m_unknownEncodingMem;
651   void *m_unknownEncodingData;
652   void *m_unknownEncodingHandlerData;
653   void(XMLCALL *m_unknownEncodingRelease)(void *);
654   PROLOG_STATE m_prologState;
655   Processor *m_processor;
656   enum XML_Error m_errorCode;
657   const char *m_eventPtr;
658   const char *m_eventEndPtr;
659   const char *m_positionPtr;
660   OPEN_INTERNAL_ENTITY *m_openInternalEntities;
661   OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
662   XML_Bool m_defaultExpandInternalEntities;
663   int m_tagLevel;
664   ENTITY *m_declEntity;
665   const XML_Char *m_doctypeName;
666   const XML_Char *m_doctypeSysid;
667   const XML_Char *m_doctypePubid;
668   const XML_Char *m_declAttributeType;
669   const XML_Char *m_declNotationName;
670   const XML_Char *m_declNotationPublicId;
671   ELEMENT_TYPE *m_declElementType;
672   ATTRIBUTE_ID *m_declAttributeId;
673   XML_Bool m_declAttributeIsCdata;
674   XML_Bool m_declAttributeIsId;
675   DTD *m_dtd;
676   const XML_Char *m_curBase;
677   TAG *m_tagStack;
678   TAG *m_freeTagList;
679   BINDING *m_inheritedBindings;
680   BINDING *m_freeBindingList;
681   int m_attsSize;
682   int m_nSpecifiedAtts;
683   int m_idAttIndex;
684   ATTRIBUTE *m_atts;
685   NS_ATT *m_nsAtts;
686   unsigned long m_nsAttsVersion;
687   unsigned char m_nsAttsPower;
688 #ifdef XML_ATTR_INFO
689   XML_AttrInfo *m_attInfo;
690 #endif
691   POSITION m_position;
692   STRING_POOL m_tempPool;
693   STRING_POOL m_temp2Pool;
694   char *m_groupConnector;
695   unsigned int m_groupSize;
696   XML_Char m_namespaceSeparator;
697   XML_Parser m_parentParser;
698   XML_ParsingStatus m_parsingStatus;
699 #ifdef XML_DTD
700   XML_Bool m_isParamEntity;
701   XML_Bool m_useForeignDTD;
702   enum XML_ParamEntityParsing m_paramEntityParsing;
703 #endif
704   unsigned long m_hash_secret_salt;
705 #ifdef XML_DTD
706   ACCOUNTING m_accounting;
707   ENTITY_STATS m_entity_stats;
708 #endif
709 };
710 
/* Allocation shorthands that go through the memory suite stored in the
   parser (parser->m_mem) instead of calling malloc/realloc/free directly.
   The parser argument is parenthesised so that any pointer expression can
   be passed, not just a plain identifier. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
714 
715 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)716 XML_ParserCreate(const XML_Char *encodingName) {
717   return XML_ParserCreate_MM(encodingName, NULL, NULL);
718 }
719 
720 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)721 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
722   XML_Char tmp[2] = {nsSep, 0};
723   return XML_ParserCreate_MM(encodingName, NULL, tmp);
724 }
725 
726 // "xml=http://www.w3.org/XML/1998/namespace"
727 static const XML_Char implicitContext[]
728     = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
729        ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
730        ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
731        ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
732        ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
733        ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
734        ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
735        ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
736        '\0'};
737 
738 /* To avoid warnings about unused functions: */
739 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
740 
741 #  if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
742 
/* Obtain entropy on Linux 3.17+ without blocking.

   Fills target with count random bytes using getrandom() (or the raw
   SYS_getrandom syscall when HAVE_GETRANDOM is not defined), passing
   GRND_NONBLOCK.

   Returns 1 if all count bytes were written and 0 otherwise.  A call that
   produces no bytes (which includes count == 0) yields 0.

   errno is consulted only after a failed call: a short write simply
   continues filling the remainder, and only a failure with EINTR is
   retried.  Looking at errno after a successful call would read a stale
   value left behind by some earlier, unrelated call. */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long is wide enough for both getrandom()'s ssize_t and syscall()'s
       long on the Linux targets this function is compiled for */
    const long bytesWrittenMore =
#    if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#    else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#    endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      continue; /* short write: keep filling the rest */
    }

    if (bytesWrittenMore < 0 && errno == EINTR) {
      continue; /* interrupted before any byte was produced: retry */
    }

    break; /* hard error (e.g. EAGAIN) or nothing written */
  }

  return success;
}
770 
771 #  endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
772 
773 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
774 
/* Extract entropy from /dev/urandom.

   Writes `count` random bytes to `target`.  Returns 1 when all `count`
   bytes were read and 0 otherwise (including when the device cannot be
   opened).  The file descriptor is closed on every path after a
   successful open.

   A short read is continued for the bytes still missing.  A failed read is
   retried only when it was interrupted (EINTR).  errno is inspected solely
   after read() reported an error; after a successful or zero-byte read it
   is stale and must not steer the loop. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      break; /* nothing delivered, or a real error: give up */
    }
  }

  close(fd);
  return success;
}
802 
803 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
804 
805 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
806 
807 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
808 
/* Fill `target` with `count` random bytes.  Each arc4random() call yields
   32 bits, which are stored least-significant byte first; the last call
   contributes only as many bytes as are still missing. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();
    const size_t missing = count - written;
    const size_t take = (missing < sizeof(word)) ? missing : sizeof(word);
    size_t k;

    for (k = 0; k < take; k++) {
      out[written + k] = (uint8_t)(word >> (k * 8));
    }
    written += take;
  }
}
824 
825 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
826 
827 #ifdef _WIN32
828 
/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
   as it didn't declare it in its header prior to version 5.3.0 of its
   runtime package (mingwrt, containing stdlib.h).  The upstream fix
   was introduced at https://osdn.net/projects/mingw/ticket/39658 .

   The guard below therefore only fires for MinGW-32 runtimes whose
   __MINGW32_VERSION is below 5003000L and that are not MinGW-w64
   (which defines __MINGW64_VERSION_MAJOR). */
#  if defined(__MINGW32__) && defined(__MINGW32_VERSION)                       \
      && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
__declspec(dllimport) int rand_s(unsigned int *);
#  endif
837 
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Writes `count` bytes to `target`, taking them least-significant byte
 * first from each value delivered by rand_s().  Returns 1 on success and
 * 0 as soon as rand_s() reports an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int word = 0;
    size_t k;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    for (k = 0; (k < sizeof(word)) && (written < count); k++) {
      out[written++] = (uint8_t)(word >> (k * 8));
    }
  }
  return 1; /* success */
}
861 
862 #endif /* _WIN32 */
863 
864 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
865 
/* Return a few clock-derived bits for the low-quality fallback salt.
   Windows: XOR of the two halves of the current FILETIME.
   Elsewhere: the microsecond field reported by gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int gettimeofday_res = gettimeofday(&now, NULL);

  /* With NDEBUG the assert vanishes; the cast keeps the variable "used". */
  (void)gettimeofday_res;
  assert(gettimeofday_res == 0);

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
888 
889 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
890 
/* Pass-through helper: returns `entropy` unchanged.  When
   getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) yields at least 1, it first
   reports on stderr which provider (`label`) produced the value. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u)
    return entropy;

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
899 
900 static unsigned long
generate_hash_secret_salt(XML_Parser parser)901 generate_hash_secret_salt(XML_Parser parser) {
902   unsigned long entropy;
903   (void)parser;
904 
905   /* "Failproof" high quality providers: */
906 #if defined(HAVE_ARC4RANDOM_BUF)
907   arc4random_buf(&entropy, sizeof(entropy));
908   return ENTROPY_DEBUG("arc4random_buf", entropy);
909 #elif defined(HAVE_ARC4RANDOM)
910   writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
911   return ENTROPY_DEBUG("arc4random", entropy);
912 #else
913   /* Try high quality providers first .. */
914 #  ifdef _WIN32
915   if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
916     return ENTROPY_DEBUG("rand_s", entropy);
917   }
918 #  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
919   if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
920     return ENTROPY_DEBUG("getrandom", entropy);
921   }
922 #  endif
923 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
924   if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
925     return ENTROPY_DEBUG("/dev/urandom", entropy);
926   }
927 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
928   /* .. and self-made low quality for backup: */
929 
930   /* Process ID is 0 bits entropy if attacker has local access */
931   entropy = gather_time_entropy() ^ getpid();
932 
933   /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
934   if (sizeof(unsigned long) == 4) {
935     return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
936   } else {
937     return ENTROPY_DEBUG("fallback(8)",
938                          entropy * (unsigned long)2305843009213693951ULL);
939   }
940 #endif
941 }
942 
943 static unsigned long
get_hash_secret_salt(XML_Parser parser)944 get_hash_secret_salt(XML_Parser parser) {
945   if (parser->m_parentParser != NULL)
946     return get_hash_secret_salt(parser->m_parentParser);
947   return parser->m_hash_secret_salt;
948 }
949 
950 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)951 startParsing(XML_Parser parser) {
952   /* hash functions must be initialized before setContext() is called */
953   if (parser->m_hash_secret_salt == 0)
954     parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
955   if (parser->m_ns) {
956     /* implicit context only set for root parser, since child
957        parsers (i.e. external entity parsers) will inherit it
958     */
959     return setContext(parser, implicitContext);
960   }
961   return XML_TRUE;
962 }
963 
964 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)965 XML_ParserCreate_MM(const XML_Char *encodingName,
966                     const XML_Memory_Handling_Suite *memsuite,
967                     const XML_Char *nameSep) {
968   return parserCreate(encodingName, memsuite, nameSep, NULL);
969 }
970 
971 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)972 parserCreate(const XML_Char *encodingName,
973              const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
974              DTD *dtd) {
975   XML_Parser parser;
976 
977   if (memsuite) {
978     XML_Memory_Handling_Suite *mtemp;
979     parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
980     if (parser != NULL) {
981       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
982       mtemp->malloc_fcn = memsuite->malloc_fcn;
983       mtemp->realloc_fcn = memsuite->realloc_fcn;
984       mtemp->free_fcn = memsuite->free_fcn;
985     }
986   } else {
987     XML_Memory_Handling_Suite *mtemp;
988     parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
989     if (parser != NULL) {
990       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
991       mtemp->malloc_fcn = malloc;
992       mtemp->realloc_fcn = realloc;
993       mtemp->free_fcn = free;
994     }
995   }
996 
997   if (! parser)
998     return parser;
999 
1000   parser->m_buffer = NULL;
1001   parser->m_bufferLim = NULL;
1002 
1003   parser->m_attsSize = INIT_ATTS_SIZE;
1004   parser->m_atts
1005       = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1006   if (parser->m_atts == NULL) {
1007     FREE(parser, parser);
1008     return NULL;
1009   }
1010 #ifdef XML_ATTR_INFO
1011   parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1012       parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1013   if (parser->m_attInfo == NULL) {
1014     FREE(parser, parser->m_atts);
1015     FREE(parser, parser);
1016     return NULL;
1017   }
1018 #endif
1019   parser->m_dataBuf
1020       = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1021   if (parser->m_dataBuf == NULL) {
1022     FREE(parser, parser->m_atts);
1023 #ifdef XML_ATTR_INFO
1024     FREE(parser, parser->m_attInfo);
1025 #endif
1026     FREE(parser, parser);
1027     return NULL;
1028   }
1029   parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1030 
1031   if (dtd)
1032     parser->m_dtd = dtd;
1033   else {
1034     parser->m_dtd = dtdCreate(&parser->m_mem);
1035     if (parser->m_dtd == NULL) {
1036       FREE(parser, parser->m_dataBuf);
1037       FREE(parser, parser->m_atts);
1038 #ifdef XML_ATTR_INFO
1039       FREE(parser, parser->m_attInfo);
1040 #endif
1041       FREE(parser, parser);
1042       return NULL;
1043     }
1044   }
1045 
1046   parser->m_freeBindingList = NULL;
1047   parser->m_freeTagList = NULL;
1048   parser->m_freeInternalEntities = NULL;
1049 
1050   parser->m_groupSize = 0;
1051   parser->m_groupConnector = NULL;
1052 
1053   parser->m_unknownEncodingHandler = NULL;
1054   parser->m_unknownEncodingHandlerData = NULL;
1055 
1056   parser->m_namespaceSeparator = ASCII_EXCL;
1057   parser->m_ns = XML_FALSE;
1058   parser->m_ns_triplets = XML_FALSE;
1059 
1060   parser->m_nsAtts = NULL;
1061   parser->m_nsAttsVersion = 0;
1062   parser->m_nsAttsPower = 0;
1063 
1064   parser->m_protocolEncodingName = NULL;
1065 
1066   poolInit(&parser->m_tempPool, &(parser->m_mem));
1067   poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1068   parserInit(parser, encodingName);
1069 
1070   if (encodingName && ! parser->m_protocolEncodingName) {
1071     XML_ParserFree(parser);
1072     return NULL;
1073   }
1074 
1075   if (nameSep) {
1076     parser->m_ns = XML_TRUE;
1077     parser->m_internalEncoding = XmlGetInternalEncodingNS();
1078     parser->m_namespaceSeparator = *nameSep;
1079   } else {
1080     parser->m_internalEncoding = XmlGetInternalEncoding();
1081   }
1082 
1083   return parser;
1084 }
1085 
1086 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1087 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1088   parser->m_processor = prologInitProcessor;
1089   XmlPrologStateInit(&parser->m_prologState);
1090   if (encodingName != NULL) {
1091     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1092   }
1093   parser->m_curBase = NULL;
1094   XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1095   parser->m_userData = NULL;
1096   parser->m_handlerArg = NULL;
1097   parser->m_startElementHandler = NULL;
1098   parser->m_endElementHandler = NULL;
1099   parser->m_characterDataHandler = NULL;
1100   parser->m_processingInstructionHandler = NULL;
1101   parser->m_commentHandler = NULL;
1102   parser->m_startCdataSectionHandler = NULL;
1103   parser->m_endCdataSectionHandler = NULL;
1104   parser->m_defaultHandler = NULL;
1105   parser->m_startDoctypeDeclHandler = NULL;
1106   parser->m_endDoctypeDeclHandler = NULL;
1107   parser->m_unparsedEntityDeclHandler = NULL;
1108   parser->m_notationDeclHandler = NULL;
1109   parser->m_startNamespaceDeclHandler = NULL;
1110   parser->m_endNamespaceDeclHandler = NULL;
1111   parser->m_notStandaloneHandler = NULL;
1112   parser->m_externalEntityRefHandler = NULL;
1113   parser->m_externalEntityRefHandlerArg = parser;
1114   parser->m_skippedEntityHandler = NULL;
1115   parser->m_elementDeclHandler = NULL;
1116   parser->m_attlistDeclHandler = NULL;
1117   parser->m_entityDeclHandler = NULL;
1118   parser->m_xmlDeclHandler = NULL;
1119   parser->m_bufferPtr = parser->m_buffer;
1120   parser->m_bufferEnd = parser->m_buffer;
1121   parser->m_parseEndByteIndex = 0;
1122   parser->m_parseEndPtr = NULL;
1123   parser->m_declElementType = NULL;
1124   parser->m_declAttributeId = NULL;
1125   parser->m_declEntity = NULL;
1126   parser->m_doctypeName = NULL;
1127   parser->m_doctypeSysid = NULL;
1128   parser->m_doctypePubid = NULL;
1129   parser->m_declAttributeType = NULL;
1130   parser->m_declNotationName = NULL;
1131   parser->m_declNotationPublicId = NULL;
1132   parser->m_declAttributeIsCdata = XML_FALSE;
1133   parser->m_declAttributeIsId = XML_FALSE;
1134   memset(&parser->m_position, 0, sizeof(POSITION));
1135   parser->m_errorCode = XML_ERROR_NONE;
1136   parser->m_eventPtr = NULL;
1137   parser->m_eventEndPtr = NULL;
1138   parser->m_positionPtr = NULL;
1139   parser->m_openInternalEntities = NULL;
1140   parser->m_defaultExpandInternalEntities = XML_TRUE;
1141   parser->m_tagLevel = 0;
1142   parser->m_tagStack = NULL;
1143   parser->m_inheritedBindings = NULL;
1144   parser->m_nSpecifiedAtts = 0;
1145   parser->m_unknownEncodingMem = NULL;
1146   parser->m_unknownEncodingRelease = NULL;
1147   parser->m_unknownEncodingData = NULL;
1148   parser->m_parentParser = NULL;
1149   parser->m_parsingStatus.parsing = XML_INITIALIZED;
1150 #ifdef XML_DTD
1151   parser->m_isParamEntity = XML_FALSE;
1152   parser->m_useForeignDTD = XML_FALSE;
1153   parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1154 #endif
1155   parser->m_hash_secret_salt = 0;
1156 
1157 #ifdef XML_DTD
1158   memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1159   parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1160   parser->m_accounting.maximumAmplificationFactor
1161       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1162   parser->m_accounting.activationThresholdBytes
1163       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1164 
1165   memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1166   parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1167 #endif
1168 }
1169 
1170 /* moves list of bindings to m_freeBindingList */
1171 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1172 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1173   while (bindings) {
1174     BINDING *b = bindings;
1175     bindings = bindings->nextTagBinding;
1176     b->nextTagBinding = parser->m_freeBindingList;
1177     parser->m_freeBindingList = b;
1178   }
1179 }
1180 
1181 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1182 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1183   TAG *tStk;
1184   OPEN_INTERNAL_ENTITY *openEntityList;
1185 
1186   if (parser == NULL)
1187     return XML_FALSE;
1188 
1189   if (parser->m_parentParser)
1190     return XML_FALSE;
1191   /* move m_tagStack to m_freeTagList */
1192   tStk = parser->m_tagStack;
1193   while (tStk) {
1194     TAG *tag = tStk;
1195     tStk = tStk->parent;
1196     tag->parent = parser->m_freeTagList;
1197     moveToFreeBindingList(parser, tag->bindings);
1198     tag->bindings = NULL;
1199     parser->m_freeTagList = tag;
1200   }
1201   /* move m_openInternalEntities to m_freeInternalEntities */
1202   openEntityList = parser->m_openInternalEntities;
1203   while (openEntityList) {
1204     OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1205     openEntityList = openEntity->next;
1206     openEntity->next = parser->m_freeInternalEntities;
1207     parser->m_freeInternalEntities = openEntity;
1208   }
1209   moveToFreeBindingList(parser, parser->m_inheritedBindings);
1210   FREE(parser, parser->m_unknownEncodingMem);
1211   if (parser->m_unknownEncodingRelease)
1212     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1213   poolClear(&parser->m_tempPool);
1214   poolClear(&parser->m_temp2Pool);
1215   FREE(parser, (void *)parser->m_protocolEncodingName);
1216   parser->m_protocolEncodingName = NULL;
1217   parserInit(parser, encodingName);
1218   dtdReset(parser->m_dtd, &parser->m_mem);
1219   return XML_TRUE;
1220 }
1221 
1222 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1223 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1224   if (parser == NULL)
1225     return XML_STATUS_ERROR;
1226   /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1227      XXX There's no way for the caller to determine which of the
1228      XXX possible error cases caused the XML_STATUS_ERROR return.
1229   */
1230   if (parser->m_parsingStatus.parsing == XML_PARSING
1231       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1232     return XML_STATUS_ERROR;
1233 
1234   /* Get rid of any previous encoding name */
1235   FREE(parser, (void *)parser->m_protocolEncodingName);
1236 
1237   if (encodingName == NULL)
1238     /* No new encoding name */
1239     parser->m_protocolEncodingName = NULL;
1240   else {
1241     /* Copy the new encoding name into allocated memory */
1242     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1243     if (! parser->m_protocolEncodingName)
1244       return XML_STATUS_ERROR;
1245   }
1246   return XML_STATUS_OK;
1247 }
1248 
1249 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1250 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1251                                const XML_Char *encodingName) {
1252   XML_Parser parser = oldParser;
1253   DTD *newDtd = NULL;
1254   DTD *oldDtd;
1255   XML_StartElementHandler oldStartElementHandler;
1256   XML_EndElementHandler oldEndElementHandler;
1257   XML_CharacterDataHandler oldCharacterDataHandler;
1258   XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1259   XML_CommentHandler oldCommentHandler;
1260   XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1261   XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1262   XML_DefaultHandler oldDefaultHandler;
1263   XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1264   XML_NotationDeclHandler oldNotationDeclHandler;
1265   XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1266   XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1267   XML_NotStandaloneHandler oldNotStandaloneHandler;
1268   XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1269   XML_SkippedEntityHandler oldSkippedEntityHandler;
1270   XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1271   XML_ElementDeclHandler oldElementDeclHandler;
1272   XML_AttlistDeclHandler oldAttlistDeclHandler;
1273   XML_EntityDeclHandler oldEntityDeclHandler;
1274   XML_XmlDeclHandler oldXmlDeclHandler;
1275   ELEMENT_TYPE *oldDeclElementType;
1276 
1277   void *oldUserData;
1278   void *oldHandlerArg;
1279   XML_Bool oldDefaultExpandInternalEntities;
1280   XML_Parser oldExternalEntityRefHandlerArg;
1281 #ifdef XML_DTD
1282   enum XML_ParamEntityParsing oldParamEntityParsing;
1283   int oldInEntityValue;
1284 #endif
1285   XML_Bool oldns_triplets;
1286   /* Note that the new parser shares the same hash secret as the old
1287      parser, so that dtdCopy and copyEntityTable can lookup values
1288      from hash tables associated with either parser without us having
1289      to worry which hash secrets each table has.
1290   */
1291   unsigned long oldhash_secret_salt;
1292 
1293   /* Validate the oldParser parameter before we pull everything out of it */
1294   if (oldParser == NULL)
1295     return NULL;
1296 
1297   /* Stash the original parser contents on the stack */
1298   oldDtd = parser->m_dtd;
1299   oldStartElementHandler = parser->m_startElementHandler;
1300   oldEndElementHandler = parser->m_endElementHandler;
1301   oldCharacterDataHandler = parser->m_characterDataHandler;
1302   oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1303   oldCommentHandler = parser->m_commentHandler;
1304   oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1305   oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1306   oldDefaultHandler = parser->m_defaultHandler;
1307   oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1308   oldNotationDeclHandler = parser->m_notationDeclHandler;
1309   oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1310   oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1311   oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1312   oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1313   oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1314   oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1315   oldElementDeclHandler = parser->m_elementDeclHandler;
1316   oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1317   oldEntityDeclHandler = parser->m_entityDeclHandler;
1318   oldXmlDeclHandler = parser->m_xmlDeclHandler;
1319   oldDeclElementType = parser->m_declElementType;
1320 
1321   oldUserData = parser->m_userData;
1322   oldHandlerArg = parser->m_handlerArg;
1323   oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1324   oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1325 #ifdef XML_DTD
1326   oldParamEntityParsing = parser->m_paramEntityParsing;
1327   oldInEntityValue = parser->m_prologState.inEntityValue;
1328 #endif
1329   oldns_triplets = parser->m_ns_triplets;
1330   /* Note that the new parser shares the same hash secret as the old
1331      parser, so that dtdCopy and copyEntityTable can lookup values
1332      from hash tables associated with either parser without us having
1333      to worry which hash secrets each table has.
1334   */
1335   oldhash_secret_salt = parser->m_hash_secret_salt;
1336 
1337 #ifdef XML_DTD
1338   if (! context)
1339     newDtd = oldDtd;
1340 #endif /* XML_DTD */
1341 
1342   /* Note that the magical uses of the pre-processor to make field
1343      access look more like C++ require that `parser' be overwritten
1344      here.  This makes this function more painful to follow than it
1345      would be otherwise.
1346   */
1347   if (parser->m_ns) {
1348     XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1349     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1350   } else {
1351     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1352   }
1353 
1354   if (! parser)
1355     return NULL;
1356 
1357   parser->m_startElementHandler = oldStartElementHandler;
1358   parser->m_endElementHandler = oldEndElementHandler;
1359   parser->m_characterDataHandler = oldCharacterDataHandler;
1360   parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1361   parser->m_commentHandler = oldCommentHandler;
1362   parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1363   parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1364   parser->m_defaultHandler = oldDefaultHandler;
1365   parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1366   parser->m_notationDeclHandler = oldNotationDeclHandler;
1367   parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1368   parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1369   parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1370   parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1371   parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1372   parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1373   parser->m_elementDeclHandler = oldElementDeclHandler;
1374   parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1375   parser->m_entityDeclHandler = oldEntityDeclHandler;
1376   parser->m_xmlDeclHandler = oldXmlDeclHandler;
1377   parser->m_declElementType = oldDeclElementType;
1378   parser->m_userData = oldUserData;
1379   if (oldUserData == oldHandlerArg)
1380     parser->m_handlerArg = parser->m_userData;
1381   else
1382     parser->m_handlerArg = parser;
1383   if (oldExternalEntityRefHandlerArg != oldParser)
1384     parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1385   parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1386   parser->m_ns_triplets = oldns_triplets;
1387   parser->m_hash_secret_salt = oldhash_secret_salt;
1388   parser->m_parentParser = oldParser;
1389 #ifdef XML_DTD
1390   parser->m_paramEntityParsing = oldParamEntityParsing;
1391   parser->m_prologState.inEntityValue = oldInEntityValue;
1392   if (context) {
1393 #endif /* XML_DTD */
1394     if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1395         || ! setContext(parser, context)) {
1396       XML_ParserFree(parser);
1397       return NULL;
1398     }
1399     parser->m_processor = externalEntityInitProcessor;
1400 #ifdef XML_DTD
1401   } else {
1402     /* The DTD instance referenced by parser->m_dtd is shared between the
1403        document's root parser and external PE parsers, therefore one does not
1404        need to call setContext. In addition, one also *must* not call
1405        setContext, because this would overwrite existing prefix->binding
1406        pointers in parser->m_dtd with ones that get destroyed with the external
1407        PE parser. This would leave those prefixes with dangling pointers.
1408     */
1409     parser->m_isParamEntity = XML_TRUE;
1410     XmlPrologStateInitExternalEntity(&parser->m_prologState);
1411     parser->m_processor = externalParEntInitProcessor;
1412   }
1413 #endif /* XML_DTD */
1414   return parser;
1415 }
1416 
1417 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1418 destroyBindings(BINDING *bindings, XML_Parser parser) {
1419   for (;;) {
1420     BINDING *b = bindings;
1421     if (! b)
1422       break;
1423     bindings = b->nextTagBinding;
1424     FREE(parser, b->uri);
1425     FREE(parser, b);
1426   }
1427 }
1428 
1429 void XMLCALL
XML_ParserFree(XML_Parser parser)1430 XML_ParserFree(XML_Parser parser) {
1431   TAG *tagList;
1432   OPEN_INTERNAL_ENTITY *entityList;
1433   if (parser == NULL)
1434     return;
1435   /* free m_tagStack and m_freeTagList */
1436   tagList = parser->m_tagStack;
1437   for (;;) {
1438     TAG *p;
1439     if (tagList == NULL) {
1440       if (parser->m_freeTagList == NULL)
1441         break;
1442       tagList = parser->m_freeTagList;
1443       parser->m_freeTagList = NULL;
1444     }
1445     p = tagList;
1446     tagList = tagList->parent;
1447     FREE(parser, p->buf);
1448     destroyBindings(p->bindings, parser);
1449     FREE(parser, p);
1450   }
1451   /* free m_openInternalEntities and m_freeInternalEntities */
1452   entityList = parser->m_openInternalEntities;
1453   for (;;) {
1454     OPEN_INTERNAL_ENTITY *openEntity;
1455     if (entityList == NULL) {
1456       if (parser->m_freeInternalEntities == NULL)
1457         break;
1458       entityList = parser->m_freeInternalEntities;
1459       parser->m_freeInternalEntities = NULL;
1460     }
1461     openEntity = entityList;
1462     entityList = entityList->next;
1463     FREE(parser, openEntity);
1464   }
1465 
1466   destroyBindings(parser->m_freeBindingList, parser);
1467   destroyBindings(parser->m_inheritedBindings, parser);
1468   poolDestroy(&parser->m_tempPool);
1469   poolDestroy(&parser->m_temp2Pool);
1470   FREE(parser, (void *)parser->m_protocolEncodingName);
1471 #ifdef XML_DTD
1472   /* external parameter entity parsers share the DTD structure
1473      parser->m_dtd with the root parser, so we must not destroy it
1474   */
1475   if (! parser->m_isParamEntity && parser->m_dtd)
1476 #else
1477   if (parser->m_dtd)
1478 #endif /* XML_DTD */
1479     dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1480                &parser->m_mem);
1481   FREE(parser, (void *)parser->m_atts);
1482 #ifdef XML_ATTR_INFO
1483   FREE(parser, (void *)parser->m_attInfo);
1484 #endif
1485   FREE(parser, parser->m_groupConnector);
1486   FREE(parser, parser->m_buffer);
1487   FREE(parser, parser->m_dataBuf);
1488   FREE(parser, parser->m_nsAtts);
1489   FREE(parser, parser->m_unknownEncodingMem);
1490   if (parser->m_unknownEncodingRelease)
1491     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1492   FREE(parser, parser);
1493 }
1494 
1495 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1496 XML_UseParserAsHandlerArg(XML_Parser parser) {
1497   if (parser != NULL)
1498     parser->m_handlerArg = parser;
1499 }
1500 
1501 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1502 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1503   if (parser == NULL)
1504     return XML_ERROR_INVALID_ARGUMENT;
1505 #ifdef XML_DTD
1506   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1507   if (parser->m_parsingStatus.parsing == XML_PARSING
1508       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1509     return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1510   parser->m_useForeignDTD = useDTD;
1511   return XML_ERROR_NONE;
1512 #else
1513   UNUSED_P(useDTD);
1514   return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1515 #endif
1516 }
1517 
1518 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1519 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1520   if (parser == NULL)
1521     return;
1522   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1523   if (parser->m_parsingStatus.parsing == XML_PARSING
1524       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1525     return;
1526   parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1527 }
1528 
1529 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1530 XML_SetUserData(XML_Parser parser, void *p) {
1531   if (parser == NULL)
1532     return;
1533   if (parser->m_handlerArg == parser->m_userData)
1534     parser->m_handlerArg = parser->m_userData = p;
1535   else
1536     parser->m_userData = p;
1537 }
1538 
1539 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1540 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1541   if (parser == NULL)
1542     return XML_STATUS_ERROR;
1543   if (p) {
1544     p = poolCopyString(&parser->m_dtd->pool, p);
1545     if (! p)
1546       return XML_STATUS_ERROR;
1547     parser->m_curBase = p;
1548   } else
1549     parser->m_curBase = NULL;
1550   return XML_STATUS_OK;
1551 }
1552 
1553 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1554 XML_GetBase(XML_Parser parser) {
1555   if (parser == NULL)
1556     return NULL;
1557   return parser->m_curBase;
1558 }
1559 
1560 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1561 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1562   if (parser == NULL)
1563     return -1;
1564   return parser->m_nSpecifiedAtts;
1565 }
1566 
1567 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1568 XML_GetIdAttributeIndex(XML_Parser parser) {
1569   if (parser == NULL)
1570     return -1;
1571   return parser->m_idAttIndex;
1572 }
1573 
#ifdef XML_ATTR_INFO
/* Returns the parser's attribute info array, or NULL for a NULL parser.
   Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  if (parser != NULL)
    return parser->m_attInfo;
  return NULL;
}
#endif
1582 
1583 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1584 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1585                       XML_EndElementHandler end) {
1586   if (parser == NULL)
1587     return;
1588   parser->m_startElementHandler = start;
1589   parser->m_endElementHandler = end;
1590 }
1591 
1592 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1593 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1594   if (parser != NULL)
1595     parser->m_startElementHandler = start;
1596 }
1597 
1598 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1599 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1600   if (parser != NULL)
1601     parser->m_endElementHandler = end;
1602 }
1603 
1604 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1605 XML_SetCharacterDataHandler(XML_Parser parser,
1606                             XML_CharacterDataHandler handler) {
1607   if (parser != NULL)
1608     parser->m_characterDataHandler = handler;
1609 }
1610 
1611 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1612 XML_SetProcessingInstructionHandler(XML_Parser parser,
1613                                     XML_ProcessingInstructionHandler handler) {
1614   if (parser != NULL)
1615     parser->m_processingInstructionHandler = handler;
1616 }
1617 
1618 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1619 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1620   if (parser != NULL)
1621     parser->m_commentHandler = handler;
1622 }
1623 
1624 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1625 XML_SetCdataSectionHandler(XML_Parser parser,
1626                            XML_StartCdataSectionHandler start,
1627                            XML_EndCdataSectionHandler end) {
1628   if (parser == NULL)
1629     return;
1630   parser->m_startCdataSectionHandler = start;
1631   parser->m_endCdataSectionHandler = end;
1632 }
1633 
1634 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1635 XML_SetStartCdataSectionHandler(XML_Parser parser,
1636                                 XML_StartCdataSectionHandler start) {
1637   if (parser != NULL)
1638     parser->m_startCdataSectionHandler = start;
1639 }
1640 
1641 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1642 XML_SetEndCdataSectionHandler(XML_Parser parser,
1643                               XML_EndCdataSectionHandler end) {
1644   if (parser != NULL)
1645     parser->m_endCdataSectionHandler = end;
1646 }
1647 
1648 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1649 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1650   if (parser == NULL)
1651     return;
1652   parser->m_defaultHandler = handler;
1653   parser->m_defaultExpandInternalEntities = XML_FALSE;
1654 }
1655 
1656 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1657 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1658   if (parser == NULL)
1659     return;
1660   parser->m_defaultHandler = handler;
1661   parser->m_defaultExpandInternalEntities = XML_TRUE;
1662 }
1663 
1664 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1665 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1666                           XML_EndDoctypeDeclHandler end) {
1667   if (parser == NULL)
1668     return;
1669   parser->m_startDoctypeDeclHandler = start;
1670   parser->m_endDoctypeDeclHandler = end;
1671 }
1672 
1673 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1674 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1675                                XML_StartDoctypeDeclHandler start) {
1676   if (parser != NULL)
1677     parser->m_startDoctypeDeclHandler = start;
1678 }
1679 
1680 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1681 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1682   if (parser != NULL)
1683     parser->m_endDoctypeDeclHandler = end;
1684 }
1685 
1686 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1687 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1688                                  XML_UnparsedEntityDeclHandler handler) {
1689   if (parser != NULL)
1690     parser->m_unparsedEntityDeclHandler = handler;
1691 }
1692 
1693 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1694 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1695   if (parser != NULL)
1696     parser->m_notationDeclHandler = handler;
1697 }
1698 
1699 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1700 XML_SetNamespaceDeclHandler(XML_Parser parser,
1701                             XML_StartNamespaceDeclHandler start,
1702                             XML_EndNamespaceDeclHandler end) {
1703   if (parser == NULL)
1704     return;
1705   parser->m_startNamespaceDeclHandler = start;
1706   parser->m_endNamespaceDeclHandler = end;
1707 }
1708 
1709 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1710 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1711                                  XML_StartNamespaceDeclHandler start) {
1712   if (parser != NULL)
1713     parser->m_startNamespaceDeclHandler = start;
1714 }
1715 
1716 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1717 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1718                                XML_EndNamespaceDeclHandler end) {
1719   if (parser != NULL)
1720     parser->m_endNamespaceDeclHandler = end;
1721 }
1722 
1723 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1724 XML_SetNotStandaloneHandler(XML_Parser parser,
1725                             XML_NotStandaloneHandler handler) {
1726   if (parser != NULL)
1727     parser->m_notStandaloneHandler = handler;
1728 }
1729 
1730 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1731 XML_SetExternalEntityRefHandler(XML_Parser parser,
1732                                 XML_ExternalEntityRefHandler handler) {
1733   if (parser != NULL)
1734     parser->m_externalEntityRefHandler = handler;
1735 }
1736 
1737 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1738 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1739   if (parser == NULL)
1740     return;
1741   if (arg)
1742     parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1743   else
1744     parser->m_externalEntityRefHandlerArg = parser;
1745 }
1746 
1747 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1748 XML_SetSkippedEntityHandler(XML_Parser parser,
1749                             XML_SkippedEntityHandler handler) {
1750   if (parser != NULL)
1751     parser->m_skippedEntityHandler = handler;
1752 }
1753 
1754 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1755 XML_SetUnknownEncodingHandler(XML_Parser parser,
1756                               XML_UnknownEncodingHandler handler, void *data) {
1757   if (parser == NULL)
1758     return;
1759   parser->m_unknownEncodingHandler = handler;
1760   parser->m_unknownEncodingHandlerData = data;
1761 }
1762 
1763 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1764 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1765   if (parser != NULL)
1766     parser->m_elementDeclHandler = eldecl;
1767 }
1768 
1769 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1770 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1771   if (parser != NULL)
1772     parser->m_attlistDeclHandler = attdecl;
1773 }
1774 
1775 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1776 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1777   if (parser != NULL)
1778     parser->m_entityDeclHandler = handler;
1779 }
1780 
1781 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1782 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1783   if (parser != NULL)
1784     parser->m_xmlDeclHandler = handler;
1785 }
1786 
1787 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1788 XML_SetParamEntityParsing(XML_Parser parser,
1789                           enum XML_ParamEntityParsing peParsing) {
1790   if (parser == NULL)
1791     return 0;
1792   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1793   if (parser->m_parsingStatus.parsing == XML_PARSING
1794       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1795     return 0;
1796 #ifdef XML_DTD
1797   parser->m_paramEntityParsing = peParsing;
1798   return 1;
1799 #else
1800   return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1801 #endif
1802 }
1803 
1804 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1805 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1806   if (parser == NULL)
1807     return 0;
1808   if (parser->m_parentParser)
1809     return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1810   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1811   if (parser->m_parsingStatus.parsing == XML_PARSING
1812       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1813     return 0;
1814   parser->m_hash_secret_salt = hash_salt;
1815   return 1;
1816 }
1817 
1818 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1819 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1820   if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1821     if (parser != NULL)
1822       parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1823     return XML_STATUS_ERROR;
1824   }
1825   switch (parser->m_parsingStatus.parsing) {
1826   case XML_SUSPENDED:
1827     parser->m_errorCode = XML_ERROR_SUSPENDED;
1828     return XML_STATUS_ERROR;
1829   case XML_FINISHED:
1830     parser->m_errorCode = XML_ERROR_FINISHED;
1831     return XML_STATUS_ERROR;
1832   case XML_INITIALIZED:
1833     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1834       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1835       return XML_STATUS_ERROR;
1836     }
1837     /* fall through */
1838   default:
1839     parser->m_parsingStatus.parsing = XML_PARSING;
1840   }
1841 
1842   if (len == 0) {
1843     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1844     if (! isFinal)
1845       return XML_STATUS_OK;
1846     parser->m_positionPtr = parser->m_bufferPtr;
1847     parser->m_parseEndPtr = parser->m_bufferEnd;
1848 
1849     /* If data are left over from last buffer, and we now know that these
1850        data are the final chunk of input, then we have to check them again
1851        to detect errors based on that fact.
1852     */
1853     parser->m_errorCode
1854         = parser->m_processor(parser, parser->m_bufferPtr,
1855                               parser->m_parseEndPtr, &parser->m_bufferPtr);
1856 
1857     if (parser->m_errorCode == XML_ERROR_NONE) {
1858       switch (parser->m_parsingStatus.parsing) {
1859       case XML_SUSPENDED:
1860         /* It is hard to be certain, but it seems that this case
1861          * cannot occur.  This code is cleaning up a previous parse
1862          * with no new data (since len == 0).  Changing the parsing
1863          * state requires getting to execute a handler function, and
1864          * there doesn't seem to be an opportunity for that while in
1865          * this circumstance.
1866          *
1867          * Given the uncertainty, we retain the code but exclude it
1868          * from coverage tests.
1869          *
1870          * LCOV_EXCL_START
1871          */
1872         XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
1873                           parser->m_bufferPtr, &parser->m_position);
1874         parser->m_positionPtr = parser->m_bufferPtr;
1875         return XML_STATUS_SUSPENDED;
1876         /* LCOV_EXCL_STOP */
1877       case XML_INITIALIZED:
1878       case XML_PARSING:
1879         parser->m_parsingStatus.parsing = XML_FINISHED;
1880         /* fall through */
1881       default:
1882         return XML_STATUS_OK;
1883       }
1884     }
1885     parser->m_eventEndPtr = parser->m_eventPtr;
1886     parser->m_processor = errorProcessor;
1887     return XML_STATUS_ERROR;
1888   }
1889 #ifndef XML_CONTEXT_BYTES
1890   else if (parser->m_bufferPtr == parser->m_bufferEnd) {
1891     const char *end;
1892     int nLeftOver;
1893     enum XML_Status result;
1894     /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1895     if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1896       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1897       parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1898       parser->m_processor = errorProcessor;
1899       return XML_STATUS_ERROR;
1900     }
1901     parser->m_parseEndByteIndex += len;
1902     parser->m_positionPtr = s;
1903     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1904 
1905     parser->m_errorCode
1906         = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
1907 
1908     if (parser->m_errorCode != XML_ERROR_NONE) {
1909       parser->m_eventEndPtr = parser->m_eventPtr;
1910       parser->m_processor = errorProcessor;
1911       return XML_STATUS_ERROR;
1912     } else {
1913       switch (parser->m_parsingStatus.parsing) {
1914       case XML_SUSPENDED:
1915         result = XML_STATUS_SUSPENDED;
1916         break;
1917       case XML_INITIALIZED:
1918       case XML_PARSING:
1919         if (isFinal) {
1920           parser->m_parsingStatus.parsing = XML_FINISHED;
1921           return XML_STATUS_OK;
1922         }
1923       /* fall through */
1924       default:
1925         result = XML_STATUS_OK;
1926       }
1927     }
1928 
1929     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1930                       &parser->m_position);
1931     nLeftOver = s + len - end;
1932     if (nLeftOver) {
1933       if (parser->m_buffer == NULL
1934           || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
1935         /* avoid _signed_ integer overflow */
1936         char *temp = NULL;
1937         const int bytesToAllocate = (int)((unsigned)len * 2U);
1938         if (bytesToAllocate > 0) {
1939           temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
1940         }
1941         if (temp == NULL) {
1942           parser->m_errorCode = XML_ERROR_NO_MEMORY;
1943           parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1944           parser->m_processor = errorProcessor;
1945           return XML_STATUS_ERROR;
1946         }
1947         parser->m_buffer = temp;
1948         parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
1949       }
1950       memcpy(parser->m_buffer, end, nLeftOver);
1951     }
1952     parser->m_bufferPtr = parser->m_buffer;
1953     parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1954     parser->m_positionPtr = parser->m_bufferPtr;
1955     parser->m_parseEndPtr = parser->m_bufferEnd;
1956     parser->m_eventPtr = parser->m_bufferPtr;
1957     parser->m_eventEndPtr = parser->m_bufferPtr;
1958     return result;
1959   }
1960 #endif /* not defined XML_CONTEXT_BYTES */
1961   else {
1962     void *buff = XML_GetBuffer(parser, len);
1963     if (buff == NULL)
1964       return XML_STATUS_ERROR;
1965     else {
1966       memcpy(buff, s, len);
1967       return XML_ParseBuffer(parser, len, isFinal);
1968     }
1969   }
1970 }
1971 
1972 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)1973 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
1974   const char *start;
1975   enum XML_Status result = XML_STATUS_OK;
1976 
1977   if (parser == NULL)
1978     return XML_STATUS_ERROR;
1979   switch (parser->m_parsingStatus.parsing) {
1980   case XML_SUSPENDED:
1981     parser->m_errorCode = XML_ERROR_SUSPENDED;
1982     return XML_STATUS_ERROR;
1983   case XML_FINISHED:
1984     parser->m_errorCode = XML_ERROR_FINISHED;
1985     return XML_STATUS_ERROR;
1986   case XML_INITIALIZED:
1987     /* Has someone called XML_GetBuffer successfully before? */
1988     if (! parser->m_bufferPtr) {
1989       parser->m_errorCode = XML_ERROR_NO_BUFFER;
1990       return XML_STATUS_ERROR;
1991     }
1992 
1993     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1994       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1995       return XML_STATUS_ERROR;
1996     }
1997     /* fall through */
1998   default:
1999     parser->m_parsingStatus.parsing = XML_PARSING;
2000   }
2001 
2002   start = parser->m_bufferPtr;
2003   parser->m_positionPtr = start;
2004   parser->m_bufferEnd += len;
2005   parser->m_parseEndPtr = parser->m_bufferEnd;
2006   parser->m_parseEndByteIndex += len;
2007   parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2008 
2009   parser->m_errorCode = parser->m_processor(
2010       parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
2011 
2012   if (parser->m_errorCode != XML_ERROR_NONE) {
2013     parser->m_eventEndPtr = parser->m_eventPtr;
2014     parser->m_processor = errorProcessor;
2015     return XML_STATUS_ERROR;
2016   } else {
2017     switch (parser->m_parsingStatus.parsing) {
2018     case XML_SUSPENDED:
2019       result = XML_STATUS_SUSPENDED;
2020       break;
2021     case XML_INITIALIZED:
2022     case XML_PARSING:
2023       if (isFinal) {
2024         parser->m_parsingStatus.parsing = XML_FINISHED;
2025         return result;
2026       }
2027     default:; /* should not happen */
2028     }
2029   }
2030 
2031   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2032                     parser->m_bufferPtr, &parser->m_position);
2033   parser->m_positionPtr = parser->m_bufferPtr;
2034   return result;
2035 }
2036 
2037 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2038 XML_GetBuffer(XML_Parser parser, int len) {
2039   if (parser == NULL)
2040     return NULL;
2041   if (len < 0) {
2042     parser->m_errorCode = XML_ERROR_NO_MEMORY;
2043     return NULL;
2044   }
2045   switch (parser->m_parsingStatus.parsing) {
2046   case XML_SUSPENDED:
2047     parser->m_errorCode = XML_ERROR_SUSPENDED;
2048     return NULL;
2049   case XML_FINISHED:
2050     parser->m_errorCode = XML_ERROR_FINISHED;
2051     return NULL;
2052   default:;
2053   }
2054 
2055   if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
2056 #ifdef XML_CONTEXT_BYTES
2057     int keep;
2058 #endif /* defined XML_CONTEXT_BYTES */
2059     /* Do not invoke signed arithmetic overflow: */
2060     int neededSize = (int)((unsigned)len
2061                            + (unsigned)EXPAT_SAFE_PTR_DIFF(
2062                                parser->m_bufferEnd, parser->m_bufferPtr));
2063     if (neededSize < 0) {
2064       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2065       return NULL;
2066     }
2067 #ifdef XML_CONTEXT_BYTES
2068     keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2069     if (keep > XML_CONTEXT_BYTES)
2070       keep = XML_CONTEXT_BYTES;
2071     /* Detect and prevent integer overflow */
2072     if (keep > INT_MAX - neededSize) {
2073       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2074       return NULL;
2075     }
2076     neededSize += keep;
2077 #endif /* defined XML_CONTEXT_BYTES */
2078     if (neededSize
2079         <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2080 #ifdef XML_CONTEXT_BYTES
2081       if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2082         int offset
2083             = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2084               - keep;
2085         /* The buffer pointers cannot be NULL here; we have at least some bytes
2086          * in the buffer */
2087         memmove(parser->m_buffer, &parser->m_buffer[offset],
2088                 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2089         parser->m_bufferEnd -= offset;
2090         parser->m_bufferPtr -= offset;
2091       }
2092 #else
2093       if (parser->m_buffer && parser->m_bufferPtr) {
2094         memmove(parser->m_buffer, parser->m_bufferPtr,
2095                 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2096         parser->m_bufferEnd
2097             = parser->m_buffer
2098               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2099         parser->m_bufferPtr = parser->m_buffer;
2100       }
2101 #endif /* not defined XML_CONTEXT_BYTES */
2102     } else {
2103       char *newBuf;
2104       int bufferSize
2105           = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
2106       if (bufferSize == 0)
2107         bufferSize = INIT_BUFFER_SIZE;
2108       do {
2109         /* Do not invoke signed arithmetic overflow: */
2110         bufferSize = (int)(2U * (unsigned)bufferSize);
2111       } while (bufferSize < neededSize && bufferSize > 0);
2112       if (bufferSize <= 0) {
2113         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2114         return NULL;
2115       }
2116       newBuf = (char *)MALLOC(parser, bufferSize);
2117       if (newBuf == 0) {
2118         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2119         return NULL;
2120       }
2121       parser->m_bufferLim = newBuf + bufferSize;
2122 #ifdef XML_CONTEXT_BYTES
2123       if (parser->m_bufferPtr) {
2124         memcpy(newBuf, &parser->m_bufferPtr[-keep],
2125                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2126                    + keep);
2127         FREE(parser, parser->m_buffer);
2128         parser->m_buffer = newBuf;
2129         parser->m_bufferEnd
2130             = parser->m_buffer
2131               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2132               + keep;
2133         parser->m_bufferPtr = parser->m_buffer + keep;
2134       } else {
2135         /* This must be a brand new buffer with no data in it yet */
2136         parser->m_bufferEnd = newBuf;
2137         parser->m_bufferPtr = parser->m_buffer = newBuf;
2138       }
2139 #else
2140       if (parser->m_bufferPtr) {
2141         memcpy(newBuf, parser->m_bufferPtr,
2142                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2143         FREE(parser, parser->m_buffer);
2144         parser->m_bufferEnd
2145             = newBuf
2146               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2147       } else {
2148         /* This must be a brand new buffer with no data in it yet */
2149         parser->m_bufferEnd = newBuf;
2150       }
2151       parser->m_bufferPtr = parser->m_buffer = newBuf;
2152 #endif /* not defined XML_CONTEXT_BYTES */
2153     }
2154     parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2155     parser->m_positionPtr = NULL;
2156   }
2157   return parser->m_bufferEnd;
2158 }
2159 
2160 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2161 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2162   if (parser == NULL)
2163     return XML_STATUS_ERROR;
2164   switch (parser->m_parsingStatus.parsing) {
2165   case XML_SUSPENDED:
2166     if (resumable) {
2167       parser->m_errorCode = XML_ERROR_SUSPENDED;
2168       return XML_STATUS_ERROR;
2169     }
2170     parser->m_parsingStatus.parsing = XML_FINISHED;
2171     break;
2172   case XML_FINISHED:
2173     parser->m_errorCode = XML_ERROR_FINISHED;
2174     return XML_STATUS_ERROR;
2175   default:
2176     if (resumable) {
2177 #ifdef XML_DTD
2178       if (parser->m_isParamEntity) {
2179         parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2180         return XML_STATUS_ERROR;
2181       }
2182 #endif
2183       parser->m_parsingStatus.parsing = XML_SUSPENDED;
2184     } else
2185       parser->m_parsingStatus.parsing = XML_FINISHED;
2186   }
2187   return XML_STATUS_OK;
2188 }
2189 
2190 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2191 XML_ResumeParser(XML_Parser parser) {
2192   enum XML_Status result = XML_STATUS_OK;
2193 
2194   if (parser == NULL)
2195     return XML_STATUS_ERROR;
2196   if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2197     parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2198     return XML_STATUS_ERROR;
2199   }
2200   parser->m_parsingStatus.parsing = XML_PARSING;
2201 
2202   parser->m_errorCode = parser->m_processor(
2203       parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2204 
2205   if (parser->m_errorCode != XML_ERROR_NONE) {
2206     parser->m_eventEndPtr = parser->m_eventPtr;
2207     parser->m_processor = errorProcessor;
2208     return XML_STATUS_ERROR;
2209   } else {
2210     switch (parser->m_parsingStatus.parsing) {
2211     case XML_SUSPENDED:
2212       result = XML_STATUS_SUSPENDED;
2213       break;
2214     case XML_INITIALIZED:
2215     case XML_PARSING:
2216       if (parser->m_parsingStatus.finalBuffer) {
2217         parser->m_parsingStatus.parsing = XML_FINISHED;
2218         return result;
2219       }
2220     default:;
2221     }
2222   }
2223 
2224   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2225                     parser->m_bufferPtr, &parser->m_position);
2226   parser->m_positionPtr = parser->m_bufferPtr;
2227   return result;
2228 }
2229 
2230 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2231 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2232   if (parser == NULL)
2233     return;
2234   assert(status != NULL);
2235   *status = parser->m_parsingStatus;
2236 }
2237 
2238 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2239 XML_GetErrorCode(XML_Parser parser) {
2240   if (parser == NULL)
2241     return XML_ERROR_INVALID_ARGUMENT;
2242   return parser->m_errorCode;
2243 }
2244 
2245 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2246 XML_GetCurrentByteIndex(XML_Parser parser) {
2247   if (parser == NULL)
2248     return -1;
2249   if (parser->m_eventPtr)
2250     return (XML_Index)(parser->m_parseEndByteIndex
2251                        - (parser->m_parseEndPtr - parser->m_eventPtr));
2252   return -1;
2253 }
2254 
2255 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2256 XML_GetCurrentByteCount(XML_Parser parser) {
2257   if (parser == NULL)
2258     return 0;
2259   if (parser->m_eventEndPtr && parser->m_eventPtr)
2260     return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2261   return 0;
2262 }
2263 
2264 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2265 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2266 #ifdef XML_CONTEXT_BYTES
2267   if (parser == NULL)
2268     return NULL;
2269   if (parser->m_eventPtr && parser->m_buffer) {
2270     if (offset != NULL)
2271       *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2272     if (size != NULL)
2273       *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2274     return parser->m_buffer;
2275   }
2276 #else
2277   (void)parser;
2278   (void)offset;
2279   (void)size;
2280 #endif /* defined XML_CONTEXT_BYTES */
2281   return (const char *)0;
2282 }
2283 
2284 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2285 XML_GetCurrentLineNumber(XML_Parser parser) {
2286   if (parser == NULL)
2287     return 0;
2288   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2289     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2290                       parser->m_eventPtr, &parser->m_position);
2291     parser->m_positionPtr = parser->m_eventPtr;
2292   }
2293   return parser->m_position.lineNumber + 1;
2294 }
2295 
2296 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2297 XML_GetCurrentColumnNumber(XML_Parser parser) {
2298   if (parser == NULL)
2299     return 0;
2300   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2301     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2302                       parser->m_eventPtr, &parser->m_position);
2303     parser->m_positionPtr = parser->m_eventPtr;
2304   }
2305   return parser->m_position.columnNumber;
2306 }
2307 
2308 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2309 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2310   if (parser != NULL)
2311     FREE(parser, model);
2312 }
2313 
2314 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2315 XML_MemMalloc(XML_Parser parser, size_t size) {
2316   if (parser == NULL)
2317     return NULL;
2318   return MALLOC(parser, size);
2319 }
2320 
2321 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2322 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2323   if (parser == NULL)
2324     return NULL;
2325   return REALLOC(parser, ptr, size);
2326 }
2327 
2328 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2329 XML_MemFree(XML_Parser parser, void *ptr) {
2330   if (parser != NULL)
2331     FREE(parser, ptr);
2332 }
2333 
2334 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2335 XML_DefaultCurrent(XML_Parser parser) {
2336   if (parser == NULL)
2337     return;
2338   if (parser->m_defaultHandler) {
2339     if (parser->m_openInternalEntities)
2340       reportDefault(parser, parser->m_internalEncoding,
2341                     parser->m_openInternalEntities->internalEventPtr,
2342                     parser->m_openInternalEntities->internalEventEndPtr);
2343     else
2344       reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2345                     parser->m_eventEndPtr);
2346   }
2347 }
2348 
2349 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2350 XML_ErrorString(enum XML_Error code) {
2351   switch (code) {
2352   case XML_ERROR_NONE:
2353     return NULL;
2354   case XML_ERROR_NO_MEMORY:
2355     return XML_L("out of memory");
2356   case XML_ERROR_SYNTAX:
2357     return XML_L("syntax error");
2358   case XML_ERROR_NO_ELEMENTS:
2359     return XML_L("no element found");
2360   case XML_ERROR_INVALID_TOKEN:
2361     return XML_L("not well-formed (invalid token)");
2362   case XML_ERROR_UNCLOSED_TOKEN:
2363     return XML_L("unclosed token");
2364   case XML_ERROR_PARTIAL_CHAR:
2365     return XML_L("partial character");
2366   case XML_ERROR_TAG_MISMATCH:
2367     return XML_L("mismatched tag");
2368   case XML_ERROR_DUPLICATE_ATTRIBUTE:
2369     return XML_L("duplicate attribute");
2370   case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2371     return XML_L("junk after document element");
2372   case XML_ERROR_PARAM_ENTITY_REF:
2373     return XML_L("illegal parameter entity reference");
2374   case XML_ERROR_UNDEFINED_ENTITY:
2375     return XML_L("undefined entity");
2376   case XML_ERROR_RECURSIVE_ENTITY_REF:
2377     return XML_L("recursive entity reference");
2378   case XML_ERROR_ASYNC_ENTITY:
2379     return XML_L("asynchronous entity");
2380   case XML_ERROR_BAD_CHAR_REF:
2381     return XML_L("reference to invalid character number");
2382   case XML_ERROR_BINARY_ENTITY_REF:
2383     return XML_L("reference to binary entity");
2384   case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2385     return XML_L("reference to external entity in attribute");
2386   case XML_ERROR_MISPLACED_XML_PI:
2387     return XML_L("XML or text declaration not at start of entity");
2388   case XML_ERROR_UNKNOWN_ENCODING:
2389     return XML_L("unknown encoding");
2390   case XML_ERROR_INCORRECT_ENCODING:
2391     return XML_L("encoding specified in XML declaration is incorrect");
2392   case XML_ERROR_UNCLOSED_CDATA_SECTION:
2393     return XML_L("unclosed CDATA section");
2394   case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2395     return XML_L("error in processing external entity reference");
2396   case XML_ERROR_NOT_STANDALONE:
2397     return XML_L("document is not standalone");
2398   case XML_ERROR_UNEXPECTED_STATE:
2399     return XML_L("unexpected parser state - please send a bug report");
2400   case XML_ERROR_ENTITY_DECLARED_IN_PE:
2401     return XML_L("entity declared in parameter entity");
2402   case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2403     return XML_L("requested feature requires XML_DTD support in Expat");
2404   case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2405     return XML_L("cannot change setting once parsing has begun");
2406   /* Added in 1.95.7. */
2407   case XML_ERROR_UNBOUND_PREFIX:
2408     return XML_L("unbound prefix");
2409   /* Added in 1.95.8. */
2410   case XML_ERROR_UNDECLARING_PREFIX:
2411     return XML_L("must not undeclare prefix");
2412   case XML_ERROR_INCOMPLETE_PE:
2413     return XML_L("incomplete markup in parameter entity");
2414   case XML_ERROR_XML_DECL:
2415     return XML_L("XML declaration not well-formed");
2416   case XML_ERROR_TEXT_DECL:
2417     return XML_L("text declaration not well-formed");
2418   case XML_ERROR_PUBLICID:
2419     return XML_L("illegal character(s) in public id");
2420   case XML_ERROR_SUSPENDED:
2421     return XML_L("parser suspended");
2422   case XML_ERROR_NOT_SUSPENDED:
2423     return XML_L("parser not suspended");
2424   case XML_ERROR_ABORTED:
2425     return XML_L("parsing aborted");
2426   case XML_ERROR_FINISHED:
2427     return XML_L("parsing finished");
2428   case XML_ERROR_SUSPEND_PE:
2429     return XML_L("cannot suspend in external parameter entity");
2430   /* Added in 2.0.0. */
2431   case XML_ERROR_RESERVED_PREFIX_XML:
2432     return XML_L(
2433         "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2434   case XML_ERROR_RESERVED_PREFIX_XMLNS:
2435     return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2436   case XML_ERROR_RESERVED_NAMESPACE_URI:
2437     return XML_L(
2438         "prefix must not be bound to one of the reserved namespace names");
2439   /* Added in 2.2.5. */
2440   case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2441     return XML_L("invalid argument");
2442     /* Added in 2.3.0. */
2443   case XML_ERROR_NO_BUFFER:
2444     return XML_L(
2445         "a successful prior call to function XML_GetBuffer is required");
2446   /* Added in 2.4.0. */
2447   case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2448     return XML_L(
2449         "limit on input amplification factor (from DTD and entities) breached");
2450   }
2451   return NULL;
2452 }
2453 
2454 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2455 XML_ExpatVersion(void) {
2456   /* V1 is used to string-ize the version number. However, it would
2457      string-ize the actual version macro *names* unless we get them
2458      substituted before being passed to V1. CPP is defined to expand
2459      a macro, then rescan for more expansions. Thus, we use V2 to expand
2460      the version macros, then CPP will expand the resulting V1() macro
2461      with the correct numerals. */
2462   /* ### I'm assuming cpp is portable in this respect... */
2463 
2464 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2465 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2466 
2467   return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2468 
2469 #undef V1
2470 #undef V2
2471 }
2472 
2473 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2474 XML_ExpatVersionInfo(void) {
2475   XML_Expat_Version version;
2476 
2477   version.major = XML_MAJOR_VERSION;
2478   version.minor = XML_MINOR_VERSION;
2479   version.micro = XML_MICRO_VERSION;
2480 
2481   return version;
2482 }
2483 
2484 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2485 XML_GetFeatureList(void) {
2486   static const XML_Feature features[] = {
2487       {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2488        sizeof(XML_Char)},
2489       {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2490        sizeof(XML_LChar)},
2491 #ifdef XML_UNICODE
2492       {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2493 #endif
2494 #ifdef XML_UNICODE_WCHAR_T
2495       {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2496 #endif
2497 #ifdef XML_DTD
2498       {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2499 #endif
2500 #ifdef XML_CONTEXT_BYTES
2501       {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2502        XML_CONTEXT_BYTES},
2503 #endif
2504 #ifdef XML_MIN_SIZE
2505       {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2506 #endif
2507 #ifdef XML_NS
2508       {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2509 #endif
2510 #ifdef XML_LARGE_SIZE
2511       {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2512 #endif
2513 #ifdef XML_ATTR_INFO
2514       {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2515 #endif
2516 #ifdef XML_DTD
2517       /* Added in Expat 2.4.0. */
2518       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2519        XML_L("XML_BLAP_MAX_AMP"),
2520        (long int)
2521            EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2522       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2523        XML_L("XML_BLAP_ACT_THRES"),
2524        EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2525 #endif
2526       {XML_FEATURE_END, NULL, 0}};
2527 
2528   return features;
2529 }
2530 
2531 #ifdef XML_DTD
2532 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2533 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2534     XML_Parser parser, float maximumAmplificationFactor) {
2535   if ((parser == NULL) || (parser->m_parentParser != NULL)
2536       || isnan(maximumAmplificationFactor)
2537       || (maximumAmplificationFactor < 1.0f)) {
2538     return XML_FALSE;
2539   }
2540   parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2541   return XML_TRUE;
2542 }
2543 
2544 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2545 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2546     XML_Parser parser, unsigned long long activationThresholdBytes) {
2547   if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2548     return XML_FALSE;
2549   }
2550   parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2551   return XML_TRUE;
2552 }
2553 #endif /* XML_DTD */
2554 
2555 /* Initially tag->rawName always points into the parse buffer;
2556    for those TAG instances opened while the current parse buffer was
2557    processed, and not yet closed, we need to store tag->rawName in a more
2558    permanent location, since the parse buffer is about to be discarded.
2559 */
2560 static XML_Bool
storeRawNames(XML_Parser parser)2561 storeRawNames(XML_Parser parser) {
2562   TAG *tag = parser->m_tagStack;
2563   while (tag) {
2564     int bufSize;
2565     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2566     size_t rawNameLen;
2567     char *rawNameBuf = tag->buf + nameLen;
2568     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
2569        at the first entry that has already been copied; everything
2570        below it in the stack is already been accounted for in a
2571        previous call to this function.
2572     */
2573     if (tag->rawName == rawNameBuf)
2574       break;
2575     /* For re-use purposes we need to ensure that the
2576        size of tag->buf is a multiple of sizeof(XML_Char).
2577     */
2578     rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2579     /* Detect and prevent integer overflow. */
2580     if (rawNameLen > (size_t)INT_MAX - nameLen)
2581       return XML_FALSE;
2582     bufSize = nameLen + (int)rawNameLen;
2583     if (bufSize > tag->bufEnd - tag->buf) {
2584       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2585       if (temp == NULL)
2586         return XML_FALSE;
2587       /* if tag->name.str points to tag->buf (only when namespace
2588          processing is off) then we have to update it
2589       */
2590       if (tag->name.str == (XML_Char *)tag->buf)
2591         tag->name.str = (XML_Char *)temp;
2592       /* if tag->name.localPart is set (when namespace processing is on)
2593          then update it as well, since it will always point into tag->buf
2594       */
2595       if (tag->name.localPart)
2596         tag->name.localPart
2597             = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2598       tag->buf = temp;
2599       tag->bufEnd = temp + bufSize;
2600       rawNameBuf = temp + nameLen;
2601     }
2602     memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2603     tag->rawName = rawNameBuf;
2604     tag = tag->parent;
2605   }
2606   return XML_TRUE;
2607 }
2608 
2609 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2610 contentProcessor(XML_Parser parser, const char *start, const char *end,
2611                  const char **endPtr) {
2612   enum XML_Error result = doContent(
2613       parser, 0, parser->m_encoding, start, end, endPtr,
2614       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2615   if (result == XML_ERROR_NONE) {
2616     if (! storeRawNames(parser))
2617       return XML_ERROR_NO_MEMORY;
2618   }
2619   return result;
2620 }
2621 
2622 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2623 externalEntityInitProcessor(XML_Parser parser, const char *start,
2624                             const char *end, const char **endPtr) {
2625   enum XML_Error result = initializeEncoding(parser);
2626   if (result != XML_ERROR_NONE)
2627     return result;
2628   parser->m_processor = externalEntityInitProcessor2;
2629   return externalEntityInitProcessor2(parser, start, end, endPtr);
2630 }
2631 
2632 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2633 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2634                              const char *end, const char **endPtr) {
2635   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2636   int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2637   switch (tok) {
2638   case XML_TOK_BOM:
2639 #ifdef XML_DTD
2640     if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2641                                   XML_ACCOUNT_DIRECT)) {
2642       accountingOnAbort(parser);
2643       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2644     }
2645 #endif /* XML_DTD */
2646 
2647     /* If we are at the end of the buffer, this would cause the next stage,
2648        i.e. externalEntityInitProcessor3, to pass control directly to
2649        doContent (by detecting XML_TOK_NONE) without processing any xml text
2650        declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2651     */
2652     if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2653       *endPtr = next;
2654       return XML_ERROR_NONE;
2655     }
2656     start = next;
2657     break;
2658   case XML_TOK_PARTIAL:
2659     if (! parser->m_parsingStatus.finalBuffer) {
2660       *endPtr = start;
2661       return XML_ERROR_NONE;
2662     }
2663     parser->m_eventPtr = start;
2664     return XML_ERROR_UNCLOSED_TOKEN;
2665   case XML_TOK_PARTIAL_CHAR:
2666     if (! parser->m_parsingStatus.finalBuffer) {
2667       *endPtr = start;
2668       return XML_ERROR_NONE;
2669     }
2670     parser->m_eventPtr = start;
2671     return XML_ERROR_PARTIAL_CHAR;
2672   }
2673   parser->m_processor = externalEntityInitProcessor3;
2674   return externalEntityInitProcessor3(parser, start, end, endPtr);
2675 }
2676 
2677 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2678 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2679                              const char *end, const char **endPtr) {
2680   int tok;
2681   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2682   parser->m_eventPtr = start;
2683   tok = XmlContentTok(parser->m_encoding, start, end, &next);
2684   /* Note: These bytes are accounted later in:
2685            - processXmlDecl
2686            - externalEntityContentProcessor
2687   */
2688   parser->m_eventEndPtr = next;
2689 
2690   switch (tok) {
2691   case XML_TOK_XML_DECL: {
2692     enum XML_Error result;
2693     result = processXmlDecl(parser, 1, start, next);
2694     if (result != XML_ERROR_NONE)
2695       return result;
2696     switch (parser->m_parsingStatus.parsing) {
2697     case XML_SUSPENDED:
2698       *endPtr = next;
2699       return XML_ERROR_NONE;
2700     case XML_FINISHED:
2701       return XML_ERROR_ABORTED;
2702     default:
2703       start = next;
2704     }
2705   } break;
2706   case XML_TOK_PARTIAL:
2707     if (! parser->m_parsingStatus.finalBuffer) {
2708       *endPtr = start;
2709       return XML_ERROR_NONE;
2710     }
2711     return XML_ERROR_UNCLOSED_TOKEN;
2712   case XML_TOK_PARTIAL_CHAR:
2713     if (! parser->m_parsingStatus.finalBuffer) {
2714       *endPtr = start;
2715       return XML_ERROR_NONE;
2716     }
2717     return XML_ERROR_PARTIAL_CHAR;
2718   }
2719   parser->m_processor = externalEntityContentProcessor;
2720   parser->m_tagLevel = 1;
2721   return externalEntityContentProcessor(parser, start, end, endPtr);
2722 }
2723 
2724 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2725 externalEntityContentProcessor(XML_Parser parser, const char *start,
2726                                const char *end, const char **endPtr) {
2727   enum XML_Error result
2728       = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2729                   (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2730                   XML_ACCOUNT_ENTITY_EXPANSION);
2731   if (result == XML_ERROR_NONE) {
2732     if (! storeRawNames(parser))
2733       return XML_ERROR_NO_MEMORY;
2734   }
2735   return result;
2736 }
2737 
2738 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2739 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2740           const char *s, const char *end, const char **nextPtr,
2741           XML_Bool haveMore, enum XML_Account account) {
2742   /* save one level of indirection */
2743   DTD *const dtd = parser->m_dtd;
2744 
2745   const char **eventPP;
2746   const char **eventEndPP;
2747   if (enc == parser->m_encoding) {
2748     eventPP = &parser->m_eventPtr;
2749     eventEndPP = &parser->m_eventEndPtr;
2750   } else {
2751     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2752     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2753   }
2754   *eventPP = s;
2755 
2756   for (;;) {
2757     const char *next = s; /* XmlContentTok doesn't always set the last arg */
2758     int tok = XmlContentTok(enc, s, end, &next);
2759 #ifdef XML_DTD
2760     const char *accountAfter
2761         = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2762               ? (haveMore ? s /* i.e. 0 bytes */ : end)
2763               : next;
2764     if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2765                                   account)) {
2766       accountingOnAbort(parser);
2767       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2768     }
2769 #endif
2770     *eventEndPP = next;
2771     switch (tok) {
2772     case XML_TOK_TRAILING_CR:
2773       if (haveMore) {
2774         *nextPtr = s;
2775         return XML_ERROR_NONE;
2776       }
2777       *eventEndPP = end;
2778       if (parser->m_characterDataHandler) {
2779         XML_Char c = 0xA;
2780         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2781       } else if (parser->m_defaultHandler)
2782         reportDefault(parser, enc, s, end);
2783       /* We are at the end of the final buffer, should we check for
2784          XML_SUSPENDED, XML_FINISHED?
2785       */
2786       if (startTagLevel == 0)
2787         return XML_ERROR_NO_ELEMENTS;
2788       if (parser->m_tagLevel != startTagLevel)
2789         return XML_ERROR_ASYNC_ENTITY;
2790       *nextPtr = end;
2791       return XML_ERROR_NONE;
2792     case XML_TOK_NONE:
2793       if (haveMore) {
2794         *nextPtr = s;
2795         return XML_ERROR_NONE;
2796       }
2797       if (startTagLevel > 0) {
2798         if (parser->m_tagLevel != startTagLevel)
2799           return XML_ERROR_ASYNC_ENTITY;
2800         *nextPtr = s;
2801         return XML_ERROR_NONE;
2802       }
2803       return XML_ERROR_NO_ELEMENTS;
2804     case XML_TOK_INVALID:
2805       *eventPP = next;
2806       return XML_ERROR_INVALID_TOKEN;
2807     case XML_TOK_PARTIAL:
2808       if (haveMore) {
2809         *nextPtr = s;
2810         return XML_ERROR_NONE;
2811       }
2812       return XML_ERROR_UNCLOSED_TOKEN;
2813     case XML_TOK_PARTIAL_CHAR:
2814       if (haveMore) {
2815         *nextPtr = s;
2816         return XML_ERROR_NONE;
2817       }
2818       return XML_ERROR_PARTIAL_CHAR;
2819     case XML_TOK_ENTITY_REF: {
2820       const XML_Char *name;
2821       ENTITY *entity;
2822       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2823           enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2824       if (ch) {
2825 #ifdef XML_DTD
2826         /* NOTE: We are replacing 4-6 characters original input for 1 character
2827          *       so there is no amplification and hence recording without
2828          *       protection. */
2829         accountingDiffTolerated(parser, tok, (char *)&ch,
2830                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2831                                 XML_ACCOUNT_ENTITY_EXPANSION);
2832 #endif /* XML_DTD */
2833         if (parser->m_characterDataHandler)
2834           parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2835         else if (parser->m_defaultHandler)
2836           reportDefault(parser, enc, s, next);
2837         break;
2838       }
2839       name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2840                              next - enc->minBytesPerChar);
2841       if (! name)
2842         return XML_ERROR_NO_MEMORY;
2843       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2844       poolDiscard(&dtd->pool);
2845       /* First, determine if a check for an existing declaration is needed;
2846          if yes, check that the entity exists, and that it is internal,
2847          otherwise call the skipped entity or default handler.
2848       */
2849       if (! dtd->hasParamEntityRefs || dtd->standalone) {
2850         if (! entity)
2851           return XML_ERROR_UNDEFINED_ENTITY;
2852         else if (! entity->is_internal)
2853           return XML_ERROR_ENTITY_DECLARED_IN_PE;
2854       } else if (! entity) {
2855         if (parser->m_skippedEntityHandler)
2856           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2857         else if (parser->m_defaultHandler)
2858           reportDefault(parser, enc, s, next);
2859         break;
2860       }
2861       if (entity->open)
2862         return XML_ERROR_RECURSIVE_ENTITY_REF;
2863       if (entity->notation)
2864         return XML_ERROR_BINARY_ENTITY_REF;
2865       if (entity->textPtr) {
2866         enum XML_Error result;
2867         if (! parser->m_defaultExpandInternalEntities) {
2868           if (parser->m_skippedEntityHandler)
2869             parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2870                                            0);
2871           else if (parser->m_defaultHandler)
2872             reportDefault(parser, enc, s, next);
2873           break;
2874         }
2875         result = processInternalEntity(parser, entity, XML_FALSE);
2876         if (result != XML_ERROR_NONE)
2877           return result;
2878       } else if (parser->m_externalEntityRefHandler) {
2879         const XML_Char *context;
2880         entity->open = XML_TRUE;
2881         context = getContext(parser);
2882         entity->open = XML_FALSE;
2883         if (! context)
2884           return XML_ERROR_NO_MEMORY;
2885         if (! parser->m_externalEntityRefHandler(
2886                 parser->m_externalEntityRefHandlerArg, context, entity->base,
2887                 entity->systemId, entity->publicId))
2888           return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2889         poolDiscard(&parser->m_tempPool);
2890       } else if (parser->m_defaultHandler)
2891         reportDefault(parser, enc, s, next);
2892       break;
2893     }
2894     case XML_TOK_START_TAG_NO_ATTS:
2895       /* fall through */
2896     case XML_TOK_START_TAG_WITH_ATTS: {
2897       TAG *tag;
2898       enum XML_Error result;
2899       XML_Char *toPtr;
2900       if (parser->m_freeTagList) {
2901         tag = parser->m_freeTagList;
2902         parser->m_freeTagList = parser->m_freeTagList->parent;
2903       } else {
2904         tag = (TAG *)MALLOC(parser, sizeof(TAG));
2905         if (! tag)
2906           return XML_ERROR_NO_MEMORY;
2907         tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2908         if (! tag->buf) {
2909           FREE(parser, tag);
2910           return XML_ERROR_NO_MEMORY;
2911         }
2912         tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2913       }
2914       tag->bindings = NULL;
2915       tag->parent = parser->m_tagStack;
2916       parser->m_tagStack = tag;
2917       tag->name.localPart = NULL;
2918       tag->name.prefix = NULL;
2919       tag->rawName = s + enc->minBytesPerChar;
2920       tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2921       ++parser->m_tagLevel;
2922       {
2923         const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2924         const char *fromPtr = tag->rawName;
2925         toPtr = (XML_Char *)tag->buf;
2926         for (;;) {
2927           int bufSize;
2928           int convLen;
2929           const enum XML_Convert_Result convert_res
2930               = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
2931                            (ICHAR *)tag->bufEnd - 1);
2932           convLen = (int)(toPtr - (XML_Char *)tag->buf);
2933           if ((fromPtr >= rawNameEnd)
2934               || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
2935             tag->name.strLen = convLen;
2936             break;
2937           }
2938           bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2939           {
2940             char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2941             if (temp == NULL)
2942               return XML_ERROR_NO_MEMORY;
2943             tag->buf = temp;
2944             tag->bufEnd = temp + bufSize;
2945             toPtr = (XML_Char *)temp + convLen;
2946           }
2947         }
2948       }
2949       tag->name.str = (XML_Char *)tag->buf;
2950       *toPtr = XML_T('\0');
2951       result
2952           = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
2953       if (result)
2954         return result;
2955       if (parser->m_startElementHandler)
2956         parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2957                                       (const XML_Char **)parser->m_atts);
2958       else if (parser->m_defaultHandler)
2959         reportDefault(parser, enc, s, next);
2960       poolClear(&parser->m_tempPool);
2961       break;
2962     }
2963     case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
2964       /* fall through */
2965     case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
2966       const char *rawName = s + enc->minBytesPerChar;
2967       enum XML_Error result;
2968       BINDING *bindings = NULL;
2969       XML_Bool noElmHandlers = XML_TRUE;
2970       TAG_NAME name;
2971       name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
2972                                  rawName + XmlNameLength(enc, rawName));
2973       if (! name.str)
2974         return XML_ERROR_NO_MEMORY;
2975       poolFinish(&parser->m_tempPool);
2976       result = storeAtts(parser, enc, s, &name, &bindings,
2977                          XML_ACCOUNT_NONE /* token spans whole start tag */);
2978       if (result != XML_ERROR_NONE) {
2979         freeBindings(parser, bindings);
2980         return result;
2981       }
2982       poolFinish(&parser->m_tempPool);
2983       if (parser->m_startElementHandler) {
2984         parser->m_startElementHandler(parser->m_handlerArg, name.str,
2985                                       (const XML_Char **)parser->m_atts);
2986         noElmHandlers = XML_FALSE;
2987       }
2988       if (parser->m_endElementHandler) {
2989         if (parser->m_startElementHandler)
2990           *eventPP = *eventEndPP;
2991         parser->m_endElementHandler(parser->m_handlerArg, name.str);
2992         noElmHandlers = XML_FALSE;
2993       }
2994       if (noElmHandlers && parser->m_defaultHandler)
2995         reportDefault(parser, enc, s, next);
2996       poolClear(&parser->m_tempPool);
2997       freeBindings(parser, bindings);
2998     }
2999       if ((parser->m_tagLevel == 0)
3000           && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3001         if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3002           parser->m_processor = epilogProcessor;
3003         else
3004           return epilogProcessor(parser, next, end, nextPtr);
3005       }
3006       break;
3007     case XML_TOK_END_TAG:
3008       if (parser->m_tagLevel == startTagLevel)
3009         return XML_ERROR_ASYNC_ENTITY;
3010       else {
3011         int len;
3012         const char *rawName;
3013         TAG *tag = parser->m_tagStack;
3014         parser->m_tagStack = tag->parent;
3015         tag->parent = parser->m_freeTagList;
3016         parser->m_freeTagList = tag;
3017         rawName = s + enc->minBytesPerChar * 2;
3018         len = XmlNameLength(enc, rawName);
3019         if (len != tag->rawNameLength
3020             || memcmp(tag->rawName, rawName, len) != 0) {
3021           *eventPP = rawName;
3022           return XML_ERROR_TAG_MISMATCH;
3023         }
3024         --parser->m_tagLevel;
3025         if (parser->m_endElementHandler) {
3026           const XML_Char *localPart;
3027           const XML_Char *prefix;
3028           XML_Char *uri;
3029           localPart = tag->name.localPart;
3030           if (parser->m_ns && localPart) {
3031             /* localPart and prefix may have been overwritten in
3032                tag->name.str, since this points to the binding->uri
3033                buffer which gets re-used; so we have to add them again
3034             */
3035             uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3036             /* don't need to check for space - already done in storeAtts() */
3037             while (*localPart)
3038               *uri++ = *localPart++;
3039             prefix = (XML_Char *)tag->name.prefix;
3040             if (parser->m_ns_triplets && prefix) {
3041               *uri++ = parser->m_namespaceSeparator;
3042               while (*prefix)
3043                 *uri++ = *prefix++;
3044             }
3045             *uri = XML_T('\0');
3046           }
3047           parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3048         } else if (parser->m_defaultHandler)
3049           reportDefault(parser, enc, s, next);
3050         while (tag->bindings) {
3051           BINDING *b = tag->bindings;
3052           if (parser->m_endNamespaceDeclHandler)
3053             parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3054                                               b->prefix->name);
3055           tag->bindings = tag->bindings->nextTagBinding;
3056           b->nextTagBinding = parser->m_freeBindingList;
3057           parser->m_freeBindingList = b;
3058           b->prefix->binding = b->prevPrefixBinding;
3059         }
3060         if ((parser->m_tagLevel == 0)
3061             && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3062           if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3063             parser->m_processor = epilogProcessor;
3064           else
3065             return epilogProcessor(parser, next, end, nextPtr);
3066         }
3067       }
3068       break;
3069     case XML_TOK_CHAR_REF: {
3070       int n = XmlCharRefNumber(enc, s);
3071       if (n < 0)
3072         return XML_ERROR_BAD_CHAR_REF;
3073       if (parser->m_characterDataHandler) {
3074         XML_Char buf[XML_ENCODE_MAX];
3075         parser->m_characterDataHandler(parser->m_handlerArg, buf,
3076                                        XmlEncode(n, (ICHAR *)buf));
3077       } else if (parser->m_defaultHandler)
3078         reportDefault(parser, enc, s, next);
3079     } break;
3080     case XML_TOK_XML_DECL:
3081       return XML_ERROR_MISPLACED_XML_PI;
3082     case XML_TOK_DATA_NEWLINE:
3083       if (parser->m_characterDataHandler) {
3084         XML_Char c = 0xA;
3085         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3086       } else if (parser->m_defaultHandler)
3087         reportDefault(parser, enc, s, next);
3088       break;
3089     case XML_TOK_CDATA_SECT_OPEN: {
3090       enum XML_Error result;
3091       if (parser->m_startCdataSectionHandler)
3092         parser->m_startCdataSectionHandler(parser->m_handlerArg);
3093       /* BEGIN disabled code */
3094       /* Suppose you doing a transformation on a document that involves
3095          changing only the character data.  You set up a defaultHandler
3096          and a characterDataHandler.  The defaultHandler simply copies
3097          characters through.  The characterDataHandler does the
3098          transformation and writes the characters out escaping them as
3099          necessary.  This case will fail to work if we leave out the
3100          following two lines (because & and < inside CDATA sections will
3101          be incorrectly escaped).
3102 
3103          However, now we have a start/endCdataSectionHandler, so it seems
3104          easier to let the user deal with this.
3105       */
3106       else if (0 && parser->m_characterDataHandler)
3107         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3108                                        0);
3109       /* END disabled code */
3110       else if (parser->m_defaultHandler)
3111         reportDefault(parser, enc, s, next);
3112       result
3113           = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3114       if (result != XML_ERROR_NONE)
3115         return result;
3116       else if (! next) {
3117         parser->m_processor = cdataSectionProcessor;
3118         return result;
3119       }
3120     } break;
3121     case XML_TOK_TRAILING_RSQB:
3122       if (haveMore) {
3123         *nextPtr = s;
3124         return XML_ERROR_NONE;
3125       }
3126       if (parser->m_characterDataHandler) {
3127         if (MUST_CONVERT(enc, s)) {
3128           ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3129           XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3130           parser->m_characterDataHandler(
3131               parser->m_handlerArg, parser->m_dataBuf,
3132               (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3133         } else
3134           parser->m_characterDataHandler(
3135               parser->m_handlerArg, (XML_Char *)s,
3136               (int)((XML_Char *)end - (XML_Char *)s));
3137       } else if (parser->m_defaultHandler)
3138         reportDefault(parser, enc, s, end);
3139       /* We are at the end of the final buffer, should we check for
3140          XML_SUSPENDED, XML_FINISHED?
3141       */
3142       if (startTagLevel == 0) {
3143         *eventPP = end;
3144         return XML_ERROR_NO_ELEMENTS;
3145       }
3146       if (parser->m_tagLevel != startTagLevel) {
3147         *eventPP = end;
3148         return XML_ERROR_ASYNC_ENTITY;
3149       }
3150       *nextPtr = end;
3151       return XML_ERROR_NONE;
3152     case XML_TOK_DATA_CHARS: {
3153       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3154       if (charDataHandler) {
3155         if (MUST_CONVERT(enc, s)) {
3156           for (;;) {
3157             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3158             const enum XML_Convert_Result convert_res = XmlConvert(
3159                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3160             *eventEndPP = s;
3161             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3162                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3163             if ((convert_res == XML_CONVERT_COMPLETED)
3164                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3165               break;
3166             *eventPP = s;
3167           }
3168         } else
3169           charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3170                           (int)((XML_Char *)next - (XML_Char *)s));
3171       } else if (parser->m_defaultHandler)
3172         reportDefault(parser, enc, s, next);
3173     } break;
3174     case XML_TOK_PI:
3175       if (! reportProcessingInstruction(parser, enc, s, next))
3176         return XML_ERROR_NO_MEMORY;
3177       break;
3178     case XML_TOK_COMMENT:
3179       if (! reportComment(parser, enc, s, next))
3180         return XML_ERROR_NO_MEMORY;
3181       break;
3182     default:
3183       /* All of the tokens produced by XmlContentTok() have their own
3184        * explicit cases, so this default is not strictly necessary.
3185        * However it is a useful safety net, so we retain the code and
3186        * simply exclude it from the coverage tests.
3187        *
3188        * LCOV_EXCL_START
3189        */
3190       if (parser->m_defaultHandler)
3191         reportDefault(parser, enc, s, next);
3192       break;
3193       /* LCOV_EXCL_STOP */
3194     }
3195     *eventPP = s = next;
3196     switch (parser->m_parsingStatus.parsing) {
3197     case XML_SUSPENDED:
3198       *nextPtr = next;
3199       return XML_ERROR_NONE;
3200     case XML_FINISHED:
3201       return XML_ERROR_ABORTED;
3202     default:;
3203     }
3204   }
3205   /* not reached */
3206 }
3207 
3208 /* This function does not call free() on the allocated memory, merely
3209  * moving it to the parser's m_freeBindingList where it can be freed or
3210  * reused as appropriate.
3211  */
3212 static void
freeBindings(XML_Parser parser,BINDING * bindings)3213 freeBindings(XML_Parser parser, BINDING *bindings) {
3214   while (bindings) {
3215     BINDING *b = bindings;
3216 
3217     /* m_startNamespaceDeclHandler will have been called for this
3218      * binding in addBindings(), so call the end handler now.
3219      */
3220     if (parser->m_endNamespaceDeclHandler)
3221       parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3222 
3223     bindings = bindings->nextTagBinding;
3224     b->nextTagBinding = parser->m_freeBindingList;
3225     parser->m_freeBindingList = b;
3226     b->prefix->binding = b->prevPrefixBinding;
3227   }
3228 }
3229 
3230 /* Precondition: all arguments must be non-NULL;
3231    Purpose:
3232    - normalize attributes
3233    - check attributes for well-formedness
3234    - generate namespace aware attribute names (URI, prefix)
3235    - build list of attributes for startElementHandler
3236    - default attributes
3237    - process namespace declarations (check and report them)
3238    - generate namespace aware element name (URI, prefix)
3239 */
3240 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3241 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3242           TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3243           enum XML_Account account) {
3244   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3245   ELEMENT_TYPE *elementType;
3246   int nDefaultAtts;
3247   const XML_Char **appAtts; /* the attribute list for the application */
3248   int attIndex = 0;
3249   int prefixLen;
3250   int i;
3251   int n;
3252   XML_Char *uri;
3253   int nPrefixes = 0;
3254   BINDING *binding;
3255   const XML_Char *localPart;
3256 
3257   /* lookup the element type name */
3258   elementType
3259       = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3260   if (! elementType) {
3261     const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3262     if (! name)
3263       return XML_ERROR_NO_MEMORY;
3264     elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3265                                          sizeof(ELEMENT_TYPE));
3266     if (! elementType)
3267       return XML_ERROR_NO_MEMORY;
3268     if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3269       return XML_ERROR_NO_MEMORY;
3270   }
3271   nDefaultAtts = elementType->nDefaultAtts;
3272 
3273   /* get the attributes from the tokenizer */
3274   n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3275 
3276   /* Detect and prevent integer overflow */
3277   if (n > INT_MAX - nDefaultAtts) {
3278     return XML_ERROR_NO_MEMORY;
3279   }
3280 
3281   if (n + nDefaultAtts > parser->m_attsSize) {
3282     int oldAttsSize = parser->m_attsSize;
3283     ATTRIBUTE *temp;
3284 #ifdef XML_ATTR_INFO
3285     XML_AttrInfo *temp2;
3286 #endif
3287 
3288     /* Detect and prevent integer overflow */
3289     if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3290         || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3291       return XML_ERROR_NO_MEMORY;
3292     }
3293 
3294     parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3295 
3296     /* Detect and prevent integer overflow.
3297      * The preprocessor guard addresses the "always false" warning
3298      * from -Wtype-limits on platforms where
3299      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3300 #if UINT_MAX >= SIZE_MAX
3301     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3302       parser->m_attsSize = oldAttsSize;
3303       return XML_ERROR_NO_MEMORY;
3304     }
3305 #endif
3306 
3307     temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3308                                 parser->m_attsSize * sizeof(ATTRIBUTE));
3309     if (temp == NULL) {
3310       parser->m_attsSize = oldAttsSize;
3311       return XML_ERROR_NO_MEMORY;
3312     }
3313     parser->m_atts = temp;
3314 #ifdef XML_ATTR_INFO
3315     /* Detect and prevent integer overflow.
3316      * The preprocessor guard addresses the "always false" warning
3317      * from -Wtype-limits on platforms where
3318      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3319 #  if UINT_MAX >= SIZE_MAX
3320     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3321       parser->m_attsSize = oldAttsSize;
3322       return XML_ERROR_NO_MEMORY;
3323     }
3324 #  endif
3325 
3326     temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3327                                     parser->m_attsSize * sizeof(XML_AttrInfo));
3328     if (temp2 == NULL) {
3329       parser->m_attsSize = oldAttsSize;
3330       return XML_ERROR_NO_MEMORY;
3331     }
3332     parser->m_attInfo = temp2;
3333 #endif
3334     if (n > oldAttsSize)
3335       XmlGetAttributes(enc, attStr, n, parser->m_atts);
3336   }
3337 
3338   appAtts = (const XML_Char **)parser->m_atts;
3339   for (i = 0; i < n; i++) {
3340     ATTRIBUTE *currAtt = &parser->m_atts[i];
3341 #ifdef XML_ATTR_INFO
3342     XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3343 #endif
3344     /* add the name and value to the attribute list */
3345     ATTRIBUTE_ID *attId
3346         = getAttributeId(parser, enc, currAtt->name,
3347                          currAtt->name + XmlNameLength(enc, currAtt->name));
3348     if (! attId)
3349       return XML_ERROR_NO_MEMORY;
3350 #ifdef XML_ATTR_INFO
3351     currAttInfo->nameStart
3352         = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3353     currAttInfo->nameEnd
3354         = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3355     currAttInfo->valueStart = parser->m_parseEndByteIndex
3356                               - (parser->m_parseEndPtr - currAtt->valuePtr);
3357     currAttInfo->valueEnd = parser->m_parseEndByteIndex
3358                             - (parser->m_parseEndPtr - currAtt->valueEnd);
3359 #endif
3360     /* Detect duplicate attributes by their QNames. This does not work when
3361        namespace processing is turned on and different prefixes for the same
3362        namespace are used. For this case we have a check further down.
3363     */
3364     if ((attId->name)[-1]) {
3365       if (enc == parser->m_encoding)
3366         parser->m_eventPtr = parser->m_atts[i].name;
3367       return XML_ERROR_DUPLICATE_ATTRIBUTE;
3368     }
3369     (attId->name)[-1] = 1;
3370     appAtts[attIndex++] = attId->name;
3371     if (! parser->m_atts[i].normalized) {
3372       enum XML_Error result;
3373       XML_Bool isCdata = XML_TRUE;
3374 
3375       /* figure out whether declared as other than CDATA */
3376       if (attId->maybeTokenized) {
3377         int j;
3378         for (j = 0; j < nDefaultAtts; j++) {
3379           if (attId == elementType->defaultAtts[j].id) {
3380             isCdata = elementType->defaultAtts[j].isCdata;
3381             break;
3382           }
3383         }
3384       }
3385 
3386       /* normalize the attribute value */
3387       result = storeAttributeValue(
3388           parser, enc, isCdata, parser->m_atts[i].valuePtr,
3389           parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3390       if (result)
3391         return result;
3392       appAtts[attIndex] = poolStart(&parser->m_tempPool);
3393       poolFinish(&parser->m_tempPool);
3394     } else {
3395       /* the value did not need normalizing */
3396       appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3397                                           parser->m_atts[i].valuePtr,
3398                                           parser->m_atts[i].valueEnd);
3399       if (appAtts[attIndex] == 0)
3400         return XML_ERROR_NO_MEMORY;
3401       poolFinish(&parser->m_tempPool);
3402     }
3403     /* handle prefixed attribute names */
3404     if (attId->prefix) {
3405       if (attId->xmlns) {
3406         /* deal with namespace declarations here */
3407         enum XML_Error result = addBinding(parser, attId->prefix, attId,
3408                                            appAtts[attIndex], bindingsPtr);
3409         if (result)
3410           return result;
3411         --attIndex;
3412       } else {
3413         /* deal with other prefixed names later */
3414         attIndex++;
3415         nPrefixes++;
3416         (attId->name)[-1] = 2;
3417       }
3418     } else
3419       attIndex++;
3420   }
3421 
3422   /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3423   parser->m_nSpecifiedAtts = attIndex;
3424   if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3425     for (i = 0; i < attIndex; i += 2)
3426       if (appAtts[i] == elementType->idAtt->name) {
3427         parser->m_idAttIndex = i;
3428         break;
3429       }
3430   } else
3431     parser->m_idAttIndex = -1;
3432 
3433   /* do attribute defaulting */
3434   for (i = 0; i < nDefaultAtts; i++) {
3435     const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3436     if (! (da->id->name)[-1] && da->value) {
3437       if (da->id->prefix) {
3438         if (da->id->xmlns) {
3439           enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3440                                              da->value, bindingsPtr);
3441           if (result)
3442             return result;
3443         } else {
3444           (da->id->name)[-1] = 2;
3445           nPrefixes++;
3446           appAtts[attIndex++] = da->id->name;
3447           appAtts[attIndex++] = da->value;
3448         }
3449       } else {
3450         (da->id->name)[-1] = 1;
3451         appAtts[attIndex++] = da->id->name;
3452         appAtts[attIndex++] = da->value;
3453       }
3454     }
3455   }
3456   appAtts[attIndex] = 0;
3457 
3458   /* expand prefixed attribute names, check for duplicates,
3459      and clear flags that say whether attributes were specified */
3460   i = 0;
3461   if (nPrefixes) {
3462     int j; /* hash table index */
3463     unsigned long version = parser->m_nsAttsVersion;
3464 
3465     /* Detect and prevent invalid shift */
3466     if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3467       return XML_ERROR_NO_MEMORY;
3468     }
3469 
3470     unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3471     unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3472     /* size of hash table must be at least 2 * (# of prefixed attributes) */
3473     if ((nPrefixes << 1)
3474         >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3475       NS_ATT *temp;
3476       /* hash table size must also be a power of 2 and >= 8 */
3477       while (nPrefixes >> parser->m_nsAttsPower++)
3478         ;
3479       if (parser->m_nsAttsPower < 3)
3480         parser->m_nsAttsPower = 3;
3481 
3482       /* Detect and prevent invalid shift */
3483       if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3484         /* Restore actual size of memory in m_nsAtts */
3485         parser->m_nsAttsPower = oldNsAttsPower;
3486         return XML_ERROR_NO_MEMORY;
3487       }
3488 
3489       nsAttsSize = 1u << parser->m_nsAttsPower;
3490 
3491       /* Detect and prevent integer overflow.
3492        * The preprocessor guard addresses the "always false" warning
3493        * from -Wtype-limits on platforms where
3494        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3495 #if UINT_MAX >= SIZE_MAX
3496       if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3497         /* Restore actual size of memory in m_nsAtts */
3498         parser->m_nsAttsPower = oldNsAttsPower;
3499         return XML_ERROR_NO_MEMORY;
3500       }
3501 #endif
3502 
3503       temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3504                                nsAttsSize * sizeof(NS_ATT));
3505       if (! temp) {
3506         /* Restore actual size of memory in m_nsAtts */
3507         parser->m_nsAttsPower = oldNsAttsPower;
3508         return XML_ERROR_NO_MEMORY;
3509       }
3510       parser->m_nsAtts = temp;
3511       version = 0; /* force re-initialization of m_nsAtts hash table */
3512     }
3513     /* using a version flag saves us from initializing m_nsAtts every time */
3514     if (! version) { /* initialize version flags when version wraps around */
3515       version = INIT_ATTS_VERSION;
3516       for (j = nsAttsSize; j != 0;)
3517         parser->m_nsAtts[--j].version = version;
3518     }
3519     parser->m_nsAttsVersion = --version;
3520 
3521     /* expand prefixed names and check for duplicates */
3522     for (; i < attIndex; i += 2) {
3523       const XML_Char *s = appAtts[i];
3524       if (s[-1] == 2) { /* prefixed */
3525         ATTRIBUTE_ID *id;
3526         const BINDING *b;
3527         unsigned long uriHash;
3528         struct siphash sip_state;
3529         struct sipkey sip_key;
3530 
3531         copy_salt_to_sipkey(parser, &sip_key);
3532         sip24_init(&sip_state, &sip_key);
3533 
3534         ((XML_Char *)s)[-1] = 0; /* clear flag */
3535         id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3536         if (! id || ! id->prefix) {
3537           /* This code is walking through the appAtts array, dealing
3538            * with (in this case) a prefixed attribute name.  To be in
3539            * the array, the attribute must have already been bound, so
3540            * has to have passed through the hash table lookup once
3541            * already.  That implies that an entry for it already
3542            * exists, so the lookup above will return a pointer to
3543            * already allocated memory.  There is no opportunaity for
3544            * the allocator to fail, so the condition above cannot be
3545            * fulfilled.
3546            *
3547            * Since it is difficult to be certain that the above
3548            * analysis is complete, we retain the test and merely
3549            * remove the code from coverage tests.
3550            */
3551           return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3552         }
3553         b = id->prefix->binding;
3554         if (! b)
3555           return XML_ERROR_UNBOUND_PREFIX;
3556 
3557         for (j = 0; j < b->uriLen; j++) {
3558           const XML_Char c = b->uri[j];
3559           if (! poolAppendChar(&parser->m_tempPool, c))
3560             return XML_ERROR_NO_MEMORY;
3561         }
3562 
3563         sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3564 
3565         while (*s++ != XML_T(ASCII_COLON))
3566           ;
3567 
3568         sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3569 
3570         do { /* copies null terminator */
3571           if (! poolAppendChar(&parser->m_tempPool, *s))
3572             return XML_ERROR_NO_MEMORY;
3573         } while (*s++);
3574 
3575         uriHash = (unsigned long)sip24_final(&sip_state);
3576 
3577         { /* Check hash table for duplicate of expanded name (uriName).
3578              Derived from code in lookup(parser, HASH_TABLE *table, ...).
3579           */
3580           unsigned char step = 0;
3581           unsigned long mask = nsAttsSize - 1;
3582           j = uriHash & mask; /* index into hash table */
3583           while (parser->m_nsAtts[j].version == version) {
3584             /* for speed we compare stored hash values first */
3585             if (uriHash == parser->m_nsAtts[j].hash) {
3586               const XML_Char *s1 = poolStart(&parser->m_tempPool);
3587               const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3588               /* s1 is null terminated, but not s2 */
3589               for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3590                 ;
3591               if (*s1 == 0)
3592                 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3593             }
3594             if (! step)
3595               step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3596             j < step ? (j += nsAttsSize - step) : (j -= step);
3597           }
3598         }
3599 
3600         if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3601           parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3602           s = b->prefix->name;
3603           do {
3604             if (! poolAppendChar(&parser->m_tempPool, *s))
3605               return XML_ERROR_NO_MEMORY;
3606           } while (*s++);
3607         }
3608 
3609         /* store expanded name in attribute list */
3610         s = poolStart(&parser->m_tempPool);
3611         poolFinish(&parser->m_tempPool);
3612         appAtts[i] = s;
3613 
3614         /* fill empty slot with new version, uriName and hash value */
3615         parser->m_nsAtts[j].version = version;
3616         parser->m_nsAtts[j].hash = uriHash;
3617         parser->m_nsAtts[j].uriName = s;
3618 
3619         if (! --nPrefixes) {
3620           i += 2;
3621           break;
3622         }
3623       } else                     /* not prefixed */
3624         ((XML_Char *)s)[-1] = 0; /* clear flag */
3625     }
3626   }
3627   /* clear flags for the remaining attributes */
3628   for (; i < attIndex; i += 2)
3629     ((XML_Char *)(appAtts[i]))[-1] = 0;
3630   for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3631     binding->attId->name[-1] = 0;
3632 
3633   if (! parser->m_ns)
3634     return XML_ERROR_NONE;
3635 
3636   /* expand the element type name */
3637   if (elementType->prefix) {
3638     binding = elementType->prefix->binding;
3639     if (! binding)
3640       return XML_ERROR_UNBOUND_PREFIX;
3641     localPart = tagNamePtr->str;
3642     while (*localPart++ != XML_T(ASCII_COLON))
3643       ;
3644   } else if (dtd->defaultPrefix.binding) {
3645     binding = dtd->defaultPrefix.binding;
3646     localPart = tagNamePtr->str;
3647   } else
3648     return XML_ERROR_NONE;
3649   prefixLen = 0;
3650   if (parser->m_ns_triplets && binding->prefix->name) {
3651     for (; binding->prefix->name[prefixLen++];)
3652       ; /* prefixLen includes null terminator */
3653   }
3654   tagNamePtr->localPart = localPart;
3655   tagNamePtr->uriLen = binding->uriLen;
3656   tagNamePtr->prefix = binding->prefix->name;
3657   tagNamePtr->prefixLen = prefixLen;
3658   for (i = 0; localPart[i++];)
3659     ; /* i includes null terminator */
3660 
3661   /* Detect and prevent integer overflow */
3662   if (binding->uriLen > INT_MAX - prefixLen
3663       || i > INT_MAX - (binding->uriLen + prefixLen)) {
3664     return XML_ERROR_NO_MEMORY;
3665   }
3666 
3667   n = i + binding->uriLen + prefixLen;
3668   if (n > binding->uriAlloc) {
3669     TAG *p;
3670 
3671     /* Detect and prevent integer overflow */
3672     if (n > INT_MAX - EXPAND_SPARE) {
3673       return XML_ERROR_NO_MEMORY;
3674     }
3675     /* Detect and prevent integer overflow.
3676      * The preprocessor guard addresses the "always false" warning
3677      * from -Wtype-limits on platforms where
3678      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3679 #if UINT_MAX >= SIZE_MAX
3680     if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3681       return XML_ERROR_NO_MEMORY;
3682     }
3683 #endif
3684 
3685     uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3686     if (! uri)
3687       return XML_ERROR_NO_MEMORY;
3688     binding->uriAlloc = n + EXPAND_SPARE;
3689     memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3690     for (p = parser->m_tagStack; p; p = p->parent)
3691       if (p->name.str == binding->uri)
3692         p->name.str = uri;
3693     FREE(parser, binding->uri);
3694     binding->uri = uri;
3695   }
3696   /* if m_namespaceSeparator != '\0' then uri includes it already */
3697   uri = binding->uri + binding->uriLen;
3698   memcpy(uri, localPart, i * sizeof(XML_Char));
3699   /* we always have a namespace separator between localPart and prefix */
3700   if (prefixLen) {
3701     uri += i - 1;
3702     *uri = parser->m_namespaceSeparator; /* replace null terminator */
3703     memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3704   }
3705   tagNamePtr->str = binding->uri;
3706   return XML_ERROR_NONE;
3707 }
3708 
3709 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3710 is_rfc3986_uri_char(XML_Char candidate) {
3711   // For the RFC 3986 ANBF grammar see
3712   // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3713 
3714   switch (candidate) {
3715   // From rule "ALPHA" (uppercase half)
3716   case 'A':
3717   case 'B':
3718   case 'C':
3719   case 'D':
3720   case 'E':
3721   case 'F':
3722   case 'G':
3723   case 'H':
3724   case 'I':
3725   case 'J':
3726   case 'K':
3727   case 'L':
3728   case 'M':
3729   case 'N':
3730   case 'O':
3731   case 'P':
3732   case 'Q':
3733   case 'R':
3734   case 'S':
3735   case 'T':
3736   case 'U':
3737   case 'V':
3738   case 'W':
3739   case 'X':
3740   case 'Y':
3741   case 'Z':
3742 
3743   // From rule "ALPHA" (lowercase half)
3744   case 'a':
3745   case 'b':
3746   case 'c':
3747   case 'd':
3748   case 'e':
3749   case 'f':
3750   case 'g':
3751   case 'h':
3752   case 'i':
3753   case 'j':
3754   case 'k':
3755   case 'l':
3756   case 'm':
3757   case 'n':
3758   case 'o':
3759   case 'p':
3760   case 'q':
3761   case 'r':
3762   case 's':
3763   case 't':
3764   case 'u':
3765   case 'v':
3766   case 'w':
3767   case 'x':
3768   case 'y':
3769   case 'z':
3770 
3771   // From rule "DIGIT"
3772   case '0':
3773   case '1':
3774   case '2':
3775   case '3':
3776   case '4':
3777   case '5':
3778   case '6':
3779   case '7':
3780   case '8':
3781   case '9':
3782 
3783   // From rule "pct-encoded"
3784   case '%':
3785 
3786   // From rule "unreserved"
3787   case '-':
3788   case '.':
3789   case '_':
3790   case '~':
3791 
3792   // From rule "gen-delims"
3793   case ':':
3794   case '/':
3795   case '?':
3796   case '#':
3797   case '[':
3798   case ']':
3799   case '@':
3800 
3801   // From rule "sub-delims"
3802   case '!':
3803   case '$':
3804   case '&':
3805   case '\'':
3806   case '(':
3807   case ')':
3808   case '*':
3809   case '+':
3810   case ',':
3811   case ';':
3812   case '=':
3813     return XML_TRUE;
3814 
3815   default:
3816     return XML_FALSE;
3817   }
3818 }
3819 
3820 /* addBinding() overwrites the value of prefix->binding without checking.
3821    Therefore one must keep track of the old value outside of addBinding().
3822 */
3823 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3824 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3825            const XML_Char *uri, BINDING **bindingsPtr) {
3826   // "http://www.w3.org/XML/1998/namespace"
3827   static const XML_Char xmlNamespace[]
3828       = {ASCII_h,      ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,
3829          ASCII_SLASH,  ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,
3830          ASCII_PERIOD, ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,
3831          ASCII_r,      ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,
3832          ASCII_L,      ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,
3833          ASCII_8,      ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,
3834          ASCII_e,      ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,
3835          ASCII_e,      '\0'};
3836   static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3837   // "http://www.w3.org/2000/xmlns/"
3838   static const XML_Char xmlnsNamespace[]
3839       = {ASCII_h,     ASCII_t,      ASCII_t, ASCII_p, ASCII_COLON,  ASCII_SLASH,
3840          ASCII_SLASH, ASCII_w,      ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3841          ASCII_3,     ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,      ASCII_SLASH,
3842          ASCII_2,     ASCII_0,      ASCII_0, ASCII_0, ASCII_SLASH,  ASCII_x,
3843          ASCII_m,     ASCII_l,      ASCII_n, ASCII_s, ASCII_SLASH,  '\0'};
3844   static const int xmlnsLen
3845       = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3846 
3847   XML_Bool mustBeXML = XML_FALSE;
3848   XML_Bool isXML = XML_TRUE;
3849   XML_Bool isXMLNS = XML_TRUE;
3850 
3851   BINDING *b;
3852   int len;
3853 
3854   /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3855   if (*uri == XML_T('\0') && prefix->name)
3856     return XML_ERROR_UNDECLARING_PREFIX;
3857 
3858   if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3859       && prefix->name[1] == XML_T(ASCII_m)
3860       && prefix->name[2] == XML_T(ASCII_l)) {
3861     /* Not allowed to bind xmlns */
3862     if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3863         && prefix->name[5] == XML_T('\0'))
3864       return XML_ERROR_RESERVED_PREFIX_XMLNS;
3865 
3866     if (prefix->name[3] == XML_T('\0'))
3867       mustBeXML = XML_TRUE;
3868   }
3869 
3870   for (len = 0; uri[len]; len++) {
3871     if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3872       isXML = XML_FALSE;
3873 
3874     if (! mustBeXML && isXMLNS
3875         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3876       isXMLNS = XML_FALSE;
3877 
3878     // NOTE: While Expat does not validate namespace URIs against RFC 3986
3879     //       today (and is not REQUIRED to do so with regard to the XML 1.0
3880     //       namespaces specification) we have to at least make sure, that
3881     //       the application on top of Expat (that is likely splitting expanded
3882     //       element names ("qualified names") of form
3883     //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3884     //       in its element handler code) cannot be confused by an attacker
3885     //       putting additional namespace separator characters into namespace
3886     //       declarations.  That would be ambiguous and not to be expected.
3887     //
3888     //       While the HTML API docs of function XML_ParserCreateNS have been
3889     //       advising against use of a namespace separator character that can
3890     //       appear in a URI for >20 years now, some widespread applications
3891     //       are using URI characters (':' (colon) in particular) for a
3892     //       namespace separator, in practice.  To keep these applications
3893     //       functional, we only reject namespaces URIs containing the
3894     //       application-chosen namespace separator if the chosen separator
3895     //       is a non-URI character with regard to RFC 3986.
3896     if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3897         && ! is_rfc3986_uri_char(uri[len])) {
3898       return XML_ERROR_SYNTAX;
3899     }
3900   }
3901   isXML = isXML && len == xmlLen;
3902   isXMLNS = isXMLNS && len == xmlnsLen;
3903 
3904   if (mustBeXML != isXML)
3905     return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3906                      : XML_ERROR_RESERVED_NAMESPACE_URI;
3907 
3908   if (isXMLNS)
3909     return XML_ERROR_RESERVED_NAMESPACE_URI;
3910 
3911   if (parser->m_namespaceSeparator)
3912     len++;
3913   if (parser->m_freeBindingList) {
3914     b = parser->m_freeBindingList;
3915     if (len > b->uriAlloc) {
3916       /* Detect and prevent integer overflow */
3917       if (len > INT_MAX - EXPAND_SPARE) {
3918         return XML_ERROR_NO_MEMORY;
3919       }
3920 
3921       /* Detect and prevent integer overflow.
3922        * The preprocessor guard addresses the "always false" warning
3923        * from -Wtype-limits on platforms where
3924        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3925 #if UINT_MAX >= SIZE_MAX
3926       if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3927         return XML_ERROR_NO_MEMORY;
3928       }
3929 #endif
3930 
3931       XML_Char *temp = (XML_Char *)REALLOC(
3932           parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
3933       if (temp == NULL)
3934         return XML_ERROR_NO_MEMORY;
3935       b->uri = temp;
3936       b->uriAlloc = len + EXPAND_SPARE;
3937     }
3938     parser->m_freeBindingList = b->nextTagBinding;
3939   } else {
3940     b = (BINDING *)MALLOC(parser, sizeof(BINDING));
3941     if (! b)
3942       return XML_ERROR_NO_MEMORY;
3943 
3944     /* Detect and prevent integer overflow */
3945     if (len > INT_MAX - EXPAND_SPARE) {
3946       return XML_ERROR_NO_MEMORY;
3947     }
3948     /* Detect and prevent integer overflow.
3949      * The preprocessor guard addresses the "always false" warning
3950      * from -Wtype-limits on platforms where
3951      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3952 #if UINT_MAX >= SIZE_MAX
3953     if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3954       return XML_ERROR_NO_MEMORY;
3955     }
3956 #endif
3957 
3958     b->uri
3959         = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3960     if (! b->uri) {
3961       FREE(parser, b);
3962       return XML_ERROR_NO_MEMORY;
3963     }
3964     b->uriAlloc = len + EXPAND_SPARE;
3965   }
3966   b->uriLen = len;
3967   memcpy(b->uri, uri, len * sizeof(XML_Char));
3968   if (parser->m_namespaceSeparator)
3969     b->uri[len - 1] = parser->m_namespaceSeparator;
3970   b->prefix = prefix;
3971   b->attId = attId;
3972   b->prevPrefixBinding = prefix->binding;
3973   /* NULL binding when default namespace undeclared */
3974   if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
3975     prefix->binding = NULL;
3976   else
3977     prefix->binding = b;
3978   b->nextTagBinding = *bindingsPtr;
3979   *bindingsPtr = b;
3980   /* if attId == NULL then we are not starting a namespace scope */
3981   if (attId && parser->m_startNamespaceDeclHandler)
3982     parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
3983                                         prefix->binding ? uri : 0);
3984   return XML_ERROR_NONE;
3985 }
3986 
3987 /* The idea here is to avoid using stack for each CDATA section when
3988    the whole file is parsed with one call.
3989 */
3990 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3991 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
3992                       const char **endPtr) {
3993   enum XML_Error result = doCdataSection(
3994       parser, parser->m_encoding, &start, end, endPtr,
3995       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
3996   if (result != XML_ERROR_NONE)
3997     return result;
3998   if (start) {
3999     if (parser->m_parentParser) { /* we are parsing an external entity */
4000       parser->m_processor = externalEntityContentProcessor;
4001       return externalEntityContentProcessor(parser, start, end, endPtr);
4002     } else {
4003       parser->m_processor = contentProcessor;
4004       return contentProcessor(parser, start, end, endPtr);
4005     }
4006   }
4007   return result;
4008 }
4009 
/* Tokenizes the body of a CDATA section beginning at *startPtr (reading no
   further than end) and reports it through the registered handlers.

   startPtr gets set to non-null if the section is closed, and to null if
   the section is not yet closed.  When the section is closed, *startPtr
   and *nextPtr both point just past the closing token.

   haveMore tells whether more input may follow: if it is true, incomplete
   input makes the function return XML_ERROR_NONE with *nextPtr set to the
   start of the unconsumed token; if it is false, incomplete input is
   reported as XML_ERROR_PARTIAL_CHAR or XML_ERROR_UNCLOSED_CDATA_SECTION.

   account is forwarded to accountingDiffTolerated() when XML_DTD is
   defined and is unused otherwise.
*/
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
               const char *end, const char **nextPtr, XML_Bool haveMore,
               enum XML_Account account) {
  const char *s = *startPtr;
  const char **eventPP;
  const char **eventEndPP;
  /* Event positions are tracked in the parser itself when enc is the
     parser's encoding, and in parser->m_openInternalEntities otherwise. */
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    *eventPP = s;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;
  /* Stays NULL unless the closing token is seen below */
  *startPtr = NULL;

  for (;;) {
    const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
    int tok = XmlCdataSectionTok(enc, s, end, &next);
#ifdef XML_DTD
    if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#else
    UNUSED_P(account);
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_CDATA_SECT_CLOSE:
      /* End of section: notify the end handler, or else the default
         handler, then report where parsing has to continue. */
      if (parser->m_endCdataSectionHandler)
        parser->m_endCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* see comment under XML_TOK_CDATA_SECT_OPEN */
      else if (0 && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      *startPtr = next;
      *nextPtr = next;
      if (parser->m_parsingStatus.parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
      else
        return XML_ERROR_NONE;
    case XML_TOK_DATA_NEWLINE:
      /* The character data handler always receives a single 0xA here,
         whatever bytes the token consisted of. */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_DATA_CHARS: {
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert into m_dataBuf and report chunk by chunk until the
             conversion reports completion or incomplete input; s is
             advanced by XmlConvert() and the event start follows it. */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = next;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            *eventPP = s;
          }
        } else
          /* No conversion needed: hand the input bytes over directly */
          charDataHandler(parser->m_handlerArg, (XML_Char *)s,
                          (int)((XML_Char *)next - (XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_INVALID:
      /* Make the event position point at the offending input */
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_PARTIAL:
    case XML_TOK_NONE:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_CDATA_SECTION;
    default:
      /* Every token returned by XmlCdataSectionTok() has its own
       * explicit case, so this default case will never be executed.
       * We retain it as a safety net and exclude it from the coverage
       * statistics.
       *
       * LCOV_EXCL_START
       */
      *eventPP = next;
      return XML_ERROR_UNEXPECTED_STATE;
      /* LCOV_EXCL_STOP */
    }

    /* Token consumed: advance, then honor a suspend or abort request that
       a handler may have made.  Note that *nextPtr is only updated in the
       suspended case. */
    *eventPP = s = next;
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    default:;
    }
  }
  /* not reached */
}
4131 
4132 #ifdef XML_DTD
4133 
4134 /* The idea here is to avoid using stack for each IGNORE section when
4135    the whole file is parsed with one call.
4136 */
4137 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4138 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4139                        const char **endPtr) {
4140   enum XML_Error result
4141       = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4142                         (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4143   if (result != XML_ERROR_NONE)
4144     return result;
4145   if (start) {
4146     parser->m_processor = prologProcessor;
4147     return prologProcessor(parser, start, end, endPtr);
4148   }
4149   return result;
4150 }
4151 
4152 /* startPtr gets set to non-null is the section is closed, and to null
4153    if the section is not yet closed.
4154 */
4155 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4156 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4157                 const char *end, const char **nextPtr, XML_Bool haveMore) {
4158   const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4159   int tok;
4160   const char *s = *startPtr;
4161   const char **eventPP;
4162   const char **eventEndPP;
4163   if (enc == parser->m_encoding) {
4164     eventPP = &parser->m_eventPtr;
4165     *eventPP = s;
4166     eventEndPP = &parser->m_eventEndPtr;
4167   } else {
4168     /* It's not entirely clear, but it seems the following two lines
4169      * of code cannot be executed.  The only occasions on which 'enc'
4170      * is not 'encoding' are when this function is called
4171      * from the internal entity processing, and IGNORE sections are an
4172      * error in internal entities.
4173      *
4174      * Since it really isn't clear that this is true, we keep the code
4175      * and just remove it from our coverage tests.
4176      *
4177      * LCOV_EXCL_START
4178      */
4179     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4180     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4181     /* LCOV_EXCL_STOP */
4182   }
4183   *eventPP = s;
4184   *startPtr = NULL;
4185   tok = XmlIgnoreSectionTok(enc, s, end, &next);
4186 #  ifdef XML_DTD
4187   if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4188                                 XML_ACCOUNT_DIRECT)) {
4189     accountingOnAbort(parser);
4190     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4191   }
4192 #  endif
4193   *eventEndPP = next;
4194   switch (tok) {
4195   case XML_TOK_IGNORE_SECT:
4196     if (parser->m_defaultHandler)
4197       reportDefault(parser, enc, s, next);
4198     *startPtr = next;
4199     *nextPtr = next;
4200     if (parser->m_parsingStatus.parsing == XML_FINISHED)
4201       return XML_ERROR_ABORTED;
4202     else
4203       return XML_ERROR_NONE;
4204   case XML_TOK_INVALID:
4205     *eventPP = next;
4206     return XML_ERROR_INVALID_TOKEN;
4207   case XML_TOK_PARTIAL_CHAR:
4208     if (haveMore) {
4209       *nextPtr = s;
4210       return XML_ERROR_NONE;
4211     }
4212     return XML_ERROR_PARTIAL_CHAR;
4213   case XML_TOK_PARTIAL:
4214   case XML_TOK_NONE:
4215     if (haveMore) {
4216       *nextPtr = s;
4217       return XML_ERROR_NONE;
4218     }
4219     return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4220   default:
4221     /* All of the tokens that XmlIgnoreSectionTok() returns have
4222      * explicit cases to handle them, so this default case is never
4223      * executed.  We keep it as a safety net anyway, and remove it
4224      * from our test coverage statistics.
4225      *
4226      * LCOV_EXCL_START
4227      */
4228     *eventPP = next;
4229     return XML_ERROR_UNEXPECTED_STATE;
4230     /* LCOV_EXCL_STOP */
4231   }
4232   /* not reached */
4233 }
4234 
4235 #endif /* XML_DTD */
4236 
4237 static enum XML_Error
initializeEncoding(XML_Parser parser)4238 initializeEncoding(XML_Parser parser) {
4239   const char *s;
4240 #ifdef XML_UNICODE
4241   char encodingBuf[128];
4242   /* See comments about `protocolEncodingName` in parserInit() */
4243   if (! parser->m_protocolEncodingName)
4244     s = NULL;
4245   else {
4246     int i;
4247     for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4248       if (i == sizeof(encodingBuf) - 1
4249           || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4250         encodingBuf[0] = '\0';
4251         break;
4252       }
4253       encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4254     }
4255     encodingBuf[i] = '\0';
4256     s = encodingBuf;
4257   }
4258 #else
4259   s = parser->m_protocolEncodingName;
4260 #endif
4261   if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4262           &parser->m_initEncoding, &parser->m_encoding, s))
4263     return XML_ERROR_NONE;
4264   return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4265 }
4266 
/* Processes the XML declaration or text declaration spanning [s, next).

   isGeneralTextEntity selects the error code returned for a malformed
   declaration (XML_ERROR_TEXT_DECL if non-zero, XML_ERROR_XML_DECL
   otherwise) and, if non-zero, makes the function ignore the standalone
   setting.

   The declaration is reported to the XML declaration handler if one is
   set, or else to the default handler.  If no protocol encoding name is
   set on the parser, the encoding named in the declaration is applied to
   parser->m_encoding, consulting handleUnknownEncoding() for encodings
   that the declaration parser did not resolve.

   Returns XML_ERROR_NONE on success, or an error code otherwise.
*/
static enum XML_Error
processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
               const char *next) {
  const char *encodingName = NULL;
  const XML_Char *storedEncName = NULL;
  const ENCODING *newEncoding = NULL;
  const char *version = NULL;
  const char *versionend;
  const XML_Char *storedversion = NULL;
  int standalone = -1;

#ifdef XML_DTD
  /* Account for the bytes of the declaration itself */
  if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
                                XML_ACCOUNT_DIRECT)) {
    accountingOnAbort(parser);
    return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
  }
#endif

  /* Extract version, encoding name/encoding and standalone setting.  On
     failure the declaration parser records the error position in
     parser->m_eventPtr (passed by address below). */
  if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
          isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
          &version, &versionend, &encodingName, &newEncoding, &standalone)) {
    if (isGeneralTextEntity)
      return XML_ERROR_TEXT_DECL;
    else
      return XML_ERROR_XML_DECL;
  }
  if (! isGeneralTextEntity && standalone == 1) {
    parser->m_dtd->standalone = XML_TRUE;
#ifdef XML_DTD
    /* A standalone document turns "unless standalone" into "never" */
    if (parser->m_paramEntityParsing
        == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
      parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif /* XML_DTD */
  }
  if (parser->m_xmlDeclHandler) {
    /* Copy encoding name and version into m_temp2Pool as XML_Char strings
       for the handler; the pool is cleared again before returning. */
    if (encodingName != NULL) {
      storedEncName = poolStoreString(
          &parser->m_temp2Pool, parser->m_encoding, encodingName,
          encodingName + XmlNameLength(parser->m_encoding, encodingName));
      if (! storedEncName)
        return XML_ERROR_NO_MEMORY;
      /* NOTE(review): presumably needed so that the version string stored
         next becomes a separate string in the pool -- confirm against the
         pool helpers. */
      poolFinish(&parser->m_temp2Pool);
    }
    if (version) {
      /* NOTE(review): the end is taken one character before versionend,
         presumably to drop the closing quote -- confirm against the
         declaration parser. */
      storedversion
          = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
                            versionend - parser->m_encoding->minBytesPerChar);
      if (! storedversion)
        return XML_ERROR_NO_MEMORY;
    }
    parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
                             standalone);
  } else if (parser->m_defaultHandler)
    reportDefault(parser, parser->m_encoding, s, next);
  /* The declared encoding is only considered when no protocol encoding
     name is set on the parser. */
  if (parser->m_protocolEncodingName == NULL) {
    if (newEncoding) {
      /* Check that the specified encoding does not conflict with what
       * the parser has already deduced.  Do we have the same number
       * of bytes in the smallest representation of a character?  If
       * this is UTF-16, is it the same endianness?
       */
      if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
          || (newEncoding->minBytesPerChar == 2
              && newEncoding != parser->m_encoding)) {
        parser->m_eventPtr = encodingName;
        return XML_ERROR_INCORRECT_ENCODING;
      }
      parser->m_encoding = newEncoding;
    } else if (encodingName) {
      /* An encoding was named but not resolved by the declaration parser:
         ask handleUnknownEncoding().  The name may already have been
         stored above for the declaration handler. */
      enum XML_Error result;
      if (! storedEncName) {
        storedEncName = poolStoreString(
            &parser->m_temp2Pool, parser->m_encoding, encodingName,
            encodingName + XmlNameLength(parser->m_encoding, encodingName));
        if (! storedEncName)
          return XML_ERROR_NO_MEMORY;
      }
      result = handleUnknownEncoding(parser, storedEncName);
      poolClear(&parser->m_temp2Pool);
      /* Point the error position at the encoding name */
      if (result == XML_ERROR_UNKNOWN_ENCODING)
        parser->m_eventPtr = encodingName;
      return result;
    }
  }

  /* Release the temporary copies made for the declaration handler */
  if (storedEncName || storedversion)
    poolClear(&parser->m_temp2Pool);

  return XML_ERROR_NONE;
}
4358 
4359 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4360 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4361   if (parser->m_unknownEncodingHandler) {
4362     XML_Encoding info;
4363     int i;
4364     for (i = 0; i < 256; i++)
4365       info.map[i] = -1;
4366     info.convert = NULL;
4367     info.data = NULL;
4368     info.release = NULL;
4369     if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4370                                          encodingName, &info)) {
4371       ENCODING *enc;
4372       parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4373       if (! parser->m_unknownEncodingMem) {
4374         if (info.release)
4375           info.release(info.data);
4376         return XML_ERROR_NO_MEMORY;
4377       }
4378       enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4379           parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4380       if (enc) {
4381         parser->m_unknownEncodingData = info.data;
4382         parser->m_unknownEncodingRelease = info.release;
4383         parser->m_encoding = enc;
4384         return XML_ERROR_NONE;
4385       }
4386     }
4387     if (info.release != NULL)
4388       info.release(info.data);
4389   }
4390   return XML_ERROR_UNKNOWN_ENCODING;
4391 }
4392 
4393 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4394 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4395                     const char **nextPtr) {
4396   enum XML_Error result = initializeEncoding(parser);
4397   if (result != XML_ERROR_NONE)
4398     return result;
4399   parser->m_processor = prologProcessor;
4400   return prologProcessor(parser, s, end, nextPtr);
4401 }
4402 
4403 #ifdef XML_DTD
4404 
4405 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4406 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4407                             const char **nextPtr) {
4408   enum XML_Error result = initializeEncoding(parser);
4409   if (result != XML_ERROR_NONE)
4410     return result;
4411 
4412   /* we know now that XML_Parse(Buffer) has been called,
4413      so we consider the external parameter entity read */
4414   parser->m_dtd->paramEntityRead = XML_TRUE;
4415 
4416   if (parser->m_prologState.inEntityValue) {
4417     parser->m_processor = entityValueInitProcessor;
4418     return entityValueInitProcessor(parser, s, end, nextPtr);
4419   } else {
4420     parser->m_processor = externalParEntProcessor;
4421     return externalParEntProcessor(parser, s, end, nextPtr);
4422   }
4423 }
4424 
/* Initial processor for input that makes up an entity value.

   Scans prolog tokens from s.  Once the end of the available input is
   reached on the final buffer, the whole range [s, end) is stored through
   storeEntityValue().  If an XML/text declaration token is found, it is
   processed and entityValueProcessor takes over for the remaining input.
   If not on the final buffer, incomplete input makes the function return
   XML_ERROR_NONE with *nextPtr == s, i.e. without consuming anything
   (except for a BOM that ends exactly at end, which is skipped).
*/
static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
                         const char **nextPtr) {
  int tok;
  const char *start = s; /* start of the token being scanned */
  const char *next = start;
  parser->m_eventPtr = start;

  for (;;) {
    tok = XmlPrologTok(parser->m_encoding, start, end, &next);
    /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
             - storeEntityValue
             - processXmlDecl
    */
    parser->m_eventEndPtr = next;
    if (tok <= 0) {
      /* More data may follow: ask to be called again with everything
         from s on (invalid tokens are reported right away, though). */
      if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      switch (tok) {
      case XML_TOK_INVALID:
        return XML_ERROR_INVALID_TOKEN;
      case XML_TOK_PARTIAL:
        return XML_ERROR_UNCLOSED_TOKEN;
      case XML_TOK_PARTIAL_CHAR:
        return XML_ERROR_PARTIAL_CHAR;
      case XML_TOK_NONE: /* start == end */
      default:
        break;
      }
      /* found end of entity value - can store it now */
      return storeEntityValue(parser, parser->m_encoding, s, end,
                              XML_ACCOUNT_DIRECT);
    } else if (tok == XML_TOK_XML_DECL) {
      enum XML_Error result;
      result = processXmlDecl(parser, 0, start, next);
      if (result != XML_ERROR_NONE)
        return result;
      /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED.  For
       * that to happen, a parameter entity parsing handler must have attempted
       * to suspend the parser, which fails and raises an error.  The parser can
       * be aborted, but can't be suspended.
       */
      if (parser->m_parsingStatus.parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
      *nextPtr = next;
      /* stop scanning for text declaration - we found one */
      parser->m_processor = entityValueProcessor;
      return entityValueProcessor(parser, next, end, nextPtr);
    }
    /* If we are at the end of the buffer, this would cause XmlPrologTok to
       return XML_TOK_NONE on the next call, which would then cause the
       function to exit with *nextPtr set to s - that is what we want for other
       tokens, but not for the BOM - we would rather like to skip it;
       then, when this routine is entered the next time, XmlPrologTok will
       return XML_TOK_INVALID, since the BOM is still in the buffer
    */
    else if (tok == XML_TOK_BOM && next == end
             && ! parser->m_parsingStatus.finalBuffer) {
#  ifdef XML_DTD
      /* The BOM is consumed here, so its bytes are accounted here too */
      if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
                                    XML_ACCOUNT_DIRECT)) {
        accountingOnAbort(parser);
        return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
      }
#  endif

      *nextPtr = next;
      return XML_ERROR_NONE;
    }
    /* If we get this token, we have the start of what might be a
       normal tag, but not a declaration (i.e. it doesn't begin with
       "<!").  In a DTD context, that isn't legal.
    */
    else if (tok == XML_TOK_INSTANCE_START) {
      *nextPtr = next;
      return XML_ERROR_SYNTAX;
    }
    /* Any other token: move on to the next one */
    start = next;
    parser->m_eventPtr = start;
  }
}
4508 
4509 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4510 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4511                         const char **nextPtr) {
4512   const char *next = s;
4513   int tok;
4514 
4515   tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4516   if (tok <= 0) {
4517     if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4518       *nextPtr = s;
4519       return XML_ERROR_NONE;
4520     }
4521     switch (tok) {
4522     case XML_TOK_INVALID:
4523       return XML_ERROR_INVALID_TOKEN;
4524     case XML_TOK_PARTIAL:
4525       return XML_ERROR_UNCLOSED_TOKEN;
4526     case XML_TOK_PARTIAL_CHAR:
4527       return XML_ERROR_PARTIAL_CHAR;
4528     case XML_TOK_NONE: /* start == end */
4529     default:
4530       break;
4531     }
4532   }
4533   /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4534      However, when parsing an external subset, doProlog will not accept a BOM
4535      as valid, and report a syntax error, so we have to skip the BOM, and
4536      account for the BOM bytes.
4537   */
4538   else if (tok == XML_TOK_BOM) {
4539     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4540                                   XML_ACCOUNT_DIRECT)) {
4541       accountingOnAbort(parser);
4542       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4543     }
4544 
4545     s = next;
4546     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4547   }
4548 
4549   parser->m_processor = prologProcessor;
4550   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4551                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4552                   XML_ACCOUNT_DIRECT);
4553 }
4554 
4555 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4556 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4557                      const char **nextPtr) {
4558   const char *start = s;
4559   const char *next = s;
4560   const ENCODING *enc = parser->m_encoding;
4561   int tok;
4562 
4563   for (;;) {
4564     tok = XmlPrologTok(enc, start, end, &next);
4565     /* Note: These bytes are accounted later in:
4566              - storeEntityValue
4567     */
4568     if (tok <= 0) {
4569       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4570         *nextPtr = s;
4571         return XML_ERROR_NONE;
4572       }
4573       switch (tok) {
4574       case XML_TOK_INVALID:
4575         return XML_ERROR_INVALID_TOKEN;
4576       case XML_TOK_PARTIAL:
4577         return XML_ERROR_UNCLOSED_TOKEN;
4578       case XML_TOK_PARTIAL_CHAR:
4579         return XML_ERROR_PARTIAL_CHAR;
4580       case XML_TOK_NONE: /* start == end */
4581       default:
4582         break;
4583       }
4584       /* found end of entity value - can store it now */
4585       return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4586     }
4587     start = next;
4588   }
4589 }
4590 
4591 #endif /* XML_DTD */
4592 
4593 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4594 prologProcessor(XML_Parser parser, const char *s, const char *end,
4595                 const char **nextPtr) {
4596   const char *next = s;
4597   int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4598   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4599                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4600                   XML_ACCOUNT_DIRECT);
4601 }
4602 
4603 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4604 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4605          int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4606          XML_Bool allowClosingDoctype, enum XML_Account account) {
4607 #ifdef XML_DTD
4608   static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4609 #endif /* XML_DTD */
4610   static const XML_Char atypeCDATA[]
4611       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4612   static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4613   static const XML_Char atypeIDREF[]
4614       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4615   static const XML_Char atypeIDREFS[]
4616       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4617   static const XML_Char atypeENTITY[]
4618       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4619   static const XML_Char atypeENTITIES[]
4620       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4621          ASCII_I, ASCII_E, ASCII_S, '\0'};
4622   static const XML_Char atypeNMTOKEN[]
4623       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4624   static const XML_Char atypeNMTOKENS[]
4625       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4626          ASCII_E, ASCII_N, ASCII_S, '\0'};
4627   static const XML_Char notationPrefix[]
4628       = {ASCII_N, ASCII_O, ASCII_T, ASCII_A,      ASCII_T,
4629          ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4630   static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4631   static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4632 
4633 #ifndef XML_DTD
4634   UNUSED_P(account);
4635 #endif
4636 
4637   /* save one level of indirection */
4638   DTD *const dtd = parser->m_dtd;
4639 
4640   const char **eventPP;
4641   const char **eventEndPP;
4642   enum XML_Content_Quant quant;
4643 
4644   if (enc == parser->m_encoding) {
4645     eventPP = &parser->m_eventPtr;
4646     eventEndPP = &parser->m_eventEndPtr;
4647   } else {
4648     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4649     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4650   }
4651 
4652   for (;;) {
4653     int role;
4654     XML_Bool handleDefault = XML_TRUE;
4655     *eventPP = s;
4656     *eventEndPP = next;
4657     if (tok <= 0) {
4658       if (haveMore && tok != XML_TOK_INVALID) {
4659         *nextPtr = s;
4660         return XML_ERROR_NONE;
4661       }
4662       switch (tok) {
4663       case XML_TOK_INVALID:
4664         *eventPP = next;
4665         return XML_ERROR_INVALID_TOKEN;
4666       case XML_TOK_PARTIAL:
4667         return XML_ERROR_UNCLOSED_TOKEN;
4668       case XML_TOK_PARTIAL_CHAR:
4669         return XML_ERROR_PARTIAL_CHAR;
4670       case -XML_TOK_PROLOG_S:
4671         tok = -tok;
4672         break;
4673       case XML_TOK_NONE:
4674 #ifdef XML_DTD
4675         /* for internal PE NOT referenced between declarations */
4676         if (enc != parser->m_encoding
4677             && ! parser->m_openInternalEntities->betweenDecl) {
4678           *nextPtr = s;
4679           return XML_ERROR_NONE;
4680         }
4681         /* WFC: PE Between Declarations - must check that PE contains
4682            complete markup, not only for external PEs, but also for
4683            internal PEs if the reference occurs between declarations.
4684         */
4685         if (parser->m_isParamEntity || enc != parser->m_encoding) {
4686           if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4687               == XML_ROLE_ERROR)
4688             return XML_ERROR_INCOMPLETE_PE;
4689           *nextPtr = s;
4690           return XML_ERROR_NONE;
4691         }
4692 #endif /* XML_DTD */
4693         return XML_ERROR_NO_ELEMENTS;
4694       default:
4695         tok = -tok;
4696         next = end;
4697         break;
4698       }
4699     }
4700     role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4701 #ifdef XML_DTD
4702     switch (role) {
4703     case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4704     case XML_ROLE_XML_DECL:       // bytes accounted in processXmlDecl
4705     case XML_ROLE_TEXT_DECL:      // bytes accounted in processXmlDecl
4706       break;
4707     default:
4708       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4709         accountingOnAbort(parser);
4710         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4711       }
4712     }
4713 #endif
4714     switch (role) {
4715     case XML_ROLE_XML_DECL: {
4716       enum XML_Error result = processXmlDecl(parser, 0, s, next);
4717       if (result != XML_ERROR_NONE)
4718         return result;
4719       enc = parser->m_encoding;
4720       handleDefault = XML_FALSE;
4721     } break;
4722     case XML_ROLE_DOCTYPE_NAME:
4723       if (parser->m_startDoctypeDeclHandler) {
4724         parser->m_doctypeName
4725             = poolStoreString(&parser->m_tempPool, enc, s, next);
4726         if (! parser->m_doctypeName)
4727           return XML_ERROR_NO_MEMORY;
4728         poolFinish(&parser->m_tempPool);
4729         parser->m_doctypePubid = NULL;
4730         handleDefault = XML_FALSE;
4731       }
4732       parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4733       break;
4734     case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4735       if (parser->m_startDoctypeDeclHandler) {
4736         parser->m_startDoctypeDeclHandler(
4737             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4738             parser->m_doctypePubid, 1);
4739         parser->m_doctypeName = NULL;
4740         poolClear(&parser->m_tempPool);
4741         handleDefault = XML_FALSE;
4742       }
4743       break;
4744 #ifdef XML_DTD
4745     case XML_ROLE_TEXT_DECL: {
4746       enum XML_Error result = processXmlDecl(parser, 1, s, next);
4747       if (result != XML_ERROR_NONE)
4748         return result;
4749       enc = parser->m_encoding;
4750       handleDefault = XML_FALSE;
4751     } break;
4752 #endif /* XML_DTD */
4753     case XML_ROLE_DOCTYPE_PUBLIC_ID:
4754 #ifdef XML_DTD
4755       parser->m_useForeignDTD = XML_FALSE;
4756       parser->m_declEntity = (ENTITY *)lookup(
4757           parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4758       if (! parser->m_declEntity)
4759         return XML_ERROR_NO_MEMORY;
4760 #endif /* XML_DTD */
4761       dtd->hasParamEntityRefs = XML_TRUE;
4762       if (parser->m_startDoctypeDeclHandler) {
4763         XML_Char *pubId;
4764         if (! XmlIsPublicId(enc, s, next, eventPP))
4765           return XML_ERROR_PUBLICID;
4766         pubId = poolStoreString(&parser->m_tempPool, enc,
4767                                 s + enc->minBytesPerChar,
4768                                 next - enc->minBytesPerChar);
4769         if (! pubId)
4770           return XML_ERROR_NO_MEMORY;
4771         normalizePublicId(pubId);
4772         poolFinish(&parser->m_tempPool);
4773         parser->m_doctypePubid = pubId;
4774         handleDefault = XML_FALSE;
4775         goto alreadyChecked;
4776       }
4777       /* fall through */
4778     case XML_ROLE_ENTITY_PUBLIC_ID:
4779       if (! XmlIsPublicId(enc, s, next, eventPP))
4780         return XML_ERROR_PUBLICID;
4781     alreadyChecked:
4782       if (dtd->keepProcessing && parser->m_declEntity) {
4783         XML_Char *tem
4784             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4785                               next - enc->minBytesPerChar);
4786         if (! tem)
4787           return XML_ERROR_NO_MEMORY;
4788         normalizePublicId(tem);
4789         parser->m_declEntity->publicId = tem;
4790         poolFinish(&dtd->pool);
4791         /* Don't suppress the default handler if we fell through from
4792          * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4793          */
4794         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4795           handleDefault = XML_FALSE;
4796       }
4797       break;
4798     case XML_ROLE_DOCTYPE_CLOSE:
4799       if (allowClosingDoctype != XML_TRUE) {
4800         /* Must not close doctype from within expanded parameter entities */
4801         return XML_ERROR_INVALID_TOKEN;
4802       }
4803 
4804       if (parser->m_doctypeName) {
4805         parser->m_startDoctypeDeclHandler(
4806             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4807             parser->m_doctypePubid, 0);
4808         poolClear(&parser->m_tempPool);
4809         handleDefault = XML_FALSE;
4810       }
4811       /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4812          XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4813          was not set, indicating an external subset
4814       */
4815 #ifdef XML_DTD
4816       if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4817         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4818         dtd->hasParamEntityRefs = XML_TRUE;
4819         if (parser->m_paramEntityParsing
4820             && parser->m_externalEntityRefHandler) {
4821           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4822                                             externalSubsetName, sizeof(ENTITY));
4823           if (! entity) {
4824             /* The external subset name "#" will have already been
4825              * inserted into the hash table at the start of the
4826              * external entity parsing, so no allocation will happen
4827              * and lookup() cannot fail.
4828              */
4829             return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4830           }
4831           if (parser->m_useForeignDTD)
4832             entity->base = parser->m_curBase;
4833           dtd->paramEntityRead = XML_FALSE;
4834           if (! parser->m_externalEntityRefHandler(
4835                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4836                   entity->systemId, entity->publicId))
4837             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4838           if (dtd->paramEntityRead) {
4839             if (! dtd->standalone && parser->m_notStandaloneHandler
4840                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4841               return XML_ERROR_NOT_STANDALONE;
4842           }
4843           /* if we didn't read the foreign DTD then this means that there
4844              is no external subset and we must reset dtd->hasParamEntityRefs
4845           */
4846           else if (! parser->m_doctypeSysid)
4847             dtd->hasParamEntityRefs = hadParamEntityRefs;
4848           /* end of DTD - no need to update dtd->keepProcessing */
4849         }
4850         parser->m_useForeignDTD = XML_FALSE;
4851       }
4852 #endif /* XML_DTD */
4853       if (parser->m_endDoctypeDeclHandler) {
4854         parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4855         handleDefault = XML_FALSE;
4856       }
4857       break;
4858     case XML_ROLE_INSTANCE_START:
4859 #ifdef XML_DTD
4860       /* if there is no DOCTYPE declaration then now is the
4861          last chance to read the foreign DTD
4862       */
4863       if (parser->m_useForeignDTD) {
4864         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4865         dtd->hasParamEntityRefs = XML_TRUE;
4866         if (parser->m_paramEntityParsing
4867             && parser->m_externalEntityRefHandler) {
4868           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4869                                             externalSubsetName, sizeof(ENTITY));
4870           if (! entity)
4871             return XML_ERROR_NO_MEMORY;
4872           entity->base = parser->m_curBase;
4873           dtd->paramEntityRead = XML_FALSE;
4874           if (! parser->m_externalEntityRefHandler(
4875                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4876                   entity->systemId, entity->publicId))
4877             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4878           if (dtd->paramEntityRead) {
4879             if (! dtd->standalone && parser->m_notStandaloneHandler
4880                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4881               return XML_ERROR_NOT_STANDALONE;
4882           }
4883           /* if we didn't read the foreign DTD then this means that there
4884              is no external subset and we must reset dtd->hasParamEntityRefs
4885           */
4886           else
4887             dtd->hasParamEntityRefs = hadParamEntityRefs;
4888           /* end of DTD - no need to update dtd->keepProcessing */
4889         }
4890       }
4891 #endif /* XML_DTD */
4892       parser->m_processor = contentProcessor;
4893       return contentProcessor(parser, s, end, nextPtr);
4894     case XML_ROLE_ATTLIST_ELEMENT_NAME:
4895       parser->m_declElementType = getElementType(parser, enc, s, next);
4896       if (! parser->m_declElementType)
4897         return XML_ERROR_NO_MEMORY;
4898       goto checkAttListDeclHandler;
4899     case XML_ROLE_ATTRIBUTE_NAME:
4900       parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4901       if (! parser->m_declAttributeId)
4902         return XML_ERROR_NO_MEMORY;
4903       parser->m_declAttributeIsCdata = XML_FALSE;
4904       parser->m_declAttributeType = NULL;
4905       parser->m_declAttributeIsId = XML_FALSE;
4906       goto checkAttListDeclHandler;
4907     case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4908       parser->m_declAttributeIsCdata = XML_TRUE;
4909       parser->m_declAttributeType = atypeCDATA;
4910       goto checkAttListDeclHandler;
4911     case XML_ROLE_ATTRIBUTE_TYPE_ID:
4912       parser->m_declAttributeIsId = XML_TRUE;
4913       parser->m_declAttributeType = atypeID;
4914       goto checkAttListDeclHandler;
4915     case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4916       parser->m_declAttributeType = atypeIDREF;
4917       goto checkAttListDeclHandler;
4918     case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4919       parser->m_declAttributeType = atypeIDREFS;
4920       goto checkAttListDeclHandler;
4921     case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4922       parser->m_declAttributeType = atypeENTITY;
4923       goto checkAttListDeclHandler;
4924     case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4925       parser->m_declAttributeType = atypeENTITIES;
4926       goto checkAttListDeclHandler;
4927     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4928       parser->m_declAttributeType = atypeNMTOKEN;
4929       goto checkAttListDeclHandler;
4930     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4931       parser->m_declAttributeType = atypeNMTOKENS;
4932     checkAttListDeclHandler:
4933       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
4934         handleDefault = XML_FALSE;
4935       break;
4936     case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4937     case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4938       if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
4939         const XML_Char *prefix;
4940         if (parser->m_declAttributeType) {
4941           prefix = enumValueSep;
4942         } else {
4943           prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
4944                                                               : enumValueStart);
4945         }
4946         if (! poolAppendString(&parser->m_tempPool, prefix))
4947           return XML_ERROR_NO_MEMORY;
4948         if (! poolAppend(&parser->m_tempPool, enc, s, next))
4949           return XML_ERROR_NO_MEMORY;
4950         parser->m_declAttributeType = parser->m_tempPool.start;
4951         handleDefault = XML_FALSE;
4952       }
4953       break;
4954     case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4955     case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4956       if (dtd->keepProcessing) {
4957         if (! defineAttribute(parser->m_declElementType,
4958                               parser->m_declAttributeId,
4959                               parser->m_declAttributeIsCdata,
4960                               parser->m_declAttributeIsId, 0, parser))
4961           return XML_ERROR_NO_MEMORY;
4962         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4963           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4964               || (*parser->m_declAttributeType == XML_T(ASCII_N)
4965                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4966             /* Enumerated or Notation type */
4967             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4968                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4969               return XML_ERROR_NO_MEMORY;
4970             parser->m_declAttributeType = parser->m_tempPool.start;
4971             poolFinish(&parser->m_tempPool);
4972           }
4973           *eventEndPP = s;
4974           parser->m_attlistDeclHandler(
4975               parser->m_handlerArg, parser->m_declElementType->name,
4976               parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
4977               role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4978           poolClear(&parser->m_tempPool);
4979           handleDefault = XML_FALSE;
4980         }
4981       }
4982       break;
4983     case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4984     case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4985       if (dtd->keepProcessing) {
4986         const XML_Char *attVal;
4987         enum XML_Error result = storeAttributeValue(
4988             parser, enc, parser->m_declAttributeIsCdata,
4989             s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
4990             XML_ACCOUNT_NONE);
4991         if (result)
4992           return result;
4993         attVal = poolStart(&dtd->pool);
4994         poolFinish(&dtd->pool);
4995         /* ID attributes aren't allowed to have a default */
4996         if (! defineAttribute(
4997                 parser->m_declElementType, parser->m_declAttributeId,
4998                 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
4999           return XML_ERROR_NO_MEMORY;
5000         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5001           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5002               || (*parser->m_declAttributeType == XML_T(ASCII_N)
5003                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5004             /* Enumerated or Notation type */
5005             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5006                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5007               return XML_ERROR_NO_MEMORY;
5008             parser->m_declAttributeType = parser->m_tempPool.start;
5009             poolFinish(&parser->m_tempPool);
5010           }
5011           *eventEndPP = s;
5012           parser->m_attlistDeclHandler(
5013               parser->m_handlerArg, parser->m_declElementType->name,
5014               parser->m_declAttributeId->name, parser->m_declAttributeType,
5015               attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5016           poolClear(&parser->m_tempPool);
5017           handleDefault = XML_FALSE;
5018         }
5019       }
5020       break;
5021     case XML_ROLE_ENTITY_VALUE:
5022       if (dtd->keepProcessing) {
5023         enum XML_Error result
5024             = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5025                                next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5026         if (parser->m_declEntity) {
5027           parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5028           parser->m_declEntity->textLen
5029               = (int)(poolLength(&dtd->entityValuePool));
5030           poolFinish(&dtd->entityValuePool);
5031           if (parser->m_entityDeclHandler) {
5032             *eventEndPP = s;
5033             parser->m_entityDeclHandler(
5034                 parser->m_handlerArg, parser->m_declEntity->name,
5035                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5036                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5037             handleDefault = XML_FALSE;
5038           }
5039         } else
5040           poolDiscard(&dtd->entityValuePool);
5041         if (result != XML_ERROR_NONE)
5042           return result;
5043       }
5044       break;
5045     case XML_ROLE_DOCTYPE_SYSTEM_ID:
5046 #ifdef XML_DTD
5047       parser->m_useForeignDTD = XML_FALSE;
5048 #endif /* XML_DTD */
5049       dtd->hasParamEntityRefs = XML_TRUE;
5050       if (parser->m_startDoctypeDeclHandler) {
5051         parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5052                                                  s + enc->minBytesPerChar,
5053                                                  next - enc->minBytesPerChar);
5054         if (parser->m_doctypeSysid == NULL)
5055           return XML_ERROR_NO_MEMORY;
5056         poolFinish(&parser->m_tempPool);
5057         handleDefault = XML_FALSE;
5058       }
5059 #ifdef XML_DTD
5060       else
5061         /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5062            for the case where no parser->m_startDoctypeDeclHandler is set */
5063         parser->m_doctypeSysid = externalSubsetName;
5064 #endif /* XML_DTD */
5065       if (! dtd->standalone
5066 #ifdef XML_DTD
5067           && ! parser->m_paramEntityParsing
5068 #endif /* XML_DTD */
5069           && parser->m_notStandaloneHandler
5070           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5071         return XML_ERROR_NOT_STANDALONE;
5072 #ifndef XML_DTD
5073       break;
5074 #else  /* XML_DTD */
5075       if (! parser->m_declEntity) {
5076         parser->m_declEntity = (ENTITY *)lookup(
5077             parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5078         if (! parser->m_declEntity)
5079           return XML_ERROR_NO_MEMORY;
5080         parser->m_declEntity->publicId = NULL;
5081       }
5082 #endif /* XML_DTD */
5083       /* fall through */
5084     case XML_ROLE_ENTITY_SYSTEM_ID:
5085       if (dtd->keepProcessing && parser->m_declEntity) {
5086         parser->m_declEntity->systemId
5087             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5088                               next - enc->minBytesPerChar);
5089         if (! parser->m_declEntity->systemId)
5090           return XML_ERROR_NO_MEMORY;
5091         parser->m_declEntity->base = parser->m_curBase;
5092         poolFinish(&dtd->pool);
5093         /* Don't suppress the default handler if we fell through from
5094          * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5095          */
5096         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5097           handleDefault = XML_FALSE;
5098       }
5099       break;
5100     case XML_ROLE_ENTITY_COMPLETE:
5101       if (dtd->keepProcessing && parser->m_declEntity
5102           && parser->m_entityDeclHandler) {
5103         *eventEndPP = s;
5104         parser->m_entityDeclHandler(
5105             parser->m_handlerArg, parser->m_declEntity->name,
5106             parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5107             parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5108         handleDefault = XML_FALSE;
5109       }
5110       break;
5111     case XML_ROLE_ENTITY_NOTATION_NAME:
5112       if (dtd->keepProcessing && parser->m_declEntity) {
5113         parser->m_declEntity->notation
5114             = poolStoreString(&dtd->pool, enc, s, next);
5115         if (! parser->m_declEntity->notation)
5116           return XML_ERROR_NO_MEMORY;
5117         poolFinish(&dtd->pool);
5118         if (parser->m_unparsedEntityDeclHandler) {
5119           *eventEndPP = s;
5120           parser->m_unparsedEntityDeclHandler(
5121               parser->m_handlerArg, parser->m_declEntity->name,
5122               parser->m_declEntity->base, parser->m_declEntity->systemId,
5123               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5124           handleDefault = XML_FALSE;
5125         } else if (parser->m_entityDeclHandler) {
5126           *eventEndPP = s;
5127           parser->m_entityDeclHandler(
5128               parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5129               parser->m_declEntity->base, parser->m_declEntity->systemId,
5130               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5131           handleDefault = XML_FALSE;
5132         }
5133       }
5134       break;
5135     case XML_ROLE_GENERAL_ENTITY_NAME: {
5136       if (XmlPredefinedEntityName(enc, s, next)) {
5137         parser->m_declEntity = NULL;
5138         break;
5139       }
5140       if (dtd->keepProcessing) {
5141         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5142         if (! name)
5143           return XML_ERROR_NO_MEMORY;
5144         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5145                                                 name, sizeof(ENTITY));
5146         if (! parser->m_declEntity)
5147           return XML_ERROR_NO_MEMORY;
5148         if (parser->m_declEntity->name != name) {
5149           poolDiscard(&dtd->pool);
5150           parser->m_declEntity = NULL;
5151         } else {
5152           poolFinish(&dtd->pool);
5153           parser->m_declEntity->publicId = NULL;
5154           parser->m_declEntity->is_param = XML_FALSE;
5155           /* if we have a parent parser or are reading an internal parameter
5156              entity, then the entity declaration is not considered "internal"
5157           */
5158           parser->m_declEntity->is_internal
5159               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5160           if (parser->m_entityDeclHandler)
5161             handleDefault = XML_FALSE;
5162         }
5163       } else {
5164         poolDiscard(&dtd->pool);
5165         parser->m_declEntity = NULL;
5166       }
5167     } break;
5168     case XML_ROLE_PARAM_ENTITY_NAME:
5169 #ifdef XML_DTD
5170       if (dtd->keepProcessing) {
5171         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5172         if (! name)
5173           return XML_ERROR_NO_MEMORY;
5174         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5175                                                 name, sizeof(ENTITY));
5176         if (! parser->m_declEntity)
5177           return XML_ERROR_NO_MEMORY;
5178         if (parser->m_declEntity->name != name) {
5179           poolDiscard(&dtd->pool);
5180           parser->m_declEntity = NULL;
5181         } else {
5182           poolFinish(&dtd->pool);
5183           parser->m_declEntity->publicId = NULL;
5184           parser->m_declEntity->is_param = XML_TRUE;
5185           /* if we have a parent parser or are reading an internal parameter
5186              entity, then the entity declaration is not considered "internal"
5187           */
5188           parser->m_declEntity->is_internal
5189               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5190           if (parser->m_entityDeclHandler)
5191             handleDefault = XML_FALSE;
5192         }
5193       } else {
5194         poolDiscard(&dtd->pool);
5195         parser->m_declEntity = NULL;
5196       }
5197 #else  /* not XML_DTD */
5198       parser->m_declEntity = NULL;
5199 #endif /* XML_DTD */
5200       break;
5201     case XML_ROLE_NOTATION_NAME:
5202       parser->m_declNotationPublicId = NULL;
5203       parser->m_declNotationName = NULL;
5204       if (parser->m_notationDeclHandler) {
5205         parser->m_declNotationName
5206             = poolStoreString(&parser->m_tempPool, enc, s, next);
5207         if (! parser->m_declNotationName)
5208           return XML_ERROR_NO_MEMORY;
5209         poolFinish(&parser->m_tempPool);
5210         handleDefault = XML_FALSE;
5211       }
5212       break;
5213     case XML_ROLE_NOTATION_PUBLIC_ID:
5214       if (! XmlIsPublicId(enc, s, next, eventPP))
5215         return XML_ERROR_PUBLICID;
5216       if (parser
5217               ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5218         XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5219                                         s + enc->minBytesPerChar,
5220                                         next - enc->minBytesPerChar);
5221         if (! tem)
5222           return XML_ERROR_NO_MEMORY;
5223         normalizePublicId(tem);
5224         parser->m_declNotationPublicId = tem;
5225         poolFinish(&parser->m_tempPool);
5226         handleDefault = XML_FALSE;
5227       }
5228       break;
5229     case XML_ROLE_NOTATION_SYSTEM_ID:
5230       if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5231         const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5232                                                    s + enc->minBytesPerChar,
5233                                                    next - enc->minBytesPerChar);
5234         if (! systemId)
5235           return XML_ERROR_NO_MEMORY;
5236         *eventEndPP = s;
5237         parser->m_notationDeclHandler(
5238             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5239             systemId, parser->m_declNotationPublicId);
5240         handleDefault = XML_FALSE;
5241       }
5242       poolClear(&parser->m_tempPool);
5243       break;
5244     case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5245       if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5246         *eventEndPP = s;
5247         parser->m_notationDeclHandler(
5248             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5249             0, parser->m_declNotationPublicId);
5250         handleDefault = XML_FALSE;
5251       }
5252       poolClear(&parser->m_tempPool);
5253       break;
5254     case XML_ROLE_ERROR:
5255       switch (tok) {
5256       case XML_TOK_PARAM_ENTITY_REF:
5257         /* PE references in internal subset are
5258            not allowed within declarations. */
5259         return XML_ERROR_PARAM_ENTITY_REF;
5260       case XML_TOK_XML_DECL:
5261         return XML_ERROR_MISPLACED_XML_PI;
5262       default:
5263         return XML_ERROR_SYNTAX;
5264       }
5265 #ifdef XML_DTD
5266     case XML_ROLE_IGNORE_SECT: {
5267       enum XML_Error result;
5268       if (parser->m_defaultHandler)
5269         reportDefault(parser, enc, s, next);
5270       handleDefault = XML_FALSE;
5271       result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5272       if (result != XML_ERROR_NONE)
5273         return result;
5274       else if (! next) {
5275         parser->m_processor = ignoreSectionProcessor;
5276         return result;
5277       }
5278     } break;
5279 #endif /* XML_DTD */
5280     case XML_ROLE_GROUP_OPEN:
5281       if (parser->m_prologState.level >= parser->m_groupSize) {
5282         if (parser->m_groupSize) {
5283           {
5284             /* Detect and prevent integer overflow */
5285             if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5286               return XML_ERROR_NO_MEMORY;
5287             }
5288 
5289             char *const new_connector = (char *)REALLOC(
5290                 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5291             if (new_connector == NULL) {
5292               parser->m_groupSize /= 2;
5293               return XML_ERROR_NO_MEMORY;
5294             }
5295             parser->m_groupConnector = new_connector;
5296           }
5297 
5298           if (dtd->scaffIndex) {
5299             /* Detect and prevent integer overflow.
5300              * The preprocessor guard addresses the "always false" warning
5301              * from -Wtype-limits on platforms where
5302              * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5303 #if UINT_MAX >= SIZE_MAX
5304             if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5305               return XML_ERROR_NO_MEMORY;
5306             }
5307 #endif
5308 
5309             int *const new_scaff_index = (int *)REALLOC(
5310                 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5311             if (new_scaff_index == NULL)
5312               return XML_ERROR_NO_MEMORY;
5313             dtd->scaffIndex = new_scaff_index;
5314           }
5315         } else {
5316           parser->m_groupConnector
5317               = (char *)MALLOC(parser, parser->m_groupSize = 32);
5318           if (! parser->m_groupConnector) {
5319             parser->m_groupSize = 0;
5320             return XML_ERROR_NO_MEMORY;
5321           }
5322         }
5323       }
5324       parser->m_groupConnector[parser->m_prologState.level] = 0;
5325       if (dtd->in_eldecl) {
5326         int myindex = nextScaffoldPart(parser);
5327         if (myindex < 0)
5328           return XML_ERROR_NO_MEMORY;
5329         assert(dtd->scaffIndex != NULL);
5330         dtd->scaffIndex[dtd->scaffLevel] = myindex;
5331         dtd->scaffLevel++;
5332         dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5333         if (parser->m_elementDeclHandler)
5334           handleDefault = XML_FALSE;
5335       }
5336       break;
5337     case XML_ROLE_GROUP_SEQUENCE:
5338       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5339         return XML_ERROR_SYNTAX;
5340       parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5341       if (dtd->in_eldecl && parser->m_elementDeclHandler)
5342         handleDefault = XML_FALSE;
5343       break;
5344     case XML_ROLE_GROUP_CHOICE:
5345       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5346         return XML_ERROR_SYNTAX;
5347       if (dtd->in_eldecl
5348           && ! parser->m_groupConnector[parser->m_prologState.level]
5349           && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5350               != XML_CTYPE_MIXED)) {
5351         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5352             = XML_CTYPE_CHOICE;
5353         if (parser->m_elementDeclHandler)
5354           handleDefault = XML_FALSE;
5355       }
5356       parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5357       break;
5358     case XML_ROLE_PARAM_ENTITY_REF:
5359 #ifdef XML_DTD
5360     case XML_ROLE_INNER_PARAM_ENTITY_REF:
5361       dtd->hasParamEntityRefs = XML_TRUE;
5362       if (! parser->m_paramEntityParsing)
5363         dtd->keepProcessing = dtd->standalone;
5364       else {
5365         const XML_Char *name;
5366         ENTITY *entity;
5367         name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5368                                next - enc->minBytesPerChar);
5369         if (! name)
5370           return XML_ERROR_NO_MEMORY;
5371         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5372         poolDiscard(&dtd->pool);
5373         /* first, determine if a check for an existing declaration is needed;
5374            if yes, check that the entity exists, and that it is internal,
5375            otherwise call the skipped entity handler
5376         */
5377         if (parser->m_prologState.documentEntity
5378             && (dtd->standalone ? ! parser->m_openInternalEntities
5379                                 : ! dtd->hasParamEntityRefs)) {
5380           if (! entity)
5381             return XML_ERROR_UNDEFINED_ENTITY;
5382           else if (! entity->is_internal) {
5383             /* It's hard to exhaustively search the code to be sure,
5384              * but there doesn't seem to be a way of executing the
5385              * following line.  There are two cases:
5386              *
5387              * If 'standalone' is false, the DTD must have no
5388              * parameter entities or we wouldn't have passed the outer
5389              * 'if' statement.  That means the only entity in the hash
5390              * table is the external subset name "#" which cannot be
5391              * given as a parameter entity name in XML syntax, so the
5392              * lookup must have returned NULL and we don't even reach
5393              * the test for an internal entity.
5394              *
5395              * If 'standalone' is true, it does not seem to be
5396              * possible to create entities taking this code path that
5397              * are not internal entities, so fail the test above.
5398              *
5399              * Because this analysis is very uncertain, the code is
5400              * being left in place and merely removed from the
5401              * coverage test statistics.
5402              */
5403             return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5404           }
5405         } else if (! entity) {
5406           dtd->keepProcessing = dtd->standalone;
5407           /* cannot report skipped entities in declarations */
5408           if ((role == XML_ROLE_PARAM_ENTITY_REF)
5409               && parser->m_skippedEntityHandler) {
5410             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5411             handleDefault = XML_FALSE;
5412           }
5413           break;
5414         }
5415         if (entity->open)
5416           return XML_ERROR_RECURSIVE_ENTITY_REF;
5417         if (entity->textPtr) {
5418           enum XML_Error result;
5419           XML_Bool betweenDecl
5420               = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5421           result = processInternalEntity(parser, entity, betweenDecl);
5422           if (result != XML_ERROR_NONE)
5423             return result;
5424           handleDefault = XML_FALSE;
5425           break;
5426         }
5427         if (parser->m_externalEntityRefHandler) {
5428           dtd->paramEntityRead = XML_FALSE;
5429           entity->open = XML_TRUE;
5430           entityTrackingOnOpen(parser, entity, __LINE__);
5431           if (! parser->m_externalEntityRefHandler(
5432                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
5433                   entity->systemId, entity->publicId)) {
5434             entityTrackingOnClose(parser, entity, __LINE__);
5435             entity->open = XML_FALSE;
5436             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5437           }
5438           entityTrackingOnClose(parser, entity, __LINE__);
5439           entity->open = XML_FALSE;
5440           handleDefault = XML_FALSE;
5441           if (! dtd->paramEntityRead) {
5442             dtd->keepProcessing = dtd->standalone;
5443             break;
5444           }
5445         } else {
5446           dtd->keepProcessing = dtd->standalone;
5447           break;
5448         }
5449       }
5450 #endif /* XML_DTD */
5451       if (! dtd->standalone && parser->m_notStandaloneHandler
5452           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5453         return XML_ERROR_NOT_STANDALONE;
5454       break;
5455 
5456       /* Element declaration stuff */
5457 
5458     case XML_ROLE_ELEMENT_NAME:
5459       if (parser->m_elementDeclHandler) {
5460         parser->m_declElementType = getElementType(parser, enc, s, next);
5461         if (! parser->m_declElementType)
5462           return XML_ERROR_NO_MEMORY;
5463         dtd->scaffLevel = 0;
5464         dtd->scaffCount = 0;
5465         dtd->in_eldecl = XML_TRUE;
5466         handleDefault = XML_FALSE;
5467       }
5468       break;
5469 
5470     case XML_ROLE_CONTENT_ANY:
5471     case XML_ROLE_CONTENT_EMPTY:
5472       if (dtd->in_eldecl) {
5473         if (parser->m_elementDeclHandler) {
5474           XML_Content *content
5475               = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5476           if (! content)
5477             return XML_ERROR_NO_MEMORY;
5478           content->quant = XML_CQUANT_NONE;
5479           content->name = NULL;
5480           content->numchildren = 0;
5481           content->children = NULL;
5482           content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5483                                                           : XML_CTYPE_EMPTY);
5484           *eventEndPP = s;
5485           parser->m_elementDeclHandler(
5486               parser->m_handlerArg, parser->m_declElementType->name, content);
5487           handleDefault = XML_FALSE;
5488         }
5489         dtd->in_eldecl = XML_FALSE;
5490       }
5491       break;
5492 
5493     case XML_ROLE_CONTENT_PCDATA:
5494       if (dtd->in_eldecl) {
5495         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5496             = XML_CTYPE_MIXED;
5497         if (parser->m_elementDeclHandler)
5498           handleDefault = XML_FALSE;
5499       }
5500       break;
5501 
5502     case XML_ROLE_CONTENT_ELEMENT:
5503       quant = XML_CQUANT_NONE;
5504       goto elementContent;
5505     case XML_ROLE_CONTENT_ELEMENT_OPT:
5506       quant = XML_CQUANT_OPT;
5507       goto elementContent;
5508     case XML_ROLE_CONTENT_ELEMENT_REP:
5509       quant = XML_CQUANT_REP;
5510       goto elementContent;
5511     case XML_ROLE_CONTENT_ELEMENT_PLUS:
5512       quant = XML_CQUANT_PLUS;
5513     elementContent:
5514       if (dtd->in_eldecl) {
5515         ELEMENT_TYPE *el;
5516         const XML_Char *name;
5517         size_t nameLen;
5518         const char *nxt
5519             = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5520         int myindex = nextScaffoldPart(parser);
5521         if (myindex < 0)
5522           return XML_ERROR_NO_MEMORY;
5523         dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5524         dtd->scaffold[myindex].quant = quant;
5525         el = getElementType(parser, enc, s, nxt);
5526         if (! el)
5527           return XML_ERROR_NO_MEMORY;
5528         name = el->name;
5529         dtd->scaffold[myindex].name = name;
5530         nameLen = 0;
5531         for (; name[nameLen++];)
5532           ;
5533 
5534         /* Detect and prevent integer overflow */
5535         if (nameLen > UINT_MAX - dtd->contentStringLen) {
5536           return XML_ERROR_NO_MEMORY;
5537         }
5538 
5539         dtd->contentStringLen += (unsigned)nameLen;
5540         if (parser->m_elementDeclHandler)
5541           handleDefault = XML_FALSE;
5542       }
5543       break;
5544 
5545     case XML_ROLE_GROUP_CLOSE:
5546       quant = XML_CQUANT_NONE;
5547       goto closeGroup;
5548     case XML_ROLE_GROUP_CLOSE_OPT:
5549       quant = XML_CQUANT_OPT;
5550       goto closeGroup;
5551     case XML_ROLE_GROUP_CLOSE_REP:
5552       quant = XML_CQUANT_REP;
5553       goto closeGroup;
5554     case XML_ROLE_GROUP_CLOSE_PLUS:
5555       quant = XML_CQUANT_PLUS;
5556     closeGroup:
5557       if (dtd->in_eldecl) {
5558         if (parser->m_elementDeclHandler)
5559           handleDefault = XML_FALSE;
5560         dtd->scaffLevel--;
5561         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5562         if (dtd->scaffLevel == 0) {
5563           if (! handleDefault) {
5564             XML_Content *model = build_model(parser);
5565             if (! model)
5566               return XML_ERROR_NO_MEMORY;
5567             *eventEndPP = s;
5568             parser->m_elementDeclHandler(
5569                 parser->m_handlerArg, parser->m_declElementType->name, model);
5570           }
5571           dtd->in_eldecl = XML_FALSE;
5572           dtd->contentStringLen = 0;
5573         }
5574       }
5575       break;
5576       /* End element declaration stuff */
5577 
5578     case XML_ROLE_PI:
5579       if (! reportProcessingInstruction(parser, enc, s, next))
5580         return XML_ERROR_NO_MEMORY;
5581       handleDefault = XML_FALSE;
5582       break;
5583     case XML_ROLE_COMMENT:
5584       if (! reportComment(parser, enc, s, next))
5585         return XML_ERROR_NO_MEMORY;
5586       handleDefault = XML_FALSE;
5587       break;
5588     case XML_ROLE_NONE:
5589       switch (tok) {
5590       case XML_TOK_BOM:
5591         handleDefault = XML_FALSE;
5592         break;
5593       }
5594       break;
5595     case XML_ROLE_DOCTYPE_NONE:
5596       if (parser->m_startDoctypeDeclHandler)
5597         handleDefault = XML_FALSE;
5598       break;
5599     case XML_ROLE_ENTITY_NONE:
5600       if (dtd->keepProcessing && parser->m_entityDeclHandler)
5601         handleDefault = XML_FALSE;
5602       break;
5603     case XML_ROLE_NOTATION_NONE:
5604       if (parser->m_notationDeclHandler)
5605         handleDefault = XML_FALSE;
5606       break;
5607     case XML_ROLE_ATTLIST_NONE:
5608       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5609         handleDefault = XML_FALSE;
5610       break;
5611     case XML_ROLE_ELEMENT_NONE:
5612       if (parser->m_elementDeclHandler)
5613         handleDefault = XML_FALSE;
5614       break;
5615     } /* end of big switch */
5616 
5617     if (handleDefault && parser->m_defaultHandler)
5618       reportDefault(parser, enc, s, next);
5619 
5620     switch (parser->m_parsingStatus.parsing) {
5621     case XML_SUSPENDED:
5622       *nextPtr = next;
5623       return XML_ERROR_NONE;
5624     case XML_FINISHED:
5625       return XML_ERROR_ABORTED;
5626     default:
5627       s = next;
5628       tok = XmlPrologTok(enc, s, end, &next);
5629     }
5630   }
5631   /* not reached */
5632 }
5633 
5634 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5635 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5636                 const char **nextPtr) {
5637   parser->m_processor = epilogProcessor;
5638   parser->m_eventPtr = s;
5639   for (;;) {
5640     const char *next = NULL;
5641     int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5642 #ifdef XML_DTD
5643     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5644                                   XML_ACCOUNT_DIRECT)) {
5645       accountingOnAbort(parser);
5646       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5647     }
5648 #endif
5649     parser->m_eventEndPtr = next;
5650     switch (tok) {
5651     /* report partial linebreak - it might be the last token */
5652     case -XML_TOK_PROLOG_S:
5653       if (parser->m_defaultHandler) {
5654         reportDefault(parser, parser->m_encoding, s, next);
5655         if (parser->m_parsingStatus.parsing == XML_FINISHED)
5656           return XML_ERROR_ABORTED;
5657       }
5658       *nextPtr = next;
5659       return XML_ERROR_NONE;
5660     case XML_TOK_NONE:
5661       *nextPtr = s;
5662       return XML_ERROR_NONE;
5663     case XML_TOK_PROLOG_S:
5664       if (parser->m_defaultHandler)
5665         reportDefault(parser, parser->m_encoding, s, next);
5666       break;
5667     case XML_TOK_PI:
5668       if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5669         return XML_ERROR_NO_MEMORY;
5670       break;
5671     case XML_TOK_COMMENT:
5672       if (! reportComment(parser, parser->m_encoding, s, next))
5673         return XML_ERROR_NO_MEMORY;
5674       break;
5675     case XML_TOK_INVALID:
5676       parser->m_eventPtr = next;
5677       return XML_ERROR_INVALID_TOKEN;
5678     case XML_TOK_PARTIAL:
5679       if (! parser->m_parsingStatus.finalBuffer) {
5680         *nextPtr = s;
5681         return XML_ERROR_NONE;
5682       }
5683       return XML_ERROR_UNCLOSED_TOKEN;
5684     case XML_TOK_PARTIAL_CHAR:
5685       if (! parser->m_parsingStatus.finalBuffer) {
5686         *nextPtr = s;
5687         return XML_ERROR_NONE;
5688       }
5689       return XML_ERROR_PARTIAL_CHAR;
5690     default:
5691       return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5692     }
5693     parser->m_eventPtr = s = next;
5694     switch (parser->m_parsingStatus.parsing) {
5695     case XML_SUSPENDED:
5696       *nextPtr = next;
5697       return XML_ERROR_NONE;
5698     case XML_FINISHED:
5699       return XML_ERROR_ABORTED;
5700     default:;
5701     }
5702   }
5703 }
5704 
5705 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5706 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5707   const char *textStart, *textEnd;
5708   const char *next;
5709   enum XML_Error result;
5710   OPEN_INTERNAL_ENTITY *openEntity;
5711 
5712   if (parser->m_freeInternalEntities) {
5713     openEntity = parser->m_freeInternalEntities;
5714     parser->m_freeInternalEntities = openEntity->next;
5715   } else {
5716     openEntity
5717         = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5718     if (! openEntity)
5719       return XML_ERROR_NO_MEMORY;
5720   }
5721   entity->open = XML_TRUE;
5722 #ifdef XML_DTD
5723   entityTrackingOnOpen(parser, entity, __LINE__);
5724 #endif
5725   entity->processed = 0;
5726   openEntity->next = parser->m_openInternalEntities;
5727   parser->m_openInternalEntities = openEntity;
5728   openEntity->entity = entity;
5729   openEntity->startTagLevel = parser->m_tagLevel;
5730   openEntity->betweenDecl = betweenDecl;
5731   openEntity->internalEventPtr = NULL;
5732   openEntity->internalEventEndPtr = NULL;
5733   textStart = (const char *)entity->textPtr;
5734   textEnd = (const char *)(entity->textPtr + entity->textLen);
5735   /* Set a safe default value in case 'next' does not get set */
5736   next = textStart;
5737 
5738 #ifdef XML_DTD
5739   if (entity->is_param) {
5740     int tok
5741         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5742     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5743                       tok, next, &next, XML_FALSE, XML_FALSE,
5744                       XML_ACCOUNT_ENTITY_EXPANSION);
5745   } else
5746 #endif /* XML_DTD */
5747     result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5748                        textStart, textEnd, &next, XML_FALSE,
5749                        XML_ACCOUNT_ENTITY_EXPANSION);
5750 
5751   if (result == XML_ERROR_NONE) {
5752     if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5753       entity->processed = (int)(next - textStart);
5754       parser->m_processor = internalEntityProcessor;
5755     } else {
5756 #ifdef XML_DTD
5757       entityTrackingOnClose(parser, entity, __LINE__);
5758 #endif /* XML_DTD */
5759       entity->open = XML_FALSE;
5760       parser->m_openInternalEntities = openEntity->next;
5761       /* put openEntity back in list of free instances */
5762       openEntity->next = parser->m_freeInternalEntities;
5763       parser->m_freeInternalEntities = openEntity;
5764     }
5765   }
5766   return result;
5767 }
5768 
5769 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5770 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5771                         const char **nextPtr) {
5772   ENTITY *entity;
5773   const char *textStart, *textEnd;
5774   const char *next;
5775   enum XML_Error result;
5776   OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5777   if (! openEntity)
5778     return XML_ERROR_UNEXPECTED_STATE;
5779 
5780   entity = openEntity->entity;
5781   textStart = ((const char *)entity->textPtr) + entity->processed;
5782   textEnd = (const char *)(entity->textPtr + entity->textLen);
5783   /* Set a safe default value in case 'next' does not get set */
5784   next = textStart;
5785 
5786 #ifdef XML_DTD
5787   if (entity->is_param) {
5788     int tok
5789         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5790     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5791                       tok, next, &next, XML_FALSE, XML_TRUE,
5792                       XML_ACCOUNT_ENTITY_EXPANSION);
5793   } else
5794 #endif /* XML_DTD */
5795     result = doContent(parser, openEntity->startTagLevel,
5796                        parser->m_internalEncoding, textStart, textEnd, &next,
5797                        XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5798 
5799   if (result != XML_ERROR_NONE)
5800     return result;
5801   else if (textEnd != next
5802            && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5803     entity->processed = (int)(next - (const char *)entity->textPtr);
5804     return result;
5805   } else {
5806 #ifdef XML_DTD
5807     entityTrackingOnClose(parser, entity, __LINE__);
5808 #endif
5809     entity->open = XML_FALSE;
5810     parser->m_openInternalEntities = openEntity->next;
5811     /* put openEntity back in list of free instances */
5812     openEntity->next = parser->m_freeInternalEntities;
5813     parser->m_freeInternalEntities = openEntity;
5814   }
5815 
5816 #ifdef XML_DTD
5817   if (entity->is_param) {
5818     int tok;
5819     parser->m_processor = prologProcessor;
5820     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5821     return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5822                     (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5823                     XML_ACCOUNT_DIRECT);
5824   } else
5825 #endif /* XML_DTD */
5826   {
5827     parser->m_processor = contentProcessor;
5828     /* see externalEntityContentProcessor vs contentProcessor */
5829     return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding,
5830                      s, end, nextPtr,
5831                      (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5832                      XML_ACCOUNT_DIRECT);
5833   }
5834 }
5835 
5836 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5837 errorProcessor(XML_Parser parser, const char *s, const char *end,
5838                const char **nextPtr) {
5839   UNUSED_P(s);
5840   UNUSED_P(end);
5841   UNUSED_P(nextPtr);
5842   return parser->m_errorCode;
5843 }
5844 
5845 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5846 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5847                     const char *ptr, const char *end, STRING_POOL *pool,
5848                     enum XML_Account account) {
5849   enum XML_Error result
5850       = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5851   if (result)
5852     return result;
5853   if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5854     poolChop(pool);
5855   if (! poolAppendChar(pool, XML_T('\0')))
5856     return XML_ERROR_NO_MEMORY;
5857   return XML_ERROR_NONE;
5858 }
5859 
5860 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5861 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5862                      const char *ptr, const char *end, STRING_POOL *pool,
5863                      enum XML_Account account) {
5864   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5865 #ifndef XML_DTD
5866   UNUSED_P(account);
5867 #endif
5868 
5869   for (;;) {
5870     const char *next
5871         = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5872     int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5873 #ifdef XML_DTD
5874     if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5875       accountingOnAbort(parser);
5876       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5877     }
5878 #endif
5879     switch (tok) {
5880     case XML_TOK_NONE:
5881       return XML_ERROR_NONE;
5882     case XML_TOK_INVALID:
5883       if (enc == parser->m_encoding)
5884         parser->m_eventPtr = next;
5885       return XML_ERROR_INVALID_TOKEN;
5886     case XML_TOK_PARTIAL:
5887       if (enc == parser->m_encoding)
5888         parser->m_eventPtr = ptr;
5889       return XML_ERROR_INVALID_TOKEN;
5890     case XML_TOK_CHAR_REF: {
5891       XML_Char buf[XML_ENCODE_MAX];
5892       int i;
5893       int n = XmlCharRefNumber(enc, ptr);
5894       if (n < 0) {
5895         if (enc == parser->m_encoding)
5896           parser->m_eventPtr = ptr;
5897         return XML_ERROR_BAD_CHAR_REF;
5898       }
5899       if (! isCdata && n == 0x20 /* space */
5900           && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5901         break;
5902       n = XmlEncode(n, (ICHAR *)buf);
5903       /* The XmlEncode() functions can never return 0 here.  That
5904        * error return happens if the code point passed in is either
5905        * negative or greater than or equal to 0x110000.  The
5906        * XmlCharRefNumber() functions will all return a number
5907        * strictly less than 0x110000 or a negative value if an error
5908        * occurred.  The negative value is intercepted above, so
5909        * XmlEncode() is never passed a value it might return an
5910        * error for.
5911        */
5912       for (i = 0; i < n; i++) {
5913         if (! poolAppendChar(pool, buf[i]))
5914           return XML_ERROR_NO_MEMORY;
5915       }
5916     } break;
5917     case XML_TOK_DATA_CHARS:
5918       if (! poolAppend(pool, enc, ptr, next))
5919         return XML_ERROR_NO_MEMORY;
5920       break;
5921     case XML_TOK_TRAILING_CR:
5922       next = ptr + enc->minBytesPerChar;
5923       /* fall through */
5924     case XML_TOK_ATTRIBUTE_VALUE_S:
5925     case XML_TOK_DATA_NEWLINE:
5926       if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5927         break;
5928       if (! poolAppendChar(pool, 0x20))
5929         return XML_ERROR_NO_MEMORY;
5930       break;
5931     case XML_TOK_ENTITY_REF: {
5932       const XML_Char *name;
5933       ENTITY *entity;
5934       char checkEntityDecl;
5935       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
5936           enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
5937       if (ch) {
5938 #ifdef XML_DTD
5939         /* NOTE: We are replacing 4-6 characters original input for 1 character
5940          *       so there is no amplification and hence recording without
5941          *       protection. */
5942         accountingDiffTolerated(parser, tok, (char *)&ch,
5943                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
5944                                 XML_ACCOUNT_ENTITY_EXPANSION);
5945 #endif /* XML_DTD */
5946         if (! poolAppendChar(pool, ch))
5947           return XML_ERROR_NO_MEMORY;
5948         break;
5949       }
5950       name = poolStoreString(&parser->m_temp2Pool, enc,
5951                              ptr + enc->minBytesPerChar,
5952                              next - enc->minBytesPerChar);
5953       if (! name)
5954         return XML_ERROR_NO_MEMORY;
5955       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5956       poolDiscard(&parser->m_temp2Pool);
5957       /* First, determine if a check for an existing declaration is needed;
5958          if yes, check that the entity exists, and that it is internal.
5959       */
5960       if (pool == &dtd->pool) /* are we called from prolog? */
5961         checkEntityDecl =
5962 #ifdef XML_DTD
5963             parser->m_prologState.documentEntity &&
5964 #endif /* XML_DTD */
5965             (dtd->standalone ? ! parser->m_openInternalEntities
5966                              : ! dtd->hasParamEntityRefs);
5967       else /* if (pool == &parser->m_tempPool): we are called from content */
5968         checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
5969       if (checkEntityDecl) {
5970         if (! entity)
5971           return XML_ERROR_UNDEFINED_ENTITY;
5972         else if (! entity->is_internal)
5973           return XML_ERROR_ENTITY_DECLARED_IN_PE;
5974       } else if (! entity) {
5975         /* Cannot report skipped entity here - see comments on
5976            parser->m_skippedEntityHandler.
5977         if (parser->m_skippedEntityHandler)
5978           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5979         */
5980         /* Cannot call the default handler because this would be
5981            out of sync with the call to the startElementHandler.
5982         if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5983           reportDefault(parser, enc, ptr, next);
5984         */
5985         break;
5986       }
5987       if (entity->open) {
5988         if (enc == parser->m_encoding) {
5989           /* It does not appear that this line can be executed.
5990            *
5991            * The "if (entity->open)" check catches recursive entity
5992            * definitions.  In order to be called with an open
5993            * entity, it must have gone through this code before and
5994            * been through the recursive call to
5995            * appendAttributeValue() some lines below.  That call
5996            * sets the local encoding ("enc") to the parser's
5997            * internal encoding (internal_utf8 or internal_utf16),
5998            * which can never be the same as the principle encoding.
5999            * It doesn't appear there is another code path that gets
6000            * here with entity->open being TRUE.
6001            *
6002            * Since it is not certain that this logic is watertight,
6003            * we keep the line and merely exclude it from coverage
6004            * tests.
6005            */
6006           parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6007         }
6008         return XML_ERROR_RECURSIVE_ENTITY_REF;
6009       }
6010       if (entity->notation) {
6011         if (enc == parser->m_encoding)
6012           parser->m_eventPtr = ptr;
6013         return XML_ERROR_BINARY_ENTITY_REF;
6014       }
6015       if (! entity->textPtr) {
6016         if (enc == parser->m_encoding)
6017           parser->m_eventPtr = ptr;
6018         return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6019       } else {
6020         enum XML_Error result;
6021         const XML_Char *textEnd = entity->textPtr + entity->textLen;
6022         entity->open = XML_TRUE;
6023 #ifdef XML_DTD
6024         entityTrackingOnOpen(parser, entity, __LINE__);
6025 #endif
6026         result = appendAttributeValue(parser, parser->m_internalEncoding,
6027                                       isCdata, (const char *)entity->textPtr,
6028                                       (const char *)textEnd, pool,
6029                                       XML_ACCOUNT_ENTITY_EXPANSION);
6030 #ifdef XML_DTD
6031         entityTrackingOnClose(parser, entity, __LINE__);
6032 #endif
6033         entity->open = XML_FALSE;
6034         if (result)
6035           return result;
6036       }
6037     } break;
6038     default:
6039       /* The only token returned by XmlAttributeValueTok() that does
6040        * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6041        * Getting that would require an entity name to contain an
6042        * incomplete XML character (e.g. \xE2\x82); however previous
6043        * tokenisers will have already recognised and rejected such
6044        * names before XmlAttributeValueTok() gets a look-in.  This
6045        * default case should be retained as a safety net, but the code
6046        * excluded from coverage tests.
6047        *
6048        * LCOV_EXCL_START
6049        */
6050       if (enc == parser->m_encoding)
6051         parser->m_eventPtr = ptr;
6052       return XML_ERROR_UNEXPECTED_STATE;
6053       /* LCOV_EXCL_STOP */
6054     }
6055     ptr = next;
6056   }
6057   /* not reached */
6058 }
6059 
6060 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6061 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6062                  const char *entityTextPtr, const char *entityTextEnd,
6063                  enum XML_Account account) {
6064   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6065   STRING_POOL *pool = &(dtd->entityValuePool);
6066   enum XML_Error result = XML_ERROR_NONE;
6067 #ifdef XML_DTD
6068   int oldInEntityValue = parser->m_prologState.inEntityValue;
6069   parser->m_prologState.inEntityValue = 1;
6070 #else
6071   UNUSED_P(account);
6072 #endif /* XML_DTD */
6073   /* never return Null for the value argument in EntityDeclHandler,
6074      since this would indicate an external entity; therefore we
6075      have to make sure that entityValuePool.start is not null */
6076   if (! pool->blocks) {
6077     if (! poolGrow(pool))
6078       return XML_ERROR_NO_MEMORY;
6079   }
6080 
6081   for (;;) {
6082     const char *next
6083         = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6084     int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6085 
6086 #ifdef XML_DTD
6087     if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6088                                   account)) {
6089       accountingOnAbort(parser);
6090       result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6091       goto endEntityValue;
6092     }
6093 #endif
6094 
6095     switch (tok) {
6096     case XML_TOK_PARAM_ENTITY_REF:
6097 #ifdef XML_DTD
6098       if (parser->m_isParamEntity || enc != parser->m_encoding) {
6099         const XML_Char *name;
6100         ENTITY *entity;
6101         name = poolStoreString(&parser->m_tempPool, enc,
6102                                entityTextPtr + enc->minBytesPerChar,
6103                                next - enc->minBytesPerChar);
6104         if (! name) {
6105           result = XML_ERROR_NO_MEMORY;
6106           goto endEntityValue;
6107         }
6108         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6109         poolDiscard(&parser->m_tempPool);
6110         if (! entity) {
6111           /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6112           /* cannot report skipped entity here - see comments on
6113              parser->m_skippedEntityHandler
6114           if (parser->m_skippedEntityHandler)
6115             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6116           */
6117           dtd->keepProcessing = dtd->standalone;
6118           goto endEntityValue;
6119         }
6120         if (entity->open) {
6121           if (enc == parser->m_encoding)
6122             parser->m_eventPtr = entityTextPtr;
6123           result = XML_ERROR_RECURSIVE_ENTITY_REF;
6124           goto endEntityValue;
6125         }
6126         if (entity->systemId) {
6127           if (parser->m_externalEntityRefHandler) {
6128             dtd->paramEntityRead = XML_FALSE;
6129             entity->open = XML_TRUE;
6130             entityTrackingOnOpen(parser, entity, __LINE__);
6131             if (! parser->m_externalEntityRefHandler(
6132                     parser->m_externalEntityRefHandlerArg, 0, entity->base,
6133                     entity->systemId, entity->publicId)) {
6134               entityTrackingOnClose(parser, entity, __LINE__);
6135               entity->open = XML_FALSE;
6136               result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6137               goto endEntityValue;
6138             }
6139             entityTrackingOnClose(parser, entity, __LINE__);
6140             entity->open = XML_FALSE;
6141             if (! dtd->paramEntityRead)
6142               dtd->keepProcessing = dtd->standalone;
6143           } else
6144             dtd->keepProcessing = dtd->standalone;
6145         } else {
6146           entity->open = XML_TRUE;
6147           entityTrackingOnOpen(parser, entity, __LINE__);
6148           result = storeEntityValue(
6149               parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6150               (const char *)(entity->textPtr + entity->textLen),
6151               XML_ACCOUNT_ENTITY_EXPANSION);
6152           entityTrackingOnClose(parser, entity, __LINE__);
6153           entity->open = XML_FALSE;
6154           if (result)
6155             goto endEntityValue;
6156         }
6157         break;
6158       }
6159 #endif /* XML_DTD */
6160       /* In the internal subset, PE references are not legal
6161          within markup declarations, e.g entity values in this case. */
6162       parser->m_eventPtr = entityTextPtr;
6163       result = XML_ERROR_PARAM_ENTITY_REF;
6164       goto endEntityValue;
6165     case XML_TOK_NONE:
6166       result = XML_ERROR_NONE;
6167       goto endEntityValue;
6168     case XML_TOK_ENTITY_REF:
6169     case XML_TOK_DATA_CHARS:
6170       if (! poolAppend(pool, enc, entityTextPtr, next)) {
6171         result = XML_ERROR_NO_MEMORY;
6172         goto endEntityValue;
6173       }
6174       break;
6175     case XML_TOK_TRAILING_CR:
6176       next = entityTextPtr + enc->minBytesPerChar;
6177       /* fall through */
6178     case XML_TOK_DATA_NEWLINE:
6179       if (pool->end == pool->ptr && ! poolGrow(pool)) {
6180         result = XML_ERROR_NO_MEMORY;
6181         goto endEntityValue;
6182       }
6183       *(pool->ptr)++ = 0xA;
6184       break;
6185     case XML_TOK_CHAR_REF: {
6186       XML_Char buf[XML_ENCODE_MAX];
6187       int i;
6188       int n = XmlCharRefNumber(enc, entityTextPtr);
6189       if (n < 0) {
6190         if (enc == parser->m_encoding)
6191           parser->m_eventPtr = entityTextPtr;
6192         result = XML_ERROR_BAD_CHAR_REF;
6193         goto endEntityValue;
6194       }
6195       n = XmlEncode(n, (ICHAR *)buf);
6196       /* The XmlEncode() functions can never return 0 here.  That
6197        * error return happens if the code point passed in is either
6198        * negative or greater than or equal to 0x110000.  The
6199        * XmlCharRefNumber() functions will all return a number
6200        * strictly less than 0x110000 or a negative value if an error
6201        * occurred.  The negative value is intercepted above, so
6202        * XmlEncode() is never passed a value it might return an
6203        * error for.
6204        */
6205       for (i = 0; i < n; i++) {
6206         if (pool->end == pool->ptr && ! poolGrow(pool)) {
6207           result = XML_ERROR_NO_MEMORY;
6208           goto endEntityValue;
6209         }
6210         *(pool->ptr)++ = buf[i];
6211       }
6212     } break;
6213     case XML_TOK_PARTIAL:
6214       if (enc == parser->m_encoding)
6215         parser->m_eventPtr = entityTextPtr;
6216       result = XML_ERROR_INVALID_TOKEN;
6217       goto endEntityValue;
6218     case XML_TOK_INVALID:
6219       if (enc == parser->m_encoding)
6220         parser->m_eventPtr = next;
6221       result = XML_ERROR_INVALID_TOKEN;
6222       goto endEntityValue;
6223     default:
6224       /* This default case should be unnecessary -- all the tokens
6225        * that XmlEntityValueTok() can return have their own explicit
6226        * cases -- but should be retained for safety.  We do however
6227        * exclude it from the coverage statistics.
6228        *
6229        * LCOV_EXCL_START
6230        */
6231       if (enc == parser->m_encoding)
6232         parser->m_eventPtr = entityTextPtr;
6233       result = XML_ERROR_UNEXPECTED_STATE;
6234       goto endEntityValue;
6235       /* LCOV_EXCL_STOP */
6236     }
6237     entityTextPtr = next;
6238   }
6239 endEntityValue:
6240 #ifdef XML_DTD
6241   parser->m_prologState.inEntityValue = oldInEntityValue;
6242 #endif /* XML_DTD */
6243   return result;
6244 }
6245 
6246 static void FASTCALL
normalizeLines(XML_Char * s)6247 normalizeLines(XML_Char *s) {
6248   XML_Char *p;
6249   for (;; s++) {
6250     if (*s == XML_T('\0'))
6251       return;
6252     if (*s == 0xD)
6253       break;
6254   }
6255   p = s;
6256   do {
6257     if (*s == 0xD) {
6258       *p++ = 0xA;
6259       if (*++s == 0xA)
6260         s++;
6261     } else
6262       *p++ = *s++;
6263   } while (*s);
6264   *p = XML_T('\0');
6265 }
6266 
6267 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6268 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6269                             const char *start, const char *end) {
6270   const XML_Char *target;
6271   XML_Char *data;
6272   const char *tem;
6273   if (! parser->m_processingInstructionHandler) {
6274     if (parser->m_defaultHandler)
6275       reportDefault(parser, enc, start, end);
6276     return 1;
6277   }
6278   start += enc->minBytesPerChar * 2;
6279   tem = start + XmlNameLength(enc, start);
6280   target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6281   if (! target)
6282     return 0;
6283   poolFinish(&parser->m_tempPool);
6284   data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6285                          end - enc->minBytesPerChar * 2);
6286   if (! data)
6287     return 0;
6288   normalizeLines(data);
6289   parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6290   poolClear(&parser->m_tempPool);
6291   return 1;
6292 }
6293 
6294 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6295 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6296               const char *end) {
6297   XML_Char *data;
6298   if (! parser->m_commentHandler) {
6299     if (parser->m_defaultHandler)
6300       reportDefault(parser, enc, start, end);
6301     return 1;
6302   }
6303   data = poolStoreString(&parser->m_tempPool, enc,
6304                          start + enc->minBytesPerChar * 4,
6305                          end - enc->minBytesPerChar * 3);
6306   if (! data)
6307     return 0;
6308   normalizeLines(data);
6309   parser->m_commentHandler(parser->m_handlerArg, data);
6310   poolClear(&parser->m_tempPool);
6311   return 1;
6312 }
6313 
6314 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6315 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6316               const char *end) {
6317   if (MUST_CONVERT(enc, s)) {
6318     enum XML_Convert_Result convert_res;
6319     const char **eventPP;
6320     const char **eventEndPP;
6321     if (enc == parser->m_encoding) {
6322       eventPP = &parser->m_eventPtr;
6323       eventEndPP = &parser->m_eventEndPtr;
6324     } else {
6325       /* To get here, two things must be true; the parser must be
6326        * using a character encoding that is not the same as the
6327        * encoding passed in, and the encoding passed in must need
6328        * conversion to the internal format (UTF-8 unless XML_UNICODE
6329        * is defined).  The only occasions on which the encoding passed
6330        * in is not the same as the parser's encoding are when it is
6331        * the internal encoding (e.g. a previously defined parameter
6332        * entity, already converted to internal format).  This by
6333        * definition doesn't need conversion, so the whole branch never
6334        * gets executed.
6335        *
6336        * For safety's sake we don't delete these lines and merely
6337        * exclude them from coverage statistics.
6338        *
6339        * LCOV_EXCL_START
6340        */
6341       eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6342       eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6343       /* LCOV_EXCL_STOP */
6344     }
6345     do {
6346       ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6347       convert_res
6348           = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6349       *eventEndPP = s;
6350       parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6351                                (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6352       *eventPP = s;
6353     } while ((convert_res != XML_CONVERT_COMPLETED)
6354              && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6355   } else
6356     parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
6357                              (int)((XML_Char *)end - (XML_Char *)s));
6358 }
6359 
6360 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6361 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6362                 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6363   DEFAULT_ATTRIBUTE *att;
6364   if (value || isId) {
6365     /* The handling of default attributes gets messed up if we have
6366        a default which duplicates a non-default. */
6367     int i;
6368     for (i = 0; i < type->nDefaultAtts; i++)
6369       if (attId == type->defaultAtts[i].id)
6370         return 1;
6371     if (isId && ! type->idAtt && ! attId->xmlns)
6372       type->idAtt = attId;
6373   }
6374   if (type->nDefaultAtts == type->allocDefaultAtts) {
6375     if (type->allocDefaultAtts == 0) {
6376       type->allocDefaultAtts = 8;
6377       type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6378           parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6379       if (! type->defaultAtts) {
6380         type->allocDefaultAtts = 0;
6381         return 0;
6382       }
6383     } else {
6384       DEFAULT_ATTRIBUTE *temp;
6385 
6386       /* Detect and prevent integer overflow */
6387       if (type->allocDefaultAtts > INT_MAX / 2) {
6388         return 0;
6389       }
6390 
6391       int count = type->allocDefaultAtts * 2;
6392 
6393       /* Detect and prevent integer overflow.
6394        * The preprocessor guard addresses the "always false" warning
6395        * from -Wtype-limits on platforms where
6396        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6397 #if UINT_MAX >= SIZE_MAX
6398       if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6399         return 0;
6400       }
6401 #endif
6402 
6403       temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6404                                           (count * sizeof(DEFAULT_ATTRIBUTE)));
6405       if (temp == NULL)
6406         return 0;
6407       type->allocDefaultAtts = count;
6408       type->defaultAtts = temp;
6409     }
6410   }
6411   att = type->defaultAtts + type->nDefaultAtts;
6412   att->id = attId;
6413   att->value = value;
6414   att->isCdata = isCdata;
6415   if (! isCdata)
6416     attId->maybeTokenized = XML_TRUE;
6417   type->nDefaultAtts += 1;
6418   return 1;
6419 }
6420 
6421 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6422 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6423   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6424   const XML_Char *name;
6425   for (name = elementType->name; *name; name++) {
6426     if (*name == XML_T(ASCII_COLON)) {
6427       PREFIX *prefix;
6428       const XML_Char *s;
6429       for (s = elementType->name; s != name; s++) {
6430         if (! poolAppendChar(&dtd->pool, *s))
6431           return 0;
6432       }
6433       if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6434         return 0;
6435       prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6436                                 sizeof(PREFIX));
6437       if (! prefix)
6438         return 0;
6439       if (prefix->name == poolStart(&dtd->pool))
6440         poolFinish(&dtd->pool);
6441       else
6442         poolDiscard(&dtd->pool);
6443       elementType->prefix = prefix;
6444       break;
6445     }
6446   }
6447   return 1;
6448 }
6449 
6450 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6451 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6452                const char *end) {
6453   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6454   ATTRIBUTE_ID *id;
6455   const XML_Char *name;
6456   if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6457     return NULL;
6458   name = poolStoreString(&dtd->pool, enc, start, end);
6459   if (! name)
6460     return NULL;
6461   /* skip quotation mark - its storage will be re-used (like in name[-1]) */
6462   ++name;
6463   id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6464                               sizeof(ATTRIBUTE_ID));
6465   if (! id)
6466     return NULL;
6467   if (id->name != name)
6468     poolDiscard(&dtd->pool);
6469   else {
6470     poolFinish(&dtd->pool);
6471     if (! parser->m_ns)
6472       ;
6473     else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6474              && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6475              && name[4] == XML_T(ASCII_s)
6476              && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6477       if (name[5] == XML_T('\0'))
6478         id->prefix = &dtd->defaultPrefix;
6479       else
6480         id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6481                                       sizeof(PREFIX));
6482       id->xmlns = XML_TRUE;
6483     } else {
6484       int i;
6485       for (i = 0; name[i]; i++) {
6486         /* attributes without prefix are *not* in the default namespace */
6487         if (name[i] == XML_T(ASCII_COLON)) {
6488           int j;
6489           for (j = 0; j < i; j++) {
6490             if (! poolAppendChar(&dtd->pool, name[j]))
6491               return NULL;
6492           }
6493           if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6494             return NULL;
6495           id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6496                                         poolStart(&dtd->pool), sizeof(PREFIX));
6497           if (! id->prefix)
6498             return NULL;
6499           if (id->prefix->name == poolStart(&dtd->pool))
6500             poolFinish(&dtd->pool);
6501           else
6502             poolDiscard(&dtd->pool);
6503           break;
6504         }
6505       }
6506     }
6507   }
6508   return id;
6509 }
6510 
6511 #define CONTEXT_SEP XML_T(ASCII_FF)
6512 
6513 static const XML_Char *
getContext(XML_Parser parser)6514 getContext(XML_Parser parser) {
6515   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6516   HASH_TABLE_ITER iter;
6517   XML_Bool needSep = XML_FALSE;
6518 
6519   if (dtd->defaultPrefix.binding) {
6520     int i;
6521     int len;
6522     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6523       return NULL;
6524     len = dtd->defaultPrefix.binding->uriLen;
6525     if (parser->m_namespaceSeparator)
6526       len--;
6527     for (i = 0; i < len; i++) {
6528       if (! poolAppendChar(&parser->m_tempPool,
6529                            dtd->defaultPrefix.binding->uri[i])) {
6530         /* Because of memory caching, I don't believe this line can be
6531          * executed.
6532          *
6533          * This is part of a loop copying the default prefix binding
6534          * URI into the parser's temporary string pool.  Previously,
6535          * that URI was copied into the same string pool, with a
6536          * terminating NUL character, as part of setContext().  When
6537          * the pool was cleared, that leaves a block definitely big
6538          * enough to hold the URI on the free block list of the pool.
6539          * The URI copy in getContext() therefore cannot run out of
6540          * memory.
6541          *
6542          * If the pool is used between the setContext() and
6543          * getContext() calls, the worst it can do is leave a bigger
6544          * block on the front of the free list.  Given that this is
6545          * all somewhat inobvious and program logic can be changed, we
6546          * don't delete the line but we do exclude it from the test
6547          * coverage statistics.
6548          */
6549         return NULL; /* LCOV_EXCL_LINE */
6550       }
6551     }
6552     needSep = XML_TRUE;
6553   }
6554 
6555   hashTableIterInit(&iter, &(dtd->prefixes));
6556   for (;;) {
6557     int i;
6558     int len;
6559     const XML_Char *s;
6560     PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6561     if (! prefix)
6562       break;
6563     if (! prefix->binding) {
6564       /* This test appears to be (justifiable) paranoia.  There does
6565        * not seem to be a way of injecting a prefix without a binding
6566        * that doesn't get errored long before this function is called.
6567        * The test should remain for safety's sake, so we instead
6568        * exclude the following line from the coverage statistics.
6569        */
6570       continue; /* LCOV_EXCL_LINE */
6571     }
6572     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6573       return NULL;
6574     for (s = prefix->name; *s; s++)
6575       if (! poolAppendChar(&parser->m_tempPool, *s))
6576         return NULL;
6577     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6578       return NULL;
6579     len = prefix->binding->uriLen;
6580     if (parser->m_namespaceSeparator)
6581       len--;
6582     for (i = 0; i < len; i++)
6583       if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6584         return NULL;
6585     needSep = XML_TRUE;
6586   }
6587 
6588   hashTableIterInit(&iter, &(dtd->generalEntities));
6589   for (;;) {
6590     const XML_Char *s;
6591     ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6592     if (! e)
6593       break;
6594     if (! e->open)
6595       continue;
6596     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6597       return NULL;
6598     for (s = e->name; *s; s++)
6599       if (! poolAppendChar(&parser->m_tempPool, *s))
6600         return 0;
6601     needSep = XML_TRUE;
6602   }
6603 
6604   if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6605     return NULL;
6606   return parser->m_tempPool.start;
6607 }
6608 
6609 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)6610 setContext(XML_Parser parser, const XML_Char *context) {
6611   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6612   const XML_Char *s = context;
6613 
6614   while (*context != XML_T('\0')) {
6615     if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6616       ENTITY *e;
6617       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6618         return XML_FALSE;
6619       e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6620                            poolStart(&parser->m_tempPool), 0);
6621       if (e)
6622         e->open = XML_TRUE;
6623       if (*s != XML_T('\0'))
6624         s++;
6625       context = s;
6626       poolDiscard(&parser->m_tempPool);
6627     } else if (*s == XML_T(ASCII_EQUALS)) {
6628       PREFIX *prefix;
6629       if (poolLength(&parser->m_tempPool) == 0)
6630         prefix = &dtd->defaultPrefix;
6631       else {
6632         if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6633           return XML_FALSE;
6634         prefix
6635             = (PREFIX *)lookup(parser, &dtd->prefixes,
6636                                poolStart(&parser->m_tempPool), sizeof(PREFIX));
6637         if (! prefix)
6638           return XML_FALSE;
6639         if (prefix->name == poolStart(&parser->m_tempPool)) {
6640           prefix->name = poolCopyString(&dtd->pool, prefix->name);
6641           if (! prefix->name)
6642             return XML_FALSE;
6643         }
6644         poolDiscard(&parser->m_tempPool);
6645       }
6646       for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6647            context++)
6648         if (! poolAppendChar(&parser->m_tempPool, *context))
6649           return XML_FALSE;
6650       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6651         return XML_FALSE;
6652       if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6653                      &parser->m_inheritedBindings)
6654           != XML_ERROR_NONE)
6655         return XML_FALSE;
6656       poolDiscard(&parser->m_tempPool);
6657       if (*context != XML_T('\0'))
6658         ++context;
6659       s = context;
6660     } else {
6661       if (! poolAppendChar(&parser->m_tempPool, *s))
6662         return XML_FALSE;
6663       s++;
6664     }
6665   }
6666   return XML_TRUE;
6667 }
6668 
6669 static void FASTCALL
normalizePublicId(XML_Char * publicId)6670 normalizePublicId(XML_Char *publicId) {
6671   XML_Char *p = publicId;
6672   XML_Char *s;
6673   for (s = publicId; *s; s++) {
6674     switch (*s) {
6675     case 0x20:
6676     case 0xD:
6677     case 0xA:
6678       if (p != publicId && p[-1] != 0x20)
6679         *p++ = 0x20;
6680       break;
6681     default:
6682       *p++ = *s;
6683     }
6684   }
6685   if (p != publicId && p[-1] == 0x20)
6686     --p;
6687   *p = XML_T('\0');
6688 }
6689 
6690 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6691 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6692   DTD *p = ms->malloc_fcn(sizeof(DTD));
6693   if (p == NULL)
6694     return p;
6695   poolInit(&(p->pool), ms);
6696   poolInit(&(p->entityValuePool), ms);
6697   hashTableInit(&(p->generalEntities), ms);
6698   hashTableInit(&(p->elementTypes), ms);
6699   hashTableInit(&(p->attributeIds), ms);
6700   hashTableInit(&(p->prefixes), ms);
6701 #ifdef XML_DTD
6702   p->paramEntityRead = XML_FALSE;
6703   hashTableInit(&(p->paramEntities), ms);
6704 #endif /* XML_DTD */
6705   p->defaultPrefix.name = NULL;
6706   p->defaultPrefix.binding = NULL;
6707 
6708   p->in_eldecl = XML_FALSE;
6709   p->scaffIndex = NULL;
6710   p->scaffold = NULL;
6711   p->scaffLevel = 0;
6712   p->scaffSize = 0;
6713   p->scaffCount = 0;
6714   p->contentStringLen = 0;
6715 
6716   p->keepProcessing = XML_TRUE;
6717   p->hasParamEntityRefs = XML_FALSE;
6718   p->standalone = XML_FALSE;
6719   return p;
6720 }
6721 
6722 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6723 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6724   HASH_TABLE_ITER iter;
6725   hashTableIterInit(&iter, &(p->elementTypes));
6726   for (;;) {
6727     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6728     if (! e)
6729       break;
6730     if (e->allocDefaultAtts != 0)
6731       ms->free_fcn(e->defaultAtts);
6732   }
6733   hashTableClear(&(p->generalEntities));
6734 #ifdef XML_DTD
6735   p->paramEntityRead = XML_FALSE;
6736   hashTableClear(&(p->paramEntities));
6737 #endif /* XML_DTD */
6738   hashTableClear(&(p->elementTypes));
6739   hashTableClear(&(p->attributeIds));
6740   hashTableClear(&(p->prefixes));
6741   poolClear(&(p->pool));
6742   poolClear(&(p->entityValuePool));
6743   p->defaultPrefix.name = NULL;
6744   p->defaultPrefix.binding = NULL;
6745 
6746   p->in_eldecl = XML_FALSE;
6747 
6748   ms->free_fcn(p->scaffIndex);
6749   p->scaffIndex = NULL;
6750   ms->free_fcn(p->scaffold);
6751   p->scaffold = NULL;
6752 
6753   p->scaffLevel = 0;
6754   p->scaffSize = 0;
6755   p->scaffCount = 0;
6756   p->contentStringLen = 0;
6757 
6758   p->keepProcessing = XML_TRUE;
6759   p->hasParamEntityRefs = XML_FALSE;
6760   p->standalone = XML_FALSE;
6761 }
6762 
6763 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6764 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6765   HASH_TABLE_ITER iter;
6766   hashTableIterInit(&iter, &(p->elementTypes));
6767   for (;;) {
6768     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6769     if (! e)
6770       break;
6771     if (e->allocDefaultAtts != 0)
6772       ms->free_fcn(e->defaultAtts);
6773   }
6774   hashTableDestroy(&(p->generalEntities));
6775 #ifdef XML_DTD
6776   hashTableDestroy(&(p->paramEntities));
6777 #endif /* XML_DTD */
6778   hashTableDestroy(&(p->elementTypes));
6779   hashTableDestroy(&(p->attributeIds));
6780   hashTableDestroy(&(p->prefixes));
6781   poolDestroy(&(p->pool));
6782   poolDestroy(&(p->entityValuePool));
6783   if (isDocEntity) {
6784     ms->free_fcn(p->scaffIndex);
6785     ms->free_fcn(p->scaffold);
6786   }
6787   ms->free_fcn(p);
6788 }
6789 
6790 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6791    The new DTD has already been initialized.
6792 */
6793 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6794 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6795         const XML_Memory_Handling_Suite *ms) {
6796   HASH_TABLE_ITER iter;
6797 
6798   /* Copy the prefix table. */
6799 
6800   hashTableIterInit(&iter, &(oldDtd->prefixes));
6801   for (;;) {
6802     const XML_Char *name;
6803     const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6804     if (! oldP)
6805       break;
6806     name = poolCopyString(&(newDtd->pool), oldP->name);
6807     if (! name)
6808       return 0;
6809     if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6810       return 0;
6811   }
6812 
6813   hashTableIterInit(&iter, &(oldDtd->attributeIds));
6814 
6815   /* Copy the attribute id table. */
6816 
6817   for (;;) {
6818     ATTRIBUTE_ID *newA;
6819     const XML_Char *name;
6820     const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6821 
6822     if (! oldA)
6823       break;
6824     /* Remember to allocate the scratch byte before the name. */
6825     if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6826       return 0;
6827     name = poolCopyString(&(newDtd->pool), oldA->name);
6828     if (! name)
6829       return 0;
6830     ++name;
6831     newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6832                                   sizeof(ATTRIBUTE_ID));
6833     if (! newA)
6834       return 0;
6835     newA->maybeTokenized = oldA->maybeTokenized;
6836     if (oldA->prefix) {
6837       newA->xmlns = oldA->xmlns;
6838       if (oldA->prefix == &oldDtd->defaultPrefix)
6839         newA->prefix = &newDtd->defaultPrefix;
6840       else
6841         newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6842                                         oldA->prefix->name, 0);
6843     }
6844   }
6845 
6846   /* Copy the element type table. */
6847 
6848   hashTableIterInit(&iter, &(oldDtd->elementTypes));
6849 
6850   for (;;) {
6851     int i;
6852     ELEMENT_TYPE *newE;
6853     const XML_Char *name;
6854     const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6855     if (! oldE)
6856       break;
6857     name = poolCopyString(&(newDtd->pool), oldE->name);
6858     if (! name)
6859       return 0;
6860     newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
6861                                   sizeof(ELEMENT_TYPE));
6862     if (! newE)
6863       return 0;
6864     if (oldE->nDefaultAtts) {
6865       newE->defaultAtts
6866           = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6867       if (! newE->defaultAtts) {
6868         return 0;
6869       }
6870     }
6871     if (oldE->idAtt)
6872       newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
6873                                            oldE->idAtt->name, 0);
6874     newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6875     if (oldE->prefix)
6876       newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6877                                       oldE->prefix->name, 0);
6878     for (i = 0; i < newE->nDefaultAtts; i++) {
6879       newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
6880           oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
6881       newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6882       if (oldE->defaultAtts[i].value) {
6883         newE->defaultAtts[i].value
6884             = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6885         if (! newE->defaultAtts[i].value)
6886           return 0;
6887       } else
6888         newE->defaultAtts[i].value = NULL;
6889     }
6890   }
6891 
6892   /* Copy the entity tables. */
6893   if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
6894                         &(oldDtd->generalEntities)))
6895     return 0;
6896 
6897 #ifdef XML_DTD
6898   if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
6899                         &(oldDtd->paramEntities)))
6900     return 0;
6901   newDtd->paramEntityRead = oldDtd->paramEntityRead;
6902 #endif /* XML_DTD */
6903 
6904   newDtd->keepProcessing = oldDtd->keepProcessing;
6905   newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
6906   newDtd->standalone = oldDtd->standalone;
6907 
6908   /* Don't want deep copying for scaffolding */
6909   newDtd->in_eldecl = oldDtd->in_eldecl;
6910   newDtd->scaffold = oldDtd->scaffold;
6911   newDtd->contentStringLen = oldDtd->contentStringLen;
6912   newDtd->scaffSize = oldDtd->scaffSize;
6913   newDtd->scaffLevel = oldDtd->scaffLevel;
6914   newDtd->scaffIndex = oldDtd->scaffIndex;
6915 
6916   return 1;
6917 } /* End dtdCopy */
6918 
6919 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)6920 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
6921                 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
6922   HASH_TABLE_ITER iter;
6923   const XML_Char *cachedOldBase = NULL;
6924   const XML_Char *cachedNewBase = NULL;
6925 
6926   hashTableIterInit(&iter, oldTable);
6927 
6928   for (;;) {
6929     ENTITY *newE;
6930     const XML_Char *name;
6931     const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6932     if (! oldE)
6933       break;
6934     name = poolCopyString(newPool, oldE->name);
6935     if (! name)
6936       return 0;
6937     newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
6938     if (! newE)
6939       return 0;
6940     if (oldE->systemId) {
6941       const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6942       if (! tem)
6943         return 0;
6944       newE->systemId = tem;
6945       if (oldE->base) {
6946         if (oldE->base == cachedOldBase)
6947           newE->base = cachedNewBase;
6948         else {
6949           cachedOldBase = oldE->base;
6950           tem = poolCopyString(newPool, cachedOldBase);
6951           if (! tem)
6952             return 0;
6953           cachedNewBase = newE->base = tem;
6954         }
6955       }
6956       if (oldE->publicId) {
6957         tem = poolCopyString(newPool, oldE->publicId);
6958         if (! tem)
6959           return 0;
6960         newE->publicId = tem;
6961       }
6962     } else {
6963       const XML_Char *tem
6964           = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
6965       if (! tem)
6966         return 0;
6967       newE->textPtr = tem;
6968       newE->textLen = oldE->textLen;
6969     }
6970     if (oldE->notation) {
6971       const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6972       if (! tem)
6973         return 0;
6974       newE->notation = tem;
6975     }
6976     newE->is_param = oldE->is_param;
6977     newE->is_internal = oldE->is_internal;
6978   }
6979   return 1;
6980 }
6981 
6982 #define INIT_POWER 6
6983 
6984 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)6985 keyeq(KEY s1, KEY s2) {
6986   for (; *s1 == *s2; s1++, s2++)
6987     if (*s1 == 0)
6988       return XML_TRUE;
6989   return XML_FALSE;
6990 }
6991 
6992 static size_t
keylen(KEY s)6993 keylen(KEY s) {
6994   size_t len = 0;
6995   for (; *s; s++, len++)
6996     ;
6997   return len;
6998 }
6999 
7000 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7001 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7002   key->k[0] = 0;
7003   key->k[1] = get_hash_secret_salt(parser);
7004 }
7005 
7006 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7007 hash(XML_Parser parser, KEY s) {
7008   struct siphash state;
7009   struct sipkey key;
7010   (void)sip24_valid;
7011   copy_salt_to_sipkey(parser, &key);
7012   sip24_init(&state, &key);
7013   sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7014   return (unsigned long)sip24_final(&state);
7015 }
7016 
/* Find the entry keyed by `name` in `table`, optionally creating it.
 *
 * The table uses open addressing: probing starts at the hash masked to the
 * table size and, on collision, steps downwards (wrapping around) by a
 * per-key stride obtained from PROBE_STEP().
 *
 * parser      passed on to hash()
 * table       table to search; its slot array is allocated on first insert
 * name        zero-terminated key; a newly created entry stores this very
 *             pointer, the characters are not copied
 * createSize  0 to search only; otherwise the number of bytes to allocate
 *             (zero-filled) for a new entry when `name` is not present
 *
 * Returns the existing or newly created entry.  Returns NULL when the key
 * is absent and createSize is 0, when an allocation fails, or when the
 * table would have to grow beyond what the size arithmetic can represent.
 */
static NAMED *
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
  size_t i;
  if (table->size == 0) {
    /* No slot array yet: nothing can match, so either give up or set up
     * the initial array. */
    size_t tsize;
    if (! createSize)
      return NULL;
    table->power = INIT_POWER;
    /* table->size is a power of 2 */
    table->size = (size_t)1 << INIT_POWER;
    tsize = table->size * sizeof(NAMED *);
    table->v = table->mem->malloc_fcn(tsize);
    if (! table->v) {
      /* Reset the size so the table still reads as unallocated */
      table->size = 0;
      return NULL;
    }
    memset(table->v, 0, tsize);
    i = hash(parser, name) & ((unsigned long)table->size - 1);
  } else {
    unsigned long h = hash(parser, name);
    unsigned long mask = (unsigned long)table->size - 1;
    unsigned char step = 0;
    i = h & mask;
    /* Probe until either the key or an empty slot is found */
    while (table->v[i]) {
      if (keyeq(name, table->v[i]->name))
        return table->v[i];
      /* The stride is only computed once a collision has happened */
      if (! step)
        step = PROBE_STEP(h, mask, table->power);
      i < step ? (i += table->size - step) : (i -= step);
    }
    if (! createSize)
      return NULL;

    /* check for overflow (table is half full) */
    if (table->used >> (table->power - 1)) {
      unsigned char newPower = table->power + 1;

      /* Detect and prevent invalid shift */
      if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
        return NULL;
      }

      size_t newSize = (size_t)1 << newPower;
      unsigned long newMask = (unsigned long)newSize - 1;

      /* Detect and prevent integer overflow */
      if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
        return NULL;
      }

      size_t tsize = newSize * sizeof(NAMED *);
      NAMED **newV = table->mem->malloc_fcn(tsize);
      if (! newV)
        return NULL;
      memset(newV, 0, tsize);
      /* Re-insert every existing entry into the doubled array; only an
       * empty slot is searched for, no key comparison is done here. */
      for (i = 0; i < table->size; i++)
        if (table->v[i]) {
          unsigned long newHash = hash(parser, table->v[i]->name);
          size_t j = newHash & newMask;
          step = 0;
          while (newV[j]) {
            if (! step)
              step = PROBE_STEP(newHash, newMask, newPower);
            j < step ? (j += newSize - step) : (j -= step);
          }
          newV[j] = table->v[i];
        }
      table->mem->free_fcn(table->v);
      table->v = newV;
      table->power = newPower;
      table->size = newSize;
      /* The insertion slot found above belonged to the old array, so
       * search again for an empty slot in the resized one. */
      i = h & newMask;
      step = 0;
      while (table->v[i]) {
        if (! step)
          step = PROBE_STEP(h, newMask, newPower);
        i < step ? (i += newSize - step) : (i -= step);
      }
    }
  }
  /* Slot i is empty at this point: create the new, zero-filled entry */
  table->v[i] = table->mem->malloc_fcn(createSize);
  if (! table->v[i])
    return NULL;
  memset(table->v[i], 0, createSize);
  table->v[i]->name = name;
  (table->used)++;
  return table->v[i];
}
7105 
7106 static void FASTCALL
hashTableClear(HASH_TABLE * table)7107 hashTableClear(HASH_TABLE *table) {
7108   size_t i;
7109   for (i = 0; i < table->size; i++) {
7110     table->mem->free_fcn(table->v[i]);
7111     table->v[i] = NULL;
7112   }
7113   table->used = 0;
7114 }
7115 
7116 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7117 hashTableDestroy(HASH_TABLE *table) {
7118   size_t i;
7119   for (i = 0; i < table->size; i++)
7120     table->mem->free_fcn(table->v[i]);
7121   table->mem->free_fcn(table->v);
7122 }
7123 
7124 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7125 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7126   p->power = 0;
7127   p->size = 0;
7128   p->used = 0;
7129   p->v = NULL;
7130   p->mem = ms;
7131 }
7132 
7133 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7134 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7135   iter->p = table->v;
7136   iter->end = iter->p ? iter->p + table->size : NULL;
7137 }
7138 
7139 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7140 hashTableIterNext(HASH_TABLE_ITER *iter) {
7141   while (iter->p != iter->end) {
7142     NAMED *tem = *(iter->p)++;
7143     if (tem)
7144       return tem;
7145   }
7146   return NULL;
7147 }
7148 
7149 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7150 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7151   pool->blocks = NULL;
7152   pool->freeBlocks = NULL;
7153   pool->start = NULL;
7154   pool->ptr = NULL;
7155   pool->end = NULL;
7156   pool->mem = ms;
7157 }
7158 
7159 static void FASTCALL
poolClear(STRING_POOL * pool)7160 poolClear(STRING_POOL *pool) {
7161   if (! pool->freeBlocks)
7162     pool->freeBlocks = pool->blocks;
7163   else {
7164     BLOCK *p = pool->blocks;
7165     while (p) {
7166       BLOCK *tem = p->next;
7167       p->next = pool->freeBlocks;
7168       pool->freeBlocks = p;
7169       p = tem;
7170     }
7171   }
7172   pool->blocks = NULL;
7173   pool->start = NULL;
7174   pool->ptr = NULL;
7175   pool->end = NULL;
7176 }
7177 
7178 static void FASTCALL
poolDestroy(STRING_POOL * pool)7179 poolDestroy(STRING_POOL *pool) {
7180   BLOCK *p = pool->blocks;
7181   while (p) {
7182     BLOCK *tem = p->next;
7183     pool->mem->free_fcn(p);
7184     p = tem;
7185   }
7186   p = pool->freeBlocks;
7187   while (p) {
7188     BLOCK *tem = p->next;
7189     pool->mem->free_fcn(p);
7190     p = tem;
7191   }
7192 }
7193 
7194 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7195 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7196            const char *end) {
7197   if (! pool->ptr && ! poolGrow(pool))
7198     return NULL;
7199   for (;;) {
7200     const enum XML_Convert_Result convert_res = XmlConvert(
7201         enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
7202     if ((convert_res == XML_CONVERT_COMPLETED)
7203         || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7204       break;
7205     if (! poolGrow(pool))
7206       return NULL;
7207   }
7208   return pool->start;
7209 }
7210 
7211 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7212 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7213   do {
7214     if (! poolAppendChar(pool, *s))
7215       return NULL;
7216   } while (*s++);
7217   s = pool->start;
7218   poolFinish(pool);
7219   return s;
7220 }
7221 
7222 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7223 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7224   if (! pool->ptr && ! poolGrow(pool)) {
7225     /* The following line is unreachable given the current usage of
7226      * poolCopyStringN().  Currently it is called from exactly one
7227      * place to copy the text of a simple general entity.  By that
7228      * point, the name of the entity is already stored in the pool, so
7229      * pool->ptr cannot be NULL.
7230      *
7231      * If poolCopyStringN() is used elsewhere as it well might be,
7232      * this line may well become executable again.  Regardless, this
7233      * sort of check shouldn't be removed lightly, so we just exclude
7234      * it from the coverage statistics.
7235      */
7236     return NULL; /* LCOV_EXCL_LINE */
7237   }
7238   for (; n > 0; --n, s++) {
7239     if (! poolAppendChar(pool, *s))
7240       return NULL;
7241   }
7242   s = pool->start;
7243   poolFinish(pool);
7244   return s;
7245 }
7246 
7247 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7248 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7249   while (*s) {
7250     if (! poolAppendChar(pool, *s))
7251       return NULL;
7252     s++;
7253   }
7254   return pool->start;
7255 }
7256 
7257 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7258 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7259                 const char *end) {
7260   if (! poolAppend(pool, enc, ptr, end))
7261     return NULL;
7262   if (pool->ptr == pool->end && ! poolGrow(pool))
7263     return NULL;
7264   *(pool->ptr)++ = 0;
7265   return pool->start;
7266 }
7267 
7268 static size_t
poolBytesToAllocateFor(int blockSize)7269 poolBytesToAllocateFor(int blockSize) {
7270   /* Unprotected math would be:
7271   ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7272   **
7273   ** Detect overflow, avoiding _signed_ overflow undefined behavior
7274   ** For a + b * c we check b * c in isolation first, so that addition of a
7275   ** on top has no chance of making us accept a small non-negative number
7276   */
7277   const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7278 
7279   if (blockSize <= 0)
7280     return 0;
7281 
7282   if (blockSize > (int)(INT_MAX / stretch))
7283     return 0;
7284 
7285   {
7286     const int stretchedBlockSize = blockSize * (int)stretch;
7287     const int bytesToAllocate
7288         = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7289     if (bytesToAllocate < 0)
7290       return 0;
7291 
7292     return (size_t)bytesToAllocate;
7293   }
7294 }
7295 
/* Make more room for the string under construction in `pool`, keeping the
 * characters accumulated so far and re-pointing pool->start, pool->ptr and
 * pool->end at the storage now in use.
 *
 * Strategies, tried in this order:
 *  1. take the head of pool->freeBlocks, either because no string storage
 *     is set up yet (pool->start is NULL) or because that free block is
 *     larger than the current storage;
 *  2. if the string under construction starts at the beginning of the
 *     newest block, realloc that block to twice its capacity;
 *  3. otherwise allocate a new block (at least INIT_BLOCK_SIZE characters,
 *     else twice the current capacity) and copy the partial string over.
 *
 * Returns XML_TRUE on success, XML_FALSE when an allocation fails or a
 * size computation would overflow.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == 0) {
      /* Nothing to preserve: the first free block becomes the (only)
       * block in use. */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* The first free block is bigger than the current storage: move it
       * to the front of the used list and continue the string there. */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* The string under construction begins at the start of the newest
     * block, so that block can be enlarged in place with realloc. */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where more than
       * INT_MAX/2 bytes have already been allocated.  This isn't
       * readily testable, since it is unlikely that an average
       * machine will have that much memory, so we exclude it from the
       * coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
                                           (unsigned)bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE;
    /* realloc may have moved the block: rebase all pool pointers on it */
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    /* Allocate a brand-new block and carry the partial string over */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = pool->mem->malloc_fcn(bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Only the characters written so far (start..ptr) need copying */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
7399 
7400 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7401 nextScaffoldPart(XML_Parser parser) {
7402   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7403   CONTENT_SCAFFOLD *me;
7404   int next;
7405 
7406   if (! dtd->scaffIndex) {
7407     dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7408     if (! dtd->scaffIndex)
7409       return -1;
7410     dtd->scaffIndex[0] = 0;
7411   }
7412 
7413   if (dtd->scaffCount >= dtd->scaffSize) {
7414     CONTENT_SCAFFOLD *temp;
7415     if (dtd->scaffold) {
7416       /* Detect and prevent integer overflow */
7417       if (dtd->scaffSize > UINT_MAX / 2u) {
7418         return -1;
7419       }
7420       /* Detect and prevent integer overflow.
7421        * The preprocessor guard addresses the "always false" warning
7422        * from -Wtype-limits on platforms where
7423        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7424 #if UINT_MAX >= SIZE_MAX
7425       if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7426         return -1;
7427       }
7428 #endif
7429 
7430       temp = (CONTENT_SCAFFOLD *)REALLOC(
7431           parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7432       if (temp == NULL)
7433         return -1;
7434       dtd->scaffSize *= 2;
7435     } else {
7436       temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7437                                                     * sizeof(CONTENT_SCAFFOLD));
7438       if (temp == NULL)
7439         return -1;
7440       dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7441     }
7442     dtd->scaffold = temp;
7443   }
7444   next = dtd->scaffCount++;
7445   me = &dtd->scaffold[next];
7446   if (dtd->scaffLevel) {
7447     CONTENT_SCAFFOLD *parent
7448         = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7449     if (parent->lastchild) {
7450       dtd->scaffold[parent->lastchild].nextsib = next;
7451     }
7452     if (! parent->childcnt)
7453       parent->firstchild = next;
7454     parent->lastchild = next;
7455     parent->childcnt++;
7456   }
7457   me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7458   return next;
7459 }
7460 
/* Build the XML_Content tree for the content model currently held in
 * parser->m_dtd->scaffold.
 *
 * Returns one MALLOC'ed buffer that holds dtd->scaffCount XML_Content
 * nodes followed by the name strings they point to, or NULL when the
 * required size would overflow or the allocation fails.
 */
static XML_Content *
build_model(XML_Parser parser) {
  /* Function build_model transforms the existing parser->m_dtd->scaffold
   * array of CONTENT_SCAFFOLD tree nodes into a new array of
   * XML_Content tree nodes followed by a gapless list of zero-terminated
   * strings. */
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *ret;
  XML_Char *str; /* the current string writing location */

  /* Detect and prevent integer overflow.
   * The preprocessor guard addresses the "always false" warning
   * from -Wtype-limits on platforms where
   * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
  if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
    return NULL;
  }
  if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
    return NULL;
  }
#endif
  /* Also make sure that the sum of both parts cannot overflow */
  if (dtd->scaffCount * sizeof(XML_Content)
      > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
    return NULL;
  }

  const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
                            + (dtd->contentStringLen * sizeof(XML_Char)));

  ret = (XML_Content *)MALLOC(parser, allocsize);
  if (! ret)
    return NULL;

  /* What follows is an iterative implementation of what was previously done
   * recursively in a dedicated function called "build_node".  The old
   * recursive build_node could be forced into stack exhaustion from input as
   * small as a few megabytes, and so that was a security issue.  Hence, a
   * function call stack is avoided now by resolving recursion.
   *
   * The iterative approach works as follows:
   *
   * - We have two writing pointers, both walking up the result array; one does
   *   the work, the other creates "jobs" for its colleague to do, and leads
   *   the way:
   *
   *   - The faster one, pointer jobDest, always leads and writes "what job
   *     to do" by the other, once they reach that place in the
   *     array: leader "jobDest" stores the source node array index (relative
   *     to array dtd->scaffold) in field "numchildren".
   *
   *   - The slower one, pointer dest, looks at the value stored in the
   *     "numchildren" field (which actually holds a source node array index
   *     at that time) and puts the real data from dtd->scaffold in.
   *
   * - Before the loop starts, jobDest writes source array index 0
   *   (where the root node is located) so that dest will have something to do
   *   when it starts operation.
   *
   * - Whenever nodes with children are encountered, jobDest appends
   *   them as new jobs, in order.  As a result, tree node siblings are
   *   adjacent in the resulting array, for example:
   *
   *     [0] root, has two children
   *       [1] first child of 0, has three children
   *         [3] first child of 1, does not have children
   *         [4] second child of 1, does not have children
   *         [5] third child of 1, does not have children
   *       [2] second child of 0, does not have children
   *
   *   Or (the same data) presented in flat array view:
   *
   *     [0] root, has two children
   *
   *     [1] first child of 0, has three children
   *     [2] second child of 0, does not have children
   *
   *     [3] first child of 1, does not have children
   *     [4] second child of 1, does not have children
   *     [5] third child of 1, does not have children
   *
   * - The algorithm repeats until all target array indices have been processed.
   */
  XML_Content *dest = ret; /* tree node writing location, moves upwards */
  XML_Content *const destLimit = &ret[dtd->scaffCount];
  XML_Content *jobDest = ret; /* next free writing location in target array */
  /* The strings are written directly behind the last tree node */
  str = (XML_Char *)&ret[dtd->scaffCount];

  /* Add the starting job, the root node (index 0) of the source tree  */
  (jobDest++)->numchildren = 0;

  for (; dest < destLimit; dest++) {
    /* Retrieve source tree array index from job storage */
    const int src_node = (int)dest->numchildren;

    /* Convert item */
    dest->type = dtd->scaffold[src_node].type;
    dest->quant = dtd->scaffold[src_node].quant;
    if (dest->type == XML_CTYPE_NAME) {
      /* Name node: copy the name, terminator included, into the string
       * area; such a node has no children. */
      const XML_Char *src;
      dest->name = str;
      src = dtd->scaffold[src_node].name;
      for (;;) {
        *str++ = *src;
        if (! *src)
          break;
        src++;
      }
      dest->numchildren = 0;
      dest->children = NULL;
    } else {
      unsigned int i;
      int cn;
      dest->name = NULL;
      /* From here on numchildren holds the real child count again */
      dest->numchildren = dtd->scaffold[src_node].childcnt;
      dest->children = jobDest;

      /* Append scaffold indices of children to array */
      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
        (jobDest++)->numchildren = (unsigned int)cn;
    }
  }

  return ret;
}
7587 
7588 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7589 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7590                const char *end) {
7591   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7592   const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7593   ELEMENT_TYPE *ret;
7594 
7595   if (! name)
7596     return NULL;
7597   ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7598                                sizeof(ELEMENT_TYPE));
7599   if (! ret)
7600     return NULL;
7601   if (ret->name != name)
7602     poolDiscard(&dtd->pool);
7603   else {
7604     poolFinish(&dtd->pool);
7605     if (! setElementTypePrefix(parser, ret))
7606       return NULL;
7607   }
7608   return ret;
7609 }
7610 
7611 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7612 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7613   size_t charsRequired = 0;
7614   XML_Char *result;
7615 
7616   /* First determine how long the string is */
7617   while (s[charsRequired] != 0) {
7618     charsRequired++;
7619   }
7620   /* Include the terminator */
7621   charsRequired++;
7622 
7623   /* Now allocate space for the copy */
7624   result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7625   if (result == NULL)
7626     return NULL;
7627   /* Copy the original into place */
7628   memcpy(result, s, charsRequired * sizeof(XML_Char));
7629   return result;
7630 }
7631 
7632 #ifdef XML_DTD
7633 
7634 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7635 accountingGetCurrentAmplification(XML_Parser rootParser) {
7636   const XmlBigCount countBytesOutput
7637       = rootParser->m_accounting.countBytesDirect
7638         + rootParser->m_accounting.countBytesIndirect;
7639   const float amplificationFactor
7640       = rootParser->m_accounting.countBytesDirect
7641             ? (countBytesOutput
7642                / (float)(rootParser->m_accounting.countBytesDirect))
7643             : 1.0f;
7644   assert(! rootParser->m_parentParser);
7645   return amplificationFactor;
7646 }
7647 
7648 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7649 accountingReportStats(XML_Parser originParser, const char *epilog) {
7650   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7651   assert(! rootParser->m_parentParser);
7652 
7653   if (rootParser->m_accounting.debugLevel < 1) {
7654     return;
7655   }
7656 
7657   const float amplificationFactor
7658       = accountingGetCurrentAmplification(rootParser);
7659   fprintf(stderr,
7660           "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7661               "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7662           (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7663           rootParser->m_accounting.countBytesIndirect,
7664           (double)amplificationFactor, epilog);
7665 }
7666 
7667 static void
accountingOnAbort(XML_Parser originParser)7668 accountingOnAbort(XML_Parser originParser) {
7669   accountingReportStats(originParser, " ABORTING\n");
7670 }
7671 
7672 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7673 accountingReportDiff(XML_Parser rootParser,
7674                      unsigned int levelsAwayFromRootParser, const char *before,
7675                      const char *after, ptrdiff_t bytesMore, int source_line,
7676                      enum XML_Account account) {
7677   assert(! rootParser->m_parentParser);
7678 
7679   fprintf(stderr,
7680           " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7681           bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7682           levelsAwayFromRootParser, source_line, 10, "");
7683 
7684   const char ellipis[] = "[..]";
7685   const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7686   const unsigned int contextLength = 10;
7687 
7688   /* Note: Performance is of no concern here */
7689   const char *walker = before;
7690   if ((rootParser->m_accounting.debugLevel >= 3)
7691       || (after - before)
7692              <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7693     for (; walker < after; walker++) {
7694       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7695     }
7696   } else {
7697     for (; walker < before + contextLength; walker++) {
7698       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7699     }
7700     fprintf(stderr, ellipis);
7701     walker = after - contextLength;
7702     for (; walker < after; walker++) {
7703       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7704     }
7705   }
7706   fprintf(stderr, "\"\n");
7707 }
7708 
7709 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7710 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7711                         const char *after, int source_line,
7712                         enum XML_Account account) {
7713   /* Note: We need to check the token type *first* to be sure that
7714    *       we can even access variable <after>, safely.
7715    *       E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7716   switch (tok) {
7717   case XML_TOK_INVALID:
7718   case XML_TOK_PARTIAL:
7719   case XML_TOK_PARTIAL_CHAR:
7720   case XML_TOK_NONE:
7721     return XML_TRUE;
7722   }
7723 
7724   if (account == XML_ACCOUNT_NONE)
7725     return XML_TRUE; /* because these bytes have been accounted for, already */
7726 
7727   unsigned int levelsAwayFromRootParser;
7728   const XML_Parser rootParser
7729       = getRootParserOf(originParser, &levelsAwayFromRootParser);
7730   assert(! rootParser->m_parentParser);
7731 
7732   const int isDirect
7733       = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7734   const ptrdiff_t bytesMore = after - before;
7735 
7736   XmlBigCount *const additionTarget
7737       = isDirect ? &rootParser->m_accounting.countBytesDirect
7738                  : &rootParser->m_accounting.countBytesIndirect;
7739 
7740   /* Detect and avoid integer overflow */
7741   if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7742     return XML_FALSE;
7743   *additionTarget += bytesMore;
7744 
7745   const XmlBigCount countBytesOutput
7746       = rootParser->m_accounting.countBytesDirect
7747         + rootParser->m_accounting.countBytesIndirect;
7748   const float amplificationFactor
7749       = accountingGetCurrentAmplification(rootParser);
7750   const XML_Bool tolerated
7751       = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7752         || (amplificationFactor
7753             <= rootParser->m_accounting.maximumAmplificationFactor);
7754 
7755   if (rootParser->m_accounting.debugLevel >= 2) {
7756     accountingReportStats(rootParser, "");
7757     accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7758                          bytesMore, source_line, account);
7759   }
7760 
7761   return tolerated;
7762 }
7763 
7764 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7765 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7766   if (! parser)
7767     return 0;
7768   return parser->m_accounting.countBytesDirect;
7769 }
7770 
7771 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7772 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7773   if (! parser)
7774     return 0;
7775   return parser->m_accounting.countBytesIndirect;
7776 }
7777 
7778 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7779 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7780                           const char *action, int sourceLine) {
7781   assert(! rootParser->m_parentParser);
7782   if (rootParser->m_entity_stats.debugLevel < 1)
7783     return;
7784 
7785 #  if defined(XML_UNICODE)
7786   const char *const entityName = "[..]";
7787 #  else
7788   const char *const entityName = entity->name;
7789 #  endif
7790 
7791   fprintf(
7792       stderr,
7793       "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7794       (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7795       rootParser->m_entity_stats.currentDepth,
7796       rootParser->m_entity_stats.maximumDepthSeen,
7797       (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7798       entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7799       sourceLine);
7800 }
7801 
7802 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7803 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7804   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7805   assert(! rootParser->m_parentParser);
7806 
7807   rootParser->m_entity_stats.countEverOpened++;
7808   rootParser->m_entity_stats.currentDepth++;
7809   if (rootParser->m_entity_stats.currentDepth
7810       > rootParser->m_entity_stats.maximumDepthSeen) {
7811     rootParser->m_entity_stats.maximumDepthSeen++;
7812   }
7813 
7814   entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7815 }
7816 
7817 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7818 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7819   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7820   assert(! rootParser->m_parentParser);
7821 
7822   entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7823   rootParser->m_entity_stats.currentDepth--;
7824 }
7825 
7826 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)7827 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7828   XML_Parser rootParser = parser;
7829   unsigned int stepsTakenUpwards = 0;
7830   while (rootParser->m_parentParser) {
7831     rootParser = rootParser->m_parentParser;
7832     stepsTakenUpwards++;
7833   }
7834   assert(! rootParser->m_parentParser);
7835   if (outLevelDiff != NULL) {
7836     *outLevelDiff = stepsTakenUpwards;
7837   }
7838   return rootParser;
7839 }
7840 
/* Maps a byte value to a short, printable, NUL-terminated string.

   - Printable ASCII (32..126) maps to the character itself, except that
     the double quote and the backslash are preceded by a backslash.
   - 0 maps to "\0", 9 to "\t", 10 to "\n" and 13 to "\r" (each spelled
     as a backslash followed by the letter/digit).
   - Every other value maps to "\x" followed by its uppercase hexadecimal
     value without zero padding (e.g. "\x1", "\xB", "\x7F", "\xFF").

   The returned pointer refers to a string literal; callers must not
   modify or free it. */
const char *
unsignedCharToPrintable(unsigned char c) {
  /* One entry per byte value, eight entries per row. */
  static const char *const printable[256] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",  "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",  "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",     "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",     ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",     "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",     "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",     "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",     "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",     "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",     "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",     "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",     "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",     "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",     "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };

  /* Widen first so the bounds check is a plain unsigned comparison. */
  const unsigned int index = c;
  if (index >= (unsigned int)(sizeof(printable) / sizeof(printable[0]))) {
    assert(0); /* never gets here */
    return "dead code";
  }

  return printable[index];
}
8362 
8363 #endif /* XML_DTD */
8364 
/* Reads a debug level from the environment.

   variableName      name of the environment variable to look up
   defaultDebugLevel value to return when no usable level is found

   Returns the variable's value parsed as a base-10 unsigned long.
   Returns defaultDebugLevel instead when the variable is unset, when
   strtoul reports an error through errno (e.g. out of range), when no
   characters could be converted at all (e.g. the variable is set to an
   empty string), or when anything follows the number.

   Side effect: errno is set to 0 before parsing and again on the
   rejection path. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const valueOrNull = getenv(variableName);
  if (valueOrNull == NULL) {
    return defaultDebugLevel;
  }
  const char *const value = valueOrNull;

  errno = 0;
  char *afterValue = NULL;
  unsigned long debugLevel = strtoul(value, &afterValue, 10);

  /* afterValue == value means strtoul consumed nothing; without this
     check an empty value would be silently accepted as level 0. */
  if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
    errno = 0;
    return defaultDebugLevel;
  }

  return debugLevel;
}
8383