1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000-2017 Expat development team
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32 
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stddef.h>
37 #include <string.h>
38 
39 #include "expat.h"
40 #include "codepage.h"
41 #include "internal.h" /* for UNUSED_P only */
42 #include "xmlfile.h"
43 #include "xmltchar.h"
44 
45 #ifdef _MSC_VER
46 #  include <crtdbg.h>
47 #endif
48 
49 #ifdef XML_UNICODE
50 #  include <wchar.h>
51 #endif
52 
53 /* Structures for handler user data */
54 typedef struct NotationList {
55   struct NotationList *next;
56   const XML_Char *notationName;
57   const XML_Char *systemId;
58   const XML_Char *publicId;
59 } NotationList;
60 
61 typedef struct xmlwfUserData {
62   FILE *fp;
63   NotationList *notationListHead;
64   const XML_Char *currentDoctypeName;
65 } XmlwfUserData;
66 
/* Separator between the namespace URI and the local part of an expanded
   name.  Using character code 1 ensures proper sorting. */
68 
69 #define NSSEP T('\001')
70 
71 static void XMLCALL
characterData(void * userData,const XML_Char * s,int len)72 characterData(void *userData, const XML_Char *s, int len) {
73   FILE *fp = ((XmlwfUserData *)userData)->fp;
74   for (; len > 0; --len, ++s) {
75     switch (*s) {
76     case T('&'):
77       fputts(T("&amp;"), fp);
78       break;
79     case T('<'):
80       fputts(T("&lt;"), fp);
81       break;
82     case T('>'):
83       fputts(T("&gt;"), fp);
84       break;
85 #ifdef W3C14N
86     case 13:
87       fputts(T("&#xD;"), fp);
88       break;
89 #else
90     case T('"'):
91       fputts(T("&quot;"), fp);
92       break;
93     case 9:
94     case 10:
95     case 13:
96       ftprintf(fp, T("&#%d;"), *s);
97       break;
98 #endif
99     default:
100       puttc(*s, fp);
101       break;
102     }
103   }
104 }
105 
106 static void
attributeValue(FILE * fp,const XML_Char * s)107 attributeValue(FILE *fp, const XML_Char *s) {
108   puttc(T('='), fp);
109   puttc(T('"'), fp);
110   assert(s);
111   for (;;) {
112     switch (*s) {
113     case 0:
114     case NSSEP:
115       puttc(T('"'), fp);
116       return;
117     case T('&'):
118       fputts(T("&amp;"), fp);
119       break;
120     case T('<'):
121       fputts(T("&lt;"), fp);
122       break;
123     case T('"'):
124       fputts(T("&quot;"), fp);
125       break;
126 #ifdef W3C14N
127     case 9:
128       fputts(T("&#x9;"), fp);
129       break;
130     case 10:
131       fputts(T("&#xA;"), fp);
132       break;
133     case 13:
134       fputts(T("&#xD;"), fp);
135       break;
136 #else
137     case T('>'):
138       fputts(T("&gt;"), fp);
139       break;
140     case 9:
141     case 10:
142     case 13:
143       ftprintf(fp, T("&#%d;"), *s);
144       break;
145 #endif
146     default:
147       puttc(*s, fp);
148       break;
149     }
150     s++;
151   }
152 }
153 
/* Lexicographically comparing UTF-8 encoded attribute names is equivalent
   to lexicographically comparing them by character number. */
156 
157 static int
attcmp(const void * att1,const void * att2)158 attcmp(const void *att1, const void *att2) {
159   return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
160 }
161 
162 static void XMLCALL
startElement(void * userData,const XML_Char * name,const XML_Char ** atts)163 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
164   int nAtts;
165   const XML_Char **p;
166   FILE *fp = ((XmlwfUserData *)userData)->fp;
167   puttc(T('<'), fp);
168   fputts(name, fp);
169 
170   p = atts;
171   while (*p)
172     ++p;
173   nAtts = (int)((p - atts) >> 1);
174   if (nAtts > 1)
175     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
176   while (*atts) {
177     puttc(T(' '), fp);
178     fputts(*atts++, fp);
179     attributeValue(fp, *atts);
180     atts++;
181   }
182   puttc(T('>'), fp);
183 }
184 
185 static void XMLCALL
endElement(void * userData,const XML_Char * name)186 endElement(void *userData, const XML_Char *name) {
187   FILE *fp = ((XmlwfUserData *)userData)->fp;
188   puttc(T('<'), fp);
189   puttc(T('/'), fp);
190   fputts(name, fp);
191   puttc(T('>'), fp);
192 }
193 
194 static int
nsattcmp(const void * p1,const void * p2)195 nsattcmp(const void *p1, const void *p2) {
196   const XML_Char *att1 = *(const XML_Char **)p1;
197   const XML_Char *att2 = *(const XML_Char **)p2;
198   int sep1 = (tcsrchr(att1, NSSEP) != 0);
199   int sep2 = (tcsrchr(att1, NSSEP) != 0);
200   if (sep1 != sep2)
201     return sep1 - sep2;
202   return tcscmp(att1, att2);
203 }
204 
205 static void XMLCALL
startElementNS(void * userData,const XML_Char * name,const XML_Char ** atts)206 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
207   int nAtts;
208   int nsi;
209   const XML_Char **p;
210   FILE *fp = ((XmlwfUserData *)userData)->fp;
211   const XML_Char *sep;
212   puttc(T('<'), fp);
213 
214   sep = tcsrchr(name, NSSEP);
215   if (sep) {
216     fputts(T("n1:"), fp);
217     fputts(sep + 1, fp);
218     fputts(T(" xmlns:n1"), fp);
219     attributeValue(fp, name);
220     nsi = 2;
221   } else {
222     fputts(name, fp);
223     nsi = 1;
224   }
225 
226   p = atts;
227   while (*p)
228     ++p;
229   nAtts = (int)((p - atts) >> 1);
230   if (nAtts > 1)
231     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
232   while (*atts) {
233     name = *atts++;
234     sep = tcsrchr(name, NSSEP);
235     puttc(T(' '), fp);
236     if (sep) {
237       ftprintf(fp, T("n%d:"), nsi);
238       fputts(sep + 1, fp);
239     } else
240       fputts(name, fp);
241     attributeValue(fp, *atts);
242     if (sep) {
243       ftprintf(fp, T(" xmlns:n%d"), nsi++);
244       attributeValue(fp, name);
245     }
246     atts++;
247   }
248   puttc(T('>'), fp);
249 }
250 
251 static void XMLCALL
endElementNS(void * userData,const XML_Char * name)252 endElementNS(void *userData, const XML_Char *name) {
253   FILE *fp = ((XmlwfUserData *)userData)->fp;
254   const XML_Char *sep;
255   puttc(T('<'), fp);
256   puttc(T('/'), fp);
257   sep = tcsrchr(name, NSSEP);
258   if (sep) {
259     fputts(T("n1:"), fp);
260     fputts(sep + 1, fp);
261   } else
262     fputts(name, fp);
263   puttc(T('>'), fp);
264 }
265 
266 #ifndef W3C14N
267 
268 static void XMLCALL
processingInstruction(void * userData,const XML_Char * target,const XML_Char * data)269 processingInstruction(void *userData, const XML_Char *target,
270                       const XML_Char *data) {
271   FILE *fp = ((XmlwfUserData *)userData)->fp;
272   puttc(T('<'), fp);
273   puttc(T('?'), fp);
274   fputts(target, fp);
275   puttc(T(' '), fp);
276   fputts(data, fp);
277   puttc(T('?'), fp);
278   puttc(T('>'), fp);
279 }
280 
281 static XML_Char *
xcsdup(const XML_Char * s)282 xcsdup(const XML_Char *s) {
283   XML_Char *result;
284   int count = 0;
285   int numBytes;
286 
287   /* Get the length of the string, including terminator */
288   while (s[count++] != 0) {
289     /* Do nothing */
290   }
291   numBytes = count * sizeof(XML_Char);
292   result = malloc(numBytes);
293   if (result == NULL)
294     return NULL;
295   memcpy(result, s, numBytes);
296   return result;
297 }
298 
299 static void XMLCALL
startDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * publid,int has_internal_subset)300 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
301                  const XML_Char *sysid, const XML_Char *publid,
302                  int has_internal_subset) {
303   XmlwfUserData *data = (XmlwfUserData *)userData;
304   UNUSED_P(sysid);
305   UNUSED_P(publid);
306   UNUSED_P(has_internal_subset);
307   data->currentDoctypeName = xcsdup(doctypeName);
308 }
309 
310 static void
freeNotations(XmlwfUserData * data)311 freeNotations(XmlwfUserData *data) {
312   NotationList *notationListHead = data->notationListHead;
313 
314   while (notationListHead != NULL) {
315     NotationList *next = notationListHead->next;
316     free((void *)notationListHead->notationName);
317     free((void *)notationListHead->systemId);
318     free((void *)notationListHead->publicId);
319     free(notationListHead);
320     notationListHead = next;
321   }
322   data->notationListHead = NULL;
323 }
324 
325 static int
xcscmp(const XML_Char * xs,const XML_Char * xt)326 xcscmp(const XML_Char *xs, const XML_Char *xt) {
327   while (*xs != 0 && *xt != 0) {
328     if (*xs < *xt)
329       return -1;
330     if (*xs > *xt)
331       return 1;
332     xs++;
333     xt++;
334   }
335   if (*xs < *xt)
336     return -1;
337   if (*xs > *xt)
338     return 1;
339   return 0;
340 }
341 
342 static int
notationCmp(const void * a,const void * b)343 notationCmp(const void *a, const void *b) {
344   const NotationList *const n1 = *(NotationList **)a;
345   const NotationList *const n2 = *(NotationList **)b;
346 
347   return xcscmp(n1->notationName, n2->notationName);
348 }
349 
350 static void XMLCALL
endDoctypeDecl(void * userData)351 endDoctypeDecl(void *userData) {
352   XmlwfUserData *data = (XmlwfUserData *)userData;
353   NotationList **notations;
354   int notationCount = 0;
355   NotationList *p;
356   int i;
357 
358   /* How many notations do we have? */
359   for (p = data->notationListHead; p != NULL; p = p->next)
360     notationCount++;
361   if (notationCount == 0) {
362     /* Nothing to report */
363     free((void *)data->currentDoctypeName);
364     data->currentDoctypeName = NULL;
365     return;
366   }
367 
368   notations = malloc(notationCount * sizeof(NotationList *));
369   if (notations == NULL) {
370     fprintf(stderr, "Unable to sort notations");
371     freeNotations(data);
372     return;
373   }
374 
375   for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
376     notations[i] = p;
377   }
378   qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
379 
380   /* Output the DOCTYPE header */
381   fputts(T("<!DOCTYPE "), data->fp);
382   fputts(data->currentDoctypeName, data->fp);
383   fputts(T(" [\n"), data->fp);
384 
385   /* Now the NOTATIONs */
386   for (i = 0; i < notationCount; i++) {
387     fputts(T("<!NOTATION "), data->fp);
388     fputts(notations[i]->notationName, data->fp);
389     if (notations[i]->publicId != NULL) {
390       fputts(T(" PUBLIC '"), data->fp);
391       fputts(notations[i]->publicId, data->fp);
392       puttc(T('\''), data->fp);
393       if (notations[i]->systemId != NULL) {
394         puttc(T(' '), data->fp);
395         puttc(T('\''), data->fp);
396         fputts(notations[i]->systemId, data->fp);
397         puttc(T('\''), data->fp);
398       }
399     } else if (notations[i]->systemId != NULL) {
400       fputts(T(" SYSTEM '"), data->fp);
401       fputts(notations[i]->systemId, data->fp);
402       puttc(T('\''), data->fp);
403     }
404     puttc(T('>'), data->fp);
405     puttc(T('\n'), data->fp);
406   }
407 
408   /* Finally end the DOCTYPE */
409   fputts(T("]>\n"), data->fp);
410 
411   free(notations);
412   freeNotations(data);
413   free((void *)data->currentDoctypeName);
414   data->currentDoctypeName = NULL;
415 }
416 
417 static void XMLCALL
notationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)418 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
419              const XML_Char *systemId, const XML_Char *publicId) {
420   XmlwfUserData *data = (XmlwfUserData *)userData;
421   NotationList *entry = malloc(sizeof(NotationList));
422   const char *errorMessage = "Unable to store NOTATION for output\n";
423 
424   UNUSED_P(base);
425   if (entry == NULL) {
426     fputs(errorMessage, stderr);
427     return; /* Nothing we can really do about this */
428   }
429   entry->notationName = xcsdup(notationName);
430   if (entry->notationName == NULL) {
431     fputs(errorMessage, stderr);
432     free(entry);
433     return;
434   }
435   if (systemId != NULL) {
436     entry->systemId = xcsdup(systemId);
437     if (entry->systemId == NULL) {
438       fputs(errorMessage, stderr);
439       free((void *)entry->notationName);
440       free(entry);
441       return;
442     }
443   } else {
444     entry->systemId = NULL;
445   }
446   if (publicId != NULL) {
447     entry->publicId = xcsdup(publicId);
448     if (entry->publicId == NULL) {
449       fputs(errorMessage, stderr);
450       free((void *)entry->systemId); /* Safe if it's NULL */
451       free((void *)entry->notationName);
452       free(entry);
453       return;
454     }
455   } else {
456     entry->publicId = NULL;
457   }
458 
459   entry->next = data->notationListHead;
460   data->notationListHead = entry;
461 }
462 
463 #endif /* not W3C14N */
464 
465 static void XMLCALL
defaultCharacterData(void * userData,const XML_Char * s,int len)466 defaultCharacterData(void *userData, const XML_Char *s, int len) {
467   UNUSED_P(s);
468   UNUSED_P(len);
469   XML_DefaultCurrent((XML_Parser)userData);
470 }
471 
472 static void XMLCALL
defaultStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)473 defaultStartElement(void *userData, const XML_Char *name,
474                     const XML_Char **atts) {
475   UNUSED_P(name);
476   UNUSED_P(atts);
477   XML_DefaultCurrent((XML_Parser)userData);
478 }
479 
480 static void XMLCALL
defaultEndElement(void * userData,const XML_Char * name)481 defaultEndElement(void *userData, const XML_Char *name) {
482   UNUSED_P(name);
483   XML_DefaultCurrent((XML_Parser)userData);
484 }
485 
486 static void XMLCALL
defaultProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)487 defaultProcessingInstruction(void *userData, const XML_Char *target,
488                              const XML_Char *data) {
489   UNUSED_P(target);
490   UNUSED_P(data);
491   XML_DefaultCurrent((XML_Parser)userData);
492 }
493 
494 static void XMLCALL
nopCharacterData(void * userData,const XML_Char * s,int len)495 nopCharacterData(void *userData, const XML_Char *s, int len) {
496   UNUSED_P(userData);
497   UNUSED_P(s);
498   UNUSED_P(len);
499 }
500 
501 static void XMLCALL
nopStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)502 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
503   UNUSED_P(userData);
504   UNUSED_P(name);
505   UNUSED_P(atts);
506 }
507 
508 static void XMLCALL
nopEndElement(void * userData,const XML_Char * name)509 nopEndElement(void *userData, const XML_Char *name) {
510   UNUSED_P(userData);
511   UNUSED_P(name);
512 }
513 
514 static void XMLCALL
nopProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)515 nopProcessingInstruction(void *userData, const XML_Char *target,
516                          const XML_Char *data) {
517   UNUSED_P(userData);
518   UNUSED_P(target);
519   UNUSED_P(data);
520 }
521 
522 static void XMLCALL
markup(void * userData,const XML_Char * s,int len)523 markup(void *userData, const XML_Char *s, int len) {
524   FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
525   for (; len > 0; --len, ++s)
526     puttc(*s, fp);
527 }
528 
529 static void
metaLocation(XML_Parser parser)530 metaLocation(XML_Parser parser) {
531   const XML_Char *uri = XML_GetBase(parser);
532   FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
533   if (uri)
534     ftprintf(fp, T(" uri=\"%s\""), uri);
535   ftprintf(fp,
536            T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
537                T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
538                    T(XML_FMT_INT_MOD) T("u\""),
539            XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
540            XML_GetCurrentLineNumber(parser),
541            XML_GetCurrentColumnNumber(parser));
542 }
543 
544 static void
metaStartDocument(void * userData)545 metaStartDocument(void *userData) {
546   fputts(T("<document>\n"),
547          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
548 }
549 
550 static void
metaEndDocument(void * userData)551 metaEndDocument(void *userData) {
552   fputts(T("</document>\n"),
553          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
554 }
555 
556 static void XMLCALL
metaStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)557 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
558   XML_Parser parser = (XML_Parser)userData;
559   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
560   FILE *fp = data->fp;
561   const XML_Char **specifiedAttsEnd
562       = atts + XML_GetSpecifiedAttributeCount(parser);
563   const XML_Char **idAttPtr;
564   int idAttIndex = XML_GetIdAttributeIndex(parser);
565   if (idAttIndex < 0)
566     idAttPtr = 0;
567   else
568     idAttPtr = atts + idAttIndex;
569 
570   ftprintf(fp, T("<starttag name=\"%s\""), name);
571   metaLocation(parser);
572   if (*atts) {
573     fputts(T(">\n"), fp);
574     do {
575       ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
576       characterData(data, atts[1], (int)tcslen(atts[1]));
577       if (atts >= specifiedAttsEnd)
578         fputts(T("\" defaulted=\"yes\"/>\n"), fp);
579       else if (atts == idAttPtr)
580         fputts(T("\" id=\"yes\"/>\n"), fp);
581       else
582         fputts(T("\"/>\n"), fp);
583     } while (*(atts += 2));
584     fputts(T("</starttag>\n"), fp);
585   } else
586     fputts(T("/>\n"), fp);
587 }
588 
589 static void XMLCALL
metaEndElement(void * userData,const XML_Char * name)590 metaEndElement(void *userData, const XML_Char *name) {
591   XML_Parser parser = (XML_Parser)userData;
592   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
593   FILE *fp = data->fp;
594   ftprintf(fp, T("<endtag name=\"%s\""), name);
595   metaLocation(parser);
596   fputts(T("/>\n"), fp);
597 }
598 
599 static void XMLCALL
metaProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)600 metaProcessingInstruction(void *userData, const XML_Char *target,
601                           const XML_Char *data) {
602   XML_Parser parser = (XML_Parser)userData;
603   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
604   FILE *fp = usrData->fp;
605   ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
606   characterData(usrData, data, (int)tcslen(data));
607   puttc(T('"'), fp);
608   metaLocation(parser);
609   fputts(T("/>\n"), fp);
610 }
611 
612 static void XMLCALL
metaComment(void * userData,const XML_Char * data)613 metaComment(void *userData, const XML_Char *data) {
614   XML_Parser parser = (XML_Parser)userData;
615   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
616   FILE *fp = usrData->fp;
617   fputts(T("<comment data=\""), fp);
618   characterData(usrData, data, (int)tcslen(data));
619   puttc(T('"'), fp);
620   metaLocation(parser);
621   fputts(T("/>\n"), fp);
622 }
623 
624 static void XMLCALL
metaStartCdataSection(void * userData)625 metaStartCdataSection(void *userData) {
626   XML_Parser parser = (XML_Parser)userData;
627   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
628   FILE *fp = data->fp;
629   fputts(T("<startcdata"), fp);
630   metaLocation(parser);
631   fputts(T("/>\n"), fp);
632 }
633 
634 static void XMLCALL
metaEndCdataSection(void * userData)635 metaEndCdataSection(void *userData) {
636   XML_Parser parser = (XML_Parser)userData;
637   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
638   FILE *fp = data->fp;
639   fputts(T("<endcdata"), fp);
640   metaLocation(parser);
641   fputts(T("/>\n"), fp);
642 }
643 
644 static void XMLCALL
metaCharacterData(void * userData,const XML_Char * s,int len)645 metaCharacterData(void *userData, const XML_Char *s, int len) {
646   XML_Parser parser = (XML_Parser)userData;
647   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
648   FILE *fp = data->fp;
649   fputts(T("<chars str=\""), fp);
650   characterData(data, s, len);
651   puttc(T('"'), fp);
652   metaLocation(parser);
653   fputts(T("/>\n"), fp);
654 }
655 
656 static void XMLCALL
metaStartDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)657 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
658                      const XML_Char *sysid, const XML_Char *pubid,
659                      int has_internal_subset) {
660   XML_Parser parser = (XML_Parser)userData;
661   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
662   FILE *fp = data->fp;
663   UNUSED_P(sysid);
664   UNUSED_P(pubid);
665   UNUSED_P(has_internal_subset);
666   ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
667   metaLocation(parser);
668   fputts(T("/>\n"), fp);
669 }
670 
671 static void XMLCALL
metaEndDoctypeDecl(void * userData)672 metaEndDoctypeDecl(void *userData) {
673   XML_Parser parser = (XML_Parser)userData;
674   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
675   FILE *fp = data->fp;
676   fputts(T("<enddoctype"), fp);
677   metaLocation(parser);
678   fputts(T("/>\n"), fp);
679 }
680 
681 static void XMLCALL
metaNotationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)682 metaNotationDecl(void *userData, const XML_Char *notationName,
683                  const XML_Char *base, const XML_Char *systemId,
684                  const XML_Char *publicId) {
685   XML_Parser parser = (XML_Parser)userData;
686   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
687   FILE *fp = data->fp;
688   UNUSED_P(base);
689   ftprintf(fp, T("<notation name=\"%s\""), notationName);
690   if (publicId)
691     ftprintf(fp, T(" public=\"%s\""), publicId);
692   if (systemId) {
693     fputts(T(" system=\""), fp);
694     characterData(data, systemId, (int)tcslen(systemId));
695     puttc(T('"'), fp);
696   }
697   metaLocation(parser);
698   fputts(T("/>\n"), fp);
699 }
700 
701 static void XMLCALL
metaEntityDecl(void * userData,const XML_Char * entityName,int is_param,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)702 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
703                const XML_Char *value, int value_length, const XML_Char *base,
704                const XML_Char *systemId, const XML_Char *publicId,
705                const XML_Char *notationName) {
706   XML_Parser parser = (XML_Parser)userData;
707   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
708   FILE *fp = data->fp;
709 
710   UNUSED_P(is_param);
711   UNUSED_P(base);
712   if (value) {
713     ftprintf(fp, T("<entity name=\"%s\""), entityName);
714     metaLocation(parser);
715     puttc(T('>'), fp);
716     characterData(data, value, value_length);
717     fputts(T("</entity/>\n"), fp);
718   } else if (notationName) {
719     ftprintf(fp, T("<entity name=\"%s\""), entityName);
720     if (publicId)
721       ftprintf(fp, T(" public=\"%s\""), publicId);
722     fputts(T(" system=\""), fp);
723     characterData(data, systemId, (int)tcslen(systemId));
724     puttc(T('"'), fp);
725     ftprintf(fp, T(" notation=\"%s\""), notationName);
726     metaLocation(parser);
727     fputts(T("/>\n"), fp);
728   } else {
729     ftprintf(fp, T("<entity name=\"%s\""), entityName);
730     if (publicId)
731       ftprintf(fp, T(" public=\"%s\""), publicId);
732     fputts(T(" system=\""), fp);
733     characterData(data, systemId, (int)tcslen(systemId));
734     puttc(T('"'), fp);
735     metaLocation(parser);
736     fputts(T("/>\n"), fp);
737   }
738 }
739 
740 static void XMLCALL
metaStartNamespaceDecl(void * userData,const XML_Char * prefix,const XML_Char * uri)741 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
742                        const XML_Char *uri) {
743   XML_Parser parser = (XML_Parser)userData;
744   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
745   FILE *fp = data->fp;
746   fputts(T("<startns"), fp);
747   if (prefix)
748     ftprintf(fp, T(" prefix=\"%s\""), prefix);
749   if (uri) {
750     fputts(T(" ns=\""), fp);
751     characterData(data, uri, (int)tcslen(uri));
752     fputts(T("\"/>\n"), fp);
753   } else
754     fputts(T("/>\n"), fp);
755 }
756 
757 static void XMLCALL
metaEndNamespaceDecl(void * userData,const XML_Char * prefix)758 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
759   XML_Parser parser = (XML_Parser)userData;
760   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
761   FILE *fp = data->fp;
762   if (! prefix)
763     fputts(T("<endns/>\n"), fp);
764   else
765     ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
766 }
767 
768 static int XMLCALL
unknownEncodingConvert(void * data,const char * p)769 unknownEncodingConvert(void *data, const char *p) {
770   return codepageConvert(*(int *)data, p);
771 }
772 
773 static int XMLCALL
unknownEncoding(void * userData,const XML_Char * name,XML_Encoding * info)774 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
775   int cp;
776   static const XML_Char prefixL[] = T("windows-");
777   static const XML_Char prefixU[] = T("WINDOWS-");
778   int i;
779 
780   UNUSED_P(userData);
781   for (i = 0; prefixU[i]; i++)
782     if (name[i] != prefixU[i] && name[i] != prefixL[i])
783       return 0;
784 
785   cp = 0;
786   for (; name[i]; i++) {
787     static const XML_Char digits[] = T("0123456789");
788     const XML_Char *s = tcschr(digits, name[i]);
789     if (! s)
790       return 0;
791     cp *= 10;
792     cp += (int)(s - digits);
793     if (cp >= 0x10000)
794       return 0;
795   }
796   if (! codepageMap(cp, info->map))
797     return 0;
798   info->convert = unknownEncodingConvert;
799   /* We could just cast the code page integer to a void *,
800   and avoid the use of release. */
801   info->release = free;
802   info->data = malloc(sizeof(int));
803   if (! info->data)
804     return 0;
805   *(int *)info->data = cp;
806   return 1;
807 }
808 
809 static int XMLCALL
notStandalone(void * userData)810 notStandalone(void *userData) {
811   UNUSED_P(userData);
812   return 0;
813 }
814 
815 static void
showVersion(XML_Char * prog)816 showVersion(XML_Char *prog) {
817   XML_Char *s = prog;
818   XML_Char ch;
819   const XML_Feature *features = XML_GetFeatureList();
820   while ((ch = *s) != 0) {
821     if (ch == '/'
822 #if defined(_WIN32)
823         || ch == '\\'
824 #endif
825     )
826       prog = s + 1;
827     ++s;
828   }
829   ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
830   if (features != NULL && features[0].feature != XML_FEATURE_END) {
831     int i = 1;
832     ftprintf(stdout, T("%s"), features[0].name);
833     if (features[0].value)
834       ftprintf(stdout, T("=%ld"), features[0].value);
835     while (features[i].feature != XML_FEATURE_END) {
836       ftprintf(stdout, T(", %s"), features[i].name);
837       if (features[i].value)
838         ftprintf(stdout, T("=%ld"), features[i].value);
839       ++i;
840     }
841     ftprintf(stdout, T("\n"));
842   }
843 }
844 
845 static void
usage(const XML_Char * prog,int rc)846 usage(const XML_Char *prog, int rc) {
847   ftprintf(
848       stderr,
849       /* Generated with:
850        * $ xmlwf/xmlwf_helpgen.sh
851        */
852       /* clang-format off */
853       T("usage: %s [-s] [-n] [-p] [-x] [-e ENCODING] [-w] [-r] [-d DIRECTORY]\n")
854       T("             [-c | -m | -t] [-N]\n")
855       T("             [FILE [FILE ...]]\n")
856       T("\n")
857       T("xmlwf - Determines if an XML document is well-formed\n")
858       T("\n")
859       T("positional arguments:\n")
860       T("  FILE          file to process (default: STDIN)\n")
861       T("\n")
862       T("input control arguments:\n")
863       T("  -s            print an error if the document is not [s]tandalone\n")
864       T("  -n            enable [n]amespace processing\n")
865       T("  -p            enable processing external DTDs and [p]arameter entities\n")
866       T("  -x            enable processing of e[x]ternal entities\n")
867       T("  -e ENCODING   override any in-document [e]ncoding declaration\n")
868       T("  -w            enable support for [W]indows code pages\n")
869       T("  -r            disable memory-mapping and use normal file [r]ead IO calls instead\n")
870       T("\n")
871       T("output control arguments:\n")
872       T("  -d DIRECTORY  output [d]estination directory\n")
873       T("  -c            write a [c]opy of input XML, not canonical XML\n")
874       T("  -m            write [m]eta XML, not canonical XML\n")
875       T("  -t            write no XML output for [t]iming of plain parsing\n")
876       T("  -N            enable adding doctype and [n]otation declarations\n")
877       T("\n")
878       T("info arguments:\n")
879       T("  -h            show this [h]elp message and exit\n")
880       T("  -v            show program's [v]ersion number and exit\n")
881       T("\n")
882       T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
883       T("Please report bugs at https://github.com/libexpat/libexpat/issues.  Thank you!\n")
884       , /* clang-format on */
885       prog);
886   exit(rc);
887 }
888 
889 #if defined(__MINGW32__) && defined(XML_UNICODE)
890 /* Silence warning about missing prototype */
891 int wmain(int argc, XML_Char **argv);
892 #endif
893 
894 int
tmain(int argc,XML_Char ** argv)895 tmain(int argc, XML_Char **argv) {
896   int i, j;
897   const XML_Char *outputDir = NULL;
898   const XML_Char *encoding = NULL;
899   unsigned processFlags = XML_MAP_FILE;
900   int windowsCodePages = 0;
901   int outputType = 0;
902   int useNamespaces = 0;
903   int requireStandalone = 0;
904   int requiresNotations = 0;
905   enum XML_ParamEntityParsing paramEntityParsing
906       = XML_PARAM_ENTITY_PARSING_NEVER;
907   int useStdin = 0;
908   XmlwfUserData userData = {NULL, NULL, NULL};
909 
910 #ifdef _MSC_VER
911   _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
912 #endif
913 
914   i = 1;
915   j = 0;
916   while (i < argc) {
917     if (j == 0) {
918       if (argv[i][0] != T('-'))
919         break;
920       if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
921         i++;
922         break;
923       }
924       j++;
925     }
926     switch (argv[i][j]) {
927     case T('r'):
928       processFlags &= ~XML_MAP_FILE;
929       j++;
930       break;
931     case T('s'):
932       requireStandalone = 1;
933       j++;
934       break;
935     case T('n'):
936       useNamespaces = 1;
937       j++;
938       break;
939     case T('p'):
940       paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
941       /* fall through */
942     case T('x'):
943       processFlags |= XML_EXTERNAL_ENTITIES;
944       j++;
945       break;
946     case T('w'):
947       windowsCodePages = 1;
948       j++;
949       break;
950     case T('m'):
951       outputType = 'm';
952       j++;
953       break;
954     case T('c'):
955       outputType = 'c';
956       useNamespaces = 0;
957       j++;
958       break;
959     case T('t'):
960       outputType = 't';
961       j++;
962       break;
963     case T('N'):
964       requiresNotations = 1;
965       j++;
966       break;
967     case T('d'):
968       if (argv[i][j + 1] == T('\0')) {
969         if (++i == argc)
970           usage(argv[0], 2);
971         outputDir = argv[i];
972       } else
973         outputDir = argv[i] + j + 1;
974       i++;
975       j = 0;
976       break;
977     case T('e'):
978       if (argv[i][j + 1] == T('\0')) {
979         if (++i == argc)
980           usage(argv[0], 2);
981         encoding = argv[i];
982       } else
983         encoding = argv[i] + j + 1;
984       i++;
985       j = 0;
986       break;
987     case T('h'):
988       usage(argv[0], 0);
989       return 0;
990     case T('v'):
991       showVersion(argv[0]);
992       return 0;
993     case T('\0'):
994       if (j > 1) {
995         i++;
996         j = 0;
997         break;
998       }
999       /* fall through */
1000     default:
1001       usage(argv[0], 2);
1002     }
1003   }
1004   if (i == argc) {
1005     useStdin = 1;
1006     processFlags &= ~XML_MAP_FILE;
1007     i--;
1008   }
1009   for (; i < argc; i++) {
1010     XML_Char *outName = 0;
1011     int result;
1012     XML_Parser parser;
1013     if (useNamespaces)
1014       parser = XML_ParserCreateNS(encoding, NSSEP);
1015     else
1016       parser = XML_ParserCreate(encoding);
1017 
1018     if (! parser) {
1019       tperror(T("Could not instantiate parser"));
1020       exit(1);
1021     }
1022 
1023     if (requireStandalone)
1024       XML_SetNotStandaloneHandler(parser, notStandalone);
1025     XML_SetParamEntityParsing(parser, paramEntityParsing);
1026     if (outputType == 't') {
1027       /* This is for doing timings; this gives a more realistic estimate of
1028          the parsing time. */
1029       outputDir = 0;
1030       XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1031       XML_SetCharacterDataHandler(parser, nopCharacterData);
1032       XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1033     } else if (outputDir) {
1034       const XML_Char *delim = T("/");
1035       const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1036       if (! useStdin) {
1037         /* Jump after last (back)slash */
1038         const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1039         if (lastDelim)
1040           file = lastDelim + 1;
1041 #if defined(_WIN32)
1042         else {
1043           const XML_Char *winDelim = T("\\");
1044           lastDelim = tcsrchr(file, winDelim[0]);
1045           if (lastDelim) {
1046             file = lastDelim + 1;
1047             delim = winDelim;
1048           }
1049         }
1050 #endif
1051       }
1052       outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
1053                                    * sizeof(XML_Char));
1054       if (! outName) {
1055         tperror(T("Could not allocate memory"));
1056         exit(1);
1057       }
1058       tcscpy(outName, outputDir);
1059       tcscat(outName, delim);
1060       tcscat(outName, file);
1061       userData.fp = tfopen(outName, T("wb"));
1062       if (! userData.fp) {
1063         tperror(outName);
1064         exit(3);
1065       }
1066       setvbuf(userData.fp, NULL, _IOFBF, 16384);
1067 #ifdef XML_UNICODE
1068       puttc(0xFEFF, userData.fp);
1069 #endif
1070       XML_SetUserData(parser, &userData);
1071       switch (outputType) {
1072       case 'm':
1073         XML_UseParserAsHandlerArg(parser);
1074         XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1075         XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1076         XML_SetCommentHandler(parser, metaComment);
1077         XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1078                                    metaEndCdataSection);
1079         XML_SetCharacterDataHandler(parser, metaCharacterData);
1080         XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1081                                   metaEndDoctypeDecl);
1082         XML_SetEntityDeclHandler(parser, metaEntityDecl);
1083         XML_SetNotationDeclHandler(parser, metaNotationDecl);
1084         XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1085                                     metaEndNamespaceDecl);
1086         metaStartDocument(parser);
1087         break;
1088       case 'c':
1089         XML_UseParserAsHandlerArg(parser);
1090         XML_SetDefaultHandler(parser, markup);
1091         XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1092         XML_SetCharacterDataHandler(parser, defaultCharacterData);
1093         XML_SetProcessingInstructionHandler(parser,
1094                                             defaultProcessingInstruction);
1095         break;
1096       default:
1097         if (useNamespaces)
1098           XML_SetElementHandler(parser, startElementNS, endElementNS);
1099         else
1100           XML_SetElementHandler(parser, startElement, endElement);
1101         XML_SetCharacterDataHandler(parser, characterData);
1102 #ifndef W3C14N
1103         XML_SetProcessingInstructionHandler(parser, processingInstruction);
1104         if (requiresNotations) {
1105           XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1106           XML_SetNotationDeclHandler(parser, notationDecl);
1107         }
1108 #endif /* not W3C14N */
1109         break;
1110       }
1111     }
1112     if (windowsCodePages)
1113       XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1114     result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1115     if (outputDir) {
1116       if (outputType == 'm')
1117         metaEndDocument(parser);
1118       fclose(userData.fp);
1119       if (! result) {
1120         tremove(outName);
1121       }
1122       free(outName);
1123     }
1124     XML_ParserFree(parser);
1125     if (! result) {
1126       exit(2);
1127     }
1128   }
1129   return 0;
1130 }
1131