xref: /freebsd/contrib/expat/xmlwf/xmlwf.c (revision 6419bb52)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000-2017 Expat development team
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32 
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stddef.h>
37 #include <string.h>
38 
39 #include "expat.h"
40 #include "codepage.h"
41 #include "internal.h" /* for UNUSED_P only */
42 #include "xmlfile.h"
43 #include "xmltchar.h"
44 
45 #ifdef _MSC_VER
46 #  include <crtdbg.h>
47 #endif
48 
49 #ifdef XML_UNICODE
50 #  include <wchar.h>
51 #endif
52 
53 /* Structures for handler user data */
54 typedef struct NotationList {
55   struct NotationList *next;
56   const XML_Char *notationName;
57   const XML_Char *systemId;
58   const XML_Char *publicId;
59 } NotationList;
60 
61 typedef struct xmlwfUserData {
62   FILE *fp;
63   NotationList *notationListHead;
64   const XML_Char *currentDoctypeName;
65 } XmlwfUserData;
66 
/* Separator character handed to the namespace-aware parser; handlers
   split names at its last occurrence.  Using the lowest non-NUL
   character value ensures proper sorting. */

#define NSSEP T('\x01')
70 
71 static void XMLCALL
72 characterData(void *userData, const XML_Char *s, int len) {
73   FILE *fp = ((XmlwfUserData *)userData)->fp;
74   for (; len > 0; --len, ++s) {
75     switch (*s) {
76     case T('&'):
77       fputts(T("&amp;"), fp);
78       break;
79     case T('<'):
80       fputts(T("&lt;"), fp);
81       break;
82     case T('>'):
83       fputts(T("&gt;"), fp);
84       break;
85 #ifdef W3C14N
86     case 13:
87       fputts(T("&#xD;"), fp);
88       break;
89 #else
90     case T('"'):
91       fputts(T("&quot;"), fp);
92       break;
93     case 9:
94     case 10:
95     case 13:
96       ftprintf(fp, T("&#%d;"), *s);
97       break;
98 #endif
99     default:
100       puttc(*s, fp);
101       break;
102     }
103   }
104 }
105 
106 static void
107 attributeValue(FILE *fp, const XML_Char *s) {
108   puttc(T('='), fp);
109   puttc(T('"'), fp);
110   assert(s);
111   for (;;) {
112     switch (*s) {
113     case 0:
114     case NSSEP:
115       puttc(T('"'), fp);
116       return;
117     case T('&'):
118       fputts(T("&amp;"), fp);
119       break;
120     case T('<'):
121       fputts(T("&lt;"), fp);
122       break;
123     case T('"'):
124       fputts(T("&quot;"), fp);
125       break;
126 #ifdef W3C14N
127     case 9:
128       fputts(T("&#x9;"), fp);
129       break;
130     case 10:
131       fputts(T("&#xA;"), fp);
132       break;
133     case 13:
134       fputts(T("&#xD;"), fp);
135       break;
136 #else
137     case T('>'):
138       fputts(T("&gt;"), fp);
139       break;
140     case 9:
141     case 10:
142     case 13:
143       ftprintf(fp, T("&#%d;"), *s);
144       break;
145 #endif
146     default:
147       puttc(*s, fp);
148       break;
149     }
150     s++;
151   }
152 }
153 
154 /* Lexicographically comparing UTF-8 encoded attribute values,
155 is equivalent to lexicographically comparing based on the character number. */
156 
157 static int
158 attcmp(const void *att1, const void *att2) {
159   return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
160 }
161 
162 static void XMLCALL
163 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
164   int nAtts;
165   const XML_Char **p;
166   FILE *fp = ((XmlwfUserData *)userData)->fp;
167   puttc(T('<'), fp);
168   fputts(name, fp);
169 
170   p = atts;
171   while (*p)
172     ++p;
173   nAtts = (int)((p - atts) >> 1);
174   if (nAtts > 1)
175     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
176   while (*atts) {
177     puttc(T(' '), fp);
178     fputts(*atts++, fp);
179     attributeValue(fp, *atts);
180     atts++;
181   }
182   puttc(T('>'), fp);
183 }
184 
185 static void XMLCALL
186 endElement(void *userData, const XML_Char *name) {
187   FILE *fp = ((XmlwfUserData *)userData)->fp;
188   puttc(T('<'), fp);
189   puttc(T('/'), fp);
190   fputts(name, fp);
191   puttc(T('>'), fp);
192 }
193 
194 static int
195 nsattcmp(const void *p1, const void *p2) {
196   const XML_Char *att1 = *(const XML_Char **)p1;
197   const XML_Char *att2 = *(const XML_Char **)p2;
198   int sep1 = (tcsrchr(att1, NSSEP) != 0);
199   int sep2 = (tcsrchr(att1, NSSEP) != 0);
200   if (sep1 != sep2)
201     return sep1 - sep2;
202   return tcscmp(att1, att2);
203 }
204 
205 static void XMLCALL
206 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
207   int nAtts;
208   int nsi;
209   const XML_Char **p;
210   FILE *fp = ((XmlwfUserData *)userData)->fp;
211   const XML_Char *sep;
212   puttc(T('<'), fp);
213 
214   sep = tcsrchr(name, NSSEP);
215   if (sep) {
216     fputts(T("n1:"), fp);
217     fputts(sep + 1, fp);
218     fputts(T(" xmlns:n1"), fp);
219     attributeValue(fp, name);
220     nsi = 2;
221   } else {
222     fputts(name, fp);
223     nsi = 1;
224   }
225 
226   p = atts;
227   while (*p)
228     ++p;
229   nAtts = (int)((p - atts) >> 1);
230   if (nAtts > 1)
231     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
232   while (*atts) {
233     name = *atts++;
234     sep = tcsrchr(name, NSSEP);
235     puttc(T(' '), fp);
236     if (sep) {
237       ftprintf(fp, T("n%d:"), nsi);
238       fputts(sep + 1, fp);
239     } else
240       fputts(name, fp);
241     attributeValue(fp, *atts);
242     if (sep) {
243       ftprintf(fp, T(" xmlns:n%d"), nsi++);
244       attributeValue(fp, name);
245     }
246     atts++;
247   }
248   puttc(T('>'), fp);
249 }
250 
251 static void XMLCALL
252 endElementNS(void *userData, const XML_Char *name) {
253   FILE *fp = ((XmlwfUserData *)userData)->fp;
254   const XML_Char *sep;
255   puttc(T('<'), fp);
256   puttc(T('/'), fp);
257   sep = tcsrchr(name, NSSEP);
258   if (sep) {
259     fputts(T("n1:"), fp);
260     fputts(sep + 1, fp);
261   } else
262     fputts(name, fp);
263   puttc(T('>'), fp);
264 }
265 
266 #ifndef W3C14N
267 
268 static void XMLCALL
269 processingInstruction(void *userData, const XML_Char *target,
270                       const XML_Char *data) {
271   FILE *fp = ((XmlwfUserData *)userData)->fp;
272   puttc(T('<'), fp);
273   puttc(T('?'), fp);
274   fputts(target, fp);
275   puttc(T(' '), fp);
276   fputts(data, fp);
277   puttc(T('?'), fp);
278   puttc(T('>'), fp);
279 }
280 
281 static XML_Char *
282 xcsdup(const XML_Char *s) {
283   XML_Char *result;
284   int count = 0;
285   int numBytes;
286 
287   /* Get the length of the string, including terminator */
288   while (s[count++] != 0) {
289     /* Do nothing */
290   }
291   numBytes = count * sizeof(XML_Char);
292   result = malloc(numBytes);
293   if (result == NULL)
294     return NULL;
295   memcpy(result, s, numBytes);
296   return result;
297 }
298 
299 static void XMLCALL
300 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
301                  const XML_Char *sysid, const XML_Char *publid,
302                  int has_internal_subset) {
303   XmlwfUserData *data = (XmlwfUserData *)userData;
304   UNUSED_P(sysid);
305   UNUSED_P(publid);
306   UNUSED_P(has_internal_subset);
307   data->currentDoctypeName = xcsdup(doctypeName);
308 }
309 
310 static void
311 freeNotations(XmlwfUserData *data) {
312   NotationList *notationListHead = data->notationListHead;
313 
314   while (notationListHead != NULL) {
315     NotationList *next = notationListHead->next;
316     free((void *)notationListHead->notationName);
317     free((void *)notationListHead->systemId);
318     free((void *)notationListHead->publicId);
319     free(notationListHead);
320     notationListHead = next;
321   }
322   data->notationListHead = NULL;
323 }
324 
325 static int
326 xcscmp(const XML_Char *xs, const XML_Char *xt) {
327   while (*xs != 0 && *xt != 0) {
328     if (*xs < *xt)
329       return -1;
330     if (*xs > *xt)
331       return 1;
332     xs++;
333     xt++;
334   }
335   if (*xs < *xt)
336     return -1;
337   if (*xs > *xt)
338     return 1;
339   return 0;
340 }
341 
342 static int
343 notationCmp(const void *a, const void *b) {
344   const NotationList *const n1 = *(NotationList **)a;
345   const NotationList *const n2 = *(NotationList **)b;
346 
347   return xcscmp(n1->notationName, n2->notationName);
348 }
349 
350 static void XMLCALL
351 endDoctypeDecl(void *userData) {
352   XmlwfUserData *data = (XmlwfUserData *)userData;
353   NotationList **notations;
354   int notationCount = 0;
355   NotationList *p;
356   int i;
357 
358   /* How many notations do we have? */
359   for (p = data->notationListHead; p != NULL; p = p->next)
360     notationCount++;
361   if (notationCount == 0) {
362     /* Nothing to report */
363     free((void *)data->currentDoctypeName);
364     data->currentDoctypeName = NULL;
365     return;
366   }
367 
368   notations = malloc(notationCount * sizeof(NotationList *));
369   if (notations == NULL) {
370     fprintf(stderr, "Unable to sort notations");
371     freeNotations(data);
372     return;
373   }
374 
375   for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
376     notations[i] = p;
377   }
378   qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
379 
380   /* Output the DOCTYPE header */
381   fputts(T("<!DOCTYPE "), data->fp);
382   fputts(data->currentDoctypeName, data->fp);
383   fputts(T(" [\n"), data->fp);
384 
385   /* Now the NOTATIONs */
386   for (i = 0; i < notationCount; i++) {
387     fputts(T("<!NOTATION "), data->fp);
388     fputts(notations[i]->notationName, data->fp);
389     if (notations[i]->publicId != NULL) {
390       fputts(T(" PUBLIC '"), data->fp);
391       fputts(notations[i]->publicId, data->fp);
392       puttc(T('\''), data->fp);
393       if (notations[i]->systemId != NULL) {
394         puttc(T(' '), data->fp);
395         puttc(T('\''), data->fp);
396         fputts(notations[i]->systemId, data->fp);
397         puttc(T('\''), data->fp);
398       }
399     } else if (notations[i]->systemId != NULL) {
400       fputts(T(" SYSTEM '"), data->fp);
401       fputts(notations[i]->systemId, data->fp);
402       puttc(T('\''), data->fp);
403     }
404     puttc(T('>'), data->fp);
405     puttc(T('\n'), data->fp);
406   }
407 
408   /* Finally end the DOCTYPE */
409   fputts(T("]>\n"), data->fp);
410 
411   free(notations);
412   freeNotations(data);
413   free((void *)data->currentDoctypeName);
414   data->currentDoctypeName = NULL;
415 }
416 
417 static void XMLCALL
418 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
419              const XML_Char *systemId, const XML_Char *publicId) {
420   XmlwfUserData *data = (XmlwfUserData *)userData;
421   NotationList *entry = malloc(sizeof(NotationList));
422   const char *errorMessage = "Unable to store NOTATION for output\n";
423 
424   UNUSED_P(base);
425   if (entry == NULL) {
426     fputs(errorMessage, stderr);
427     return; /* Nothing we can really do about this */
428   }
429   entry->notationName = xcsdup(notationName);
430   if (entry->notationName == NULL) {
431     fputs(errorMessage, stderr);
432     free(entry);
433     return;
434   }
435   if (systemId != NULL) {
436     entry->systemId = xcsdup(systemId);
437     if (entry->systemId == NULL) {
438       fputs(errorMessage, stderr);
439       free((void *)entry->notationName);
440       free(entry);
441       return;
442     }
443   } else {
444     entry->systemId = NULL;
445   }
446   if (publicId != NULL) {
447     entry->publicId = xcsdup(publicId);
448     if (entry->publicId == NULL) {
449       fputs(errorMessage, stderr);
450       free((void *)entry->systemId); /* Safe if it's NULL */
451       free((void *)entry->notationName);
452       free(entry);
453       return;
454     }
455   } else {
456     entry->publicId = NULL;
457   }
458 
459   entry->next = data->notationListHead;
460   data->notationListHead = entry;
461 }
462 
463 #endif /* not W3C14N */
464 
465 static void XMLCALL
466 defaultCharacterData(void *userData, const XML_Char *s, int len) {
467   UNUSED_P(s);
468   UNUSED_P(len);
469   XML_DefaultCurrent((XML_Parser)userData);
470 }
471 
472 static void XMLCALL
473 defaultStartElement(void *userData, const XML_Char *name,
474                     const XML_Char **atts) {
475   UNUSED_P(name);
476   UNUSED_P(atts);
477   XML_DefaultCurrent((XML_Parser)userData);
478 }
479 
480 static void XMLCALL
481 defaultEndElement(void *userData, const XML_Char *name) {
482   UNUSED_P(name);
483   XML_DefaultCurrent((XML_Parser)userData);
484 }
485 
486 static void XMLCALL
487 defaultProcessingInstruction(void *userData, const XML_Char *target,
488                              const XML_Char *data) {
489   UNUSED_P(target);
490   UNUSED_P(data);
491   XML_DefaultCurrent((XML_Parser)userData);
492 }
493 
494 static void XMLCALL
495 nopCharacterData(void *userData, const XML_Char *s, int len) {
496   UNUSED_P(userData);
497   UNUSED_P(s);
498   UNUSED_P(len);
499 }
500 
501 static void XMLCALL
502 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
503   UNUSED_P(userData);
504   UNUSED_P(name);
505   UNUSED_P(atts);
506 }
507 
508 static void XMLCALL
509 nopEndElement(void *userData, const XML_Char *name) {
510   UNUSED_P(userData);
511   UNUSED_P(name);
512 }
513 
514 static void XMLCALL
515 nopProcessingInstruction(void *userData, const XML_Char *target,
516                          const XML_Char *data) {
517   UNUSED_P(userData);
518   UNUSED_P(target);
519   UNUSED_P(data);
520 }
521 
522 static void XMLCALL
523 markup(void *userData, const XML_Char *s, int len) {
524   FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
525   for (; len > 0; --len, ++s)
526     puttc(*s, fp);
527 }
528 
529 static void
530 metaLocation(XML_Parser parser) {
531   const XML_Char *uri = XML_GetBase(parser);
532   FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
533   if (uri)
534     ftprintf(fp, T(" uri=\"%s\""), uri);
535   ftprintf(fp,
536            T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
537                T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
538                    T(XML_FMT_INT_MOD) T("u\""),
539            XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
540            XML_GetCurrentLineNumber(parser),
541            XML_GetCurrentColumnNumber(parser));
542 }
543 
544 static void
545 metaStartDocument(void *userData) {
546   fputts(T("<document>\n"),
547          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
548 }
549 
550 static void
551 metaEndDocument(void *userData) {
552   fputts(T("</document>\n"),
553          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
554 }
555 
556 static void XMLCALL
557 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
558   XML_Parser parser = (XML_Parser)userData;
559   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
560   FILE *fp = data->fp;
561   const XML_Char **specifiedAttsEnd
562       = atts + XML_GetSpecifiedAttributeCount(parser);
563   const XML_Char **idAttPtr;
564   int idAttIndex = XML_GetIdAttributeIndex(parser);
565   if (idAttIndex < 0)
566     idAttPtr = 0;
567   else
568     idAttPtr = atts + idAttIndex;
569 
570   ftprintf(fp, T("<starttag name=\"%s\""), name);
571   metaLocation(parser);
572   if (*atts) {
573     fputts(T(">\n"), fp);
574     do {
575       ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
576       characterData(data, atts[1], (int)tcslen(atts[1]));
577       if (atts >= specifiedAttsEnd)
578         fputts(T("\" defaulted=\"yes\"/>\n"), fp);
579       else if (atts == idAttPtr)
580         fputts(T("\" id=\"yes\"/>\n"), fp);
581       else
582         fputts(T("\"/>\n"), fp);
583     } while (*(atts += 2));
584     fputts(T("</starttag>\n"), fp);
585   } else
586     fputts(T("/>\n"), fp);
587 }
588 
589 static void XMLCALL
590 metaEndElement(void *userData, const XML_Char *name) {
591   XML_Parser parser = (XML_Parser)userData;
592   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
593   FILE *fp = data->fp;
594   ftprintf(fp, T("<endtag name=\"%s\""), name);
595   metaLocation(parser);
596   fputts(T("/>\n"), fp);
597 }
598 
599 static void XMLCALL
600 metaProcessingInstruction(void *userData, const XML_Char *target,
601                           const XML_Char *data) {
602   XML_Parser parser = (XML_Parser)userData;
603   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
604   FILE *fp = usrData->fp;
605   ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
606   characterData(usrData, data, (int)tcslen(data));
607   puttc(T('"'), fp);
608   metaLocation(parser);
609   fputts(T("/>\n"), fp);
610 }
611 
612 static void XMLCALL
613 metaComment(void *userData, const XML_Char *data) {
614   XML_Parser parser = (XML_Parser)userData;
615   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
616   FILE *fp = usrData->fp;
617   fputts(T("<comment data=\""), fp);
618   characterData(usrData, data, (int)tcslen(data));
619   puttc(T('"'), fp);
620   metaLocation(parser);
621   fputts(T("/>\n"), fp);
622 }
623 
624 static void XMLCALL
625 metaStartCdataSection(void *userData) {
626   XML_Parser parser = (XML_Parser)userData;
627   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
628   FILE *fp = data->fp;
629   fputts(T("<startcdata"), fp);
630   metaLocation(parser);
631   fputts(T("/>\n"), fp);
632 }
633 
634 static void XMLCALL
635 metaEndCdataSection(void *userData) {
636   XML_Parser parser = (XML_Parser)userData;
637   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
638   FILE *fp = data->fp;
639   fputts(T("<endcdata"), fp);
640   metaLocation(parser);
641   fputts(T("/>\n"), fp);
642 }
643 
644 static void XMLCALL
645 metaCharacterData(void *userData, const XML_Char *s, int len) {
646   XML_Parser parser = (XML_Parser)userData;
647   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
648   FILE *fp = data->fp;
649   fputts(T("<chars str=\""), fp);
650   characterData(data, s, len);
651   puttc(T('"'), fp);
652   metaLocation(parser);
653   fputts(T("/>\n"), fp);
654 }
655 
656 static void XMLCALL
657 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
658                      const XML_Char *sysid, const XML_Char *pubid,
659                      int has_internal_subset) {
660   XML_Parser parser = (XML_Parser)userData;
661   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
662   FILE *fp = data->fp;
663   UNUSED_P(sysid);
664   UNUSED_P(pubid);
665   UNUSED_P(has_internal_subset);
666   ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
667   metaLocation(parser);
668   fputts(T("/>\n"), fp);
669 }
670 
671 static void XMLCALL
672 metaEndDoctypeDecl(void *userData) {
673   XML_Parser parser = (XML_Parser)userData;
674   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
675   FILE *fp = data->fp;
676   fputts(T("<enddoctype"), fp);
677   metaLocation(parser);
678   fputts(T("/>\n"), fp);
679 }
680 
681 static void XMLCALL
682 metaNotationDecl(void *userData, const XML_Char *notationName,
683                  const XML_Char *base, const XML_Char *systemId,
684                  const XML_Char *publicId) {
685   XML_Parser parser = (XML_Parser)userData;
686   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
687   FILE *fp = data->fp;
688   UNUSED_P(base);
689   ftprintf(fp, T("<notation name=\"%s\""), notationName);
690   if (publicId)
691     ftprintf(fp, T(" public=\"%s\""), publicId);
692   if (systemId) {
693     fputts(T(" system=\""), fp);
694     characterData(data, systemId, (int)tcslen(systemId));
695     puttc(T('"'), fp);
696   }
697   metaLocation(parser);
698   fputts(T("/>\n"), fp);
699 }
700 
701 static void XMLCALL
702 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
703                const XML_Char *value, int value_length, const XML_Char *base,
704                const XML_Char *systemId, const XML_Char *publicId,
705                const XML_Char *notationName) {
706   XML_Parser parser = (XML_Parser)userData;
707   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
708   FILE *fp = data->fp;
709 
710   UNUSED_P(is_param);
711   UNUSED_P(base);
712   if (value) {
713     ftprintf(fp, T("<entity name=\"%s\""), entityName);
714     metaLocation(parser);
715     puttc(T('>'), fp);
716     characterData(data, value, value_length);
717     fputts(T("</entity/>\n"), fp);
718   } else if (notationName) {
719     ftprintf(fp, T("<entity name=\"%s\""), entityName);
720     if (publicId)
721       ftprintf(fp, T(" public=\"%s\""), publicId);
722     fputts(T(" system=\""), fp);
723     characterData(data, systemId, (int)tcslen(systemId));
724     puttc(T('"'), fp);
725     ftprintf(fp, T(" notation=\"%s\""), notationName);
726     metaLocation(parser);
727     fputts(T("/>\n"), fp);
728   } else {
729     ftprintf(fp, T("<entity name=\"%s\""), entityName);
730     if (publicId)
731       ftprintf(fp, T(" public=\"%s\""), publicId);
732     fputts(T(" system=\""), fp);
733     characterData(data, systemId, (int)tcslen(systemId));
734     puttc(T('"'), fp);
735     metaLocation(parser);
736     fputts(T("/>\n"), fp);
737   }
738 }
739 
740 static void XMLCALL
741 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
742                        const XML_Char *uri) {
743   XML_Parser parser = (XML_Parser)userData;
744   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
745   FILE *fp = data->fp;
746   fputts(T("<startns"), fp);
747   if (prefix)
748     ftprintf(fp, T(" prefix=\"%s\""), prefix);
749   if (uri) {
750     fputts(T(" ns=\""), fp);
751     characterData(data, uri, (int)tcslen(uri));
752     fputts(T("\"/>\n"), fp);
753   } else
754     fputts(T("/>\n"), fp);
755 }
756 
757 static void XMLCALL
758 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
759   XML_Parser parser = (XML_Parser)userData;
760   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
761   FILE *fp = data->fp;
762   if (! prefix)
763     fputts(T("<endns/>\n"), fp);
764   else
765     ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
766 }
767 
768 static int XMLCALL
769 unknownEncodingConvert(void *data, const char *p) {
770   return codepageConvert(*(int *)data, p);
771 }
772 
773 static int XMLCALL
774 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
775   int cp;
776   static const XML_Char prefixL[] = T("windows-");
777   static const XML_Char prefixU[] = T("WINDOWS-");
778   int i;
779 
780   UNUSED_P(userData);
781   for (i = 0; prefixU[i]; i++)
782     if (name[i] != prefixU[i] && name[i] != prefixL[i])
783       return 0;
784 
785   cp = 0;
786   for (; name[i]; i++) {
787     static const XML_Char digits[] = T("0123456789");
788     const XML_Char *s = tcschr(digits, name[i]);
789     if (! s)
790       return 0;
791     cp *= 10;
792     cp += (int)(s - digits);
793     if (cp >= 0x10000)
794       return 0;
795   }
796   if (! codepageMap(cp, info->map))
797     return 0;
798   info->convert = unknownEncodingConvert;
799   /* We could just cast the code page integer to a void *,
800   and avoid the use of release. */
801   info->release = free;
802   info->data = malloc(sizeof(int));
803   if (! info->data)
804     return 0;
805   *(int *)info->data = cp;
806   return 1;
807 }
808 
809 static int XMLCALL
810 notStandalone(void *userData) {
811   UNUSED_P(userData);
812   return 0;
813 }
814 
815 static void
816 showVersion(XML_Char *prog) {
817   XML_Char *s = prog;
818   XML_Char ch;
819   const XML_Feature *features = XML_GetFeatureList();
820   while ((ch = *s) != 0) {
821     if (ch == '/'
822 #if defined(_WIN32)
823         || ch == '\\'
824 #endif
825     )
826       prog = s + 1;
827     ++s;
828   }
829   ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
830   if (features != NULL && features[0].feature != XML_FEATURE_END) {
831     int i = 1;
832     ftprintf(stdout, T("%s"), features[0].name);
833     if (features[0].value)
834       ftprintf(stdout, T("=%ld"), features[0].value);
835     while (features[i].feature != XML_FEATURE_END) {
836       ftprintf(stdout, T(", %s"), features[i].name);
837       if (features[i].value)
838         ftprintf(stdout, T("=%ld"), features[i].value);
839       ++i;
840     }
841     ftprintf(stdout, T("\n"));
842   }
843 }
844 
845 static void
846 usage(const XML_Char *prog, int rc) {
847   ftprintf(
848       stderr,
849       /* Generated with:
850        * $ xmlwf/xmlwf_helpgen.sh
851        */
852       /* clang-format off */
853       T("usage: %s [-s] [-n] [-p] [-x] [-e ENCODING] [-w] [-r] [-d DIRECTORY]\n")
854       T("             [-c | -m | -t] [-N]\n")
855       T("             [FILE [FILE ...]]\n")
856       T("\n")
857       T("xmlwf - Determines if an XML document is well-formed\n")
858       T("\n")
859       T("positional arguments:\n")
860       T("  FILE          files to process (default: STDIN)\n")
861       T("\n")
862       T("input control arguments:\n")
863       T("  -s            print an error if the document is not [s]tandalone\n")
864       T("  -n            enable [n]amespace processing\n")
865       T("  -p            enable processing external DTDs and [p]arameter entities\n")
866       T("  -x            enable processing of e[x]ternal entities\n")
867       T("  -e ENCODING   override any in-document [e]ncoding declaration\n")
868       T("  -w            enable support for [W]indows code pages\n")
869       T("  -r            disable memory-mapping and use normal file [r]ead IO calls instead\n")
870       T("\n")
871       T("output control arguments:\n")
872       T("  -d DIRECTORY  output [d]estination directory\n")
873       T("  -c            write a [c]opy of input XML, not canonical XML\n")
874       T("  -m            write [m]eta XML, not canonical XML\n")
875       T("  -t            write no XML output for [t]iming of plain parsing\n")
876       T("  -N            enable adding doctype and [n]otation declarations\n")
877       T("\n")
878       T("info arguments:\n")
879       T("  -h            show this [h]elp message and exit\n")
880       T("  -v            show program's [v]ersion number and exit\n")
881       T("\n")
882       T("libexpat is software libre, licensed under the MIT license.\n")
883       T("Please report bugs at https://github.com/libexpat/libexpat/issues.  Thank you!\n")
884       , /* clang-format on */
885       prog);
886   exit(rc);
887 }
888 
889 #if defined(__MINGW32__) && defined(XML_UNICODE)
890 /* Silence warning about missing prototype */
891 int wmain(int argc, XML_Char **argv);
892 #endif
893 
894 int
895 tmain(int argc, XML_Char **argv) {
896   int i, j;
897   const XML_Char *outputDir = NULL;
898   const XML_Char *encoding = NULL;
899   unsigned processFlags = XML_MAP_FILE;
900   int windowsCodePages = 0;
901   int outputType = 0;
902   int useNamespaces = 0;
903   int requireStandalone = 0;
904   int requiresNotations = 0;
905   enum XML_ParamEntityParsing paramEntityParsing
906       = XML_PARAM_ENTITY_PARSING_NEVER;
907   int useStdin = 0;
908   XmlwfUserData userData = {NULL, NULL, NULL};
909 
910 #ifdef _MSC_VER
911   _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
912 #endif
913 
914   i = 1;
915   j = 0;
916   while (i < argc) {
917     if (j == 0) {
918       if (argv[i][0] != T('-'))
919         break;
920       if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
921         i++;
922         break;
923       }
924       j++;
925     }
926     switch (argv[i][j]) {
927     case T('r'):
928       processFlags &= ~XML_MAP_FILE;
929       j++;
930       break;
931     case T('s'):
932       requireStandalone = 1;
933       j++;
934       break;
935     case T('n'):
936       useNamespaces = 1;
937       j++;
938       break;
939     case T('p'):
940       paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
941       /* fall through */
942     case T('x'):
943       processFlags |= XML_EXTERNAL_ENTITIES;
944       j++;
945       break;
946     case T('w'):
947       windowsCodePages = 1;
948       j++;
949       break;
950     case T('m'):
951       outputType = 'm';
952       j++;
953       break;
954     case T('c'):
955       outputType = 'c';
956       useNamespaces = 0;
957       j++;
958       break;
959     case T('t'):
960       outputType = 't';
961       j++;
962       break;
963     case T('N'):
964       requiresNotations = 1;
965       j++;
966       break;
967     case T('d'):
968       if (argv[i][j + 1] == T('\0')) {
969         if (++i == argc)
970           usage(argv[0], 2);
971         outputDir = argv[i];
972       } else
973         outputDir = argv[i] + j + 1;
974       i++;
975       j = 0;
976       break;
977     case T('e'):
978       if (argv[i][j + 1] == T('\0')) {
979         if (++i == argc)
980           usage(argv[0], 2);
981         encoding = argv[i];
982       } else
983         encoding = argv[i] + j + 1;
984       i++;
985       j = 0;
986       break;
987     case T('h'):
988       usage(argv[0], 0);
989       return 0;
990     case T('v'):
991       showVersion(argv[0]);
992       return 0;
993     case T('\0'):
994       if (j > 1) {
995         i++;
996         j = 0;
997         break;
998       }
999       /* fall through */
1000     default:
1001       usage(argv[0], 2);
1002     }
1003   }
1004   if (i == argc) {
1005     useStdin = 1;
1006     processFlags &= ~XML_MAP_FILE;
1007     i--;
1008   }
1009   for (; i < argc; i++) {
1010     XML_Char *outName = 0;
1011     int result;
1012     XML_Parser parser;
1013     if (useNamespaces)
1014       parser = XML_ParserCreateNS(encoding, NSSEP);
1015     else
1016       parser = XML_ParserCreate(encoding);
1017 
1018     if (! parser) {
1019       tperror(T("Could not instantiate parser"));
1020       exit(1);
1021     }
1022 
1023     if (requireStandalone)
1024       XML_SetNotStandaloneHandler(parser, notStandalone);
1025     XML_SetParamEntityParsing(parser, paramEntityParsing);
1026     if (outputType == 't') {
1027       /* This is for doing timings; this gives a more realistic estimate of
1028          the parsing time. */
1029       outputDir = 0;
1030       XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1031       XML_SetCharacterDataHandler(parser, nopCharacterData);
1032       XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1033     } else if (outputDir) {
1034       const XML_Char *delim = T("/");
1035       const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1036       if (! useStdin) {
1037         /* Jump after last (back)slash */
1038         const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1039         if (lastDelim)
1040           file = lastDelim + 1;
1041 #if defined(_WIN32)
1042         else {
1043           const XML_Char *winDelim = T("\\");
1044           lastDelim = tcsrchr(file, winDelim[0]);
1045           if (lastDelim) {
1046             file = lastDelim + 1;
1047             delim = winDelim;
1048           }
1049         }
1050 #endif
1051       }
1052       outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
1053                                    * sizeof(XML_Char));
1054       tcscpy(outName, outputDir);
1055       tcscat(outName, delim);
1056       tcscat(outName, file);
1057       userData.fp = tfopen(outName, T("wb"));
1058       if (! userData.fp) {
1059         tperror(outName);
1060         exit(1);
1061       }
1062       setvbuf(userData.fp, NULL, _IOFBF, 16384);
1063 #ifdef XML_UNICODE
1064       puttc(0xFEFF, userData.fp);
1065 #endif
1066       XML_SetUserData(parser, &userData);
1067       switch (outputType) {
1068       case 'm':
1069         XML_UseParserAsHandlerArg(parser);
1070         XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1071         XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1072         XML_SetCommentHandler(parser, metaComment);
1073         XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1074                                    metaEndCdataSection);
1075         XML_SetCharacterDataHandler(parser, metaCharacterData);
1076         XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1077                                   metaEndDoctypeDecl);
1078         XML_SetEntityDeclHandler(parser, metaEntityDecl);
1079         XML_SetNotationDeclHandler(parser, metaNotationDecl);
1080         XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1081                                     metaEndNamespaceDecl);
1082         metaStartDocument(parser);
1083         break;
1084       case 'c':
1085         XML_UseParserAsHandlerArg(parser);
1086         XML_SetDefaultHandler(parser, markup);
1087         XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1088         XML_SetCharacterDataHandler(parser, defaultCharacterData);
1089         XML_SetProcessingInstructionHandler(parser,
1090                                             defaultProcessingInstruction);
1091         break;
1092       default:
1093         if (useNamespaces)
1094           XML_SetElementHandler(parser, startElementNS, endElementNS);
1095         else
1096           XML_SetElementHandler(parser, startElement, endElement);
1097         XML_SetCharacterDataHandler(parser, characterData);
1098 #ifndef W3C14N
1099         XML_SetProcessingInstructionHandler(parser, processingInstruction);
1100         if (requiresNotations) {
1101           XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1102           XML_SetNotationDeclHandler(parser, notationDecl);
1103         }
1104 #endif /* not W3C14N */
1105         break;
1106       }
1107     }
1108     if (windowsCodePages)
1109       XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1110     result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1111     if (outputDir) {
1112       if (outputType == 'm')
1113         metaEndDocument(parser);
1114       fclose(userData.fp);
1115       if (! result) {
1116         tremove(outName);
1117       }
1118       free(outName);
1119     }
1120     XML_ParserFree(parser);
1121     if (! result) {
1122       exit(2);
1123     }
1124   }
1125   return 0;
1126 }
1127