1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stddef.h>
37 #include <string.h>
38
39 #include "expat.h"
40 #include "codepage.h"
41 #include "internal.h" /* for UNUSED_P only */
42 #include "xmlfile.h"
43 #include "xmltchar.h"
44
45 #ifdef _MSC_VER
46 # include <crtdbg.h>
47 #endif
48
49 #ifdef XML_UNICODE
50 # include <wchar.h>
51 #endif
52
53 /* Structures for handler user data */
/* One NOTATION declaration collected while parsing the DTD.  Entries form a
   singly linked list; the three strings are heap copies that are released
   together with the node. */
typedef struct NotationList {
  struct NotationList *next;    /* next entry, or NULL at the end of the list */
  const XML_Char *notationName; /* name of the notation */
  const XML_Char *systemId;     /* system identifier, or NULL if absent */
  const XML_Char *publicId;     /* public identifier, or NULL if absent */
} NotationList;
60
/* State shared by the output handlers of one xmlwf run. */
typedef struct xmlwfUserData {
  FILE *fp;                          /* stream the handlers write to */
  NotationList *notationListHead;    /* notations seen so far; NULL if none */
  const XML_Char *currentDoctypeName; /* heap copy of the DOCTYPE name while
                                         a doctype is being processed */
} XmlwfUserData;
66
/* Separator handed to XML_ParserCreateNS; it sits between the namespace URI
   and the local part of expanded names.  Using the lowest non-NUL character
   ensures proper sorting. */

#define NSSEP T('\001')
70
71 static void XMLCALL
characterData(void * userData,const XML_Char * s,int len)72 characterData(void *userData, const XML_Char *s, int len) {
73 FILE *fp = ((XmlwfUserData *)userData)->fp;
74 for (; len > 0; --len, ++s) {
75 switch (*s) {
76 case T('&'):
77 fputts(T("&"), fp);
78 break;
79 case T('<'):
80 fputts(T("<"), fp);
81 break;
82 case T('>'):
83 fputts(T(">"), fp);
84 break;
85 #ifdef W3C14N
86 case 13:
87 fputts(T("
"), fp);
88 break;
89 #else
90 case T('"'):
91 fputts(T("""), fp);
92 break;
93 case 9:
94 case 10:
95 case 13:
96 ftprintf(fp, T("&#%d;"), *s);
97 break;
98 #endif
99 default:
100 puttc(*s, fp);
101 break;
102 }
103 }
104 }
105
106 static void
attributeValue(FILE * fp,const XML_Char * s)107 attributeValue(FILE *fp, const XML_Char *s) {
108 puttc(T('='), fp);
109 puttc(T('"'), fp);
110 assert(s);
111 for (;;) {
112 switch (*s) {
113 case 0:
114 case NSSEP:
115 puttc(T('"'), fp);
116 return;
117 case T('&'):
118 fputts(T("&"), fp);
119 break;
120 case T('<'):
121 fputts(T("<"), fp);
122 break;
123 case T('"'):
124 fputts(T("""), fp);
125 break;
126 #ifdef W3C14N
127 case 9:
128 fputts(T("	"), fp);
129 break;
130 case 10:
131 fputts(T("
"), fp);
132 break;
133 case 13:
134 fputts(T("
"), fp);
135 break;
136 #else
137 case T('>'):
138 fputts(T(">"), fp);
139 break;
140 case 9:
141 case 10:
142 case 13:
143 ftprintf(fp, T("&#%d;"), *s);
144 break;
145 #endif
146 default:
147 puttc(*s, fp);
148 break;
149 }
150 s++;
151 }
152 }
153
154 /* Lexicographically comparing UTF-8 encoded attribute values,
155 is equivalent to lexicographically comparing based on the character number. */
156
157 static int
attcmp(const void * att1,const void * att2)158 attcmp(const void *att1, const void *att2) {
159 return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
160 }
161
162 static void XMLCALL
startElement(void * userData,const XML_Char * name,const XML_Char ** atts)163 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
164 int nAtts;
165 const XML_Char **p;
166 FILE *fp = ((XmlwfUserData *)userData)->fp;
167 puttc(T('<'), fp);
168 fputts(name, fp);
169
170 p = atts;
171 while (*p)
172 ++p;
173 nAtts = (int)((p - atts) >> 1);
174 if (nAtts > 1)
175 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
176 while (*atts) {
177 puttc(T(' '), fp);
178 fputts(*atts++, fp);
179 attributeValue(fp, *atts);
180 atts++;
181 }
182 puttc(T('>'), fp);
183 }
184
185 static void XMLCALL
endElement(void * userData,const XML_Char * name)186 endElement(void *userData, const XML_Char *name) {
187 FILE *fp = ((XmlwfUserData *)userData)->fp;
188 puttc(T('<'), fp);
189 puttc(T('/'), fp);
190 fputts(name, fp);
191 puttc(T('>'), fp);
192 }
193
194 static int
nsattcmp(const void * p1,const void * p2)195 nsattcmp(const void *p1, const void *p2) {
196 const XML_Char *att1 = *(const XML_Char **)p1;
197 const XML_Char *att2 = *(const XML_Char **)p2;
198 int sep1 = (tcsrchr(att1, NSSEP) != 0);
199 int sep2 = (tcsrchr(att1, NSSEP) != 0);
200 if (sep1 != sep2)
201 return sep1 - sep2;
202 return tcscmp(att1, att2);
203 }
204
205 static void XMLCALL
startElementNS(void * userData,const XML_Char * name,const XML_Char ** atts)206 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
207 int nAtts;
208 int nsi;
209 const XML_Char **p;
210 FILE *fp = ((XmlwfUserData *)userData)->fp;
211 const XML_Char *sep;
212 puttc(T('<'), fp);
213
214 sep = tcsrchr(name, NSSEP);
215 if (sep) {
216 fputts(T("n1:"), fp);
217 fputts(sep + 1, fp);
218 fputts(T(" xmlns:n1"), fp);
219 attributeValue(fp, name);
220 nsi = 2;
221 } else {
222 fputts(name, fp);
223 nsi = 1;
224 }
225
226 p = atts;
227 while (*p)
228 ++p;
229 nAtts = (int)((p - atts) >> 1);
230 if (nAtts > 1)
231 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
232 while (*atts) {
233 name = *atts++;
234 sep = tcsrchr(name, NSSEP);
235 puttc(T(' '), fp);
236 if (sep) {
237 ftprintf(fp, T("n%d:"), nsi);
238 fputts(sep + 1, fp);
239 } else
240 fputts(name, fp);
241 attributeValue(fp, *atts);
242 if (sep) {
243 ftprintf(fp, T(" xmlns:n%d"), nsi++);
244 attributeValue(fp, name);
245 }
246 atts++;
247 }
248 puttc(T('>'), fp);
249 }
250
251 static void XMLCALL
endElementNS(void * userData,const XML_Char * name)252 endElementNS(void *userData, const XML_Char *name) {
253 FILE *fp = ((XmlwfUserData *)userData)->fp;
254 const XML_Char *sep;
255 puttc(T('<'), fp);
256 puttc(T('/'), fp);
257 sep = tcsrchr(name, NSSEP);
258 if (sep) {
259 fputts(T("n1:"), fp);
260 fputts(sep + 1, fp);
261 } else
262 fputts(name, fp);
263 puttc(T('>'), fp);
264 }
265
266 #ifndef W3C14N
267
268 static void XMLCALL
processingInstruction(void * userData,const XML_Char * target,const XML_Char * data)269 processingInstruction(void *userData, const XML_Char *target,
270 const XML_Char *data) {
271 FILE *fp = ((XmlwfUserData *)userData)->fp;
272 puttc(T('<'), fp);
273 puttc(T('?'), fp);
274 fputts(target, fp);
275 puttc(T(' '), fp);
276 fputts(data, fp);
277 puttc(T('?'), fp);
278 puttc(T('>'), fp);
279 }
280
281 static XML_Char *
xcsdup(const XML_Char * s)282 xcsdup(const XML_Char *s) {
283 XML_Char *result;
284 int count = 0;
285 int numBytes;
286
287 /* Get the length of the string, including terminator */
288 while (s[count++] != 0) {
289 /* Do nothing */
290 }
291 numBytes = count * sizeof(XML_Char);
292 result = malloc(numBytes);
293 if (result == NULL)
294 return NULL;
295 memcpy(result, s, numBytes);
296 return result;
297 }
298
299 static void XMLCALL
startDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * publid,int has_internal_subset)300 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
301 const XML_Char *sysid, const XML_Char *publid,
302 int has_internal_subset) {
303 XmlwfUserData *data = (XmlwfUserData *)userData;
304 UNUSED_P(sysid);
305 UNUSED_P(publid);
306 UNUSED_P(has_internal_subset);
307 data->currentDoctypeName = xcsdup(doctypeName);
308 }
309
310 static void
freeNotations(XmlwfUserData * data)311 freeNotations(XmlwfUserData *data) {
312 NotationList *notationListHead = data->notationListHead;
313
314 while (notationListHead != NULL) {
315 NotationList *next = notationListHead->next;
316 free((void *)notationListHead->notationName);
317 free((void *)notationListHead->systemId);
318 free((void *)notationListHead->publicId);
319 free(notationListHead);
320 notationListHead = next;
321 }
322 data->notationListHead = NULL;
323 }
324
325 static int
xcscmp(const XML_Char * xs,const XML_Char * xt)326 xcscmp(const XML_Char *xs, const XML_Char *xt) {
327 while (*xs != 0 && *xt != 0) {
328 if (*xs < *xt)
329 return -1;
330 if (*xs > *xt)
331 return 1;
332 xs++;
333 xt++;
334 }
335 if (*xs < *xt)
336 return -1;
337 if (*xs > *xt)
338 return 1;
339 return 0;
340 }
341
342 static int
notationCmp(const void * a,const void * b)343 notationCmp(const void *a, const void *b) {
344 const NotationList *const n1 = *(NotationList **)a;
345 const NotationList *const n2 = *(NotationList **)b;
346
347 return xcscmp(n1->notationName, n2->notationName);
348 }
349
350 static void XMLCALL
endDoctypeDecl(void * userData)351 endDoctypeDecl(void *userData) {
352 XmlwfUserData *data = (XmlwfUserData *)userData;
353 NotationList **notations;
354 int notationCount = 0;
355 NotationList *p;
356 int i;
357
358 /* How many notations do we have? */
359 for (p = data->notationListHead; p != NULL; p = p->next)
360 notationCount++;
361 if (notationCount == 0) {
362 /* Nothing to report */
363 free((void *)data->currentDoctypeName);
364 data->currentDoctypeName = NULL;
365 return;
366 }
367
368 notations = malloc(notationCount * sizeof(NotationList *));
369 if (notations == NULL) {
370 fprintf(stderr, "Unable to sort notations");
371 freeNotations(data);
372 return;
373 }
374
375 for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
376 notations[i] = p;
377 }
378 qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
379
380 /* Output the DOCTYPE header */
381 fputts(T("<!DOCTYPE "), data->fp);
382 fputts(data->currentDoctypeName, data->fp);
383 fputts(T(" [\n"), data->fp);
384
385 /* Now the NOTATIONs */
386 for (i = 0; i < notationCount; i++) {
387 fputts(T("<!NOTATION "), data->fp);
388 fputts(notations[i]->notationName, data->fp);
389 if (notations[i]->publicId != NULL) {
390 fputts(T(" PUBLIC '"), data->fp);
391 fputts(notations[i]->publicId, data->fp);
392 puttc(T('\''), data->fp);
393 if (notations[i]->systemId != NULL) {
394 puttc(T(' '), data->fp);
395 puttc(T('\''), data->fp);
396 fputts(notations[i]->systemId, data->fp);
397 puttc(T('\''), data->fp);
398 }
399 } else if (notations[i]->systemId != NULL) {
400 fputts(T(" SYSTEM '"), data->fp);
401 fputts(notations[i]->systemId, data->fp);
402 puttc(T('\''), data->fp);
403 }
404 puttc(T('>'), data->fp);
405 puttc(T('\n'), data->fp);
406 }
407
408 /* Finally end the DOCTYPE */
409 fputts(T("]>\n"), data->fp);
410
411 free(notations);
412 freeNotations(data);
413 free((void *)data->currentDoctypeName);
414 data->currentDoctypeName = NULL;
415 }
416
417 static void XMLCALL
notationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)418 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
419 const XML_Char *systemId, const XML_Char *publicId) {
420 XmlwfUserData *data = (XmlwfUserData *)userData;
421 NotationList *entry = malloc(sizeof(NotationList));
422 const char *errorMessage = "Unable to store NOTATION for output\n";
423
424 UNUSED_P(base);
425 if (entry == NULL) {
426 fputs(errorMessage, stderr);
427 return; /* Nothing we can really do about this */
428 }
429 entry->notationName = xcsdup(notationName);
430 if (entry->notationName == NULL) {
431 fputs(errorMessage, stderr);
432 free(entry);
433 return;
434 }
435 if (systemId != NULL) {
436 entry->systemId = xcsdup(systemId);
437 if (entry->systemId == NULL) {
438 fputs(errorMessage, stderr);
439 free((void *)entry->notationName);
440 free(entry);
441 return;
442 }
443 } else {
444 entry->systemId = NULL;
445 }
446 if (publicId != NULL) {
447 entry->publicId = xcsdup(publicId);
448 if (entry->publicId == NULL) {
449 fputs(errorMessage, stderr);
450 free((void *)entry->systemId); /* Safe if it's NULL */
451 free((void *)entry->notationName);
452 free(entry);
453 return;
454 }
455 } else {
456 entry->publicId = NULL;
457 }
458
459 entry->next = data->notationListHead;
460 data->notationListHead = entry;
461 }
462
463 #endif /* not W3C14N */
464
465 static void XMLCALL
defaultCharacterData(void * userData,const XML_Char * s,int len)466 defaultCharacterData(void *userData, const XML_Char *s, int len) {
467 UNUSED_P(s);
468 UNUSED_P(len);
469 XML_DefaultCurrent((XML_Parser)userData);
470 }
471
472 static void XMLCALL
defaultStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)473 defaultStartElement(void *userData, const XML_Char *name,
474 const XML_Char **atts) {
475 UNUSED_P(name);
476 UNUSED_P(atts);
477 XML_DefaultCurrent((XML_Parser)userData);
478 }
479
480 static void XMLCALL
defaultEndElement(void * userData,const XML_Char * name)481 defaultEndElement(void *userData, const XML_Char *name) {
482 UNUSED_P(name);
483 XML_DefaultCurrent((XML_Parser)userData);
484 }
485
486 static void XMLCALL
defaultProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)487 defaultProcessingInstruction(void *userData, const XML_Char *target,
488 const XML_Char *data) {
489 UNUSED_P(target);
490 UNUSED_P(data);
491 XML_DefaultCurrent((XML_Parser)userData);
492 }
493
494 static void XMLCALL
nopCharacterData(void * userData,const XML_Char * s,int len)495 nopCharacterData(void *userData, const XML_Char *s, int len) {
496 UNUSED_P(userData);
497 UNUSED_P(s);
498 UNUSED_P(len);
499 }
500
501 static void XMLCALL
nopStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)502 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
503 UNUSED_P(userData);
504 UNUSED_P(name);
505 UNUSED_P(atts);
506 }
507
508 static void XMLCALL
nopEndElement(void * userData,const XML_Char * name)509 nopEndElement(void *userData, const XML_Char *name) {
510 UNUSED_P(userData);
511 UNUSED_P(name);
512 }
513
514 static void XMLCALL
nopProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)515 nopProcessingInstruction(void *userData, const XML_Char *target,
516 const XML_Char *data) {
517 UNUSED_P(userData);
518 UNUSED_P(target);
519 UNUSED_P(data);
520 }
521
522 static void XMLCALL
markup(void * userData,const XML_Char * s,int len)523 markup(void *userData, const XML_Char *s, int len) {
524 FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
525 for (; len > 0; --len, ++s)
526 puttc(*s, fp);
527 }
528
529 static void
metaLocation(XML_Parser parser)530 metaLocation(XML_Parser parser) {
531 const XML_Char *uri = XML_GetBase(parser);
532 FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
533 if (uri)
534 ftprintf(fp, T(" uri=\"%s\""), uri);
535 ftprintf(fp,
536 T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
537 T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
538 T(XML_FMT_INT_MOD) T("u\""),
539 XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
540 XML_GetCurrentLineNumber(parser),
541 XML_GetCurrentColumnNumber(parser));
542 }
543
544 static void
metaStartDocument(void * userData)545 metaStartDocument(void *userData) {
546 fputts(T("<document>\n"),
547 ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
548 }
549
550 static void
metaEndDocument(void * userData)551 metaEndDocument(void *userData) {
552 fputts(T("</document>\n"),
553 ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
554 }
555
556 static void XMLCALL
metaStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)557 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
558 XML_Parser parser = (XML_Parser)userData;
559 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
560 FILE *fp = data->fp;
561 const XML_Char **specifiedAttsEnd
562 = atts + XML_GetSpecifiedAttributeCount(parser);
563 const XML_Char **idAttPtr;
564 int idAttIndex = XML_GetIdAttributeIndex(parser);
565 if (idAttIndex < 0)
566 idAttPtr = 0;
567 else
568 idAttPtr = atts + idAttIndex;
569
570 ftprintf(fp, T("<starttag name=\"%s\""), name);
571 metaLocation(parser);
572 if (*atts) {
573 fputts(T(">\n"), fp);
574 do {
575 ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
576 characterData(data, atts[1], (int)tcslen(atts[1]));
577 if (atts >= specifiedAttsEnd)
578 fputts(T("\" defaulted=\"yes\"/>\n"), fp);
579 else if (atts == idAttPtr)
580 fputts(T("\" id=\"yes\"/>\n"), fp);
581 else
582 fputts(T("\"/>\n"), fp);
583 } while (*(atts += 2));
584 fputts(T("</starttag>\n"), fp);
585 } else
586 fputts(T("/>\n"), fp);
587 }
588
589 static void XMLCALL
metaEndElement(void * userData,const XML_Char * name)590 metaEndElement(void *userData, const XML_Char *name) {
591 XML_Parser parser = (XML_Parser)userData;
592 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
593 FILE *fp = data->fp;
594 ftprintf(fp, T("<endtag name=\"%s\""), name);
595 metaLocation(parser);
596 fputts(T("/>\n"), fp);
597 }
598
599 static void XMLCALL
metaProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)600 metaProcessingInstruction(void *userData, const XML_Char *target,
601 const XML_Char *data) {
602 XML_Parser parser = (XML_Parser)userData;
603 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
604 FILE *fp = usrData->fp;
605 ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
606 characterData(usrData, data, (int)tcslen(data));
607 puttc(T('"'), fp);
608 metaLocation(parser);
609 fputts(T("/>\n"), fp);
610 }
611
612 static void XMLCALL
metaComment(void * userData,const XML_Char * data)613 metaComment(void *userData, const XML_Char *data) {
614 XML_Parser parser = (XML_Parser)userData;
615 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
616 FILE *fp = usrData->fp;
617 fputts(T("<comment data=\""), fp);
618 characterData(usrData, data, (int)tcslen(data));
619 puttc(T('"'), fp);
620 metaLocation(parser);
621 fputts(T("/>\n"), fp);
622 }
623
624 static void XMLCALL
metaStartCdataSection(void * userData)625 metaStartCdataSection(void *userData) {
626 XML_Parser parser = (XML_Parser)userData;
627 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
628 FILE *fp = data->fp;
629 fputts(T("<startcdata"), fp);
630 metaLocation(parser);
631 fputts(T("/>\n"), fp);
632 }
633
634 static void XMLCALL
metaEndCdataSection(void * userData)635 metaEndCdataSection(void *userData) {
636 XML_Parser parser = (XML_Parser)userData;
637 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
638 FILE *fp = data->fp;
639 fputts(T("<endcdata"), fp);
640 metaLocation(parser);
641 fputts(T("/>\n"), fp);
642 }
643
644 static void XMLCALL
metaCharacterData(void * userData,const XML_Char * s,int len)645 metaCharacterData(void *userData, const XML_Char *s, int len) {
646 XML_Parser parser = (XML_Parser)userData;
647 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
648 FILE *fp = data->fp;
649 fputts(T("<chars str=\""), fp);
650 characterData(data, s, len);
651 puttc(T('"'), fp);
652 metaLocation(parser);
653 fputts(T("/>\n"), fp);
654 }
655
656 static void XMLCALL
metaStartDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)657 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
658 const XML_Char *sysid, const XML_Char *pubid,
659 int has_internal_subset) {
660 XML_Parser parser = (XML_Parser)userData;
661 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
662 FILE *fp = data->fp;
663 UNUSED_P(sysid);
664 UNUSED_P(pubid);
665 UNUSED_P(has_internal_subset);
666 ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
667 metaLocation(parser);
668 fputts(T("/>\n"), fp);
669 }
670
671 static void XMLCALL
metaEndDoctypeDecl(void * userData)672 metaEndDoctypeDecl(void *userData) {
673 XML_Parser parser = (XML_Parser)userData;
674 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
675 FILE *fp = data->fp;
676 fputts(T("<enddoctype"), fp);
677 metaLocation(parser);
678 fputts(T("/>\n"), fp);
679 }
680
681 static void XMLCALL
metaNotationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)682 metaNotationDecl(void *userData, const XML_Char *notationName,
683 const XML_Char *base, const XML_Char *systemId,
684 const XML_Char *publicId) {
685 XML_Parser parser = (XML_Parser)userData;
686 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
687 FILE *fp = data->fp;
688 UNUSED_P(base);
689 ftprintf(fp, T("<notation name=\"%s\""), notationName);
690 if (publicId)
691 ftprintf(fp, T(" public=\"%s\""), publicId);
692 if (systemId) {
693 fputts(T(" system=\""), fp);
694 characterData(data, systemId, (int)tcslen(systemId));
695 puttc(T('"'), fp);
696 }
697 metaLocation(parser);
698 fputts(T("/>\n"), fp);
699 }
700
701 static void XMLCALL
metaEntityDecl(void * userData,const XML_Char * entityName,int is_param,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)702 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
703 const XML_Char *value, int value_length, const XML_Char *base,
704 const XML_Char *systemId, const XML_Char *publicId,
705 const XML_Char *notationName) {
706 XML_Parser parser = (XML_Parser)userData;
707 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
708 FILE *fp = data->fp;
709
710 UNUSED_P(is_param);
711 UNUSED_P(base);
712 if (value) {
713 ftprintf(fp, T("<entity name=\"%s\""), entityName);
714 metaLocation(parser);
715 puttc(T('>'), fp);
716 characterData(data, value, value_length);
717 fputts(T("</entity/>\n"), fp);
718 } else if (notationName) {
719 ftprintf(fp, T("<entity name=\"%s\""), entityName);
720 if (publicId)
721 ftprintf(fp, T(" public=\"%s\""), publicId);
722 fputts(T(" system=\""), fp);
723 characterData(data, systemId, (int)tcslen(systemId));
724 puttc(T('"'), fp);
725 ftprintf(fp, T(" notation=\"%s\""), notationName);
726 metaLocation(parser);
727 fputts(T("/>\n"), fp);
728 } else {
729 ftprintf(fp, T("<entity name=\"%s\""), entityName);
730 if (publicId)
731 ftprintf(fp, T(" public=\"%s\""), publicId);
732 fputts(T(" system=\""), fp);
733 characterData(data, systemId, (int)tcslen(systemId));
734 puttc(T('"'), fp);
735 metaLocation(parser);
736 fputts(T("/>\n"), fp);
737 }
738 }
739
740 static void XMLCALL
metaStartNamespaceDecl(void * userData,const XML_Char * prefix,const XML_Char * uri)741 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
742 const XML_Char *uri) {
743 XML_Parser parser = (XML_Parser)userData;
744 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
745 FILE *fp = data->fp;
746 fputts(T("<startns"), fp);
747 if (prefix)
748 ftprintf(fp, T(" prefix=\"%s\""), prefix);
749 if (uri) {
750 fputts(T(" ns=\""), fp);
751 characterData(data, uri, (int)tcslen(uri));
752 fputts(T("\"/>\n"), fp);
753 } else
754 fputts(T("/>\n"), fp);
755 }
756
757 static void XMLCALL
metaEndNamespaceDecl(void * userData,const XML_Char * prefix)758 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
759 XML_Parser parser = (XML_Parser)userData;
760 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
761 FILE *fp = data->fp;
762 if (! prefix)
763 fputts(T("<endns/>\n"), fp);
764 else
765 ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
766 }
767
768 static int XMLCALL
unknownEncodingConvert(void * data,const char * p)769 unknownEncodingConvert(void *data, const char *p) {
770 return codepageConvert(*(int *)data, p);
771 }
772
773 static int XMLCALL
unknownEncoding(void * userData,const XML_Char * name,XML_Encoding * info)774 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
775 int cp;
776 static const XML_Char prefixL[] = T("windows-");
777 static const XML_Char prefixU[] = T("WINDOWS-");
778 int i;
779
780 UNUSED_P(userData);
781 for (i = 0; prefixU[i]; i++)
782 if (name[i] != prefixU[i] && name[i] != prefixL[i])
783 return 0;
784
785 cp = 0;
786 for (; name[i]; i++) {
787 static const XML_Char digits[] = T("0123456789");
788 const XML_Char *s = tcschr(digits, name[i]);
789 if (! s)
790 return 0;
791 cp *= 10;
792 cp += (int)(s - digits);
793 if (cp >= 0x10000)
794 return 0;
795 }
796 if (! codepageMap(cp, info->map))
797 return 0;
798 info->convert = unknownEncodingConvert;
799 /* We could just cast the code page integer to a void *,
800 and avoid the use of release. */
801 info->release = free;
802 info->data = malloc(sizeof(int));
803 if (! info->data)
804 return 0;
805 *(int *)info->data = cp;
806 return 1;
807 }
808
809 static int XMLCALL
notStandalone(void * userData)810 notStandalone(void *userData) {
811 UNUSED_P(userData);
812 return 0;
813 }
814
815 static void
showVersion(XML_Char * prog)816 showVersion(XML_Char *prog) {
817 XML_Char *s = prog;
818 XML_Char ch;
819 const XML_Feature *features = XML_GetFeatureList();
820 while ((ch = *s) != 0) {
821 if (ch == '/'
822 #if defined(_WIN32)
823 || ch == '\\'
824 #endif
825 )
826 prog = s + 1;
827 ++s;
828 }
829 ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
830 if (features != NULL && features[0].feature != XML_FEATURE_END) {
831 int i = 1;
832 ftprintf(stdout, T("%s"), features[0].name);
833 if (features[0].value)
834 ftprintf(stdout, T("=%ld"), features[0].value);
835 while (features[i].feature != XML_FEATURE_END) {
836 ftprintf(stdout, T(", %s"), features[i].name);
837 if (features[i].value)
838 ftprintf(stdout, T("=%ld"), features[i].value);
839 ++i;
840 }
841 ftprintf(stdout, T("\n"));
842 }
843 }
844
845 static void
usage(const XML_Char * prog,int rc)846 usage(const XML_Char *prog, int rc) {
847 ftprintf(
848 stderr,
849 /* Generated with:
850 * $ xmlwf/xmlwf_helpgen.sh
851 */
852 /* clang-format off */
853 T("usage: %s [-s] [-n] [-p] [-x] [-e ENCODING] [-w] [-r] [-d DIRECTORY]\n")
854 T(" [-c | -m | -t] [-N]\n")
855 T(" [FILE [FILE ...]]\n")
856 T("\n")
857 T("xmlwf - Determines if an XML document is well-formed\n")
858 T("\n")
859 T("positional arguments:\n")
860 T(" FILE file to process (default: STDIN)\n")
861 T("\n")
862 T("input control arguments:\n")
863 T(" -s print an error if the document is not [s]tandalone\n")
864 T(" -n enable [n]amespace processing\n")
865 T(" -p enable processing external DTDs and [p]arameter entities\n")
866 T(" -x enable processing of e[x]ternal entities\n")
867 T(" -e ENCODING override any in-document [e]ncoding declaration\n")
868 T(" -w enable support for [W]indows code pages\n")
869 T(" -r disable memory-mapping and use normal file [r]ead IO calls instead\n")
870 T("\n")
871 T("output control arguments:\n")
872 T(" -d DIRECTORY output [d]estination directory\n")
873 T(" -c write a [c]opy of input XML, not canonical XML\n")
874 T(" -m write [m]eta XML, not canonical XML\n")
875 T(" -t write no XML output for [t]iming of plain parsing\n")
876 T(" -N enable adding doctype and [n]otation declarations\n")
877 T("\n")
878 T("info arguments:\n")
879 T(" -h show this [h]elp message and exit\n")
880 T(" -v show program's [v]ersion number and exit\n")
881 T("\n")
882 T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
883 T("Please report bugs at https://github.com/libexpat/libexpat/issues. Thank you!\n")
884 , /* clang-format on */
885 prog);
886 exit(rc);
887 }
888
#if defined(__MINGW32__) && defined(XML_UNICODE)
/* Silence warning about missing prototype.  Only needed for MinGW builds
   with XML_UNICODE; presumably tmain expands to wmain there (see
   xmltchar.h to confirm). */
int wmain(int argc, XML_Char **argv);
#endif
893
/* Program entry point.

   Parses the command line options, then processes every FILE argument (or
   standard input when none is given): a parser is created, handlers are
   installed according to the selected output type, and the input is run
   through XML_ProcessFile.  Output files are only written when an output
   directory was given with -d.

   Exit status: 0 on success; 1 when a parser or memory cannot be allocated;
   2 when an input fails to process (usage() is also called with 2 for bad
   options); 3 when an output file cannot be opened. */
int
tmain(int argc, XML_Char **argv) {
  int i, j; /* i indexes argv; j indexes into a bundled option argument */
  const XML_Char *outputDir = NULL;
  const XML_Char *encoding = NULL;
  unsigned processFlags = XML_MAP_FILE;
  int windowsCodePages = 0;
  int outputType = 0; /* 0 = canonical, or one of 'm', 'c', 't' */
  int useNamespaces = 0;
  int requireStandalone = 0;
  int requiresNotations = 0;
  enum XML_ParamEntityParsing paramEntityParsing
      = XML_PARAM_ENTITY_PARSING_NEVER;
  int useStdin = 0;
  XmlwfUserData userData = {NULL, NULL, NULL};

#ifdef _MSC_VER
  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
#endif

  /* Option parsing.  j == 0 means a new argument is about to be examined;
     j > 0 means we are in the middle of a group such as "-nm". */
  i = 1;
  j = 0;
  while (i < argc) {
    if (j == 0) {
      /* The first argument not starting with '-' ends the options. */
      if (argv[i][0] != T('-'))
        break;
      /* A lone "--" ends the options and is itself skipped. */
      if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
        i++;
        break;
      }
      j++;
    }
    switch (argv[i][j]) {
    case T('r'):
      processFlags &= ~XML_MAP_FILE;
      j++;
      break;
    case T('s'):
      requireStandalone = 1;
      j++;
      break;
    case T('n'):
      useNamespaces = 1;
      j++;
      break;
    case T('p'):
      paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
      /* fall through */
    case T('x'):
      processFlags |= XML_EXTERNAL_ENTITIES;
      j++;
      break;
    case T('w'):
      windowsCodePages = 1;
      j++;
      break;
    case T('m'):
      outputType = 'm';
      j++;
      break;
    case T('c'):
      outputType = 'c';
      useNamespaces = 0; /* copy output turns namespace processing off */
      j++;
      break;
    case T('t'):
      outputType = 't';
      j++;
      break;
    case T('N'):
      requiresNotations = 1;
      j++;
      break;
    case T('d'):
      /* The directory is either the rest of this argument or the next one. */
      if (argv[i][j + 1] == T('\0')) {
        if (++i == argc)
          usage(argv[0], 2);
        outputDir = argv[i];
      } else
        outputDir = argv[i] + j + 1;
      i++;
      j = 0;
      break;
    case T('e'):
      /* The encoding is either the rest of this argument or the next one. */
      if (argv[i][j + 1] == T('\0')) {
        if (++i == argc)
          usage(argv[0], 2);
        encoding = argv[i];
      } else
        encoding = argv[i] + j + 1;
      i++;
      j = 0;
      break;
    case T('h'):
      usage(argv[0], 0);
      return 0;
    case T('v'):
      showVersion(argv[0]);
      return 0;
    case T('\0'):
      /* End of an option group; a bare "-" (j == 1) is an error. */
      if (j > 1) {
        i++;
        j = 0;
        break;
      }
      /* fall through */
    default:
      usage(argv[0], 2);
    }
  }
  if (i == argc) {
    /* No FILE arguments: read standard input.  Stepping i back makes the
       loop below run exactly once. */
    useStdin = 1;
    processFlags &= ~XML_MAP_FILE;
    i--;
  }
  for (; i < argc; i++) {
    XML_Char *outName = 0;
    int result;
    XML_Parser parser;
    if (useNamespaces)
      parser = XML_ParserCreateNS(encoding, NSSEP);
    else
      parser = XML_ParserCreate(encoding);

    if (! parser) {
      tperror(T("Could not instantiate parser"));
      exit(1);
    }

    if (requireStandalone)
      XML_SetNotStandaloneHandler(parser, notStandalone);
    XML_SetParamEntityParsing(parser, paramEntityParsing);
    if (outputType == 't') {
      /* This is for doing timings; this gives a more realistic estimate of
         the parsing time. */
      outputDir = 0;
      XML_SetElementHandler(parser, nopStartElement, nopEndElement);
      XML_SetCharacterDataHandler(parser, nopCharacterData);
      XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
    } else if (outputDir) {
      const XML_Char *delim = T("/");
      const XML_Char *file = useStdin ? T("STDIN") : argv[i];
      if (! useStdin) {
        /* Jump after last (back)slash */
        const XML_Char *lastDelim = tcsrchr(file, delim[0]);
        if (lastDelim)
          file = lastDelim + 1;
#if defined(_WIN32)
        else {
          const XML_Char *winDelim = T("\\");
          lastDelim = tcsrchr(file, winDelim[0]);
          if (lastDelim) {
            file = lastDelim + 1;
            delim = winDelim;
          }
        }
#endif
      }
      /* Room for directory + one delimiter + file name + terminator. */
      outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
                                   * sizeof(XML_Char));
      if (! outName) {
        tperror(T("Could not allocate memory"));
        exit(1);
      }
      tcscpy(outName, outputDir);
      tcscat(outName, delim);
      tcscat(outName, file);
      userData.fp = tfopen(outName, T("wb"));
      if (! userData.fp) {
        tperror(outName);
        exit(3);
      }
      setvbuf(userData.fp, NULL, _IOFBF, 16384);
#ifdef XML_UNICODE
      /* Start the output with U+FEFF (byte order mark). */
      puttc(0xFEFF, userData.fp);
#endif
      XML_SetUserData(parser, &userData);
      switch (outputType) {
      case 'm':
        /* Meta handlers receive the parser itself as their first argument. */
        XML_UseParserAsHandlerArg(parser);
        XML_SetElementHandler(parser, metaStartElement, metaEndElement);
        XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
        XML_SetCommentHandler(parser, metaComment);
        XML_SetCdataSectionHandler(parser, metaStartCdataSection,
                                   metaEndCdataSection);
        XML_SetCharacterDataHandler(parser, metaCharacterData);
        XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
                                  metaEndDoctypeDecl);
        XML_SetEntityDeclHandler(parser, metaEntityDecl);
        XML_SetNotationDeclHandler(parser, metaNotationDecl);
        XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
                                    metaEndNamespaceDecl);
        metaStartDocument(parser);
        break;
      case 'c':
        /* Copy handlers forward every event to the default handler
           (markup), which writes the text through unchanged. */
        XML_UseParserAsHandlerArg(parser);
        XML_SetDefaultHandler(parser, markup);
        XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
        XML_SetCharacterDataHandler(parser, defaultCharacterData);
        XML_SetProcessingInstructionHandler(parser,
                                            defaultProcessingInstruction);
        break;
      default:
        /* Canonical output. */
        if (useNamespaces)
          XML_SetElementHandler(parser, startElementNS, endElementNS);
        else
          XML_SetElementHandler(parser, startElement, endElement);
        XML_SetCharacterDataHandler(parser, characterData);
#ifndef W3C14N
        XML_SetProcessingInstructionHandler(parser, processingInstruction);
        if (requiresNotations) {
          XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
          XML_SetNotationDeclHandler(parser, notationDecl);
        }
#endif /* not W3C14N */
        break;
      }
    }
    if (windowsCodePages)
      XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
    result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
    if (outputDir) {
      if (outputType == 'm')
        metaEndDocument(parser);
      fclose(userData.fp);
      /* Do not leave a partial output file behind after a failure. */
      if (! result) {
        tremove(outName);
      }
      free(outName);
    }
    XML_ParserFree(parser);
    /* Stop at the first input that fails; remaining files are skipped. */
    if (! result) {
      exit(2);
    }
  }
  return 0;
}
1131