1 /* Read an XML document from standard input and print
2    element declarations (if any) to standard output.
3    It must be used with Expat compiled for UTF-8 output.
4                             __  __            _
5                          ___\ \/ /_ __   __ _| |_
6                         / _ \\  /| '_ \ / _` | __|
7                        |  __//  \| |_) | (_| | |_
8                         \___/_/\_\ .__/ \__,_|\__|
9                                  |_| XML parser
10 
11    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
12    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
13    Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
14    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
15    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
16    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
17    Copyright (c) 2019      Zhongyuan Zhou <zhouzhongyuan@huawei.com>
18    Licensed under the MIT license:
19 
20    Permission is  hereby granted,  free of charge,  to any  person obtaining
21    a  copy  of  this  software   and  associated  documentation  files  (the
22    "Software"),  to  deal in  the  Software  without restriction,  including
23    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
24    distribute, sublicense, and/or sell copies of the Software, and to permit
25    persons  to whom  the Software  is  furnished to  do so,  subject to  the
26    following conditions:
27 
28    The above copyright  notice and this permission notice  shall be included
29    in all copies or substantial portions of the Software.
30 
31    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
32    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
33    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
34    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
35    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
36    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
37    USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39 
40 #include <stdbool.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <expat.h>
44 
// printf length modifier that this file splices in front of "u" when it
// prints the value returned by XML_GetCurrentLineNumber: "ll" when
// XML_LARGE_SIZE is defined, "l" otherwise.
#if defined(XML_LARGE_SIZE)
#  define XML_FMT_INT_MOD "ll"
#else
#  define XML_FMT_INT_MOD "l"
#endif

// printf conversion (without the leading '%') that this file uses for the
// strings it gets from Expat: "ls" when XML_UNICODE_WCHAR_T is defined,
// "s" otherwise.
#if defined(XML_UNICODE_WCHAR_T)
#  define XML_FMT_STR "ls"
#else
#  define XML_FMT_STR "s"
#endif
56 
57 // While traversing the XML_Content tree, we avoid recursion
58 // to not be vulnerable to a denial of service attack.
59 typedef struct StackStruct {
60   const XML_Content *model;
61   unsigned level;
62   struct StackStruct *prev;
63 } Stack;
64 
65 static Stack *
66 stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) {
67   Stack *const newStackTop = malloc(sizeof(Stack));
68   if (! newStackTop) {
69     return NULL;
70   }
71   newStackTop->model = model;
72   newStackTop->level = level;
73   newStackTop->prev = stackTop;
74   return newStackTop;
75 }
76 
77 static Stack *
78 stackPopFree(Stack *stackTop) {
79   Stack *const newStackTop = stackTop->prev;
80   free(stackTop);
81   return newStackTop;
82 }
83 
84 static char *
85 contentTypeName(enum XML_Content_Type contentType) {
86   switch (contentType) {
87   case XML_CTYPE_EMPTY:
88     return "EMPTY";
89   case XML_CTYPE_ANY:
90     return "ANY";
91   case XML_CTYPE_MIXED:
92     return "MIXED";
93   case XML_CTYPE_NAME:
94     return "NAME";
95   case XML_CTYPE_CHOICE:
96     return "CHOICE";
97   case XML_CTYPE_SEQ:
98     return "SEQ";
99   default:
100     return "???";
101   }
102 }
103 
104 static char *
105 contentQuantName(enum XML_Content_Quant contentQuant) {
106   switch (contentQuant) {
107   case XML_CQUANT_NONE:
108     return "NONE";
109   case XML_CQUANT_OPT:
110     return "OPT";
111   case XML_CQUANT_REP:
112     return "REP";
113   case XML_CQUANT_PLUS:
114     return "PLUS";
115   default:
116     return "???";
117   }
118 }
119 
120 static void
121 dumpContentModelElement(const XML_Content *model, unsigned level,
122                         const XML_Content *root) {
123   // Indent
124   unsigned u = 0;
125   for (; u < level; u++) {
126     printf("  ");
127   }
128 
129   // Node
130   printf("[%u] type=%s(%d), quant=%s(%d)", (unsigned)(model - root),
131          contentTypeName(model->type), model->type,
132          contentQuantName(model->quant), model->quant);
133   if (model->name) {
134     printf(", name=\"%" XML_FMT_STR "\"", model->name);
135   } else {
136     printf(", name=NULL");
137   }
138   printf(", numchildren=%d", model->numchildren);
139   printf("\n");
140 }
141 
142 static bool
143 dumpContentModel(const XML_Char *name, const XML_Content *root) {
144   printf("Element \"%" XML_FMT_STR "\":\n", name);
145   Stack *stackTop = stackPushMalloc(NULL, root, 1);
146   if (! stackTop) {
147     return false;
148   }
149 
150   while (stackTop) {
151     const XML_Content *const model = stackTop->model;
152     const unsigned level = stackTop->level;
153 
154     dumpContentModelElement(model, level, root);
155 
156     stackTop = stackPopFree(stackTop);
157 
158     for (size_t u = model->numchildren; u >= 1; u--) {
159       Stack *const newStackTop
160           = stackPushMalloc(stackTop, model->children + (u - 1), level + 1);
161       if (! newStackTop) {
162         // We ran out of memory, so let's free all memory allocated
163         // earlier in this function, to be leak-clean:
164         while (stackTop != NULL) {
165           stackTop = stackPopFree(stackTop);
166         }
167         return false;
168       }
169       stackTop = newStackTop;
170     }
171   }
172 
173   printf("\n");
174   return true;
175 }
176 
177 static void XMLCALL
178 handleElementDeclaration(void *userData, const XML_Char *name,
179                          XML_Content *model) {
180   XML_Parser parser = (XML_Parser)userData;
181   const bool success = dumpContentModel(name, model);
182   XML_FreeContentModel(parser, model);
183   if (! success) {
184     XML_StopParser(parser, /* resumable= */ XML_FALSE);
185   }
186 }
187 
188 int
189 main(void) {
190   XML_Parser parser = XML_ParserCreate(NULL);
191   int done;
192 
193   if (! parser) {
194     fprintf(stderr, "Couldn't allocate memory for parser\n");
195     return 1;
196   }
197 
198   XML_SetUserData(parser, parser);
199   XML_SetElementDeclHandler(parser, handleElementDeclaration);
200 
201   do {
202     void *const buf = XML_GetBuffer(parser, BUFSIZ);
203     if (! buf) {
204       fprintf(stderr, "Couldn't allocate memory for buffer\n");
205       XML_ParserFree(parser);
206       return 1;
207     }
208 
209     const size_t len = fread(buf, 1, BUFSIZ, stdin);
210 
211     if (ferror(stdin)) {
212       fprintf(stderr, "Read error\n");
213       XML_ParserFree(parser);
214       return 1;
215     }
216 
217     done = feof(stdin);
218 
219     if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) {
220       enum XML_Error errorCode = XML_GetErrorCode(parser);
221       if (errorCode == XML_ERROR_ABORTED) {
222         errorCode = XML_ERROR_NO_MEMORY;
223       }
224       fprintf(stderr,
225               "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n",
226               XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode));
227       XML_ParserFree(parser);
228       return 1;
229     }
230   } while (! done);
231 
232   XML_ParserFree(parser);
233   return 0;
234 }
235