1 /*
2 * solv_xmlparser.c
3 *
4 * XML parser abstraction
5 *
6 * Copyright (c) 2017, Novell Inc.
7 *
8 * This program is licensed under the BSD license, read LICENSE.BSD
9 * for further information
10 */
11
12 #include <sys/types.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #ifdef WITH_LIBXML2
18 #include <libxml/parser.h>
19 #else
20 #include <expat.h>
21 #endif
22
23 #include "util.h"
24 #include "queue.h"
25 #include "solv_xmlparser.h"
26
27 static inline void
add_contentspace(struct solv_xmlparser * xmlp,int l)28 add_contentspace(struct solv_xmlparser *xmlp, int l)
29 {
30 l += xmlp->lcontent + 1; /* plus room for trailing zero */
31 if (l > xmlp->acontent)
32 {
33 xmlp->acontent = l + 256;
34 xmlp->content = solv_realloc(xmlp->content, xmlp->acontent);
35 }
36 }
37
38
39 #ifdef WITH_LIBXML2
40 static void
character_data(void * userData,const xmlChar * s,int len)41 character_data(void *userData, const xmlChar *s, int len)
42 #else
43 static void XMLCALL
44 character_data(void *userData, const XML_Char *s, int len)
45 #endif
46 {
47 struct solv_xmlparser *xmlp = userData;
48
49 if (!xmlp->docontent || !len)
50 return;
51 add_contentspace(xmlp, len);
52 memcpy(xmlp->content + xmlp->lcontent, s, len);
53 xmlp->lcontent += len;
54 }
55
56 #ifdef WITH_LIBXML2
57 static void
start_element(void * userData,const xmlChar * name,const xmlChar ** atts)58 start_element(void *userData, const xmlChar *name, const xmlChar **atts)
59 #else
60 static void XMLCALL
61 start_element(void *userData, const char *name, const char **atts)
62 #endif
63 {
64 struct solv_xmlparser *xmlp = userData;
65 struct solv_xmlparser_element *elements;
66 Id *elementhelper;
67 struct solv_xmlparser_element *el;
68 int i, oldstate;
69
70 if (xmlp->unknowncnt)
71 {
72 xmlp->unknowncnt++;
73 return;
74 }
75 elementhelper = xmlp->elementhelper;
76 elements = xmlp->elements;
77 oldstate = xmlp->state;
78 for (i = elementhelper[xmlp->nelements + oldstate]; i; i = elementhelper[i - 1])
79 if (!strcmp(elements[i - 1].element, (char *)name))
80 break;
81 if (!i)
82 {
83 #if 0
84 fprintf(stderr, "into unknown: %s\n", name);
85 #endif
86 xmlp->unknowncnt++;
87 return;
88 }
89 el = xmlp->elements + i - 1;
90 queue_push(&xmlp->elementq, xmlp->state);
91 xmlp->state = el->tostate;
92 xmlp->docontent = el->docontent;
93 xmlp->lcontent = 0;
94 #ifdef WITH_LIBXML2
95 if (!atts)
96 {
97 static const char *nullattr;
98 atts = (const xmlChar **)&nullattr;
99 }
100 #endif
101 if (xmlp->state != oldstate)
102 xmlp->startelement(xmlp, xmlp->state, el->element, (const char **)atts);
103 }
104
105 #ifdef WITH_LIBXML2
106 static void
end_element(void * userData,const xmlChar * name)107 end_element(void *userData, const xmlChar *name)
108 #else
109 static void XMLCALL
110 end_element(void *userData, const char *name)
111 #endif
112 {
113 struct solv_xmlparser *xmlp = userData;
114
115 if (xmlp->unknowncnt)
116 {
117 xmlp->unknowncnt--;
118 xmlp->lcontent = 0;
119 xmlp->docontent = 0;
120 return;
121 }
122 xmlp->content[xmlp->lcontent] = 0;
123 if (xmlp->elementq.count && xmlp->state != xmlp->elementq.elements[xmlp->elementq.count - 1])
124 xmlp->endelement(xmlp, xmlp->state, xmlp->content);
125 xmlp->state = queue_pop(&xmlp->elementq);
126 xmlp->docontent = 0;
127 xmlp->lcontent = 0;
128 }
129
130 void
solv_xmlparser_init(struct solv_xmlparser * xmlp,struct solv_xmlparser_element * elements,void * userdata,void (* startelement)(struct solv_xmlparser *,int state,const char * name,const char ** atts),void (* endelement)(struct solv_xmlparser *,int state,char * content))131 solv_xmlparser_init(struct solv_xmlparser *xmlp,
132 struct solv_xmlparser_element *elements,
133 void *userdata,
134 void (*startelement)(struct solv_xmlparser *, int state, const char *name, const char **atts),
135 void (*endelement)(struct solv_xmlparser *, int state, char *content))
136 {
137 int i, nstates, nelements;
138 struct solv_xmlparser_element *el;
139 Id *elementhelper;
140
141 memset(xmlp, 0, sizeof(*xmlp));
142 nstates = 0;
143 nelements = 0;
144 for (el = elements; el->element; el++)
145 {
146 nelements++;
147 if (el->fromstate > nstates)
148 nstates = el->fromstate;
149 if (el->tostate > nstates)
150 nstates = el->tostate;
151 }
152 nstates++;
153
154 xmlp->elements = elements;
155 xmlp->nelements = nelements;
156 elementhelper = solv_calloc(nelements + nstates, sizeof(Id));
157 for (i = nelements - 1; i >= 0; i--)
158 {
159 int fromstate = elements[i].fromstate;
160 elementhelper[i] = elementhelper[nelements + fromstate];
161 elementhelper[nelements + fromstate] = i + 1;
162 }
163 xmlp->elementhelper = elementhelper;
164 queue_init(&xmlp->elementq);
165 xmlp->acontent = 256;
166 xmlp->content = solv_malloc(xmlp->acontent);
167
168 xmlp->userdata = userdata;
169 xmlp->startelement = startelement;
170 xmlp->endelement = endelement;
171 }
172
173 void
solv_xmlparser_free(struct solv_xmlparser * xmlp)174 solv_xmlparser_free(struct solv_xmlparser *xmlp)
175 {
176 xmlp->elementhelper = solv_free(xmlp->elementhelper);
177 queue_free(&xmlp->elementq);
178 xmlp->content = solv_free(xmlp->content);
179 xmlp->errstr = solv_free(xmlp->errstr);
180 }
181
182 static void
set_error(struct solv_xmlparser * xmlp,const char * errstr,unsigned int line,unsigned int column)183 set_error(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column)
184 {
185 solv_free(xmlp->errstr);
186 xmlp->errstr = solv_strdup(errstr);
187 xmlp->line = line;
188 xmlp->column = column;
189 }
190
191 #ifdef WITH_LIBXML2
192
/*
 * libxml2 backend: nothing to do here.  The parser context is created
 * lazily in parse_block() so that the first bytes of the input are
 * available at creation time.  Always reports success (1).
 */
static inline int
create_parser(struct solv_xmlparser *xmlp)
{
  (void)xmlp;	/* creation is delayed to parse_block */
  return 1;
}
199
200 static inline void
free_parser(struct solv_xmlparser * xmlp)201 free_parser(struct solv_xmlparser *xmlp)
202 {
203 if (xmlp->parser)
204 xmlFreeParserCtxt(xmlp->parser);
205 xmlp->parser = 0;
206 }
207
208 static xmlParserCtxtPtr
create_parser_ctx(struct solv_xmlparser * xmlp,char * buf,int l)209 create_parser_ctx(struct solv_xmlparser *xmlp, char *buf, int l)
210 {
211 xmlSAXHandler sax;
212 memset(&sax, 0, sizeof(sax));
213 sax.startElement = start_element;
214 sax.endElement = end_element;
215 sax.characters = character_data;
216 return xmlCreatePushParserCtxt(&sax, xmlp, buf, l, NULL);
217 }
218
219 static inline int
parse_block(struct solv_xmlparser * xmlp,char * buf,int l)220 parse_block(struct solv_xmlparser *xmlp, char *buf, int l)
221 {
222 if (!xmlp->parser)
223 {
224 int l2 = l > 4 ? 4 : 0;
225 xmlp->parser = create_parser_ctx(xmlp, buf, l2);
226 if (!xmlp->parser)
227 {
228 set_error(xmlp, "could not create parser", 0, 0);
229 return 0;
230 }
231 buf += l2;
232 l -= l2;
233 if (l2 && !l)
234 return 1;
235 }
236 if (xmlParseChunk(xmlp->parser, buf, l, l == 0 ? 1 : 0))
237 {
238 xmlErrorPtr err = xmlCtxtGetLastError(xmlp->parser);
239 set_error(xmlp, err->message, err->line, err->int2);
240 return 0;
241 }
242 return 1;
243 }
244
245 unsigned int
solv_xmlparser_lineno(struct solv_xmlparser * xmlp)246 solv_xmlparser_lineno(struct solv_xmlparser *xmlp)
247 {
248 return (unsigned int)xmlSAX2GetLineNumber(xmlp->parser);
249 }
250
251 #else
252
253 static inline int
create_parser(struct solv_xmlparser * xmlp)254 create_parser(struct solv_xmlparser *xmlp)
255 {
256 xmlp->parser = XML_ParserCreate(NULL);
257 if (!xmlp->parser)
258 return 0;
259 XML_SetUserData(xmlp->parser, xmlp);
260 XML_SetElementHandler(xmlp->parser, start_element, end_element);
261 XML_SetCharacterDataHandler(xmlp->parser, character_data);
262 return 1;
263 }
264
265 static inline void
free_parser(struct solv_xmlparser * xmlp)266 free_parser(struct solv_xmlparser *xmlp)
267 {
268 XML_ParserFree(xmlp->parser);
269 xmlp->parser = 0;
270 }
271
272 static inline int
parse_block(struct solv_xmlparser * xmlp,char * buf,int l)273 parse_block(struct solv_xmlparser *xmlp, char *buf, int l)
274 {
275 if (XML_Parse(xmlp->parser, buf, l, l == 0) == XML_STATUS_ERROR)
276 {
277 set_error(xmlp, XML_ErrorString(XML_GetErrorCode(xmlp->parser)), XML_GetCurrentLineNumber(xmlp->parser), XML_GetCurrentColumnNumber(xmlp->parser));
278 return 0;
279 }
280 return 1;
281 }
282
283 unsigned int
solv_xmlparser_lineno(struct solv_xmlparser * xmlp)284 solv_xmlparser_lineno(struct solv_xmlparser *xmlp)
285 {
286 return (unsigned int)XML_GetCurrentLineNumber(xmlp->parser);
287 }
288
289 #endif
290
291 int
solv_xmlparser_parse(struct solv_xmlparser * xmlp,FILE * fp)292 solv_xmlparser_parse(struct solv_xmlparser *xmlp, FILE *fp)
293 {
294 char buf[8192];
295 int l, ret = SOLV_XMLPARSER_OK;
296
297 xmlp->state = 0;
298 xmlp->unknowncnt = 0;
299 xmlp->docontent = 0;
300 xmlp->lcontent = 0;
301 queue_empty(&xmlp->elementq);
302
303 if (!create_parser(xmlp))
304 {
305 set_error(xmlp, "could not create parser", 0, 0);
306 return SOLV_XMLPARSER_ERROR;
307 }
308 for (;;)
309 {
310 l = fread(buf, 1, sizeof(buf), fp);
311 if (!parse_block(xmlp, buf, l))
312 {
313 ret = SOLV_XMLPARSER_ERROR;
314 break;
315 }
316 if (!l)
317 break;
318 }
319 free_parser(xmlp);
320 return ret;
321 }
322
323 char *
solv_xmlparser_contentspace(struct solv_xmlparser * xmlp,int l)324 solv_xmlparser_contentspace(struct solv_xmlparser *xmlp, int l)
325 {
326 xmlp->lcontent = 0;
327 if (l > xmlp->acontent)
328 {
329 xmlp->acontent = l + 256;
330 xmlp->content = solv_realloc(xmlp->content, xmlp->acontent);
331 }
332 return xmlp->content;
333 }
334
335