1 /* librepo - A library providing (libcURL like) API to downloading repository
2 * Copyright (C) 2012 Tomas Mlcoch
3 *
4 * Licensed under the GNU Lesser General Public License Version 2.1
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <assert.h>
22 #include <errno.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <libxml/parser.h>
28
29 #include "rcodes.h"
30 #include "util.h"
31 #include "metalink.h"
32 #include "xmlparser_internal.h"
33
34 /** TODO:
35 * - (?) Use GStringChunk
36 */
37
38 #define CHUNK_SIZE 8192
39 #define CONTENT_REALLOC_STEP 256
40
41 /* Metalink object manipulation helpers */
42
43 static LrMetalinkHash *
lr_new_metalinkhash(LrMetalink * m)44 lr_new_metalinkhash(LrMetalink *m)
45 {
46 assert(m);
47 LrMetalinkHash *hash = lr_malloc0(sizeof(*hash));
48 m->hashes = g_slist_append(m->hashes, hash);
49 return hash;
50 }
51
52 static LrMetalinkHash *
lr_new_metalinkalternate_hash(LrMetalinkAlternate * ma)53 lr_new_metalinkalternate_hash(LrMetalinkAlternate *ma)
54 {
55 assert(ma);
56 LrMetalinkHash *hash = lr_malloc0(sizeof(*hash));
57 ma->hashes = g_slist_append(ma->hashes, hash);
58 return hash;
59 }
60
61 static LrMetalinkUrl *
lr_new_metalinkurl(LrMetalink * m)62 lr_new_metalinkurl(LrMetalink *m)
63 {
64 assert(m);
65 LrMetalinkUrl *url = lr_malloc0(sizeof(*url));
66 m->urls = g_slist_append(m->urls, url);
67 return url;
68 }
69
70 static LrMetalinkAlternate *
lr_new_metalinkalternate(LrMetalink * m)71 lr_new_metalinkalternate(LrMetalink *m)
72 {
73 assert(m);
74 LrMetalinkAlternate *alternate = lr_malloc0(sizeof(*alternate));
75 m->alternates = g_slist_append(m->alternates, alternate);
76 return alternate;
77 }
78
79 static void
lr_free_metalinkhash(LrMetalinkHash * metalinkhash)80 lr_free_metalinkhash(LrMetalinkHash *metalinkhash)
81 {
82 if (!metalinkhash) return;
83 lr_free(metalinkhash->type);
84 lr_free(metalinkhash->value);
85 lr_free(metalinkhash);
86 }
87
88 static void
lr_free_metalinkurl(LrMetalinkUrl * metalinkurl)89 lr_free_metalinkurl(LrMetalinkUrl *metalinkurl)
90 {
91 if (!metalinkurl) return;
92 lr_free(metalinkurl->protocol);
93 lr_free(metalinkurl->type);
94 lr_free(metalinkurl->location);
95 lr_free(metalinkurl->url);
96 lr_free(metalinkurl);
97 }
98
99 static void
lr_free_metalinkalternate(LrMetalinkAlternate * metalinkalternate)100 lr_free_metalinkalternate(LrMetalinkAlternate *metalinkalternate)
101 {
102 if (!metalinkalternate) return;
103 g_slist_free_full(metalinkalternate->hashes,
104 (GDestroyNotify)lr_free_metalinkhash);
105 lr_free(metalinkalternate);
106 }
107
108 LrMetalink *
lr_metalink_init(void)109 lr_metalink_init(void)
110 {
111 return lr_malloc0(sizeof(LrMetalink));
112 }
113
114 void
lr_metalink_free(LrMetalink * metalink)115 lr_metalink_free(LrMetalink *metalink)
116 {
117 if (!metalink)
118 return;
119
120 lr_free(metalink->filename);
121 g_slist_free_full(metalink->hashes,
122 (GDestroyNotify)lr_free_metalinkhash);
123 g_slist_free_full(metalink->urls,
124 (GDestroyNotify)lr_free_metalinkurl);
125 g_slist_free_full(metalink->alternates,
126 (GDestroyNotify)lr_free_metalinkalternate);
127 lr_free(metalink);
128 }
129
130 /* Idea of parser implementation is borrowed from libsolv */
131
/* Parser states. Each state (except STATE_START) corresponds to being
 * inside one XML element of the metalink document; NUMSTATES is the
 * number of states and doubles as the table terminator. */
typedef enum {
    STATE_START = 0,                /* before the root element */
    STATE_METALINK,                 /* <metalink> */
    STATE_FILES,                    /* <files> */
    STATE_FILE,                     /* <file> */
    STATE_TIMESTAMP,                /* <mm0:timestamp> of a file */
    STATE_SIZE,                     /* <size> of a file */
    STATE_VERIFICATION,             /* <verification> of a file */
    STATE_HASH,                     /* <hash> of a file */
    STATE_ALTERNATES,               /* <mm0:alternates> */
    STATE_ALTERNATE,                /* <mm0:alternate> */
    STATE_ALTERNATE_TIMESTAMP,      /* <mm0:timestamp> of an alternate */
    STATE_ALTERNATE_SIZE,           /* <size> of an alternate */
    STATE_ALTERNATE_VERIFICATION,   /* <verification> of an alternate */
    STATE_ALTERNATE_HASH,           /* <hash> of an alternate */
    STATE_RESOURCES,                /* <resources> */
    STATE_URL,                      /* <url> */
    NUMSTATES                       /* count of states / sentinel */
} LrState;
151
152 /* Same states in the first column must be together */
153 static LrStatesSwitch stateswitches[] = {
154 { STATE_START, "metalink", STATE_METALINK, 0 },
155 { STATE_METALINK, "files", STATE_FILES, 0 },
156 { STATE_FILES, "file", STATE_FILE, 0 },
157 { STATE_FILE, "mm0:timestamp", STATE_TIMESTAMP, 1 },
158 { STATE_FILE, "size", STATE_SIZE, 1 },
159 { STATE_FILE, "verification", STATE_VERIFICATION, 0 },
160 { STATE_FILE, "mm0:alternates", STATE_ALTERNATES, 0 },
161 { STATE_FILE, "resources", STATE_RESOURCES, 0 },
162 { STATE_VERIFICATION, "hash", STATE_HASH, 1 },
163 { STATE_ALTERNATES, "mm0:alternate", STATE_ALTERNATE, 0 },
164 { STATE_ALTERNATE, "mm0:timestamp", STATE_ALTERNATE_TIMESTAMP, 1 },
165 { STATE_ALTERNATE, "size", STATE_ALTERNATE_SIZE, 1 },
166 { STATE_ALTERNATE, "verification", STATE_ALTERNATE_VERIFICATION, 0 },
167 { STATE_ALTERNATE_VERIFICATION, "hash", STATE_ALTERNATE_HASH, 1 },
168 { STATE_RESOURCES, "url", STATE_URL, 1 },
169 { NUMSTATES, NULL, NUMSTATES, 0 }
170 };
171
172 static void
lr_metalink_start_handler(void * pdata,const xmlChar * xmlElement,const xmlChar ** xmlAttr)173 lr_metalink_start_handler(void *pdata, const xmlChar *xmlElement, const xmlChar **xmlAttr)
174 {
175 LrParserData *pd = pdata;
176 LrStatesSwitch *sw;
177 const char **attr = (const char **)xmlAttr;
178 const char *element = (const char *)xmlElement;
179
180 if (pd->err)
181 return; // There was an error -> do nothing
182
183 if (pd->depth != pd->statedepth) {
184 // We are inside of unknown element
185 pd->depth++;
186 return;
187 }
188 pd->depth++;
189
190 if (!pd->swtab[pd->state]) {
191 // Current element should not have any sub elements
192 return;
193 }
194
195 // Find current state by its name
196 for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++)
197 if (!g_strcmp0(element, sw->ename))
198 break;
199 if (sw->from != pd->state) {
200 // No state for current element (unknown element)
201 lr_xml_parser_warning(pd, LR_XML_WARNING_UNKNOWNTAG,
202 "Unknown element \"%s\"", element);
203 return;
204 }
205
206 // Update parser data
207 pd->state = sw->to;
208 pd->docontent = sw->docontent;
209 pd->statedepth = pd->depth;
210 pd->lcontent = 0;
211 pd->content[0] = '\0';
212
213 if (pd->ignore && pd->state != STATE_FILE)
214 return; /* Ignore all subelements of the current file element */
215
216 switch (pd->state) {
217 case STATE_START:
218 case STATE_METALINK:
219 case STATE_FILES:
220 break;
221
222 case STATE_FILE: {
223 assert(pd->metalink);
224 assert(!pd->metalinkurl);
225 assert(!pd->metalinkhash);
226
227 const char *name = lr_find_attr("name", attr);
228 if (!name) {
229 g_debug("%s: Missing attribute \"name\" of file element", __func__);
230 g_set_error(&pd->err, LR_METALINK_ERROR, LRE_MLXML,
231 "Missing attribute \"name\" of file element");
232 break;
233 }
234 if (pd->found || g_strcmp0(name, pd->filename)) {
235 pd->ignore = 1;
236 break;
237 } else {
238 pd->ignore = 0;
239 pd->found = 1;
240 }
241 pd->metalink->filename = g_strdup(name);
242 break;
243 }
244 case STATE_TIMESTAMP:
245 case STATE_SIZE:
246 case STATE_VERIFICATION:
247 case STATE_ALTERNATES:
248 break;
249
250 case STATE_ALTERNATE:
251 assert(pd->metalink);
252 assert(!pd->metalinkurl);
253 assert(!pd->metalinkhash);
254 assert(!pd->metalinkalternate);
255
256 LrMetalinkAlternate *ma;
257 ma = lr_new_metalinkalternate(pd->metalink);
258 pd->metalinkalternate = ma;
259 break;
260
261 case STATE_ALTERNATE_TIMESTAMP:
262 case STATE_ALTERNATE_SIZE:
263 case STATE_ALTERNATE_VERIFICATION:
264 break;
265
266 case STATE_HASH: {
267 assert(pd->metalink);
268 assert(!pd->metalinkurl);
269 assert(!pd->metalinkhash);
270 assert(!pd->metalinkalternate);
271
272 LrMetalinkHash *mh;
273 const char *type = lr_find_attr("type", attr);
274 if (!type) {
275 // Type of the hash is not specifed -> skip it
276 lr_xml_parser_warning(pd, LR_XML_WARNING_MISSINGATTR,
277 "hash element doesn't have attribute \"type\"");
278 break;
279 }
280 mh = lr_new_metalinkhash(pd->metalink);
281 mh->type = g_strdup(type);
282 pd->metalinkhash = mh;
283 break;
284 }
285
286 case STATE_ALTERNATE_HASH: {
287 assert(pd->metalink);
288 assert(pd->metalinkalternate);
289 assert(!pd->metalinkurl);
290 assert(!pd->metalinkhash);
291
292 LrMetalinkHash *mh;
293 const char *type = lr_find_attr("type", attr);
294 if (!type) {
295 // Type of the hash is not specifed -> skip it
296 lr_xml_parser_warning(pd, LR_XML_WARNING_MISSINGATTR,
297 "hash element doesn't have attribute \"type\"");
298 break;
299 }
300 mh = lr_new_metalinkalternate_hash(pd->metalinkalternate);
301 mh->type = g_strdup(type);
302 pd->metalinkhash = mh;
303 break;
304 }
305
306 case STATE_RESOURCES:
307 break;
308
309 case STATE_URL: {
310 assert(pd->metalink);
311 assert(!pd->metalinkurl);
312 assert(!pd->metalinkhash);
313
314 const char *val;
315 assert(!pd->metalinkurl);
316 LrMetalinkUrl *url = lr_new_metalinkurl(pd->metalink);
317 if ((val = lr_find_attr("protocol", attr)))
318 url->protocol = g_strdup(val);
319 if ((val = lr_find_attr("type", attr)))
320 url->type = g_strdup(val);
321 if ((val = lr_find_attr("location", attr)))
322 url->location = g_strdup(val);
323 if ((val = lr_find_attr("preference", attr))) {
324 long long ll_val = lr_xml_parser_strtoll(pd, val, 0);
325 if (ll_val < 0 || ll_val > 100) {
326 lr_xml_parser_warning(pd, LR_XML_WARNING_BADATTRVAL,
327 "Bad value (\"%s\") of \"preference\" attribute in url element"
328 " (should be in range 0-100)", val);
329 } else {
330 url->preference = ll_val;
331 }
332 }
333 pd->metalinkurl = url;
334 break;
335 }
336
337 default:
338 break;
339 };
340
341 return;
342 }
343
344 static void
lr_metalink_end_handler(void * pdata,G_GNUC_UNUSED const xmlChar * element)345 lr_metalink_end_handler(void *pdata, G_GNUC_UNUSED const xmlChar *element)
346 {
347 LrParserData *pd = pdata;
348 unsigned int state = pd->state;
349
350 if (pd->err)
351 return; // There was an error -> do nothing
352
353 if (pd->depth != pd->statedepth) {
354 // Back from the unknown state
355 pd->depth--;
356 return;
357 }
358
359 pd->depth--;
360 pd->statedepth--;
361 pd->state = pd->sbtab[pd->state];
362 pd->docontent = 0;
363
364 if (pd->ignore && state != STATE_FILE) {
365 // Ignore all subelements of the current file element
366 return;
367 }
368
369 switch (state) {
370 case STATE_START:
371 case STATE_METALINK:
372 case STATE_FILES:
373 case STATE_FILE:
374 case STATE_VERIFICATION:
375 case STATE_ALTERNATES:
376 case STATE_ALTERNATE_VERIFICATION:
377 break;
378
379 case STATE_RESOURCES:
380 break;
381
382 case STATE_TIMESTAMP:
383 assert(pd->metalink);
384 assert(!pd->metalinkurl);
385 assert(!pd->metalinkhash);
386
387 pd->metalink->timestamp = lr_xml_parser_strtoll(pd, pd->content, 0);
388 break;
389
390 case STATE_SIZE:
391 assert(pd->metalink);
392 assert(!pd->metalinkurl);
393 assert(!pd->metalinkhash);
394
395 pd->metalink->size = lr_xml_parser_strtoll(pd, pd->content, 0);
396 break;
397
398 case STATE_HASH:
399 assert(pd->metalink);
400 assert(!pd->metalinkurl);
401
402 if (!pd->metalinkhash) {
403 // If hash has no type
404 break;
405 }
406
407 pd->metalinkhash->value = g_strdup(pd->content);
408 pd->metalinkhash = NULL;
409 break;
410
411 case STATE_ALTERNATE:
412 assert(pd->metalink);
413 assert(pd->metalinkalternate);
414 pd->metalinkalternate = NULL;
415 break;
416
417 case STATE_ALTERNATE_TIMESTAMP:
418 assert(pd->metalink);
419 assert(!pd->metalinkurl);
420 assert(!pd->metalinkhash);
421 assert(pd->metalinkalternate);
422
423 pd->metalinkalternate->timestamp = lr_xml_parser_strtoll(pd, pd->content, 0);
424 break;
425
426 case STATE_ALTERNATE_SIZE:
427 assert(pd->metalink);
428 assert(!pd->metalinkurl);
429 assert(!pd->metalinkhash);
430 assert(pd->metalinkalternate);
431
432 pd->metalinkalternate->size = lr_xml_parser_strtoll(pd, pd->content, 0);
433 break;
434
435 case STATE_ALTERNATE_HASH:
436 assert(pd->metalink);
437 assert(pd->metalinkalternate);
438 assert(!pd->metalinkurl);
439
440 if (!pd->metalinkhash) {
441 // If hash has no type
442 break;
443 }
444
445 pd->metalinkhash->value = g_strdup(pd->content);
446 pd->metalinkhash = NULL;
447 break;
448
449 case STATE_URL:
450 assert(pd->metalink);
451 assert(pd->metalinkurl);
452 assert(!pd->metalinkhash);
453
454 pd->metalinkurl->url = g_strdup(pd->content);
455 pd->metalinkurl = NULL;
456 break;
457
458 default:
459 break;
460 };
461
462 return;
463 }
464
465 gboolean
lr_metalink_parse_file(LrMetalink * metalink,int fd,const char * filename,LrXmlParserWarningCb warningcb,void * warningcb_data,GError ** err)466 lr_metalink_parse_file(LrMetalink *metalink,
467 int fd,
468 const char *filename,
469 LrXmlParserWarningCb warningcb,
470 void *warningcb_data,
471 GError **err)
472 {
473 gboolean ret = TRUE;
474 LrParserData *pd;
475 XmlParser parser;
476 GError *tmp_err = NULL;
477
478 assert(metalink);
479 assert(fd >= 0);
480 assert(filename);
481 assert(!err || *err == NULL);
482
483 // Init
484
485 memset(&parser, 0, sizeof(parser));
486 parser.startElement = lr_metalink_start_handler;
487 parser.endElement = lr_metalink_end_handler;
488 parser.characters = lr_char_handler;
489
490 pd = lr_xml_parser_data_new(NUMSTATES);
491 pd->parser = &parser;
492 pd->state = STATE_START;
493 pd->metalink = metalink;
494 pd->filename = (char *) filename;
495 pd->ignore = 1;
496 pd->found = 0;
497 pd->warningcb = warningcb;
498 pd->warningcb_data = warningcb_data;
499 for (LrStatesSwitch *sw = stateswitches; sw->from != NUMSTATES; sw++) {
500 if (!pd->swtab[sw->from])
501 pd->swtab[sw->from] = sw;
502 pd->sbtab[sw->to] = sw->from;
503 }
504
505 // Parsing
506
507 ret = lr_xml_parser_generic(parser, pd, fd, &tmp_err);
508 if (tmp_err) {
509 g_propagate_error(err, tmp_err);
510 goto err;
511 }
512
513 // Clean up
514
515 if (!pd->found) {
516 g_set_error(err, LR_METALINK_ERROR, LRE_MLBAD,
517 "file \"%s\" was not found in metalink", filename);
518 ret = FALSE; // The wanted file was not found in metalink
519 }
520
521 err:
522 lr_xml_parser_data_free(pd);
523
524 return ret;
525 }
526