1 /* librepo - A library providing (libcURL like) API to downloading repository
2  * Copyright (C) 2012  Tomas Mlcoch
3  *
4  * Licensed under the GNU Lesser General Public License Version 2.1
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <assert.h>
22 #include <errno.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <libxml/parser.h>
28 
29 #include "rcodes.h"
30 #include "util.h"
31 #include "metalink.h"
32 #include "xmlparser_internal.h"
33 
34 /** TODO:
35  * - (?) Use GStringChunk
36  */
37 
38 #define CHUNK_SIZE              8192
39 #define CONTENT_REALLOC_STEP    256
40 
41 /* Metalink object manipulation helpers */
42 
43 static LrMetalinkHash *
lr_new_metalinkhash(LrMetalink * m)44 lr_new_metalinkhash(LrMetalink *m)
45 {
46     assert(m);
47     LrMetalinkHash *hash = lr_malloc0(sizeof(*hash));
48     m->hashes = g_slist_append(m->hashes, hash);
49     return hash;
50 }
51 
52 static LrMetalinkHash *
lr_new_metalinkalternate_hash(LrMetalinkAlternate * ma)53 lr_new_metalinkalternate_hash(LrMetalinkAlternate *ma)
54 {
55     assert(ma);
56     LrMetalinkHash *hash = lr_malloc0(sizeof(*hash));
57     ma->hashes = g_slist_append(ma->hashes, hash);
58     return hash;
59 }
60 
61 static LrMetalinkUrl *
lr_new_metalinkurl(LrMetalink * m)62 lr_new_metalinkurl(LrMetalink *m)
63 {
64     assert(m);
65     LrMetalinkUrl *url = lr_malloc0(sizeof(*url));
66     m->urls = g_slist_append(m->urls, url);
67     return url;
68 }
69 
70 static LrMetalinkAlternate *
lr_new_metalinkalternate(LrMetalink * m)71 lr_new_metalinkalternate(LrMetalink *m)
72 {
73     assert(m);
74     LrMetalinkAlternate *alternate = lr_malloc0(sizeof(*alternate));
75     m->alternates = g_slist_append(m->alternates, alternate);
76     return alternate;
77 }
78 
79 static void
lr_free_metalinkhash(LrMetalinkHash * metalinkhash)80 lr_free_metalinkhash(LrMetalinkHash *metalinkhash)
81 {
82     if (!metalinkhash) return;
83     lr_free(metalinkhash->type);
84     lr_free(metalinkhash->value);
85     lr_free(metalinkhash);
86 }
87 
88 static void
lr_free_metalinkurl(LrMetalinkUrl * metalinkurl)89 lr_free_metalinkurl(LrMetalinkUrl *metalinkurl)
90 {
91     if (!metalinkurl) return;
92     lr_free(metalinkurl->protocol);
93     lr_free(metalinkurl->type);
94     lr_free(metalinkurl->location);
95     lr_free(metalinkurl->url);
96     lr_free(metalinkurl);
97 }
98 
99 static void
lr_free_metalinkalternate(LrMetalinkAlternate * metalinkalternate)100 lr_free_metalinkalternate(LrMetalinkAlternate *metalinkalternate)
101 {
102     if (!metalinkalternate) return;
103     g_slist_free_full(metalinkalternate->hashes,
104                       (GDestroyNotify)lr_free_metalinkhash);
105     lr_free(metalinkalternate);
106 }
107 
108 LrMetalink *
lr_metalink_init(void)109 lr_metalink_init(void)
110 {
111     return lr_malloc0(sizeof(LrMetalink));
112 }
113 
114 void
lr_metalink_free(LrMetalink * metalink)115 lr_metalink_free(LrMetalink *metalink)
116 {
117     if (!metalink)
118         return;
119 
120     lr_free(metalink->filename);
121     g_slist_free_full(metalink->hashes,
122                       (GDestroyNotify)lr_free_metalinkhash);
123     g_slist_free_full(metalink->urls,
124                       (GDestroyNotify)lr_free_metalinkurl);
125     g_slist_free_full(metalink->alternates,
126                       (GDestroyNotify)lr_free_metalinkalternate);
127     lr_free(metalink);
128 }
129 
/* The idea for this parser implementation is borrowed from libsolv */
131 
/* States of the metalink XML parser. Apart from STATE_START, every state is
 * entered when the element noted next to it is opened (see the transition
 * table below). NUMSTATES is not a real state: it is the number of states and
 * also marks the terminating entry of the transition table. The order of the
 * enumerators must not change, the values index the parser's lookup tables. */
typedef enum {
    STATE_START,                    /* initial state, nothing opened yet */
    STATE_METALINK,                 /* <metalink> */
    STATE_FILES,                    /* <files> */
    STATE_FILE,                     /* <file> */
    STATE_TIMESTAMP,                /* <mm0:timestamp> inside <file> */
    STATE_SIZE,                     /* <size> inside <file> */
    STATE_VERIFICATION,             /* <verification> inside <file> */
    STATE_HASH,                     /* <hash> inside <verification> */
    STATE_ALTERNATES,               /* <mm0:alternates> */
    STATE_ALTERNATE,                /* <mm0:alternate> */
    STATE_ALTERNATE_TIMESTAMP,      /* <mm0:timestamp> inside <mm0:alternate> */
    STATE_ALTERNATE_SIZE,           /* <size> inside <mm0:alternate> */
    STATE_ALTERNATE_VERIFICATION,   /* <verification> inside <mm0:alternate> */
    STATE_ALTERNATE_HASH,           /* <hash> inside the alternate's <verification> */
    STATE_RESOURCES,                /* <resources> */
    STATE_URL,                      /* <url> */
    NUMSTATES
} LrState;
151 
152 /* Same states in the first column must be together */
153 static LrStatesSwitch stateswitches[] = {
154     { STATE_START,      "metalink",         STATE_METALINK,                0 },
155     { STATE_METALINK,   "files",            STATE_FILES,                   0 },
156     { STATE_FILES,      "file",             STATE_FILE,                    0 },
157     { STATE_FILE,       "mm0:timestamp",    STATE_TIMESTAMP,               1 },
158     { STATE_FILE,       "size",             STATE_SIZE,                    1 },
159     { STATE_FILE,       "verification",     STATE_VERIFICATION,            0 },
160     { STATE_FILE,       "mm0:alternates",   STATE_ALTERNATES,              0 },
161     { STATE_FILE,       "resources",        STATE_RESOURCES,               0 },
162     { STATE_VERIFICATION, "hash",           STATE_HASH,                    1 },
163     { STATE_ALTERNATES, "mm0:alternate",    STATE_ALTERNATE,               0 },
164     { STATE_ALTERNATE,  "mm0:timestamp",    STATE_ALTERNATE_TIMESTAMP,     1 },
165     { STATE_ALTERNATE,  "size",             STATE_ALTERNATE_SIZE,          1 },
166     { STATE_ALTERNATE,  "verification",     STATE_ALTERNATE_VERIFICATION,  0 },
167     { STATE_ALTERNATE_VERIFICATION, "hash", STATE_ALTERNATE_HASH,          1 },
168     { STATE_RESOURCES,  "url",              STATE_URL,                     1 },
169     { NUMSTATES,        NULL,               NUMSTATES,                     0 }
170 };
171 
172 static void
lr_metalink_start_handler(void * pdata,const xmlChar * xmlElement,const xmlChar ** xmlAttr)173 lr_metalink_start_handler(void *pdata, const xmlChar *xmlElement, const xmlChar **xmlAttr)
174 {
175     LrParserData *pd = pdata;
176     LrStatesSwitch *sw;
177     const char **attr = (const char **)xmlAttr;
178     const char *element = (const char *)xmlElement;
179 
180     if (pd->err)
181         return; // There was an error -> do nothing
182 
183     if (pd->depth != pd->statedepth) {
184         // We are inside of unknown element
185         pd->depth++;
186         return;
187     }
188     pd->depth++;
189 
190     if (!pd->swtab[pd->state]) {
191         // Current element should not have any sub elements
192         return;
193     }
194 
195     // Find current state by its name
196     for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++)
197         if (!g_strcmp0(element, sw->ename))
198             break;
199     if (sw->from != pd->state) {
200         // No state for current element (unknown element)
201         lr_xml_parser_warning(pd, LR_XML_WARNING_UNKNOWNTAG,
202                               "Unknown element \"%s\"", element);
203         return;
204     }
205 
206     // Update parser data
207     pd->state = sw->to;
208     pd->docontent = sw->docontent;
209     pd->statedepth = pd->depth;
210     pd->lcontent = 0;
211     pd->content[0] = '\0';
212 
213     if (pd->ignore && pd->state != STATE_FILE)
214         return; /* Ignore all subelements of the current file element */
215 
216     switch (pd->state) {
217     case STATE_START:
218     case STATE_METALINK:
219     case STATE_FILES:
220         break;
221 
222     case STATE_FILE: {
223         assert(pd->metalink);
224         assert(!pd->metalinkurl);
225         assert(!pd->metalinkhash);
226 
227         const char *name = lr_find_attr("name", attr);
228         if (!name) {
229             g_debug("%s: Missing attribute \"name\" of file element", __func__);
230             g_set_error(&pd->err, LR_METALINK_ERROR, LRE_MLXML,
231                         "Missing attribute \"name\" of file element");
232             break;
233         }
234         if (pd->found || g_strcmp0(name, pd->filename)) {
235             pd->ignore = 1;
236             break;
237         } else {
238             pd->ignore = 0;
239             pd->found = 1;
240         }
241         pd->metalink->filename = g_strdup(name);
242         break;
243     }
244     case STATE_TIMESTAMP:
245     case STATE_SIZE:
246     case STATE_VERIFICATION:
247     case STATE_ALTERNATES:
248         break;
249 
250     case STATE_ALTERNATE:
251         assert(pd->metalink);
252         assert(!pd->metalinkurl);
253         assert(!pd->metalinkhash);
254         assert(!pd->metalinkalternate);
255 
256         LrMetalinkAlternate *ma;
257         ma = lr_new_metalinkalternate(pd->metalink);
258         pd->metalinkalternate = ma;
259         break;
260 
261     case STATE_ALTERNATE_TIMESTAMP:
262     case STATE_ALTERNATE_SIZE:
263     case STATE_ALTERNATE_VERIFICATION:
264         break;
265 
266     case STATE_HASH: {
267         assert(pd->metalink);
268         assert(!pd->metalinkurl);
269         assert(!pd->metalinkhash);
270         assert(!pd->metalinkalternate);
271 
272         LrMetalinkHash *mh;
273         const char *type = lr_find_attr("type", attr);
274         if (!type) {
275             // Type of the hash is not specifed -> skip it
276             lr_xml_parser_warning(pd, LR_XML_WARNING_MISSINGATTR,
277                               "hash element doesn't have attribute \"type\"");
278             break;
279         }
280         mh = lr_new_metalinkhash(pd->metalink);
281         mh->type = g_strdup(type);
282         pd->metalinkhash = mh;
283         break;
284     }
285 
286     case STATE_ALTERNATE_HASH: {
287         assert(pd->metalink);
288         assert(pd->metalinkalternate);
289         assert(!pd->metalinkurl);
290         assert(!pd->metalinkhash);
291 
292         LrMetalinkHash *mh;
293         const char *type = lr_find_attr("type", attr);
294         if (!type) {
295             // Type of the hash is not specifed -> skip it
296             lr_xml_parser_warning(pd, LR_XML_WARNING_MISSINGATTR,
297                               "hash element doesn't have attribute \"type\"");
298             break;
299         }
300         mh = lr_new_metalinkalternate_hash(pd->metalinkalternate);
301         mh->type = g_strdup(type);
302         pd->metalinkhash = mh;
303         break;
304     }
305 
306     case STATE_RESOURCES:
307         break;
308 
309     case STATE_URL: {
310         assert(pd->metalink);
311         assert(!pd->metalinkurl);
312         assert(!pd->metalinkhash);
313 
314         const char *val;
315         assert(!pd->metalinkurl);
316         LrMetalinkUrl *url = lr_new_metalinkurl(pd->metalink);
317         if ((val = lr_find_attr("protocol", attr)))
318             url->protocol = g_strdup(val);
319         if ((val = lr_find_attr("type", attr)))
320             url->type = g_strdup(val);
321         if ((val = lr_find_attr("location", attr)))
322             url->location = g_strdup(val);
323         if ((val = lr_find_attr("preference", attr))) {
324             long long ll_val = lr_xml_parser_strtoll(pd, val, 0);
325             if (ll_val < 0 || ll_val > 100) {
326                 lr_xml_parser_warning(pd, LR_XML_WARNING_BADATTRVAL,
327                 "Bad value (\"%s\") of \"preference\" attribute in url element"
328                 " (should be in range 0-100)", val);
329             } else {
330                 url->preference = ll_val;
331             }
332         }
333         pd->metalinkurl = url;
334         break;
335     }
336 
337     default:
338         break;
339     };
340 
341     return;
342 }
343 
344 static void
lr_metalink_end_handler(void * pdata,G_GNUC_UNUSED const xmlChar * element)345 lr_metalink_end_handler(void *pdata, G_GNUC_UNUSED const xmlChar *element)
346 {
347     LrParserData *pd = pdata;
348     unsigned int state = pd->state;
349 
350     if (pd->err)
351         return; // There was an error -> do nothing
352 
353     if (pd->depth != pd->statedepth) {
354         // Back from the unknown state
355         pd->depth--;
356         return;
357     }
358 
359     pd->depth--;
360     pd->statedepth--;
361     pd->state = pd->sbtab[pd->state];
362     pd->docontent = 0;
363 
364     if (pd->ignore && state != STATE_FILE) {
365         // Ignore all subelements of the current file element
366         return;
367     }
368 
369     switch (state) {
370     case STATE_START:
371     case STATE_METALINK:
372     case STATE_FILES:
373     case STATE_FILE:
374     case STATE_VERIFICATION:
375     case STATE_ALTERNATES:
376     case STATE_ALTERNATE_VERIFICATION:
377         break;
378 
379     case STATE_RESOURCES:
380         break;
381 
382     case STATE_TIMESTAMP:
383         assert(pd->metalink);
384         assert(!pd->metalinkurl);
385         assert(!pd->metalinkhash);
386 
387         pd->metalink->timestamp = lr_xml_parser_strtoll(pd, pd->content, 0);
388         break;
389 
390     case STATE_SIZE:
391         assert(pd->metalink);
392         assert(!pd->metalinkurl);
393         assert(!pd->metalinkhash);
394 
395         pd->metalink->size = lr_xml_parser_strtoll(pd, pd->content, 0);
396         break;
397 
398     case STATE_HASH:
399         assert(pd->metalink);
400         assert(!pd->metalinkurl);
401 
402         if (!pd->metalinkhash) {
403             // If hash has no type
404             break;
405         }
406 
407         pd->metalinkhash->value = g_strdup(pd->content);
408         pd->metalinkhash = NULL;
409         break;
410 
411     case STATE_ALTERNATE:
412         assert(pd->metalink);
413         assert(pd->metalinkalternate);
414         pd->metalinkalternate = NULL;
415         break;
416 
417     case STATE_ALTERNATE_TIMESTAMP:
418         assert(pd->metalink);
419         assert(!pd->metalinkurl);
420         assert(!pd->metalinkhash);
421         assert(pd->metalinkalternate);
422 
423         pd->metalinkalternate->timestamp = lr_xml_parser_strtoll(pd, pd->content, 0);
424         break;
425 
426     case STATE_ALTERNATE_SIZE:
427         assert(pd->metalink);
428         assert(!pd->metalinkurl);
429         assert(!pd->metalinkhash);
430         assert(pd->metalinkalternate);
431 
432         pd->metalinkalternate->size = lr_xml_parser_strtoll(pd, pd->content, 0);
433         break;
434 
435     case STATE_ALTERNATE_HASH:
436         assert(pd->metalink);
437         assert(pd->metalinkalternate);
438         assert(!pd->metalinkurl);
439 
440         if (!pd->metalinkhash) {
441             // If hash has no type
442             break;
443         }
444 
445         pd->metalinkhash->value = g_strdup(pd->content);
446         pd->metalinkhash = NULL;
447         break;
448 
449     case STATE_URL:
450         assert(pd->metalink);
451         assert(pd->metalinkurl);
452         assert(!pd->metalinkhash);
453 
454         pd->metalinkurl->url = g_strdup(pd->content);
455         pd->metalinkurl = NULL;
456         break;
457 
458     default:
459         break;
460     };
461 
462     return;
463 }
464 
465 gboolean
lr_metalink_parse_file(LrMetalink * metalink,int fd,const char * filename,LrXmlParserWarningCb warningcb,void * warningcb_data,GError ** err)466 lr_metalink_parse_file(LrMetalink *metalink,
467                        int fd,
468                        const char *filename,
469                        LrXmlParserWarningCb warningcb,
470                        void *warningcb_data,
471                        GError **err)
472 {
473     gboolean ret = TRUE;
474     LrParserData *pd;
475     XmlParser parser;
476     GError *tmp_err = NULL;
477 
478     assert(metalink);
479     assert(fd >= 0);
480     assert(filename);
481     assert(!err || *err == NULL);
482 
483     // Init
484 
485     memset(&parser, 0, sizeof(parser));
486     parser.startElement = lr_metalink_start_handler;
487     parser.endElement = lr_metalink_end_handler;
488     parser.characters = lr_char_handler;
489 
490     pd = lr_xml_parser_data_new(NUMSTATES);
491     pd->parser = &parser;
492     pd->state = STATE_START;
493     pd->metalink = metalink;
494     pd->filename = (char *) filename;
495     pd->ignore = 1;
496     pd->found = 0;
497     pd->warningcb = warningcb;
498     pd->warningcb_data = warningcb_data;
499     for (LrStatesSwitch *sw = stateswitches; sw->from != NUMSTATES; sw++) {
500         if (!pd->swtab[sw->from])
501             pd->swtab[sw->from] = sw;
502         pd->sbtab[sw->to] = sw->from;
503     }
504 
505     // Parsing
506 
507     ret = lr_xml_parser_generic(parser, pd, fd, &tmp_err);
508     if (tmp_err) {
509         g_propagate_error(err, tmp_err);
510         goto err;
511     }
512 
513     // Clean up
514 
515     if (!pd->found) {
516         g_set_error(err, LR_METALINK_ERROR, LRE_MLBAD,
517                     "file \"%s\" was not found in metalink", filename);
518         ret = FALSE; // The wanted file was not found in metalink
519     }
520 
521 err:
522     lr_xml_parser_data_free(pd);
523 
524     return ret;
525 }
526