1 /* ncdc - NCurses Direct Connect client
2 
3   Copyright (c) 2011-2019 Yoran Heling
4 
5   Permission is hereby granted, free of charge, to any person obtaining
6   a copy of this software and associated documentation files (the
7   "Software"), to deal in the Software without restriction, including
8   without limitation the rights to use, copy, modify, merge, publish,
9   distribute, sublicense, and/or sell copies of the Software, and to
10   permit persons to whom the Software is furnished to do so, subject to
11   the following conditions:
12 
13   The above copyright notice and this permission notice shall be included
14   in all copies or substantial portions of the Software.
15 
16   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 
24 */
25 
26 
27 #include "ncdc.h"
28 #include "fl_load.h"
29 #include <yxml.h>
30 
31 
// Buffer sizes used by the file list loader.
enum {
  // Size of the stack buffer handed to yxml_init().
  STACKSIZE = 8*1024,

  // Size of the buffer that data is read into, and of the buffer holding the
  // (decompressed) XML that is fed to the parser.
  READBUFSIZE = 32*1024,

  // Capacity of the buffer in which attribute values are collected. Values
  // are only collected for the attributes we care about, and those tend to
  // be short; file names are the longest possible values. I am unaware of a
  // filesystem that allows file names longer than 256 bytes, so this should
  // be a safe value.
  MAXATTRVAL = 1024
};
39 
40 
// Parser states, as stored in the 'state' field of the parse context.
enum {
  S_START    = 0, // waiting for <FileListing>
  S_FLOPEN   = 1, // In a <FileListing ..>
  S_DIROPEN  = 2, // In a <Directory ..>
  S_INDIR    = 3, // In a <Directory>..</Directory> or <FileListing>..</FileListing>
  S_FILEOPEN = 4, // In a <File ..>
  S_INFILE   = 5  // In a <File>..</File>
};
47 
48 
49 typedef struct ctx_t {
50   gboolean local;
51   int state;
52   char filetth[24];
53   gboolean filehastth;
54   guint64 filesize;
55   char *name;
56   fl_list_t *root;
57   fl_list_t *cur;
58   int unknown_level;
59 
60   int consume;
61   char *attrp;
62   char attr[MAXATTRVAL];
63 
64   yxml_t x;
65   char stack[STACKSIZE];
66   char buf[READBUFSIZE];
67 } ctx_t;
68 
69 
70 
// Returns non-zero if `name` is acceptable as the name of a file list entry:
// it may not be "." or "..", and it may not contain a '/'. Implemented as a
// function rather than a macro so that the argument is evaluated only once
// and is type-checked. `name` must be a valid NUL-terminated string.
static inline int isvalidfilename(const char *name) {
  if(name[0] == '.' && (!name[1] || (name[1] == '.' && !name[2])))
    return 0;
  return !strchr(name, '/');
}
73 
74 
// Processes a single token `r` returned by yxml_parse() and updates the parse
// context `x` accordingly: tracks the element nesting, collects the Name, Size
// and TTH attributes, and adds file and directory entries to the list that is
// being built. The caller is expected to filter out YXML_OK and error values.
//
// On invalid input *err is set; the context may have been partially updated
// at that point, and the caller should stop parsing.
static void fl_load_token(ctx_t *x, yxml_ret_t r, GError **err) {
  // Detect the end of the attributes for an open XML element. Any token that
  // is not attribute-related means that the opening tag is complete, so this
  // is where the entry for that tag is created.
  if(r != YXML_ATTRSTART && r != YXML_ATTRVAL && r != YXML_ATTREND) {
    if(x->state == S_DIROPEN) {
      if(!x->name) {
        g_set_error_literal(err, 1, 0, "Missing Name attribute in Directory element");
        return;
      }
      // Create the directory entry and descend into it.
      fl_list_t *new = fl_list_create(x->name, FALSE);
      new->isfile = FALSE;
      new->sub = g_ptr_array_new_with_free_func(fl_list_free);
      fl_list_add(x->cur, new, -1);
      x->cur = new;

      g_free(x->name);
      x->name = NULL;
      x->state = S_INDIR;

    } else if(x->state == S_FILEOPEN) {
      if(!x->name || !x->filehastth || x->filesize == G_MAXUINT64) {
        g_set_error(err, 1, 0, "Missing %s attribute in File element",
          !x->name ? "Name" : !x->filehastth ? "TTH" : "Size");
        return;
      }
      // Create the file entry
      fl_list_t *new = fl_list_create(x->name, x->local);
      new->isfile = TRUE;
      new->size = x->filesize;
      new->hastth = TRUE;
      memcpy(new->tth, x->filetth, 24);
      fl_list_add(x->cur, new, -1);

      // Reset the collected attributes for the next element.
      x->filehastth = FALSE;
      x->filesize = G_MAXUINT64;
      g_free(x->name);
      x->name = NULL;
      x->state = S_INFILE;

    } else if(x->state == S_FLOPEN)
      x->state = S_INDIR;
  }

  switch(r) {
  case YXML_ELEMSTART:
    if(x->unknown_level)
      x->unknown_level++;
    else if(x->state == S_START) {
      if(g_ascii_strcasecmp(x->x.elem, "FileListing") == 0)
        x->state = S_FLOPEN;
      else
        g_set_error_literal(err, 1, 0, "XML root element is not <FileListing>");
    } else if(g_ascii_strcasecmp(x->x.elem, "File") == 0) {
      // A <File> may not be nested in another <File>. Accepting it would
      // leave the state at S_INDIR once the inner file is closed, after which
      // the outer </File> would be handled as the end of a directory and move
      // x->cur up one level too far.
      if(x->state == S_INFILE)
        g_set_error_literal(err, 1, 0, "Invalid <File> inside a <File>");
      else
        x->state = S_FILEOPEN;
    } else if(g_ascii_strcasecmp(x->x.elem, "Directory") == 0) {
      if(x->state == S_INFILE)
        g_set_error_literal(err, 1, 0, "Invalid <Directory> inside a <File>");
      else
        x->state = S_DIROPEN;
    } else
      x->unknown_level++;
    break;

  case YXML_ELEMEND:
    if(x->unknown_level)
      x->unknown_level--;
    else if(x->state == S_INFILE)
      x->state = S_INDIR;
    else {
      // End of a <Directory> or of <FileListing>: sort the finished directory
      // and continue in its parent.
      fl_list_sort(x->cur);
      x->cur = x->cur->parent;
    }
    break;

  case YXML_ATTRSTART:
    // Only collect the values of attributes that are actually used: Name for
    // directories, and Name, Size and TTH for files.
    x->consume = !x->unknown_level && (
      (x->state == S_DIROPEN && g_ascii_strcasecmp(x->x.attr, "Name") == 0) ||
      (x->state == S_FILEOPEN && (
        g_ascii_strcasecmp(x->x.attr, "Name") == 0 ||
        g_ascii_strcasecmp(x->x.attr, "Size") == 0 ||
        g_ascii_strcasecmp(x->x.attr, "TTH") == 0
      ))
    );
    x->attrp = x->attr;
    break;

  case YXML_ATTRVAL:
    if(!x->consume)
      break;
    // Keep enough room for the bytes of this token plus the terminating zero
    // that is written at YXML_ATTREND.
    if(x->attrp-x->attr > sizeof(x->attr)-5) {
      g_set_error_literal(err, 1, 0, "Too long XML attribute");
      return;
    }
    for(const char *v = x->x.data; *v; v++)
      *(x->attrp++) = *v;
    break;

  case YXML_ATTREND:
    if(!x->consume)
      break;
    *x->attrp = 0;
    // Only Name, Size and TTH get here (see YXML_ATTRSTART), so the first
    // letter, lowercased, is enough to tell them apart. For each attribute
    // only the first occurrence in a tag is used.

    // Name, for either file or directory
    if((*x->x.attr|32) == 'n' && !x->name) {
      x->name = g_utf8_validate(x->attr, -1, NULL) ? g_strdup(x->attr) : str_convert("UTF-8", "UTF-8", x->attr);
      if(!isvalidfilename(x->name))
        g_set_error_literal(err, 1, 0, "Invalid file name");
    }
    // TTH, for files
    if((*x->x.attr|32) == 't' && !x->filehastth) {
      if(!istth(x->attr))
        g_set_error_literal(err, 1, 0, "Invalid TTH");
      else {
        base32_decode(x->attr, x->filetth);
        x->filehastth = TRUE;
      }
    }
    // Size, for files
    if((*x->x.attr|32) == 's' && x->filesize == G_MAXUINT64) {
      char *end = NULL;
      x->filesize = g_ascii_strtoull(x->attr, &end, 10);
      if(!end || *end)
        g_set_error_literal(err, 1, 0, "Invalid file size");
    }
    break;

  default:
    break;
  }
}
206 
207 
// Decompresses the next chunk of a bzip2 stream. `bzbuf` is a buffer of
// READBUFSIZE bytes that holds compressed input between calls; it is refilled
// from `fd` whenever the decompressor has used up all of its input. The caller
// sets bzs->next_out and bzs->avail_out before each call.
//
// Returns READBUFSIZE minus the remaining bzs->avail_out, which is the number
// of bytes written to the output buffer when the caller set avail_out to
// READBUFSIZE. Returns -1 on end of file (*err is left untouched) or on a
// read or decompression error (*err is set).
static int fl_load_readbz(bz_stream *bzs, int fd, char *bzbuf, GError **err) {
  bzs->next_in = bzbuf;
  if(bzs->avail_in == 0) {
    int buflen = read(fd, bzbuf, READBUFSIZE);
    if(buflen == 0)
      return -1;
    if(buflen < 0) {
      g_set_error(err, 1, 0, "Read error: %s", g_strerror(errno));
      return -1;
    }
    bzs->avail_in = buflen;
  }

  int bzerr = BZ2_bzDecompress(bzs);
  if(bzerr == BZ_STREAM_END) {
    // Start over with a fresh decompressor, so that any input following the
    // end of this stream is decoded as a new stream.
    BZ2_bzDecompressEnd(bzs);
    bzerr = BZ2_bzDecompressInit(bzs, 0, 0);
    if(bzerr != BZ_OK) {
      g_set_error(err, 1, 0, "bzip2 initialization error (%d)", bzerr);
      return -1;
    }
  } else if(bzerr != BZ_OK) {
    // The bzip2 error code is the only information available here; errno is
    // not related to decompression failures and is therefore not reported.
    g_set_error(err, 1, 0, "bzip2 decompression error (%d)", bzerr);
    return -1;
  }

  // Move any unconsumed input to the start of the buffer for the next call.
  memmove(bzbuf, bzs->next_in, bzs->avail_in);
  bzs->next_in = bzbuf;
  return READBUFSIZE-bzs->avail_out;
}
236 
237 
fl_load_parse(int fd,bz_stream * bzs,gboolean local,GError ** err)238 static fl_list_t *fl_load_parse(int fd, bz_stream *bzs, gboolean local, GError **err) {
239   ctx_t *x = g_new(ctx_t, 1);
240   x->state = S_START;
241   x->root = fl_list_create("", FALSE);
242   x->root->sub = g_ptr_array_new_with_free_func(fl_list_free);
243   x->cur = x->root;
244   x->filesize = G_MAXUINT64;
245   x->local = local;
246   x->unknown_level = 0;
247   x->filehastth = FALSE;
248   x->name = NULL;
249 
250   yxml_init(&x->x, x->stack, STACKSIZE);
251   int buflen = 0;
252   char *bzbuf = NULL;
253 
254   while(1) {
255     // Fill buffer
256     if(bzs) {
257       if(!bzbuf)
258         bzbuf = g_malloc(READBUFSIZE);
259       bzs->next_out = x->buf;
260       bzs->avail_out = READBUFSIZE;
261       buflen = fl_load_readbz(bzs, fd, bzbuf, err);
262       if(buflen < 0)
263         break;
264     } else {
265       buflen = read(fd, x->buf, READBUFSIZE);
266       if(buflen == 0)
267         break;
268       if(buflen < 0) {
269         g_set_error(err, 1, 0, "Read error: %s", g_strerror(errno));
270         break;
271       }
272     }
273 
274     // And parse
275     char *pbuf = x->buf;
276     while(!*err && buflen > 0) {
277       yxml_ret_t r = yxml_parse(&x->x, *pbuf);
278       pbuf++;
279       buflen--;
280       if(r == YXML_OK)
281         continue;
282       if(r < 0) {
283         g_set_error_literal(err, 1, 0, "XML parsing error");
284         break;
285       }
286       fl_load_token(x, r, err);
287     }
288     if(*err) {
289       g_prefix_error(err, "Line %"G_GUINT32_FORMAT":%"G_GUINT64_FORMAT": ", x->x.line, x->x.byte);
290       break;
291     }
292   }
293 
294   if(!*err && yxml_eof(&x->x) < 0)
295     g_set_error_literal(err, 1, 0, "XML document did not end correctly");
296 
297   fl_list_t *root = x->root;
298   g_free(bzbuf);
299   g_free(x->name);
300   g_free(x);
301   return root;
302 }
303 
304 
fl_load(const char * file,GError ** err,gboolean local)305 fl_list_t *fl_load(const char *file, GError **err, gboolean local) {
306   g_return_val_if_fail(err == NULL || *err == NULL, NULL);
307 
308   fl_list_t *root = NULL;
309   int fd;
310   bz_stream *bzs = NULL;
311   GError *ierr = NULL;
312 
313   // open file
314   fd = open(file, O_RDONLY);
315   if(fd < 0) {
316     g_set_error_literal(&ierr, 1, 0, g_strerror(errno));
317     goto end;
318   }
319 
320   // Create BZ2 stream object if this is a bzip2 file
321   if(strlen(file) > 4 && strcmp(file+(strlen(file)-4), ".bz2") == 0) {
322     bzs = g_new0(bz_stream, 1);
323     BZ2_bzDecompressInit(bzs, 0, 0);
324   }
325 
326   root = fl_load_parse(fd, bzs, local, &ierr);
327 
328 end:
329   if(bzs) {
330     BZ2_bzDecompressEnd(bzs);
331     g_free(bzs);
332   }
333   if(fd >= 0)
334     close(fd);
335   if(ierr) {
336     g_propagate_error(err, ierr);
337     if(root)
338       fl_list_free(root);
339     root = NULL;
340   }
341   return root;
342 }
343 
344 
345 
346 
347 
348 // Async version of fl_load(). Performs the load in a background thread. Only
349 // used for non-local filelists.
350 
351 typedef struct async_t {
352   char *file;
353   void (*cb)(fl_list_t *, GError *, void *);
354   void *dat;
355   GError *err;
356   fl_list_t *fl;
357 } async_t;
358 
359 
async_d(gpointer dat)360 static gboolean async_d(gpointer dat) {
361   async_t *arg = dat;
362   arg->cb(arg->fl, arg->err, arg->dat);
363   g_free(arg->file);
364   g_slice_free(async_t, arg);
365   return FALSE;
366 }
367 
368 
// Thread pool worker: loads the requested file list as a non-local list,
// stores the result in the request and schedules async_d() as an idle
// function to report it. `udat` is unused.
static void async_f(gpointer dat, gpointer udat) {
  async_t *req = dat;
  fl_list_t *loaded = fl_load(req->file, &req->err, FALSE);
  req->fl = loaded;
  g_idle_add(async_d, req);
}
374 
375 
376 // Ownership of both the file list and the error is passed to the callback
377 // function.
fl_load_async(const char * file,void (* cb)(fl_list_t *,GError *,void *),void * dat)378 void fl_load_async(const char *file, void (*cb)(fl_list_t *, GError *, void *), void *dat) {
379   static GThreadPool *pool = NULL;
380   if(!pool)
381     pool = g_thread_pool_new(async_f, NULL, 2, FALSE, NULL);
382   async_t *arg = g_slice_new0(async_t);
383   arg->file = g_strdup(file);
384   arg->dat = dat;
385   arg->cb = cb;
386   g_thread_pool_push(pool, arg, NULL);
387 }
388 
389