1 /***************************************************************************/
2 /* This code is part of WWW grabber called pavuk */
3 /* Copyright (c) 1997 - 2001 Stefan Ondrejicka */
4 /* Distributed under GPL 2 or later */
5 /***************************************************************************/
6
7 #include "config.h"
8
9 #include <assert.h>
10 #include <ctype.h>
11 #include <sys/types.h>
12 #include <sys/stat.h>
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <stdint.h>
16 #include <string.h>
17 #include <unistd.h>
18 #include <limits.h>
19 #include <errno.h>
20
21 #include "gui.h"
22 #include "http.h"
23 #include "ftp.h"
24 #include "gopher.h"
25 #include "url.h"
26 #include "html.h"
27 #include "tools.h"
28 #include "authinfo.h"
29 #include "tr.h"
30 #include "dinfo.h"
31 #include "form.h"
32 #include "gui_api.h"
33 #include "lfname.h"
34
static char *url_decode_html(const char *, int);

/* Characters that are unsafe in local file names. */
/* Only defined for Cygwin builds.                  */
#ifdef __CYGWIN__
#define FS_UNSAFE_CHARACTERS "\\:*?\"<>|"
#endif

/* Digit table used when emitting %XX escapes. */
static const char hexa[] = "0123456789ABCDEF";

/* Numeric value (0..15) of the ASCII hex digit x.  The argument is */
/* evaluated more than once, so it must not have side effects.      */
#define HEXASC2HEXNR(x) (((x) >= '0' && (x) <= '9') ? \
  ((x) - '0') : (tl_ascii_toupper(x) - 'A' + 10))

/* Byte value of the escape "%XY", where x points at the '%'.       */
/* The argument and the whole expansion are parenthesized so that   */
/* the macro can be used inside larger expressions.                 */
#define HEX2CHAR(x) \
  ((HEXASC2HEXNR(*((x) + 1)) << 4) + HEXASC2HEXNR(*((x) + 2)))
49
50 const protinfo prottable[] = {
51 {URLT_UNKNOWN, NULL, "unknown", NULL, 0, FALSE},
52 {URLT_HTTP, "http", "http", "http://", 80, TRUE},
53 #ifdef USE_SSL
54 {URLT_HTTPS, "https", "https", "https://", 443, TRUE},
55 #else
56 {URLT_HTTPS, "https", "https", "https://", 443, FALSE},
57 #endif
58 {URLT_FTP, "ftp", "ftp", "ftp://", 21, TRUE},
59 #ifdef USE_SSL
60 {URLT_FTPS, "ftps", "ftps", "ftps://", 21, TRUE},
61 #else
62 {URLT_FTPS, "ftps", "ftps", "ftps://", 21, FALSE},
63 #endif
64 {URLT_FILE, NULL, "file", "file://", 0, TRUE},
65 {URLT_GOPHER, "gopher", "gopher", "gopher://", 70, TRUE},
66 {URLT_FROMPARENT, NULL, "//", "//", 80, TRUE}
67 };
68
#define _STRCLS_LOWER "abcdefghijklmnopqrstuvwxyz"
#define _STRCLS_UPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define _STRCLS_DIGIT "0123456789"

/*
 * Extract the scheme of a URL string.
 *
 * Returns a newly allocated, lower-cased copy of the text in front of
 * the first ':' when the string starts with a letter and that text
 * consists only of letters, digits, '+', '-' and '.'.  When there is no
 * such prefix but the string starts with "//", a newly allocated "//"
 * is returned.  In every other case the result is NULL.
 */
char *url_parse_scheme(char *urlstr)
{
  char *colon = strchr(urlstr, ':');
  char *scheme;
  int span;

  if(!colon || !tl_ascii_isalpha(*urlstr))
  {
    /* no "scheme:" prefix - maybe a reference starting with "//" */
    if(urlstr[0] == '/' && urlstr[1] == '/')
      return strdup("//");
    return NULL;
  }

  /* all characters up to the colon must be legal scheme characters */
  span = strspn(urlstr, _STRCLS_LOWER _STRCLS_UPER _STRCLS_DIGIT "+-.");
  if(span != (colon - urlstr))
    return NULL;

  scheme = tl_strndup(urlstr, span);
  lowerstr(scheme);
  return scheme;
}
97
/*
 * Return a newly allocated copy of the authority part of a URL.
 *
 * urlschpart is the URL text following the scheme.  When it starts with
 * "//", the authority is everything after those two slashes up to (not
 * including) the first '/', '?', '#' or ';'.  Otherwise NULL is returned.
 */
static char *url_parse_authority(char *urlschpart)
{
  char *start;
  int len;

  if(urlschpart[0] != '/' || urlschpart[1] != '/')
    return NULL;

  start = urlschpart + 2;
  len = strcspn(start, "/?#;");

  return tl_strndup(start, len);
}
111
/*
 * Split an authority string of the form [user[:password]@]host[:port].
 *
 * authority - string to split (not modified)
 * user      - receives a newly allocated user name or NULL; pass NULL
 *             to disable user-info parsing altogether
 * password  - receives a newly allocated password or NULL; may be NULL,
 *             in which case a password in the string is dropped
 * host      - receives a newly allocated, lower-cased host name
 * port      - receives the port number, or 0 when none is given
 *
 * Always returns 0.
 */
static int url_split_authority(char *authority, char **user, char **password,
  char **host, unsigned short *port)
{
  char *p, *p2;

  if(user)
    *user = NULL;
  if(password)
    *password = NULL;
  *host = NULL;
  *port = 0;

  /* the last '@' separates the user info from the host part */
  if(user && (p = strrchr(authority, '@')))
  {
    p2 = strchr(authority, ':');

    if(p2 && p2 < p)
    {
      *user = tl_strndup(authority, p2 - authority);
      /* password is optional for the caller, just like user */
      if(password)
        *password = tl_strndup(p2 + 1, p - p2 - 1);
    }
    else
    {
      *user = tl_strndup(authority, p - authority);
    }
    p++;
  }
  else
    p = authority;

  /* the last ':' of the host part introduces the port */
  if((p2 = strrchr(p, ':')))
  {
    *host = tl_strndup(p, p2 - p);
    *port = _atoi(p2 + 1);
  }
  else
  {
    *host = tl_strdup(p);
  }

  lowerstr(*host);

  return 0;
}
156
/*
 * Split the part of a URL that follows the authority into path, query
 * string and anchor.
 *
 * urlpath - text to split (not modified)
 * path    - receives the newly allocated path, or NULL when it is empty;
 *           a path starting with '/' has its backslashes translated to
 *           slashes and is passed through get_abs_file_path()
 * query   - receives the text after '?' (run through url_decode_html),
 *           or NULL; pass NULL to leave '?' unparsed
 * anchor  - receives the text after '#', or NULL; pass NULL to leave
 *           '#' unparsed
 *
 * Always returns 0.
 */
static int url_split_path(char *urlpath, char **path, char **query,
  char **anchor)
{
  char *hash = NULL;            /* position of '#', if looked for */
  char *qmark = NULL;           /* position of '?', if looked for */
  char *cut = NULL;             /* where the path part ends */
  char *raw = NULL;             /* path before normalization */

  *path = NULL;
  if(query)
  {
    *query = NULL;
    qmark = strchr(urlpath, '?');
  }
  if(anchor)
  {
    *anchor = NULL;
    hash = strchr(urlpath, '#');
  }

  if(hash && qmark)
  {
    if(hash > qmark)
    {
      /* ...?query#anchor */
      *anchor = tl_strdup(hash + 1);
      *query = url_decode_html(qmark + 1, hash - (qmark + 1));
      cut = qmark;
    }
    else
    {
      /* ...#anchor?query */
      *query = url_decode_html(qmark + 1, strlen(qmark + 1));
      *anchor = tl_strndup(hash + 1, qmark - (hash + 1));
      cut = hash;
    }
  }
  else if(hash)
  {
    *anchor = tl_strdup(hash + 1);
    cut = hash;
  }
  else if(qmark)
  {
    *query = url_decode_html(qmark + 1, strlen(qmark + 1));
    cut = qmark;
  }

  /* copy the path part, unless it is empty */
  if(cut)
  {
    if(cut != urlpath)
      raw = tl_strndup(urlpath, cut - urlpath);
  }
  else if(*urlpath)
  {
    raw = tl_strdup(urlpath);
  }

  if(raw && *raw == '/')
  {
    /* absolute paths are normalized */
    *path = get_abs_file_path(_strtrchr(raw, '\\', '/'));
    free(raw);
  }
  else
  {
    *path = raw;
  }

  return 0;
}
229
url_scheme_to_schemeid(char * scheme)230 protocol url_scheme_to_schemeid(char *scheme)
231 {
232 int i;
233 for(i = 0; i < NUM_ELEM(prottable); i++)
234 {
235 if(prottable[i].urlid && !strcmp(prottable[i].urlid, scheme))
236 {
237 return prottable[i].id;
238 }
239 }
240 return URLT_UNKNOWN;
241 }
242
243 /*
244 * If a path is relative and starts // we need to get the type from
245 * the parent, which only the caller can do. This function is called
246 * by the caller of url_parse when url_parse has returned type = URTL_FROMPARENT
247 * and the parent can figure out the path. It basically does all the work
248 * that url_parse would do once it knew the scheme.
249 * however, we start with the urlstr in url->p.unsup.urlstr rather
250 * than as an argument
251 */
url_finishpath(url * url)252 static void url_finishpath(url * url)
253 {
254 char *authority = NULL;
255 char *p;
256
257 if(url->type == URLT_FROMPARENT)
258 url->type = URLT_UNKNOWN;
259 if(url->type == URLT_UNKNOWN)
260 return; /* can't help here */
261
262 p = url->p.unsup.urlstr;
263 authority = url_parse_authority(p);
264 if(authority)
265 p += strlen(authority) + 2;
266
267 if(authority && *authority)
268 {
269 switch (url->type)
270 {
271 case URLT_FROMPARENT:
272 break;
273 case URLT_HTTP:
274 case URLT_HTTPS:
275 url_split_authority(authority,
276 &(url->p.http.user),
277 &(url->p.http.password), &(url->p.http.host), &(url->p.http.port));
278
279 if(!url->p.http.port)
280 url->p.http.port = prottable[url->type].default_port;
281
282 url_split_path(p,
283 &(url->p.http.document),
284 &(url->p.http.searchstr), &(url->p.http.anchor_name));
285
286 if(!url->p.http.document)
287 url->p.http.document = tl_strdup("/");
288 break;
289 case URLT_FTP:
290 case URLT_FTPS:
291 url_split_authority(authority,
292 &(url->p.ftp.user),
293 &(url->p.ftp.password), &(url->p.ftp.host), &(url->p.ftp.port));
294
295 if(!url->p.ftp.port)
296 url->p.ftp.port = prottable[url->type].default_port;
297
298 url_split_path(p, &url->p.ftp.path, NULL, &url->p.ftp.anchor_name);
299
300
301 if(!url->p.ftp.path)
302 url->p.ftp.path = tl_strdup("/");
303
304 if(p && p[0] == '/' && p[1] == '/')
305 {
306 char *pp = tl_str_concat(NULL, "/", url->p.ftp.path, NULL);
307 _free(url->p.ftp.path);
308 url->p.ftp.path = pp;
309 }
310
311 if((p = strrchr(url->p.ftp.path, ';')) && !strncasecmp(p, ";type=", 6))
312 *p = '\0';
313
314 url->p.ftp.dir = tl_is_dirname(url->p.ftp.path) != 0;
315 break;
316 case URLT_GOPHER:
317 url_split_authority(authority,
318 NULL, NULL, &(url->p.gopher.host), &(url->p.gopher.port));
319
320 if(!url->p.gopher.port)
321 url->p.gopher.port = prottable[url->type].default_port;
322
323 if(*(p + 1))
324 url->p.gopher.selector = tl_strdup(p + 1);
325 else
326 url->p.gopher.selector = tl_strdup("1");
327 break;
328 case URLT_FILE:
329 url_split_path(p,
330 &(url->p.file.filename),
331 &(url->p.file.searchstr), &(url->p.file.anchor_name));
332
333 if(!url->p.file.filename)
334 url->p.file.filename = tl_strdup("");
335 break;
336 default:
337 return;
338 }
339 }
340 if(!authority || !*authority)
341 {
342 switch (url->type)
343 {
344 case URLT_FILE:
345 case URLT_FTP:
346 case URLT_FTPS:
347 case URLT_HTTP:
348 case URLT_HTTPS:
349 url->type = URLT_FILE;
350 url_split_path(p,
351 &(url->p.file.filename),
352 &(url->p.file.searchstr), &(url->p.file.anchor_name));
353
354 if(!url->p.file.filename)
355 url->p.file.filename = tl_strdup("");
356 break;
357 default:
358 url->type = URLT_UNKNOWN;
359 return;
360 break;
361 }
362 }
363 _free(authority);
364 return;
365 }
366
url_parse(char * urlstr)367 url *url_parse(char *urlstr)
368 {
369 char *scheme = NULL;
370 char *authority = NULL;
371 char *p;
372 url ret_url;
373
374 ret_url.type = URLT_UNKNOWN;
375 ret_url.status = 0;
376 ret_url.parent_url = NULL;
377 ret_url.moved_to = NULL;
378 ret_url.ref_cnt = 1;
379 ret_url.level = 0;
380 ret_url.extension = NULL;
381 ret_url.local_name = NULL;
382
383 #ifdef WITH_TREE
384 #ifdef I_FACE
385 ret_url.prop = NULL;
386 ret_url.tree_nfo = NULL;
387 #endif
388 #endif
389
390 #ifdef HAVE_MT
391 pthread_mutex_init(&ret_url.lock, NULL);
392 #endif
393
394 p = urlstr;
395
396 if(p)
397 scheme = url_parse_scheme(urlstr);
398
399 if(scheme)
400 {
401 ret_url.type = url_scheme_to_schemeid(scheme);
402
403 /* If the string starts with // then we */
404 /* don't know the scheme type so we have */
405 /* to wait for the parent to set it. */
406 if(ret_url.type == URLT_FROMPARENT)
407 {
408 ret_url.p.unsup.urlstr = tl_strdup(urlstr);
409 authority = url_parse_authority(urlstr);
410 }
411 else
412 {
413 /* We do know the scheme type, so move past it */
414 /* and get the 'authority' */
415 p += strlen(scheme) + 1;
416 authority = url_parse_authority(p);
417 }
418
419 if(authority)
420 p += strlen(authority) + 2;
421
422 if(authority && *authority)
423 {
424 switch (ret_url.type)
425 {
426 case URLT_FROMPARENT:
427 break;
428 case URLT_HTTP:
429 case URLT_HTTPS:
430 url_split_authority(authority,
431 &ret_url.p.http.user,
432 &ret_url.p.http.password,
433 &ret_url.p.http.host, &ret_url.p.http.port);
434
435 if(!ret_url.p.http.port)
436 ret_url.p.http.port = prottable[ret_url.type].default_port;
437
438 url_split_path(p,
439 &ret_url.p.http.document,
440 &ret_url.p.http.searchstr, &ret_url.p.http.anchor_name);
441
442 if(!ret_url.p.http.document)
443 ret_url.p.http.document = tl_strdup("/");
444 break;
445 case URLT_FTP:
446 case URLT_FTPS:
447 url_split_authority(authority,
448 &ret_url.p.ftp.user,
449 &ret_url.p.ftp.password, &ret_url.p.ftp.host, &ret_url.p.ftp.port);
450
451 if(!ret_url.p.ftp.port)
452 ret_url.p.ftp.port = prottable[ret_url.type].default_port;
453
454 url_split_path(p,
455 &ret_url.p.ftp.path, NULL, &ret_url.p.ftp.anchor_name);
456
457
458 if(!ret_url.p.ftp.path)
459 ret_url.p.ftp.path = tl_strdup("/");
460
461 if(p && p[0] == '/' && p[1] == '/')
462 {
463 char *pp = tl_str_concat(NULL, "/", ret_url.p.ftp.path, NULL);
464 _free(ret_url.p.ftp.path);
465 ret_url.p.ftp.path = pp;
466 }
467
468 if((p = strrchr(ret_url.p.ftp.path, ';')) &&
469 !strncasecmp(p, ";type=", 6))
470 *p = '\0';
471
472 ret_url.p.ftp.dir = tl_is_dirname(ret_url.p.ftp.path) != 0;
473 break;
474 case URLT_GOPHER:
475 url_split_authority(authority,
476 NULL, NULL, &ret_url.p.gopher.host, &ret_url.p.gopher.port);
477
478 if(!ret_url.p.gopher.port)
479 ret_url.p.gopher.port = prottable[ret_url.type].default_port;
480
481 if(*(p + 1))
482 ret_url.p.gopher.selector = tl_strdup(p + 1);
483 else
484 ret_url.p.gopher.selector = tl_strdup("1");
485 break;
486 case URLT_FILE:
487 url_split_path(p,
488 &ret_url.p.file.filename,
489 &ret_url.p.file.searchstr, &ret_url.p.file.anchor_name);
490
491 if(!ret_url.p.file.filename)
492 ret_url.p.file.filename = tl_strdup("");
493 break;
494 default:
495 ret_url.p.unsup.urlstr = tl_strdup(urlstr);
496 break;
497 }
498 }
499 }
500
501 if(!scheme || !authority || !*authority)
502 {
503 if(!scheme)
504 ret_url.type = URLT_FILE;
505
506 switch (ret_url.type)
507 {
508 case URLT_FILE:
509 case URLT_FTP:
510 case URLT_FTPS:
511 case URLT_HTTP:
512 case URLT_HTTPS:
513 case URLT_FROMPARENT:
514 ret_url.type = URLT_FILE;
515 url_split_path(p,
516 &ret_url.p.file.filename,
517 &ret_url.p.file.searchstr, &ret_url.p.file.anchor_name);
518
519 if(!ret_url.p.file.filename)
520 ret_url.p.file.filename = tl_strdup("");
521 break;
522 default:
523 ret_url.type = URLT_UNKNOWN;
524 ret_url.p.unsup.urlstr = tl_strdup(urlstr);
525 break;
526 }
527 }
528
529 _free(authority);
530 _free(scheme);
531 return new_url(&ret_url);
532 }
533
/*
 * Create an independent copy of a URL.
 *
 * Type, level, the type specific fields and a subset of the status
 * flags are copied.  The parent list, the redirect target and the local
 * name are not copied, and the reference count starts at 1.  The
 * extension is duplicated for ftp(s) URLs and for http(s) URLs that are
 * form actions.
 *
 * Returns the result of new_url() for the copy.
 */
url *url_dup_url(url * src)
{
  url copy;

  copy.type = src->type;
  copy.level = src->level;
  copy.ref_cnt = 1;
  copy.parent_url = NULL;
  copy.moved_to = NULL;
  copy.extension = NULL;
  copy.local_name = NULL;

  /* only flags describing the URL itself are inherited */
  copy.status = src->status &
    (URL_INLINE_OBJ | URL_STYLE | URL_ISHTML | URL_NORECURSE |
    URL_FORM_ACTION | URL_ISSCRIPT | URL_ISSTARTING);

#ifdef WITH_TREE
#ifdef I_FACE
  copy.prop = NULL;
  copy.tree_nfo = NULL;
#endif
#endif
#ifdef HAVE_MT
  pthread_mutex_init(&copy.lock, NULL);
#endif

  switch (copy.type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    copy.p.http.host = tl_strdup(src->p.http.host);
    copy.p.http.port = src->p.http.port;
    copy.p.http.document = tl_strdup(src->p.http.document);
    copy.p.http.searchstr = tl_strdup(src->p.http.searchstr);
    copy.p.http.anchor_name = tl_strdup(src->p.http.anchor_name);
    copy.p.http.user = tl_strdup(src->p.http.user);
    copy.p.http.password = tl_strdup(src->p.http.password);
    if(src->extension && (src->status & URL_FORM_ACTION))
      copy.extension = form_info_dup(src->extension);
    break;
  case URLT_FTP:
  case URLT_FTPS:
    copy.p.ftp.host = tl_strdup(src->p.ftp.host);
    copy.p.ftp.user = tl_strdup(src->p.ftp.user);
    copy.p.ftp.password = tl_strdup(src->p.ftp.password);
    copy.p.ftp.path = tl_strdup(src->p.ftp.path);
    copy.p.ftp.anchor_name = tl_strdup(src->p.ftp.anchor_name);
    copy.p.ftp.port = src->p.ftp.port;
    copy.p.ftp.dir = src->p.ftp.dir;
    if(src->extension)
      copy.extension = ftp_url_ext_dup(src->extension);
    break;
  case URLT_GOPHER:
    copy.p.gopher.host = tl_strdup(src->p.gopher.host);
    copy.p.gopher.port = src->p.gopher.port;
    copy.p.gopher.selector = tl_strdup(src->p.gopher.selector);
    break;
  case URLT_FILE:
    copy.p.file.filename = tl_strdup(src->p.file.filename);
    copy.p.file.searchstr = tl_strdup(src->p.file.searchstr);
    copy.p.file.anchor_name = tl_strdup(src->p.file.anchor_name);
    break;
  case URLT_FROMPARENT:
    /* must never be seen here; handled like an unknown URL */
    assert(0);
    /* fallthrough */
  case URLT_UNKNOWN:
    copy.p.unsup.urlstr = tl_strdup(src->p.unsup.urlstr);
    break;
  }

  return new_url(&copy);
}
602
603 /* convert any URL string to absolute path */
url_to_absolute_url(char * base,char * baset,url * parent,char * act)604 char *url_to_absolute_url(char *base, char *baset, url * parent, char *act)
605 {
606 char *psp = NULL;
607 url *purl;
608 char *pom;
609 int pomlen;
610
611 if(act[0] == 0)
612 return 0;
613 if(act[0] == '#')
614 return 0;
615
616 pomlen = strlen(url_to_filename(parent, TRUE)) + strlen(priv_cfg.cache_dir)
617 + strlen(baset) + strlen(act);
618 pom = _malloc(pomlen);
619
620 if((act[0] == '/' && act[1] == '/') && parent->type != URLT_FILE)
621 {
622 /* we should handle it like net_path */
623 snprintf(pom, pomlen, "%s:%s", prottable[parent->type].urlid, act);
624 psp = tl_strdup(pom);
625 purl = url_parse(act);
626 }
627 else
628 {
629 purl = url_parse(act);
630 }
631
632 if(purl->type == URLT_FROMPARENT)
633 {
634 purl->type = parent->type;
635 url_finishpath(purl);
636 }
637 assert(purl->type != URLT_FROMPARENT);
638
639 if(purl->type == URLT_FILE && (parent->type == URLT_FILE))
640 {
641 if(!(*purl->p.file.filename))
642 {
643 strcpy(pom, baset);
644 }
645 else
646 {
647 if(*(purl->p.file.filename) != '/')
648 {
649 strcpy(pom, base);
650 strcat(pom, purl->p.file.filename);
651
652 free(purl->p.file.filename);
653 purl->p.file.filename = tl_strdup(pom);
654 }
655 else
656 snprintf(pom, pomlen, "%s%s",
657 prottable[purl->type].typestr, purl->p.file.filename);
658 }
659 psp = tl_strdup(pom);
660 }
661 else if((purl->type == URLT_FILE) &&
662 (cfg.base_level == 0 || cfg.enable_info) &&
663 (parent->status & URL_REDIRECT || parent->status & URL_ISLOCAL))
664 {
665 char *p1, *p;
666 url *pomurl;
667
668 if(*purl->p.file.filename == '/')
669 strcpy(pom, purl->p.file.filename);
670 else
671 {
672 int l;
673 p = url_to_filename(parent, TRUE);
674 strcpy(pom, p);
675 if(*purl->p.file.filename)
676 {
677 p1 = strrchr(pom, '/');
678 if(p1)
679 *(p1 + 1) = '\0';
680 strcat(pom, purl->p.file.filename);
681 }
682 /* remove any dynamic stuff to get base name */
683 for(l = strlen(pom); l > 0 && pom[l] != '/' && pom[l] != '?'; --l)
684 ;
685 if(pom[l] == '?')
686 pom[l] = '\0';
687 /* now fix for index-name files */
688 l = strlen(pom)-strlen(priv_cfg.index_name);
689 if(l > 0 && !strcmp(pom+l, priv_cfg.index_name) && pom[l-1] == '/')
690 pom[l] = '\0';
691 }
692 if(purl->p.file.searchstr)
693 {
694 strcat(pom, "?");
695 strcat(pom, purl->p.file.searchstr);
696 }
697
698 if(purl->p.file.anchor_name)
699 {
700 strcat(pom, "#");
701 strcat(pom, purl->p.file.anchor_name);
702 }
703
704 p = get_abs_file_path(pom);
705 pomurl = filename_to_url(p);
706 _free(p);
707 if(pomurl)
708 {
709 psp = url_to_urlstr(pomurl, TRUE);
710 free_deep_url(pomurl);
711 _free(pomurl);
712 }
713 }
714 if((!psp && purl->type == URLT_FILE) &&
715 (parent->type == URLT_HTTP ||
716 parent->type == URLT_HTTPS ||
717 parent->type == URLT_FTPS || parent->type == URLT_FTP))
718 {
719 char *ri;
720 if(*(purl->p.file.filename) == '/')
721 {
722 char *idx;
723
724 strcpy(pom, base);
725 idx = strfindnchr(pom, '/', 3);
726 if(idx)
727 strcpy(idx - 1, purl->p.file.filename);
728 else
729 strcat(pom, purl->p.file.filename);
730
731 if(purl->p.file.searchstr)
732 {
733 strcat(pom, "?");
734 strcat(pom, purl->p.file.searchstr);
735 }
736
737 if(purl->p.file.anchor_name)
738 {
739 strcat(pom, "#");
740 strcat(pom, purl->p.file.anchor_name);
741 }
742 }
743 else if(!(*purl->p.file.filename) && !purl->p.file.searchstr)
744 {
745 if(purl->p.file.anchor_name)
746 {
747 /* Problem; we just have "#anchor" and unfortunately
748 baset might be the parent directory, not the
749 actual parent. (Nor is that found in "parent"
750 necessarily).
751 */
752
753 if(*baset && baset[strlen(baset) - 1] != '/')
754 {
755 strcpy(pom, baset);
756 strcat(pom, "#");
757 strcat(pom, purl->p.file.anchor_name);
758 }
759 else /* What to do? Just hope to ignore this altogeher */
760 strcpy(pom, "");
761
762 }
763 }
764 else
765 {
766 strcpy(pom, base);
767 if(!*purl->p.file.filename && purl->p.file.searchstr && parent->type == URLT_HTTP)
768 {
769 ri = strrchr(pom, '/');
770 if(ri)
771 strcpy(ri, parent->p.http.document);
772 else
773 strcat(pom, parent->p.http.document);
774 }
775 else
776 {
777 ri = strrchr(pom, '/');
778 if(ri)
779 strcpy(ri + 1, purl->p.file.filename);
780 else
781 strcat(pom, purl->p.file.filename);
782 }
783
784 if((parent->status & URL_REDIRECT) &&
785 (strlen(purl->p.file.filename) >= strlen(priv_cfg.index_name)) &&
786 !strcmp(priv_cfg.index_name,
787 purl->p.file.filename + strlen(purl->p.file.filename) -
788 strlen(priv_cfg.index_name)))
789 {
790 *(pom + strlen(pom) - strlen(priv_cfg.index_name)) = '\0';
791 }
792
793 if(purl->p.file.searchstr)
794 {
795 strcat(pom, "?");
796 strcat(pom, purl->p.file.searchstr);
797 }
798
799 if(purl->p.file.anchor_name)
800 {
801 strcat(pom, "#");
802 strcat(pom, purl->p.file.anchor_name);
803 }
804 }
805 psp = tl_strdup(pom);
806 }
807 else if(!psp)
808 {
809 psp = tl_strdup(act);
810 }
811
812 free_deep_url(purl);
813 _free(purl);
814
815 if(psp && *psp)
816 {
817 purl = url_parse(psp);
818 if(purl->type == URLT_FROMPARENT)
819 {
820 purl->type = parent->type;
821 url_finishpath(purl);
822 }
823 url_path_abs(purl);
824 if(prottable[purl->type].supported)
825 {
826 free(psp);
827 psp = url_to_urlstr(purl, TRUE);
828 }
829 free_deep_url(purl);
830 _free(purl);
831 }
832
833 _free(pom);
834
835 return psp;
836 }
837
838 /**************************************/
839 /* encode unsafe characters with */
840 /* url-encoded encoding */
841 /**************************************/
url_encode_str_real(char * urlstr,char * unsafe,int safety)842 static char *url_encode_str_real(char *urlstr, char *unsafe, int safety)
843 {
844 char *res, *p, *r;
845
846 if(urlstr == NULL)
847 return NULL;
848
849 if(cfg.noencode)
850 {
851 return strdup(urlstr);
852 }
853
854 res = _malloc(strlen(urlstr) * 3 + 1);
855
856 for(p = urlstr, r = res; *p; p++, r++)
857 {
858 if(safety && *p == '%' && tl_ascii_isxdigit(p[1]) &&
859 tl_ascii_isxdigit(p[2]))
860 {
861 *r = *p;
862 }
863 else if(strchr(unsafe, *p) ||
864 ((unsigned char) *p > 0x7f) || ((unsigned char) *p < 0x20))
865 {
866 *r = '%';
867 r++;
868 *r = hexa[((unsigned char)*p) >> 4];
869 r++;
870 *r = hexa[((unsigned char)*p) % 16];
871 }
872 else
873 {
874 *r = *p;
875 }
876 }
877 *r = '\0';
878
879 return res;
880 }
881
url_encode_str(char * urlstr,char * unsafe)882 char *url_encode_str(char *urlstr, char *unsafe)
883 {
884 return url_encode_str_real(urlstr, unsafe, FALSE);
885 }
886
url_encode_str_safe(char * urlstr,char * unsafe)887 static char *url_encode_str_safe(char *urlstr, char *unsafe)
888 {
889 return url_encode_str_real(urlstr, unsafe, TRUE);
890 }
891
/*
 * Convert an HTML character reference at the start of str to the
 * character it stands for.
 *
 * str  - text starting at the '&' of a possible reference
 * size - number of bytes available at str
 * res  - receives the decoded character when a reference is recognized
 *
 * Returns the number of input bytes the reference occupies, or 0 when
 * str does not start with a known reference (*res is then untouched).
 * Currently only the ampersand ("&amp;" and "&#38;") is handled.
 */
static int fix_html_entity(const char *str, int size, char *res)
{
  if(size >= 5 && (!strncmp(str, "&amp;", 5) || !strncmp(str, "&#38;", 5)))
  {
    *res = '&';
    return 5;
  }
  return 0;
}
904
905 /*****************************************/
906 /* dekodovanie zakodovanych znakov z URL */
907 /* FIXME: Translate me */
908 /*****************************************/
url_decode_str(const char * urlstr,int len)909 char *url_decode_str(const char *urlstr, int len)
910 {
911 char *res, *r;
912 int i;
913
914 if(urlstr == NULL)
915 return NULL;
916
917 res = tl_strndup(urlstr, len);
918
919 for(i = 0, r = res; i < len; r++, i++)
920 {
921 if(urlstr[i] == '%' && urlstr[i + 1] && urlstr[i + 2] &&
922 tl_ascii_isxdigit(urlstr[i + 1]) && tl_ascii_isxdigit(urlstr[i + 2]))
923 {
924 *r = HEX2CHAR(urlstr + i);
925 i += 2;
926 }
927 else if(urlstr[i] == '&')
928 {
929 int s;
930 if((s = fix_html_entity(urlstr+i, len-i, r)))
931 i += s-1;
932 else
933 *r = urlstr[i]; /* copy the & */
934 }
935 else
936 {
937 *r = urlstr[i];
938 }
939 }
940 *r = '\0';
941
942 return res;
943 }
944
url_decode_html(const char * urlstr,int len)945 static char *url_decode_html(const char *urlstr, int len)
946 {
947 char *res, *r;
948 int i;
949
950 if(urlstr == NULL)
951 return NULL;
952
953 res = tl_strndup(urlstr, len);
954
955 for(i = 0, r = res; i < len; r++, i++)
956 {
957 if(urlstr[i] == '&')
958 {
959 int s;
960 if((s = fix_html_entity(urlstr+i, len-i, r)))
961 i += s-1;
962 else
963 *r = urlstr[i]; /* copy the & */
964 }
965 else
966 {
967 *r = urlstr[i];
968 }
969 }
970 *r = '\0';
971
972 return res;
973 }
974
975
976 /*************************************/
977 /* uvolnenie pamate po strukture URL */
978 /* FIXME: Translate me! */
979 /*************************************/
free_deep_url(url * urlp)980 void free_deep_url(url * urlp)
981 {
982 if(urlp->local_name)
983 {
984 url_remove_from_file_hash_tab(urlp);
985 _free(urlp->local_name);
986 }
987
988 switch (urlp->type)
989 {
990 case URLT_FILE:
991 _free(urlp->p.file.filename);
992 _free(urlp->p.file.searchstr);
993 _free(urlp->p.file.anchor_name);
994 break;
995 case URLT_HTTP:
996 case URLT_HTTPS:
997 _free(urlp->p.http.host);
998 _free(urlp->p.http.document);
999 _free(urlp->p.http.searchstr);
1000 _free(urlp->p.http.anchor_name);
1001 _free(urlp->p.http.password);
1002 _free(urlp->p.http.user);
1003 if(urlp->status & URL_FORM_ACTION)
1004 {
1005 form_info *fi = (form_info *) urlp->extension;
1006 dllist *ptr;
1007
1008 _free(fi->text);
1009 _free(fi->action);
1010 ptr = fi->infos;
1011 while(ptr)
1012 {
1013 form_field *ff = (form_field *) ptr->data;
1014
1015 _free(ff->value);
1016 _free(ff->name);
1017 _free(ff);
1018
1019 ptr = dllist_remove_entry(ptr, ptr);
1020 }
1021 }
1022 break;
1023 case URLT_FTP:
1024 case URLT_FTPS:
1025 _free(urlp->p.ftp.host);
1026 _free(urlp->p.ftp.user);
1027 _free(urlp->p.ftp.password);
1028 _free(urlp->p.ftp.anchor_name);
1029 _free(urlp->p.ftp.path);
1030 if(urlp->extension)
1031 ftp_url_ext_free(urlp->extension);
1032 break;
1033 case URLT_GOPHER:
1034 _free(urlp->p.gopher.host);
1035 _free(urlp->p.gopher.selector);
1036 case URLT_FROMPARENT:
1037 default:
1038 _free(urlp->p.unsup.urlstr);
1039 break;
1040 }
1041
1042 dllist_free_all(urlp->parent_url);
1043
1044
1045 #ifdef WITH_TREE
1046 #ifdef I_FACE
1047 _free(urlp->tree_nfo);
1048
1049 if(urlp->prop)
1050 {
1051 _free(urlp->prop->type);
1052 free(urlp->prop);
1053 }
1054
1055 #endif
1056 #endif
1057
1058 #ifdef HAVE_MT
1059 pthread_mutex_destroy(&urlp->lock);
1060 #endif
1061 }
1062
/*
 * Feed a list of freshly found URLs into the download queue.
 *
 * Every URL in l1 is checked with url_append_condition().  Rejected
 * URLs and URLs that are already known are freed (known ones are first
 * linked to the existing entry); the remaining ones are registered in
 * the URL hash table and collected according to the scheduling
 * strategy.  The list nodes of l1 are freed, and the collected URLs
 * are finally added to cfg.urlstack.
 */
void cat_links_to_url_list(dllist * l1)
{
  dllist *node;
  dllist *regular = NULL;       /* ordinary URLs to be queued */
  dllist *inlined = NULL;       /* inline objects, queued first */
  url *known;
  int added = 0;
  cond_info_t condp;

  condp.level = 1;
  condp.urlnr = 0;
  condp.size = 0;
  condp.time = 0L;
  condp.mimet = NULL;
  condp.full_tag = NULL;
  condp.params = NULL;
  condp.html_doc = NULL;
  condp.html_doc_offset = 0;
  condp.tag = NULL;
  condp.attrib = NULL;

  for(node = l1; node; node = node->next)
  {
    url *urlp = (url *) node->data;

    if(!url_append_condition(urlp, &condp))
    {
      /* rejected by the limiting conditions */
      LOCK_REJCNT;
      cfg.reject_cnt++;
      UNLOCK_REJCNT;

      free_deep_url(urlp);
      free(urlp);
      continue;
    }

    url_clear_anchor(urlp);
    known = url_was_befor(urlp);
    if(known)
    {
      /* seen before: just remember the additional parent */
      link_url_in_list(known, urlp);
      free_deep_url(urlp);
      free(urlp);
      continue;
    }

    added++;
    LOCK_TCNT;
    cfg.total_cnt++;
    UNLOCK_TCNT;

    urlp->ref_cnt = 1;

#ifdef WITH_TREE
#ifdef I_FACE
    if(cfg.xi_face)
    {
      urlp->tree_nfo = _malloc(sizeof(GUI_TREE_RTYPE));
      urlp->tree_nfo[0] = gui_tree_make_entry(urlp);
    }
#endif
#endif

    url_add_to_url_hash_tab(urlp);

    switch (cfg.scheduling_strategie)
    {
    case SSTRAT_DO_SIRKY:
    case SSTRAT_DO_HLBKY:
      regular = dllist_append(regular, (dllist_t) node->data);
      break;
    case SSTRAT_DO_SIRKY_I:
    case SSTRAT_DO_HLBKY_I:
      if(urlp->status & URL_INLINE_OBJ)
        inlined = dllist_append(inlined, (dllist_t) urlp);
      else
        regular = dllist_append(regular, (dllist_t) urlp);
      break;
    default:
      break;
    }

    if(cfg.hack_add_index && !url_get_search_str(urlp))
    {
      char *ustr = url_to_urlstr(urlp, FALSE);
      char *slash = strrchr(ustr, '/');

      if(slash && slash[1])
      {
        url *dir_url;

        /* also schedule the directory containing this URL; it is  */
        /* appended to the list that is being walked right now     */
        slash[1] = '\0';
        dir_url = url_parse(ustr);
        assert(dir_url->type != URLT_FROMPARENT);
        dllist_append(node, (dllist_t) dir_url);
      }
      _free(ustr);
    }
  }
  dllist_free_all(l1);

  LOCK_CFG_URLSTACK;
  switch (cfg.scheduling_strategie)
  {
  case SSTRAT_DO_SIRKY:
  case SSTRAT_DO_SIRKY_I:
    if(regular || inlined)
      append_url_list_to_list(dllist_concat(inlined, regular), NULL);
    break;
  case SSTRAT_DO_HLBKY:
  case SSTRAT_DO_HLBKY_I:
    if(regular || inlined)
      append_url_list_to_list(dllist_concat(inlined, regular), cfg.urlstack);
    break;
  default:
    break;
  }
  UNLOCK_CFG_URLSTACK;
#ifdef HAVE_MT
  /* this is here for signaling sleeping downloading processes which */
  /* wait for URL to be queued inside downloading queue */
  while(added > 0)
  {
    mt_semaphore_up(&cfg.urlstack_sem);
    added--;
  }
#endif
}
1193
/*
 * Append a single URL to the end of cfg.urlstack and register it in
 * the URL hash table.  URLs of an unsupported type are only reported
 * and otherwise ignored.
 */
void append_url_to_list(url * urlp)
{
  const protinfo *info = &prottable[urlp->type];

  if(!info->supported)
  {
    xprintf(1, gettext("unsupported URL type \"%s\"\n"),
      info->urlid ? info->urlid : gettext("unknown"));
    return;
  }

  urlp->ref_cnt = 1;

#ifdef WITH_TREE
#ifdef I_FACE
  if(cfg.xi_face)
  {
    urlp->tree_nfo = _malloc(sizeof(GUI_TREE_RTYPE));
    urlp->tree_nfo[0] = gui_tree_make_entry(urlp);
  }
#endif
#endif

  url_add_to_url_hash_tab(urlp);
  cfg.urlstack = dllist_append(cfg.urlstack, (dllist_t) urlp);
  cfg.total_cnt++;

#ifdef HAVE_MT
  /* one more URL is waiting in the queue */
  mt_semaphore_up(&cfg.urlstack_sem);
#endif
}
1225
/*
 * Add a list of URLs to cfg.urlstack: behind the node "after" when one
 * is given, at the end of the stack otherwise.
 */
void append_url_list_to_list(dllist * list, dllist * after)
{
  cfg.urlstack = after
    ? dllist_insert_list_after(cfg.urlstack, after, list)
    : dllist_concat(cfg.urlstack, list);
}
1233
1234
/*
 * Record that an already known URL (orig) is referenced from one more
 * document.
 *
 * copy is a duplicate of orig that was found again; its first parent is
 * added to orig's parent list unless it is orig itself or is already in
 * that list.  When orig was moved and the end of its redirect chain is
 * already downloaded, the links in the new parent are rewritten.
 */
void link_url_in_list(url * orig, url * copy)
{
  url *new_parent = NULL;
  url *target;
  dllist *node;
  bool_t listed = FALSE;

  LOCK_URL(copy);
  if(copy->parent_url)
    new_parent = (url *) copy->parent_url->data;
  UNLOCK_URL(copy);

  if(!new_parent || orig == new_parent)
    return;

  /* is the parent already registered with the original? */
  if(copy->parent_url)
  {
    LOCK_URL(orig);
    for(node = orig->parent_url; node && !listed; node = node->next)
    {
      if((url *) node->data == new_parent)
        listed = TRUE;
    }
    UNLOCK_URL(orig);
  }

  if(listed)
    return;

  LOCK_URL(orig);
  orig->ref_cnt++;
  orig->parent_url = dllist_append(orig->parent_url, (dllist_t) new_parent);

#ifdef WITH_TREE
#ifdef I_FACE
  if(cfg.xi_face)
  {
    orig->tree_nfo =
      _realloc(orig->tree_nfo, orig->ref_cnt * sizeof(GUI_TREE_RTYPE));
    orig->tree_nfo[orig->ref_cnt - 1] = gui_tree_make_entry(orig);
  }
#endif
#endif
  UNLOCK_URL(orig);

  if(!(orig->status & URL_MOVED))
    return;

  /* follow the redirect chain to its end */
  for(target = orig; target->moved_to; target = target->moved_to)
    ;

  if(target->status & URL_DOWNLOADED)
  {
    char *fn = url_to_filename(target, TRUE);

    rewrite_one_parent_links(copy, new_parent, fn);
  }
}
1297
/*
 * Register a redirect from src to dst.
 *
 * src    - URL that was redirected; gets URL_MOVED and moved_to set
 * dst    - target of the redirect; freed here when an equal URL is
 *          already known, otherwise it is registered in the URL hash
 *          table
 * is_303 - when zero, form data attached to src is duplicated for a
 *          newly registered dst that has no extension yet
 *
 * Returns 0 on success, or -1 when the redirect would form a loop back
 * to src (URL_MOVED is then cleared on src again).
 */
int url_redirect_to(url * src, url * dst, int is_303)
{
  url *known, *hop;

  src->status |= URL_MOVED;

  url_clear_anchor(dst);
  known = url_was_befor(dst);

  if(!known)
  {
    /* first time this target is seen */
    dst->parent_url = dllist_append(dst->parent_url, (dllist_t) src);
    src->moved_to = dst;
    src->status |= URL_MOVED;

    if(!is_303 && !dst->extension && (src->status & URL_FORM_ACTION))
      dst->extension = form_info_dup(src->extension);

#ifdef WITH_TREE
#ifdef I_FACE
    if(cfg.xi_face)
    {
      dst->tree_nfo = _malloc(sizeof(GUI_TREE_RTYPE));
      dst->tree_nfo[0] = gui_tree_make_entry(dst);
    }
#endif
#endif
    dst->ref_cnt = 1;
    url_add_to_url_hash_tab(dst);
    return 0;
  }

  /* the target is known already; the duplicate is not needed */
  free_deep_url(dst);
  _free(dst);

  /* refuse redirect chains that lead back to the source */
  for(hop = known; hop; hop = hop->moved_to)
  {
    if(src == hop)
    {
      src->status &= ~URL_MOVED;
      return -1;
    }
  }

  LOCK_URL(known);

  known->parent_url = dllist_append(known->parent_url, (dllist_t) src);
  known->ref_cnt++;

  src->moved_to = known;
  src->status |= URL_MOVED;

#ifdef WITH_TREE
#ifdef I_FACE
  if(cfg.xi_face)
  {
    known->tree_nfo = _realloc(known->tree_nfo,
      (known->ref_cnt) * sizeof(GUI_TREE_RTYPE));
    known->tree_nfo[known->ref_cnt - 1] = gui_tree_make_entry(known);
  }
#endif
#endif

  UNLOCK_URL(known);

  if(known->status & (URL_MOVED | URL_DOWNLOADED))
  {
    url *last = known;
    char *fn;

    xprintf(1, gettext("Moved to already processed URL.\n"));

    /* a moved target is represented by the end of its own chain */
    if(known->status & URL_MOVED)
    {
      while(last->moved_to)
        last = last->moved_to;
    }
    fn = url_to_filename(last, TRUE);

    if(cfg.rewrite_links && (last->status & URL_DOWNLOADED))
      rewrite_parents_links(src, fn);
  }

  return 0;
}
1384
/*
 * Insert urlp into cfg.url_hash_tbl.  The anchor of the URL is cleared
 * before the insertion; the table is accessed between
 * LOCK_CFG_URLHASH / UNLOCK_CFG_URLHASH.
 */
void url_add_to_url_hash_tab(url * urlp)
{
  url_clear_anchor(urlp);

  LOCK_CFG_URLHASH;
  dlhash_insert(cfg.url_hash_tbl, (dllist_t) urlp);
  UNLOCK_CFG_URLHASH;
}
1393
/*
 * Remove urlp from cfg.url_hash_tbl.  Nothing is done for URLs whose
 * protocol is marked unsupported in prottable.
 */
void url_remove_from_url_hash_tab(url * urlp)
{
  if(prottable[urlp->type].supported)
  {
    LOCK_CFG_URLHASH;
    dlhash_exclude(cfg.url_hash_tbl, (dllist_t) urlp);
    UNLOCK_CFG_URLHASH;
  }
}
1403
/*
 * Make sure urlp has a local file name; url_to_filename() computes it
 * and registers the URL in the file name hash table when the name is
 * not set yet.  URLs of unsupported protocols are ignored.
 */
void url_add_to_file_hash_tab(url * urlp)
{
  if(prottable[urlp->type].supported)
  {
    url_to_filename(urlp, TRUE);
  }
}
1411
/*
 * Remove exactly this URL from cfg.fn_hash_tbl.  Nothing happens for
 * unsupported protocols or when the URL has no local name assigned.
 */
void url_remove_from_file_hash_tab(url * urlp)
{
  if(!prottable[urlp->type].supported || !urlp->local_name)
    return;

  LOCK_CFG_FILEHASH;
  dlhash_exclude_exact(cfg.fn_hash_tbl, (dllist_t) urlp);
  UNLOCK_CFG_FILEHASH;
}
1424
1425 /**********************************************/
1426 /* kopirovanie obsahu na nove miesto v pamati */
1427 /* FIXME: Translate me! */
1428 /**********************************************/
new_url(url * urlo)1429 url *new_url(url * urlo)
1430 {
1431 url *res = (url *) _malloc(sizeof(url));
1432
1433 memcpy(res, urlo, sizeof(url));
1434
1435 return res;
1436 }
1437
#define isforbiddenchar(a) ((a) == '\\' || (a) == '/')
/*
 * Return a newly allocated copy of str in which every backslash and
 * every slash is replaced by a '%' followed by the two lower-case
 * hexadecimal digits of the character code.  All other characters are
 * copied unchanged.
 */
static char *encode_forbiddenchars(const char *str)
{
  static const char lowhex[] = "0123456789abcdef";
  const char *in;
  char *out, *w;
  int escapes = 0;
  int size;

  for(in = str; *in; ++in)
  {
    if(isforbiddenchar(*in))
      escapes++;
  }

  /* text length + two extra bytes per escape + terminating NUL */
  size = (in - str) + 2 * escapes + 1;
  out = w = (char *) _malloc(size);

  for(in = str; *in; ++in)
  {
    if(!isforbiddenchar(*in))
    {
      *w++ = *in;
      continue;
    }

    *w++ = '%';
    *w++ = lowhex[(*in >> 4) & 0x0f];
    *w++ = lowhex[*in & 0x0f];
  }
  *w = '\0';

  return out;
}
1470
/*
 * Build the default (cache-relative) local name for a URL.
 *
 *  HTTP/HTTPS: <dirname>/<host>_<port><decoded document>, followed by
 *              "?<search string>" (with / and \ escaped) and, for form
 *              actions, the urlencoded form data introduced by '#' (POST)
 *              or '?' (other methods).  When the result is a directory
 *              name and add_index is set, the index name is appended.
 *  FILE:       the decoded file name plus "?<decoded search string>".
 *  FTP/FTPS:   <dirname>/<host>_<port>/<path>, and for directories a
 *              trailing "/" plus (with add_index) the index name.
 *  GOPHER:     <dirname>/<host>_<port><selector>, plus the index name
 *              when the selector starts with '1' and add_index is set.
 *
 * Returns a newly allocated string, or NULL for URLT_FROMPARENT and any
 * other type.
 */
static char *url_get_default_local_name_real(url * urlp, int add_index)
{
  char port_sfx[50];
  char *name = NULL;
  char *tmp;

  snprintf(port_sfx, sizeof(port_sfx), "_%d", url_get_port(urlp));

  switch (urlp->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    tmp = url_decode_str(urlp->p.http.document,
      strlen(urlp->p.http.document));
    name = tl_str_concat(name, prottable[urlp->type].dirname, "/",
      urlp->p.http.host, port_sfx, tmp, NULL);
    _free(tmp);

    if(urlp->p.http.searchstr)
    {
      /* a query may contain / or \ which must not reach the file name */
      char *query = encode_forbiddenchars(urlp->p.http.searchstr);

      name = tl_str_concat(name, "?", query, NULL);
      _free(query);
    }

    if(urlp->status & URL_FORM_ACTION)
    {
      form_info *fi = (form_info *) urlp->extension;

      tmp = form_encode_urlencoded(fi->infos);
      if(tmp)
      {
        name = tl_str_concat(name,
          (fi->method == FORM_M_POST) ? "#" : "?", tmp, NULL);
      }
      _free(tmp);
    }

    if(tl_is_dirname(name) && add_index)
      name = tl_str_append(name, priv_cfg.index_name);
    return name;

  case URLT_FILE:
    name = url_decode_str(urlp->p.file.filename,
      strlen(urlp->p.file.filename));
    if(urlp->p.file.searchstr)
    {
      tmp = url_decode_str(urlp->p.file.searchstr,
        strlen(urlp->p.file.searchstr));
      name = tl_str_concat(name, "?", tmp, NULL);
      free(tmp);
    }
    return name;

  case URLT_FTP:
  case URLT_FTPS:
    /* the argument list of tl_str_concat ends at the first NULL, so   */
    /* for a non-directory neither "/" nor the index name is appended  */
    name = tl_str_concat(name, prottable[urlp->type].dirname, "/",
      urlp->p.ftp.host, port_sfx, "/", urlp->p.ftp.path,
      urlp->p.ftp.dir ? "/" : NULL,
      add_index ? priv_cfg.index_name : NULL, NULL);
    return name;

  case URLT_GOPHER:
    name = tl_str_concat(name, prottable[URLT_GOPHER].dirname, "/",
      urlp->p.gopher.host, port_sfx, urlp->p.gopher.selector,
      (urlp->p.gopher.selector[0] == '1' && add_index)
      ? priv_cfg.index_name : NULL, NULL);
    return name;

  case URLT_FROMPARENT:
  default:
    break;
  }

  return NULL;
}
1544
/*
 * Default local name of urlp, computed by
 * url_get_default_local_name_real() with add_index set to TRUE.
 */
char *url_get_default_local_name(url * urlp)
{
  char *name = url_get_default_local_name_real(urlp, TRUE);

  return name;
}
1549
/*
 * Turn a default local name into a name below priv_cfg.cache_dir.
 *
 * The first entry of priv_cfg.lfnames matching the URL string replaces
 * local_name by the name it generates and sets *isdinfo to TRUE.  The
 * chosen name is passed through tr() and prefixed with the cache
 * directory; a result that is a directory name gets the index name
 * appended.  Returns a newly allocated string.
 */
static char *url_get_local_name_tr(url * urlp, char *local_name,
  const char *mime_type, int *isdinfo)
{
  char *urlstr = url_to_urlstr(urlp, FALSE);
  char *by_rule = NULL;
  char *source = local_name;
  char *translated;
  char *result = NULL;
  const char *sep;
  dllist *node;

  for(node = priv_cfg.lfnames; node; node = node->next)
  {
    lfname *rule = (lfname *) node->data;

    if(!lfname_match(rule, urlstr))
      continue;

    by_rule = lfname_get_by_url(urlp, urlstr, mime_type, rule);
    source = by_rule;
    *isdinfo = TRUE;
    break;
  }
  _free(urlstr);

  translated = tr(source);
  if(tl_is_dirname(translated))
  {
    sep = (*translated == '/') ? "" : "/";
    result = tl_str_concat(NULL, priv_cfg.cache_dir, sep, translated,
      priv_cfg.index_name, NULL);
  }
  else
  {
    sep = (*translated == '/') ? "" : "/";
    result = tl_str_concat(NULL, priv_cfg.cache_dir, sep, translated, NULL);
  }
  _free(translated);
  _free(by_rule);

  return result;
}
1584
1585 /**********************************************/
url_get_local_name_real(url * urlp,const char * mime_type,int adj)1586 char *url_get_local_name_real(url * urlp, const char *mime_type, int adj)
1587 {
1588 char *pom = NULL;
1589 char *pom2 = NULL;
1590 char *p1, *p2;
1591 char *p;
1592 int isdinfo = FALSE;
1593 struct stat estat;
1594
1595 if((urlp->status & URL_ISFIRST) &&
1596 priv_cfg.store_name /* && cfg.mode == MODE_SINGLE */ )
1597 {
1598 return get_abs_file_path_oss(priv_cfg.store_name);
1599 }
1600
1601 pom = url_get_default_local_name(urlp);
1602
1603 if(urlp->type != URLT_FILE)
1604 {
1605 pom2 = url_get_local_name_tr(urlp, pom, mime_type, &isdinfo);
1606 _free(pom);
1607 pom = pom2;
1608 }
1609
1610 #ifdef FS_UNSAFE_CHARACTERS
1611 /* This is for automatic handling of windoze */
1612 /* filesystem unsafe characters - \:*?"<>| */
1613 if(urlp->type != URLT_FILE
1614 && strlen(pom) != strcspn(pom, FS_UNSAFE_CHARACTERS))
1615 {
1616 if(strchr(FS_UNSAFE_CHARACTERS, '_'))
1617 p = tr_del_chr(FS_UNSAFE_CHARACTERS, pom);
1618 else
1619 p = tr_chr_chr(FS_UNSAFE_CHARACTERS, "_", pom);
1620 _free(pom);
1621 pom = p;
1622 }
1623 #endif
1624
1625 /* adjusting of filename size if required */
1626 if(urlp->type != URLT_FILE && tl_filename_needs_adjust(pom))
1627 {
1628 p = tl_adjust_filename(pom);
1629 _free(pom);
1630 pom = p;
1631 }
1632
1633 if(!lstat(pom, &estat) && S_ISDIR(estat.st_mode) && adj)
1634 {
1635 pom = tl_str_concat(pom, "/", priv_cfg.index_name, NULL);
1636 }
1637
1638 if((urlp->type != URLT_FILE) && cfg.base_level && !isdinfo)
1639 {
1640 p = get_abs_file_path_oss(pom);
1641 _free(pom);
1642 pom = p;
1643 p1 = pom + strlen(priv_cfg.cache_dir) +
1644 (tl_is_dirname(priv_cfg.cache_dir) == 0);
1645
1646 if(!(p2 = strfindnchr(p1, '/', cfg.base_level)))
1647 {
1648 if((p2 = strrchr(pom, '/')))
1649 p2++;
1650 }
1651
1652 if(p2)
1653 memmove(p1, p2, strlen(p2) + 1);
1654 }
1655
1656 /* this is here for ensure, that we */
1657 /* don't have directory as filename :-) */
1658 if(tl_is_dirname(pom))
1659 pom = tl_str_append(pom, priv_cfg.index_name);
1660
1661 p = get_abs_file_path_oss(pom);
1662 _free(pom);
1663
1664 /* In mode MIRROR we want to use exactly the same filenames as the
1665 remove server. Therefore we have to unquote our filename. */
1666 if(cfg.mode == MODE_MIRROR)
1667 {
1668 /* now we unquote the string */
1669
1670 char *s = p;
1671 char *t = p;
1672 int hex;
1673
1674 while(*s != 0)
1675 {
1676 if(s[0] == '%' && isxdigit(s[1]) && isxdigit(s[2]))
1677 {
1678 sscanf(s + 1, "%2x", &hex);
1679 *t++ = hex;
1680 s += 3;
1681 continue;
1682 }
1683 *t++ = *s++;
1684 }
1685
1686 *t = 0;
1687 }
1688
1689 return p;
1690 }
1691
/*
 * Local file name of urlp, computed by url_get_local_name_real() with
 * adj set to TRUE.
 */
static char *url_get_local_name(url * urlp, const char *mime_type)
{
  char *name = url_get_local_name_real(urlp, mime_type, TRUE);

  return name;
}
1696
1697 /******************************************************/
1698 /* k danemu URL vytvori meno suboru v lokalnom strome */
1699 /* FIXME: Translate me! */
1700 /******************************************************/
url_to_filename_real(url * urlp,const char * mime_type,int lockfn)1701 static char *url_to_filename_real(url * urlp, const char *mime_type,
1702 int lockfn)
1703 {
1704 char *p;
1705 bool_t inserted = FALSE;
1706
1707 if(!urlp->local_name && prottable[urlp->type].supported)
1708 {
1709 p = url_get_local_name(urlp, mime_type);
1710 if(cfg.enable_info && urlp->type != URLT_FILE &&
1711 !(urlp->status & URL_REDIRECT))
1712 {
1713 char *di;
1714 LOCK_GETLFNAME;
1715 di = dinfo_get_unique_name(urlp, p, lockfn);
1716 UNLOCK_GETLFNAME;
1717 if(di)
1718 {
1719 _free(p);
1720 p = di;
1721 }
1722 }
1723 else if(!cfg.enable_info && cfg.unique_doc &&
1724 urlp->type != URLT_FILE && !(urlp->status & URL_REDIRECT))
1725 {
1726 /*** such filename have already other URL ***/
1727 /*** we need to compute new unique filename ***/
1728 char *f;
1729 char *pom;
1730 int i;
1731 url *inhash;
1732
1733 LOCK_CFG_FILEHASH;
1734 inhash = (url *) dlhash_find_by_key(cfg.fn_hash_tbl, (dllist_t) p);
1735
1736 if(!inhash && !inserted)
1737 {
1738 urlp->local_name = p;
1739 dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);
1740 inserted = TRUE;
1741 }
1742
1743 if(inhash && url_compare(inhash, urlp))
1744 inhash = NULL;
1745
1746 UNLOCK_CFG_FILEHASH;
1747
1748 if(inhash)
1749 {
1750 int pomlen = strlen(p) + 9;
1751 LOCK_GETLFNAME;
1752 pom = _malloc(pomlen);
1753
1754 f = strrchr(p, '/');
1755 if(!f)
1756 f = "";
1757 else
1758 {
1759 *f = '\0';
1760 f++;
1761 }
1762
1763 if (cfg.remove_before_store)
1764 {
1765 snprintf(pom, pomlen, "%s/%s", p, f);
1766 }
1767 else
1768 {
1769 i = 0;
1770 do
1771 {
1772 i++;
1773 snprintf(pom, pomlen, "%s/%03d%s", p, i, f);
1774 LOCK_CFG_FILEHASH;
1775 inhash = (url *) dlhash_find_by_key(cfg.fn_hash_tbl,
1776 (dllist_t) pom);
1777 if(!inhash && !inserted)
1778 {
1779 urlp->local_name = pom;
1780 dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);
1781 inserted = TRUE;
1782 }
1783 UNLOCK_CFG_FILEHASH;
1784 }
1785 while(inhash);
1786 }
1787 UNLOCK_GETLFNAME;
1788
1789 _free(p);
1790 p = pom;
1791 }
1792 }
1793 if(!inserted)
1794 {
1795 LOCK_CFG_FILEHASH;
1796 urlp->local_name = p;
1797 dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);
1798 inserted = TRUE;
1799 UNLOCK_CFG_FILEHASH;
1800 }
1801 }
1802 return urlp->local_name;
1803 }
1804
/*
 * Local file name of urlp, computed without a MIME type
 * (url_to_filename_real() with mime_type NULL).
 */
char *url_to_filename(url * urlp, int lockfn)
{
  char *name = url_to_filename_real(urlp, NULL, lockfn);

  return name;
}
1809
/*
 * Local file name of urlp; mime_type is passed on to
 * url_to_filename_real().
 */
char *url_to_filename_with_type(url * urlp, const char *mime_type, int lockfn)
{
  char *name = url_to_filename_real(urlp, mime_type, lockfn);

  return name;
}
1814
/*
 * Assign local_name to urlp (the pointer is stored, not copied) and
 * insert the URL into cfg.fn_hash_tbl, both while the file name hash
 * is locked.
 */
void url_set_filename(url * urlp, char *local_name)
{
  LOCK_CFG_FILEHASH;
  urlp->local_name = local_name;
  dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);
  UNLOCK_CFG_FILEHASH;
}
1823
1824 /******************************************************/
1825 /* k danemu URL vytvori meno suboru v lokalnom strome */
1826 /* FIXME: Translate me! */
1827 /******************************************************/
url_changed_filename(url * urlp)1828 void url_changed_filename(url * urlp)
1829 {
1830 url_remove_from_file_hash_tab(urlp);
1831 _free(urlp->local_name);
1832 url_add_to_file_hash_tab(urlp);
1833 }
1834
1835 /****************************************************************/
1836 /* k danemu URL vytvori meno docasneho suboru v lokalnom strome */
1837 /* FIXME: Translate me! */
1838 /****************************************************************/
url_to_in_filename(url * urlp)1839 char *url_to_in_filename(url * urlp)
1840 {
1841 char *pom;
1842 char *p;
1843
1844 if(cfg.mode == MODE_NOSTORE || cfg.mode == MODE_FTPDIR || (cfg.dumpfd >= 0))
1845 {
1846 int pomlen = strlen(priv_cfg.cache_dir) + 50;
1847 pom = _malloc(pomlen);
1848
1849 #ifdef HAVE_MT
1850 snprintf(pom, pomlen, "%s/.in_pavuk_nostore_%d_%ju",
1851 priv_cfg.cache_dir, (int) getpid(), (uintmax_t)pthread_self());
1852 #else
1853 snprintf(pom, pomlen, "%s/.in_pavuk_nostore_%d", priv_cfg.cache_dir,
1854 (int) getpid());
1855 #endif
1856 return pom;
1857 }
1858
1859 p = url_to_filename(urlp, TRUE);
1860
1861 pom = _malloc(strlen(p) + 5);
1862 strcpy(pom, p);
1863 p = strrchr(pom, '/');
1864 if(!p)
1865 p = pom;
1866 else
1867 p++;
1868 memmove(p + 4, p, strlen(p) + 1);
1869 strncpy(p, ".in_", 4);
1870
1871 return pom;
1872 }
1873
1874 /************************************************/
1875 /* make from URL structure URL string */
1876 /************************************************/
url_to_urlstr(url * urlp,int wa)1877 char *url_to_urlstr(url * urlp, int wa)
1878 {
1879 char *p;
1880 char portstr[10];
1881 char *retv;
1882
1883 snprintf(portstr, sizeof(portstr), ":%d", url_get_port(urlp));
1884 switch (urlp->type)
1885 {
1886 case URLT_HTTP:
1887 case URLT_HTTPS:
1888 retv = _malloc(strlen(prottable[urlp->type].typestr) +
1889 (urlp->p.http.user ? strlen(urlp->p.http.user) + 1 : 0) +
1890 (urlp->p.http.password ? strlen(urlp->p.http.password) + 1 : 0) +
1891 strlen(urlp->p.http.host) +
1892 (urlp->p.http.port ==
1893 prottable[urlp->type].default_port ? 0 : strlen(portstr) + 1) +
1894 strlen(urlp->p.http.document) +
1895 (urlp->p.http.searchstr ? strlen(urlp->p.http.searchstr) + 1 : 0) +
1896 (urlp->p.http.anchor_name ? strlen(urlp->p.http.anchor_name) + 1 : 0) +
1897 1);
1898
1899
1900 sprintf(retv, "%s%s%s%s%s%s%s%s%s%s%s%s", prottable[urlp->type].typestr,
1901 urlp->p.http.user ? urlp->p.http.user : "",
1902 urlp->p.http.password ? ":" : "",
1903 urlp->p.http.password ? urlp->p.http.password : "",
1904 (urlp->p.http.password || urlp->p.http.user) ? "@" : "",
1905 urlp->p.http.host,
1906 (urlp->p.http.port ==
1907 prottable[urlp->type].default_port ? "" : portstr),
1908 urlp->p.http.document, urlp->p.http.searchstr ? "?" : "",
1909 urlp->p.http.searchstr ? urlp->p.http.searchstr : "", wa
1910 && urlp->p.http.anchor_name ? "#" : "", wa
1911 && urlp->p.http.anchor_name ? urlp->p.http.anchor_name : "");
1912
1913 if(!urlp->p.http.searchstr &&
1914 (urlp->status & URL_FORM_ACTION) &&
1915 (((form_info *) urlp->extension)->method == FORM_M_GET))
1916 {
1917 char *ss;
1918
1919 ss = form_encode_urlencoded(((form_info *) urlp->extension)->infos);
1920 if(ss)
1921 retv = tl_str_concat(retv, "?", ss, NULL);
1922 _free(ss);
1923 }
1924
1925 return retv;
1926 case URLT_FILE:
1927 p = get_abs_file_path(urlp->p.file.filename);
1928 retv = _malloc(strlen(prottable[URLT_FILE].typestr) +
1929 strlen(p) +
1930 (urlp->p.file.searchstr ? strlen(urlp->p.file.searchstr) + 1 : 0) +
1931 ((wa &&
1932 urlp->p.file.anchor_name) ? strlen(urlp->p.file.anchor_name) +
1933 1 : 0) + 1);
1934
1935 sprintf(retv, "%s%s%s%s%s%s", prottable[URLT_FILE].typestr, p,
1936 urlp->p.file.searchstr ? "?" : "",
1937 urlp->p.file.searchstr ? urlp->p.file.searchstr : "",
1938 urlp->p.file.anchor_name ? "#" : "",
1939 urlp->p.file.anchor_name ? urlp->p.file.anchor_name : "");
1940
1941 free(p);
1942
1943 return retv;
1944 case URLT_FTP:
1945 case URLT_FTPS:
1946 retv = _malloc(strlen(prottable[urlp->type].typestr) +
1947 (urlp->p.ftp.user ? strlen(urlp->p.ftp.user) + 1 : 0) +
1948 (urlp->p.ftp.password ? strlen(urlp->p.ftp.password) + 1 : 0) +
1949 strlen(urlp->p.ftp.host) +
1950 (urlp->p.ftp.port ==
1951 prottable[urlp->type].default_port ? 0 : strlen(portstr) + 1) +
1952 strlen(urlp->p.ftp.path) +
1953 (urlp->p.ftp.anchor_name ? strlen(urlp->p.ftp.anchor_name) + 1 : 0) +
1954 1);
1955
1956 sprintf(retv, "%s%s%s%s%s%s%s%s%s%s", prottable[urlp->type].typestr,
1957 urlp->p.ftp.user ? urlp->p.ftp.user : "",
1958 urlp->p.ftp.password ? ":" : "",
1959 urlp->p.ftp.password ? urlp->p.ftp.password : "",
1960 (urlp->p.ftp.password || urlp->p.ftp.user) ? "@" : "",
1961 urlp->p.ftp.host,
1962 (urlp->p.ftp.port == prottable[urlp->type].default_port ? "" : portstr),
1963 urlp->p.ftp.path,
1964 wa && urlp->p.ftp.anchor_name ? "#" : "",
1965 wa && urlp->p.ftp.anchor_name ? urlp->p.ftp.anchor_name : "");
1966
1967 return retv;
1968 case URLT_GOPHER:
1969 retv = _malloc(strlen(prottable[URLT_GOPHER].typestr) +
1970 strlen(urlp->p.gopher.host) +
1971 (urlp->p.gopher.port ==
1972 prottable[urlp->type].default_port ? 0 : strlen(portstr) + 1) +
1973 strlen(urlp->p.gopher.selector) + 2);
1974
1975 sprintf(retv, "%s%s%s/%s", prottable[URLT_GOPHER].typestr,
1976 urlp->p.gopher.host,
1977 (urlp->p.gopher.port ==
1978 prottable[urlp->type].default_port ? "" : portstr),
1979 urlp->p.gopher.selector);
1980
1981 return retv;
1982 case URLT_UNKNOWN:
1983 return tl_strdup(urlp->p.unsup.urlstr);
1984 case URLT_FROMPARENT:
1985 default:
1986 return NULL;
1987 }
1988 }
1989
/*
 * Build the URL string used in a request for urlp.
 *
 * The path part (and the HTTP query) is encoded with
 * url_encode_str_safe().  With absolute set, scheme, host and a
 * non-default port are put in front; for FTP also the encoded user name
 * and password.  For HTTP form actions using GET without a search
 * string the urlencoded form data become the query.
 *
 * Returns a newly allocated string, or NULL for other URL types.
 */
char *url_to_request_urlstr(url * urlp, int absolute)
{
  char *p, *s, *w, *u;
  char portstr[10];
  char *retv = NULL;

  snprintf(portstr, sizeof(portstr), ":%d", url_get_port(urlp));

  switch (urlp->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    p = url_encode_str_safe(urlp->p.http.document, URL_PATH_UNSAFE);
    s = urlp->p.http.searchstr ?
      url_encode_str_safe(urlp->p.http.searchstr, URL_QUERY_UNSAFE) : NULL;

    if(absolute)
      retv = tl_str_concat(NULL, prottable[urlp->type].typestr,
        urlp->p.http.host,
        (urlp->p.http.port ==
          prottable[urlp->type].default_port ? "" : portstr), NULL);

    retv = tl_str_concat(retv, p ? p : "", s ? "?" : "", s ? s : "", NULL);

    _free(p);
    _free(s);

    if(!urlp->p.http.searchstr &&
      (urlp->status & URL_FORM_ACTION) &&
      (((form_info *) urlp->extension)->method == FORM_M_GET))
    {
      char *ss;

      ss = form_encode_urlencoded(((form_info *) urlp->extension)->infos);
      if(ss)
        retv = tl_str_concat(retv, "?", ss, NULL);
      _free(ss);
    }
    break;
  case URLT_FTP:
  case URLT_FTPS:
    p = url_encode_str_safe(urlp->p.ftp.path, URL_PATH_UNSAFE);
    if(absolute)
    {
      w = urlp->p.ftp.password ?
        url_encode_str_safe(urlp->p.ftp.password, URL_AUTH_UNSAFE) : NULL;
      u = urlp->p.ftp.user ?
        url_encode_str_safe(urlp->p.ftp.user, URL_AUTH_UNSAFE) : NULL;

      retv = tl_str_concat(NULL, prottable[urlp->type].typestr,
        u ? u : "", w ? ":" : "", w ? w : "",
        (w || u) ? "@" : "", urlp->p.ftp.host,
        (urlp->p.ftp.port ==
          prottable[urlp->type].default_port ? "" : portstr), NULL);

      _free(u);
      _free(w);
    }

    retv = tl_str_concat(retv, p, NULL);
    _free(p);
    break;
  case URLT_GOPHER:
    p = url_encode_str_safe(urlp->p.gopher.selector, URL_PATH_UNSAFE);
    if(absolute)
      retv = tl_str_concat(NULL, prottable[urlp->type].typestr,
        urlp->p.gopher.host,
        (urlp->p.gopher.port ==
          prottable[urlp->type].default_port ? "" : portstr), NULL);

    /* Use the encoded selector, as the HTTP and FTP branches do with */
    /* their paths; before, the encoded copy was computed and thrown  */
    /* away and the raw selector was sent.  The raw selector remains  */
    /* the fallback when no encoded copy is available.                */
    /* NOTE(review): confirm no caller relies on the raw selector.    */
    retv = tl_str_concat(retv, "/", p ? p : urlp->p.gopher.selector, NULL);
    _free(p);
    break;
  default:
    break;
  }

  return retv;
}
2069
2070 /********************************************************/
2071 /* z URL vrati adresu servera pre dokument */
2072 /* FIXME: Translate me! */
2073 /********************************************************/
url_get_site(url * urlr)2074 char *url_get_site(url * urlr)
2075 {
2076 switch (urlr->type)
2077 {
2078 case URLT_HTTP:
2079 case URLT_HTTPS:
2080 return urlr->p.http.host;
2081 case URLT_FTP:
2082 case URLT_FTPS:
2083 return urlr->p.ftp.host;
2084 case URLT_GOPHER:
2085 return urlr->p.gopher.host;
2086 default:
2087 return NULL;
2088 }
2089 }
2090
/*
 * Port number stored in the URL; 0 for URL types which have no port
 * (everything except HTTP(S), FTP(S) and gopher).
 */
int url_get_port(url * urlr)
{
  int port = 0;

  if(urlr->type == URLT_HTTP || urlr->type == URLT_HTTPS)
    port = (int) urlr->p.http.port;
  else if(urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
    port = (int) urlr->p.ftp.port;
  else if(urlr->type == URLT_GOPHER)
    port = (int) urlr->p.gopher.port;

  return port;
}
2107
/*
 * Path part of the URL: the HTTP document, the FTP path, the gopher
 * selector or the file name.  NULL for any other type.  The returned
 * string belongs to the URL.
 */
char *url_get_path(url * urlr)
{
  char *path = NULL;

  if(urlr->type == URLT_HTTP || urlr->type == URLT_HTTPS)
    path = urlr->p.http.document;
  else if(urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
    path = urlr->p.ftp.path;
  else if(urlr->type == URLT_GOPHER)
    path = urlr->p.gopher.selector;
  else if(urlr->type == URLT_FILE)
    path = urlr->p.file.filename;

  return path;
}
2126
/*
 * Replace the path part of the URL (HTTP document, FTP path, gopher
 * selector or file name) by a copy of path and recompute the local file
 * name.  URLs of any other type are left untouched.
 *
 * The copy is made before the old value is released, so path may be the
 * pointer returned by url_get_path() for the same URL.
 */
void url_set_path(url * urlr, char *path)
{
  char **slot;
  char *copy;

  switch (urlr->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    slot = &urlr->p.http.document;
    break;
  case URLT_FTP:
  case URLT_FTPS:
    slot = &urlr->p.ftp.path;
    break;
  case URLT_GOPHER:
    slot = &urlr->p.gopher.selector;
    break;
  case URLT_FILE:
    slot = &urlr->p.file.filename;
    break;
  default:
    return;
  }

  /* duplicate first: path may alias the string which is freed below */
  copy = tl_strdup(path);
  _free(*slot);
  *slot = copy;

  url_changed_filename(urlr);
}
2154
/*
 * Newly allocated copy of the path of the URL; for HTTP(S) URLs with a
 * search string the result is "<document>?<search string>".
 */
char *url_get_full_path(url * urlr)
{
  int is_http = (urlr->type == URLT_HTTP || urlr->type == URLT_HTTPS);

  if(!is_http)
    return tl_strdup(url_get_path(urlr));

  if(!urlr->p.http.searchstr)
    return tl_strdup(urlr->p.http.document);

  return tl_str_concat(NULL, urlr->p.http.document, "?",
    urlr->p.http.searchstr, NULL);
}
2179
/*
 * Password to use for an HTTP(S) or FTP(S) URL.  Sources in order of
 * preference: the password stored in the URL, the matching authinfo
 * entry (site, port, path, realm), priv_cfg.passwd_auth.
 * Returns NULL for other URL types.  The result is not a copy.
 */
char *url_get_pass(url * urlr, char *realm)
{
  char *secret;
  authinfo *match;

  if(urlr->type == URLT_HTTP || urlr->type == URLT_HTTPS)
    secret = urlr->p.http.password;
  else if(urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
    secret = urlr->p.ftp.password;
  else
    return NULL;

  if(secret)
    return secret;

  match = authinfo_match_entry(urlr->type, url_get_site(urlr),
    url_get_port(urlr), url_get_path(urlr), realm);
  if(match && match->pass)
    return match->pass;

  return priv_cfg.passwd_auth;
}
2214
/*
 * User name to use for an HTTP(S) or FTP(S) URL.  Sources in order of
 * preference: the user stored in the URL, the matching authinfo entry
 * (site, port, path, realm), priv_cfg.name_auth.
 * Returns NULL for other URL types.  The result is not a copy.
 */
char *url_get_user(url * urlr, char *realm)
{
  char *login;
  authinfo *match;

  if(urlr->type == URLT_HTTP || urlr->type == URLT_HTTPS)
    login = urlr->p.http.user;
  else if(urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
    login = urlr->p.ftp.user;
  else
    return NULL;

  if(login)
    return login;

  match = authinfo_match_entry(urlr->type, url_get_site(urlr),
    url_get_port(urlr), url_get_path(urlr), realm);
  if(match && match->user)
    return match->user;

  return priv_cfg.name_auth;
}
2249
/*
 * Authentication scheme for the URL: the type of the matching authinfo
 * entry (site, port, path, realm) or, without a match, cfg.auth_scheme.
 */
int url_get_auth_scheme(url * urlr, char *realm)
{
  int fallback = cfg.auth_scheme;
  authinfo *match;

  match = authinfo_match_entry(urlr->type, url_get_site(urlr),
    url_get_port(urlr), url_get_path(urlr), realm);

  if(!match)
    return fallback;

  return match->type;
}
2262
/*
 * Anchor name stored in an HTTP(S), FTP(S) or file URL; NULL for other
 * types.  The returned string belongs to the URL.
 */
char *url_get_anchor_name(url * urlp)
{
  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
    return urlp->p.http.anchor_name;

  if(urlp->type == URLT_FTP || urlp->type == URLT_FTPS)
    return urlp->p.ftp.anchor_name;

  if(urlp->type == URLT_FILE)
    return urlp->p.file.anchor_name;

  return NULL;
}
2287
/*
 * Release the anchor name of an HTTP(S), FTP(S) or file URL.
 * Other URL types are left untouched.
 */
void url_clear_anchor(url * urlp)
{
  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
  {
    _free(urlp->p.http.anchor_name);
  }
  else if(urlp->type == URLT_FTP || urlp->type == URLT_FTPS)
  {
    _free(urlp->p.ftp.anchor_name);
  }
  else if(urlp->type == URLT_FILE)
  {
    _free(urlp->p.file.anchor_name);
  }
}
2307
/*
 * Search (query) string stored in an HTTP(S) or file URL; NULL for
 * other types.  The returned string belongs to the URL.
 */
char *url_get_search_str(url * urlp)
{
  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
    return urlp->p.http.searchstr;

  if(urlp->type == URLT_FILE)
    return urlp->p.file.searchstr;

  return NULL;
}
2328
/*
 * Returns 1 when the URL denotes a directory: an HTTP(S) URL whose
 * document is a directory name, or an FTP(S) URL with the dir flag set.
 * Returns 0 otherwise.
 */
int url_is_dir_index(url * urlp)
{
  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
    return tl_is_dirname(urlp->p.http.document) ? 1 : 0;

  if(urlp->type == URLT_FTP || urlp->type == URLT_FTPS)
    return urlp->p.ftp.dir ? 1 : 0;

  return 0;
}
2335
2336 /* Check if URL is on same site. Be careful not to disallow
2337 protocol changes like HTTP to HTTPS. */
url_is_same_site(url * urla,url * urlb)2338 int url_is_same_site(url * urla, url * urlb)
2339 {
2340 return
2341 /* (urla->type == urlb->type) &&
2342 (url_get_port(urla) == url_get_port(urlb)) && */
2343 !strcmp(url_get_site(urla), url_get_site(urlb));
2344 }
2345
2346 /**************************************************/
2347 /* FIXME: Translate me */
2348 /* absolutna cesta k dokumentu z lokalneho stromu */
2349 /* ktory je referencovany relativne */
2350 /**************************************************/
get_redirect_abs_path(url * rurl,char * fstr)2351 char *get_redirect_abs_path(url * rurl, char *fstr)
2352 {
2353 char *pom, *p, *p1;
2354
2355 pom = tl_strdup(url_to_filename(rurl, TRUE));
2356 p = strrchr(pom, '/');
2357
2358 p1 = realloc(pom, strlen(fstr) + (p - pom) + 2);
2359 strcpy(p1 + (p - pom) + 1, fstr);
2360
2361 p = get_abs_file_path_oss(p1);
2362 free(p1);
2363
2364 return p;
2365 }
2366
/*
 * Replace the path of the URL (HTTP document, FTP path or file name) by
 * the result of get_abs_file_path() for it.  An FTP path which started
 * with "//" gets one extra leading '/' in front of the result.
 * Other URL types are left untouched.
 */
void url_path_abs(url * urlp)
{
  char *fixed;

  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
  {
    fixed = get_abs_file_path(urlp->p.http.document);
    free(urlp->p.http.document);
    urlp->p.http.document = fixed;
  }
  else if(urlp->type == URLT_FTP || urlp->type == URLT_FTPS)
  {
    fixed = get_abs_file_path(urlp->p.ftp.path);
    if(urlp->p.ftp.path[0] == '/' && urlp->p.ftp.path[1] == '/')
    {
      char *with_slash = tl_str_concat(NULL, "/", fixed, NULL);

      _free(fixed);
      fixed = with_slash;
    }
    free(urlp->p.ftp.path);
    urlp->p.ftp.path = fixed;
  }
  else if(urlp->type == URLT_FILE)
  {
    fixed = get_abs_file_path(urlp->p.file.filename);
    free(urlp->p.file.filename);
    urlp->p.file.filename = fixed;
  }
}
2400
filename_to_url(char * ifn)2401 url *filename_to_url(char *ifn)
2402 {
2403 int cdln = strlen(priv_cfg.cache_dir);
2404 bool_t isok = FALSE;
2405
2406 if(*ifn != '/')
2407 return NULL;
2408
2409 if(cfg.enable_info)
2410 {
2411 url *nurl = dinfo_get_url_for_filename(ifn);
2412
2413 if(nurl)
2414 return nurl;
2415 }
2416
2417 if(!strncmp(ifn, priv_cfg.cache_dir, cdln))
2418 {
2419 char *p;
2420 int i;
2421 url *nurl = _malloc(sizeof(url));
2422 char *fn = tl_strdup(ifn);
2423
2424 p = fn + cdln;
2425 p += (*p == '/');
2426
2427 if(!strcasecmp(tl_get_extension(fn), "css"))
2428 nurl->status = URL_STYLE;
2429 else
2430 nurl->status = 0;
2431
2432 nurl->level = 0;
2433 nurl->parent_url = NULL;
2434 nurl->moved_to = NULL;
2435 nurl->extension = NULL;
2436 nurl->local_name = tl_is_dirname(ifn) ?
2437 tl_str_concat(NULL, ifn, priv_cfg.index_name, NULL) : tl_strdup(ifn);
2438 #ifdef HAVE_MT
2439 pthread_mutex_init(&nurl->lock, NULL);
2440 #endif
2441
2442 #ifdef WITH_TREE
2443 #ifdef I_FACE
2444 nurl->prop = NULL;
2445 nurl->tree_nfo = NULL;
2446 #endif
2447 #endif
2448
2449 if(cfg.base_level && cfg.default_prefix)
2450 {
2451 char *tfn, *pfn;
2452 url *purl = url_parse(priv_cfg.default_prefix);
2453 assert(purl->type != URLT_FROMPARENT);
2454
2455 pfn = url_get_default_local_name_real(purl, FALSE);
2456 tfn = tl_str_concat(NULL, priv_cfg.cache_dir,
2457 tl_is_dirname(priv_cfg.cache_dir) ? "" : "/",
2458 pfn, tl_is_dirname(pfn) ? "" : "/", p, NULL);
2459 _free(pfn);
2460 _free(fn);
2461 fn = tfn;
2462
2463 p = fn + cdln;
2464 p += (*p == '/');
2465
2466 free_deep_url(purl);
2467 }
2468
2469 for(i = 0; i < NUM_ELEM(prottable); i++)
2470 {
2471 if(prottable[i].dirname &&
2472 !strncmp(p, prottable[i].dirname,
2473 strlen(prottable[i].dirname)) &&
2474 p[strlen(prottable[i].dirname)] == '/')
2475 {
2476 isok = TRUE;
2477 break;
2478 }
2479 }
2480
2481 if(isok)
2482 {
2483 char *p2, *p3;
2484
2485 nurl->type = prottable[i].id;
2486 nurl->parent_url = NULL;
2487 p += strlen(prottable[i].dirname) + 1;
2488
2489 if(!p)
2490 {
2491 free(nurl);
2492 free(fn);
2493 return NULL;
2494 }
2495
2496 switch (nurl->type)
2497 {
2498 case URLT_HTTP:
2499 case URLT_HTTPS:
2500 nurl->p.http.password = NULL;
2501 nurl->p.http.user = NULL;
2502 nurl->p.http.anchor_name = NULL;
2503 nurl->p.http.searchstr = NULL;
2504 nurl->p.http.port = prottable[i].default_port;
2505 if((p2 = strchr(p, '/')))
2506 {
2507 int p2_len = strlen(p2);
2508 int idx_len = strlen(priv_cfg.index_name);
2509 char *query = NULL;
2510
2511 if(idx_len <= p2_len &&
2512 !strcmp((p2 + p2_len - idx_len), priv_cfg.index_name) &&
2513 ((p2_len > idx_len && *(p2 + p2_len - idx_len - 1) == '/')
2514 || idx_len == p2_len))
2515 {
2516 *(p2 + p2_len - idx_len) = '\0';
2517 }
2518
2519 /* for POST #query */
2520 p3 = strchr(p2, '#');
2521 if(p3)
2522 {
2523 form_info *fi;
2524
2525 *p3 = '\0';
2526 query = p3 + 1;
2527
2528 fi = _malloc(sizeof(form_info));
2529
2530 fi->method = FORM_M_POST;
2531 fi->encoding = FORM_E_URLENCODED;
2532 fi->action = NULL;
2533 fi->text = NULL;
2534 fi->infos = form_parse_urlencoded_query(query);
2535 fi->parent_url = NULL;
2536
2537 nurl->extension = fi;
2538 nurl->status |= URL_FORM_ACTION;
2539 }
2540
2541 /* for query part of GET request URL */
2542 p3 = strchr(p2, '?');
2543 if(p3)
2544 {
2545 *p3 = '\0';
2546 nurl->p.http.searchstr = tl_strdup(p3 + 1);
2547 }
2548
2549 nurl->p.http.document = tl_strdup(p2);
2550 *p2 = '\0';
2551 p2 = strrchr(p, '_');
2552 if(p2)
2553 {
2554 p2++;
2555 nurl->p.http.port = _atoi(p2);
2556 if(errno == ERANGE)
2557 {
2558 nurl->p.http.host = tl_strdup(p);
2559 nurl->p.http.port = prottable[i].default_port;
2560 }
2561 else
2562 {
2563 nurl->p.http.host = tl_strndup(p, p2 - p - 1);
2564 }
2565 }
2566 else
2567 nurl->p.http.host = tl_strdup(p);
2568 }
2569 else
2570 {
2571 free(nurl);
2572 free(fn);
2573 return NULL;
2574 }
2575 break;
2576 case URLT_GOPHER:
2577 nurl->p.gopher.port = prottable[i].default_port;
2578 if((p2 = strchr(p, '/')))
2579 {
2580 int p2_len = strlen(p2);
2581 int idx_len = strlen(priv_cfg.index_name);
2582
2583 p2++;
2584
2585 if(idx_len <= p2_len &&
2586 !strcmp((p2 + p2_len - idx_len), priv_cfg.index_name) &&
2587 ((p2_len > idx_len && *(p2 + p2_len - idx_len - 1) == '1')
2588 || idx_len == p2_len))
2589 {
2590 *(p2 + p2_len - idx_len) = '\0';
2591 }
2592 nurl->p.gopher.selector = tl_strdup(p2);
2593 *p2 = '\0';
2594 p2 = strrchr(p, '_');
2595 if(p2)
2596 {
2597 p2++;
2598 nurl->p.gopher.port = _atoi(p2);
2599 if(errno == ERANGE)
2600 {
2601 nurl->p.gopher.host = tl_strdup(p);
2602 nurl->p.gopher.port = prottable[i].default_port;
2603 }
2604 else
2605 {
2606 nurl->p.gopher.host = tl_strndup(p, p2 - p - 1);
2607 }
2608 }
2609 else
2610 nurl->p.gopher.host = tl_strdup(p);
2611 }
2612 else
2613 {
2614 free(nurl);
2615 free(fn);
2616 return NULL;
2617 }
2618 break;
2619 case URLT_FTP:
2620 case URLT_FTPS:
2621 nurl->p.ftp.port = prottable[i].default_port;
2622 nurl->p.ftp.password = NULL;
2623 nurl->p.ftp.user = NULL;
2624 nurl->p.ftp.dir = FALSE;
2625 nurl->p.ftp.anchor_name = NULL;
2626 if((p2 = strchr(p, '/')))
2627 {
2628 int p2_len = strlen(p2);
2629 int idx_len = strlen(priv_cfg.index_name);
2630
2631 if(idx_len <= p2_len &&
2632 !strcmp((p2 + p2_len - idx_len), priv_cfg.index_name) &&
2633 ((p2_len > idx_len && *(p2 + p2_len - idx_len - 1) == '/')
2634 || idx_len == p2_len))
2635 {
2636 *(p2 + p2_len - idx_len) = '\0';
2637 nurl->p.ftp.dir = TRUE;
2638 }
2639 nurl->p.ftp.path = tl_strdup(p2);
2640 *p2 = '\0';
2641 p2 = strrchr(p, '_');
2642 if(p2)
2643 {
2644 p2++;
2645 nurl->p.ftp.port = _atoi(p2);
2646 if(errno == ERANGE)
2647 {
2648 nurl->p.ftp.host = tl_strdup(p);
2649 nurl->p.ftp.port = prottable[i].default_port;
2650 }
2651 else
2652 {
2653 nurl->p.ftp.host = tl_strndup(p, p2 - p - 1);
2654 }
2655 }
2656 else
2657 nurl->p.ftp.host = tl_strdup(p);
2658 }
2659 else
2660 {
2661 free(nurl);
2662 free(fn);
2663 return NULL;
2664 }
2665 break;
2666 default:
2667 free(nurl);
2668 nurl = NULL;
2669 break;
2670 }
2671 free(fn);
2672 return nurl;
2673 }
2674 free(nurl);
2675 }
2676 return NULL;
2677 }
2678
2679 /****************************************/
2680 /* zisti ci bol dokument referencovany */
2681 /* v predchadzajucich cykloch */
2682 /* FIXME: Translate me! */
2683 /****************************************/
url_was_befor(url * urlp)2684 url *url_was_befor(url * urlp)
2685 {
2686 url *ret;
2687
2688 if(!prottable[urlp->type].supported)
2689 return NULL;
2690
2691 LOCK_CFG_URLHASH;
2692 ret = (url *) dlhash_find(cfg.url_hash_tbl, (dllist_t) urlp);
2693 UNLOCK_CFG_URLHASH;
2694
2695 return ret;
2696 }
2697
/*
 * Make the URL forget its local file name.
 *
 * When both cfg.enable_info and cfg.post_update are set, dinfo_remove()
 * is called for the local name first.  Afterwards the URL is removed
 * from the file name hash table and the local_name string is released.
 */
void url_forget_filename(url * urlp)
{
  const int drop_info = cfg.enable_info && cfg.post_update;

  if(drop_info)
  {
    dinfo_remove(urlp->local_name);
  }

  url_remove_from_file_hash_tab(urlp);
  _free(urlp->local_name);
}
2705
/*
 * Adapter exposing url_compare() through the generic dllist_t key
 * signature: both keys are cast to url pointers and compared.
 * Returns whatever url_compare() returns for the two URLs.
 */
int dllist_url_compare(dllist_t key1, dllist_t key2)
{
  url *lhs = (url *) key1;
  url *rhs = (url *) key2;

  return url_compare(lhs, rhs);
}
2710
/*
 * Equality test for two optional strings.
 * Returns nonzero when both are NULL, or when both are set and have
 * identical contents; returns 0 otherwise.
 */
static int url_optstr_equal(const char *s1, const char *s2)
{
  if(!s1 || !s2)
    return s1 == s2;

  return !strcmp(s1, s2);
}

/*
 * Compare two URLs for equality.
 *
 * Returns nonzero when u1 and u2 describe the same resource and
 * 0 (FALSE) otherwise.  URLs of different types, and URLs of a type
 * not handled below, are never equal.
 *
 * Optional parts (search string, user, password) are compared with
 * url_optstr_equal() instead of subtracting the pointers: pointer
 * subtraction between unrelated objects is undefined, and storing the
 * ptrdiff_t result in an int could truncate a NULL/non-NULL difference
 * to zero and wrongly report equality.
 */
int url_compare(url * u1, url * u2)
{
  if(u1->type != u2->type)
    return FALSE;

  switch (u1->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    if(strcmp(u1->p.http.document, u2->p.http.document))
      return FALSE;

    if(!url_optstr_equal(u1->p.http.searchstr, u2->p.http.searchstr))
      return FALSE;

    if(!url_optstr_equal(u1->p.http.user, u2->p.http.user))
      return FALSE;

    if(!url_optstr_equal(u1->p.http.password, u2->p.http.password))
      return FALSE;

    if(strcmp(u1->p.http.host, u2->p.http.host))
      return FALSE;

    if(u1->p.http.port != u2->p.http.port)
      return FALSE;

    /* a form action URL never equals a plain URL */
    if((u1->status & URL_FORM_ACTION) != (u2->status & URL_FORM_ACTION))
      return FALSE;

    /* the flags are equal here, so testing one URL is sufficient */
    if(u1->status & URL_FORM_ACTION)
    {
      dllist *ptr;
      form_info *fi1 = (form_info *) u1->extension;
      form_info *fi2 = (form_info *) u2->extension;

      if(fi1->method != fi2->method)
        return FALSE;
      if(fi1->encoding != fi2->encoding)
        return FALSE;

      /*
       * Every field of the first form has to be present in the second
       * form.  NOTE(review): fields present only in the second form
       * are not detected - confirm whether this is intended.
       */
      for(ptr = fi1->infos; ptr; ptr = ptr->next)
      {
        if(!dllist_find2(fi2->infos, ptr->data, form_field_compare))
          return FALSE;
      }
    }

    return TRUE;
  case URLT_FTP:
  case URLT_FTPS:
    if(strcmp(u1->p.ftp.path, u2->p.ftp.path))
      return FALSE;

    if(!url_optstr_equal(u1->p.ftp.user, u2->p.ftp.user))
      return FALSE;

    if(!url_optstr_equal(u1->p.ftp.password, u2->p.ftp.password))
      return FALSE;

    if(strcmp(u1->p.ftp.host, u2->p.ftp.host))
      return FALSE;

    return u1->p.ftp.port == u2->p.ftp.port;
  case URLT_GOPHER:
    if(strcmp(u1->p.gopher.selector, u2->p.gopher.selector))
      return FALSE;

    if(strcmp(u1->p.gopher.host, u2->p.gopher.host))
      return FALSE;

    return u1->p.gopher.port == u2->p.gopher.port;
  case URLT_FILE:
    if(strcmp(u1->p.file.filename, u2->p.file.filename))
      return FALSE;

    return url_optstr_equal(u1->p.file.searchstr, u2->p.file.searchstr);
  default:
    return FALSE;
  }
}
2831
url_info_new(char * urlstr)2832 url_info *url_info_new(char *urlstr)
2833 {
2834 url_info *ui;
2835
2836 ui = _malloc(sizeof(url_info));
2837 ui->urlstr = tl_strdup(urlstr);
2838 ui->type = URLI_NORMAL;
2839 ui->fields = NULL;
2840 ui->encoding = FORM_E_UNKNOWN;
2841 ui->method = FORM_M_GET;
2842 ui->localname = NULL;
2843
2844 return ui;
2845 }
2846
/*
 * Release a url_info together with the strings it holds.
 * A NULL argument is accepted and ignored.
 * The list of form fields is released only for URLI_FORM infos.
 */
void url_info_free(url_info * ui)
{
  if(ui == NULL)
    return;

  _free(ui->urlstr);

  if(ui->type == URLI_FORM)
  {
    dllist *node = ui->fields;

    while(node)
    {
      form_field *field = (form_field *) node->data;

      _free(field->name);
      _free(field->value);
      _free(field);

      /* continue with whatever node the list hands back after removal */
      node = dllist_remove_entry(node, node);
    }
  }

  _free(ui->localname);
  _free(ui);
}
2870
/*
 * Keywords recognized by url_info_parse() in a request description.
 * Each entry pairs the kind of the token with the keyword (including
 * the trailing colon) which introduces it.  The keywords point to
 * string literals, therefore the pointer is declared const.
 */
static const struct
{
  enum
  {
    _RQF_URL,
    _RQF_METHOD,
    _RQF_ENCODING,
    _RQF_FIELD,
    _RQF_FILE,
    _RQF_LOCALNAME
  } type;
  const char *str;
} _request_fields[] =
{
  {_RQF_URL, "URL:"},
  {_RQF_METHOD, "METHOD:"},
  {_RQF_ENCODING, "ENCODING:"},
  {_RQF_FIELD, "FIELD:"},
  {_RQF_FILE, "FILE:"},
  {_RQF_LOCALNAME, "LNAME:"}
};
2892
url_info_parse(char * str)2893 url_info *url_info_parse(char *str)
2894 {
2895 url_info *ui;
2896 char *p, *tp;
2897 int l = 0;
2898 bool_t err = FALSE;
2899 bool_t found = FALSE;
2900 int i;
2901
2902 ui = url_info_new(NULL);
2903 ui->type = URLI_FORM;
2904
2905 p = str;
2906 while(!err && *p)
2907 {
2908 p += strspn(p, " \t");
2909
2910 found = FALSE;
2911 for(i = 0; i < NUM_ELEM(_request_fields); i++)
2912 {
2913 if(!strncasecmp(p, _request_fields[i].str,
2914 strlen(_request_fields[i].str)))
2915 {
2916 found = TRUE;
2917 p += strlen(_request_fields[i].str);
2918 if(*p == '\"')
2919 {
2920 p++;
2921 l = strcspn(p, "\"");
2922 }
2923 else
2924 l = strcspn(p, " \t");
2925 if(!l)
2926 err = TRUE;
2927
2928 break;
2929 }
2930 }
2931 if(err || !found)
2932 {
2933 err = TRUE;
2934 break;
2935 }
2936 switch (_request_fields[i].type)
2937 {
2938 case _RQF_URL:
2939 {
2940 url *urlp;
2941
2942 ui->urlstr = tl_strndup(p, l);
2943 urlp = url_parse(ui->urlstr);
2944 assert(urlp->type != URLT_FROMPARENT);
2945 _free(ui->urlstr);
2946 ui->urlstr = url_to_urlstr(urlp, FALSE);
2947 free_deep_url(urlp);
2948 _free(urlp);
2949 }
2950 break;
2951 case _RQF_LOCALNAME:
2952 {
2953 char *tmp = tl_strndup(p, l);
2954
2955 ui->localname = get_abs_file_path_oss(tmp);
2956 _free(tmp);
2957 }
2958 break;
2959 case _RQF_METHOD:
2960 if(!strncasecmp(p, "GET", l))
2961 ui->method = FORM_M_GET;
2962 else if(!strncasecmp(p, "POST", l))
2963 ui->method = FORM_M_POST;
2964 else
2965 err = TRUE;
2966 break;
2967 case _RQF_ENCODING:
2968 if(!strncasecmp(p, "m", l))
2969 ui->encoding = FORM_E_MULTIPART;
2970 else if(!strncasecmp(p, "u", l))
2971 ui->encoding = FORM_E_URLENCODED;
2972 else
2973 err = TRUE;
2974 break;
2975 case _RQF_FIELD:
2976 case _RQF_FILE:
2977 {
2978 form_field *fi;
2979
2980 fi = _malloc(sizeof(form_field));
2981
2982 fi->name = NULL;
2983 fi->value = NULL;
2984
2985 fi->type = (_request_fields[i].type == _RQF_FILE) ?
2986 FORM_T_FILE : FORM_T_TEXT;
2987
2988 tp = strchr(p, '=');
2989
2990 if(!tp || (tp - p) > l)
2991 err = TRUE;
2992 else
2993 {
2994 fi->name = form_decode_urlencoded_str(p, tp - p);
2995 fi->value = form_decode_urlencoded_str(tp + 1, l - (tp - p + 1));
2996 if(fi->type == FORM_T_TEXT && strchr(fi->value, '\n'))
2997 fi->type = FORM_T_TEXTAREA;
2998 }
2999 if(err || !fi->name || !fi->value)
3000 {
3001 _free(fi->value);
3002 _free(fi->name);
3003 _free(fi);
3004 }
3005 else
3006 ui->fields = dllist_append(ui->fields, (dllist_t) fi);
3007 }
3008 break;
3009 }
3010 p += l;
3011 p += *p == '\"';
3012 }
3013
3014 if(!err)
3015 {
3016 if(!ui->urlstr)
3017 {
3018 xprintf(1, gettext("Missing specification of URL in request\n"));
3019 err = TRUE;
3020 }
3021
3022 #if 0 /* sometimes we need also empty forms */
3023 if(!ui->fields && ui->method == FORM_M_GET)
3024 ui->type = URLI_NORMAL;
3025 else if(!ui->fields)
3026 {
3027 xprintf(1,
3028 gettext("Missing request fields specification for POST request\n"));
3029 err = TRUE;
3030 }
3031 #endif
3032
3033 if(ui->method == FORM_M_GET && ui->encoding == FORM_E_MULTIPART)
3034 {
3035 xprintf(1,
3036 gettext("Multipart encoding not supported with GET requests\n"));
3037 err = TRUE;
3038 }
3039 }
3040
3041 if(err)
3042 {
3043 url_info_free(ui);
3044 ui = NULL;
3045 }
3046
3047 return ui;
3048 }
3049
/*
 * Build the textual description of a request, using the keywords
 * URL:, LNAME:, METHOD:, ENCODING:, FIELD: and FILE:.
 *
 * Every emitted token is followed by a single space.  METHOD, ENCODING
 * and the form fields are written for URLI_FORM infos only; names and
 * values of the fields are urlencoded.
 *
 * Returns the string built up with tl_str_concat()/tl_str_append().
 */
char *url_info_dump(url_info * ui)
{
  char *out = NULL;
  dllist *node;

  out = tl_str_concat(out, "URL:\"", ui->urlstr, "\" ", NULL);

  if(ui->localname)
    out = tl_str_concat(out, "LNAME:\"", ui->localname, "\" ", NULL);

  /* everything below applies to form requests only */
  if(ui->type != URLI_FORM)
    return out;

  switch (ui->method)
  {
  case FORM_M_GET:
    out = tl_str_append(out, "METHOD:GET ");
    break;
  case FORM_M_POST:
    out = tl_str_append(out, "METHOD:POST ");
    break;
  default:
    break;
  }

  switch (ui->encoding)
  {
  case FORM_E_URLENCODED:
    out = tl_str_append(out, "ENCODING:u ");
    break;
  case FORM_E_MULTIPART:
    out = tl_str_append(out, "ENCODING:m ");
    break;
  default:
    break;
  }

  for(node = ui->fields; node; node = node->next)
  {
    form_field *field = (form_field *) node->data;
    char *name = form_encode_urlencoded_str(field->name);
    char *value = form_encode_urlencoded_str(field->value);

    if(field->type == FORM_T_FILE)
      out = tl_str_concat(out, "FILE:\"", name, "=", value, "\" ", NULL);
    else
      out = tl_str_concat(out, "FIELD:\"", name, "=", value, "\" ", NULL);

    _free(name);
    _free(value);
  }

  return out;
}
3095
/*
 * Create a copy of a url_info.
 *
 * The URL string, the local name and the name/value of each form
 * field are duplicated with tl_strdup(); type, method and encoding
 * are taken over as they are.  Fields are appended to the copy in the
 * order in which they are stored in the original.
 */
url_info *url_info_duplicate(url_info * ui)
{
  url_info *copy = url_info_new(ui->urlstr);
  dllist *node;

  copy->type = ui->type;
  copy->method = ui->method;
  copy->encoding = ui->encoding;

  if(ui->localname)
    copy->localname = tl_strdup(ui->localname);

  for(node = ui->fields; node; node = node->next)
  {
    form_field *src = (form_field *) node->data;
    form_field *dst = (form_field *) _malloc(sizeof(form_field));

    dst->type = src->type;
    dst->name = tl_strdup(src->name);
    dst->value = tl_strdup(src->value);

    copy->fields = dllist_append(copy->fields, (dllist_t) dst);
  }

  return copy;
}
3124