1 /***************************************************************************/
2 /*    This code is part of WWW grabber called pavuk                        */
3 /*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
4 /*    Distributed under GPL 2 or later                                     */
5 /***************************************************************************/
6 
7 #include "config.h"
8 
9 #include <assert.h>
10 #include <ctype.h>
11 #include <sys/types.h>
12 #include <sys/stat.h>
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <stdint.h>
16 #include <string.h>
17 #include <unistd.h>
18 #include <limits.h>
19 #include <errno.h>
20 
21 #include "gui.h"
22 #include "http.h"
23 #include "ftp.h"
24 #include "gopher.h"
25 #include "url.h"
26 #include "html.h"
27 #include "tools.h"
28 #include "authinfo.h"
29 #include "tr.h"
30 #include "dinfo.h"
31 #include "form.h"
32 #include "gui_api.h"
33 #include "lfname.h"
34 
35 static char *url_decode_html(const char *, int);
36 
/* Characters that are unsafe in file names */
/* can be specified here. */
39 #ifdef __CYGWIN__
40 #define FS_UNSAFE_CHARACTERS "\\:*?\"<>|"
41 #endif
42 
/* Digits used when writing a byte as two hexadecimal characters. */
static const char hexa[] = "0123456789ABCDEF";

/*
 * Numeric value (0-15) of a single ASCII hexadecimal digit.
 * The argument is evaluated more than once, so it must be free of side
 * effects; it is expected to already be a valid hexadecimal digit.
 */
#define HEXASC2HEXNR(x) (((x) >= '0' && (x) <= '9') ? \
  ((x) - '0') : (tl_ascii_toupper(x) - 'A' + 10))

/*
 * Value of the "%XY" escape sequence that starts at pointer x
 * (x[0] is the '%', x[1] and x[2] are the two hexadecimal digits).
 * The whole expansion and the argument are parenthesized so that the
 * macro can be used safely inside larger expressions.
 */
#define HEX2CHAR(x) \
  ((HEXASC2HEXNR(*((x) + 1)) << 4) + HEXASC2HEXNR(*((x) + 2)))
49 
50 const protinfo prottable[] = {
51   {URLT_UNKNOWN, NULL, "unknown", NULL, 0, FALSE},
52   {URLT_HTTP, "http", "http", "http://", 80, TRUE},
53 #ifdef USE_SSL
54   {URLT_HTTPS, "https", "https", "https://", 443, TRUE},
55 #else
56   {URLT_HTTPS, "https", "https", "https://", 443, FALSE},
57 #endif
58   {URLT_FTP, "ftp", "ftp", "ftp://", 21, TRUE},
59 #ifdef USE_SSL
60   {URLT_FTPS, "ftps", "ftps", "ftps://", 21, TRUE},
61 #else
62   {URLT_FTPS, "ftps", "ftps", "ftps://", 21, FALSE},
63 #endif
64   {URLT_FILE, NULL, "file", "file://", 0, TRUE},
65   {URLT_GOPHER, "gopher", "gopher", "gopher://", 70, TRUE},
66   {URLT_FROMPARENT, NULL, "//", "//", 80, TRUE}
67 };
68 
#define _STRCLS_LOWER "abcdefghijklmnopqrstuvwxyz"
#define _STRCLS_UPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define _STRCLS_DIGIT "0123456789"

/*
 * Extract the scheme from the beginning of an URL string.
 *
 * A scheme is recognized when the string starts with a letter and
 * everything in front of the first ':' consists only of letters,
 * digits, '+', '-' and '.'. It is returned lowercased in a newly
 * allocated string.
 *
 * When the string has no ':' or does not start with a letter, a leading
 * "//" is reported as the pseudo scheme "//".
 *
 * Returns NULL when neither form applies.
 */
char *url_parse_scheme(char *urlstr)
{
  char *colon = strchr(urlstr, ':');
  char *scheme;
  int schemelen;

  if(!colon || !tl_ascii_isalpha(*urlstr))
  {
    if(urlstr[0] == '/' && urlstr[1] == '/')
      return strdup("//");

    return NULL;
  }

  /* the run of scheme characters must end exactly at the first ':' */
  schemelen = strspn(urlstr, _STRCLS_LOWER _STRCLS_UPER _STRCLS_DIGIT "+-.");
  if(schemelen != (colon - urlstr))
    return NULL;

  scheme = tl_strndup(urlstr, schemelen);
  lowerstr(scheme);

  return scheme;
}
97 
/*
 * Return a newly allocated copy of the authority part of an URL.
 *
 * urlschpart is the URL with the "scheme:" prefix already removed. The
 * authority is whatever follows a leading "//" up to (not including)
 * the first '/', '?', '#' or ';'. It may be an empty string.
 *
 * Returns NULL when urlschpart does not start with "//".
 */
static char *url_parse_authority(char *urlschpart)
{
  char *start;
  int authlen;

  if(urlschpart[0] != '/' || urlschpart[1] != '/')
    return NULL;

  start = urlschpart + 2;
  authlen = strcspn(start, "/?#;");

  return tl_strndup(start, authlen);
}
111 
/*
 * Split the authority part of an URL ("user:password@host:port") into
 * its components.
 *
 * authority - authority string (without the leading "//")
 * user      - receives the user name, or NULL when there is none;
 *             pass NULL when the scheme has no user info, in which case
 *             no '@' is searched for at all
 * password  - receives the password, or NULL when there is none;
 *             may be NULL
 * host      - receives the lowercased host name (always set)
 * port      - receives the port converted by _atoi(), or 0 when the
 *             authority carries no port
 *
 * All returned strings are newly allocated. Always returns 0.
 */
static int url_split_authority(char *authority, char **user, char **password,
  char **host, unsigned short *port)
{
  char *hostpart = authority;
  char *at, *colon;

  if(user)
    *user = NULL;
  if(password)
    *password = NULL;
  *host = NULL;
  *port = 0;

  /* the last '@' ends the user info, so a password may contain '@' */
  if(user && (at = strrchr(authority, '@')))
  {
    colon = strchr(authority, ':');

    if(colon && colon < at)
    {
      *user = tl_strndup(authority, colon - authority);
      /* the caller is allowed to ask for the user name only */
      if(password)
        *password = tl_strndup(colon + 1, at - colon - 1);
    }
    else
    {
      *user = tl_strndup(authority, at - authority);
    }
    hostpart = at + 1;
  }

  colon = strrchr(hostpart, ':');

  /* A ']' behind the last ':' means that the colon belongs to a       */
  /* bracketed IPv6 literal ("[::1]") and does not introduce a port.   */
  if(colon && strchr(colon, ']'))
    colon = NULL;

  if(colon)
  {
    *host = tl_strndup(hostpart, colon - hostpart);
    *port = _atoi(colon + 1);
  }
  else
  {
    *host = tl_strdup(hostpart);
  }

  lowerstr(*host);

  return 0;
}
156 
/*
 * Split the part of an URL that follows the authority into path,
 * query string and anchor.
 *
 * urlpath - string to split
 * path    - receives the path, or NULL when it is empty; a path that
 *           starts with '/' has its backslashes replaced by '/' and is
 *           passed through get_abs_file_path()
 * query   - receives the text following '?' with HTML entities
 *           decoded, or NULL; pass NULL to leave '?' inside the path
 * anchor  - receives the text following '#', or NULL; pass NULL to
 *           leave '#' inside the path
 *
 * Whichever of '?' and '#' comes first ends the path; the component
 * that starts first ends where the other one begins. All returned
 * strings are newly allocated. Always returns 0.
 */
static int url_split_path(char *urlpath, char **path, char **query,
  char **anchor)
{
  char *hash = NULL, *qmark = NULL, *cut = NULL;

  *path = NULL;
  if(query)
    *query = NULL;
  if(anchor)
    *anchor = NULL;

  if(anchor)
    hash = strchr(urlpath, '#');
  if(query)
    qmark = strchr(urlpath, '?');

  if(hash)
  {
    if(qmark && qmark > hash)
      *anchor = tl_strndup(hash + 1, qmark - (hash + 1));
    else
      *anchor = tl_strdup(hash + 1);
  }

  if(qmark)
  {
    if(hash && hash > qmark)
      *query = url_decode_html(qmark + 1, hash - (qmark + 1));
    else
      *query = url_decode_html(qmark + 1, strlen(qmark + 1));
  }

  /* the path ends at the first of the two separators */
  if(hash && qmark)
    cut = (hash > qmark) ? qmark : hash;
  else if(hash)
    cut = hash;
  else
    cut = qmark;

  if(cut)
  {
    if(cut != urlpath)
      *path = tl_strndup(urlpath, cut - urlpath);
  }
  else if(*urlpath)
  {
    *path = tl_strdup(urlpath);
  }

  if(*path && **path == '/')
  {
    char *raw = *path;

    *path = get_abs_file_path(_strtrchr(raw, '\\', '/'));
    free(raw);
  }

  return 0;
}
229 
url_scheme_to_schemeid(char * scheme)230 protocol url_scheme_to_schemeid(char *scheme)
231 {
232   int i;
233   for(i = 0; i < NUM_ELEM(prottable); i++)
234   {
235     if(prottable[i].urlid && !strcmp(prottable[i].urlid, scheme))
236     {
237       return prottable[i].id;
238     }
239   }
240   return URLT_UNKNOWN;
241 }
242 
243 /*
244  * If a path is relative and starts // we need to get the type from
245  * the parent, which only the caller can do. This function is called
246  * by the caller of url_parse when url_parse has returned  type = URTL_FROMPARENT
247  * and the parent can figure out the path. It basically does all the work
248  * that url_parse would do once it knew the scheme.
249  * however, we start with the urlstr in url->p.unsup.urlstr rather
250  * than as an argument
251  */
url_finishpath(url * url)252 static void url_finishpath(url * url)
253 {
254   char *authority = NULL;
255   char *p;
256 
257   if(url->type == URLT_FROMPARENT)
258     url->type = URLT_UNKNOWN;
259   if(url->type == URLT_UNKNOWN)
260     return;                     /* can't help here */
261 
262   p = url->p.unsup.urlstr;
263   authority = url_parse_authority(p);
264   if(authority)
265     p += strlen(authority) + 2;
266 
267   if(authority && *authority)
268   {
269     switch (url->type)
270     {
271     case URLT_FROMPARENT:
272       break;
273     case URLT_HTTP:
274     case URLT_HTTPS:
275       url_split_authority(authority,
276         &(url->p.http.user),
277         &(url->p.http.password), &(url->p.http.host), &(url->p.http.port));
278 
279       if(!url->p.http.port)
280         url->p.http.port = prottable[url->type].default_port;
281 
282       url_split_path(p,
283         &(url->p.http.document),
284         &(url->p.http.searchstr), &(url->p.http.anchor_name));
285 
286       if(!url->p.http.document)
287         url->p.http.document = tl_strdup("/");
288       break;
289     case URLT_FTP:
290     case URLT_FTPS:
291       url_split_authority(authority,
292         &(url->p.ftp.user),
293         &(url->p.ftp.password), &(url->p.ftp.host), &(url->p.ftp.port));
294 
295       if(!url->p.ftp.port)
296         url->p.ftp.port = prottable[url->type].default_port;
297 
298       url_split_path(p, &url->p.ftp.path, NULL, &url->p.ftp.anchor_name);
299 
300 
301       if(!url->p.ftp.path)
302         url->p.ftp.path = tl_strdup("/");
303 
304       if(p && p[0] == '/' && p[1] == '/')
305       {
306         char *pp = tl_str_concat(NULL, "/", url->p.ftp.path, NULL);
307         _free(url->p.ftp.path);
308         url->p.ftp.path = pp;
309       }
310 
311       if((p = strrchr(url->p.ftp.path, ';')) && !strncasecmp(p, ";type=", 6))
312         *p = '\0';
313 
314       url->p.ftp.dir = tl_is_dirname(url->p.ftp.path) != 0;
315       break;
316     case URLT_GOPHER:
317       url_split_authority(authority,
318         NULL, NULL, &(url->p.gopher.host), &(url->p.gopher.port));
319 
320       if(!url->p.gopher.port)
321         url->p.gopher.port = prottable[url->type].default_port;
322 
323       if(*(p + 1))
324         url->p.gopher.selector = tl_strdup(p + 1);
325       else
326         url->p.gopher.selector = tl_strdup("1");
327       break;
328     case URLT_FILE:
329       url_split_path(p,
330         &(url->p.file.filename),
331         &(url->p.file.searchstr), &(url->p.file.anchor_name));
332 
333       if(!url->p.file.filename)
334         url->p.file.filename = tl_strdup("");
335       break;
336     default:
337       return;
338     }
339   }
340   if(!authority || !*authority)
341   {
342     switch (url->type)
343     {
344     case URLT_FILE:
345     case URLT_FTP:
346     case URLT_FTPS:
347     case URLT_HTTP:
348     case URLT_HTTPS:
349       url->type = URLT_FILE;
350       url_split_path(p,
351         &(url->p.file.filename),
352         &(url->p.file.searchstr), &(url->p.file.anchor_name));
353 
354       if(!url->p.file.filename)
355         url->p.file.filename = tl_strdup("");
356       break;
357     default:
358       url->type = URLT_UNKNOWN;
359       return;
360       break;
361     }
362   }
363   _free(authority);
364   return;
365 }
366 
url_parse(char * urlstr)367 url *url_parse(char *urlstr)
368 {
369   char *scheme = NULL;
370   char *authority = NULL;
371   char *p;
372   url ret_url;
373 
374   ret_url.type = URLT_UNKNOWN;
375   ret_url.status = 0;
376   ret_url.parent_url = NULL;
377   ret_url.moved_to = NULL;
378   ret_url.ref_cnt = 1;
379   ret_url.level = 0;
380   ret_url.extension = NULL;
381   ret_url.local_name = NULL;
382 
383 #ifdef WITH_TREE
384 #ifdef I_FACE
385   ret_url.prop = NULL;
386   ret_url.tree_nfo = NULL;
387 #endif
388 #endif
389 
390 #ifdef HAVE_MT
391   pthread_mutex_init(&ret_url.lock, NULL);
392 #endif
393 
394   p = urlstr;
395 
396   if(p)
397     scheme = url_parse_scheme(urlstr);
398 
399   if(scheme)
400   {
401     ret_url.type = url_scheme_to_schemeid(scheme);
402 
403     /* If the string starts with // then we */
404     /* don't know the scheme type so we have */
405     /* to wait for the parent to set it. */
406     if(ret_url.type == URLT_FROMPARENT)
407     {
408       ret_url.p.unsup.urlstr = tl_strdup(urlstr);
409       authority = url_parse_authority(urlstr);
410     }
411     else
412     {
413       /* We do know the scheme type, so move past it */
414       /* and get the 'authority' */
415       p += strlen(scheme) + 1;
416       authority = url_parse_authority(p);
417     }
418 
419     if(authority)
420       p += strlen(authority) + 2;
421 
422     if(authority && *authority)
423     {
424       switch (ret_url.type)
425       {
426       case URLT_FROMPARENT:
427         break;
428       case URLT_HTTP:
429       case URLT_HTTPS:
430         url_split_authority(authority,
431           &ret_url.p.http.user,
432           &ret_url.p.http.password,
433           &ret_url.p.http.host, &ret_url.p.http.port);
434 
435         if(!ret_url.p.http.port)
436           ret_url.p.http.port = prottable[ret_url.type].default_port;
437 
438         url_split_path(p,
439           &ret_url.p.http.document,
440           &ret_url.p.http.searchstr, &ret_url.p.http.anchor_name);
441 
442         if(!ret_url.p.http.document)
443           ret_url.p.http.document = tl_strdup("/");
444         break;
445       case URLT_FTP:
446       case URLT_FTPS:
447         url_split_authority(authority,
448           &ret_url.p.ftp.user,
449           &ret_url.p.ftp.password, &ret_url.p.ftp.host, &ret_url.p.ftp.port);
450 
451         if(!ret_url.p.ftp.port)
452           ret_url.p.ftp.port = prottable[ret_url.type].default_port;
453 
454         url_split_path(p,
455           &ret_url.p.ftp.path, NULL, &ret_url.p.ftp.anchor_name);
456 
457 
458         if(!ret_url.p.ftp.path)
459           ret_url.p.ftp.path = tl_strdup("/");
460 
461         if(p && p[0] == '/' && p[1] == '/')
462         {
463           char *pp = tl_str_concat(NULL, "/", ret_url.p.ftp.path, NULL);
464           _free(ret_url.p.ftp.path);
465           ret_url.p.ftp.path = pp;
466         }
467 
468         if((p = strrchr(ret_url.p.ftp.path, ';')) &&
469           !strncasecmp(p, ";type=", 6))
470           *p = '\0';
471 
472         ret_url.p.ftp.dir = tl_is_dirname(ret_url.p.ftp.path) != 0;
473         break;
474       case URLT_GOPHER:
475         url_split_authority(authority,
476           NULL, NULL, &ret_url.p.gopher.host, &ret_url.p.gopher.port);
477 
478         if(!ret_url.p.gopher.port)
479           ret_url.p.gopher.port = prottable[ret_url.type].default_port;
480 
481         if(*(p + 1))
482           ret_url.p.gopher.selector = tl_strdup(p + 1);
483         else
484           ret_url.p.gopher.selector = tl_strdup("1");
485         break;
486       case URLT_FILE:
487         url_split_path(p,
488           &ret_url.p.file.filename,
489           &ret_url.p.file.searchstr, &ret_url.p.file.anchor_name);
490 
491         if(!ret_url.p.file.filename)
492           ret_url.p.file.filename = tl_strdup("");
493         break;
494       default:
495         ret_url.p.unsup.urlstr = tl_strdup(urlstr);
496         break;
497       }
498     }
499   }
500 
501   if(!scheme || !authority || !*authority)
502   {
503     if(!scheme)
504       ret_url.type = URLT_FILE;
505 
506     switch (ret_url.type)
507     {
508     case URLT_FILE:
509     case URLT_FTP:
510     case URLT_FTPS:
511     case URLT_HTTP:
512     case URLT_HTTPS:
513     case URLT_FROMPARENT:
514       ret_url.type = URLT_FILE;
515       url_split_path(p,
516         &ret_url.p.file.filename,
517         &ret_url.p.file.searchstr, &ret_url.p.file.anchor_name);
518 
519       if(!ret_url.p.file.filename)
520         ret_url.p.file.filename = tl_strdup("");
521       break;
522     default:
523       ret_url.type = URLT_UNKNOWN;
524       ret_url.p.unsup.urlstr = tl_strdup(urlstr);
525       break;
526     }
527   }
528 
529   _free(authority);
530   _free(scheme);
531   return new_url(&ret_url);
532 }
533 
/*
 * Create an independent copy of an URL.
 *
 * The copy gets its own duplicates of all scheme specific strings and
 * of the ftp extension (or of the form info when the source is a form
 * action with an extension). It starts with ref_cnt 1, without parent
 * list, redirect target, local name and GUI data, and inherits only
 * the status bits that are listed in the mask below.
 *
 * Duplicating an URL of type URLT_FROMPARENT is not expected and
 * trips an assertion.
 */
url *url_dup_url(url * src)
{
  url dst;

  /* values taken over from the source */
  dst.type = src->type;
  dst.level = src->level;
  dst.status = src->status &
    (URL_INLINE_OBJ | URL_STYLE | URL_ISHTML | URL_NORECURSE |
    URL_FORM_ACTION | URL_ISSCRIPT | URL_ISSTARTING);

  /* values that always start fresh */
  dst.ref_cnt = 1;
  dst.parent_url = NULL;
  dst.moved_to = NULL;
  dst.extension = NULL;
  dst.local_name = NULL;
#ifdef WITH_TREE
#ifdef I_FACE
  dst.prop = NULL;
  dst.tree_nfo = NULL;
#endif
#endif
#ifdef HAVE_MT
  pthread_mutex_init(&dst.lock, NULL);
#endif

  /* scheme specific part */
  if(dst.type == URLT_FILE)
  {
    dst.p.file.filename = tl_strdup(src->p.file.filename);
    dst.p.file.searchstr = tl_strdup(src->p.file.searchstr);
    dst.p.file.anchor_name = tl_strdup(src->p.file.anchor_name);
  }
  else if(dst.type == URLT_FTP || dst.type == URLT_FTPS)
  {
    dst.p.ftp.host = tl_strdup(src->p.ftp.host);
    dst.p.ftp.user = tl_strdup(src->p.ftp.user);
    dst.p.ftp.password = tl_strdup(src->p.ftp.password);
    dst.p.ftp.path = tl_strdup(src->p.ftp.path);
    dst.p.ftp.anchor_name = tl_strdup(src->p.ftp.anchor_name);
    dst.p.ftp.port = src->p.ftp.port;
    dst.p.ftp.dir = src->p.ftp.dir;
    if(src->extension)
      dst.extension = ftp_url_ext_dup(src->extension);
  }
  else if(dst.type == URLT_HTTP || dst.type == URLT_HTTPS)
  {
    dst.p.http.host = tl_strdup(src->p.http.host);
    dst.p.http.port = src->p.http.port;
    dst.p.http.document = tl_strdup(src->p.http.document);
    dst.p.http.searchstr = tl_strdup(src->p.http.searchstr);
    dst.p.http.anchor_name = tl_strdup(src->p.http.anchor_name);
    dst.p.http.user = tl_strdup(src->p.http.user);
    dst.p.http.password = tl_strdup(src->p.http.password);
    if(src->extension && (src->status & URL_FORM_ACTION))
      dst.extension = form_info_dup(src->extension);
  }
  else if(dst.type == URLT_GOPHER)
  {
    dst.p.gopher.host = tl_strdup(src->p.gopher.host);
    dst.p.gopher.port = src->p.gopher.port;
    dst.p.gopher.selector = tl_strdup(src->p.gopher.selector);
  }
  else if(dst.type == URLT_FROMPARENT || dst.type == URLT_UNKNOWN)
  {
    /* URLT_FROMPARENT is a 'can't happen'; when assertions are    */
    /* compiled out it is copied like an unsupported URL           */
    if(dst.type == URLT_FROMPARENT)
      assert(0);

    dst.p.unsup.urlstr = tl_strdup(src->p.unsup.urlstr);
  }

  return new_url(&dst);
}
602 
603 /* convert any URL string to absolute path */
url_to_absolute_url(char * base,char * baset,url * parent,char * act)604 char *url_to_absolute_url(char *base, char *baset, url * parent, char *act)
605 {
606   char *psp = NULL;
607   url *purl;
608   char *pom;
609   int pomlen;
610 
611   if(act[0] == 0)
612     return 0;
613   if(act[0] == '#')
614     return 0;
615 
616   pomlen = strlen(url_to_filename(parent, TRUE)) + strlen(priv_cfg.cache_dir)
617   + strlen(baset) + strlen(act);
618   pom = _malloc(pomlen);
619 
620   if((act[0] == '/' && act[1] == '/') && parent->type != URLT_FILE)
621   {
622     /* we should handle it like net_path */
623     snprintf(pom, pomlen, "%s:%s", prottable[parent->type].urlid, act);
624     psp = tl_strdup(pom);
625     purl = url_parse(act);
626   }
627   else
628   {
629     purl = url_parse(act);
630   }
631 
632   if(purl->type == URLT_FROMPARENT)
633   {
634     purl->type = parent->type;
635     url_finishpath(purl);
636   }
637   assert(purl->type != URLT_FROMPARENT);
638 
639   if(purl->type == URLT_FILE && (parent->type == URLT_FILE))
640   {
641     if(!(*purl->p.file.filename))
642     {
643       strcpy(pom, baset);
644     }
645     else
646     {
647       if(*(purl->p.file.filename) != '/')
648       {
649         strcpy(pom, base);
650         strcat(pom, purl->p.file.filename);
651 
652         free(purl->p.file.filename);
653         purl->p.file.filename = tl_strdup(pom);
654       }
655       else
656         snprintf(pom, pomlen, "%s%s",
657           prottable[purl->type].typestr, purl->p.file.filename);
658     }
659     psp = tl_strdup(pom);
660   }
661   else if((purl->type == URLT_FILE) &&
662     (cfg.base_level == 0 || cfg.enable_info) &&
663     (parent->status & URL_REDIRECT || parent->status & URL_ISLOCAL))
664   {
665     char *p1, *p;
666     url *pomurl;
667 
668     if(*purl->p.file.filename == '/')
669       strcpy(pom, purl->p.file.filename);
670     else
671     {
672       int l;
673       p = url_to_filename(parent, TRUE);
674       strcpy(pom, p);
675       if(*purl->p.file.filename)
676       {
677         p1 = strrchr(pom, '/');
678         if(p1)
679           *(p1 + 1) = '\0';
680         strcat(pom, purl->p.file.filename);
681       }
682       /* remove any dynamic stuff to get base name */
683       for(l = strlen(pom); l > 0 && pom[l] != '/' && pom[l] != '?'; --l)
684         ;
685       if(pom[l] == '?')
686         pom[l] = '\0';
687       /* now fix for index-name files */
688       l = strlen(pom)-strlen(priv_cfg.index_name);
689       if(l > 0 && !strcmp(pom+l, priv_cfg.index_name) && pom[l-1] == '/')
690         pom[l] = '\0';
691     }
692     if(purl->p.file.searchstr)
693     {
694       strcat(pom, "?");
695       strcat(pom, purl->p.file.searchstr);
696     }
697 
698     if(purl->p.file.anchor_name)
699     {
700       strcat(pom, "#");
701       strcat(pom, purl->p.file.anchor_name);
702     }
703 
704     p = get_abs_file_path(pom);
705     pomurl = filename_to_url(p);
706     _free(p);
707     if(pomurl)
708     {
709       psp = url_to_urlstr(pomurl, TRUE);
710       free_deep_url(pomurl);
711       _free(pomurl);
712     }
713   }
714   if((!psp && purl->type == URLT_FILE) &&
715     (parent->type == URLT_HTTP ||
716       parent->type == URLT_HTTPS ||
717       parent->type == URLT_FTPS || parent->type == URLT_FTP))
718   {
719     char *ri;
720     if(*(purl->p.file.filename) == '/')
721     {
722       char *idx;
723 
724       strcpy(pom, base);
725       idx = strfindnchr(pom, '/', 3);
726       if(idx)
727         strcpy(idx - 1, purl->p.file.filename);
728       else
729         strcat(pom, purl->p.file.filename);
730 
731       if(purl->p.file.searchstr)
732       {
733         strcat(pom, "?");
734         strcat(pom, purl->p.file.searchstr);
735       }
736 
737       if(purl->p.file.anchor_name)
738       {
739         strcat(pom, "#");
740         strcat(pom, purl->p.file.anchor_name);
741       }
742     }
743     else if(!(*purl->p.file.filename) && !purl->p.file.searchstr)
744     {
745       if(purl->p.file.anchor_name)
746       {
747         /* Problem; we just have "#anchor" and unfortunately
748            baset might be the parent directory, not the
749            actual parent. (Nor is that found in "parent"
750            necessarily).
751          */
752 
753         if(*baset && baset[strlen(baset) - 1] != '/')
754         {
755           strcpy(pom, baset);
756           strcat(pom, "#");
757           strcat(pom, purl->p.file.anchor_name);
758         }
759         else                    /* What to do? Just hope to ignore this altogeher */
760           strcpy(pom, "");
761 
762       }
763     }
764     else
765     {
766       strcpy(pom, base);
767       if(!*purl->p.file.filename && purl->p.file.searchstr && parent->type == URLT_HTTP)
768       {
769         ri = strrchr(pom, '/');
770         if(ri)
771           strcpy(ri, parent->p.http.document);
772         else
773           strcat(pom, parent->p.http.document);
774       }
775       else
776       {
777         ri = strrchr(pom, '/');
778         if(ri)
779           strcpy(ri + 1, purl->p.file.filename);
780         else
781           strcat(pom, purl->p.file.filename);
782       }
783 
784       if((parent->status & URL_REDIRECT) &&
785         (strlen(purl->p.file.filename) >= strlen(priv_cfg.index_name)) &&
786         !strcmp(priv_cfg.index_name,
787           purl->p.file.filename + strlen(purl->p.file.filename) -
788           strlen(priv_cfg.index_name)))
789       {
790         *(pom + strlen(pom) - strlen(priv_cfg.index_name)) = '\0';
791       }
792 
793       if(purl->p.file.searchstr)
794       {
795         strcat(pom, "?");
796         strcat(pom, purl->p.file.searchstr);
797       }
798 
799       if(purl->p.file.anchor_name)
800       {
801         strcat(pom, "#");
802         strcat(pom, purl->p.file.anchor_name);
803       }
804     }
805     psp = tl_strdup(pom);
806   }
807   else if(!psp)
808   {
809     psp = tl_strdup(act);
810   }
811 
812   free_deep_url(purl);
813   _free(purl);
814 
815   if(psp && *psp)
816   {
817     purl = url_parse(psp);
818     if(purl->type == URLT_FROMPARENT)
819     {
820       purl->type = parent->type;
821       url_finishpath(purl);
822     }
823     url_path_abs(purl);
824     if(prottable[purl->type].supported)
825     {
826       free(psp);
827       psp = url_to_urlstr(purl, TRUE);
828     }
829     free_deep_url(purl);
830     _free(purl);
831   }
832 
833   _free(pom);
834 
835   return psp;
836 }
837 
838 /**************************************/
839 /* encode unsafe characters with      */
840 /* url-encoded encoding               */
841 /**************************************/
url_encode_str_real(char * urlstr,char * unsafe,int safety)842 static char *url_encode_str_real(char *urlstr, char *unsafe, int safety)
843 {
844   char *res, *p, *r;
845 
846   if(urlstr == NULL)
847     return NULL;
848 
849   if(cfg.noencode)
850   {
851     return strdup(urlstr);
852   }
853 
854   res = _malloc(strlen(urlstr) * 3 + 1);
855 
856   for(p = urlstr, r = res; *p; p++, r++)
857   {
858     if(safety && *p == '%' && tl_ascii_isxdigit(p[1]) &&
859       tl_ascii_isxdigit(p[2]))
860     {
861       *r = *p;
862     }
863     else if(strchr(unsafe, *p) ||
864       ((unsigned char) *p > 0x7f) || ((unsigned char) *p < 0x20))
865     {
866       *r = '%';
867       r++;
868       *r = hexa[((unsigned char)*p) >> 4];
869       r++;
870       *r = hexa[((unsigned char)*p) % 16];
871     }
872     else
873     {
874       *r = *p;
875     }
876   }
877   *r = '\0';
878 
879   return res;
880 }
881 
url_encode_str(char * urlstr,char * unsafe)882 char *url_encode_str(char *urlstr, char *unsafe)
883 {
884   return url_encode_str_real(urlstr, unsafe, FALSE);
885 }
886 
url_encode_str_safe(char * urlstr,char * unsafe)887 static char *url_encode_str_safe(char *urlstr, char *unsafe)
888 {
889   return url_encode_str_real(urlstr, unsafe, TRUE);
890 }
891 
/*
 * Translate the HTML entity at the start of str to the character it
 * stands for.
 *
 * str  - text to examine
 * size - number of characters of str that may be examined
 * res  - receives the decoded character when an entity was found and
 *        is left untouched otherwise
 *
 * Returns the number of characters the entity occupies in str, or 0
 * when str does not start with a known entity. Currently only "&amp;"
 * and its numeric form "&#38;" are recognized.
 */
static int fix_html_entity(const char *str, int size, char *res)
{
  enum { ENTITY_LEN = 5 };      /* length of "&amp;" and of "&#38;" */

  if(size < ENTITY_LEN)
    return 0;

  if(strncmp(str, "&amp;", ENTITY_LEN) != 0 &&
    strncmp(str, "&#38;", ENTITY_LEN) != 0)
    return 0;

  *res = '&';
  return ENTITY_LEN;
}
904 
/*****************************************/
/* Decode the encoded characters of an   */
/* URL                                   */
/*****************************************/
/*
 * urlstr - text to decode; NULL gives NULL
 * len    - number of characters of urlstr to decode
 *
 * "%XX" escape sequences are replaced by the byte they stand for and
 * the entities known to fix_html_entity() by their character; all
 * other characters are copied. Decoding stops after len characters or
 * at the end of the string, whichever comes first, and an escape
 * sequence is decoded only when it lies completely inside the first
 * len characters.
 *
 * Returns a newly allocated string.
 */
char *url_decode_str(const char *urlstr, int len)
{
  char *res, *r;
  int i;

  if(urlstr == NULL)
    return NULL;

  /* the decoded text is never longer than the input, so a copy of */
  /* the input serves as the output buffer                         */
  res = tl_strndup(urlstr, len);

  for(i = 0, r = res; i < len && urlstr[i]; r++, i++)
  {
    if(urlstr[i] == '%' && i + 2 < len &&
      urlstr[i + 1] && urlstr[i + 2] &&
      tl_ascii_isxdigit(urlstr[i + 1]) && tl_ascii_isxdigit(urlstr[i + 2]))
    {
      *r = HEX2CHAR(urlstr + i);
      i += 2;
    }
    else if(urlstr[i] == '&')
    {
      int s;
      if((s = fix_html_entity(urlstr + i, len - i, r)))
        i += s - 1;
      else
        *r = urlstr[i];         /* copy the & */
    }
    else
    {
      *r = urlstr[i];
    }
  }
  *r = '\0';

  return res;
}
944 
/*
 * Copy the first len characters of urlstr and replace the HTML
 * entities known to fix_html_entity() by the character they stand for.
 * In contrast to url_decode_str(), "%XX" sequences are left alone.
 *
 * Returns a newly allocated string, or NULL when urlstr is NULL.
 */
static char *url_decode_html(const char *urlstr, int len)
{
  char *decoded, *out;
  int pos;

  if(!urlstr)
    return NULL;

  /* the result is never longer than the input */
  decoded = tl_strndup(urlstr, len);
  out = decoded;

  for(pos = 0; pos < len; pos++, out++)
  {
    int used = 0;

    if(urlstr[pos] == '&')
      used = fix_html_entity(urlstr + pos, len - pos, out);

    if(used)
      pos += used - 1;          /* skip the rest of the entity */
    else
      *out = urlstr[pos];
  }
  *out = '\0';

  return decoded;
}
974 
975 
976 /*************************************/
977 /* uvolnenie pamate po strukture URL */
978 /* FIXME: Translate me!              */
979 /*************************************/
free_deep_url(url * urlp)980 void free_deep_url(url * urlp)
981 {
982   if(urlp->local_name)
983   {
984     url_remove_from_file_hash_tab(urlp);
985     _free(urlp->local_name);
986   }
987 
988   switch (urlp->type)
989   {
990   case URLT_FILE:
991     _free(urlp->p.file.filename);
992     _free(urlp->p.file.searchstr);
993     _free(urlp->p.file.anchor_name);
994     break;
995   case URLT_HTTP:
996   case URLT_HTTPS:
997     _free(urlp->p.http.host);
998     _free(urlp->p.http.document);
999     _free(urlp->p.http.searchstr);
1000     _free(urlp->p.http.anchor_name);
1001     _free(urlp->p.http.password);
1002     _free(urlp->p.http.user);
1003     if(urlp->status & URL_FORM_ACTION)
1004     {
1005       form_info *fi = (form_info *) urlp->extension;
1006       dllist *ptr;
1007 
1008       _free(fi->text);
1009       _free(fi->action);
1010       ptr = fi->infos;
1011       while(ptr)
1012       {
1013         form_field *ff = (form_field *) ptr->data;
1014 
1015         _free(ff->value);
1016         _free(ff->name);
1017         _free(ff);
1018 
1019         ptr = dllist_remove_entry(ptr, ptr);
1020       }
1021     }
1022     break;
1023   case URLT_FTP:
1024   case URLT_FTPS:
1025     _free(urlp->p.ftp.host);
1026     _free(urlp->p.ftp.user);
1027     _free(urlp->p.ftp.password);
1028     _free(urlp->p.ftp.anchor_name);
1029     _free(urlp->p.ftp.path);
1030     if(urlp->extension)
1031       ftp_url_ext_free(urlp->extension);
1032     break;
1033   case URLT_GOPHER:
1034     _free(urlp->p.gopher.host);
1035     _free(urlp->p.gopher.selector);
1036   case URLT_FROMPARENT:
1037   default:
1038     _free(urlp->p.unsup.urlstr);
1039     break;
1040   }
1041 
1042   dllist_free_all(urlp->parent_url);
1043 
1044 
1045 #ifdef WITH_TREE
1046 #ifdef I_FACE
1047   _free(urlp->tree_nfo);
1048 
1049   if(urlp->prop)
1050   {
1051     _free(urlp->prop->type);
1052     free(urlp->prop);
1053   }
1054 
1055 #endif
1056 #endif
1057 
1058 #ifdef HAVE_MT
1059   pthread_mutex_destroy(&urlp->lock);
1060 #endif
1061 }
1062 
/*
 * Queue the URLs of a list of freshly found links for download.
 *
 * Every URL of l1 is checked with url_append_condition(). A rejected
 * URL is counted in cfg.reject_cnt and released. An accepted URL loses
 * its anchor; if an equal URL is already known, the new one is linked
 * to it and released, otherwise it is counted in cfg.total_cnt, put
 * into the URL hash table and collected for queueing. With the
 * *_I scheduling strategies inline objects are collected separately
 * and queued in front of the other URLs.
 *
 * With cfg.hack_add_index set, the directory part of every queued URL
 * that has no query string is appended to l1 as well and so is
 * processed later in the same loop.
 *
 * The nodes of l1 are released at the end. Under HAVE_MT the URL stack
 * semaphore is raised once for every URL that was accepted as new.
 *
 * NOTE(review): the SSTRAT_DO_SIRKY / SSTRAT_DO_HLBKY names presumably
 * stand for breadth-first / depth-first scheduling - confirm.
 */
void cat_links_to_url_list(dllist * l1)
{
  dllist *node;
  dllist *regular = NULL, *inlined = NULL;
  url *known;
  int queued = 0;
  cond_info_t condp;

  condp.level = 1;
  condp.urlnr = 0;
  condp.size = 0;
  condp.time = 0L;
  condp.mimet = NULL;
  condp.full_tag = NULL;
  condp.params = NULL;
  condp.html_doc = NULL;
  condp.html_doc_offset = 0;
  condp.tag = NULL;
  condp.attrib = NULL;

  for(node = l1; node; node = node->next)
  {
    url *urlp = (url *) node->data;

    if(!url_append_condition(urlp, &condp))
    {
      /* rejected by the limiting rules */
      LOCK_REJCNT;
      cfg.reject_cnt++;
      UNLOCK_REJCNT;

      free_deep_url(urlp);
      free(urlp);
      continue;
    }

    url_clear_anchor(urlp);
    if((known = url_was_befor(urlp)))
    {
      /* already known, only remember the additional reference */
      link_url_in_list(known, urlp);
      free_deep_url(urlp);
      free(urlp);
      continue;
    }

    queued++;
    LOCK_TCNT;
    cfg.total_cnt++;
    UNLOCK_TCNT;

    urlp->ref_cnt = 1;

#ifdef WITH_TREE
#ifdef I_FACE
    if(cfg.xi_face)
    {
      urlp->tree_nfo = _malloc(sizeof(GUI_TREE_RTYPE));
      urlp->tree_nfo[0] = gui_tree_make_entry(urlp);
    }
#endif
#endif

    url_add_to_url_hash_tab(urlp);

    switch (cfg.scheduling_strategie)
    {
    case SSTRAT_DO_SIRKY:
    case SSTRAT_DO_HLBKY:
      regular = dllist_append(regular, (dllist_t) node->data);
      break;
    case SSTRAT_DO_SIRKY_I:
    case SSTRAT_DO_HLBKY_I:
      if(urlp->status & URL_INLINE_OBJ)
        inlined = dllist_append(inlined, (dllist_t) urlp);
      else
        regular = dllist_append(regular, (dllist_t) urlp);
      break;
    default:
      break;
    }

    if(cfg.hack_add_index && !url_get_search_str(urlp))
    {
      char *ustr = url_to_urlstr(urlp, FALSE);
      char *slash = strrchr(ustr, '/');

      if(slash && slash[1])
      {
        url *nurl;

        /* keep everything up to the last '/' */
        slash[1] = '\0';
        nurl = url_parse(ustr);
        assert(nurl->type != URLT_FROMPARENT);
        dllist_append(node, (dllist_t) nurl);
      }
      _free(ustr);
    }
  }
  dllist_free_all(l1);

  LOCK_CFG_URLSTACK;
  if(regular || inlined)
  {
    switch (cfg.scheduling_strategie)
    {
    case SSTRAT_DO_SIRKY:
    case SSTRAT_DO_SIRKY_I:
      append_url_list_to_list(dllist_concat(inlined, regular), NULL);
      break;
    case SSTRAT_DO_HLBKY:
    case SSTRAT_DO_HLBKY_I:
      append_url_list_to_list(dllist_concat(inlined, regular), cfg.urlstack);
      break;
    default:
      break;
    }
  }
  UNLOCK_CFG_URLSTACK;
#ifdef HAVE_MT
  /* wake up the downloading threads that sleep while waiting for */
  /* an URL to show up in the download queue                      */
  while(queued > 0)
  {
    mt_semaphore_up(&cfg.urlstack_sem);
    queued--;
  }
#endif
}
1193 
/*
 * Register a single URL and put it at the tail of cfg.urlstack.
 *
 * A URL whose protocol entry in prottable is marked unsupported is only
 * reported through xprintf() and is not queued.  Otherwise the URL gets
 * its first reference, optionally a GUI tree entry, is entered into the
 * URL hash table and cfg.total_cnt is incremented.  With HAVE_MT the
 * urlstack semaphore is raised once for the new entry.
 *
 * NOTE(review): no LOCK_CFG_URLSTACK / LOCK_TCNT is taken here, while
 * the bulk-append code earlier in this file does take them - presumably
 * callers are single-threaded or already hold the locks; confirm.
 */
void append_url_to_list(url * urlp)
{
  if(prottable[urlp->type].supported)
  {
    urlp->ref_cnt = 1;

#ifdef WITH_TREE
#ifdef I_FACE
    if(cfg.xi_face)
    {
      urlp->tree_nfo = _malloc(sizeof(GUI_TREE_RTYPE));
      urlp->tree_nfo[0] = gui_tree_make_entry(urlp);
    }
#endif
#endif

    url_add_to_url_hash_tab(urlp);
    cfg.urlstack = dllist_append(cfg.urlstack, (dllist_t) urlp);
    cfg.total_cnt++;

#ifdef HAVE_MT
    /* wake one worker waiting for something to download */
    mt_semaphore_up(&cfg.urlstack_sem);
#endif
  }
  else
  {
    xprintf(1, gettext("unsupported URL type \"%s\"\n"),
      prottable[urlp->type].urlid ? prottable[urlp->type].urlid :
      gettext("unknown"));
  }
}
1225 
/*
 * Splice a whole list of URLs into cfg.urlstack.
 *
 * list  - nodes to add
 * after - node of cfg.urlstack behind which the list is inserted;
 *         NULL appends the list at the end of the stack.
 *
 * The function does no locking itself.
 */
void append_url_list_to_list(dllist * list, dllist * after)
{
  cfg.urlstack = after ?
    dllist_insert_list_after(cfg.urlstack, after, list) :
    dllist_concat(cfg.urlstack, list);
}
1233 
1234 
/*
 * Attach the parent of a freshly parsed duplicate URL to the URL object
 * that is already known.
 *
 * orig - URL already registered
 * copy - duplicate of orig that was just found in some document; only
 *        its first parent is looked at
 *
 * If copy has a parent that is neither orig itself nor already among
 * orig's parents, the parent is appended to orig->parent_url, the
 * reference count is raised and (GUI builds) a further tree entry is
 * created.  When orig is a redirected URL whose final target has been
 * downloaded, the link in that one parent is rewritten to the target's
 * local file name.
 *
 * The membership test and the append are done inside one LOCK_URL(orig)
 * section, so that two threads can not both decide that the parent is
 * missing and add it twice.
 */
void link_url_in_list(url * orig, url * copy)
{
  url *cpar;
  dllist *ptr;
  bool_t found = FALSE;

  LOCK_URL(copy);
  cpar = copy->parent_url ? (url *) copy->parent_url->data : NULL;
  UNLOCK_URL(copy);

  /* no parent to record, or the document links to itself */
  if(!cpar || orig == cpar)
    return;

  LOCK_URL(orig);
  for(ptr = orig->parent_url; ptr; ptr = ptr->next)
  {
    if((url *) ptr->data == cpar)
    {
      found = TRUE;
      break;
    }
  }

  if(!found)
  {
    orig->ref_cnt++;
    orig->parent_url = dllist_append(orig->parent_url, (dllist_t) cpar);

#ifdef WITH_TREE
#ifdef I_FACE
    if(cfg.xi_face)
    {
      /* one tree entry per reference */
      orig->tree_nfo =
        _realloc(orig->tree_nfo, orig->ref_cnt * sizeof(GUI_TREE_RTYPE));
      orig->tree_nfo[orig->ref_cnt - 1] = gui_tree_make_entry(orig);
    }
#endif
#endif
  }
  UNLOCK_URL(orig);

  if(found)
    return;

  if(orig->status & URL_MOVED)
  {
    url *purl = orig;

    /* follow the redirect chain to its last element */
    while(purl->moved_to)
      purl = purl->moved_to;

    if(purl->status & URL_DOWNLOADED)
    {
      char *fn = url_to_filename(purl, TRUE);

      rewrite_one_parent_links(copy, cpar, fn);
    }
  }
}
1297 
/*
 * Record that src was redirected to dst.
 *
 * src    - URL which answered with a redirect
 * dst    - freshly built target URL; when an equal URL is already known
 *          dst is released here and the known object is used instead
 * is_303 - non-zero suppresses copying of the form data from src to a
 *          new target
 *
 * Returns 0 on success and -1 when the known target already redirects
 * (directly or through a chain) back to src; in that case the URL_MOVED
 * flag of src is cleared again.
 */
int url_redirect_to(url * src, url * dst, int is_303)
{
  url *known;
  url *hop;

  src->status |= URL_MOVED;

  url_clear_anchor(dst);
  known = url_was_befor(dst);

  if(!known)
  {
    /* target seen for the first time: dst itself becomes the target */
    dst->parent_url = dllist_append(dst->parent_url, (dllist_t) src);
    src->moved_to = dst;
    src->status |= URL_MOVED;

    if(!is_303 && !dst->extension && (src->status & URL_FORM_ACTION))
      dst->extension = form_info_dup(src->extension);

#ifdef WITH_TREE
#ifdef I_FACE
    if(cfg.xi_face)
    {
      dst->tree_nfo = _malloc(sizeof(GUI_TREE_RTYPE));
      dst->tree_nfo[0] = gui_tree_make_entry(dst);
    }
#endif
#endif
    dst->ref_cnt = 1;
    url_add_to_url_hash_tab(dst);

    return 0;
  }

  /* the target is already known - the duplicate is not needed */
  free_deep_url(dst);
  _free(dst);

  /* refuse redirect loops */
  for(hop = known; hop; hop = hop->moved_to)
  {
    if(hop == src)
    {
      src->status &= ~URL_MOVED;
      return -1;
    }
  }

  LOCK_URL(known);

  known->parent_url = dllist_append(known->parent_url, (dllist_t) src);
  known->ref_cnt++;

  src->moved_to = known;
  src->status |= URL_MOVED;

#ifdef WITH_TREE
#ifdef I_FACE
  if(cfg.xi_face)
  {
    known->tree_nfo = _realloc(known->tree_nfo,
      (known->ref_cnt) * sizeof(GUI_TREE_RTYPE));
    known->tree_nfo[known->ref_cnt - 1] = gui_tree_make_entry(known);
  }
#endif
#endif

  UNLOCK_URL(known);

  if(known->status & (URL_MOVED | URL_DOWNLOADED))
  {
    url *last = known;
    char *fn;

    xprintf(1, gettext("Moved to already processed URL.\n"));

    /* a moved target is resolved to the end of its redirect chain */
    if(known->status & URL_MOVED)
    {
      while(last->moved_to)
        last = last->moved_to;
    }
    fn = url_to_filename(last, TRUE);

    if(cfg.rewrite_links && (last->status & URL_DOWNLOADED))
      rewrite_parents_links(src, fn);
  }

  return 0;
}
1384 
/*
 * Enter a URL into cfg.url_hash_tbl.
 * The anchor part is dropped first; the insertion itself is done under
 * LOCK_CFG_URLHASH.
 */
void url_add_to_url_hash_tab(url * urlp)
{
  /* strip the anchor before the URL is hashed */
  url_clear_anchor(urlp);

  LOCK_CFG_URLHASH;
  dlhash_insert(cfg.url_hash_tbl, (dllist_t) urlp);
  UNLOCK_CFG_URLHASH;
}
1393 
/*
 * Take a URL out of cfg.url_hash_tbl.
 * Nothing is done for URLs of a protocol marked unsupported in prottable.
 */
void url_remove_from_url_hash_tab(url * urlp)
{
  if(prottable[urlp->type].supported)
  {
    LOCK_CFG_URLHASH;
    dlhash_exclude(cfg.url_hash_tbl, (dllist_t) urlp);
    UNLOCK_CFG_URLHASH;
  }
}
1403 
/*
 * Make sure a supported URL has a local file name.
 * The work is delegated to url_to_filename(), which computes the name
 * and registers the URL in the file name hash table when no name has
 * been assigned yet.
 */
void url_add_to_file_hash_tab(url * urlp)
{
  if(prottable[urlp->type].supported)
    url_to_filename(urlp, TRUE);
}
1411 
/*
 * Take a URL out of cfg.fn_hash_tbl.
 * Only done for supported protocols and only when the URL already has a
 * local file name; the name itself is not released here.
 */
void url_remove_from_file_hash_tab(url * urlp)
{
  if(prottable[urlp->type].supported && urlp->local_name)
  {
    LOCK_CFG_FILEHASH;
    dlhash_exclude_exact(cfg.fn_hash_tbl, (dllist_t) urlp);
    UNLOCK_CFG_FILEHASH;
  }
}
1424 
1425 /**********************************************/
1426 /* kopirovanie obsahu na nove miesto v pamati */
1427 /* FIXME: Translate me!                       */
1428 /**********************************************/
new_url(url * urlo)1429 url *new_url(url * urlo)
1430 {
1431   url *res = (url *) _malloc(sizeof(url));
1432 
1433   memcpy(res, urlo, sizeof(url));
1434 
1435   return res;
1436 }
1437 
#define isforbiddenchar(a) ((a) == '\\' || (a) == '/')
/*
 * Return a newly allocated copy of str in which every '\\' and '/' is
 * replaced by its %xx escape (lower case hex digits, i.e. "%5c" and
 * "%2f").  All other characters are copied unchanged.
 *
 * The caller owns the returned string.
 */
static char *encode_forbiddenchars(const char *str)
{
  static const char lhex[] = "0123456789abcdef";
  size_t size = 1;              /* room for the terminating NUL */
  const char *s;
  char *res, *r;

  /* every escaped character grows from one to three bytes */
  for(s = str; *s; ++s)
  {
    if(isforbiddenchar(*s))
      size += 2;
  }
  size += (size_t) (s - str);   /* add string length */
  r = res = (char *) _malloc(size);

  for(s = str; *s; ++s)
  {
    if(isforbiddenchar(*s))
    {
      unsigned char c = (unsigned char) *s;

      *(r++) = '%';
      *(r++) = lhex[c >> 4];
      *(r++) = lhex[c & 0x0f];
    }
    else
    {
      *(r++) = *s;
    }
  }
  *r = '\0';

  return res;
}
1470 
/*
 * Build the default, cache directory relative file name for a URL.
 *
 * urlp      - URL to name
 * add_index - non-zero: directory-like names get priv_cfg.index_name
 *             appended
 *
 * The name has the form <protocol dir>/<host>_<port><path...> for
 * network protocols; for file: URLs it is the decoded file name
 * (plus "?" and the decoded search string).  Returns a newly allocated
 * string, or NULL for URL types that have no local name.
 */
static char *url_get_default_local_name_real(url * urlp, int add_index)
{
  char port_sfx[50];
  char *name = NULL;
  char *tmp;

  snprintf(port_sfx, sizeof(port_sfx), "_%d", url_get_port(urlp));

  switch (urlp->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    tmp = url_decode_str(urlp->p.http.document,
      strlen(urlp->p.http.document));
    name = tl_str_concat(name, prottable[urlp->type].dirname, "/",
      urlp->p.http.host, port_sfx, tmp, NULL);
    _free(tmp);

    if(urlp->p.http.searchstr)
    {
      /* a / or \ inside the query would create bogus directories */
      char *query = encode_forbiddenchars(urlp->p.http.searchstr);

      name = tl_str_concat(name, "?", query, NULL);
      _free(query);
    }

    if(urlp->status & URL_FORM_ACTION)
    {
      form_info *fi = (form_info *) urlp->extension;

      /* form data become part of the name: "#" for POST, "?" otherwise */
      tmp = form_encode_urlencoded(fi->infos);
      if(tmp)
      {
        name = tl_str_concat(name,
          (fi->method == FORM_M_POST) ? "#" : "?", tmp, NULL);
      }
      _free(tmp);
    }

    if(tl_is_dirname(name) && add_index)
      name = tl_str_append(name, priv_cfg.index_name);
    break;

  case URLT_FILE:
    name = url_decode_str(urlp->p.file.filename,
      strlen(urlp->p.file.filename));
    if(urlp->p.file.searchstr)
    {
      tmp = url_decode_str(urlp->p.file.searchstr,
        strlen(urlp->p.file.searchstr));
      name = tl_str_concat(name, "?", tmp, NULL);
      free(tmp);
    }
    break;

  case URLT_FTP:
  case URLT_FTPS:
    /* a NULL argument ends the list: for a non-directory nothing is */
    /* appended after the path, for a directory "/" and optionally   */
    /* the index name follow                                         */
    name = tl_str_concat(name, prottable[urlp->type].dirname, "/",
      urlp->p.ftp.host, port_sfx, "/", urlp->p.ftp.path,
      urlp->p.ftp.dir ? "/" : NULL,
      add_index ? priv_cfg.index_name : NULL, NULL);
    break;

  case URLT_GOPHER:
    /* a selector starting with '1' gets the index name appended */
    name = tl_str_concat(name, prottable[URLT_GOPHER].dirname, "/",
      urlp->p.gopher.host, port_sfx, urlp->p.gopher.selector,
      (urlp->p.gopher.selector[0] == '1' && add_index)
      ? priv_cfg.index_name : NULL, NULL);
    break;

  case URLT_FROMPARENT:
  default:
    /* no local name for these types, name is still NULL */
    break;
  }

  return name;
}
1544 
/*
 * Default local name of a URL, with the index file name appended to
 * directory-like names.  Returns a newly allocated string or NULL.
 */
char *url_get_default_local_name(url * urlp)
{
  char *name = url_get_default_local_name_real(urlp, TRUE);

  return name;
}
1549 
/*
 * Turn a default local name into a path below priv_cfg.cache_dir.
 *
 * urlp       - URL being named
 * local_name - default name; used when no rule of priv_cfg.lfnames
 *              matches the URL
 * mime_type  - passed on to the matching rule
 * isdinfo    - set to TRUE when a rule supplied the name, otherwise
 *              left untouched
 *
 * The chosen name is passed through tr(), prefixed with the cache
 * directory and, when it looks like a directory, completed with
 * priv_cfg.index_name.  Returns a newly allocated string.
 */
static char *url_get_local_name_tr(url * urlp, char *local_name,
  const char *mime_type, int *isdinfo)
{
  char *urlstr = url_to_urlstr(urlp, FALSE);
  char *rule_name = NULL;
  char *chosen = local_name;
  char *translated;
  char *result;
  dllist *node;

  /* the first matching rule wins */
  for(node = priv_cfg.lfnames; node; node = node->next)
  {
    lfname *rule = (lfname *) node->data;

    if(!lfname_match(rule, urlstr))
      continue;

    rule_name = lfname_get_by_url(urlp, urlstr, mime_type, rule);
    chosen = rule_name;
    *isdinfo = TRUE;
    break;
  }
  _free(urlstr);

  translated = tr(chosen);

  /* the NULL of the conditional ends the argument list early, so */
  /* the index name is appended to directory names only           */
  result = tl_str_concat(NULL, priv_cfg.cache_dir,
    (*translated == '/' ? "" : "/"), translated,
    tl_is_dirname(translated) ? priv_cfg.index_name : NULL, NULL);

  _free(translated);
  _free(rule_name);

  return result;
}
1584 
1585 /**********************************************/
url_get_local_name_real(url * urlp,const char * mime_type,int adj)1586 char *url_get_local_name_real(url * urlp, const char *mime_type, int adj)
1587 {
1588   char *pom = NULL;
1589   char *pom2 = NULL;
1590   char *p1, *p2;
1591   char *p;
1592   int isdinfo = FALSE;
1593   struct stat estat;
1594 
1595   if((urlp->status & URL_ISFIRST) &&
1596     priv_cfg.store_name /* && cfg.mode == MODE_SINGLE */ )
1597   {
1598     return get_abs_file_path_oss(priv_cfg.store_name);
1599   }
1600 
1601   pom = url_get_default_local_name(urlp);
1602 
1603   if(urlp->type != URLT_FILE)
1604   {
1605     pom2 = url_get_local_name_tr(urlp, pom, mime_type, &isdinfo);
1606     _free(pom);
1607     pom = pom2;
1608   }
1609 
1610 #ifdef FS_UNSAFE_CHARACTERS
1611   /* This is for automatic handling of windoze  */
1612   /* filesystem unsafe characters - \:*?"<>|  */
1613   if(urlp->type != URLT_FILE
1614     && strlen(pom) != strcspn(pom, FS_UNSAFE_CHARACTERS))
1615   {
1616     if(strchr(FS_UNSAFE_CHARACTERS, '_'))
1617       p = tr_del_chr(FS_UNSAFE_CHARACTERS, pom);
1618     else
1619       p = tr_chr_chr(FS_UNSAFE_CHARACTERS, "_", pom);
1620     _free(pom);
1621     pom = p;
1622   }
1623 #endif
1624 
1625   /* adjusting of filename size if required  */
1626   if(urlp->type != URLT_FILE && tl_filename_needs_adjust(pom))
1627   {
1628     p = tl_adjust_filename(pom);
1629     _free(pom);
1630     pom = p;
1631   }
1632 
1633   if(!lstat(pom, &estat) && S_ISDIR(estat.st_mode) && adj)
1634   {
1635     pom = tl_str_concat(pom, "/", priv_cfg.index_name, NULL);
1636   }
1637 
1638   if((urlp->type != URLT_FILE) && cfg.base_level && !isdinfo)
1639   {
1640     p = get_abs_file_path_oss(pom);
1641     _free(pom);
1642     pom = p;
1643     p1 = pom + strlen(priv_cfg.cache_dir) +
1644       (tl_is_dirname(priv_cfg.cache_dir) == 0);
1645 
1646     if(!(p2 = strfindnchr(p1, '/', cfg.base_level)))
1647     {
1648       if((p2 = strrchr(pom, '/')))
1649         p2++;
1650     }
1651 
1652     if(p2)
1653       memmove(p1, p2, strlen(p2) + 1);
1654   }
1655 
1656   /* this is here for ensure, that we */
1657   /* don't have directory as filename :-) */
1658   if(tl_is_dirname(pom))
1659     pom = tl_str_append(pom, priv_cfg.index_name);
1660 
1661   p = get_abs_file_path_oss(pom);
1662   _free(pom);
1663 
1664   /* In mode MIRROR we want to use exactly the same filenames as the
1665      remove server. Therefore we have to unquote our filename. */
1666   if(cfg.mode == MODE_MIRROR)
1667   {
1668     /* now we unquote the string */
1669 
1670     char *s = p;
1671     char *t = p;
1672     int hex;
1673 
1674     while(*s != 0)
1675     {
1676       if(s[0] == '%' && isxdigit(s[1]) && isxdigit(s[2]))
1677       {
1678         sscanf(s + 1, "%2x", &hex);
1679         *t++ = hex;
1680         s += 3;
1681         continue;
1682       }
1683       *t++ = *s++;
1684     }
1685 
1686     *t = 0;
1687   }
1688 
1689   return p;
1690 }
1691 
/*
 * Local file name of a URL with the directory adjustment of
 * url_get_local_name_real() switched on.
 */
static char *url_get_local_name(url * urlp, const char *mime_type)
{
  char *name = url_get_local_name_real(urlp, mime_type, TRUE);

  return name;
}
1696 
1697 /******************************************************/
1698 /* k danemu URL vytvori meno suboru v lokalnom strome */
1699 /* FIXME: Translate me!                               */
1700 /******************************************************/
url_to_filename_real(url * urlp,const char * mime_type,int lockfn)1701 static char *url_to_filename_real(url * urlp, const char *mime_type,
1702 int lockfn)
1703 {
1704   char *p;
1705   bool_t inserted = FALSE;
1706 
1707   if(!urlp->local_name && prottable[urlp->type].supported)
1708   {
1709     p = url_get_local_name(urlp, mime_type);
1710     if(cfg.enable_info && urlp->type != URLT_FILE &&
1711       !(urlp->status & URL_REDIRECT))
1712     {
1713       char *di;
1714       LOCK_GETLFNAME;
1715       di = dinfo_get_unique_name(urlp, p, lockfn);
1716       UNLOCK_GETLFNAME;
1717       if(di)
1718       {
1719         _free(p);
1720         p = di;
1721       }
1722     }
1723     else if(!cfg.enable_info && cfg.unique_doc &&
1724       urlp->type != URLT_FILE && !(urlp->status & URL_REDIRECT))
1725     {
1726       /*** such filename have already other URL   ***/
1727       /*** we need to compute new unique filename ***/
1728       char *f;
1729       char *pom;
1730       int i;
1731       url *inhash;
1732 
1733       LOCK_CFG_FILEHASH;
1734       inhash = (url *) dlhash_find_by_key(cfg.fn_hash_tbl, (dllist_t) p);
1735 
1736       if(!inhash && !inserted)
1737       {
1738         urlp->local_name = p;
1739         dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);
1740         inserted = TRUE;
1741       }
1742 
1743       if(inhash && url_compare(inhash, urlp))
1744         inhash = NULL;
1745 
1746       UNLOCK_CFG_FILEHASH;
1747 
1748       if(inhash)
1749       {
1750         int pomlen = strlen(p) + 9;
1751         LOCK_GETLFNAME;
1752         pom = _malloc(pomlen);
1753 
1754         f = strrchr(p, '/');
1755         if(!f)
1756           f = "";
1757         else
1758         {
1759           *f = '\0';
1760           f++;
1761         }
1762 
1763         if (cfg.remove_before_store)
1764         {
1765           snprintf(pom, pomlen, "%s/%s", p, f);
1766         }
1767         else
1768         {
1769           i = 0;
1770           do
1771           {
1772             i++;
1773             snprintf(pom, pomlen, "%s/%03d%s", p, i, f);
1774             LOCK_CFG_FILEHASH;
1775             inhash = (url *) dlhash_find_by_key(cfg.fn_hash_tbl,
1776             (dllist_t) pom);
1777             if(!inhash && !inserted)
1778             {
1779               urlp->local_name = pom;
1780               dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);
1781               inserted = TRUE;
1782             }
1783             UNLOCK_CFG_FILEHASH;
1784           }
1785           while(inhash);
1786         }
1787         UNLOCK_GETLFNAME;
1788 
1789         _free(p);
1790         p = pom;
1791       }
1792     }
1793     if(!inserted)
1794     {
1795       LOCK_CFG_FILEHASH;
1796       urlp->local_name = p;
1797       dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);
1798       inserted = TRUE;
1799       UNLOCK_CFG_FILEHASH;
1800     }
1801   }
1802   return urlp->local_name;
1803 }
1804 
/*
 * Local file name of a URL, computed without a known MIME type.
 * The returned string is the URL's own local_name and must not be
 * freed by the caller; it is NULL for unsupported protocols that have
 * no name yet.
 */
char *url_to_filename(url * urlp, int lockfn)
{
  const char *no_mime_type = NULL;

  return url_to_filename_real(urlp, no_mime_type, lockfn);
}
1809 
/*
 * Local file name of a URL; mime_type is passed on to the name
 * computation.  The returned string is the URL's own local_name.
 */
char *url_to_filename_with_type(url * urlp, const char *mime_type, int lockfn)
{
  char *name = url_to_filename_real(urlp, mime_type, lockfn);

  return name;
}
1814 
/*
 * Assign an explicit local file name to a URL and register the URL in
 * cfg.fn_hash_tbl, both under LOCK_CFG_FILEHASH.
 *
 * NOTE(review): the pointer is stored as is and url_changed_filename()
 * releases local_name with _free(), so local_name presumably has to be
 * heap allocated and is owned by the URL afterwards - confirm.
 */
void url_set_filename(url * urlp, char *local_name)
{
  LOCK_CFG_FILEHASH;

  urlp->local_name = local_name;
  dlhash_insert(cfg.fn_hash_tbl, (dllist_t) urlp);

  UNLOCK_CFG_FILEHASH;
}
1823 
1824 /******************************************************/
1825 /* k danemu URL vytvori meno suboru v lokalnom strome */
1826 /* FIXME: Translate me!                               */
1827 /******************************************************/
url_changed_filename(url * urlp)1828 void url_changed_filename(url * urlp)
1829 {
1830   url_remove_from_file_hash_tab(urlp);
1831   _free(urlp->local_name);
1832   url_add_to_file_hash_tab(urlp);
1833 }
1834 
1835 /****************************************************************/
1836 /* k danemu URL vytvori meno docasneho suboru v lokalnom strome */
1837 /* FIXME: Translate me!                                         */
1838 /****************************************************************/
url_to_in_filename(url * urlp)1839 char *url_to_in_filename(url * urlp)
1840 {
1841   char *pom;
1842   char *p;
1843 
1844   if(cfg.mode == MODE_NOSTORE || cfg.mode == MODE_FTPDIR || (cfg.dumpfd >= 0))
1845   {
1846     int pomlen = strlen(priv_cfg.cache_dir) + 50;
1847     pom = _malloc(pomlen);
1848 
1849 #ifdef HAVE_MT
1850     snprintf(pom, pomlen, "%s/.in_pavuk_nostore_%d_%ju",
1851       priv_cfg.cache_dir, (int) getpid(), (uintmax_t)pthread_self());
1852 #else
1853     snprintf(pom, pomlen, "%s/.in_pavuk_nostore_%d", priv_cfg.cache_dir,
1854       (int) getpid());
1855 #endif
1856     return pom;
1857   }
1858 
1859   p = url_to_filename(urlp, TRUE);
1860 
1861   pom = _malloc(strlen(p) + 5);
1862   strcpy(pom, p);
1863   p = strrchr(pom, '/');
1864   if(!p)
1865     p = pom;
1866   else
1867     p++;
1868   memmove(p + 4, p, strlen(p) + 1);
1869   strncpy(p, ".in_", 4);
1870 
1871   return pom;
1872 }
1873 
1874 /************************************************/
1875 /* make from URL structure URL string           */
1876 /************************************************/
url_to_urlstr(url * urlp,int wa)1877 char *url_to_urlstr(url * urlp, int wa)
1878 {
1879   char *p;
1880   char portstr[10];
1881   char *retv;
1882 
1883   snprintf(portstr, sizeof(portstr), ":%d", url_get_port(urlp));
1884   switch (urlp->type)
1885   {
1886   case URLT_HTTP:
1887   case URLT_HTTPS:
1888     retv = _malloc(strlen(prottable[urlp->type].typestr) +
1889       (urlp->p.http.user ? strlen(urlp->p.http.user) + 1 : 0) +
1890       (urlp->p.http.password ? strlen(urlp->p.http.password) + 1 : 0) +
1891       strlen(urlp->p.http.host) +
1892       (urlp->p.http.port ==
1893         prottable[urlp->type].default_port ? 0 : strlen(portstr) + 1) +
1894       strlen(urlp->p.http.document) +
1895       (urlp->p.http.searchstr ? strlen(urlp->p.http.searchstr) + 1 : 0) +
1896       (urlp->p.http.anchor_name ? strlen(urlp->p.http.anchor_name) + 1 : 0) +
1897       1);
1898 
1899 
1900     sprintf(retv, "%s%s%s%s%s%s%s%s%s%s%s%s", prottable[urlp->type].typestr,
1901       urlp->p.http.user ? urlp->p.http.user : "",
1902       urlp->p.http.password ? ":" : "",
1903       urlp->p.http.password ? urlp->p.http.password : "",
1904       (urlp->p.http.password || urlp->p.http.user) ? "@" : "",
1905       urlp->p.http.host,
1906       (urlp->p.http.port ==
1907         prottable[urlp->type].default_port ? "" : portstr),
1908       urlp->p.http.document, urlp->p.http.searchstr ? "?" : "",
1909       urlp->p.http.searchstr ? urlp->p.http.searchstr : "", wa
1910       && urlp->p.http.anchor_name ? "#" : "", wa
1911       && urlp->p.http.anchor_name ? urlp->p.http.anchor_name : "");
1912 
1913     if(!urlp->p.http.searchstr &&
1914       (urlp->status & URL_FORM_ACTION) &&
1915       (((form_info *) urlp->extension)->method == FORM_M_GET))
1916     {
1917       char *ss;
1918 
1919       ss = form_encode_urlencoded(((form_info *) urlp->extension)->infos);
1920       if(ss)
1921         retv = tl_str_concat(retv, "?", ss, NULL);
1922       _free(ss);
1923     }
1924 
1925     return retv;
1926   case URLT_FILE:
1927     p = get_abs_file_path(urlp->p.file.filename);
1928     retv = _malloc(strlen(prottable[URLT_FILE].typestr) +
1929       strlen(p) +
1930       (urlp->p.file.searchstr ? strlen(urlp->p.file.searchstr) + 1 : 0) +
1931       ((wa &&
1932           urlp->p.file.anchor_name) ? strlen(urlp->p.file.anchor_name) +
1933         1 : 0) + 1);
1934 
1935     sprintf(retv, "%s%s%s%s%s%s", prottable[URLT_FILE].typestr, p,
1936       urlp->p.file.searchstr ? "?" : "",
1937       urlp->p.file.searchstr ? urlp->p.file.searchstr : "",
1938       urlp->p.file.anchor_name ? "#" : "",
1939       urlp->p.file.anchor_name ? urlp->p.file.anchor_name : "");
1940 
1941     free(p);
1942 
1943     return retv;
1944   case URLT_FTP:
1945   case URLT_FTPS:
1946     retv = _malloc(strlen(prottable[urlp->type].typestr) +
1947       (urlp->p.ftp.user ? strlen(urlp->p.ftp.user) + 1 : 0) +
1948       (urlp->p.ftp.password ? strlen(urlp->p.ftp.password) + 1 : 0) +
1949       strlen(urlp->p.ftp.host) +
1950       (urlp->p.ftp.port ==
1951         prottable[urlp->type].default_port ? 0 : strlen(portstr) + 1) +
1952       strlen(urlp->p.ftp.path) +
1953       (urlp->p.ftp.anchor_name ? strlen(urlp->p.ftp.anchor_name) + 1 : 0) +
1954       1);
1955 
1956     sprintf(retv, "%s%s%s%s%s%s%s%s%s%s", prottable[urlp->type].typestr,
1957       urlp->p.ftp.user ? urlp->p.ftp.user : "",
1958       urlp->p.ftp.password ? ":" : "",
1959       urlp->p.ftp.password ? urlp->p.ftp.password : "",
1960       (urlp->p.ftp.password || urlp->p.ftp.user) ? "@" : "",
1961       urlp->p.ftp.host,
1962       (urlp->p.ftp.port == prottable[urlp->type].default_port ? "" : portstr),
1963       urlp->p.ftp.path,
1964       wa && urlp->p.ftp.anchor_name ? "#" : "",
1965       wa && urlp->p.ftp.anchor_name ? urlp->p.ftp.anchor_name : "");
1966 
1967     return retv;
1968   case URLT_GOPHER:
1969     retv = _malloc(strlen(prottable[URLT_GOPHER].typestr) +
1970       strlen(urlp->p.gopher.host) +
1971       (urlp->p.gopher.port ==
1972         prottable[urlp->type].default_port ? 0 : strlen(portstr) + 1) +
1973       strlen(urlp->p.gopher.selector) + 2);
1974 
1975     sprintf(retv, "%s%s%s/%s", prottable[URLT_GOPHER].typestr,
1976       urlp->p.gopher.host,
1977       (urlp->p.gopher.port ==
1978         prottable[urlp->type].default_port ? "" : portstr),
1979       urlp->p.gopher.selector);
1980 
1981     return retv;
1982   case URLT_UNKNOWN:
1983     return tl_strdup(urlp->p.unsup.urlstr);
1984   case URLT_FROMPARENT:
1985   default:
1986     return NULL;
1987   }
1988 }
1989 
/*
 * Build the URL string that is sent in a request.
 *
 * urlp     - URL to print
 * absolute - non-zero: the string starts with scheme, host and (non
 *            default) port, for FTP also with user and password;
 *            zero: only the path part is produced
 *
 * Path, query and FTP credentials are passed through
 * url_encode_str_safe().  Returns a newly allocated string, or NULL
 * for URL types other than HTTP(S), FTP(S) and gopher.
 */
char *url_to_request_urlstr(url * urlp, int absolute)
{
  char *p, *s;
  char portstr[10];
  char *retv = NULL;

  snprintf(portstr, sizeof(portstr), ":%d", url_get_port(urlp));

  switch (urlp->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    p = url_encode_str_safe(urlp->p.http.document, URL_PATH_UNSAFE);
    s = urlp->p.http.searchstr ?
      url_encode_str_safe(urlp->p.http.searchstr, URL_QUERY_UNSAFE) : NULL;

    if(absolute)
      retv = tl_str_concat(NULL, prottable[urlp->type].typestr,
        urlp->p.http.host,
        (urlp->p.http.port ==
          prottable[urlp->type].default_port ? "" : portstr), NULL);

    retv = tl_str_concat(retv, p ? p : "", s ? "?" : "", s ? s : "", NULL);

    _free(p);
    _free(s);

    /* a GET form without explicit query: the form data are the query */
    if(!urlp->p.http.searchstr &&
      (urlp->status & URL_FORM_ACTION) &&
      (((form_info *) urlp->extension)->method == FORM_M_GET))
    {
      char *ss;

      ss = form_encode_urlencoded(((form_info *) urlp->extension)->infos);
      if(ss)
        retv = tl_str_concat(retv, "?", ss, NULL);
      _free(ss);
    }
    break;
  case URLT_FTP:
  case URLT_FTPS:
    p = url_encode_str_safe(urlp->p.ftp.path, URL_PATH_UNSAFE);
    if(absolute)
    {
      char *w = urlp->p.ftp.password ?
        url_encode_str_safe(urlp->p.ftp.password, URL_AUTH_UNSAFE) : NULL;
      char *u = urlp->p.ftp.user ?
        url_encode_str_safe(urlp->p.ftp.user, URL_AUTH_UNSAFE) : NULL;

      retv = tl_str_concat(NULL, prottable[urlp->type].typestr,
        u ? u : "", w ? ":" : "", w ? w : "",
        (w || u) ? "@" : "", urlp->p.ftp.host,
        (urlp->p.ftp.port ==
          prottable[urlp->type].default_port ? "" : portstr), NULL);

      _free(u);
      _free(w);
    }

    retv = tl_str_concat(retv, p, NULL);
    _free(p);
    break;
  case URLT_GOPHER:
    p = url_encode_str_safe(urlp->p.gopher.selector, URL_PATH_UNSAFE);
    if(absolute)
      retv = tl_str_concat(NULL, prottable[urlp->type].typestr,
        urlp->p.gopher.host,
        (urlp->p.gopher.port ==
          prottable[urlp->type].default_port ? "" : portstr), NULL);

    /* send the encoded selector, like the path of the other types */
    retv = tl_str_concat(retv, "/", p ? p : "", NULL);
    _free(p);
    break;
  default:
    break;
  }

  return retv;
}
2069 
2070 /********************************************************/
2071 /* z URL vrati adresu servera pre dokument              */
2072 /* FIXME: Translate me!                                 */
2073 /********************************************************/
url_get_site(url * urlr)2074 char *url_get_site(url * urlr)
2075 {
2076   switch (urlr->type)
2077   {
2078   case URLT_HTTP:
2079   case URLT_HTTPS:
2080     return urlr->p.http.host;
2081   case URLT_FTP:
2082   case URLT_FTPS:
2083     return urlr->p.ftp.host;
2084   case URLT_GOPHER:
2085     return urlr->p.gopher.host;
2086   default:
2087     return NULL;
2088   }
2089 }
2090 
/*
 * Return the port number stored in the URL, or 0 for URL types which
 * have no port.
 */
int url_get_port(url * urlr)
{
  int port = 0;

  switch (urlr->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    port = (int) urlr->p.http.port;
    break;
  case URLT_FTP:
  case URLT_FTPS:
    port = (int) urlr->p.ftp.port;
    break;
  case URLT_GOPHER:
    port = (int) urlr->p.gopher.port;
    break;
  default:
    break;
  }

  return port;
}
2107 
/*
 * Return the path-like part of the URL: HTTP document, FTP path,
 * gopher selector or file name.  NULL for other URL types.
 * The returned string belongs to the URL.
 */
char *url_get_path(url * urlr)
{
  char *path = NULL;

  switch (urlr->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    path = urlr->p.http.document;
    break;
  case URLT_FTP:
  case URLT_FTPS:
    path = urlr->p.ftp.path;
    break;
  case URLT_GOPHER:
    path = urlr->p.gopher.selector;
    break;
  case URLT_FILE:
    path = urlr->p.file.filename;
    break;
  default:
    break;
  }

  return path;
}
2126 
/*
 * Replace the path-like part of the URL (HTTP document, FTP path,
 * gopher selector or file name) by a copy of path and recompute the
 * local file name.  URLs of other types are left untouched.
 *
 * The copy is made before the old string is released, so path may
 * point to (or into) the string currently stored in the URL, e.g. the
 * result of url_get_path().
 */
void url_set_path(url * urlr, char *path)
{
  char *npath;

  switch (urlr->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    npath = tl_strdup(path);
    _free(urlr->p.http.document);
    urlr->p.http.document = npath;
    break;
  case URLT_FTP:
  case URLT_FTPS:
    npath = tl_strdup(path);
    _free(urlr->p.ftp.path);
    urlr->p.ftp.path = npath;
    break;
  case URLT_GOPHER:
    npath = tl_strdup(path);
    _free(urlr->p.gopher.selector);
    urlr->p.gopher.selector = npath;
    break;
  case URLT_FILE:
    npath = tl_strdup(path);
    _free(urlr->p.file.filename);
    urlr->p.file.filename = npath;
    break;
  default:
    return;
  }

  /* the local name depends on the path */
  url_changed_filename(urlr);
}
2154 
/*
 * Return a newly allocated copy of the URL's path; for HTTP(S) URLs
 * with a search string the result is "<document>?<searchstr>".
 */
char *url_get_full_path(url * urlr)
{
  int is_http = (urlr->type == URLT_HTTP || urlr->type == URLT_HTTPS);

  if(is_http && urlr->p.http.searchstr)
  {
    return tl_str_concat(NULL, urlr->p.http.document, "?",
      urlr->p.http.searchstr, NULL);
  }

  /* for HTTP(S) url_get_path() is the document */
  return tl_strdup(url_get_path(urlr));
}
2179 
/*
 * Find the password to use for an HTTP(S) or FTP(S) URL.
 *
 * Tried in this order: password stored in the URL itself, password of
 * the authinfo entry matching the URL and realm, priv_cfg.passwd_auth.
 * Returns NULL for other URL types.  The returned string is not a
 * copy.
 */
char *url_get_pass(url * urlr, char *realm)
{
  char *own;
  authinfo *ai;

  if(urlr->type == URLT_HTTP || urlr->type == URLT_HTTPS)
    own = urlr->p.http.password;
  else if(urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
    own = urlr->p.ftp.password;
  else
    return NULL;

  if(own)
    return own;

  ai = authinfo_match_entry(urlr->type, url_get_site(urlr),
    url_get_port(urlr), url_get_path(urlr), realm);
  if(ai && ai->pass)
    return ai->pass;

  /* last resort: the globally configured password */
  return priv_cfg.passwd_auth;
}
2214 
/*
 * Find the user name to use for a URL.
 *
 * Sources are tried in this order: the user stored in the URL, the
 * matching authinfo entry for the given realm, and finally the
 * configured priv_cfg.name_auth.  Only HTTP(S) and FTP(S) URLs are
 * handled; NULL is returned for every other type.  The returned
 * pointer is one of those stored values, not a copy.
 */
char *url_get_user(url * urlr, char *realm)
{
  char *stored;
  authinfo *entry;

  if(urlr->type == URLT_HTTP || urlr->type == URLT_HTTPS)
    stored = urlr->p.http.user;
  else if(urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
    stored = urlr->p.ftp.user;
  else
    return NULL;

  if(stored)
    return stored;

  entry = authinfo_match_entry(urlr->type, url_get_site(urlr),
    url_get_port(urlr), url_get_path(urlr), realm);
  if(entry && entry->user)
    return entry->user;

  /* nothing specific found - use the global default */
  return priv_cfg.name_auth;
}
2249 
/*
 * Return the authentication scheme for a URL: the type of the matching
 * authinfo entry (for the given realm) when there is one, otherwise the
 * configured default cfg.auth_scheme.
 */
int url_get_auth_scheme(url * urlr, char *realm)
{
  int result = cfg.auth_scheme;
  authinfo *entry = authinfo_match_entry(urlr->type, url_get_site(urlr),
    url_get_port(urlr), url_get_path(urlr), realm);

  if(entry != NULL)
    result = entry->type;

  return result;
}
2262 
/*
 * Return the anchor name stored in the URL, or NULL for URL types that
 * have no anchor field (anything but HTTP(S), FTP(S) and FILE).
 * The pointer refers to the URL's own field.
 */
char *url_get_anchor_name(url * urlp)
{
  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
    return urlp->p.http.anchor_name;

  if(urlp->type == URLT_FTP || urlp->type == URLT_FTPS)
    return urlp->p.ftp.anchor_name;

  if(urlp->type == URLT_FILE)
    return urlp->p.file.anchor_name;

  return NULL;
}
2287 
/*
 * Release the anchor name of an HTTP(S), FTP(S) or FILE URL via _free().
 * Other URL types are left untouched.
 */
void url_clear_anchor(url * urlp)
{
  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
  {
    _free(urlp->p.http.anchor_name);
  }
  else if(urlp->type == URLT_FTP || urlp->type == URLT_FTPS)
  {
    _free(urlp->p.ftp.anchor_name);
  }
  else if(urlp->type == URLT_FILE)
  {
    _free(urlp->p.file.anchor_name);
  }
}
2307 
/*
 * Return the search (query) string stored in an HTTP(S) or FILE URL,
 * or NULL for any other URL type.  The pointer refers to the URL's
 * own field.
 */
char *url_get_search_str(url * urlp)
{
  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
    return urlp->p.http.searchstr;

  if(urlp->type == URLT_FILE)
    return urlp->p.file.searchstr;

  return NULL;
}
2328 
/*
 * Tell whether the URL denotes a directory: for HTTP(S) the document
 * is tested with tl_is_dirname(), for FTP(S) the stored dir flag
 * decides.  Returns 1 for a directory, 0 otherwise (and for all other
 * URL types).
 */
int url_is_dir_index(url * urlp)
{
  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
    return tl_is_dirname(urlp->p.http.document) ? 1 : 0;

  if(urlp->type == URLT_FTP || urlp->type == URLT_FTPS)
    return urlp->p.ftp.dir ? 1 : 0;

  return 0;
}
2335 
2336 /* Check if URL is on same site. Be careful not to disallow
2337    protocol changes like HTTP to HTTPS. */
url_is_same_site(url * urla,url * urlb)2338 int url_is_same_site(url * urla, url * urlb)
2339 {
2340   return
2341   /* (urla->type == urlb->type) &&
2342      (url_get_port(urla) == url_get_port(urlb)) && */
2343    !strcmp(url_get_site(urla), url_get_site(urlb));
2344 }
2345 
2346 /**************************************************/
2347 /* FIXME: Translate me                            */
2348 /* absolutna cesta k dokumentu z lokalneho stromu */
2349 /* ktory je referencovany relativne               */
2350 /**************************************************/
get_redirect_abs_path(url * rurl,char * fstr)2351 char *get_redirect_abs_path(url * rurl, char *fstr)
2352 {
2353   char *pom, *p, *p1;
2354 
2355   pom = tl_strdup(url_to_filename(rurl, TRUE));
2356   p = strrchr(pom, '/');
2357 
2358   p1 = realloc(pom, strlen(fstr) + (p - pom) + 2);
2359   strcpy(p1 + (p - pom) + 1, fstr);
2360 
2361   p = get_abs_file_path_oss(p1);
2362   free(p1);
2363 
2364   return p;
2365 }
2366 
/*
 * Replace the path stored in an HTTP(S), FTP(S) or FILE URL by the
 * result of get_abs_file_path().  For FTP a path that started with
 * "//" gets one extra leading '/' put in front of the result.
 * Other URL types are not modified.
 */
void url_path_abs(url * urlp)
{
  char *abs_path;

  if(urlp->type == URLT_HTTP || urlp->type == URLT_HTTPS)
  {
    abs_path = get_abs_file_path(urlp->p.http.document);
    free(urlp->p.http.document);
    urlp->p.http.document = abs_path;
  }
  else if(urlp->type == URLT_FTP || urlp->type == URLT_FTPS)
  {
    char *old_path = urlp->p.ftp.path;

    abs_path = get_abs_file_path(old_path);
    if(old_path[0] == '/' && old_path[1] == '/')
    {
      /* put a '/' in front of the result for "//..." paths */
      char *prefixed = tl_str_concat(NULL, "/", abs_path, NULL);

      _free(abs_path);
      abs_path = prefixed;
    }
    free(urlp->p.ftp.path);
    urlp->p.ftp.path = abs_path;
  }
  else if(urlp->type == URLT_FILE)
  {
    abs_path = get_abs_file_path(urlp->p.file.filename);
    free(urlp->p.file.filename);
    urlp->p.file.filename = abs_path;
  }
}
2400 
filename_to_url(char * ifn)2401 url *filename_to_url(char *ifn)
2402 {
2403   int cdln = strlen(priv_cfg.cache_dir);
2404   bool_t isok = FALSE;
2405 
2406   if(*ifn != '/')
2407     return NULL;
2408 
2409   if(cfg.enable_info)
2410   {
2411     url *nurl = dinfo_get_url_for_filename(ifn);
2412 
2413     if(nurl)
2414       return nurl;
2415   }
2416 
2417   if(!strncmp(ifn, priv_cfg.cache_dir, cdln))
2418   {
2419     char *p;
2420     int i;
2421     url *nurl = _malloc(sizeof(url));
2422     char *fn = tl_strdup(ifn);
2423 
2424     p = fn + cdln;
2425     p += (*p == '/');
2426 
2427     if(!strcasecmp(tl_get_extension(fn), "css"))
2428       nurl->status = URL_STYLE;
2429     else
2430       nurl->status = 0;
2431 
2432     nurl->level = 0;
2433     nurl->parent_url = NULL;
2434     nurl->moved_to = NULL;
2435     nurl->extension = NULL;
2436     nurl->local_name = tl_is_dirname(ifn) ?
2437       tl_str_concat(NULL, ifn, priv_cfg.index_name, NULL) : tl_strdup(ifn);
2438 #ifdef HAVE_MT
2439     pthread_mutex_init(&nurl->lock, NULL);
2440 #endif
2441 
2442 #ifdef WITH_TREE
2443 #ifdef I_FACE
2444     nurl->prop = NULL;
2445     nurl->tree_nfo = NULL;
2446 #endif
2447 #endif
2448 
2449     if(cfg.base_level && cfg.default_prefix)
2450     {
2451       char *tfn, *pfn;
2452       url *purl = url_parse(priv_cfg.default_prefix);
2453       assert(purl->type != URLT_FROMPARENT);
2454 
2455       pfn = url_get_default_local_name_real(purl, FALSE);
2456       tfn = tl_str_concat(NULL, priv_cfg.cache_dir,
2457         tl_is_dirname(priv_cfg.cache_dir) ? "" : "/",
2458         pfn, tl_is_dirname(pfn) ? "" : "/", p, NULL);
2459       _free(pfn);
2460       _free(fn);
2461       fn = tfn;
2462 
2463       p = fn + cdln;
2464       p += (*p == '/');
2465 
2466       free_deep_url(purl);
2467     }
2468 
2469     for(i = 0; i < NUM_ELEM(prottable); i++)
2470     {
2471       if(prottable[i].dirname &&
2472         !strncmp(p, prottable[i].dirname,
2473           strlen(prottable[i].dirname)) &&
2474         p[strlen(prottable[i].dirname)] == '/')
2475       {
2476         isok = TRUE;
2477         break;
2478       }
2479     }
2480 
2481     if(isok)
2482     {
2483       char *p2, *p3;
2484 
2485       nurl->type = prottable[i].id;
2486       nurl->parent_url = NULL;
2487       p += strlen(prottable[i].dirname) + 1;
2488 
2489       if(!p)
2490       {
2491         free(nurl);
2492         free(fn);
2493         return NULL;
2494       }
2495 
2496       switch (nurl->type)
2497       {
2498       case URLT_HTTP:
2499       case URLT_HTTPS:
2500         nurl->p.http.password = NULL;
2501         nurl->p.http.user = NULL;
2502         nurl->p.http.anchor_name = NULL;
2503         nurl->p.http.searchstr = NULL;
2504         nurl->p.http.port = prottable[i].default_port;
2505         if((p2 = strchr(p, '/')))
2506         {
2507           int p2_len = strlen(p2);
2508           int idx_len = strlen(priv_cfg.index_name);
2509           char *query = NULL;
2510 
2511           if(idx_len <= p2_len &&
2512             !strcmp((p2 + p2_len - idx_len), priv_cfg.index_name) &&
2513             ((p2_len > idx_len && *(p2 + p2_len - idx_len - 1) == '/')
2514               || idx_len == p2_len))
2515           {
2516             *(p2 + p2_len - idx_len) = '\0';
2517           }
2518 
2519           /* for POST #query */
2520           p3 = strchr(p2, '#');
2521           if(p3)
2522           {
2523             form_info *fi;
2524 
2525             *p3 = '\0';
2526             query = p3 + 1;
2527 
2528             fi = _malloc(sizeof(form_info));
2529 
2530             fi->method = FORM_M_POST;
2531             fi->encoding = FORM_E_URLENCODED;
2532             fi->action = NULL;
2533             fi->text = NULL;
2534             fi->infos = form_parse_urlencoded_query(query);
2535             fi->parent_url = NULL;
2536 
2537             nurl->extension = fi;
2538             nurl->status |= URL_FORM_ACTION;
2539           }
2540 
2541           /* for query part of GET request URL */
2542           p3 = strchr(p2, '?');
2543           if(p3)
2544           {
2545             *p3 = '\0';
2546             nurl->p.http.searchstr = tl_strdup(p3 + 1);
2547           }
2548 
2549           nurl->p.http.document = tl_strdup(p2);
2550           *p2 = '\0';
2551           p2 = strrchr(p, '_');
2552           if(p2)
2553           {
2554             p2++;
2555             nurl->p.http.port = _atoi(p2);
2556             if(errno == ERANGE)
2557             {
2558               nurl->p.http.host = tl_strdup(p);
2559               nurl->p.http.port = prottable[i].default_port;
2560             }
2561             else
2562             {
2563               nurl->p.http.host = tl_strndup(p, p2 - p - 1);
2564             }
2565           }
2566           else
2567             nurl->p.http.host = tl_strdup(p);
2568         }
2569         else
2570         {
2571           free(nurl);
2572           free(fn);
2573           return NULL;
2574         }
2575         break;
2576       case URLT_GOPHER:
2577         nurl->p.gopher.port = prottable[i].default_port;
2578         if((p2 = strchr(p, '/')))
2579         {
2580           int p2_len = strlen(p2);
2581           int idx_len = strlen(priv_cfg.index_name);
2582 
2583           p2++;
2584 
2585           if(idx_len <= p2_len &&
2586             !strcmp((p2 + p2_len - idx_len), priv_cfg.index_name) &&
2587             ((p2_len > idx_len && *(p2 + p2_len - idx_len - 1) == '1')
2588               || idx_len == p2_len))
2589           {
2590             *(p2 + p2_len - idx_len) = '\0';
2591           }
2592           nurl->p.gopher.selector = tl_strdup(p2);
2593           *p2 = '\0';
2594           p2 = strrchr(p, '_');
2595           if(p2)
2596           {
2597             p2++;
2598             nurl->p.gopher.port = _atoi(p2);
2599             if(errno == ERANGE)
2600             {
2601               nurl->p.gopher.host = tl_strdup(p);
2602               nurl->p.gopher.port = prottable[i].default_port;
2603             }
2604             else
2605             {
2606               nurl->p.gopher.host = tl_strndup(p, p2 - p - 1);
2607             }
2608           }
2609           else
2610             nurl->p.gopher.host = tl_strdup(p);
2611         }
2612         else
2613         {
2614           free(nurl);
2615           free(fn);
2616           return NULL;
2617         }
2618         break;
2619       case URLT_FTP:
2620       case URLT_FTPS:
2621         nurl->p.ftp.port = prottable[i].default_port;
2622         nurl->p.ftp.password = NULL;
2623         nurl->p.ftp.user = NULL;
2624         nurl->p.ftp.dir = FALSE;
2625         nurl->p.ftp.anchor_name = NULL;
2626         if((p2 = strchr(p, '/')))
2627         {
2628           int p2_len = strlen(p2);
2629           int idx_len = strlen(priv_cfg.index_name);
2630 
2631           if(idx_len <= p2_len &&
2632             !strcmp((p2 + p2_len - idx_len), priv_cfg.index_name) &&
2633             ((p2_len > idx_len && *(p2 + p2_len - idx_len - 1) == '/')
2634               || idx_len == p2_len))
2635           {
2636             *(p2 + p2_len - idx_len) = '\0';
2637             nurl->p.ftp.dir = TRUE;
2638           }
2639           nurl->p.ftp.path = tl_strdup(p2);
2640           *p2 = '\0';
2641           p2 = strrchr(p, '_');
2642           if(p2)
2643           {
2644             p2++;
2645             nurl->p.ftp.port = _atoi(p2);
2646             if(errno == ERANGE)
2647             {
2648               nurl->p.ftp.host = tl_strdup(p);
2649               nurl->p.ftp.port = prottable[i].default_port;
2650             }
2651             else
2652             {
2653               nurl->p.ftp.host = tl_strndup(p, p2 - p - 1);
2654             }
2655           }
2656           else
2657             nurl->p.ftp.host = tl_strdup(p);
2658         }
2659         else
2660         {
2661           free(nurl);
2662           free(fn);
2663           return NULL;
2664         }
2665         break;
2666       default:
2667         free(nurl);
2668         nurl = NULL;
2669         break;
2670       }
2671       free(fn);
2672       return nurl;
2673     }
2674     free(nurl);
2675   }
2676   return NULL;
2677 }
2678 
2679 /****************************************/
2680 /* zisti ci bol dokument referencovany  */
2681 /* v predchadzajucich cykloch           */
2682 /* FIXME: Translate me!                 */
2683 /****************************************/
url_was_befor(url * urlp)2684 url *url_was_befor(url * urlp)
2685 {
2686   url *ret;
2687 
2688   if(!prottable[urlp->type].supported)
2689     return NULL;
2690 
2691   LOCK_CFG_URLHASH;
2692   ret = (url *) dlhash_find(cfg.url_hash_tbl, (dllist_t) urlp);
2693   UNLOCK_CFG_URLHASH;
2694 
2695   return ret;
2696 }
2697 
/*
 * Drop the local file name of a URL: remove it from the info store
 * (only when both cfg.enable_info and cfg.post_update are set), take
 * the URL out of the file hash table and release the name itself.
 */
void url_forget_filename(url * urlp)
{
  const int drop_info = cfg.enable_info && cfg.post_update;

  if(drop_info)
  {
    dinfo_remove(urlp->local_name);
  }

  url_remove_from_file_hash_tab(urlp);
  _free(urlp->local_name);
}
2705 
/*
 * url_compare() with the generic dllist_t key signature: both keys
 * are cast to url pointers and compared.
 */
int dllist_url_compare(dllist_t key1, dllist_t key2)
{
  url *lhs = (url *) key1;
  url *rhs = (url *) key2;

  return url_compare(lhs, rhs);
}
2710 
/*
 * Equality of two optional strings: equal when both are NULL or when
 * both are set and have the same contents.
 */
static int url_opt_str_equal(const char *a, const char *b)
{
  if(a && b)
    return strcmp(a, b) == 0;

  /* at least one is NULL: equal only when both are */
  return a == b;
}

/*
 * Test two URLs for equality.
 *
 * This is an equality predicate, not an ordering: the result is
 * non-zero when the URLs are equal and 0 when they differ.  URLs of
 * different type, and URL types other than HTTP(S), FTP(S), GOPHER
 * and FILE, never compare equal.
 *
 * HTTP(S): document, search string, user, password, host, port and
 *          the URL_FORM_ACTION flag must match; for form actions also
 *          method, encoding and the form fields.
 * FTP(S):  path, user, password, host and port must match.
 * GOPHER:  selector, host and port must match.
 * FILE:    file name and search string must match.
 */
int url_compare(url * u1, url * u2)
{
  if(u1->type != u2->type)
    return 0;

  switch (u1->type)
  {
  case URLT_HTTP:
  case URLT_HTTPS:
    if(strcmp(u1->p.http.document, u2->p.http.document))
      return FALSE;

    if(!url_opt_str_equal(u1->p.http.searchstr, u2->p.http.searchstr))
      return FALSE;

    if(!url_opt_str_equal(u1->p.http.user, u2->p.http.user))
      return FALSE;

    if(!url_opt_str_equal(u1->p.http.password, u2->p.http.password))
      return FALSE;

    if(strcmp(u1->p.http.host, u2->p.http.host))
      return FALSE;

    if(u1->p.http.port != u2->p.http.port)
      return FALSE;

    if((u1->status & URL_FORM_ACTION) != (u2->status & URL_FORM_ACTION))
      return FALSE;

    if((u1->status & URL_FORM_ACTION) && (u2->status & URL_FORM_ACTION))
    {
      dllist *ptr;
      form_info *fi1 = (form_info *) u1->extension;
      form_info *fi2 = (form_info *) u2->extension;

      if(fi1->method != fi2->method)
        return FALSE;
      if(fi1->encoding != fi2->encoding)
        return FALSE;

      /*
       * Every field of the first form has to be present in the second.
       * NOTE(review): the reverse direction is not checked - confirm
       * that extra fields in u2 are meant to be ignored.
       */
      for(ptr = fi1->infos; ptr; ptr = ptr->next)
      {
        if(!dllist_find2(fi2->infos, ptr->data, form_field_compare))
          return FALSE;
      }
    }

    return TRUE;
  case URLT_FTP:
  case URLT_FTPS:
    if(strcmp(u1->p.ftp.path, u2->p.ftp.path))
      return FALSE;

    if(!url_opt_str_equal(u1->p.ftp.user, u2->p.ftp.user))
      return FALSE;

    if(!url_opt_str_equal(u1->p.ftp.password, u2->p.ftp.password))
      return FALSE;

    if(strcmp(u1->p.ftp.host, u2->p.ftp.host))
      return FALSE;

    return u1->p.ftp.port == u2->p.ftp.port;
  case URLT_GOPHER:
    if(strcmp(u1->p.gopher.selector, u2->p.gopher.selector))
      return FALSE;

    if(strcmp(u1->p.gopher.host, u2->p.gopher.host))
      return FALSE;

    return u1->p.gopher.port == u2->p.gopher.port;
  case URLT_FILE:
    if(strcmp(u1->p.file.filename, u2->p.file.filename))
      return FALSE;

    return url_opt_str_equal(u1->p.file.searchstr, u2->p.file.searchstr);
  default:
    return 0;
  }
}
2831 
url_info_new(char * urlstr)2832 url_info *url_info_new(char *urlstr)
2833 {
2834   url_info *ui;
2835 
2836   ui = _malloc(sizeof(url_info));
2837   ui->urlstr = tl_strdup(urlstr);
2838   ui->type = URLI_NORMAL;
2839   ui->fields = NULL;
2840   ui->encoding = FORM_E_UNKNOWN;
2841   ui->method = FORM_M_GET;
2842   ui->localname = NULL;
2843 
2844   return ui;
2845 }
2846 
/*
 * Release a url_info together with its URL string, local name and,
 * for URLI_FORM records, all form fields.  A NULL argument is ignored.
 */
void url_info_free(url_info * ui)
{
  if(!ui)
    return;

  _free(ui->urlstr);

  if(ui->type == URLI_FORM)
  {
    dllist *node = ui->fields;

    while(node)
    {
      form_field *field = (form_field *) node->data;

      _free(field->name);
      _free(field->value);
      _free(field);

      /* unlink the node just handled and continue with what is left */
      node = dllist_remove_entry(node, node);
    }
  }

  _free(ui->localname);
  _free(ui);
}
2870 
/*
 * Keywords understood by url_info_parse() in a request description.
 * Each entry pairs the kind of value with the keyword text, which
 * includes the trailing ':' and is matched case-insensitively.
 */
static const struct
{
  enum
  {
    _RQF_URL,                   /* URL of the request */
    _RQF_METHOD,                /* request method (GET or POST) */
    _RQF_ENCODING,              /* form encoding ("m" or "u") */
    _RQF_FIELD,                 /* form field given as name=value */
    _RQF_FILE,                  /* file form field given as name=value */
    _RQF_LOCALNAME              /* local file name */
  } type;
  char *str;                    /* keyword text including the ':' */
} _request_fields[] =
{
  {_RQF_URL, "URL:"},
  {_RQF_METHOD, "METHOD:"},
  {_RQF_ENCODING, "ENCODING:"},
  {_RQF_FIELD, "FIELD:"},
  {_RQF_FILE, "FILE:"},
  {_RQF_LOCALNAME, "LNAME:"}
};
2892 
url_info_parse(char * str)2893 url_info *url_info_parse(char *str)
2894 {
2895   url_info *ui;
2896   char *p, *tp;
2897   int l = 0;
2898   bool_t err = FALSE;
2899   bool_t found = FALSE;
2900   int i;
2901 
2902   ui = url_info_new(NULL);
2903   ui->type = URLI_FORM;
2904 
2905   p = str;
2906   while(!err && *p)
2907   {
2908     p += strspn(p, " \t");
2909 
2910     found = FALSE;
2911     for(i = 0; i < NUM_ELEM(_request_fields); i++)
2912     {
2913       if(!strncasecmp(p, _request_fields[i].str,
2914           strlen(_request_fields[i].str)))
2915       {
2916         found = TRUE;
2917         p += strlen(_request_fields[i].str);
2918         if(*p == '\"')
2919         {
2920           p++;
2921           l = strcspn(p, "\"");
2922         }
2923         else
2924           l = strcspn(p, " \t");
2925         if(!l)
2926           err = TRUE;
2927 
2928         break;
2929       }
2930     }
2931     if(err || !found)
2932     {
2933       err = TRUE;
2934       break;
2935     }
2936     switch (_request_fields[i].type)
2937     {
2938     case _RQF_URL:
2939       {
2940         url *urlp;
2941 
2942         ui->urlstr = tl_strndup(p, l);
2943         urlp = url_parse(ui->urlstr);
2944         assert(urlp->type != URLT_FROMPARENT);
2945         _free(ui->urlstr);
2946         ui->urlstr = url_to_urlstr(urlp, FALSE);
2947         free_deep_url(urlp);
2948         _free(urlp);
2949       }
2950       break;
2951     case _RQF_LOCALNAME:
2952       {
2953         char *tmp = tl_strndup(p, l);
2954 
2955         ui->localname = get_abs_file_path_oss(tmp);
2956         _free(tmp);
2957       }
2958       break;
2959     case _RQF_METHOD:
2960       if(!strncasecmp(p, "GET", l))
2961         ui->method = FORM_M_GET;
2962       else if(!strncasecmp(p, "POST", l))
2963         ui->method = FORM_M_POST;
2964       else
2965         err = TRUE;
2966       break;
2967     case _RQF_ENCODING:
2968       if(!strncasecmp(p, "m", l))
2969         ui->encoding = FORM_E_MULTIPART;
2970       else if(!strncasecmp(p, "u", l))
2971         ui->encoding = FORM_E_URLENCODED;
2972       else
2973         err = TRUE;
2974       break;
2975     case _RQF_FIELD:
2976     case _RQF_FILE:
2977       {
2978         form_field *fi;
2979 
2980         fi = _malloc(sizeof(form_field));
2981 
2982         fi->name = NULL;
2983         fi->value = NULL;
2984 
2985         fi->type = (_request_fields[i].type == _RQF_FILE) ?
2986           FORM_T_FILE : FORM_T_TEXT;
2987 
2988         tp = strchr(p, '=');
2989 
2990         if(!tp || (tp - p) > l)
2991           err = TRUE;
2992         else
2993         {
2994           fi->name = form_decode_urlencoded_str(p, tp - p);
2995           fi->value = form_decode_urlencoded_str(tp + 1, l - (tp - p + 1));
2996           if(fi->type == FORM_T_TEXT && strchr(fi->value, '\n'))
2997             fi->type = FORM_T_TEXTAREA;
2998         }
2999         if(err || !fi->name || !fi->value)
3000         {
3001           _free(fi->value);
3002           _free(fi->name);
3003           _free(fi);
3004         }
3005         else
3006           ui->fields = dllist_append(ui->fields, (dllist_t) fi);
3007       }
3008       break;
3009     }
3010     p += l;
3011     p += *p == '\"';
3012   }
3013 
3014   if(!err)
3015   {
3016     if(!ui->urlstr)
3017     {
3018       xprintf(1, gettext("Missing specification of URL in request\n"));
3019       err = TRUE;
3020     }
3021 
3022 #if 0                           /* sometimes we need also empty forms */
3023     if(!ui->fields && ui->method == FORM_M_GET)
3024       ui->type = URLI_NORMAL;
3025     else if(!ui->fields)
3026     {
3027       xprintf(1,
3028         gettext("Missing request fields specification for POST request\n"));
3029       err = TRUE;
3030     }
3031 #endif
3032 
3033     if(ui->method == FORM_M_GET && ui->encoding == FORM_E_MULTIPART)
3034     {
3035       xprintf(1,
3036         gettext("Multipart encoding not supported with GET requests\n"));
3037       err = TRUE;
3038     }
3039   }
3040 
3041   if(err)
3042   {
3043     url_info_free(ui);
3044     ui = NULL;
3045   }
3046 
3047   return ui;
3048 }
3049 
/*
 * Write a url_info out as a request description made of
 * KEYWORD:value items (URL, LNAME, METHOD, ENCODING, FIELD, FILE),
 * each followed by a blank.  Method, encoding and fields are written
 * for URLI_FORM records only; field names and values are url-encoded.
 * Returns the string built with tl_str_concat()/tl_str_append().
 */
char *url_info_dump(url_info * ui)
{
  char *out;
  dllist *node;

  out = tl_str_concat(NULL, "URL:\"", ui->urlstr, "\" ", NULL);

  if(ui->localname)
    out = tl_str_concat(out, "LNAME:\"", ui->localname, "\" ", NULL);

  if(ui->type != URLI_FORM)
    return out;

  switch (ui->method)
  {
  case FORM_M_GET:
    out = tl_str_append(out, "METHOD:GET ");
    break;
  case FORM_M_POST:
    out = tl_str_append(out, "METHOD:POST ");
    break;
  default:
    break;
  }

  if(ui->encoding == FORM_E_URLENCODED)
    out = tl_str_append(out, "ENCODING:u ");
  else if(ui->encoding == FORM_E_MULTIPART)
    out = tl_str_append(out, "ENCODING:m ");

  for(node = ui->fields; node; node = node->next)
  {
    form_field *field = (form_field *) node->data;
    char *enc_name = form_encode_urlencoded_str(field->name);
    char *enc_value = form_encode_urlencoded_str(field->value);
    const char *keyword =
      (field->type == FORM_T_FILE) ? "FILE:\"" : "FIELD:\"";

    out = tl_str_concat(out, keyword, enc_name, "=", enc_value, "\" ", NULL);

    _free(enc_name);
    _free(enc_value);
  }

  return out;
}
3095 
/*
 * Make a deep copy of a url_info: URL string, local name, method,
 * encoding, type and every form field (name and value duplicated).
 */
url_info *url_info_duplicate(url_info * ui)
{
  dllist *node;
  url_info *copy = url_info_new(ui->urlstr);

  if(ui->localname)
    copy->localname = tl_strdup(ui->localname);

  copy->type = ui->type;
  copy->method = ui->method;
  copy->encoding = ui->encoding;

  for(node = ui->fields; node; node = node->next)
  {
    form_field *src = (form_field *) node->data;
    form_field *dst = (form_field *) _malloc(sizeof(form_field));

    dst->type = src->type;
    dst->name = tl_strdup(src->name);
    dst->value = tl_strdup(src->value);

    copy->fields = dllist_append(copy->fields, (dllist_t) dst);
  }

  return copy;
}
3124