1 /***************************************************************************/
2 /*    This code is part of WWW grabber called pavuk                        */
3 /*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
4 /*    Distributed under GPL 2 or later                                     */
5 /***************************************************************************/
6 
7 #include "config.h"
8 
9 #include <assert.h>
10 #include <string.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <errno.h>
14 #include <netdb.h>
15 #include <errno.h>
16 #include <unistd.h>
17 #include <sys/socket.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <dirent.h>
21 #include <limits.h>
22 #include <time.h>
23 #include <signal.h>
24 
25 #include "url.h"
26 #include "doc.h"
27 #include "tools.h"
28 #include "html.h"
29 #include "http.h"
30 #include "ftp.h"
31 #include "myssl.h"
32 #include "abstract.h"
33 #include "recurse.h"
34 #include "mime.h"
35 #include "robots.h"
36 #include "mode.h"
37 #include "times.h"
38 #include "stats.h"
39 #include "errcode.h"
40 #include "cookie.h"
41 #include "log.h"
42 #include "gui_api.h"
43 #include "form.h"
44 #include "ainterface.h"
45 #include "gcinfo.h"
46 
47 static void dump_ftp_list(dllist *);
48 static void dump_urls_list(dllist *);
49 
/*
 * Common exit path of process_document(): cleans up the document, releases
 * the URL string of the current pass and returns the document's error code.
 * Relies on the locals `docu' and `pstr' of the function it is used in.
 * Wrapped in do { } while(0) so that it behaves as a single statement
 * (e.g. after an unbraced `if').
 */
#define SETNEXTURL \
  do { \
    doc_cleanup(docu); \
    _free(pstr); \
    return docu->errcode; \
  } while(0)
53 
54 #ifdef HAVE_MT
/*
 * SIGINT handler installed by the downloading threads.
 *
 * nr - signal number (not used).
 *
 * When built with I_FACE and cfg.processing is not set, the whole process
 * exits with status 0.  Otherwise errno is set to EINTR and both cfg.stop
 * and cfg.rbreak are raised.
 *
 * NOTE(review): exit() is not on the POSIX list of async-signal-safe
 * functions - confirm that calling it from a handler is acceptable here.
 */
static void _sigintthr(int nr)
{
#ifdef I_FACE
  /* nothing is being processed: just leave the program */
  if(!cfg.processing)
  {
    exit(0);
  }
#endif

  errno = EINTR;
  cfg.stop = TRUE;
  cfg.rbreak = TRUE;
}
68 
/*
 * SIGQUIT handler: terminates the thread that receives the signal.
 *
 * nr - signal number (not used).
 *
 * NOTE(review): pthread_exit() is not listed as async-signal-safe by
 * POSIX - confirm this is intended.
 */
static void _sigquitthr(int nr)
{
  pthread_exit(NULL);
}
73 #endif
74 
/*
 * Put a URL back at the end of the global URL queue (cfg.urlstack).
 * process_document() calls this for documents that ended with ERR_LOCKED.
 *
 * urlp - URL to append; the pointer itself is stored in the list.
 *
 * Also bumps cfg.total_cnt and, in multithreaded builds, raises
 * cfg.urlstack_sem.
 */
static void reschedule_url(url * urlp)
{
  /* note: cfg.total_cnt is read here before the queue lock is taken */
  DEBUG_MISC(gettext("Rescheduling locked URL as no. %d\n"), cfg.total_cnt);
  LOCK_CFG_URLSTACK;
  cfg.urlstack = dllist_append(cfg.urlstack, (dllist_t) urlp);
#ifdef HAVE_MT
  /* presumably wakes a thread waiting on the queue semaphore - confirm */
  mt_semaphore_up(&cfg.urlstack_sem);
#endif
  cfg.total_cnt++;
  UNLOCK_CFG_URLSTACK;
}
86 
/*
 * Return a newly malloc()ed copy of STR wrapped in single quotes so that a
 * POSIX shell reads it as exactly one literal word.  Every embedded single
 * quote is emitted as '\'' (close quote, escaped quote, reopen quote).
 * A NULL STR is quoted as the empty string.  Returns NULL when out of
 * memory; the caller releases the result with free().
 */
static char *post_cmd_quote_arg(const char *str)
{
  const char *s;
  char *quoted, *d;
  size_t len = 2;               /* opening and closing quote */

  if(!str)
    str = "";

  for(s = str; *s; s++)
    len += (*s == '\'') ? 4 : 1;

  quoted = malloc(len + 1);
  if(!quoted)
    return NULL;

  d = quoted;
  *d++ = '\'';
  for(s = str; *s; s++)
  {
    if(*s == '\'')
    {
      memcpy(d, "'\\''", 4);
      d += 4;
    }
    else
    {
      *d++ = *s;
    }
  }
  *d++ = '\'';
  *d = '\0';

  return quoted;
}

/*
 * Run the user supplied post-processing command (priv_cfg.post_cmd) for a
 * document.  The command line has the form
 *
 *   <post_cmd> '<local file name>' <1|0> '<URL>'
 *
 * where the middle argument is 1 for parsable documents and 0 otherwise.
 *
 * docp - document that was just processed.
 *
 * The file name and URL may originate from remote content, so both are
 * shell-quoted; a single quote inside them can no longer terminate the
 * argument and inject shell syntax.  For values without single quotes the
 * command line is unchanged.  If quoting fails for lack of memory the
 * command is not run.
 */
static void run_post_command(doc * docp)
{
  char *urlstr;
  char *qfile;
  char *qurl;
  char *cmd;

  DEBUG_MISC(gettext("Running post-processing command\n"));
  urlstr = url_to_urlstr(docp->doc_url, TRUE);

  /* the string returned by url_to_filename() is not freed here */
  qfile = post_cmd_quote_arg(url_to_filename(docp->doc_url, FALSE));
  qurl = post_cmd_quote_arg(urlstr);

  _free(urlstr);

  if(qfile && qurl)
  {
    cmd = tl_str_concat(NULL, priv_cfg.post_cmd, " ", qfile,
      docp->is_parsable ? " 1 " : " 0 ", qurl, NULL);

    tl_system(cmd);

    _free(cmd);
  }

  free(qfile);
  free(qurl);
}
105 
/*
 * Turn the nform-th HTML form of a document into a new starting URL.
 *
 * docp  - document whose contents hold the form
 * nform - index of the form handed to form_get_text()
 * ui    - form description given by the user; its fields take precedence
 *         over the form's own default fields of the same name
 *
 * Nothing happens when the form text cannot be located or parsed.
 */
static void add_matching_form(doc * docp, int nform, url_info * ui)
{
  int text_len;
  char *form_text;
  form_info *parsed;
  dllist *node;
  dllist *merged = NULL;
  dllist *defaults = NULL;
  url_info *form_ui;

  form_text = form_get_text(nform, docp->contents, docp->size, &text_len);
  if(!form_text)
    return;

  parsed = form_parse(form_text, text_len);
  if(!parsed)
    return;

  /* start from private copies of the user supplied fields */
  node = ui->fields;
  while(node)
  {
    form_field *copy = form_field_duplicate((form_field *) node->data);

    merged = dllist_prepend(merged, (dllist_t) copy);
    node = node->next;
  }

  /* then add the form's own default fields unless the name is taken */
  form_get_default_successful(NULL, parsed->infos, &defaults);

  while(defaults)
  {
    form_field *ff = (form_field *) defaults->data;

    if(!dllist_find2(merged, (dllist_t) ff, form_field_compare_name))
    {
      /* ownership of the field moves to the merged list */
      merged = dllist_prepend(merged, (dllist_t) ff);
    }
    else
    {
      /* overridden by a user field of the same name: drop it */
      _free(ff->name);
      _free(ff->value);
      _free(ff);
    }

    defaults = dllist_remove_entry(defaults, defaults);
  }

  form_ui = url_info_new(parsed->action);
  form_ui->type = URLI_FORM;
  form_ui->fields = merged;
  form_ui->encoding = parsed->encoding;
  form_ui->method = parsed->method;
  form_ui->localname = tl_strdup(ui->localname);

  form_free(parsed);

  append_starting_url(form_ui, docp->doc_url);

  url_info_free(form_ui);
}
165 
/*
 * Walk the action URLs of all forms found in a document and, for every
 * HTTP/HTTPS action that equals the urlstr of an entry in
 * priv_cfg.formdata, register the form through add_matching_form().
 *
 * docp     - document the forms were found in
 * formlist - list of action URL strings; the position in the list is used
 *            as the form index
 */
static void add_matching_forms(doc * docp, dllist * formlist)
{
  dllist *form_node;
  int form_idx = 0;

  for(form_node = formlist; form_node; form_node = form_node->next, form_idx++)
  {
    char *action = (char *) form_node->data;
    url *parsed = url_parse(action);
    int is_http;
    dllist *cfg_node;

    assert(parsed->type != URLT_FROMPARENT);

    /* the parsed URL is needed only to learn the protocol */
    is_http = (parsed->type == URLT_HTTP) || (parsed->type == URLT_HTTPS);
    free_deep_url(parsed);
    _free(parsed);

    if(!is_http)
      continue;

    for(cfg_node = priv_cfg.formdata; cfg_node; cfg_node = cfg_node->next)
    {
      url_info *ui = (url_info *) cfg_node->data;

      if(strcmp(ui->urlstr, action) == 0)
        add_matching_form(docp, form_idx, ui);
    }
  }
}
199 
/*
 * Download one document and post-process it.
 *
 * docu      - document descriptor; docu->doc_url is the URL to fetch.
 *             Several per-attempt fields are reset at the start of every
 *             pass of the loop below.
 * check_lim - when non-zero and the URL has a parent_url, the URL is first
 *             checked against its processed/user-disabled flags, the
 *             limiting rules and robots.txt.
 *
 * The body is a retry loop.  A pass either finishes the document (leaving
 * through SETNEXTURL) or `continue's to try again after a truncated
 * transfer, a redirect (urlr then follows moved_to) or a recoverable error.
 * After a successful download, parsable documents are scanned for links
 * and forms, the contents are dumped or stored, the optional post command
 * runs and the document lock is removed.
 *
 * Returns docu->errcode, or ERR_UNKNOWN when the loop is left because
 * cfg.stop or cfg.rbreak got set.
 */
int process_document(doc * docu, int check_lim)
{
  url *urlr;
  /* counters of re-gets, followed redirects and retries after errors */
  int nreget = 0, nredir = 0, pokus = 0;
  time_t atm;
  char cpom[64];
  char *pstr = NULL;
  int store_stat;
  struct stat estat;

  urlr = docu->doc_url;

  docu->check_limits = check_lim;

  _Xt_Serve;

  /* limits are only applied to URLs that have a parent */
  if(docu->check_limits)
    docu->check_limits = (urlr->parent_url != NULL);

  while(!cfg.stop && !cfg.rbreak)
  {
    _free(docu->ftp_pasv_host);
    docu->errcode = ERR_NOERROR;
    docu->mime = NULL;
    docu->type_str = NULL;
    docu->doc_url = urlr;
    docu->dtime = 0L;
    docu->contents = NULL;
    docu->is_chunked = FALSE;
    docu->read_chunksize = FALSE;
    docu->read_trailer = FALSE;
    docu->ftp_fatal_err = FALSE;
    pstr = url_to_urlstr(urlr, FALSE);

    if(pokus)
      xprintf(1, gettext("retry no. %d\n"), pokus);

#ifdef HAVE_MT
    xprintf(1, gettext("URL[%2d]: %5d(%d) of %5d  %s\n"), docu->threadnr + 1,
      docu->doc_nr, cfg.fail_cnt, cfg.total_cnt, pstr);
#else
    xprintf(1, gettext("URL: %5d(%d) of %5d  %s\n"), docu->doc_nr,
      cfg.fail_cnt, cfg.total_cnt, pstr);
#endif

#ifdef I_FACE
    if(cfg.xi_face)
    {
      gui_set_doccounter();

      gui_set_url(pstr);

      gui_set_status(gettext("Starting download"));
    }
#endif
    /*** to be able to revisit moved documents ***/
    /*** especially for authorization purposes ***/
    if((urlr->status & URL_PROCESSED) && urlr->moved_to && nredir)
    {
      urlr->status &= ~URL_PROCESSED;
    }

    if(docu->check_limits)
    {
      cond_info_t condp;

      condp.level = 2;
      condp.urlnr = docu->doc_nr;
      condp.size = 0;
      condp.time = 0L;
      condp.mimet = NULL;
      condp.full_tag = NULL;
      condp.params = NULL;
      condp.html_doc = NULL;
      condp.html_doc_offset = 0;
      condp.tag = NULL;
      condp.attrib = NULL;

      if(urlr->status & URL_PROCESSED)
      {
        xprintf(1, gettext("Already processed\n"));
        docu->errcode = ERR_PROCESSED;
        SETNEXTURL;
      }

      if(urlr->status & URL_USER_DISABLED)
      {
        xprintf(1, gettext("Disallowed by user\n"));
        docu->errcode = ERR_UDISABLED;
        SETNEXTURL;
      }

      /*
       * rejected when the protocol is unsupported, when it is an FTP(S)
       * directory and cfg.condition.ftpdir is off, or when the limiting
       * conditions refuse the URL
       */
      if(!prottable[urlr->type].supported || (urlr->parent_url
      && (urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
      && urlr->p.ftp.dir && !cfg.condition.ftpdir)
      || (urlr->parent_url && !url_append_condition(urlr, &condp)))
      {
        xprintf(1, gettext("Disallowed by rules\n"));

        urlr->status |= URL_REJECTED;
        docu->errcode = ERR_RDISABLED;
        SETNEXTURL;
      }

      gui_set_status(gettext("Checking \"robots.txt\""));

      if(!robots_check(urlr))
      {
        xprintf(1, gettext("Disallowed by \"robots.txt\"\n"));
        urlr->status |= URL_REJECTED;
        docu->errcode = ERR_RDISABLED;
        SETNEXTURL;
      }
    }

    if(cfg.mode == MODE_FTPDIR &&
      (urlr->type != URLT_FTP && urlr->type != URLT_FTPS))
    {
      xprintf(1,
        gettext("This URL type is not supported with ftpdir mode\n"));

      urlr->status |= URL_REJECTED;
      docu->errcode = ERR_RDISABLED;
      SETNEXTURL;
    }

    _Xt_Serve;

    if(cfg.mode == MODE_SYNC)
    {
      char *pp = url_to_filename(urlr, TRUE);

      if(!stat(pp, &estat) && !S_ISDIR(estat.st_mode))
      {
        atm = time(NULL) - 86400 * cfg.ddays;
        /*
           pro: We do not want the message
           "No transfer - file not expired"
           if the server's clock is ahead of our clock.
           If no parameter cfg.ddays is given, then
           we do not compare the file modification times.
         */
        if(cfg.ddays == 0 || estat.st_mtime < atm)
          docu->dtime = estat.st_mtime;
        else
        {
          xprintf(1, gettext("No transfer - file not expired\n"));
          urlr->status |= URL_REJECTED;
          docu->errcode = ERR_RDISABLED;
          SETNEXTURL;
        }
        urlr->status |= URL_ISLOCAL;
        docu->origsize = estat.st_size;
      }
    }

    if(cfg.show_time)
    {
      atm = time(NULL);
      LOCK_TIME;
      strftime(cpom, sizeof(cpom), "%H:%M:%S", localtime(&atm));
      UNLOCK_TIME;
      xprintf(1, gettext("Starting time :  %s\n"), cpom);
    }

#ifdef I_FACE
    if(cfg.stop || cfg.rbreak)
    {
      _free(pstr);
      break;
    }
#endif

    _Xt_Serve;

    /*
     * FTP and FTPS symbolic links.  The second test used to repeat
     * URLT_FTP, so FTPS links never reached this branch; the other
     * protocol checks in this function pair URLT_FTP with URLT_FTPS.
     */
    if((urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
      && urlr->extension &&
      ((ftp_url_extension *) urlr->extension)->type == FTP_TYPE_L &&
      ((ftp_url_extension *) urlr->extension)->slink)
    {
      if(cfg.retrieve_slink)
      {
        /** need to kill extension, because we must **/
        /** guess the file type beside the symlink  **/
        ftp_url_ext_free(urlr->extension);
        urlr->extension = NULL;
      }
      else
      {
        ftp_make_symlink(urlr);
        urlr->status |= URL_PROCESSED;
        docu->errcode = ERR_NOERROR;
        SETNEXTURL;
      }
    }

    gui_set_status(gettext("Starting download"));

    /* a non-zero result of doc_download() is handled as a failure */
    if(doc_download(docu, FALSE, FALSE))
    {
      if(cfg.show_time)
      {
        atm = time(NULL);
        LOCK_TIME;
        strftime(cpom, sizeof(cpom), "%H:%M:%S", localtime(&atm));
        UNLOCK_TIME;
        xprintf(1, gettext("Ending time :    %s\n"), cpom);
      }

      _Xt_Serve;
      doc_remove_lock(docu);
      _free(docu->contents);

      report_error(docu, gettext("download"));
      /* the format has a %d conversion, so the code must be passed */
      DEBUG_USER("Error status code - (%d)\n", docu->errcode);

      /*
       * Errors that lead to another pass: truncated or too slow
       * transfers (while nreget allows), redirects (while nredir
       * allows) and the two authorization errors, which leave the
       * function below.
       * NOTE(review): nreget is only incremented for the two *_TRUNC
       * codes, so the other errors of the first group are not limited
       * by cfg.nreget - confirm this is intended.
       */
      if((nreget < cfg.nreget &&
          (docu->errcode == ERR_HTTP_TRUNC ||
            docu->errcode == ERR_FTP_TRUNC ||
            docu->errcode == ERR_LOW_TRANSFER_RATE ||
            docu->errcode == ERR_HTTP_FAILREGET ||
            docu->errcode == ERR_HTTP_TIMEOUT ||
            docu->errcode == ERR_HTTP_GW_TIMEOUT)) ||
        (nredir < cfg.nredir &&
          docu->errcode == ERR_HTTP_REDIR) ||
        (docu->errcode == ERR_HTTP_AUTH) ||
        (docu->errcode == ERR_HTTP_PROXY_AUTH))
      {
        if(docu->errcode == ERR_HTTP_REDIR)
        {
          urlr->status |= URL_PROCESSED;
          if((urlr->moved_to->status & URL_PROCESSED) &&
            (!urlr->moved_to->moved_to))
          {
            /* the redirect target was already handled */
            SETNEXTURL;
          }
          else
          {
#ifdef I_FACE
            if(cfg.xi_face)
              gui_tree_set_icon_for_doc(docu);
#endif
            /* the next pass downloads the redirect target */
            urlr = urlr->moved_to;
          }
        }

        if(docu->errcode == ERR_HTTP_TRUNC)
        {
          urlr->status |= URL_TRUNCATED;
          _free(docu->etag);

          /* take the first of these headers that is present */
          docu->etag = get_mime_param_val_str("ETag:", docu->mime);
          if(!docu->etag)
            docu->etag =
              get_mime_param_val_str("Content-Location:", docu->mime);
          /* NOTE(review): unlike the two names above this one has no
             trailing colon - confirm against get_mime_param_val_str() */
          if(!docu->etag)
            docu->etag = get_mime_param_val_str("Last-Modified", docu->mime);
        }

        if(docu->errcode == ERR_HTTP_AUTH)
        {
          docu->doc_url->status |= URL_PROCESSED;
          docu->doc_url->status |= URL_ERR_REC;
          SETNEXTURL;
        }

        if(docu->errcode == ERR_HTTP_PROXY_AUTH)
        {
          docu->doc_url->status |= URL_PROCESSED;
          docu->doc_url->status |= URL_ERR_REC;
          SETNEXTURL;
        }

        _free(docu->mime);
        _free(docu->type_str);

        nreget += (docu->errcode == ERR_HTTP_TRUNC ||
          docu->errcode == ERR_FTP_TRUNC) && cfg.mode != MODE_SREGET;
        nredir += (docu->errcode == ERR_HTTP_REDIR);
        _free(pstr);
        continue;
      }

      /* recoverable errors: retried until cfg.nretry attempts were made */
      if(docu->errcode == ERR_FTP_UNKNOWN ||
        docu->errcode == ERR_FTP_CONNECT ||
        docu->errcode == ERR_FTP_DATACON ||
        docu->errcode == ERR_FTPS_CONNECT ||
        docu->errcode == ERR_FTPS_DATASSLCONNECT ||
        docu->errcode == ERR_HTTP_UNKNOWN ||
        docu->errcode == ERR_HTTP_CONNECT ||
        docu->errcode == ERR_HTTP_SNDREQ ||
        docu->errcode == ERR_HTTP_SNDREQDATA ||
        docu->errcode == ERR_HTTP_RCVRESP ||
        docu->errcode == ERR_HTTP_SERV ||
        docu->errcode == ERR_HTTP_TIMEOUT ||
        docu->errcode == ERR_HTTP_PROXY_CONN ||
        docu->errcode == ERR_HTTPS_CONNECT ||
        docu->errcode == ERR_READ ||
        docu->errcode == ERR_ZERO_SIZE ||
        docu->errcode == ERR_GOPHER_CONNECT ||
        docu->errcode == ERR_PROXY_CONNECT)
      {
        urlr->status |= URL_ERR_REC;
        pokus++;
        /*** retry only when allowed ***/
        if(pokus >= cfg.nretry)
        {
          urlr->status |= URL_PROCESSED;
          SETNEXTURL;
        }
        _free(pstr);
        _free(docu->mime);
        _free(docu->type_str);
        continue;
      }
      else if(docu->errcode == ERR_LOCKED)
      {
        if(!cfg.urlstack)
        {
          xprintf(1,
            gettext("last document locked -> sleeping for 5 seconds\n"));
          tl_sleep(5);
        }
        reschedule_url(urlr);
        SETNEXTURL;
      }
      else if(docu->errcode == ERR_BIGGER ||
        docu->errcode == ERR_SMALLER ||
        docu->errcode == ERR_NOMIMET ||
        docu->errcode == ERR_OUTTIME || docu->errcode == ERR_SCRIPT_DISABLED)
      {
        urlr->status |= URL_PROCESSED;
        urlr->status |= URL_ERR_REC;
        SETNEXTURL;
      }
      else
      {
        /*** remove improper documents if required ***/
        if((cfg.remove_old &&
            (cfg.mode == MODE_SYNC ||
              cfg.mode == MODE_MIRROR)) &&
          (((docu->errcode == ERR_FTP_GET ||
                docu->errcode == ERR_FTP_BDIR ||
                docu->errcode == ERR_FTP_NODIR) &&
              docu->ftp_respc == 550) ||
            docu->errcode == ERR_HTTP_NFOUND ||
            docu->errcode == ERR_HTTP_GONE))
        {
          doc_remove(docu->doc_url);
        }

        urlr->status |= URL_ERR_UNREC;
        urlr->status |= URL_PROCESSED;

        SETNEXTURL;
      }
    }

    _Xt_Serve;

    /* download succeeded: clear flags left over from earlier attempts */
    if(urlr->status & URL_TRUNCATED)
      urlr->status &= ~URL_TRUNCATED;

    if(urlr->status & URL_ERR_REC)
      urlr->status &= ~URL_ERR_REC;

    if(cfg.show_time)
    {
      atm = time(NULL);
      LOCK_TIME;
      strftime(cpom, sizeof(cpom), "%H:%M:%S", localtime(&atm));
      UNLOCK_TIME;
      xprintf(1, gettext("Ending time :    %s\n"), cpom);
    }

    report_error(docu, gettext("download"));

    _Xt_Serve;

    if(docu->contents)
    {
      if(docu->is_parsable)
      {
        dllist *formlist = NULL;
        dllist *urls;

        gui_set_status(gettext("Relocating and scanning HTML document"));

        /* form actions are collected only when form data is configured */
        urls =
          html_process_document(docu, priv_cfg.formdata ? &formlist : NULL);

        _Xt_Serve;

        if(urls && cfg.dump_urlfd >= 0)
        {
          dump_urls_list(urls);
        }

        if(priv_cfg.formdata && formlist)
        {
          add_matching_forms(docu, formlist);
          while(formlist)
          {
            if(formlist->data) free((void *) formlist->data);
            formlist = dllist_remove_entry(formlist, formlist);
          }
        }

        /* the `urls' list is consumed by exactly one of these branches */
        if(cfg.mode != MODE_SREGET &&
          cfg.mode != MODE_FTPDIR && !(docu->doc_url->status & URL_NORECURSE))
        {
          gui_tree_add_start();
          cat_links_to_url_list(urls);
          gui_tree_add_end();
        }
        else if(cfg.mode == MODE_FTPDIR)
        {
          dump_ftp_list(urls);
        }
        else
        {
          for(; urls; urls = dllist_remove_entry(urls, urls))
          {
            free_deep_url((url *) urls->data);
            if(urls->data) free((url *)urls->data);
          }
        }

        _Xt_Serve;
      }

      store_stat = 0;

      if(cfg.dumpfd >= 0 && cfg.dump_after)
      {
        bufio *fd;

        gui_set_status(gettext("Dumping processed document"));
        LOCK_DUMPFD;
        fd = bufio_dupfd(cfg.dumpfd);

        if(docu->mime && cfg.dump_resp)
          bufio_write(fd, docu->mime, strlen(docu->mime));

        bufio_write(fd, docu->contents, docu->size);

        bufio_close(fd);
        UNLOCK_DUMPFD;
      }
      else if((docu->doc_url->type != URLT_FILE) &&
        !(docu->doc_url->status & URL_REDIRECT) &&
        (docu->errcode != ERR_HTTP_ACTUAL) &&
        (docu->errcode != ERR_FTP_ACTUAL) &&
        (cfg.mode != MODE_NOSTORE) &&
        (cfg.dumpfd < 0) && (cfg.mode != MODE_FTPDIR))
      {
        gui_set_status(gettext("Storing document"));

        store_stat = doc_store(docu, TRUE);

        if(store_stat)
        {
          xprintf(1, gettext("Store failed\n"));
          urlr->status &= ~URL_ERR_REC;
        }
      }

      _Xt_Serve;

      if(priv_cfg.post_cmd)
        run_post_command(docu);

      doc_remove_lock(docu);

      doc_update_parent_links(docu);
    }
    else
    {
      if(priv_cfg.post_cmd)
        run_post_command(docu);

      doc_remove_lock(docu);

      doc_update_parent_links(docu);
    }

    urlr->status |= URL_DOWNLOADED;
    urlr->status |= URL_PROCESSED;
    SETNEXTURL;
  }
  return ERR_UNKNOWN;
}
692 
693 #ifdef I_FACE
/*
 * Download one URL on its own, outside of the URL queue loop (compiled
 * only when I_FACE is defined).
 *
 * urlp - URL to download.
 *
 * Clears cfg.rbreak/cfg.stop and sets cfg.processing for the duration of
 * the download, runs process_document() with limit checking turned off
 * and returns its result.
 */
int download_single_doc(url * urlp)
{
  int rv;
  doc docu;
  global_connection_info con_info;
#if defined(HAVE_MT) && defined(I_FACE)
  _config_struct_priv_t privcfg;

  /* on OSF these two names are defined away as empty macros */
#if defined (__OSF__) || defined (__osf__)
#define __builtin_try
#define __builtin_finally
#endif

  /* give this thread its own copy of the private configuration; the
     cleanup handler pushed here is popped (and run) further below, the
     push and the pop have to stay in the same lexical block */
  privcfg_make_copy(&privcfg);
  pthread_setspecific(cfg.privcfg_key, (void *) (&privcfg));
  pthread_cleanup_push((void *) privcfg_free, (void *) (&privcfg));
#endif

  gui_start_download(FALSE);

#ifdef HAVE_MT
  {
    sigset_t smask;

    /* unblock SIGINT and SIGQUIT in this thread and install handlers */
    sigemptyset(&smask);
    sigaddset(&smask, SIGINT);
    sigaddset(&smask, SIGQUIT);
    pthread_sigmask(SIG_UNBLOCK, &smask, NULL);

    signal(SIGINT, _sigintthr);
    signal(SIGQUIT, _sigquitthr);
  }

  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
  pthread_setspecific(cfg.thrnr_key, (void *) 0);
  DEBUG_MTTHR("starting thread(%ld) %d\n", pthread_self(), 0);

  cfg.allthreadsnr = 0;
  gui_mt_thread_start(cfg.allthreadsnr);
#endif

  cfg.rbreak = FALSE;
  cfg.stop = FALSE;
  cfg.processing = TRUE;

  doc_init(&docu, urlp);
#ifdef HAVE_MT
  docu.threadnr = 0;
  pthread_setspecific(cfg.currdoc_key, (void *) NULL);
  pthread_setspecific(cfg.herrno_key, (void *) (&(docu.__herrno)));
#endif
  rv = process_document(&docu, FALSE);

  /* move the document's connection data into con_info and dispose of it
     (presumably this closes connections left open - confirm) */
  init_global_connection_data(&con_info);
  save_global_connection_data(&con_info, &docu);
  kill_global_connection_data(&con_info);

  cfg.processing = FALSE;

  cfg.rbreak = FALSE;
  cfg.stop = FALSE;

#if defined(HAVE_MT) && defined(I_FACE)
  /* TRUE: run privcfg_free(&privcfg) now */
  pthread_cleanup_pop(TRUE);
#endif
#ifdef HAVE_MT
  doc_finish_processing(&docu);
  cfg.allthreadsnr = 0;
  gui_mt_thread_end(0);
#endif
  gui_beep();
  gui_set_msg(gettext("Done"), 0);

  return rv;
}
769 #endif
770 
/*********************************************/
/* recursive traversal of the document tree  */
/*********************************************/
775 #ifdef HAVE_MT
_recurse(int thnr)776 static void _recurse(int thnr)
777 #else
778 void recurse(int thnr)
779 #endif
780 {
781   bool_t rbreaksave, stopsave;
782 
783   global_connection_info con_info;
784 
785   if(cfg.urlstack == NULL)
786     return;
787 
788   init_global_connection_data(&con_info);
789 
790 /**** obsluzenie vsetkych URL v zozname ****/
791 /**** FIXME: Translate me!              ****/
792   while(cfg.urlstack && !cfg.stop)
793   {
794     doc docu;
795     url *urlp;
796 
797     LOCK_CFG_URLSTACK;
798     if(cfg.urlstack)
799     {
800       urlp = (url *) cfg.urlstack->data;
801       cfg.urlstack = dllist_remove_entry(cfg.urlstack, cfg.urlstack);
802 #ifdef HAVE_MT
803       mt_semaphore_decrement(&cfg.urlstack_sem);
804 #endif
805       UNLOCK_CFG_URLSTACK;
806     }
807     else
808     {
809       UNLOCK_CFG_URLSTACK;
810       break;
811     }
812 
813     doc_init(&docu, urlp);
814 
815 #ifdef HAVE_MT
816     docu.threadnr = thnr;
817     pthread_setspecific(cfg.currdoc_key, (void *) (&docu));
818     pthread_setspecific(cfg.herrno_key, (void *) (&(docu.__herrno)));
819 #endif
820 
821     LOCK_DCNT;
822     cfg.docnr++;
823     docu.doc_nr = cfg.docnr;
824     UNLOCK_DCNT;
825 
826     restore_global_connection_data(&con_info, &docu);
827 
828     process_document(&docu, TRUE);
829 
830     save_global_connection_data(&con_info, &docu);
831 
832 #ifdef HAVE_MT
833     doc_finish_processing(&docu);
834 #endif
835 
836     if(docu.errcode == ERR_QUOTA_FS ||
837       docu.errcode == ERR_QUOTA_TRANS ||
838       docu.errcode == ERR_QUOTA_TIME || cfg.rbreak)
839     {
840       LOCK_CFG_URLSTACK;
841       cfg.docnr--;
842       cfg.urlstack = dllist_prepend(cfg.urlstack, (dllist_t) urlp);
843 #ifdef HAVE_MT
844       mt_semaphore_up(&cfg.urlstack_sem);
845 #endif
846       UNLOCK_CFG_URLSTACK;
847       break;
848     }
849   }
850 #if defined(I_FACE) && !defined(HAVE_MT)
851   if(cfg.xi_face)
852   {
853     gui_set_status(gettext("Done"));
854   }
855 #endif
856 
857 #ifdef I_FACE
858   if(cfg.xi_face)
859     gui_set_doccounter();
860 #endif
861 
862   stopsave = cfg.stop;
863   rbreaksave = cfg.rbreak;
864   cfg.stop = FALSE;
865   cfg.rbreak = FALSE;
866 
867   kill_global_connection_data(&con_info);
868 
869   if(cfg.update_cookies)
870   {
871     cookie_update_file(TRUE);
872   }
873 
874   cfg.stop = stopsave;
875   cfg.rbreak = rbreaksave;
876 
877   if(!cfg.rbreak && !cfg.stop && cfg.stats_file)
878   {
879     stats_fill_spage(cfg.stats_file, NULL);
880   }
881 }
882 
883 #ifdef HAVE_MT
/*
 * Start routine of a downloading thread (recurse() passes it to
 * pthread_create()).
 *
 * thrnr - number of this thread.
 *
 * Sets up signal handling and thread-specific data, then alternates
 * between running _recurse() and waiting on cfg.urlstack_sem until
 * cfg.stop or cfg.rbreak is set.  Ends with pthread_exit().
 */
static void _recurse_thrd(int thrnr)
{
  /* `init' is assigned here and in the loop but never read */
  bool_t init = (thrnr == 0);
#ifdef I_FACE
  _config_struct_priv_t privcfg;
#endif

  {
    sigset_t smask;

    /* unblock SIGINT and SIGQUIT in this thread and install handlers */
    sigemptyset(&smask);
    sigaddset(&smask, SIGINT);
    sigaddset(&smask, SIGQUIT);
    pthread_sigmask(SIG_UNBLOCK, &smask, NULL);

    signal(SIGINT, _sigintthr);
    signal(SIGQUIT, _sigquitthr);
  }

  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
  pthread_setspecific(cfg.currdoc_key, (void *) NULL);
  /* the thread number is stored directly in the pointer value */
  pthread_setspecific(cfg.thrnr_key, (void *) thrnr);
  DEBUG_MTTHR("starting thread(%ld) %d\n", pthread_self(), thrnr);

#ifdef I_FACE
  /* per-thread copy of the private configuration; the cleanup handler
     pushed here is popped (and run) after the loop, the push and the pop
     have to stay in the same lexical block */
  privcfg_make_copy(&privcfg);
  pthread_setspecific(cfg.privcfg_key, (void *) (&privcfg));
  pthread_cleanup_push((void *) privcfg_free, (void *) (&privcfg));
#endif

  for(; !cfg.rbreak && !cfg.stop;)
  {
    /* `v' receives the result of the timed wait but is not used further */
    int v;

    /* cancellation is disabled while URLs are being processed */
    pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
    DEBUG_MTTHR("thread %d awaking\n", thrnr);

    _recurse(thrnr);
    init = FALSE;
    gui_clear_status();
    DEBUG_MTTHR("thread %d sleeping\n", thrnr);
    gui_set_status(gettext("Sleeping ..."));
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
    /* presumably tells recurse() that this thread went idle - confirm */
    mt_semaphore_up(&cfg.nrunning_sem);
    /* UN-critical section */

    /* poll the queue semaphore (timeout argument 400) for as long as the
       wait returns a negative value and no stop was requested */
    while(!cfg.stop && !cfg.rbreak &&
      (v = mt_semaphore_timed_wait(&cfg.urlstack_sem, 400)) < 0);

    mt_semaphore_decrement(&cfg.nrunning_sem);
  }
#ifdef I_FACE
  /* TRUE: run privcfg_free(&privcfg) now */
  pthread_cleanup_pop(TRUE);
#endif
  DEBUG_MTTHR("thread %d exiting\n", thrnr);
  gui_set_status(gettext("Exiting ..."));
  pthread_exit(NULL);
}
942 
recurse(int dumb)943 void recurse(int dumb)
944 {
945   pthread_attr_t thrdattr;
946   int i;
947   int num = cfg.nthr;
948   sigset_t smask;
949 
950   sigemptyset(&smask);
951   sigaddset(&smask, SIGINT);
952   sigaddset(&smask, SIGQUIT);
953   pthread_sigmask(SIG_UNBLOCK, &smask, NULL);
954 
955   signal(SIGQUIT, _sigquitthr);
956 
957   pthread_attr_init(&thrdattr);
958   pthread_attr_setscope(&thrdattr, PTHREAD_SCOPE_SYSTEM);
959   pthread_attr_setstacksize(&thrdattr, MT_STACK_SIZE);
960   mt_semaphore_init(&cfg.nrunning_sem);
961 
962   if(num <= 0)
963     num = 1;
964 
965   cfg.allthreadsnr = 0;
966   cfg.allthreads = _malloc(num * sizeof(pthread_t));
967 
968   mt_semaphore_decrement(&cfg.urlstack_sem);
969 
970   for(i = 0; i < num; i++)
971   {
972     if(!pthread_create(&(cfg.allthreads[cfg.allthreadsnr]),
973         &thrdattr, (void *) _recurse_thrd, (void *) cfg.allthreadsnr))
974     {
975       cfg.allthreadsnr++;
976       gui_mt_thread_start(cfg.allthreadsnr);
977       mt_semaphore_decrement(&cfg.nrunning_sem);
978     }
979     else
980     {
981       char pom[100];
982       sprintf(pom, "Create downloading thread %d", i);
983       xperror(pom);
984     }
985     if(cfg.rbreak || cfg.stop)
986       break;
987   }
988 
989   while(!cfg.stop && !cfg.rbreak &&
990     mt_semaphore_timed_down(&cfg.nrunning_sem, 500) < 0);
991 
992   cfg.stop = TRUE;
993 
994   tl_msleep(300);
995 
996   for(i = 0; i < cfg.allthreadsnr; i++)
997   {
998 /*
999     pthread_cancel(cfg.allthreads[i]);
1000     pthread_kill(cfg.allthreads[i], SIGQUIT);
1001 */
1002     pthread_join(cfg.allthreads[i], NULL);
1003   }
1004 
1005   mt_semaphore_destroy(&cfg.nrunning_sem);
1006   _free(cfg.allthreads);
1007   cfg.allthreadsnr = 0;
1008   gui_mt_thread_end(0);
1009 }
1010 #endif
1011 
/*
 * Print an FTP directory listing (used in MODE_FTPDIR) and release the
 * list.
 *
 * urllst - list of url objects.  Every url and every list entry is freed
 *          here, so the caller must not use the list afterwards.
 *
 * A line is printed for each FTP/FTPS URL that is not an inline object and
 * does not occur again later in the list.  Only the last path component is
 * shown; when the URL carries an FTP extension the output also has the
 * size (files), the link target (symlinks) or a trailing slash
 * (directories).
 */
static void dump_ftp_list(dllist * urllst)
{
  dllist *ptr = urllst;

  while(ptr)
  {
    url *urlp = (url *) ptr->data;
    void *dupl;

    /* of several equal URLs only the last occurrence is printed */
    dupl = dllist_find2(ptr->next, (dllist_t) urlp, dllist_url_compare);

    if(!dupl && !(urlp->status & URL_INLINE_OBJ) &&
      (urlp->type == URLT_FTP || urlp->type == URLT_FTPS))
    {
      char *p, *pp;

      p = url_get_path(urlp);
      pp = strrchr(p, '/');
      if(pp)
      {
        pp++;
        if(!*pp)
        {
          /*
           * The path ends with a slash: walk back from that slash to the
           * start of the last component.  The pointer never moves before
           * `p', which the previous `pp -= 2' did for the path "/".
           */
          pp--;
          while(pp > p && pp[-1] != '/')
            pp--;
        }
        if(urlp->extension)
        {
          ftp_url_extension *fe = urlp->extension;

          if(fe->type == FTP_TYPE_F)
            xprintf(1, gettext("\t%s    (%d bytes)\n"), pp, fe->size);
          else if(fe->type == FTP_TYPE_L)
            xprintf(1, "\t%s    -> %s\n", pp, fe->slink);
          else if(fe->type == FTP_TYPE_D)
            xprintf(1, "\t%s/\n", pp);
        }
        else
          xprintf(1, "\t%s\n", pp);
      }
    }
    free_deep_url(urlp);
    free(urlp);
    ptr = dllist_remove_entry(ptr, ptr);
  }
}
1060 
/*
 * Write LEN bytes from BUF to FD, continuing after short writes and after
 * writes interrupted by a signal.  Returns 0 when everything was written
 * and -1 on any other error.
 */
static int dump_write_all(int fd, const char *buf, size_t len)
{
  while(len > 0)
  {
    ssize_t n = write(fd, buf, len);

    if(n < 0)
    {
      if(errno == EINTR)
        continue;
      return -1;
    }
    if(n == 0)
      return -1;                /* no progress: do not spin forever */

    buf += n;
    len -= (size_t) n;
  }

  return 0;
}

/*
 * Write the URLs of a list to cfg.dump_urlfd, one per line.
 *
 * urls - list of url objects; the list is left untouched.
 *
 * A URL that occurs again later in the list is skipped at its earlier
 * position, so each distinct URL is written once.  The output is written
 * while LOCK_DUMPURLS is held.  Writing stays best effort as before:
 * a failed write does not abort the dump.
 */
static void dump_urls_list(dllist * urls)
{
  dllist *ptr;

  LOCK_DUMPURLS;
  for(ptr = urls; ptr; ptr = ptr->next)
  {
    void *dupl;

    dupl = dllist_find2(ptr->next, (dllist_t) ptr->data, dllist_url_compare);

    if(!dupl)
    {
      char *ustr = url_to_urlstr((url *) ptr->data, FALSE);

      if(ustr)
      {
        (void) dump_write_all(cfg.dump_urlfd, ustr, strlen(ustr));
        (void) dump_write_all(cfg.dump_urlfd, "\n", 1);
        free(ustr);
      }
    }
  }
  UNLOCK_DUMPURLS;
}
1086 
get_urls_to_resume(char * dirname)1087 void get_urls_to_resume(char *dirname)
1088 {
1089   DIR *dir;
1090   struct dirent *dent;
1091   char next_dir[PATH_MAX];
1092   struct stat estat;
1093   url *purl;
1094 
1095   if(!(dir = opendir(dirname)))
1096   {
1097     xperror(dirname);
1098     return;
1099   }
1100 
1101   gui_set_msg(gettext("Searching for files to resume"), 0);
1102 
1103   while((dent = readdir(dir)))
1104   {
1105     _Xt_Serve;
1106 
1107     snprintf(next_dir, sizeof(next_dir), "%s/%s", dirname, dent->d_name);
1108     if(!strcmp(dent->d_name, "."))
1109       continue;
1110     if(!strcmp(dent->d_name, ".."))
1111       continue;
1112     if(lstat(next_dir, &estat))
1113     {
1114       xperror(next_dir);
1115       continue;
1116     }
1117 
1118     if(S_ISDIR(estat.st_mode))
1119     {
1120       if(!strcmp(dent->d_name, ".pavuk_info") && cfg.enable_info)
1121         continue;
1122 
1123       get_urls_to_resume(next_dir);
1124     }
1125     else if(!strncmp(".in_", dent->d_name, 4))
1126     {
1127       snprintf(next_dir, sizeof(next_dir), "%s/%s", dirname, dent->d_name + 4);
1128       if((purl = filename_to_url(next_dir)))
1129       {
1130         if(cfg.mode != MODE_MIRROR)
1131         {
1132           xprintf(1, gettext("Adding %s to resume list\n"), next_dir);
1133         }
1134         purl->status |= URL_ISSTARTING;
1135         url_set_filename(purl, tl_strdup(next_dir));
1136         append_url_to_list(purl);
1137       }
1138     }
1139 
1140 #ifdef I_FACE
1141     if(cfg.xi_face && (cfg.rbreak || cfg.stop))
1142       break;
1143 #endif
1144   }
1145 
1146   closedir(dir);
1147 }
1148 
/*
 * Helper for get_urls_to_synchronize(): map the local file fname back to
 * an URL and prepend it to *list.
 *
 * fname          - local file name to convert with filename_to_url()
 * ftp_file_type  - FTP type stored in the URL extension when the URL is
 *                  an FTP URL that is not a directory (FTP_TYPE_F or
 *                  FTP_TYPE_L); FTP directories always get FTP_TYPE_D
 * list           - list head updated in place
 *
 * Nothing is added when filename_to_url() yields no URL.
 */
static void sync_list_add_file(char *fname, int ftp_file_type, dllist ** list)
{
  url *purl;
  char *ustr;

  if(!(purl = filename_to_url(fname)))
    return;

  ustr = url_to_urlstr(purl, FALSE);
  if(cfg.mode != MODE_MIRROR)
  {
    xprintf(1, gettext("Adding file %s to sync list as URL %s\n"),
      fname, ustr);
  }
  _free(ustr);

  if(purl->type == URLT_FTP)
  {
    int tp;

    if(purl->p.ftp.dir)
      tp = FTP_TYPE_D;
    else
      tp = ftp_file_type;
    purl->extension = ftp_url_ext_new(tp, -1, -1, NULL, 0);
  }

  purl->status |= URL_ISSTARTING;
  url_set_filename(purl, tl_strdup(fname));
  *list = dllist_prepend(*list, (dllist_t) purl);
}

/*
 * Recursively scan the directory tree rooted at dirname and prepend to
 * *list one URL for every local document that should be synchronized.
 *
 * - Directories are descended into (".pavuk_info" is skipped when
 *   cfg.enable_info is set).  When the directory name maps to an FTP URL
 *   and cfg.store_index is not set, the directory itself is added as an
 *   FTP_TYPE_D URL whose file name is the directory path followed by
 *   priv_cfg.index_name.
 * - ".lock" files are ignored when cfg.enable_info is set.
 * - Files named ".in_<name>" are added under the name with the ".in_"
 *   prefix stripped.
 * - Every other file is added under its own name; for FTP URLs a symbolic
 *   link is recorded as FTP_TYPE_L where S_ISLNK is available.
 *
 * Entries are examined with lstat().  Paths that do not fit into PATH_MAX
 * (including the trailing '/' needed for directories) are reported and
 * skipped rather than truncated or written past the end of the buffer.
 */
void get_urls_to_synchronize(char *dirname, dllist ** list)
{
  DIR *dir;
  struct dirent *dent;
  char next_dir[PATH_MAX];
  struct stat estat;
  url *purl;
  int len;

  if(!(dir = opendir(dirname)))
  {
    xperror(dirname);
    return;
  }

  gui_set_msg(gettext("Searching for documents to synchronize"), 0);

  while((dent = readdir(dir)))
  {
    _Xt_Serve;

    if(!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
      continue;

    len = snprintf(next_dir, sizeof(next_dir), "%s/%s", dirname, dent->d_name);
    if(len < 0 || (size_t) len >= sizeof(next_dir))
    {
      /* a truncated path could name a different, unrelated file */
      errno = ENAMETOOLONG;
      xperror(dent->d_name);
      continue;
    }

    if(lstat(next_dir, &estat))
    {
      xperror(next_dir);
      continue;
    }

    if(S_ISDIR(estat.st_mode))
    {
      if(!strcmp(dent->d_name, ".pavuk_info") && cfg.enable_info)
        continue;

      /* need room for the trailing '/' plus the terminating NUL */
      if((size_t) len + 1 >= sizeof(next_dir))
      {
        errno = ENAMETOOLONG;
        xperror(next_dir);
        continue;
      }

      /* directory names are converted to URLs with a trailing slash */
      next_dir[len] = '/';
      next_dir[len + 1] = '\0';

      if((purl = filename_to_url(next_dir)) &&
        purl->type == URLT_FTP && !cfg.store_index)
      {
        purl->status |= URL_ISSTARTING;
        purl->extension = ftp_url_ext_new(FTP_TYPE_D, -1, -1, NULL, 0);
        url_set_filename(purl,
          tl_str_concat(NULL, next_dir, priv_cfg.index_name, NULL));
        *list = dllist_prepend(*list, (dllist_t) purl);
      }
      else if(purl)
      {
        free_deep_url(purl);
        _free(purl);
      }

      /* drop the trailing slash again before descending */
      next_dir[len] = '\0';

      get_urls_to_synchronize(next_dir, list);
    }
    else if(cfg.enable_info && !strcmp(dent->d_name, ".lock"))
    {
      /* do nothing */
      continue;
    }
    else if(!strncmp(".in_", dent->d_name, 4))
    {
      /*
       * Rebuild the path without the ".in_" prefix; it is four characters
       * shorter than the path built above, so it is known to fit.
       */
      snprintf(next_dir, sizeof(next_dir), "%s/%s", dirname, dent->d_name + 4);
      sync_list_add_file(next_dir, FTP_TYPE_F, list);
    }
    else
    {
#ifdef S_ISLNK
      sync_list_add_file(next_dir,
        S_ISLNK(estat.st_mode) ? FTP_TYPE_L : FTP_TYPE_F, list);
#else
      sync_list_add_file(next_dir, FTP_TYPE_F, list);
#endif
    }

#ifdef I_FACE
    /* let the GUI user interrupt a long running scan */
    if(cfg.xi_face && (cfg.rbreak || cfg.stop))
      break;
#endif
  }

  closedir(dir);
}
1285