1 /***************************************************************************/
2 /*    This code is part of WWW grabber called pavuk                        */
3 /*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
4 /*    Distributed under GPL 2 or later                                     */
5 /***************************************************************************/
6 
7 #include "config.h"
8 
9 #include <unistd.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <limits.h>
16 #include <sys/types.h>
17 #include <sys/socket.h>
18 #include <sys/stat.h>
19 #ifdef HAVE_SYS_PARAM_H
20 #include <sys/param.h>
21 #endif
22 #ifdef HAVE_FSTATVFS
23 #ifdef HAVE_SYS_STATVFS_H
24 #include <sys/statvfs.h>
25 #endif
26 #else
27 #ifdef HAVE_FSTATFS
28 #ifdef HAVE_SYS_STATFS_H
29 #include <sys/statfs.h>
30 #endif
31 #ifdef HAVE_SYS_VFS_H
32 #include <sys/vfs.h>
33 #endif
34 #ifdef HAVE_SYS_MOUNT_H
35 #include <sys/mount.h>
36 #endif
37 #endif
38 #endif
39 #include <sys/time.h>
40 #include <time.h>
41 #include <utime.h>
42 
43 #include "url.h"
44 #include "doc.h"
45 #include "tools.h"
46 #include "mime.h"
47 #include "http.h"
48 #include "ftp.h"
49 #include "gopher.h"
50 #include "decode.h"
51 #include "abstract.h"
52 #include "mode.h"
53 #include "times.h"
54 #include "dinfo.h"
55 #include "errcode.h"
56 #include "log.h"
57 #include "gui_api.h"
58 #include "html.h"
59 
#ifdef I_FACE
/* File-local helper; in the code below it is only called when cfg.xi_face is set. */
static void doc_set_info(doc *);
#endif

/* File-local helpers used by the transfer code below. */
static void show_progress(doc *, ssize_t, int);
static double compute_speed_rate(time_t, ssize_t);
66 
/*
 * Reset the per-download state of a document before a transfer starts.
 *
 * docu - document descriptor to initialise
 * load - stored in docu->load; overridden with TRUE when dumping to
 *        cfg.dumpfd with cfg.dump_after set
 *
 * For transfers that are carried over HTTP (plain HTTP/HTTPS, or FTP and
 * Gopher routed through an HTTP proxy) a proxy address and port are
 * selected, unless docu->http_proxy is already set.  When dumping to
 * cfg.dumpfd without cfg.dump_after, a duplicate of that descriptor is
 * installed as docu->s_sock.
 *
 * Returns 0 on success; -1 with docu->errcode = ERR_STORE_DOC when the
 * dump descriptor cannot be duplicated.
 */
int doc_download_init(doc * docu, int load)
{
  /*** locking and content state ***/
  docu->remove_lock = TRUE;
  docu->lock_fn = NULL;
  docu->is_parsable = cfg.enable_js && (docu->doc_url->status & URL_ISSCRIPT);
  docu->contents = NULL;
  docu->mime = NULL;
  docu->type_str = NULL;
  docu->save_online = FALSE;

  /*** size and time bookkeeping ***/
  docu->size = 0;
  docu->current_size = 0;
  docu->totsz = -1;
  docu->origsize = 0;
  docu->rest_pos = 0;
  docu->stime = time(NULL);
  docu->s_sock = NULL;

  /*** protocol flags ***/
  docu->is_http11 = FALSE;
  docu->is_chunked = FALSE;
  docu->is_persistent = FALSE;
  docu->read_chunksize = FALSE;
  docu->read_trailer = FALSE;
  docu->doreget = FALSE;
  docu->origtime = docu->dtime;
  docu->adj_sz = 0;
  docu->load = load;
  docu->http_proxy_10 = FALSE;
  docu->ftp_data_con_finished = FALSE;

  docu->num_auth = cfg.auth_reuse_nonce ? 1 : 0;
  docu->num_proxy_auth = cfg.auth_reuse_proxy_nonce ? 1 : 0;

  /*** is the document going to be fetched with an HTTP request? ***/
  docu->is_http_transfer =
    docu->doc_url->type == URLT_HTTP ||
    docu->doc_url->type == URLT_HTTPS ||
    (docu->doc_url->type == URLT_FTP &&
    priv_cfg.ftp_proxy && cfg.ftp_via_http && !cfg.ftp_dirtyp) ||
    (docu->doc_url->type == URLT_GOPHER &&
    priv_cfg.gopher_proxy && cfg.gopher_via_http);

  /*** placeholder only, the real request type is assigned later ***/
  docu->request_type = HTTP_REQ_UNKNOWN;

  /*** choose a proxy, unless one is already attached to the document ***/
  if(docu->is_http_transfer && !docu->http_proxy)
  {
    char *proxy_addr = NULL;
    unsigned short proxy_port = 0;

    switch (docu->doc_url->type)
    {
    case URLT_HTTP:
      {
        http_proxy *hp;

        LOCK_PROXY;
        hp = http_proxy_get();
        if(hp)
        {
          http_proxy_check(hp, docu);
          proxy_addr = tl_strdup(hp->addr);
          proxy_port = hp->port;
          docu->http_proxy_10 = (hp->is_10 != 0);
        }
        UNLOCK_PROXY;
      }
      break;
#ifdef USE_SSL
    case URLT_HTTPS:
      if(priv_cfg.ssl_proxy)
      {
        proxy_addr = tl_strdup(priv_cfg.ssl_proxy);
        proxy_port = cfg.ssl_proxy_port;
      }
      break;
#endif
    case URLT_FTP:
      if(priv_cfg.ftp_proxy)
      {
        proxy_addr = tl_strdup(priv_cfg.ftp_proxy);
        proxy_port = cfg.ftp_proxy_port;
      }
      break;
    case URLT_GOPHER:
      if(priv_cfg.gopher_proxy)
      {
        proxy_addr = tl_strdup(priv_cfg.gopher_proxy);
        proxy_port = cfg.gopher_proxy_port;
      }
      break;
    default:
      /* no proxy: proxy_addr/proxy_port keep their NULL/0 defaults */
      break;
    }

    docu->http_proxy = proxy_addr;
    docu->http_proxy_port = proxy_port;
  }

  /*** dumping to a descriptor instead of storing to the local tree ***/
  if(cfg.dumpfd >= 0)
  {
    docu->remove_lock = FALSE;

    if(!cfg.dump_after)
    {
      /* stream the data to a duplicate of the dump descriptor */
      docu->save_online = TRUE;
      docu->s_sock = bufio_dupfd(cfg.dumpfd);

      if(docu->s_sock == NULL)
      {
        xperror("bufio_dupfd()");
        docu->errcode = ERR_STORE_DOC;
        return -1;
      }
    }
    else
    {
      /* dump after the transfer: keep the whole document in memory */
      docu->load = TRUE;
      docu->save_online = FALSE;
      docu->s_sock = NULL;
    }
  }

  /*** timing: record the start, clear all other time stamps ***/
  gettimeofday(&docu->hr_start_time, NULL);
  timerclear(&docu->redirect_time);
  timerclear(&docu->dns_time);
  timerclear(&docu->connect_time);
  timerclear(&docu->first_byte_time);
  timerclear(&docu->end_time);

  return 0;
}
196 
/*
 * Check the running transfer against the configured limits.
 *
 * docu     - document being transferred
 * len      - number of bytes received in the last read; added to
 *            cfg.trans_size for non-file, non-redirected URLs
 * totallen - number of bytes received so far for this document
 *
 * Checks, in order: minimal transfer rate (cfg.minrate), maximal run
 * time (cfg.max_time), per-file size quota (cfg.file_quota), total
 * transfer quota (cfg.trans_quota) and, when fstatfs()/fstatvfs() is
 * available, free space on the target filesystem (cfg.fs_quota).
 * Every violated limit sets docu->errcode and disables reuse of the
 * connection; a later check overwrites the result of an earlier one.
 *
 * Returns 0 when the transfer may continue, 1 when the per-file quota
 * was the last limit hit, -1 when any other limit was the last one hit.
 */
static int doc_check_quotas(doc * docu, ssize_t len, ssize_t totallen)
{
  int retcode = 0;

/* Wrapped in do { } while(0) so that it behaves as a single statement. */
#define KILL_PERSISTANT_CONNECTION \
  do \
  { \
    if(docu->doc_url->type == URLT_FTP || docu->doc_url->type == URLT_FTPS) \
      docu->ftp_fatal_err = TRUE; \
    if(docu->is_http11) \
      docu->is_persistent = FALSE; \
  } while(0)

  if(cfg.minrate > 0.0 && (docu->doc_url->type != URLT_FILE &&
    !(docu->doc_url->status & URL_REDIRECT)))
  {
    time_t _tm = doc_etime(docu, FALSE);
    double _rt = compute_speed_rate(_tm, totallen);
    if(_rt < (cfg.minrate * 1024.0))
    {
      KILL_PERSISTANT_CONNECTION;
      docu->errcode = ERR_LOW_TRANSFER_RATE;
      retcode = -1;
    }
  }

  if(cfg.max_time > 0.0)
  {
    /* cfg.max_time is multiplied by 60 before being added to the start time */
    if((cfg.start_time + (int) (60.0 * cfg.max_time)) < time(NULL))
    {
      KILL_PERSISTANT_CONNECTION;
      docu->errcode = ERR_QUOTA_TIME;
      retcode = -1;
    }
  }

  if(docu->doc_url->type != URLT_FILE &&
    !(docu->doc_url->status & URL_REDIRECT))
    cfg.trans_size += len;

  if(cfg.file_quota && ((cfg.file_quota * 1024) <= totallen) &&
    (docu->doc_url->type != URLT_FILE) &&
    !(docu->doc_url->status & URL_REDIRECT))
  {
    KILL_PERSISTANT_CONNECTION;
    docu->errcode = ERR_QUOTA_FILE;
    /* distinct code: the caller stops reading but does not report failure */
    retcode = 1;
  }

  if(cfg.trans_quota && ((cfg.trans_quota * 1024) <= cfg.trans_size))
  {
    KILL_PERSISTANT_CONNECTION;
    docu->errcode = ERR_QUOTA_TRANS;
    retcode = -1;
  }

#if defined HAVE_FSTATFS || defined HAVE_FSTATVFS
  if((cfg.dumpfd < 0) && cfg.fs_quota &&
    (docu->doc_url->type != URLT_FILE) &&
    !(docu->doc_url->status & URL_REDIRECT) && docu->s_sock)
  {
#ifdef HAVE_FSTATVFS
    struct statvfs fss;
#else
    struct statfs fss;
#endif
    double blocksize = 0.0;
    int have_fsinfo = 0;

#ifdef HAVE_FSTATVFS
    if(fstatvfs(bufio_getfd(docu->s_sock), &fss))
      xperror("fstatvfs");
    else
    {
      /* POSIX: f_bavail is counted in f_frsize units, not f_bsize */
      blocksize = (double) (fss.f_frsize ? fss.f_frsize : fss.f_bsize);
      have_fsinfo = 1;
    }
#else
    if(fstatfs(bufio_getfd(docu->s_sock), &fss))
      xperror("fstatfs");
    else
    {
      blocksize = (double) fss.f_bsize;
      have_fsinfo = 1;
    }
#endif

    if(have_fsinfo)
    {
      /* computed in floating point so that large filesystems cannot */
      /* overflow the byte count; result is in kilobytes            */
      double freespace = (blocksize * (double) fss.f_bavail) / 1024.0;

      if(freespace < cfg.fs_quota)
      {
        KILL_PERSISTANT_CONNECTION;
        docu->errcode = ERR_QUOTA_FS;
        retcode = -1;
      }
    }
  }
#endif
  return retcode;
}
280 
/*
 * Write exactly count bytes of data to fd, continuing after short writes
 * and retrying when write() is interrupted by a signal.
 *
 * Returns 0 when everything was written, -1 otherwise (errno describes
 * the failure).
 */
static int doc_write_fully(int fd, const char *data, size_t count)
{
  while(count > 0)
  {
    ssize_t wr = write(fd, data, count);

    if(wr < 0)
    {
      if(errno == EINTR)
        continue;
      return -1;
    }
    if(wr == 0)
    {
      /* no progress possible; avoid spinning forever */
      errno = EIO;
      return -1;
    }

    data += wr;
    count -= (size_t) wr;
  }

  return 0;
}

/*
 * Read the document body from docu->datasock.
 *
 * Each chunk read is optionally written to the file behind docu->s_sock
 * (when docu->save_online is set) and optionally appended to
 * docu->contents (when the document must be kept in memory: docu->load,
 * docu->is_parsable, FTP directories, Gopher selectors '1' and 'h').
 * After every chunk the maximal transfer rate (cfg.maxrate) is enforced
 * by sleeping and the quotas are checked with doc_check_quotas().
 *
 * When the transfer was resumed (docu->rest_pos != 0) and the content is
 * needed in memory, the whole document is read back from the local file
 * once the transfer finished successfully.
 *
 * On return docu->size holds the number of bytes transferred (or the
 * size of the reread file) and docu->contents, if present, is
 * NUL-terminated.
 *
 * Returns 0 on success, -1 on failure (docu->errcode is set).
 */
static int doc_transfer_data(doc * docu)
{
  char *buf;
  int bufsize;
  ssize_t len, totallen = 0;
  int retcode = 0;

  if(docu->report_size)
    gui_set_status(gettext("Transfering data"));

  show_progress(docu, docu->adj_sz, FALSE);

  /* cfg.bufsize is given in kilobytes; fall back to 1 kB when unset */
  bufsize = (cfg.bufsize > 0 ? cfg.bufsize : 1) * 1024;
  buf = _malloc(bufsize);

#ifdef SO_RCVBUF
#ifndef __QNX__
  if(bufio_is_sock(docu->datasock))
  {
    if(setsockopt(bufio_getfd(docu->datasock),
        SOL_SOCKET, SO_RCVBUF, (char *) &bufsize, sizeof(bufsize)))
    {
      xperror(gettext("setsockopt: SO_RCVBUF failed"));
    }
  }
#endif
#endif
  if(docu->save_online)
  {
    DEBUG_USER("Storing to file: %s\n", url_to_filename(docu->doc_url, TRUE));
  }

  /* when dumping online, optionally emit the response header first */
  if(docu->mime && cfg.dump_resp && cfg.dumpfd >= 0 && !cfg.dump_after)
    bufio_write(docu->s_sock, docu->mime, strlen(docu->mime));

  while((len = abs_read_data(docu, docu->datasock, buf, bufsize)) > 0)
  {
    if(docu->save_online)
    {
      /* a short write is not an error by itself, so keep writing */
      /* until the whole chunk is stored or write() really fails  */
      if(doc_write_fully(bufio_getfd(docu->s_sock), buf, (size_t) len))
      {
        docu->errcode = ERR_STORE_DOC;
        xperror(gettext("storing document"));
        retcode = -1;
        if(docu->doc_url->type == URLT_FTP
          || docu->doc_url->type == URLT_FTPS)
          docu->ftp_fatal_err = TRUE;
        if(docu->is_http11)
          docu->is_persistent = FALSE;
        break;
      }
    }

    totallen += len;
    docu->current_size += len;

    /*** throttle the transfer when it runs faster than allowed ***/
    if(cfg.maxrate > 0.0 &&
      (docu->doc_url->type != URLT_FILE &&
        !(docu->doc_url->status & URL_REDIRECT)))
    {
      time_t _tm = doc_etime(docu, FALSE);
      double _rt = compute_speed_rate(_tm, totallen);
      if(_rt > (cfg.maxrate * 1024.0))
      {
        tl_msleep((time_t) (1000.0 * ((double) totallen) / (cfg.maxrate *
              1024.0)) - _tm);
      }
    }

    docu->size = totallen;
    show_progress(docu, docu->adj_sz, FALSE);

    /*** keep a copy in memory when the content will be processed ***/
    if(docu->load || docu->is_parsable ||
      ((docu->doc_url->type == URLT_FTP ||
          docu->doc_url->type == URLT_FTPS) &&
        docu->doc_url->p.ftp.dir) ||
      (docu->doc_url->type == URLT_GOPHER &&
        (docu->doc_url->p.gopher.selector[0] == '1' ||
          docu->doc_url->p.gopher.selector[0] == 'h')))
    {
      /* one extra byte is reserved for the terminating NUL set below */
      docu->contents = _realloc(docu->contents, totallen + 1);
      memmove(docu->contents + totallen - len, buf, len);
    }

    retcode = doc_check_quotas(docu, len, totallen);

    if(retcode)
    {
      /* 1 == per-file quota reached: stop reading, but not an error */
      if(retcode == 1)
        retcode = 0;
      break;
    }
    if(docu->totsz > 0 && docu->totsz <= docu->current_size)
      break;
  }

  show_progress(docu, docu->adj_sz, TRUE);

  if(cfg.dumpfd >= 0 && !cfg.dump_after)
  {
    bufio_close(docu->s_sock);
    docu->s_sock = NULL;
    docu->save_online = FALSE;
  }

  if(cfg.progres && docu->report_size
#ifdef I_FACE
    && !cfg.xi_face
#endif
  )
  {
    xprintf(0, "\n");
  }

  /*** read error, or fewer/more bytes than the announced size ***/
  if(len < 0 || ((docu->totsz > 0)
  && (docu->totsz != (docu->size + docu->rest_pos))))
  {
    xperror(gettext("Document transfer data"));
    if((docu->doc_url->type == URLT_HTTP || docu->doc_url->type ==
    URLT_HTTPS) && (!(docu->doc_url->status & URL_REDIRECT)))
    {
      docu->errcode = ERR_HTTP_TRUNC;
    }
    else if((docu->doc_url->type == URLT_FTP || docu->doc_url->type ==
    URLT_FTPS) && (!(docu->doc_url->status & URL_REDIRECT)))
    {
      docu->errcode = ERR_FTP_TRUNC;
    }
    else if(!docu->errcode)
      docu->errcode = ERR_READ;

    docu->remove_lock = FALSE;
    retcode = -1;
  }

  if(docu->report_size)
    gui_set_status(gettext("Data transfer done"));

  if((docu->doc_url->type == URLT_FTP ||
      docu->doc_url->type == URLT_FTPS) && docu->errcode == ERR_FTP_TRUNC)
  {
    docu->remove_lock = FALSE;
    retcode = -1;
  }

  /*** if the transfer did not start at the beginning, reread ***/
  /*** the document content into memory from the local file   ***/
  /*** to be sure the whole document will be processed        ***/
  if(!retcode &&
    docu->rest_pos && (docu->load || docu->is_parsable) && (cfg.dumpfd < 0))
  {
    _free(docu->contents);
    totallen = 0;
    lseek(bufio_getfd(docu->s_sock), 0, SEEK_SET);
    bufio_reset(docu->s_sock);

    while((len = bufio_read(docu->s_sock, buf, bufsize)) > 0)
    {
      totallen += len;
      docu->contents = _realloc(docu->contents, totallen + 1);
      memmove(docu->contents + totallen - len, buf, len);
    }
  }

  if(docu->contents)
    *(docu->contents + totallen) = '\0';

  _free(buf);
  docu->size = totallen;

  return retcode;
}
453 
/*
 * Inspect the local file that corresponds to the document URL before
 * any connection is opened.
 *
 * docu - document being downloaded
 * rv   - receives the value the caller should return whenever this
 *        function returns non-zero
 *
 * In sync and mirror mode only the size and modification time of an
 * existing local copy are recorded.  In the other modes an existing
 * local file turns the URL into a redirect to that file; an existing
 * local directory that contains a (non-directory) index file redirects
 * the URL to the same path with a trailing slash.  For file URLs and
 * redirects that need not be loaded, the function also decides whether
 * the local file is parsable.
 *
 * Returns 0 when the download should go on, -1 when the caller has to
 * stop and return *rv.
 */
static int doc_check_doc_file(doc * docu, int *rv)
{
  struct stat st;
  char *local_name;
  int plain_ftp_file;

  local_name = url_to_filename(docu->doc_url, TRUE);

  /*** sync/mirror: just remember size and time of the local copy ***/
  if(cfg.mode == MODE_SYNC || cfg.mode == MODE_MIRROR)
  {
    if(stat(local_name, &st) == 0)
    {
      docu->origsize = st.st_size;
      /* the modification time is recorded together with the size */
      docu->origtime = st.st_mtime;
    }
    return 0;
  }

  /*** is there already a readable local copy of a remote document? ***/
  if(docu->doc_url->type != URLT_FILE &&
    access(local_name, R_OK) != -1 && stat(local_name, &st) == 0)
  {
    if(!S_ISDIR(st.st_mode))
    {
      docu->doc_url->status |= URL_REDIRECT;
    }
    else
    {
      char *dir_path = url_get_path(docu->doc_url);
      char *index_fn =
        tl_str_concat(NULL, local_name, "/", priv_cfg.index_name, NULL);

      if(stat(index_fn, &st) == 0)
      {
        _free(index_fn);
        if(!S_ISDIR(st.st_mode))
        {
          /* directory with an index file: redirect to "<path>/" */
          url *target = url_dup_url(docu->doc_url);

          if(target->type != URLT_FILE)
            index_fn = tl_str_concat(NULL, dir_path, "/", NULL);
          if(target->type == URLT_FTP || target->type == URLT_FTPS)
            target->p.ftp.dir = TRUE;

          url_set_path(target, index_fn);
          _free(index_fn);

          docu->errcode = url_redirect_to(docu->doc_url, target, FALSE) ?
            ERR_HTTP_CYCLIC : ERR_HTTP_REDIR;

          *rv = -1;
          return -1;
        }
      }
      _free(index_fn);
      local_name = url_to_filename(docu->doc_url, TRUE);
    }
  }

  /*** the rest only concerns local documents that are not loaded ***/
  if(docu->load ||
    (docu->doc_url->type != URLT_FILE &&
      !(docu->doc_url->status & URL_REDIRECT)))
    return 0;

  if(stat(local_name, &st) != 0)
  {
    docu->errcode = ERR_FILE_OPEN;
    *rv = -1;
    return -1;
  }

  if(S_ISDIR(st.st_mode))
  {
    docu->errcode = ERR_DIR_URL;
    *rv = -1;
    return -1;
  }

  /* regular FTP file (not a directory index) while FTP HTML parsing is off */
  plain_ftp_file = !cfg.ftp_html &&
    strcmp(tl_get_basename(local_name), priv_cfg.index_name) &&
    (docu->doc_url->type == URLT_FTP ||
      docu->doc_url->type == URLT_FTPS) && !docu->doc_url->p.ftp.dir;

  if(plain_ftp_file || !file_is_html(local_name))
  {
    /*** nothing to parse, the local file is used as it is ***/
    docu->is_parsable = FALSE;
    docu->save_online = TRUE;
    docu->size = st.st_size;
#ifdef I_FACE
    if(cfg.xi_face)
      doc_set_info(docu);
#endif
    xprintf(1, gettext("File redirect\n"));
    *rv = 0;
    return -1;
  }

  if(!strcasecmp("css", tl_get_extension(local_name)))
    docu->doc_url->status |= URL_STYLE;
  docu->is_parsable = TRUE;

  return 0;
}
566 
/*
 * Look for an already existing partial download ("in" file) of the
 * document and prepare the transfer to resume from it.
 *
 * docu   - document being downloaded
 * b_lock - passed through to doc_lock()
 * rv     - receives the value the caller should return whenever this
 *          function returns non-zero
 *
 * Nothing is done when dumping to cfg.dumpfd or when the URL has no
 * "in" file name.  Otherwise, if the file exists and is not a directory,
 * the document is locked and docu->rest_pos is set to the file size
 * minus cfg.rollback (never below zero).  A non-zero restart position
 * enables the resume (docu->doreget) and keeps the lock file afterwards.
 *
 * Returns 0 on success, -1 (docu->errcode = ERR_STORE_DOC, *rv = -1)
 * when the document cannot be locked.
 */
static int doc_open_existing_in_file(doc * docu, int b_lock, int *rv)
{
  char *inname;
  struct stat estat;

  if((cfg.dumpfd < 0) && (inname = url_to_in_filename(docu->doc_url)))
  {
    if(!stat(inname, &estat) && !S_ISDIR(estat.st_mode))
    {
      if(doc_lock(docu, b_lock))
      {
        docu->errcode = ERR_STORE_DOC;
        _free(inname);
        *rv = -1;
        return -1;
      }

      /* A rollback larger than the file would give a negative restart */
      /* position, which is non-zero and would wrongly trigger a       */
      /* resume; restart from the beginning in that case.              */
      if(estat.st_size > cfg.rollback)
        docu->rest_pos = estat.st_size - cfg.rollback;
      else
        docu->rest_pos = 0;

      if(docu->rest_pos)
      {
        xprintf(1, gettext("Trying to resume from position %d\n"),
          docu->rest_pos);

        docu->origtime = estat.st_mtime;
        docu->stime = estat.st_mtime;
        docu->doreget = TRUE;
        docu->remove_lock = FALSE;
      }
    }
    _free(inname);
  }

  return 0;
}
602 
/*
 * Lock the document for storing, unless the data is dumped to
 * cfg.dumpfd or docu->s_sock is already set.
 *
 * docu   - document being downloaded
 * b_lock - passed through to doc_lock()
 *
 * Returns 0 on success (or when there is nothing to do), -1 with
 * docu->errcode = ERR_STORE_DOC when doc_lock() fails.
 */
static int doc_open_new_in_file(doc * docu, int b_lock)
{
  if(cfg.dumpfd >= 0 || docu->s_sock)
    return 0;

  if(cfg.post_update && docu->type_str)
  {
    /** dirty hack, but it is required to support  **/
    /** naming of files by their MIME type         **/
    url_forget_filename(docu->doc_url);
    url_to_filename_with_type(docu->doc_url, docu->type_str, TRUE);
  }

  if(doc_lock(docu, b_lock) == 0)
    return 0;

  docu->errcode = ERR_STORE_DOC;
  return -1;
}
624 
625 /********************************************************/
626 /* nacitanie dokumentu + specificke upravy              */
627 /* FIXME: Translate me!                                 */
628 /********************************************************/
doc_download_helper(doc * docu,int load,int b_lock)629 static int doc_download_helper(doc * docu, int load, int b_lock)
630 {
631   char *p = NULL, *p1 = 0;
632   ssize_t len = 0;
633   int retcode = 0;
634   int rv;
635   struct stat estat;
636   bufio *saved_datasock = NULL;
637 
638   if(doc_download_init(docu, load))
639     return -1;
640 
641   gettimeofday(&docu->hr_start_time, NULL);
642 
643   if(doc_check_doc_file(docu, &rv))
644     return rv;
645 
646   if(doc_open_existing_in_file(docu, b_lock, &rv))
647     return rv;
648 
649   if(docu->report_size)
650     gui_set_status(gettext("Opening connection"));
651 
652 
653   if(!(docu->datasock = abs_get_data_socket(docu)) &&
654     /*
655        pro: add test for errcodes. The errcode tests are needed;
656        otherwise the "if" some lines later will never get
657        anything to do...
658      */
659     docu->errcode != ERR_FTP_ACTUAL && docu->errcode != ERR_HTTP_ACTUAL)
660   {
661     if(docu->mime &&
662       docu->doc_url->type != URLT_FILE &&
663       !(docu->doc_url->status & URL_REDIRECT))
664       dinfo_save(docu);
665     docu->remove_lock = FALSE;
666     abs_close_socket(docu, FALSE);
667     return -1;
668   }
669 
670   doc_etime(docu, TRUE);
671 
672   if(docu->errcode == ERR_HTTP_ACTUAL || docu->errcode == ERR_FTP_ACTUAL)
673   {
674     abs_close_socket(docu, FALSE);
675     saved_datasock = docu->datasock;
676 
677     if(docu->load || docu->is_parsable)
678     {
679       xprintf(1, gettext("Loading local copy\n"));
680 
681       p = url_to_filename(docu->doc_url, TRUE);
682       if(!(docu->datasock = bufio_open(p, O_BINARY | O_RDONLY)))
683       {
684         docu->datasock = saved_datasock;
685         docu->errcode = ERR_FILE_OPEN;
686         return -1;
687       }
688       docu->doc_url->status |= URL_REDIRECT;
689       docu->doc_url->status |= URL_ISLOCAL;
690       docu->save_online = FALSE;
691     }
692     else
693     {
694       docu->doc_url->status |= URL_REDIRECT;
695       return 1;
696     }
697   }
698   else if(docu->errcode == ERR_HTTP_NOREGET)
699   {
700     if(!cfg.freget)
701     {
702       docu->is_persistent = FALSE;
703       abs_close_socket(docu, FALSE);
704       docu->remove_lock = FALSE;
705       docu->ftp_fatal_err = TRUE;
706       return -1;
707     }
708     else
709     {
710       docu->rest_pos = 0;
711     }
712   }
713   else if(docu->errcode == ERR_FTP_NOREGET)
714   {
715     if(!cfg.freget)
716     {
717       docu->is_persistent = FALSE;
718       abs_close_socket(docu, FALSE);
719       docu->remove_lock = FALSE;
720       docu->ftp_fatal_err = TRUE;
721       return -1;
722     }
723     else
724     {
725       docu->rest_pos = 0;
726     }
727   }
728   else if(docu->errcode)
729   {
730     if(docu->mime &&
731       docu->doc_url->type != URLT_FILE &&
732       !(docu->doc_url->status & URL_REDIRECT))
733       dinfo_save(docu);
734     docu->is_persistent = FALSE;
735     docu->ftp_fatal_err = TRUE;
736     abs_close_socket(docu, FALSE);
737     return -1;
738   }
739 
740   if(doc_open_new_in_file(docu, b_lock))
741   {
742     docu->is_persistent = FALSE;
743     docu->ftp_fatal_err = TRUE;
744     abs_close_socket(docu, FALSE);
745     return -1;
746   }
747 
748   if(docu->doc_url->type != URLT_FILE &&
749     !(docu->doc_url->status & URL_REDIRECT))
750     dinfo_save(docu);
751 
752   if((((docu->doc_url->type == URLT_FTP ||
753           docu->doc_url->type == URLT_FTPS) &&
754         !docu->doc_url->p.ftp.dir) ||
755       (docu->doc_url->type == URLT_GOPHER &&
756         (docu->doc_url->p.gopher.selector[0] != '1' ||
757           docu->doc_url->p.gopher.selector[0] != 'h')) ||
758       (docu->doc_url->type == URLT_HTTP ||
759         docu->doc_url->type == URLT_HTTPS)) &&
760     !(docu->doc_url->status & URL_REDIRECT))
761   {
762     if(cfg.dumpfd >= 0 && cfg.dump_after)
763       docu->save_online = FALSE;
764     else
765       docu->save_online = TRUE;
766   }
767 
768   if(cfg.ftp_html &&
769     (docu->doc_url->type == URLT_FTP ||
770       docu->doc_url->type == URLT_FTPS) &&
771     ext_is_html(docu->doc_url->p.ftp.path))
772   {
773     docu->is_parsable = TRUE;
774   }
775 
776   if(docu->doc_url->status & URL_INNSCACHE)
777   {
778     fstat(bufio_getfd(docu->datasock), &estat);
779     docu->totsz = estat.st_size;
780     docu->is_parsable = (docu->doc_url->status & URL_ISHTML) != 0;
781   }
782 
783   if(docu->errcode == ERR_HTTP_FAILREGET)
784   {
785     docu->rest_pos = 0;
786     docu->save_online = FALSE;
787   }
788 
789   if(docu->save_online)
790   {
791     if(cfg.dumpfd < 0)
792     {
793       ftruncate(bufio_getfd(docu->s_sock), docu->rest_pos);
794       lseek(bufio_getfd(docu->s_sock), docu->rest_pos, SEEK_SET);
795       bufio_reset(docu->s_sock);
796     }
797   }
798 
799   /* We measure time-to-first-byte here again, to add all the processing
800    * timing noise (1-2ms) to the large value -- FB, which is typically
801    * on the order of 100ms, rather then to the time-to-last-byte,
802    * which is often around 0ms
803    */
804   gettimeofday(&docu->first_byte_time, NULL);
805 
806   retcode = doc_transfer_data(docu);
807 
808   abs_close_socket(docu, TRUE);
809 
810   if(!retcode)
811     docu->remove_lock = TRUE;
812 
813   if(docu->errcode == ERR_HTTP_ACTUAL)
814   {
815     docu->doc_url->status &= ~URL_REDIRECT;
816     docu->datasock = saved_datasock;
817   }
818 
819   if(!retcode && docu->doc_url->status & URL_INNSCACHE)
820   {
821     docu->is_parsable = (docu->doc_url->status & URL_ISHTML) != 0;
822   }
823   else if(!retcode &&
824     docu->doc_url->type == URLT_GOPHER &&
825     !(docu->doc_url->status & URL_REDIRECT) &&
826     !(priv_cfg.gopher_proxy && cfg.gopher_via_http))
827   {
828     docu->is_parsable = FALSE;
829 
830     /**** convert Gopher directory to HTML ****/
831     if(docu->doc_url->p.gopher.selector[0] == '1')
832     {
833       if(!(docu->doc_url->status & URL_REDIRECT))
834         gopher_dir_to_html(docu);
835       docu->is_parsable = TRUE;
836     }
837     else if(docu->doc_url->p.gopher.selector[0] == 'h')
838     {
839       docu->is_parsable = TRUE;
840     }
841   }
842   else if(!retcode &&
843     (docu->doc_url->type == URLT_FTP ||
844       docu->doc_url->type == URLT_FTPS) &&
845     !(priv_cfg.ftp_proxy && cfg.ftp_via_http && !cfg.ftp_dirtyp) &&
846     !(docu->doc_url->status & URL_REDIRECT))
847   {
848     docu->is_parsable = ext_is_html(docu->doc_url->p.ftp.path) != 0;
849 
850     /*** convert FTP directory listing to HTML ***/
851     if(docu->doc_url->p.ftp.dir)
852     {
853       if(!(docu->doc_url->status & URL_REDIRECT))
854       {
855         ftp_dir_to_html(docu);
856       }
857       docu->is_parsable = TRUE;
858     }
859   }
860   else if(docu->is_http_transfer && !retcode &&
861     !(docu->doc_url->status & URL_REDIRECT))
862   {
863     http_response *resp;
864 
865     /*** get HTTP response status info ***/
866     resp = http_get_response_info(docu->mime);
867 
868     if(resp)
869     {
870       /*** set proper HTTP error code ***/
871       if(resp->ret_code >= 400)
872       {
873         docu->errcode = 2000 + resp->ret_code;
874         http_response_free(resp);
875         return -1;
876       }
877 
878       /*** redirect to other URL ***/
879       if(resp->ret_code == 303 ||
880         resp->ret_code == 302 ||
881         resp->ret_code == 307 || resp->ret_code == 301)
882       {
883         http_handle_redirect(docu, resp->ret_code);
884         http_response_free(resp);
885         if(docu->is_persistent)
886         {
887           if(docu->doc_url->moved_to &&
888             ((url_get_port(docu->doc_url) !=
889                 url_get_port(docu->doc_url->moved_to))
890               || strcmp(url_get_site(docu->doc_url),
891                 url_get_site(docu->doc_url->moved_to))))
892           {
893             abs_close_socket(docu, TRUE);
894           }
895           docu->is_persistent = FALSE;
896         }
897         return -1;
898       }
899 
900       http_response_free(resp);
901     }
902 
903 
904     /*** check if document was downloaded whole  ***/
905     /*** when we know real document size and no  ***/
906     /*** other error was detected before         ***/
907     if(cfg.check_size && docu->totsz > 0 && docu->errcode == ERR_NOERROR)
908     {
909       /*** if docu->contents && docu->rest_pos  ***/
910       /*** document was reareaded from file and ***/
911       /*** docu->size is total len              ***/
912       if(docu->totsz != docu->size + (docu->contents ? 0 : docu->rest_pos))
913       {
914         docu->errcode = ERR_HTTP_TRUNC;
915         docu->remove_lock = FALSE;
916         retcode = -1;
917         xprintf(1, gettext("File may be truncated\n"));
918       }
919     }
920 
921     /*** handle encoded document and decode   ***/
922     /*** it if possible and user requested it ***/
923     p = get_mime_param_val_str("Content-Encoding:", docu->mime);
924     if(cfg.use_enc && !retcode && p &&
925       (!strncasecmp(docu->type_str, "text/plain", 10) ||
926         !strncasecmp(docu->type_str, "text/css", 8) ||
927         !strncasecmp(docu->type_str, "text/html", 9)))
928     {
929       if((!strcasecmp(p, "x-gzip")) ||
930         (!strcasecmp(p, "gzip")) ||
931         (!strcasecmp(p, "x-compress")) || (!strcasecmp(p, "compress")))
932       {
933         if(!gzip_decode(docu->contents, docu->size,
934             &p1, &len, (docu->contents ? NULL : docu->lock_fn)))
935         {
936           docu->size = len;
937           _free(docu->contents);
938           docu->contents = p1;
939           xprintf(1, gettext("Decoding document - OK\n"));
940         }
941         else
942           xperror(gettext("Decoding document - failed\n"));
943       }
944       else if(!strcasecmp(p, "deflate"))
945       {
946         if(!inflate_decode(docu->contents, docu->size,
947             &p1, &len, (docu->contents ? NULL : docu->lock_fn)))
948         {
949           docu->size = len;
950           _free(docu->contents);
951           docu->contents = p1;
952           xprintf(1, gettext("Decoding document - OK\n"));
953         }
954         else
955         {
956           xperror(gettext("Deflating document - failed\n"));
957         }
958       }
959       else
960         xprintf(1, gettext("Unsupported document encoding\n"));
961     }
962     else if(p && !retcode)
963     {
964       xprintf(1,
965         gettext
966         ("Received Encoded file but decoding not allowed (untouched)\n"));
967     }
968     _free(p);
969   }
970   else
971   {
972     if(docu->doc_url->type == URLT_FILE ||
973       (docu->doc_url->status & URL_REDIRECT))
974     {
975       p1 = url_to_filename(docu->doc_url, TRUE);
976 
977       if(file_is_html(p1))
978       {
979         docu->is_parsable = TRUE;
980       }
981     }
982     else
983     {
984       docu->is_parsable = FALSE;
985     }
986   }
987 
988   if(docu->totsz > 0 &&
989     docu->size == 0 &&
990     (docu->doc_url->type == URLT_HTTP || docu->doc_url->type == URLT_HTTPS))
991   {
992     if(!docu->errcode)
993       docu->errcode = ERR_ZERO_SIZE;
994     docu->remove_lock = FALSE;
995     retcode = -1;
996   }
997 #ifdef I_FACE
998   if(cfg.xi_face)
999     doc_set_info(docu);
1000 #endif
1001   if(!retcode && docu->lock_fn && docu->save_online &&
1002     (cfg.dumpfd < 0) && !docu->contents &&
1003     (cfg.mode != MODE_NOSTORE) &&
1004     (cfg.mode != MODE_FTPDIR) &&
1005     !(docu->doc_url->status & URL_REDIRECT) &&
1006     (docu->doc_url->type != URLT_FILE))
1007   {
1008     p1 = url_to_filename(docu->doc_url, TRUE);
1009 
1010     if(!access(p1, F_OK))
1011     {
1012       if(unlink(p1))
1013         xperror(p1);
1014     }
1015 
1016     if(link(docu->lock_fn, p1))
1017     {
1018 #ifdef __CYGWIN__
1019       if(errno != EPERM && errno != EACCES)
1020 #elif __BEOS__
1021       /* ?? no working link() on BeOS ?? */
1022       if(FALSE)
1023 #else
1024       if(errno != EPERM)
1025 #endif
1026         xperror(p1);
1027       else
1028       {
1029         if(copy_fd_to_file(bufio_getfd(docu->s_sock), p1))
1030           xperror(p1);
1031       }
1032     }
1033 
1034     if(cfg.preserve_time && docu->dtime)
1035     {
1036       struct utimbuf utmbf;
1037 
1038       stat(p1, &estat);
1039       utmbf.actime = estat.st_atime;
1040       utmbf.modtime = docu->dtime;
1041       utime(p1, &utmbf);
1042     }
1043 
1044     if(cfg.preserve_perm &&
1045       (docu->doc_url->type == URLT_FTP ||
1046         docu->doc_url->type == URLT_FTPS) &&
1047       docu->doc_url->extension &&
1048       (((ftp_url_extension *) docu->doc_url->extension)->perm > 0))
1049     {
1050       chmod(p1, ((ftp_url_extension *) docu->doc_url->extension)->perm);
1051     }
1052   }
1053 
1054   return retcode;
1055 }
1056 
/*
 * Download a document: run doc_download_helper(), record the end time
 * of the download in docu->end_time and pass the document to time_log().
 *
 * Returns the value returned by doc_download_helper().
 */
int doc_download(doc * docu, int load, int b_lock)
{
  int result;

  result = doc_download_helper(docu, load, b_lock);

  /* the end time is taken before logging, whatever the result was */
  gettimeofday(&docu->end_time, NULL);
  time_log(docu);

  return result;
}
1064 
1065 /********************************************************/
1066 /* ulozi dokument ak je to potrebne vytvori adresare    */
1067 /* FIXME: Translate me!                                 */
1068 /********************************************************/
doc_store(doc * docu,int overwrite)1069 int doc_store(doc * docu, int overwrite)
1070 {
1071   char *pom;
1072   int f;
1073   struct utimbuf utmbf;
1074   struct stat estat;
1075 
1076   if(cfg.mode == MODE_NOSTORE || cfg.mode == MODE_FTPDIR)
1077     return 0;
1078 
1079   /*** don't store directory indexes ***/
1080   if(!cfg.store_index && url_is_dir_index(docu->doc_url))
1081     return 0;
1082 
1083   pom = url_to_filename(docu->doc_url, TRUE);
1084   if(makealldirs(pom))
1085     xperror(pom);
1086 
1087   if(!access(pom, R_OK) && !overwrite)
1088   {
1089     return 0;
1090   }
1091 
1092   /*
1093      pro: before we open the file we unlink it. This way we assure that
1094      other directory that have a hard link to our (old) file will still
1095      have a hard link to the old file.
1096    */
1097   if(cfg.remove_before_store)
1098   {
1099     unlink(pom);
1100   }
1101 
1102   if((f =
1103       open(pom, O_BINARY | O_CREAT | O_TRUNC | O_WRONLY,
1104         S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR)) == -1)
1105   {
1106     if(!access(pom, R_OK))
1107       unlink(pom);
1108     xperror(pom);
1109     return -1;
1110   }
1111 
1112   if(write(f, docu->contents, docu->size) != docu->size)
1113   {
1114     if(!access(pom, R_OK))
1115       unlink(pom);
1116     xperror(pom);
1117     close(f);
1118     return -1;
1119   }
1120 
1121   close(f);
1122 
1123   if(docu->dtime && cfg.preserve_time)
1124   {
1125     utmbf.modtime = docu->dtime;
1126   }
1127   else
1128   {
1129     utmbf.modtime = docu->stime;
1130   }
1131 
1132   stat(pom, &estat);
1133   utmbf.actime = estat.st_atime;
1134   utime(pom, &utmbf);
1135 
1136   if((docu->doc_url->type == URLT_FTP ||
1137       docu->doc_url->type == URLT_FTPS) &&
1138     docu->doc_url->extension &&
1139     cfg.preserve_perm &&
1140     (((ftp_url_extension *) docu->doc_url->extension)->perm > 0))
1141   {
1142     chmod(pom, ((ftp_url_extension *) docu->doc_url->extension)->perm);
1143   }
1144 
1145   return 0;
1146 }
1147 
1148 /*** remove improper documents if required ***/
doc_remove(url * urlr)1149 int doc_remove(url * urlr)
1150 {
1151   char *fn;
1152 
1153 #ifdef DEBUG
1154   if(cfg.debug)
1155   {
1156     fn = url_to_urlstr(urlr, FALSE);
1157     xprintf(1, gettext("Removing improper document : %s\n"), fn);
1158     _free(fn);
1159   }
1160 #endif
1161 
1162   fn = url_to_filename(urlr, TRUE);
1163 
1164   if(urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
1165   {
1166     char *p;
1167 
1168     p = strrchr(fn, '/');
1169     if(p)
1170       p++;
1171     else
1172       p = fn;
1173 
1174     /*** if URL FTPdir index ***/
1175     if(!strcmp(p, priv_cfg.index_name))
1176       *p = '\0';
1177 
1178     if(cfg.enable_info)
1179       dinfo_remove(fn);
1180     return unlink_recursive(fn);
1181   }
1182   else
1183   {
1184     if(cfg.enable_info)
1185       dinfo_remove(fn);
1186 
1187     if(!access(fn, F_OK) && unlink(fn))
1188     {
1189       xperror(fn);
1190       return -1;
1191     }
1192   }
1193 
1194   return 0;
1195 }
1196 
1197 #ifdef I_FACE
1198 /********************************************************/
1199 /* nastavenie info dokumentu pre informaciu pouzivatela */
1200 /* FIXME: Translate me!                                 */
1201 /********************************************************/
doc_set_info(doc * docp)1202 static void doc_set_info(doc * docp)
1203 {
1204 #ifdef WITH_TREE
1205   url_prop *prp = _malloc(sizeof(url_prop));
1206 
1207   prp->size = docp->size;
1208   prp->mdtm = docp->dtime;
1209   prp->type = NULL;
1210   switch (docp->doc_url->type)
1211   {
1212   case URLT_HTTP:
1213 #ifdef USE_SSL
1214   case URLT_HTTPS:
1215 #endif
1216     if(docp->type_str)
1217       prp->type = tl_strdup(docp->type_str);
1218     break;
1219   case URLT_FILE:
1220     prp->type = tl_strdup(gettext_nop("Local file"));
1221     break;
1222   case URLT_GOPHER:
1223     switch (docp->doc_url->p.gopher.selector[0])
1224     {
1225     case '0':
1226       prp->type = tl_strdup(gettext_nop("Gopher/Text File"));
1227       break;
1228     case '1':
1229       prp->type = tl_strdup(gettext_nop("Gopher/Directory"));
1230       break;
1231     case '2':
1232       prp->type = tl_strdup(gettext_nop("Gopher/CSO phone book"));
1233       break;
1234     case '3':
1235       prp->type = tl_strdup(gettext_nop("Gopher/Error"));
1236       break;
1237     case '4':
1238       prp->type = tl_strdup(gettext_nop("Gopher/BINHEX"));
1239       break;
1240     case '5':
1241       prp->type = tl_strdup(gettext_nop("Gopher/DOS bin"));
1242       break;
1243     case '6':
1244       prp->type = tl_strdup(gettext_nop("Gopher/UUencoded"));
1245       break;
1246     case '7':
1247       prp->type = tl_strdup(gettext_nop("Gopher/Search index"));
1248       break;
1249     case '8':
1250       prp->type = tl_strdup(gettext_nop("Gopher/Telnet session"));
1251       break;
1252     case '9':
1253       prp->type = tl_strdup(gettext_nop("Gopher/bin"));
1254       break;
1255     case '+':
1256       prp->type = tl_strdup(gettext_nop("Gopher/Duplicated server"));
1257       break;
1258     case 'T':
1259       prp->type = tl_strdup(gettext_nop("Gopher/TN3270"));
1260       break;
1261     case 'g':
1262       prp->type = tl_strdup(gettext_nop("Gopher/GIF"));
1263       break;
1264     case 'I':
1265       prp->type = tl_strdup(gettext_nop("Gopher/Image"));
1266       break;
1267     }
1268     break;
1269   case URLT_FTP:
1270     if(docp->doc_url->p.ftp.dir)
1271       prp->type = tl_strdup(gettext_nop("FTP/Directory"));
1272     else
1273       prp->type = tl_strdup(gettext_nop("FTP/File"));
1274     break;
1275   case URLT_FTPS:
1276     if(docp->doc_url->p.ftp.dir)
1277       prp->type = tl_strdup(gettext_nop("FTPS/Directory"));
1278     else
1279       prp->type = tl_strdup(gettext_nop("FTPS/File"));
1280     break;
1281   default:
1282     prp->type = tl_strdup(gettext_nop("Unsupported type"));
1283     break;
1284   }
1285 
1286   if(!prp->type)
1287     prp->type = tl_strdup(gettext_nop("Local file"));
1288 
1289   docp->doc_url->prop = prp;
1290 #endif
1291 }
1292 #endif
1293 
/*
 * Put a doc structure into its initial state for the URL `urlp`.
 * Every pointer member is cleared, counters are zeroed, the size
 * limits totsz / rest_end_pos are set to -1 and all timing members
 * are cleared with timerclear().
 */
void doc_init(doc * docp, url * urlp)
{
  /* identity */
  docp->doc_nr = 0;
  docp->doc_url = urlp;
  docp->is_parsable = cfg.enable_js && (urlp->status & URL_ISSCRIPT);
  docp->is_robot = FALSE;
  docp->errcode = ERR_NOERROR;

  /* content and meta data */
  docp->mime = NULL;
  docp->type_str = NULL;
  docp->contents = NULL;
  docp->etag = NULL;
  docp->additional_headers = NULL;
  docp->size = 0;
  docp->origsize = 0;
  docp->totsz = -1;
  docp->rest_pos = 0;
  docp->rest_end_pos = -1;
  docp->dtime = 0L;
  docp->stime = 0L;

  /* storing, locking and reporting */
  docp->save_online = FALSE;
  docp->lock_fn = NULL;
  docp->remove_lock = FALSE;
  docp->report_size = TRUE;
  docp->check_limits = TRUE;
  docp->s_sock = NULL;

  /* FTP state */
  docp->ftp_fatal_err = FALSE;
  docp->ftp_respc = 0;
  docp->ftp_pasv_host = NULL;
  docp->ftp_pasv_port = 0;
  docp->ftp_data_con_finished = FALSE;
  docp->ftp_control = NULL;
  docp->datasock = NULL;
#ifdef USE_SSL
  memset(&docp->ssl_data_con, '\0', sizeof(ssl_connection));
#endif

  /* authentication */
  docp->num_auth = 0;
  docp->num_proxy_auth = 0;
  docp->auth_digest = NULL;
  docp->auth_proxy_digest = NULL;

  /* HTTP transfer state */
  docp->is_http11 = FALSE;
  docp->is_http_transfer = FALSE;
  docp->is_persistent = FALSE;
  docp->is_chunked = FALSE;
  docp->chunk_size = 0;
  docp->read_chunksize = FALSE;
  docp->read_trailer = FALSE;
  docp->request_type = HTTP_REQ_UNKNOWN;

  /* proxy and connection target */
  docp->http_proxy = NULL;
  docp->http_proxy_port = DEFAULT_HTTP_PROXY_PORT;
  docp->http_proxy_10 = FALSE;
  docp->connect_host = NULL;
  docp->connect_port = 0;

#ifdef HAVE_MT
  /* per thread data */
  docp->__herrno = 0;
  docp->msgbuf = NULL;
  docp->threadnr = 0;
#endif

  /* timing marks */
  timerclear(&docp->hr_start_time);
  timerclear(&docp->redirect_time);
  timerclear(&docp->dns_time);
  timerclear(&docp->connect_time);
  timerclear(&docp->first_byte_time);
  timerclear(&docp->end_time);
}
1359 
/*
 * Format a transfer rate into `str`, scaled to B/s, kB/s, MB/s or GB/s
 * using powers of 1024.  The buffer is written with sprintf(), so the
 * caller has to supply enough room.  Returns `str`.
 */
static char *get_rate_str(char *str, double rate)
{
  const double kilo = 1024.0;
  const double mega = 1048576.0;
  const double giga = 1073741824.0;

  if(rate <= kilo)
  {
    sprintf(str, "%5.0f  B/s", rate);
    return str;
  }

  if(rate <= mega)
  {
    sprintf(str, "%5.1f kB/s", rate / kilo);
    return str;
  }

  if(rate <= giga)
  {
    sprintf(str, "%5.1f MB/s", rate / mega);
    return str;
  }

  sprintf(str, "%5.1f GB/s", rate / giga);
  return str;
}
1373 
/*
 * Format a time span given in milliseconds as "H:MM:SS" into `str`.
 * The buffer is written with sprintf(), so the caller has to supply
 * enough room.  Returns `str`.
 */
static char *get_time_str(char *str, time_t tm)
{
  /* time_t is not guaranteed to be long: convert every component */
  /* explicitly so the %ld conversions always receive a long      */
  long hours = (long) (tm / 3600000);
  long minutes = (long) ((tm % 3600000) / 60000);
  long seconds = (long) ((tm % 60000) / 1000);

  sprintf(str, "%ld:%02ld:%02ld", hours, minutes, seconds);

  return str;
}
1381 
/*
 * Format a size report into `str`.
 *
 * total  - expected size in bytes, 0 when it is not known
 * actual - number of bytes seen so far
 *
 * With a non-zero total the text is "actual / total" plus a percentage,
 * otherwise only the actual size.  Values of 1000000 and above are
 * printed in kB (divided by 1024).  Returns `str`.
 */
static char *get_size_str(char *str, int total, int actual)
{
  double percent;

  if(!total)
  {
    /* total unknown: report only what has been received */
    if(actual < 1000000)
      sprintf(str, "%6d B", actual);
    else
      sprintf(str, "%6d kB", actual / 1024);

    return str;
  }

  percent = 100.0 * (double) actual / (double) total;

  if(total < 1000000)
    sprintf(str, "%6d / %d B [%5.1f%%]", actual, total, percent);
  else
    sprintf(str, "%7d / %d kB [%5.1f%%]",
      actual / 1024, total / 1024, percent);

  return str;
}
1403 
/*
 * Elapsed-time helper for a document.
 *
 * init != 0 : store the current time in docp->start_time, return 0
 * init == 0 : return the time passed since docp->start_time, in
 *             milliseconds
 *
 * Without HAVE_GETTIMEOFDAY the value is derived from time(), i.e. it
 * only advances in whole seconds.
 */
time_t doc_etime(doc * docp, int init)
{
#ifdef HAVE_GETTIMEOFDAY
  struct timeval now;

  if(init)
  {
    gettimeofday(&docp->start_time, NULL);
    return 0;
  }

  gettimeofday(&now, NULL);

  /* seconds scaled up to ms plus microseconds scaled down to ms */
  return (1000 * (now.tv_sec - docp->start_time.tv_sec) +
    (now.tv_usec - docp->start_time.tv_usec) / 1000);
#else
  if(init)
  {
    docp->start_time = time(NULL);
    return 0;
  }

  return 1000 * (time(NULL) - docp->start_time);
#endif
}
1432 
/*
 * Return size * 1000 / etime as a double.  An etime of zero is
 * replaced by 1 so the division is always defined.
 */
static double compute_speed_rate(time_t etime, ssize_t size)
{
  double divisor = (etime == 0.0) ? 1.0 : (double) etime;

  return (double) size * 1000.0 / divisor;
}
1437 
/*
 * Report download progress for a document.
 *
 * docp  - document being transferred
 * adjsz - byte count added to docp->size for the rate computation
 * dolog - not used inside this function
 *
 * Builds size, rate, elapsed-time and (when a total size is known from
 * docp->totsz or the FTP URL extension) remaining-time strings, then
 * prints them to the terminal and/or passes them to the GUI.
 */
static void show_progress(doc * docp, ssize_t adjsz, int dolog)
{
  time_t elapsed = doc_etime(docp, FALSE);
  double rate = compute_speed_rate(elapsed, docp->size + adjsz);
  char s_rate[30] = "", s_etime[30] = "", s_rtime[30] = "", s_size[30] = "";
  ftp_url_extension *fe = NULL;
  int to_console;

  /* only FTP/FTPS URLs carry an ftp_url_extension */
  if(docp->doc_url->type == URLT_FTP || docp->doc_url->type == URLT_FTPS)
    fe = (ftp_url_extension *) docp->doc_url->extension;

  if(docp->totsz >= 0 || (fe && fe->size > 0))
  {
    int size = docp->totsz >= 0 ? docp->totsz : fe->size;

    /* estimated total duration minus the time already spent */
    time_t rtime =
      (time_t) ((double) (size -
        docp->rest_pos) / (double) (docp->size ? docp->size : 10) *
      (double) ((elapsed != 0.0) ? elapsed : 1.0)) - elapsed;

    get_time_str(s_rtime, rtime);
    get_size_str(s_size, size, docp->size + docp->rest_pos);
  }
  else
  {
    /* total unknown: s_rtime stays empty */
    get_size_str(s_size, 0, docp->size + docp->rest_pos);
  }

  get_rate_str(s_rate, rate);
  get_time_str(s_etime, elapsed);

  to_console = cfg.progres && docp->report_size && !cfg.quiet && !cfg.bgmode;
#ifdef I_FACE
  to_console = to_console && !cfg.xi_face;
#endif

  if(to_console)
  {
    if(*s_rtime)
      xprintf(0, gettext("S: %s [R: %s] [ET: %s] [RT: %s]"),
        s_size, s_rate, s_etime, s_rtime);
    else
      xprintf(0, gettext("S: %s [R: %s] [ET: %s]"), s_size, s_rate, s_etime);

    /* return to the start of the line so the next report overwrites it */
    xprintf(0, " \r");
  }

#ifdef I_FACE
  if(docp->report_size && cfg.xi_face)
  {
    gui_set_progress(s_size, s_rate, s_etime, s_rtime);
  }
#endif
}
1490 
/*
 * Acquire the lock file of a document.
 *
 * docp   - document to lock; on success docp->s_sock holds the opened
 *          lock file and docp->lock_fn its name
 * b_lock - non-zero: keep retrying (one second apart) until the lock
 *          is obtained; zero: make a single attempt
 *
 * The lock file name comes from url_to_in_filename().  The file is
 * created if needed and locked with _flock().  After a successful
 * _flock() the function waits 50 ms and verifies that the file still
 * exists; a lock on a file that has disappeared is never reported as
 * acquired.
 *
 * Returns 0 when the lock is held, 1 when it is not (docp->s_sock is
 * NULL then) and -1 when no lock file name could be obtained.
 */
int doc_lock(doc * docp, int b_lock)
{
  char *lock_name;
  int cyclenr = 0;
  bufio *s_sock = NULL;

  if(!(lock_name = url_to_in_filename(docp->doc_url)))
    return -1;

  do
  {
    if(makealldirs(lock_name))
    {
      if(errno != ENOENT)
      {
        xperror(lock_name);
        docp->errcode = ERR_STORE_DOC;
        break;
      }
      else
        continue;
    }
    else
    {
      /* announce the wait once, after the first failed attempt */
      if(cyclenr == 1)
      {
        xprintf(0, gettext("Waiting to releases document lock on: %s\n"),
          lock_name);
      }

      if(!(s_sock = bufio_copen(lock_name,
            O_BINARY | O_RDWR | O_CREAT, 0644)))
      {
        if(errno != ENOENT)
        {
          xperror(lock_name);
          docp->errcode = ERR_STORE_DOC;
          break;
        }
      }
      if(s_sock)
      {
        if(_flock(bufio_getfd(s_sock), lock_name,
            O_BINARY | O_RDWR | O_CREAT, FALSE))
        {
          if(!b_lock)
            docp->errcode = ERR_LOCKED;
          bufio_close(s_sock);
          s_sock = NULL;
        }
        else
        {
          tl_msleep(50);
          if(access(lock_name, F_OK))
          {
            /* The lock file is gone although we hold a handle on   */
            /* it.  Drop the handle in both modes: previously the   */
            /* non-blocking case set ERR_LOCKED but kept the handle */
            /* and therefore still reported success.                */
            if(!b_lock)
              docp->errcode = ERR_LOCKED;
            bufio_close(s_sock);
            s_sock = NULL;
          }
        }
      }

      if(b_lock && !s_sock)
      {
        tl_sleep(1);
        cyclenr++;
      }
    }
  }
  while(b_lock && !s_sock);

  if(!s_sock)
  {
    _free(lock_name);
    docp->s_sock = NULL;
  }
  else
  {
    docp->s_sock = s_sock;
    docp->lock_fn = lock_name;
  }

  return (s_sock == NULL);
}
1579 
1580 /****************************************************/
1581 /* Unlock document and remove lock file if required */
1582 /****************************************************/
doc_remove_lock(doc * docp)1583 void doc_remove_lock(doc * docp)
1584 {
1585   struct utimbuf utmbf;
1586 
1587   if(docp->s_sock)
1588   {
1589     DEBUG_LOCKS("Unlocking document %s\n", docp->lock_fn);
1590     /*_funlock(bufio_getfd(docp->s_sock));*/
1591     bufio_close(docp->s_sock);
1592     docp->s_sock = NULL;
1593 
1594     /* required because close() causes modification time */
1595     /* change on hard linked file on w2k (maybe generaly */
1596     /* on winnt or just on ntfs ???          */
1597     if(cfg.preserve_time && docp->dtime)
1598     {
1599       struct utimbuf utmbf;
1600       struct stat estat;
1601 
1602       stat(docp->lock_fn, &estat);
1603       utmbf.actime = estat.st_atime;
1604       utmbf.modtime = docp->dtime;
1605       utime(docp->lock_fn, &utmbf);
1606     }
1607 
1608     if(docp->remove_lock)
1609     {
1610       unlink(docp->lock_fn);
1611     }
1612     else
1613     {
1614       utmbf.actime = time(NULL);
1615 
1616       if(docp->dtime && cfg.preserve_time)
1617         utmbf.modtime = docp->dtime;
1618       else
1619         utmbf.modtime = docp->stime;
1620 
1621       utime(docp->lock_fn, &utmbf);
1622     }
1623 
1624     _free(docp->lock_fn);
1625   }
1626 }
1627 
/*
 * Remove the directories leading to a document's local file, starting
 * with the innermost one and walking upwards.  The walk ends at the
 * first directory that cannot be removed, or once the remaining path
 * is no longer than priv_cfg.cache_dir.  When the document has no MIME
 * data and cfg.enable_info is set, its info entry is removed first.
 */
static void doc_make_clean_dir(doc * docp)
{
  char *path;
  char *slash;

  /* work on a private copy, it is shortened in place below */
  path = tl_strdup(url_to_filename(docp->doc_url, FALSE));

  if(!docp->mime && cfg.enable_info)
    dinfo_remove(path);

  /* drop the file name, keep its directory */
  slash = strrchr(path, '/');
  if(slash)
    *slash = '\0';

  for(;;)
  {
    if(strlen(path) <= strlen(priv_cfg.cache_dir))
      break;

    if(rmdir(path))
    {
      /* a non-empty or missing directory is expected, stay silent */
      if(errno != ENOTEMPTY && errno != ENOENT && errno != EEXIST)
        xperror(path);
      break;
    }

    /* step up to the parent directory */
    slash = strrchr(path, '/');
    if(slash)
      *slash = '\0';
  }

  _free(path);
}
1657 
/*
 * Finish the processing of one document.
 *
 * Notifies the GUI, writes the short log entry, updates cfg.fail_cnt
 * and cfg.process_cnt between LOCK_FAILCNT / UNLOCK_FAILCNT, and - when
 * the document ended with an error and neither its file nor its lock
 * file exists - removes the now useless directories and forgets the
 * URL's file name.  Afterwards the buffers owned by the document are
 * released; the digest authentication data is released only when nonce
 * reuse is switched off.
 */
void doc_cleanup(doc * docu)
{
  gui_finish_document(docu);

  short_log(docu, docu->doc_url);

  LOCK_FAILCNT;
  /* neither downloaded nor redirected counts as a failure */
  if(!(docu->doc_url->status & URL_DOWNLOADED) &&
    !(docu->doc_url->status & URL_REDIRECT))
    cfg.fail_cnt++;

  cfg.process_cnt++;
  UNLOCK_FAILCNT;

  if(cfg.xi_face)
    gui_tree_set_icon_for_doc(docu);

  if(docu->errcode)
  {
    char *target = url_to_filename(docu->doc_url, FALSE);
    char *lockfile = url_to_in_filename(docu->doc_url);

    /* nothing was left on disk for this URL */
    if(access(target, F_OK) && access(lockfile, F_OK))
    {
      doc_make_clean_dir(docu);
      url_forget_filename(docu->doc_url);
    }
    _free(lockfile);
  }

  _free(docu->mime);
  _free(docu->type_str);
  _free(docu->contents);
  _free(docu->etag);
  _free(docu->ftp_pasv_host);
  _free(docu->additional_headers);
  _free(docu->http_proxy);

  if(!cfg.auth_reuse_nonce)
  {
    if(docu->auth_digest)
      http_digest_deep_free(docu->auth_digest);
    docu->auth_digest = NULL;
  }

  if(!cfg.auth_reuse_proxy_nonce)
  {
    if(docu->auth_proxy_digest)
      http_digest_deep_free(docu->auth_proxy_digest);
    docu->auth_proxy_digest = NULL;
  }

  gui_clear_status();
}
1712 
/*
 * Release everything a document still holds: writes the short log
 * entry, closes the s_sock and datasock handles when they are open and
 * frees the buffers owned by the document.  The digest authentication
 * data is released only when nonce reuse is switched off.
 */
void doc_destroy(doc * docu)
{
  short_log(docu, docu->doc_url);

  /* clear the handles after closing them so that later code testing */
  /* them (e.g. doc_remove_lock()) does not close them a second time */
  if(docu->s_sock)
  {
    bufio_close(docu->s_sock);
    docu->s_sock = NULL;
  }
  if(docu->datasock)
  {
    bufio_close(docu->datasock);
    docu->datasock = NULL;
  }

  _free(docu->mime);
  _free(docu->type_str);
  _free(docu->contents);
  _free(docu->etag);
  _free(docu->ftp_pasv_host);
  _free(docu->additional_headers);
  _free(docu->http_proxy);

  if(!cfg.auth_reuse_nonce)
  {
    if(docu->auth_digest)
      http_digest_deep_free(docu->auth_digest);
    docu->auth_digest = NULL;
  }

  if(!cfg.auth_reuse_proxy_nonce)
  {
    if(docu->auth_proxy_digest)
      http_digest_deep_free(docu->auth_proxy_digest);
    docu->auth_proxy_digest = NULL;
  }
}
1744 
1745 #ifdef HAVE_MT
/*
 * Flush the messages buffered for a document (multithreaded build).
 *
 * Clears the thread's current-document key, then, with the output
 * lock held, prints every message in docp->msgbuf and releases it.
 * Messages flagged for logging are collected and passed to log_str()
 * when a log file is configured.  docp->msgbuf is empty afterwards.
 */
void doc_finish_processing(doc * docp)
{
  dllist *ptr = docp->msgbuf;
  char *logstr = NULL;

  pthread_setspecific(cfg.currdoc_key, (void *) NULL);
  LOCK_OUTPUT;
  while(ptr)
  {
    doc_msg *dm = (doc_msg *) ptr->data;

    if(dm->log && cfg.logfile)
    {
      logstr = tl_str_concat(logstr, dm->msg, NULL);
    }
    xprintf(0, "%s", dm->msg);

    _free(dm->msg);
    _free(dm);
    ptr = dllist_remove_entry(ptr, ptr);
  }
  /* every entry has been removed above; do not leave the document */
  /* pointing at the old list                                      */
  docp->msgbuf = NULL;
  UNLOCK_OUTPUT;
  /* NOTE(review): logstr is not freed here - confirm whether */
  /* log_str() takes ownership of the string                   */
  if(logstr)
    log_str(logstr);
}
1771 #endif /* HAVE_MT */
1772 
/*
 * Rewrite the links inside the parent documents of `docu`.
 *
 * Nothing is done in MODE_NOSTORE, when cfg.dumpfd is in use, for
 * file URLs, for redirected or local URLs, for URLs without a parent,
 * when link rewriting is disabled, or when one of the all_to_local /
 * sel_to_local / all_to_remote options is set.
 */
void doc_update_parent_links(doc * docu)
{
  if(cfg.mode == MODE_NOSTORE)
    return;

  if(cfg.dumpfd >= 0)
    return;

  if(docu->doc_url->type == URLT_FILE)
    return;

  if(docu->doc_url->status & URL_REDIRECT)
    return;

  if(docu->doc_url->status & URL_ISLOCAL)
    return;

  if(!docu->doc_url->parent_url)
    return;

  if(!cfg.rewrite_links ||
    cfg.all_to_local || cfg.sel_to_local || cfg.all_to_remote)
    return;

  gui_set_status(gettext("Rewriting links inside parent documents"));
  rewrite_parents_links(docu->doc_url, NULL);
}
1789