1 /***************************************************************************/
2 /* This code is part of WWW grabber called pavuk */
3 /* Copyright (c) 1997 - 2001 Stefan Ondrejicka */
4 /* Distributed under GPL 2 or later */
5 /***************************************************************************/
6
7 #include "config.h"
8
9 #include <unistd.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <limits.h>
16 #include <sys/types.h>
17 #include <sys/socket.h>
18 #include <sys/stat.h>
19 #ifdef HAVE_SYS_PARAM_H
20 #include <sys/param.h>
21 #endif
22 #ifdef HAVE_FSTATVFS
23 #ifdef HAVE_SYS_STATVFS_H
24 #include <sys/statvfs.h>
25 #endif
26 #else
27 #ifdef HAVE_FSTATFS
28 #ifdef HAVE_SYS_STATFS_H
29 #include <sys/statfs.h>
30 #endif
31 #ifdef HAVE_SYS_VFS_H
32 #include <sys/vfs.h>
33 #endif
34 #ifdef HAVE_SYS_MOUNT_H
35 #include <sys/mount.h>
36 #endif
37 #endif
38 #endif
39 #include <sys/time.h>
40 #include <time.h>
41 #include <utime.h>
42
43 #include "url.h"
44 #include "doc.h"
45 #include "tools.h"
46 #include "mime.h"
47 #include "http.h"
48 #include "ftp.h"
49 #include "gopher.h"
50 #include "decode.h"
51 #include "abstract.h"
52 #include "mode.h"
53 #include "times.h"
54 #include "dinfo.h"
55 #include "errcode.h"
56 #include "log.h"
57 #include "gui_api.h"
58 #include "html.h"
59
60 #ifdef I_FACE
61 static void doc_set_info(doc *);
62 #endif
63
64 static void show_progress(doc *, ssize_t, int);
65 static double compute_speed_rate(time_t, ssize_t);
66
/*
 * Prepare a document structure for a new transfer.
 *
 * Resets the per-transfer fields of docu, decides whether the URL is going
 * to be fetched with HTTP (directly or through an HTTP proxy), selects a
 * proxy when none is assigned yet and attaches the dump descriptor when
 * cfg.dumpfd is in use.
 *
 * docu - document to prepare, docu->doc_url must be valid
 * load - initial value of docu->load (forced to TRUE when cfg.dump_after)
 *
 * Returns 0 on success and -1 when bufio_dupfd() on cfg.dumpfd fails; in
 * that case docu->errcode is set to ERR_STORE_DOC.
 */
int doc_download_init(doc * docu, int load)
{
  url *target = docu->doc_url;
  int direct_http;
  int ftp_over_http;
  int gopher_over_http;

  /*** locking and output state ***/
  docu->remove_lock = TRUE;
  docu->lock_fn = NULL;
  docu->s_sock = NULL;
  docu->save_online = FALSE;
  docu->load = load;

  /*** document content and sizes ***/
  docu->is_parsable = cfg.enable_js && (target->status & URL_ISSCRIPT);
  docu->contents = NULL;
  docu->mime = NULL;
  docu->type_str = NULL;
  docu->size = 0;
  docu->current_size = 0;
  docu->totsz = -1;
  docu->origsize = 0;
  docu->rest_pos = 0;
  docu->adj_sz = 0;
  docu->doreget = FALSE;

  /*** timestamps ***/
  docu->stime = time(NULL);
  docu->origtime = docu->dtime;

  /*** protocol state ***/
  docu->is_http11 = FALSE;
  docu->is_chunked = FALSE;
  docu->is_persistent = FALSE;
  docu->read_chunksize = FALSE;
  docu->read_trailer = FALSE;
  docu->http_proxy_10 = FALSE;
  docu->ftp_data_con_finished = FALSE;

  docu->num_auth = cfg.auth_reuse_nonce ? 1 : 0;
  docu->num_proxy_auth = cfg.auth_reuse_proxy_nonce ? 1 : 0;

  /*** is the request going to be spoken in HTTP ? ***/
  direct_http = (target->type == URLT_HTTP || target->type == URLT_HTTPS);
  ftp_over_http = (target->type == URLT_FTP &&
    priv_cfg.ftp_proxy && cfg.ftp_via_http && !cfg.ftp_dirtyp);
  gopher_over_http = (target->type == URLT_GOPHER &&
    priv_cfg.gopher_proxy && cfg.gopher_via_http);
  docu->is_http_transfer = direct_http || ftp_over_http || gopher_over_http;

  /*** just default value, later will be assigned properly ***/
  docu->request_type = HTTP_REQ_UNKNOWN;

  /*** choose a proxy unless one is already attached to the document ***/
  if (docu->is_http_transfer && !docu->http_proxy) {
    char *proxy_host = NULL;
    unsigned short proxy_port = 0;

    if (target->type == URLT_HTTP) {
      http_proxy *entry = NULL;

      LOCK_PROXY;
      entry = http_proxy_get();
      if (entry) {
        http_proxy_check(entry, docu);
        proxy_host = tl_strdup(entry->addr);
        proxy_port = entry->port;
        docu->http_proxy_10 = (entry->is_10 != 0);
      }
      UNLOCK_PROXY;
    }
#ifdef USE_SSL
    else if (target->type == URLT_HTTPS) {
      if (priv_cfg.ssl_proxy) {
        proxy_host = tl_strdup(priv_cfg.ssl_proxy);
        proxy_port = cfg.ssl_proxy_port;
      }
    }
#endif
    else if (target->type == URLT_FTP) {
      if (priv_cfg.ftp_proxy) {
        proxy_host = tl_strdup(priv_cfg.ftp_proxy);
        proxy_port = cfg.ftp_proxy_port;
      }
    }
    else if (target->type == URLT_GOPHER) {
      if (priv_cfg.gopher_proxy) {
        proxy_host = tl_strdup(priv_cfg.gopher_proxy);
        proxy_port = cfg.gopher_proxy_port;
      }
    }
    /* any other URL type keeps "no proxy" (NULL host, port 0) */

    docu->http_proxy = proxy_host;
    docu->http_proxy_port = proxy_port;
  }

  /*** dumping of documents to a descriptor supplied by the user ***/
  if (cfg.dumpfd >= 0) {
    docu->remove_lock = FALSE;

    if (!cfg.dump_after) {
      /* dump while transferring: write straight to a copy of dumpfd */
      docu->save_online = TRUE;
      docu->s_sock = bufio_dupfd(cfg.dumpfd);

      if (!docu->s_sock) {
        xperror("bufio_dupfd()");
        docu->errcode = ERR_STORE_DOC;
        return -1;
      }
    } else {
      /* dump after processing: keep the whole document in memory */
      docu->load = TRUE;
      docu->save_online = FALSE;
      docu->s_sock = NULL;
    }
  }

  /*** start the high resolution clock and clear all split times ***/
  gettimeofday(&docu->hr_start_time, NULL);
  timerclear(&docu->redirect_time);
  timerclear(&docu->dns_time);
  timerclear(&docu->connect_time);
  timerclear(&docu->first_byte_time);
  timerclear(&docu->end_time);

  return 0;
}
196
/*
 * Check every configured transfer limit after a chunk of data arrived.
 *
 * docu     - document being transferred
 * len      - size of the chunk just received; added to cfg.trans_size
 *            for non-local, non-redirected URLs
 * totallen - number of bytes received for this document so far
 *
 * The checks run in a fixed order (minimal rate, time quota, file quota,
 * transfer quota, file system quota).  Each check that fires stores its
 * own docu->errcode and return code, so when several limits are hit the
 * last one wins.
 *
 * Returns 0 when no limit was reached, 1 when the per-file size quota
 * was the last limit reached and -1 for any other limit.
 */
static int doc_check_quotas(doc * docu, ssize_t len, ssize_t totallen)
{
  int retcode = 0;

/* wrapped in do/while(0) so it behaves as one statement everywhere */
#define KILL_PERSISTANT_CONNECTION \
  do { \
    if(docu->doc_url->type == URLT_FTP || docu->doc_url->type == URLT_FTPS) \
      docu->ftp_fatal_err = TRUE; \
    if(docu->is_http11) \
      docu->is_persistent = FALSE; \
  } while(0)

  /*** minimal transfer rate ***/
  if(cfg.minrate > 0.0 && (docu->doc_url->type != URLT_FILE &&
      !(docu->doc_url->status & URL_REDIRECT)))
  {
    time_t _tm = doc_etime(docu, FALSE);
    double _rt = compute_speed_rate(_tm, totallen);
    if(_rt < (cfg.minrate * 1024.0))
    {
      KILL_PERSISTANT_CONNECTION;
      docu->errcode = ERR_LOW_TRANSFER_RATE;
      retcode = -1;
    }
  }

  /*** maximal running time (cfg.max_time is multiplied by 60 seconds) ***/
  if(cfg.max_time > 0.0)
  {
    if((cfg.start_time + (int) (60.0 * cfg.max_time)) < time(NULL))
    {
      KILL_PERSISTANT_CONNECTION;
      docu->errcode = ERR_QUOTA_TIME;
      retcode = -1;
    }
  }

  /*** account the chunk to the global transfer counter ***/
  if(docu->doc_url->type != URLT_FILE &&
    !(docu->doc_url->status & URL_REDIRECT))
    cfg.trans_size += len;

  /*** per-file size quota (cfg.file_quota is multiplied by 1024) ***/
  if(cfg.file_quota && ((cfg.file_quota * 1024) <= totallen) &&
    (docu->doc_url->type != URLT_FILE) &&
    !(docu->doc_url->status & URL_REDIRECT))
  {
    KILL_PERSISTANT_CONNECTION;
    docu->errcode = ERR_QUOTA_FILE;
    retcode = 1;
  }

  /*** total transfer quota (cfg.trans_quota is multiplied by 1024) ***/
  if(cfg.trans_quota && ((cfg.trans_quota * 1024) <= cfg.trans_size))
  {
    KILL_PERSISTANT_CONNECTION;
    docu->errcode = ERR_QUOTA_TRANS;
    retcode = -1;
  }

#if defined HAVE_FSTATFS || defined HAVE_FSTATVFS
  /*** free space on the file system that holds the output file ***/
  if((cfg.dumpfd < 0) && cfg.fs_quota &&
    (docu->doc_url->type != URLT_FILE) &&
    !(docu->doc_url->status & URL_REDIRECT) && docu->s_sock)
  {
    double blocksize = 0.0;
    double freeblocks = 0.0;
    int have_stats = FALSE;
#ifdef HAVE_FSTATVFS
    struct statvfs fss;

    if(fstatvfs(bufio_getfd(docu->s_sock), &fss))
      xperror("fstatvfs");
    else
    {
      /*
       * POSIX counts f_bavail in f_frsize units, not in f_bsize units;
       * fall back to f_bsize only when f_frsize is not filled in.
       */
      blocksize = (double) (fss.f_frsize ? fss.f_frsize : fss.f_bsize);
      freeblocks = (double) fss.f_bavail;
      have_stats = TRUE;
    }
#else
    struct statfs fss;

    if(fstatfs(bufio_getfd(docu->s_sock), &fss))
      xperror("fstatfs");
    else
    {
      blocksize = (double) fss.f_bsize;
      freeblocks = (double) fss.f_bavail;
      have_stats = TRUE;
    }
#endif

    if(have_stats)
    {
      /*
       * Free space in kB, computed in floating point so that the
       * product can not overflow a 32 bit integer on large volumes.
       */
      double freespace = (blocksize * freeblocks) / 1024.0;

      if(freespace < (double) cfg.fs_quota)
      {
        KILL_PERSISTANT_CONNECTION;
        docu->errcode = ERR_QUOTA_FS;
        retcode = -1;
      }
    }
  }
#endif
  return retcode;
}
280
/*
 * Write exactly len bytes from data to descriptor fd.
 *
 * write() may legally store fewer bytes than requested or be interrupted
 * by a signal; both cases are retried here instead of being reported as
 * a failure.  Returns 0 on success, -1 on error with errno set.
 */
static int doc_write_fully(int fd, const char *data, size_t len)
{
  while(len > 0)
  {
    ssize_t wr = write(fd, data, len);

    if(wr < 0)
    {
      if(errno == EINTR)
        continue;
      return -1;
    }

    if(wr == 0)
    {
      /* no progress at all - give up instead of spinning forever */
      errno = EIO;
      return -1;
    }

    data += wr;
    len -= (size_t) wr;
  }

  return 0;
}

/*
 * Read the document body from docu->datasock.
 *
 * Every chunk read is
 *   - written to docu->s_sock when docu->save_online is set,
 *   - appended to docu->contents when the document has to be kept in
 *     memory (docu->load, parsable documents, FTP directories and
 *     Gopher items with selector '1' or 'h'),
 *   - accounted against the configured limits via doc_check_quotas().
 *
 * On return docu->size holds the number of bytes kept and
 * docu->contents, when present, is NUL terminated.
 *
 * Returns 0 on success and -1 on failure (docu->errcode tells why).
 */
static int doc_transfer_data(doc * docu)
{
  char *buf;
  int bufsize;
  ssize_t len, totallen = 0;
  int retcode = 0;

  if(docu->report_size)
    gui_set_status(gettext("Transfering data"));

  show_progress(docu, docu->adj_sz, FALSE);

  /* cfg.bufsize is multiplied by 1024; never use an empty buffer */
  bufsize = (cfg.bufsize > 0 ? cfg.bufsize : 1) * 1024;
  buf = _malloc(bufsize);

#ifdef SO_RCVBUF
#ifndef __QNX__
  if(bufio_is_sock(docu->datasock))
  {
    if(setsockopt(bufio_getfd(docu->datasock),
        SOL_SOCKET, SO_RCVBUF, (char *) &bufsize, sizeof(bufsize)))
    {
      xperror(gettext("setsockopt: SO_RCVBUF failed"));
    }
  }
#endif
#endif
  if(docu->save_online)
  {
    DEBUG_USER("Storing to file: %s\n", url_to_filename(docu->doc_url, TRUE));
  }

  /*** when dumping online, the response header goes out first ***/
  if(docu->mime && cfg.dump_resp && cfg.dumpfd >= 0 && !cfg.dump_after)
    bufio_write(docu->s_sock, docu->mime, strlen(docu->mime));

  while((len = abs_read_data(docu, docu->datasock, buf, bufsize)) > 0)
  {
    if(docu->save_online)
    {
      if(doc_write_fully(bufio_getfd(docu->s_sock), buf, (size_t) len))
      {
        docu->errcode = ERR_STORE_DOC;
        xperror(gettext("storing document"));
        retcode = -1;
        if(docu->doc_url->type == URLT_FTP
          || docu->doc_url->type == URLT_FTPS)
          docu->ftp_fatal_err = TRUE;
        if(docu->is_http11)
          docu->is_persistent = FALSE;
        break;
      }
    }

    totallen += len;
    docu->current_size += len;

    /*** throttle the transfer when it runs above cfg.maxrate ***/
    if(cfg.maxrate > 0.0 &&
      (docu->doc_url->type != URLT_FILE &&
        !(docu->doc_url->status & URL_REDIRECT)))
    {
      time_t _tm = doc_etime(docu, FALSE);
      double _rt = compute_speed_rate(_tm, totallen);
      if(_rt > (cfg.maxrate * 1024.0))
      {
        tl_msleep((time_t) (1000.0 * ((double) totallen) / (cfg.maxrate *
              1024.0)) - _tm);
      }
    }

    docu->size = totallen;
    show_progress(docu, docu->adj_sz, FALSE);

    /*** keep a copy in memory when it will be processed later ***/
    if(docu->load || docu->is_parsable ||
      ((docu->doc_url->type == URLT_FTP ||
          docu->doc_url->type == URLT_FTPS) &&
        docu->doc_url->p.ftp.dir) ||
      (docu->doc_url->type == URLT_GOPHER &&
        (docu->doc_url->p.gopher.selector[0] == '1' ||
          docu->doc_url->p.gopher.selector[0] == 'h')))
    {
      /* one spare byte is reserved for the terminating NUL */
      docu->contents = _realloc(docu->contents, totallen + 1);
      memmove(docu->contents + totallen - len, buf, len);
    }

    retcode = doc_check_quotas(docu, len, totallen);

    if(retcode)
    {
      /* 1 means "file quota reached": stop reading, not an error */
      if(retcode == 1)
        retcode = 0;
      break;
    }
    /*** announced size reached - do not wait for EOF ***/
    if(docu->totsz > 0 && docu->totsz <= docu->current_size)
      break;
  }

  show_progress(docu, docu->adj_sz, TRUE);

  if(cfg.dumpfd >= 0 && !cfg.dump_after)
  {
    bufio_close(docu->s_sock);
    docu->s_sock = NULL;
    docu->save_online = FALSE;
  }

  if(cfg.progres && docu->report_size
#ifdef I_FACE
    && !cfg.xi_face
#endif
    )
  {
    xprintf(0, "\n");
  }

  /*** read error, or the size differs from the announced one ***/
  if(len < 0 || ((docu->totsz > 0)
      && (docu->totsz != (docu->size + docu->rest_pos))))
  {
    xperror(gettext("Document transfer data"));
    if((docu->doc_url->type == URLT_HTTP || docu->doc_url->type ==
        URLT_HTTPS) && (!(docu->doc_url->status & URL_REDIRECT)))
    {
      docu->errcode = ERR_HTTP_TRUNC;
    }
    else if((docu->doc_url->type == URLT_FTP || docu->doc_url->type ==
        URLT_FTPS) && (!(docu->doc_url->status & URL_REDIRECT)))
    {
      docu->errcode = ERR_FTP_TRUNC;
    }
    else if(!docu->errcode)
      docu->errcode = ERR_READ;

    docu->remove_lock = FALSE;
    retcode = -1;
  }

  if(docu->report_size)
    gui_set_status(gettext("Data transfer done"));

  if((docu->doc_url->type == URLT_FTP ||
      docu->doc_url->type == URLT_FTPS) && docu->errcode == ERR_FTP_TRUNC)
  {
    docu->remove_lock = FALSE;
    retcode = -1;
  }

  /*** if the transfer did not start at the beginning, reread ***/
  /*** the document content into memory from the local file   ***/
  /*** to be sure we will process the whole document          ***/
  if(!retcode &&
    docu->rest_pos && (docu->load || docu->is_parsable) && (cfg.dumpfd < 0))
  {
    _free(docu->contents);
    totallen = 0;
    lseek(bufio_getfd(docu->s_sock), 0, SEEK_SET);
    bufio_reset(docu->s_sock);

    while((len = bufio_read(docu->s_sock, buf, bufsize)) > 0)
    {
      totallen += len;
      docu->contents = _realloc(docu->contents, totallen + 1);
      memmove(docu->contents + totallen - len, buf, len);
    }
  }

  if(docu->contents)
    *(docu->contents + totallen) = '\0';

  _free(buf);
  docu->size = totallen;

  return retcode;
}
453
/*
 * Inspect the local file that belongs to the document URL before any
 * connection is opened.
 *
 * In sync and mirror mode only the size and modification time of an
 * existing local copy are remembered (docu->origsize, docu->origtime).
 * In every other mode an existing readable local copy marks the URL with
 * URL_REDIRECT; a local directory that contains the index file makes the
 * URL redirect to its "directory" form instead.
 *
 * docu - document being prepared
 * rv   - receives the value the caller has to return when this function
 *        returns non-zero
 *
 * Returns 0 when the download should go on.  Returns -1 when the caller
 * must stop and return *rv: *rv is -1 on errors and redirects (see
 * docu->errcode) and 0 when the local file needs no further processing.
 */
static int doc_check_doc_file(doc * docu, int *rv)
{
  struct stat st;
  url *target = docu->doc_url;
  char *local_name = url_to_filename(target, TRUE);

  /*** sync/mirror: just remember what we already have on disk ***/
  if (cfg.mode == MODE_SYNC || cfg.mode == MODE_MIRROR) {
    if (!stat(local_name, &st)) {
      docu->origsize = st.st_size;
      docu->origtime = st.st_mtime;
    }
    return 0;
  }

  /*** a remote URL which already has a readable local copy ***/
  if (target->type != URLT_FILE && access(local_name, R_OK) != -1 &&
      !stat(local_name, &st)) {
    if (!S_ISDIR(st.st_mode)) {
      target->status |= URL_REDIRECT;
    } else {
      char *url_path = url_get_path(target);
      char *scratch =
        tl_str_concat(NULL, local_name, "/", priv_cfg.index_name, NULL);

      if (!stat(scratch, &st)) {
        /*
         * NOTE(review): scratch is handed to url_set_path() and to
         * _free() again below, so _free() presumably also resets the
         * pointer - confirm against its definition.
         */
        _free(scratch);

        if (!S_ISDIR(st.st_mode)) {
          /* index file exists: redirect to the URL with trailing '/' */
          url *newurl = url_dup_url(target);

          if (newurl->type != URLT_FILE)
            scratch = tl_str_concat(NULL, url_path, "/", NULL);
          if (newurl->type == URLT_FTP || newurl->type == URLT_FTPS)
            newurl->p.ftp.dir = TRUE;

          url_set_path(newurl, scratch);
          _free(scratch);

          docu->errcode = url_redirect_to(target, newurl, FALSE) ?
            ERR_HTTP_CYCLIC : ERR_HTTP_REDIR;

          *rv = -1;
          return -1;
        }
      }
      _free(scratch);
      local_name = url_to_filename(target, TRUE);
    }
  }

  /*** local file which is not going to be loaded into memory ***/
  if ((target->type == URLT_FILE || (target->status & URL_REDIRECT)) &&
      !docu->load) {
    int raw_ftp_file;

    if (stat(local_name, &st)) {
      docu->errcode = ERR_FILE_OPEN;
      *rv = -1;
      return -1;
    }

    if (S_ISDIR(st.st_mode)) {
      docu->errcode = ERR_DIR_URL;
      *rv = -1;
      return -1;
    }

    /* plain FTP file which must not be treated as HTML */
    raw_ftp_file = !cfg.ftp_html &&
      strcmp(tl_get_basename(local_name), priv_cfg.index_name) &&
      (target->type == URLT_FTP || target->type == URLT_FTPS) &&
      !target->p.ftp.dir;

    if (raw_ftp_file || !file_is_html(local_name)) {
      docu->is_parsable = FALSE;
      docu->save_online = TRUE;
      docu->size = st.st_size;
#ifdef I_FACE
      if (cfg.xi_face)
        doc_set_info(docu);
#endif
      xprintf(1, gettext("File redirect\n"));
      *rv = 0;
      return -1;
    }

    if (!strcasecmp("css", tl_get_extension(local_name)))
      target->status |= URL_STYLE;
    docu->is_parsable = TRUE;
  }

  return 0;
}
566
/*
 * Look for a partially downloaded file of this document and arrange the
 * transfer to be resumed from it.
 *
 * Nothing is done when documents are dumped to cfg.dumpfd or when
 * url_to_in_filename() returns no name.  When the partial file exists
 * and is not a directory, the document is locked and docu->rest_pos is
 * set to the file size lowered by cfg.rollback.
 *
 * docu   - document being prepared
 * b_lock - passed through to doc_lock()
 * rv     - receives the value the caller has to return when this
 *          function returns non-zero
 *
 * Returns 0 when the download should go on, -1 (with *rv == -1 and
 * docu->errcode == ERR_STORE_DOC) when the document can not be locked.
 */
static int doc_open_existing_in_file(doc * docu, int b_lock, int *rv)
{
  char *inname;
  struct stat estat;

  if((cfg.dumpfd < 0) && (inname = url_to_in_filename(docu->doc_url)))
  {
    if(!stat(inname, &estat) && !S_ISDIR(estat.st_mode))
    {
      if(doc_lock(docu, b_lock))
      {
        docu->errcode = ERR_STORE_DOC;
        _free(inname);
        *rv = -1;
        return -1;
      }

      /*
       * A rollback which is not smaller than the partial file leaves
       * nothing to resume from: start at offset 0 instead of trying to
       * resume from a negative position.
       */
      if(cfg.rollback > 0 && estat.st_size <= cfg.rollback)
        docu->rest_pos = 0;
      else
        docu->rest_pos = estat.st_size - cfg.rollback;

      if(docu->rest_pos)
      {
        xprintf(1, gettext("Trying to resume from position %d\n"),
          docu->rest_pos);

        docu->origtime = estat.st_mtime;
        docu->stime = estat.st_mtime;
        docu->doreget = TRUE;
        /* keep the partial file when this attempt fails as well */
        docu->remove_lock = FALSE;
      }
    }
    _free(inname);
  }

  return 0;
}
602
/*
 * Lock the document for a fresh download.
 *
 * Does nothing when documents are dumped to cfg.dumpfd or when an output
 * stream (docu->s_sock) is already attached to the document.
 *
 * docu   - document being prepared
 * b_lock - passed through to doc_lock()
 *
 * Returns 0 on success, -1 when doc_lock() fails (docu->errcode is then
 * ERR_STORE_DOC).
 */
static int doc_open_new_in_file(doc * docu, int b_lock)
{
  if (cfg.dumpfd >= 0 || docu->s_sock)
    return 0;

  if (cfg.post_update && docu->type_str) {
    /** dirty hack, but is required to support **/
    /** file naming by its MIME type **/
    url_forget_filename(docu->doc_url);
    url_to_filename_with_type(docu->doc_url, docu->type_str, TRUE);
  }

  if (doc_lock(docu, b_lock)) {
    docu->errcode = ERR_STORE_DOC;
    return -1;
  }

  return 0;
}
624
625 /********************************************************/
626 /* nacitanie dokumentu + specificke upravy */
627 /* FIXME: Translate me! */
628 /********************************************************/
doc_download_helper(doc * docu,int load,int b_lock)629 static int doc_download_helper(doc * docu, int load, int b_lock)
630 {
631 char *p = NULL, *p1 = 0;
632 ssize_t len = 0;
633 int retcode = 0;
634 int rv;
635 struct stat estat;
636 bufio *saved_datasock = NULL;
637
638 if(doc_download_init(docu, load))
639 return -1;
640
641 gettimeofday(&docu->hr_start_time, NULL);
642
643 if(doc_check_doc_file(docu, &rv))
644 return rv;
645
646 if(doc_open_existing_in_file(docu, b_lock, &rv))
647 return rv;
648
649 if(docu->report_size)
650 gui_set_status(gettext("Opening connection"));
651
652
653 if(!(docu->datasock = abs_get_data_socket(docu)) &&
654 /*
655 pro: add test for errcodes. The errcode tests are needed;
656 otherwise the "if" some lines later will never get
657 anything to do...
658 */
659 docu->errcode != ERR_FTP_ACTUAL && docu->errcode != ERR_HTTP_ACTUAL)
660 {
661 if(docu->mime &&
662 docu->doc_url->type != URLT_FILE &&
663 !(docu->doc_url->status & URL_REDIRECT))
664 dinfo_save(docu);
665 docu->remove_lock = FALSE;
666 abs_close_socket(docu, FALSE);
667 return -1;
668 }
669
670 doc_etime(docu, TRUE);
671
672 if(docu->errcode == ERR_HTTP_ACTUAL || docu->errcode == ERR_FTP_ACTUAL)
673 {
674 abs_close_socket(docu, FALSE);
675 saved_datasock = docu->datasock;
676
677 if(docu->load || docu->is_parsable)
678 {
679 xprintf(1, gettext("Loading local copy\n"));
680
681 p = url_to_filename(docu->doc_url, TRUE);
682 if(!(docu->datasock = bufio_open(p, O_BINARY | O_RDONLY)))
683 {
684 docu->datasock = saved_datasock;
685 docu->errcode = ERR_FILE_OPEN;
686 return -1;
687 }
688 docu->doc_url->status |= URL_REDIRECT;
689 docu->doc_url->status |= URL_ISLOCAL;
690 docu->save_online = FALSE;
691 }
692 else
693 {
694 docu->doc_url->status |= URL_REDIRECT;
695 return 1;
696 }
697 }
698 else if(docu->errcode == ERR_HTTP_NOREGET)
699 {
700 if(!cfg.freget)
701 {
702 docu->is_persistent = FALSE;
703 abs_close_socket(docu, FALSE);
704 docu->remove_lock = FALSE;
705 docu->ftp_fatal_err = TRUE;
706 return -1;
707 }
708 else
709 {
710 docu->rest_pos = 0;
711 }
712 }
713 else if(docu->errcode == ERR_FTP_NOREGET)
714 {
715 if(!cfg.freget)
716 {
717 docu->is_persistent = FALSE;
718 abs_close_socket(docu, FALSE);
719 docu->remove_lock = FALSE;
720 docu->ftp_fatal_err = TRUE;
721 return -1;
722 }
723 else
724 {
725 docu->rest_pos = 0;
726 }
727 }
728 else if(docu->errcode)
729 {
730 if(docu->mime &&
731 docu->doc_url->type != URLT_FILE &&
732 !(docu->doc_url->status & URL_REDIRECT))
733 dinfo_save(docu);
734 docu->is_persistent = FALSE;
735 docu->ftp_fatal_err = TRUE;
736 abs_close_socket(docu, FALSE);
737 return -1;
738 }
739
740 if(doc_open_new_in_file(docu, b_lock))
741 {
742 docu->is_persistent = FALSE;
743 docu->ftp_fatal_err = TRUE;
744 abs_close_socket(docu, FALSE);
745 return -1;
746 }
747
748 if(docu->doc_url->type != URLT_FILE &&
749 !(docu->doc_url->status & URL_REDIRECT))
750 dinfo_save(docu);
751
752 if((((docu->doc_url->type == URLT_FTP ||
753 docu->doc_url->type == URLT_FTPS) &&
754 !docu->doc_url->p.ftp.dir) ||
755 (docu->doc_url->type == URLT_GOPHER &&
756 (docu->doc_url->p.gopher.selector[0] != '1' ||
757 docu->doc_url->p.gopher.selector[0] != 'h')) ||
758 (docu->doc_url->type == URLT_HTTP ||
759 docu->doc_url->type == URLT_HTTPS)) &&
760 !(docu->doc_url->status & URL_REDIRECT))
761 {
762 if(cfg.dumpfd >= 0 && cfg.dump_after)
763 docu->save_online = FALSE;
764 else
765 docu->save_online = TRUE;
766 }
767
768 if(cfg.ftp_html &&
769 (docu->doc_url->type == URLT_FTP ||
770 docu->doc_url->type == URLT_FTPS) &&
771 ext_is_html(docu->doc_url->p.ftp.path))
772 {
773 docu->is_parsable = TRUE;
774 }
775
776 if(docu->doc_url->status & URL_INNSCACHE)
777 {
778 fstat(bufio_getfd(docu->datasock), &estat);
779 docu->totsz = estat.st_size;
780 docu->is_parsable = (docu->doc_url->status & URL_ISHTML) != 0;
781 }
782
783 if(docu->errcode == ERR_HTTP_FAILREGET)
784 {
785 docu->rest_pos = 0;
786 docu->save_online = FALSE;
787 }
788
789 if(docu->save_online)
790 {
791 if(cfg.dumpfd < 0)
792 {
793 ftruncate(bufio_getfd(docu->s_sock), docu->rest_pos);
794 lseek(bufio_getfd(docu->s_sock), docu->rest_pos, SEEK_SET);
795 bufio_reset(docu->s_sock);
796 }
797 }
798
799 /* We measure time-to-first-byte here again, to add all the processing
800 * timing noise (1-2ms) to the large value -- FB, which is typically
801 * on the order of 100ms, rather then to the time-to-last-byte,
802 * which is often around 0ms
803 */
804 gettimeofday(&docu->first_byte_time, NULL);
805
806 retcode = doc_transfer_data(docu);
807
808 abs_close_socket(docu, TRUE);
809
810 if(!retcode)
811 docu->remove_lock = TRUE;
812
813 if(docu->errcode == ERR_HTTP_ACTUAL)
814 {
815 docu->doc_url->status &= ~URL_REDIRECT;
816 docu->datasock = saved_datasock;
817 }
818
819 if(!retcode && docu->doc_url->status & URL_INNSCACHE)
820 {
821 docu->is_parsable = (docu->doc_url->status & URL_ISHTML) != 0;
822 }
823 else if(!retcode &&
824 docu->doc_url->type == URLT_GOPHER &&
825 !(docu->doc_url->status & URL_REDIRECT) &&
826 !(priv_cfg.gopher_proxy && cfg.gopher_via_http))
827 {
828 docu->is_parsable = FALSE;
829
830 /**** convert Gopher directory to HTML ****/
831 if(docu->doc_url->p.gopher.selector[0] == '1')
832 {
833 if(!(docu->doc_url->status & URL_REDIRECT))
834 gopher_dir_to_html(docu);
835 docu->is_parsable = TRUE;
836 }
837 else if(docu->doc_url->p.gopher.selector[0] == 'h')
838 {
839 docu->is_parsable = TRUE;
840 }
841 }
842 else if(!retcode &&
843 (docu->doc_url->type == URLT_FTP ||
844 docu->doc_url->type == URLT_FTPS) &&
845 !(priv_cfg.ftp_proxy && cfg.ftp_via_http && !cfg.ftp_dirtyp) &&
846 !(docu->doc_url->status & URL_REDIRECT))
847 {
848 docu->is_parsable = ext_is_html(docu->doc_url->p.ftp.path) != 0;
849
850 /*** convert FTP directory listing to HTML ***/
851 if(docu->doc_url->p.ftp.dir)
852 {
853 if(!(docu->doc_url->status & URL_REDIRECT))
854 {
855 ftp_dir_to_html(docu);
856 }
857 docu->is_parsable = TRUE;
858 }
859 }
860 else if(docu->is_http_transfer && !retcode &&
861 !(docu->doc_url->status & URL_REDIRECT))
862 {
863 http_response *resp;
864
865 /*** get HTTP response status info ***/
866 resp = http_get_response_info(docu->mime);
867
868 if(resp)
869 {
870 /*** set proper HTTP error code ***/
871 if(resp->ret_code >= 400)
872 {
873 docu->errcode = 2000 + resp->ret_code;
874 http_response_free(resp);
875 return -1;
876 }
877
878 /*** redirect to other URL ***/
879 if(resp->ret_code == 303 ||
880 resp->ret_code == 302 ||
881 resp->ret_code == 307 || resp->ret_code == 301)
882 {
883 http_handle_redirect(docu, resp->ret_code);
884 http_response_free(resp);
885 if(docu->is_persistent)
886 {
887 if(docu->doc_url->moved_to &&
888 ((url_get_port(docu->doc_url) !=
889 url_get_port(docu->doc_url->moved_to))
890 || strcmp(url_get_site(docu->doc_url),
891 url_get_site(docu->doc_url->moved_to))))
892 {
893 abs_close_socket(docu, TRUE);
894 }
895 docu->is_persistent = FALSE;
896 }
897 return -1;
898 }
899
900 http_response_free(resp);
901 }
902
903
904 /*** check if document was downloaded whole ***/
905 /*** when we know real document size and no ***/
906 /*** other error was detected before ***/
907 if(cfg.check_size && docu->totsz > 0 && docu->errcode == ERR_NOERROR)
908 {
909 /*** if docu->contents && docu->rest_pos ***/
910 /*** document was reareaded from file and ***/
911 /*** docu->size is total len ***/
912 if(docu->totsz != docu->size + (docu->contents ? 0 : docu->rest_pos))
913 {
914 docu->errcode = ERR_HTTP_TRUNC;
915 docu->remove_lock = FALSE;
916 retcode = -1;
917 xprintf(1, gettext("File may be truncated\n"));
918 }
919 }
920
921 /*** handle encoded document and decode ***/
922 /*** it if possible and user requested it ***/
923 p = get_mime_param_val_str("Content-Encoding:", docu->mime);
924 if(cfg.use_enc && !retcode && p &&
925 (!strncasecmp(docu->type_str, "text/plain", 10) ||
926 !strncasecmp(docu->type_str, "text/css", 8) ||
927 !strncasecmp(docu->type_str, "text/html", 9)))
928 {
929 if((!strcasecmp(p, "x-gzip")) ||
930 (!strcasecmp(p, "gzip")) ||
931 (!strcasecmp(p, "x-compress")) || (!strcasecmp(p, "compress")))
932 {
933 if(!gzip_decode(docu->contents, docu->size,
934 &p1, &len, (docu->contents ? NULL : docu->lock_fn)))
935 {
936 docu->size = len;
937 _free(docu->contents);
938 docu->contents = p1;
939 xprintf(1, gettext("Decoding document - OK\n"));
940 }
941 else
942 xperror(gettext("Decoding document - failed\n"));
943 }
944 else if(!strcasecmp(p, "deflate"))
945 {
946 if(!inflate_decode(docu->contents, docu->size,
947 &p1, &len, (docu->contents ? NULL : docu->lock_fn)))
948 {
949 docu->size = len;
950 _free(docu->contents);
951 docu->contents = p1;
952 xprintf(1, gettext("Decoding document - OK\n"));
953 }
954 else
955 {
956 xperror(gettext("Deflating document - failed\n"));
957 }
958 }
959 else
960 xprintf(1, gettext("Unsupported document encoding\n"));
961 }
962 else if(p && !retcode)
963 {
964 xprintf(1,
965 gettext
966 ("Received Encoded file but decoding not allowed (untouched)\n"));
967 }
968 _free(p);
969 }
970 else
971 {
972 if(docu->doc_url->type == URLT_FILE ||
973 (docu->doc_url->status & URL_REDIRECT))
974 {
975 p1 = url_to_filename(docu->doc_url, TRUE);
976
977 if(file_is_html(p1))
978 {
979 docu->is_parsable = TRUE;
980 }
981 }
982 else
983 {
984 docu->is_parsable = FALSE;
985 }
986 }
987
988 if(docu->totsz > 0 &&
989 docu->size == 0 &&
990 (docu->doc_url->type == URLT_HTTP || docu->doc_url->type == URLT_HTTPS))
991 {
992 if(!docu->errcode)
993 docu->errcode = ERR_ZERO_SIZE;
994 docu->remove_lock = FALSE;
995 retcode = -1;
996 }
997 #ifdef I_FACE
998 if(cfg.xi_face)
999 doc_set_info(docu);
1000 #endif
1001 if(!retcode && docu->lock_fn && docu->save_online &&
1002 (cfg.dumpfd < 0) && !docu->contents &&
1003 (cfg.mode != MODE_NOSTORE) &&
1004 (cfg.mode != MODE_FTPDIR) &&
1005 !(docu->doc_url->status & URL_REDIRECT) &&
1006 (docu->doc_url->type != URLT_FILE))
1007 {
1008 p1 = url_to_filename(docu->doc_url, TRUE);
1009
1010 if(!access(p1, F_OK))
1011 {
1012 if(unlink(p1))
1013 xperror(p1);
1014 }
1015
1016 if(link(docu->lock_fn, p1))
1017 {
1018 #ifdef __CYGWIN__
1019 if(errno != EPERM && errno != EACCES)
1020 #elif __BEOS__
1021 /* ?? no working link() on BeOS ?? */
1022 if(FALSE)
1023 #else
1024 if(errno != EPERM)
1025 #endif
1026 xperror(p1);
1027 else
1028 {
1029 if(copy_fd_to_file(bufio_getfd(docu->s_sock), p1))
1030 xperror(p1);
1031 }
1032 }
1033
1034 if(cfg.preserve_time && docu->dtime)
1035 {
1036 struct utimbuf utmbf;
1037
1038 stat(p1, &estat);
1039 utmbf.actime = estat.st_atime;
1040 utmbf.modtime = docu->dtime;
1041 utime(p1, &utmbf);
1042 }
1043
1044 if(cfg.preserve_perm &&
1045 (docu->doc_url->type == URLT_FTP ||
1046 docu->doc_url->type == URLT_FTPS) &&
1047 docu->doc_url->extension &&
1048 (((ftp_url_extension *) docu->doc_url->extension)->perm > 0))
1049 {
1050 chmod(p1, ((ftp_url_extension *) docu->doc_url->extension)->perm);
1051 }
1052 }
1053
1054 return retcode;
1055 }
1056
/*
 * Download a document and log its timing.
 *
 * Runs doc_download_helper(), stamps docu->end_time once it has returned
 * and passes the document to time_log().  The helper's return value is
 * handed back unchanged.
 */
int doc_download(doc * docu, int load, int b_lock)
{
  int status;

  status = doc_download_helper(docu, load, b_lock);

  gettimeofday(&docu->end_time, NULL);
  time_log(docu);

  return status;
}
1064
1065 /********************************************************/
1066 /* ulozi dokument ak je to potrebne vytvori adresare */
1067 /* FIXME: Translate me! */
1068 /********************************************************/
doc_store(doc * docu,int overwrite)1069 int doc_store(doc * docu, int overwrite)
1070 {
1071 char *pom;
1072 int f;
1073 struct utimbuf utmbf;
1074 struct stat estat;
1075
1076 if(cfg.mode == MODE_NOSTORE || cfg.mode == MODE_FTPDIR)
1077 return 0;
1078
1079 /*** don't store directory indexes ***/
1080 if(!cfg.store_index && url_is_dir_index(docu->doc_url))
1081 return 0;
1082
1083 pom = url_to_filename(docu->doc_url, TRUE);
1084 if(makealldirs(pom))
1085 xperror(pom);
1086
1087 if(!access(pom, R_OK) && !overwrite)
1088 {
1089 return 0;
1090 }
1091
1092 /*
1093 pro: before we open the file we unlink it. This way we assure that
1094 other directory that have a hard link to our (old) file will still
1095 have a hard link to the old file.
1096 */
1097 if(cfg.remove_before_store)
1098 {
1099 unlink(pom);
1100 }
1101
1102 if((f =
1103 open(pom, O_BINARY | O_CREAT | O_TRUNC | O_WRONLY,
1104 S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR)) == -1)
1105 {
1106 if(!access(pom, R_OK))
1107 unlink(pom);
1108 xperror(pom);
1109 return -1;
1110 }
1111
1112 if(write(f, docu->contents, docu->size) != docu->size)
1113 {
1114 if(!access(pom, R_OK))
1115 unlink(pom);
1116 xperror(pom);
1117 close(f);
1118 return -1;
1119 }
1120
1121 close(f);
1122
1123 if(docu->dtime && cfg.preserve_time)
1124 {
1125 utmbf.modtime = docu->dtime;
1126 }
1127 else
1128 {
1129 utmbf.modtime = docu->stime;
1130 }
1131
1132 stat(pom, &estat);
1133 utmbf.actime = estat.st_atime;
1134 utime(pom, &utmbf);
1135
1136 if((docu->doc_url->type == URLT_FTP ||
1137 docu->doc_url->type == URLT_FTPS) &&
1138 docu->doc_url->extension &&
1139 cfg.preserve_perm &&
1140 (((ftp_url_extension *) docu->doc_url->extension)->perm > 0))
1141 {
1142 chmod(pom, ((ftp_url_extension *) docu->doc_url->extension)->perm);
1143 }
1144
1145 return 0;
1146 }
1147
1148 /*** remove improper documents if required ***/
doc_remove(url * urlr)1149 int doc_remove(url * urlr)
1150 {
1151 char *fn;
1152
1153 #ifdef DEBUG
1154 if(cfg.debug)
1155 {
1156 fn = url_to_urlstr(urlr, FALSE);
1157 xprintf(1, gettext("Removing improper document : %s\n"), fn);
1158 _free(fn);
1159 }
1160 #endif
1161
1162 fn = url_to_filename(urlr, TRUE);
1163
1164 if(urlr->type == URLT_FTP || urlr->type == URLT_FTPS)
1165 {
1166 char *p;
1167
1168 p = strrchr(fn, '/');
1169 if(p)
1170 p++;
1171 else
1172 p = fn;
1173
1174 /*** if URL FTPdir index ***/
1175 if(!strcmp(p, priv_cfg.index_name))
1176 *p = '\0';
1177
1178 if(cfg.enable_info)
1179 dinfo_remove(fn);
1180 return unlink_recursive(fn);
1181 }
1182 else
1183 {
1184 if(cfg.enable_info)
1185 dinfo_remove(fn);
1186
1187 if(!access(fn, F_OK) && unlink(fn))
1188 {
1189 xperror(fn);
1190 return -1;
1191 }
1192 }
1193
1194 return 0;
1195 }
1196
#ifdef I_FACE
/********************************************************/
/* set up the document info shown to the user           */
/********************************************************/
/*
 * Allocates a url_prop describing the document (size, modification time
 * and a type label) and stores it in docp->doc_url->prop.  The function
 * body is empty unless WITH_TREE is defined.
 *
 * The label is the content type for HTTP(S) documents and a fixed
 * description for the other URL types; "Local file" is used whenever no
 * label was selected.
 */
static void doc_set_info(doc * docp)
{
#ifdef WITH_TREE
  url_prop *info = _malloc(sizeof(url_prop));
  const char *label = NULL;

  info->size = docp->size;
  info->mdtm = docp->dtime;
  info->type = NULL;

  switch (docp->doc_url->type) {
  case URLT_HTTP:
#ifdef USE_SSL
  case URLT_HTTPS:
#endif
    /* may stay NULL when no content type is known */
    label = docp->type_str;
    break;

  case URLT_FILE:
    label = gettext_nop("Local file");
    break;

  case URLT_GOPHER:
    /* first character of the selector is the Gopher item type */
    switch (docp->doc_url->p.gopher.selector[0]) {
    case '0':
      label = gettext_nop("Gopher/Text File");
      break;
    case '1':
      label = gettext_nop("Gopher/Directory");
      break;
    case '2':
      label = gettext_nop("Gopher/CSO phone book");
      break;
    case '3':
      label = gettext_nop("Gopher/Error");
      break;
    case '4':
      label = gettext_nop("Gopher/BINHEX");
      break;
    case '5':
      label = gettext_nop("Gopher/DOS bin");
      break;
    case '6':
      label = gettext_nop("Gopher/UUencoded");
      break;
    case '7':
      label = gettext_nop("Gopher/Search index");
      break;
    case '8':
      label = gettext_nop("Gopher/Telnet session");
      break;
    case '9':
      label = gettext_nop("Gopher/bin");
      break;
    case '+':
      label = gettext_nop("Gopher/Duplicated server");
      break;
    case 'T':
      label = gettext_nop("Gopher/TN3270");
      break;
    case 'g':
      label = gettext_nop("Gopher/GIF");
      break;
    case 'I':
      label = gettext_nop("Gopher/Image");
      break;
    }
    break;

  case URLT_FTP:
    label = docp->doc_url->p.ftp.dir ?
      gettext_nop("FTP/Directory") : gettext_nop("FTP/File");
    break;

  case URLT_FTPS:
    label = docp->doc_url->p.ftp.dir ?
      gettext_nop("FTPS/Directory") : gettext_nop("FTPS/File");
    break;

  default:
    label = gettext_nop("Unsupported type");
    break;
  }

  /* nothing selected above: fall back to the generic label */
  if (!label)
    label = gettext_nop("Local file");

  info->type = tl_strdup(label);

  docp->doc_url->prop = info;
#endif
}
#endif
1293
/*
 * Put a document descriptor into its idle default state and attach it
 * to a URL.
 *
 * docp - descriptor to reset; every listed field is simply overwritten,
 *        nothing that was stored there before is released
 * urlp - URL stored in docp->doc_url; its status flags are read here,
 *        so it must be a valid pointer
 */
void doc_init(doc * docp, url * urlp)
{
  /* identity and overall state */
  docp->doc_url = urlp;
  docp->doc_nr = 0;
  docp->errcode = ERR_NOERROR;
  docp->is_robot = FALSE;
  /* parsable by default only for script URLs when JS support is on */
  docp->is_parsable = cfg.enable_js && (docp->doc_url->status & URL_ISSCRIPT);

  /* content and metadata buffers */
  docp->contents = NULL;
  docp->mime = NULL;
  docp->type_str = NULL;
  docp->etag = NULL;
  docp->additional_headers = NULL;

  /* sizes and positions; -1 is stored for totsz and rest_end_pos */
  docp->size = 0;
  docp->origsize = 0;
  docp->totsz = -1;
  docp->rest_pos = 0;
  docp->rest_end_pos = -1;
  docp->chunk_size = 0;

  /* time stamps */
  docp->dtime = 0L;
  docp->stime = 0L;
  timerclear(&docp->hr_start_time);
  timerclear(&docp->redirect_time);
  timerclear(&docp->dns_time);
  timerclear(&docp->connect_time);
  timerclear(&docp->first_byte_time);
  timerclear(&docp->end_time);

  /* behaviour switches */
  docp->save_online = FALSE;
  docp->report_size = TRUE;
  docp->check_limits = TRUE;
  docp->remove_lock = FALSE;

  /* connections and lock file */
  docp->datasock = NULL;
  docp->s_sock = NULL;
  docp->lock_fn = NULL;
  docp->connect_host = NULL;
  docp->connect_port = 0;
#ifdef USE_SSL
  memset(&docp->ssl_data_con, '\0', sizeof(ssl_connection));
#endif

  /* ftp_* fields */
  docp->ftp_control = NULL;
  docp->ftp_fatal_err = FALSE;
  docp->ftp_respc = 0;
  docp->ftp_pasv_host = NULL;
  docp->ftp_pasv_port = 0;
  docp->ftp_data_con_finished = FALSE;

  /* HTTP transfer fields */
  docp->request_type = HTTP_REQ_UNKNOWN;
  docp->is_http_transfer = FALSE;
  docp->is_http11 = FALSE;
  docp->is_persistent = FALSE;
  docp->is_chunked = FALSE;
  docp->read_chunksize = FALSE;
  docp->read_trailer = FALSE;
  docp->http_proxy = NULL;
  docp->http_proxy_port = DEFAULT_HTTP_PROXY_PORT;
  docp->http_proxy_10 = FALSE;

  /* authentication bookkeeping */
  docp->num_auth = 0;
  docp->num_proxy_auth = 0;
  docp->auth_digest = NULL;
  docp->auth_proxy_digest = NULL;

#ifdef HAVE_MT
  /* fields that only exist in multithreaded builds */
  docp->__herrno = 0;
  docp->msgbuf = NULL;
  docp->threadnr = 0;
#endif
}
1359
/*
 * Render a transfer rate as text with a B/s, kB/s, MB/s or GB/s unit.
 *
 * str  - destination buffer supplied by the caller; written with
 *        sprintf(), so it must be large enough for the result
 * rate - rate in bytes per second
 *
 * Returns str.
 */
static char *get_rate_str(char *str, double rate)
{
  static const double one_kb = 1024.0;
  static const double one_mb = 1048576.0;
  static const double one_gb = 1073741824.0;

  /* a value sitting exactly on a limit is still shown in the smaller unit */
  if(rate <= one_kb)
  {
    sprintf(str, "%5.0f B/s", rate);
    return str;
  }

  if(rate <= one_mb)
  {
    sprintf(str, "%5.1f kB/s", rate / one_kb);
    return str;
  }

  if(rate <= one_gb)
  {
    sprintf(str, "%5.1f MB/s", rate / one_mb);
    return str;
  }

  sprintf(str, "%5.1f GB/s", rate / one_gb);
  return str;
}
1373
/*
 * Render a duration as "H:MM:SS".
 *
 * str - destination buffer supplied by the caller; written with
 *       sprintf(), so it must be large enough for the result
 * tm  - duration in milliseconds
 *
 * Returns str.
 */
static char *get_time_str(char *str, time_t tm)
{
  /*
   * time_t is not required to be long, so passing it straight to %ld is
   * a format/argument mismatch on platforms where the two differ.  The
   * division is done in time_t first and only the (small) results are
   * converted, which keeps large millisecond counts from being
   * truncated.
   */
  long hours = (long) (tm / 3600000);
  long minutes = (long) ((tm % 3600000) / 60000);
  long seconds = (long) ((tm % 60000) / 1000);

  sprintf(str, "%ld:%02ld:%02ld", hours, minutes, seconds);

  return str;
}
1381
/*
 * Render the transferred amount, and the completed percentage when the
 * total is known.
 *
 * str    - destination buffer supplied by the caller; written with
 *          sprintf(), so it must be large enough for the result
 * total  - expected total size in bytes, 0 when it is not known
 * actual - number of bytes present so far
 *
 * Values below 1000000 are printed in bytes, larger ones divided by
 * 1024 and labelled kB.  Returns str.
 */
static char *get_size_str(char *str, int total, int actual)
{
  double percent;

  if(total == 0)
  {
    /* no total: only the running counter can be shown */
    if(actual >= 1000000)
      sprintf(str, "%6d kB", actual / 1024);
    else
      sprintf(str, "%6d B", actual);

    return str;
  }

  percent = 100.0 * (double) actual / (double) total;

  /* the unit is picked from the total, not from the running counter */
  if(total >= 1000000)
    sprintf(str, "%7d / %d kB [%5.1f%%]", actual / 1024, total / 1024, percent);
  else
    sprintf(str, "%6d / %d B [%5.1f%%]", actual, total, percent);

  return str;
}
1403
/*
 * Per-document stopwatch.
 *
 * docp - document whose start_time field holds the reference point
 * init - non-zero: store the current time in docp->start_time and
 *        return 0; zero: return the time elapsed since that point
 *
 * The elapsed time is returned in milliseconds.  Without
 * gettimeofday() it is derived from time(), so it only advances in
 * whole seconds.
 */
time_t doc_etime(doc * docp, int init)
{
#ifdef HAVE_GETTIMEOFDAY
  struct timeval now;

  if(init)
  {
    gettimeofday(&docp->start_time, NULL);
    return 0;
  }

  gettimeofday(&now, NULL);

  /* seconds scaled up to ms plus the microsecond difference scaled down */
  return (1000 * (now.tv_sec - docp->start_time.tv_sec) +
    (now.tv_usec - docp->start_time.tv_usec) / 1000);
#else
  if(init)
  {
    docp->start_time = time(NULL);
    return 0;
  }

  return 1000 * (time(NULL) - docp->start_time);
#endif
}
1432
/*
 * Average transfer rate in bytes per second.
 *
 * etime - elapsed time in milliseconds (as produced by doc_etime());
 *         a value of 0 is treated as 1 ms to avoid dividing by zero
 * size  - number of bytes transferred in that time
 */
static double compute_speed_rate(time_t etime, ssize_t size)
{
  double elapsed_ms;

  if(etime == 0.0)
    elapsed_ms = 1.0;
  else
    elapsed_ms = etime;

  return (double) size * 1000.0 / elapsed_ms;
}
1437
/*
 * Build the progress texts (size, rate, elapsed time and, when a total
 * size is known, remaining time) for a running transfer and hand them
 * to the console and/or to the GUI.
 *
 * docp  - document being transferred
 * adjsz - added to docp->size for the rate computation only
 * dolog - not referenced by this function
 */
static void show_progress(doc * docp, ssize_t adjsz, int dolog)
{
  time_t elapsed = doc_etime(docp, FALSE);
  double speed = compute_speed_rate(elapsed, docp->size + adjsz);
  char size_txt[30] = "";
  char rate_txt[30] = "";
  char elapsed_txt[30] = "";
  char remain_txt[30] = "";
  ftp_url_extension *ftp_ext = NULL;
  int to_console;

  /* only FTP/FTPS URLs have their extension read as ftp_url_extension */
  if(docp->doc_url->type == URLT_FTP || docp->doc_url->type == URLT_FTPS)
    ftp_ext = (ftp_url_extension *) docp->doc_url->extension;

  if(docp->totsz >= 0 || (ftp_ext && ftp_ext->size > 0))
  {
    int total;
    time_t remaining;

    /* totsz wins when set, otherwise the size from the FTP extension */
    if(docp->totsz >= 0)
      total = docp->totsz;
    else
      total = ftp_ext->size;

    /* estimated overall duration at the current pace, minus time spent */
    remaining =
      (time_t) ((double) (total - docp->rest_pos) /
      (double) (docp->size ? docp->size : 10) *
      (double) ((elapsed != 0.0) ? elapsed : 1.0)) - elapsed;

    get_time_str(remain_txt, remaining);
    get_size_str(size_txt, total, docp->size + docp->rest_pos);
  }
  else
  {
    /* total unknown: remain_txt stays empty */
    get_size_str(size_txt, 0, docp->size + docp->rest_pos);
  }

  get_rate_str(rate_txt, speed);
  get_time_str(elapsed_txt, elapsed);

  to_console = cfg.progres && docp->report_size && !cfg.quiet && !cfg.bgmode;
#ifdef I_FACE
  to_console = to_console && !cfg.xi_face;
#endif

  if(to_console)
  {
    if(remain_txt[0] != '\0')
    {
      xprintf(0, gettext("S: %s [R: %s] [ET: %s] [RT: %s]"),
        size_txt, rate_txt, elapsed_txt, remain_txt);
    }
    else
    {
      xprintf(0, gettext("S: %s [R: %s] [ET: %s]"),
        size_txt, rate_txt, elapsed_txt);
    }
    /* carriage return so the next report overwrites this one */
    xprintf(0, " \r");
  }

#ifdef I_FACE
  if(docp->report_size && cfg.xi_face)
    gui_set_progress(size_txt, rate_txt, elapsed_txt, remain_txt);
#endif
}
1490
/*
 * Open (creating it if necessary) the file whose name
 * url_to_in_filename() returns for the document's URL and try to lock
 * it with _flock().
 *
 * docp   - document to lock; on success docp->s_sock receives the open
 *          handle and docp->lock_fn the file name, on failure
 *          docp->s_sock is set to NULL and the name is freed
 * b_lock - non-zero: keep retrying, sleeping one second after each
 *          failed attempt, until a handle is held or a hard error
 *          occurs; zero: make a single attempt and record ERR_LOCKED in
 *          docp->errcode when the lock is not obtained
 *
 * Returns -1 when no file name could be obtained, 0 when a handle is
 * held on return, 1 otherwise.
 */
int doc_lock(doc * docp, int b_lock)
{
  char *lock_name;
  int cyclenr = 0;              /* number of failed attempts so far */
  bufio *s_sock = NULL;

  if(!(lock_name = url_to_in_filename(docp->doc_url)))
    return -1;

  do
  {
    if(makealldirs(lock_name))
    {
      if(errno != ENOENT)
      {
        xperror(lock_name);
        docp->errcode = ERR_STORE_DOC;
        break;
      }
      else
        /* in a do/while this jumps to the loop condition: with b_lock */
        /* set the attempt is repeated at once, without the sleep and  */
        /* without counting the cycle; otherwise the loop ends here    */
        continue;
    }
    else
    {
      /* announce the wait once, on the first retry */
      if(cyclenr == 1)
      {
        xprintf(0, gettext("Waiting to releases document lock on: %s\n"),
          lock_name);
      }

      if(!(s_sock = bufio_copen(lock_name,
            O_BINARY | O_RDWR | O_CREAT, 0644)))
      {
        /* ENOENT is tolerated and simply leads to another attempt */
        if(errno != ENOENT)
        {
          xperror(lock_name);
          docp->errcode = ERR_STORE_DOC;
          break;
        }
      }
      if(s_sock)
      {
        /* a non-zero result is handled as "lock not obtained" */
        if(_flock(bufio_getfd(s_sock), lock_name,
            O_BINARY | O_RDWR | O_CREAT, FALSE))
        {
          if(!b_lock)
            docp->errcode = ERR_LOCKED;
          bufio_close(s_sock);
          s_sock = NULL;
        }
        else
        {
          /* presumably guards against the file being removed by its */
          /* previous holder right after we opened it - TODO confirm */
          tl_msleep(50);
          if(access(lock_name, F_OK))
          {
            /* NOTE(review): with b_lock == 0 the handle stays open  */
            /* here, so the function returns 0 although errcode is   */
            /* ERR_LOCKED - confirm this is intended                 */
            if(!b_lock)
              docp->errcode = ERR_LOCKED;
            else
            {
              bufio_close(s_sock);
              s_sock = NULL;
            }
          }
        }
      }

      /* blocking mode and still nothing held: wait before retrying */
      if(b_lock && !s_sock)
      {
        tl_sleep(1);
        cyclenr++;
      }
    }
  }
  while(b_lock && !s_sock);

  if(!s_sock)
  {
    _free(lock_name);
    docp->s_sock = NULL;
  }
  else
  {
    /* ownership of lock_name moves to the document */
    docp->s_sock = s_sock;
    docp->lock_fn = lock_name;
  }

  return (s_sock == NULL);
}
1579
1580 /****************************************************/
1581 /* Unlock document and remove lock file if required */
1582 /****************************************************/
/*
 * Give up the lock held through docp->s_sock, if any.
 *
 * The handle is closed (the explicit _funlock() call is disabled, so
 * closing is the only release step performed here), the time stamps of
 * the lock file are adjusted, the file is unlinked when
 * docp->remove_lock is set, and docp->lock_fn is freed.  Nothing
 * happens when docp->s_sock is NULL.
 */
void doc_remove_lock(doc * docp)
{
  struct utimbuf utmbf;

  if(!docp->s_sock)
    return;

  DEBUG_LOCKS("Unlocking document %s\n", docp->lock_fn);
  bufio_close(docp->s_sock);
  docp->s_sock = NULL;

  /* required because close() changes the modification time of a    */
  /* hard linked file on w2k (maybe generally on winnt or just ntfs) */
  if(cfg.preserve_time && docp->dtime)
  {
    struct stat estat;

    /* st_atime is only meaningful when stat() succeeded; skip the  */
    /* update instead of writing an indeterminate access time        */
    if(stat(docp->lock_fn, &estat) == 0)
    {
      utmbf.actime = estat.st_atime;
      utmbf.modtime = docp->dtime;
      utime(docp->lock_fn, &utmbf);
    }
  }

  if(docp->remove_lock)
  {
    unlink(docp->lock_fn);
  }
  else
  {
    /* the file stays: access time is "now", modification time is   */
    /* the document time when it is to be preserved, else stime      */
    utmbf.actime = time(NULL);

    if(docp->dtime && cfg.preserve_time)
      utmbf.modtime = docp->dtime;
    else
      utmbf.modtime = docp->stime;

    utime(docp->lock_fn, &utmbf);
  }

  _free(docp->lock_fn);
}
1627
/*
 * Remove the directories leading to a document's local file, starting
 * with the one that contains the file and walking upwards for as long
 * as the path is longer than priv_cfg.cache_dir and rmdir() succeeds.
 * When the document has no MIME data and cfg.enable_info is set,
 * dinfo_remove() is called for the file name first.
 */
static void doc_make_clean_dir(doc * docp)
{
  char *path = tl_strdup(url_to_filename(docp->doc_url, FALSE));
  char *slash;

  if(cfg.enable_info && !docp->mime)
    dinfo_remove(path);

  /* cut off the file name, keeping its directory */
  slash = strrchr(path, '/');
  if(slash)
    *slash = '\0';

  for(;;)
  {
    if(strlen(path) <= strlen(priv_cfg.cache_dir))
      break;

    if(rmdir(path))
    {
      /* a directory that is still in use or already gone is expected */
      if(!(errno == ENOTEMPTY || errno == ENOENT || errno == EEXIST))
        xperror(path);
      break;
    }

    /* step up to the parent directory */
    slash = strrchr(path, '/');
    if(slash)
      *slash = '\0';
  }

  _free(path);
}
1657
/*
 * Finish the handling of one document: notify the GUI, write the short
 * log entry, update the processed/failed counters, tidy up the local
 * directory tree after an error and release the buffers held by the
 * descriptor.
 */
void doc_cleanup(doc * docu)
{
  gui_finish_document(docu);

  short_log(docu, docu->doc_url);

  /* counters are updated between LOCK_FAILCNT and UNLOCK_FAILCNT */
  LOCK_FAILCNT;
  /* neither downloaded nor redirected counts as a failure */
  if((docu->doc_url->status & (URL_DOWNLOADED | URL_REDIRECT)) == 0)
    cfg.fail_cnt++;

  cfg.process_cnt++;
  UNLOCK_FAILCNT;

  if(cfg.xi_face)
    gui_tree_set_icon_for_doc(docu);

  if(docu->errcode)
  {
    char *final_name = url_to_filename(docu->doc_url, FALSE);
    char *in_name = url_to_in_filename(docu->doc_url);

    /* neither file exists: drop leftover directories and the name */
    if(access(final_name, F_OK) != 0 && access(in_name, F_OK) != 0)
    {
      doc_make_clean_dir(docu);
      url_forget_filename(docu->doc_url);
    }

    /* only in_name is released here */
    _free(in_name);
  }

  _free(docu->mime);
  _free(docu->type_str);
  _free(docu->contents);
  _free(docu->etag);
  _free(docu->ftp_pasv_host);
  _free(docu->additional_headers);
  _free(docu->http_proxy);

  /* digests are kept only when their nonce is to be reused */
  if(!cfg.auth_reuse_nonce)
  {
    if(docu->auth_digest != NULL)
    {
      http_digest_deep_free(docu->auth_digest);
    }
    docu->auth_digest = NULL;
  }

  if(!cfg.auth_reuse_proxy_nonce)
  {
    if(docu->auth_proxy_digest != NULL)
    {
      http_digest_deep_free(docu->auth_proxy_digest);
    }
    docu->auth_proxy_digest = NULL;
  }

  gui_clear_status();
}
1712
/*
 * Write the short log entry for a document and release what its
 * descriptor still holds: the s_sock and datasock handles are closed
 * (the pointers themselves are left untouched) and the string buffers
 * are freed.  Authentication digests are freed unless the matching
 * nonce-reuse option is enabled.
 */
void doc_destroy(doc * docu)
{
  short_log(docu, docu->doc_url);

  if(docu->s_sock != NULL)
  {
    bufio_close(docu->s_sock);
  }

  if(docu->datasock != NULL)
  {
    bufio_close(docu->datasock);
  }

  _free(docu->mime);
  _free(docu->type_str);
  _free(docu->contents);
  _free(docu->etag);
  _free(docu->ftp_pasv_host);
  _free(docu->additional_headers);
  _free(docu->http_proxy);

  if(!cfg.auth_reuse_nonce)
  {
    if(docu->auth_digest != NULL)
    {
      http_digest_deep_free(docu->auth_digest);
    }
    docu->auth_digest = NULL;
  }

  if(!cfg.auth_reuse_proxy_nonce)
  {
    if(docu->auth_proxy_digest != NULL)
    {
      http_digest_deep_free(docu->auth_proxy_digest);
    }
    docu->auth_proxy_digest = NULL;
  }
}
1744
1745 #ifdef HAVE_MT
/*
 * Flush the messages queued in docp->msgbuf.
 *
 * The calling thread's current-document key is cleared first.  Then,
 * between LOCK_OUTPUT and UNLOCK_OUTPUT, every queued message is
 * printed and released; messages flagged for logging are additionally
 * concatenated (when a log file is configured) and passed to log_str()
 * in one call after the output section has been left.
 */
void doc_finish_processing(doc * docp)
{
  dllist *ptr = docp->msgbuf;
  char *logstr = NULL;

  pthread_setspecific(cfg.currdoc_key, (void *) NULL);
  LOCK_OUTPUT;
  while(ptr)
  {
    doc_msg *dm = (doc_msg *) ptr->data;

    if(dm->log && cfg.logfile)
    {
      logstr = tl_str_concat(logstr, dm->msg, NULL);
    }
    xprintf(0, "%s", dm->msg);

    _free(dm->msg);
    _free(dm);
    ptr = dllist_remove_entry(ptr, ptr);
  }
  /* every entry and its payload has been released above, so the   */
  /* descriptor must not keep pointing at the old list head         */
  docp->msgbuf = NULL;
  UNLOCK_OUTPUT;

  /* NOTE(review): logstr is not freed here - confirm whether       */
  /* log_str() takes ownership of the string                        */
  if(logstr)
    log_str(logstr);
}
1771 #endif /* HAVE_MT */
1772
/*
 * Rewrite the links that point to this document inside its parent
 * documents.  This is done only when documents are being stored, no
 * dump descriptor is in use, the URL is neither a local file nor
 * flagged as redirect or local, it has a parent URL, and plain link
 * rewriting is enabled (none of the all_to_local, sel_to_local or
 * all_to_remote options is set).
 */
void doc_update_parent_links(doc * docu)
{
  if(cfg.mode == MODE_NOSTORE || cfg.dumpfd >= 0)
    return;

  if(docu->doc_url->type == URLT_FILE)
    return;

  if((docu->doc_url->status & (URL_REDIRECT | URL_ISLOCAL)) != 0)
    return;

  if(!docu->doc_url->parent_url)
    return;

  if(!cfg.rewrite_links ||
    cfg.all_to_local || cfg.sel_to_local || cfg.all_to_remote)
    return;

  gui_set_status(gettext("Rewriting links inside parent documents"));
  rewrite_parents_links(docu->doc_url, NULL);
}
1789