1 /***************************************************************************/
2 /* This code is part of WWW grabber called pavuk */
3 /* Copyright (c) 1997 - 2001 Stefan Ondrejicka */
4 /* Distributed under GPL 2 or later */
5 /***************************************************************************/
6
7 #include "config.h"
8
9 #include <assert.h>
10 #include <unistd.h>
11 #include <limits.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <sys/types.h>
15 #include <dirent.h>
16 #include <string.h>
17
18 #include "ainterface.h"
19 #include "recurse.h"
20 #include "update_links.h"
21 #include "tools.h"
22 #include "remind.h"
23 #include "form.h"
24 #include "stats.h"
25 #include "gui_api.h"
26 #include "jsbind.h"
27 #include "myssl.h"
28
free_all(void)29 void free_all(void)
30 {
31 while(cfg.urlstack)
32 cfg.urlstack = dllist_remove_entry(cfg.urlstack, cfg.urlstack);
33 while(cfg.urls_in_dir)
34 cfg.urls_in_dir = dllist_remove_entry(cfg.urls_in_dir, cfg.urls_in_dir);
35
36 if(cfg.url_hash_tbl)
37 dlhash_empty(cfg.url_hash_tbl);
38 if(cfg.fn_hash_tbl)
39 dlhash_empty(cfg.fn_hash_tbl);
40
41 cfg.urlstack = NULL;
42 cfg.docnr = 0;
43
44 #ifdef GTK_FACE
45 gui_clear_tree();
46 form_edit_dlg_clear();
47 stats_clear();
48 #endif
49 }
50
/*
 * Build an url from the description in `ui` and hand it to
 * append_url_to_list().
 *
 * ui     - URL description: the URL string, an optional local file name
 *          and, for URLI_FORM entries, the form method/encoding/fields.
 * parent - URL this one hangs below, or NULL.  Without a parent the new
 *          url is flagged URL_ISSTARTING; with one it is linked to the
 *          parent and placed one level deeper.
 *
 * Returns the url passed to append_url_to_list(), or NULL when the url was
 * discarded because its protocol is not supported or url_was_befor()
 * reported it.
 */
url *append_starting_url(url_info * ui, url * parent)
{
  url *urlp;

  urlp = url_parse(ui->urlstr);
  assert(urlp->type != URLT_FROMPARENT);

  /*
   * The string parsed as a file URL, but no such file is accessible and
   * the string is not an absolute path: treat it as a host name without
   * scheme and guess the scheme from its first label.
   */
  if((urlp->type == URLT_FILE) && access(urlp->p.file.filename, F_OK) &&
    (*ui->urlstr != '/') && prottable[urlp->type].supported)
  {
    char *p = NULL;

    free_deep_url(urlp);
    _free(urlp);
    if(!strncasecmp(ui->urlstr, "gopher.", 7))
      p = tl_str_concat(p, "gopher://", ui->urlstr, NULL);
    else if(!strncasecmp(ui->urlstr, "ftp.", 4))
      p = tl_str_concat(p, "ftp://", ui->urlstr, NULL);
    else if(!strncasecmp(ui->urlstr, "ssl.", 4))
      p = tl_str_concat(p, "https://", ui->urlstr, NULL);
    else
      p = tl_str_concat(p, "http://", ui->urlstr, NULL);
    urlp = url_parse(p);
    assert(urlp->type != URLT_FROMPARENT);
    _free(p);
  }

  /* nothing queued yet, so this is the first URL of the run */
  if(!cfg.urlstack)
    urlp->status |= URL_ISFIRST;

  if(ui->localname)
    url_set_filename(urlp, ui->localname);

  if(parent)
  {
    urlp->parent_url = dllist_append(urlp->parent_url, (dllist_t)parent);
    urlp->level = parent->level + 1;
  }
  else
  {
    urlp->status |= URL_ISSTARTING;
  }

  if(ui->type == URLI_FORM)
  {
    form_info *fi;
    dllist *ptr;

    urlp->status |= URL_FORM_ACTION;

    fi = _malloc(sizeof(*fi));

    fi->method = ui->method;
    fi->encoding = ui->encoding;
    fi->action = NULL;
    fi->text = NULL;
    fi->infos = NULL;
    fi->parent_url = NULL;

    /* the form gets its own copies of the field descriptions */
    for(ptr = ui->fields; ptr; ptr = ptr->next)
    {
      fi->infos = dllist_append(fi->infos,
        (dllist_t)form_field_duplicate((form_field *) ptr->data));
    }
    urlp->extension = fi;
  }

  if(!prottable[urlp->type].supported || url_was_befor(urlp))
  {
    if(!prottable[urlp->type].supported)
    {
      xprintf(1, gettext("Removing unsupported URL: %s\n"), ui->urlstr);
    }
    free_deep_url(urlp);
    _free(urlp);
    /*
     * The url is gone; make the NULL result explicit instead of relying
     * on whatever _free() leaves in the variable, so callers can never
     * receive a dangling pointer.
     */
    urlp = NULL;
  }
  else
    append_url_to_list(urlp);

  return urlp;
}
134
_append_starting_urls(void)135 static void _append_starting_urls(void)
136 {
137 dllist *dptr;
138
139 for(dptr = cfg.request; dptr; dptr = dptr->next)
140 {
141 url_info *ui = (url_info *) dptr->data;
142 append_starting_url(ui, NULL);
143 }
144 }
145
/*
 * Per-run setup of the optional subsystems, called at the start of
 * absi_restart() and absi_cont(): the JavaScript binding is torn down and
 * initialized again (HAVE_MOZJS) and my_ssl_init_start() is called
 * (USE_SSL).  Without either feature compiled in this does nothing.
 */
static void absi_conf(void)
{
#if defined(HAVE_MOZJS)
  pjs_destroy();
  pjs_init();
#endif
#if defined(USE_SSL)
  my_ssl_init_start();
#endif
}
156
absi_restart(void)157 void absi_restart(void)
158 {
159 int i;
160
161 #ifdef I_FACE
162 #ifdef HAVE_MT
163 _config_struct_priv_t privcfg;
164 #if defined (__OSF__) || defined (__osf__)
165 #define __builtin_try
166 #define __builtin_finally
167 #endif
168
169 privcfg_make_copy(&privcfg);
170 pthread_setspecific(cfg.privcfg_key, (void *) (&privcfg));
171 pthread_cleanup_push((void *) privcfg_free, (void *) (&privcfg));
172 #endif
173 cfg.rbreak = FALSE;
174 cfg.stop = FALSE;
175 #endif
176
177 absi_conf();
178
179 cfg.start_time = time(NULL);
180 gettimeofday(&cfg.hr_start_time, NULL);
181 cfg.fail_cnt = 0;
182 cfg.process_cnt = 0;
183 cfg.reject_cnt = 0;
184 cfg.mode_started = FALSE;
185 cfg.prev_mode = cfg.mode;
186
187 cfg.trans_size = 0;
188
189 /*** cleanup ***/
190 free_all();
191
192 gui_create_tree_root_node();
193
194 cfg.total_cnt = 0;
195 cfg.urlstack = NULL;
196
197 switch (cfg.mode)
198 {
199 case MODE_SINGLE:
200 case MODE_SREGET:
201 case MODE_NORMAL:
202 case MODE_NOSTORE:
203 case MODE_FTPDIR:
204 if(!cfg.xi_face)
205 if(!cfg.request)
206 usage_short();
207
208 if(cfg.request)
209 {
210 _append_starting_urls();
211 cfg.mode_started = TRUE;
212 recurse(TRUE);
213 }
214 else if(cfg.xi_face)
215 gui_set_msg(gettext("Specify at least one starting URL!"), 5);
216 else
217 xprintf(1, gettext("Specify at least one starting URL!\n"));
218 break;
219 case MODE_LNUPD:
220 if(!priv_cfg.subdir)
221 {
222 for(i = 0; i < NUM_ELEM(prottable); i++)
223 {
224 if(prottable[i].supported && prottable[i].dirname)
225 {
226 char *pom;
227
228 pom = tl_str_concat(NULL, priv_cfg.cache_dir,
229 "/", prottable[i].dirname, NULL);
230
231 if(!access(pom, F_OK))
232 {
233 LOCK_DIRR;
234 update_links(pom);
235 UNLOCK_DIRR;
236 }
237 _free(pom);
238 }
239 }
240 }
241 else
242 {
243 LOCK_DIRR;
244 update_links(priv_cfg.subdir);
245 UNLOCK_DIRR;
246 }
247 break;
248 case MODE_SYNC:
249 cfg.total_cnt = 0;
250 cfg.urlstack = NULL;
251 if(cfg.request)
252 {
253 _append_starting_urls();
254 }
255 if((cfg.remove_old && priv_cfg.subdir) || !cfg.request)
256 {
257 if(!priv_cfg.subdir)
258 {
259 for(i = 0; i < NUM_ELEM(prottable); i++)
260 {
261 if(prottable[i].supported && prottable[i].dirname)
262 {
263 char *pom;
264
265 pom = tl_str_concat(NULL, priv_cfg.cache_dir,
266 "/", prottable[i].dirname, NULL);
267
268 if(!access(pom, F_OK))
269 {
270 LOCK_DIRR;
271 get_urls_to_synchronize(pom, &cfg.urls_in_dir);
272 UNLOCK_DIRR;
273 }
274 _free(pom);
275 }
276 }
277 }
278 else
279 {
280 LOCK_DIRR;
281 get_urls_to_synchronize(priv_cfg.subdir, &cfg.urls_in_dir);
282 UNLOCK_DIRR;
283 }
284 /* rather check files from directory scan */
285 /* before each others because we can this */
286 /* way workaround the inability to use %E */
287 /* and %M in -fnrules and it won't break */
288 /* anything */
289 /* if (!cfg.request) */
290 {
291 while(cfg.urls_in_dir)
292 {
293 url *purl = (url *) cfg.urls_in_dir->data;
294
295 append_url_to_list(purl);
296 cfg.urls_in_dir =
297 dllist_remove_entry(cfg.urls_in_dir, cfg.urls_in_dir);
298 }
299 }
300 }
301 cfg.mode_started = TRUE;
302 recurse(TRUE);
303 if(!cfg.urlstack)
304 {
305 while(cfg.urls_in_dir)
306 {
307 url *purl = (url *) cfg.urls_in_dir->data;
308
309 purl->status |= URL_NORECURSE;
310 if(url_was_befor(purl))
311 {
312 free_deep_url(purl);
313 _free(purl);
314 }
315 else
316 append_url_to_list(purl);
317 cfg.urls_in_dir =
318 dllist_remove_entry(cfg.urls_in_dir, cfg.urls_in_dir);
319 }
320 if(cfg.urlstack)
321 recurse(FALSE);
322 }
323 break;
324
325 case MODE_MIRROR:
326 cfg.total_cnt = 0;
327 cfg.urlstack = NULL;
328 if(cfg.request)
329 {
330 _append_starting_urls();
331 }
332 if((cfg.remove_old && priv_cfg.subdir) || !cfg.request)
333 {
334 if(!priv_cfg.subdir)
335 {
336 for(i = 0; i < NUM_ELEM(prottable); i++)
337 {
338 if(prottable[i].supported && prottable[i].dirname)
339 {
340 char *pom;
341
342 pom = tl_str_concat(NULL, priv_cfg.cache_dir,
343 "/", prottable[i].dirname, NULL);
344
345 if(!access(pom, F_OK))
346 {
347 LOCK_DIRR;
348 get_urls_to_synchronize(pom, &cfg.urls_in_dir);
349 UNLOCK_DIRR;
350 }
351 _free(pom);
352 }
353 }
354 }
355 else
356 {
357 LOCK_DIRR;
358 get_urls_to_synchronize(priv_cfg.subdir, &cfg.urls_in_dir);
359 UNLOCK_DIRR;
360 }
361
362 /* we will not be able to use %E */
363 /* and %M in -fnrules */
364 }
365 cfg.mode_started = TRUE;
366 recurse(TRUE);
367
368 if(!cfg.urlstack)
369 {
370 /*
371 If everything was successful we remove all files
372 we had before that were not downloaded
373 */
374 while(cfg.urls_in_dir)
375 {
376 url *purl = (url *) cfg.urls_in_dir->data;
377
378 purl->status |= URL_NORECURSE;
379 if(url_was_befor(purl))
380 {
381 free_deep_url(purl);
382 _free(purl);
383 }
384 else
385 {
386 printf("no longer there; delete '%s'\n", purl->local_name);
387 doc_remove(purl);
388 }
389 cfg.urls_in_dir =
390 dllist_remove_entry(cfg.urls_in_dir, cfg.urls_in_dir);
391 }
392 }
393
394 break;
395 case MODE_RESUME:
396 cfg.total_cnt = 0;
397 cfg.urlstack = NULL;
398 if(!priv_cfg.subdir)
399 {
400 for(i = 0; i < NUM_ELEM(prottable); i++)
401 {
402 if(prottable[i].supported && prottable[i].dirname)
403 {
404 char *pom;
405
406 pom = tl_str_concat(NULL, priv_cfg.cache_dir,
407 "/", prottable[i].dirname, NULL);
408
409 if(!access(pom, F_OK))
410 {
411 LOCK_DIRR;
412 get_urls_to_resume(pom);
413 UNLOCK_DIRR;
414 }
415 _free(pom);
416 }
417 }
418 }
419 else
420 {
421 LOCK_DIRR;
422 get_urls_to_resume(priv_cfg.subdir);
423 UNLOCK_DIRR;
424 }
425 cfg.mode_started = TRUE;
426 recurse(TRUE);
427 break;
428 case MODE_REMIND:
429 remind_load_db();
430 remind_start_add();
431 remind_do();
432 remind_save_db();
433 if(!cfg.stop && !cfg.rbreak)
434 remind_send_result();
435 break;
436 default:
437 break;
438 }
439
440 #if defined(I_FACE) && defined(HAVE_MT)
441 pthread_cleanup_pop(TRUE);
442 if(cfg.xi_face)
443 gui_finish_download(FALSE);
444 #endif
445 }
446
absi_cont(void)447 void absi_cont(void)
448 {
449 int i;
450
451 #if defined(I_FACE) && defined(HAVE_MT)
452 _config_struct_priv_t privcfg;
453 #endif
454
455 if(cfg.mode == MODE_MIRROR)
456 return;
457
458 #ifdef I_FACE
459 #ifdef HAVE_MT
460 privcfg_make_copy(&privcfg);
461 pthread_setspecific(cfg.privcfg_key, (void *) (&privcfg));
462 pthread_cleanup_push((void *) privcfg_free, (void *) (&privcfg));
463 #endif
464 cfg.rbreak = FALSE;
465 cfg.stop = FALSE;
466 #endif
467
468 absi_conf();
469
470 switch (cfg.mode)
471 {
472 case MODE_SINGLE:
473 case MODE_SREGET:
474 case MODE_RESUME:
475 case MODE_NORMAL:
476 case MODE_NOSTORE:
477 case MODE_FTPDIR:
478 recurse(FALSE);
479 break;
480 case MODE_SYNC:
481 recurse(FALSE);
482 if(!cfg.urlstack)
483 {
484 while(cfg.urls_in_dir)
485 {
486 url *purl = (url *) cfg.urls_in_dir->data;
487
488 purl->status |= URL_NORECURSE;
489 if(url_was_befor(purl))
490 {
491 free_deep_url(purl);
492 _free(purl);
493 }
494 else
495 append_url_to_list(purl);
496 cfg.urls_in_dir =
497 dllist_remove_entry(cfg.urls_in_dir, cfg.urls_in_dir);
498 }
499 if(cfg.urlstack)
500 recurse(FALSE);
501 }
502 break;
503 case MODE_LNUPD:
504 if(!priv_cfg.subdir)
505 {
506 for(i = 0; i < NUM_ELEM(prottable); i++)
507 {
508 if(prottable[i].supported && prottable[i].dirname)
509 {
510 char *pom;
511
512 pom = tl_str_concat(NULL, priv_cfg.cache_dir,
513 "/", prottable[i].dirname, NULL);
514
515 if(!access(pom, F_OK))
516 {
517 LOCK_DIRR;
518 update_links(pom);
519 UNLOCK_DIRR;
520 }
521
522 _free(pom);
523 }
524 }
525 }
526 else
527 {
528 LOCK_DIRR;
529 update_links(priv_cfg.subdir);
530 UNLOCK_DIRR;
531 }
532 break;
533 case MODE_REMIND:
534 remind_start_add();
535 remind_do();
536 remind_save_db();
537 if(!cfg.stop && !cfg.rbreak)
538 remind_send_result();
539 default:
540 break;
541 }
542 #if defined(I_FACE) && defined(HAVE_MT)
543 pthread_cleanup_pop(TRUE);
544 if(cfg.xi_face)
545 gui_finish_download(FALSE);
546 #endif
547 }
548
549 #ifdef GETTEXT_NLS
get_available_languages(void)550 char **get_available_languages(void)
551 {
552 DIR *dir;
553 struct dirent *dent;
554 char msgfile[PATH_MAX];
555 char **retv = NULL;
556 int nr = 0;
557
558 LOCK_DIRR;
559 if(!cfg.msgcatd || !(dir = opendir(cfg.msgcatd)))
560 {
561 UNLOCK_DIRR;
562 xprintf(0, gettext("Can't list available message catalogs\n"));
563 return NULL;
564 }
565
566 while((dent = readdir(dir)))
567 {
568 if(!strcmp(dent->d_name, "."))
569 continue;
570 if(!strcmp(dent->d_name, ".."))
571 continue;
572 snprintf(msgfile, sizeof(msgfile), "%s/%s/LC_MESSAGES/%s.mo",
573 cfg.msgcatd, dent->d_name, PACKAGE);
574
575 if(!access(msgfile, R_OK))
576 {
577 nr++;
578 retv = _realloc(retv, (nr + 1) * sizeof(char *));
579 retv[nr - 1] = tl_strdup(dent->d_name);
580 retv[nr] = NULL;
581 }
582 }
583
584 closedir(dir);
585 UNLOCK_DIRR;
586
587 if(retv)
588 tl_strv_sort(retv);
589
590 return retv;
591 }
592 #endif
593