1 //
2 // dir.c
3 //
4 // Oliver Fromme <olli@fromme.com>
5 // @(#)$Id: dir.c,v 1.30 1998/12/04 09:04:37 olli Exp $
6 //
7
8 static const char cvsid[]
9 = "@(#)$Id: dir.c,v 1.30 1998/12/04 09:04:37 olli Exp $";
10
11 #include <stdio.h>
12 #include <string.h>
13 #include <unistd.h>
14 #include <signal.h>
15 #include <errno.h>
16 #include <time.h>
17
18 #include "omi.h"
19 #include "dir.h"
20 #include "rex.h"
21
22 #include <sys/wait.h>
23
//
//  Unfortunately, there doesn't seem to be a portable way to
//  convert a struct tm which contains GMT into a time_t.
//  Therefore I wrote this function.  It assumes that time_t
//  complies with POSIX semantics (seconds since 1970-01-01
//  00:00:00 UTC, no leap seconds).  Leap years are computed
//  with the full Gregorian rule, so century years such as
//  2100 are handled correctly.
//
//  Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec
//  are read.  If tm_mon is not within 0..11, (time_t) -1 is
//  returned.
//

//
//  Number of days in a non-leap year before the first day of
//  each month, minus one (because tm_mday starts at 1).
//

int monthdaysum[12] =
    {-1, 30, 58, 89, 119, 150, 180, 211, 242, 272, 303, 333};

time_t
gmtmktime (struct tm *stm)
{
	long year, leaps;
	time_t days;

	if (stm->tm_mon < 0 || stm->tm_mon > 11)
		return (time_t) -1;

	year = 1900L + stm->tm_year;

	//
	//  Leap days contributed by the years 1970 .. year-1.
	//  477 is the number of Gregorian leap years in 1 .. 1969.
	//

	leaps = (year - 1) / 4 - (year - 1) / 100 + (year - 1) / 400 - 477;

	days = (time_t) (year - 1970) * 365 + leaps
	    + monthdaysum[stm->tm_mon] + stm->tm_mday;

	//
	//  The leap day of the current year only counts once
	//  February is over.
	//

	if (stm->tm_mon > 1
	    && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
		days++;

	return days * 86400 + stm->tm_hour * 3600
	    + stm->tm_min * 60 + stm->tm_sec;
}
49
50 //
51 // These are the functions for parsing the remote LIST output
52 // (or rather: for trying to parse ...).
53 //
//  Unfortunately, the FTP standard (RFC 959) doesn't specify
55 // a fixed, portable format for retrieving information about
56 // remote files, such as file size, type and modification
57 // time. Therefore we have to parse the LIST output of the
58 // remote site (which hopefully looks like a Unix ls -l) and
59 // apply heuristics -- in other words: We have to take a
60 // good guess.
61 //
62 // This particular parser actually only works well with Unix-
63 // style ls -l format. It will fail with other formats. But
64 // most FTP servers out there today use the Unix format, even
65 // those which don't run Unix. If you want to mirror a VMS-
66 // style server or something else, you'll have to write the
67 // appropriate parser yourself.
68 //
69 // The parser works as follows:
70 // - Ignore empty lines.
71 // - Ignore lines matching "total *"
72 // - Try to interpret a file entry as follows:
73 // [-dl]????????? {hlinks} owner {group} size <date> name {-> slink}
74 // where <date> is interpreted as follows:
75 // Month day {hour:min{:sec}} {year}
76 // Note that a single space in the above format can match
77 // any number of white space, and that some fields can
78 // contain white space themselves. "{}" denotes optional
79 // fields or parts.
80 // - If the file entry match fails, try to match the line as
81 // an introduction of a new subdirectory:
82 // {subdir}:
83 // - If that fails, too, ignore the line.
84 //
85
char *
prepare_line (char *text, int *len)
{
	//
	//  Cuts the first line off the zero-terminated buffer
	//  "text" by overwriting its first CR or LF with a
	//  zero byte, and stores the length of that line in
	//  "*len".
	//
	//  Returns a pointer to the first character after the
	//  whole run of CR/LF characters (so empty lines that
	//  follow directly are swallowed), or NULL if nothing
	//  but the end of the buffer follows.
	//

	char *eol, *following;

	eol = strpbrk(text, "\r\n");
	if (eol == NULL) {
		//  No terminator at all: this is the last line.
		*len = strlen(text);
		return NULL;
	}

	*eol = '\0';
	*len = eol - text;

	following = eol + 1;
	following += strspn(following, "\r\n");
	return *following ? following : NULL;
}
113
int
next_field (char **cptr)
{
	//
	//  Advances "*cptr" to the start of the next field.
	//  Fields are separated by runs of spaces and/or tabs.
	//
	//  Returns 0 if another field was found.  Returns 1 if
	//  the end of the string was reached instead; "*cptr"
	//  then points to the terminating zero.
	//

	char *pos = *cptr;

	//  Step over what is left of the current field.
	for (; *pos != ' ' && *pos != '\t'; pos++) {
		if (*pos == '\0') {
			*cptr = pos;
			return 1;
		}
	}

	//  "pos" is on a separator now; step over the whole run.
	pos += strspn(pos, " \t");
	*cptr = pos;
	return *pos == '\0';
}
133
int
get_month (char *cptr)
{
	//
	//  Returns the month number (January == 0) of the
	//  3-character string pointed to by "cptr".
	//  Returns -1 if there is no match.
	//
	//  The matching has been a bit relaxed, so it also
	//  works with several common locales, in case someone
	//  runs her FTP server with non-standard $LANG or
	//  $LC_TIME.  These locales should work:  EN, DE, FR,
	//  ES, PT, IT, NL (incl. variations such as de_AT).
	//
	//  All letters that are looked at are matched without
	//  regard to case, so "JUL", "Jul" and "jul" are all
	//  recognized as July.
	//
	//  At most three characters are read, and reading stops
	//  at a terminating zero.
	//

	switch (*cptr++) {
	case 'J': case 'j':
		switch (*cptr++) {
		case 'u': case 'U':
			switch (*cptr) {
			case 'l': case 'L':
				return 6;	// Jul --> July
			default:
				return 5;	// Ju* --> June
			}
		default:
			return 0;		// J*  --> January
		}
	case 'E': case 'e':
		return 0;			// E*  --> January (ES Enero)
	case 'G': case 'g':
		switch (*cptr) {
		case 'e': case 'E':
			return 0;		// Ge* --> January (IT Gennaio)
		default:
			return 5;		// G*  --> June (IT Giugno)
		}
	case 'F': case 'f':
		return 1;			// F*  --> February
	case 'M': case 'm':
		switch (*cptr++) {
		case 'e': case 'E':
			return 4;		// Me* --> May (NL Mei)
		case 'a': case 'A':
			switch (*cptr) {
			case 'r': case 'R':
				return 2;	// Mar --> March
			default:
				return 4;	// Ma* --> May
			}
		default:
			return 2;		// M*  --> March (DE, NL)
		}
	case 'A': case 'a':
		switch (*cptr) {
		case 'b': case 'B':
		case 'p': case 'P':
		case 'v': case 'V':
			return 3;		// A[bpv]* --> April
		default:
			return 7;		// A*  --> August
		}
	case 'L': case 'l':
		return 6;			// L*  --> July (IT Luglio)
	case 'S': case 's':
		return 8;			// S*  --> September
	case 'O': case 'o':
		return 9;			// O*  --> October
	case 'N': case 'n':
		return 10;			// N*  --> November
	case 'D': case 'd':
		return 11;			// D*  --> December
	default:
		return -1;
	}
}
196
int
get_perms (char *cptr, mode_t *perms, mode_t r, mode_t w, mode_t x)
{
	//
	//  Parses the three characters at "cptr" as one group
	//  of permission flags ("rwx") and ORs the matching
	//  masks r, w and x into *perms.
	//
	//  Position 0 accepts "r", "R" or "-", position 1
	//  accepts "w", "W" or "-".  In position 2, "x", "X",
	//  "s" and "t" all count as execute permission (the
	//  set-id and sticky bits are deliberately not
	//  mirrored), while "-", "S" and "T" count as no
	//  execute permission.
	//
	//  Returns 1 on success, 0 on parse error.  Parsing
	//  stops at the first bad character; bits set for the
	//  characters before it stay set in *perms.
	//

	char flag;

	flag = cptr[0];
	if (flag == 'r' || flag == 'R')
		*perms |= r;
	else if (flag != '-')
		return 0;

	flag = cptr[1];
	if (flag == 'w' || flag == 'W')
		*perms |= w;
	else if (flag != '-')
		return 0;

	flag = cptr[2];
	if (flag == 'x' || flag == 'X' || flag == 's' || flag == 't') {
		*perms |= x;
		return 1;
	}
	return flag == '-' || flag == 'S' || flag == 'T';
}
243
244 int
match_filentry(char * line,omifile * of)245 match_filentry (char *line, omifile *of)
246 {
247 //
248 // Try to interpret the line as a normal ls entry,
249 // parse the fields that we need and fill in the *of
250 // struct.
251 // Returns 1 on success, 0 on failure.
252 //
253
254 int i;
255 char *eptr, *psize, *pmonth, *pday;
256 struct tm date;
257
258 switch (*line++) {
259 case '-': of->mode = S_IFREG; break;
260 case 'd': of->mode = S_IFDIR; break;
261 case 'l': of->mode = S_IFLNK; break;
262 default: return 0;
263 }
264
265 //
266 // Check the permission field.
267 //
268 // If there are problems parsing the permissions,
269 // we set the OFLAG_BADPERM flag.
270 //
271
272 i = get_perms(line, &of->mode, S_IRUSR, S_IWUSR, S_IXUSR);
273 i += get_perms(line + 3, &of->mode, S_IRGRP, S_IWGRP, S_IXGRP);
274 i += get_perms(line + 6, &of->mode, S_IROTH, S_IWOTH, S_IXOTH);
275
276 if (i != 3)
277 of->flags |= OFLAG_BADPERM;
278
279 if (*(line += 9) == '+') // BSD ACL flag, see strmode(3)
280 line++;
281 if (*line != ' ' && *line != '\t')
282 return 0;
283 if (next_field(&line)) return 0;
284 if (next_field(&line)) return 0;
285
286 //
287 // Now "line" points to <owner>, <group>, or <size>
288 // (depending on the presence of the <hlinks> and
289 // <group> fields).
290 //
291 // Try to locate the <size> <Month> <day> sequence.
292 //
293
294 psize = line;
295 if (next_field(&line)) return 0;
296 pmonth = line;
297 if (next_field(&line)) return 0;
298 pday = line;
299 if (next_field(&line)) return 0;
300
301 i = 2; // Skip at most two fields.
302 while (*psize < '0' || *psize > '9'
303 || *pmonth < 'A' || *pmonth > 'S'
304 || *pday < '0' || *pday > '9'
305 || (date.tm_mon = get_month(pmonth)) < 0) {
306 if (i-- == 0) return 0;
307 psize = pmonth;
308 pmonth = pday;
309 pday = line;
310 if (next_field(&line)) return 0;
311 }
312
313 of->size = strtoul(psize, &eptr, 10);
314 if (*eptr != ' ' && *eptr != '\t') return 0;
315 date.tm_mday = strtoul(pday, &eptr, 10);
316 if ((*eptr != ' ' && *eptr != '\t') ||
317 date.tm_mday < 1 || date.tm_mday > 31) return 0;
318
319 //
320 // Now "line" points to one of these:
321 // hh:mm{:ss year}
322 // year
323 // Where <year> could be 2 or 4 digits.
324 //
325
326 if (*line < '0' || *line > '9') return 0;
327 date.tm_year = strtoul(line, &eptr, 10);
328 if (*eptr == ':') {
329 //
330 // It is hh:mm{:ss year}.
331 //
332
333 date.tm_hour = date.tm_year;
334 date.tm_year = -1;
335 line = ++eptr;
336 if (*line < '0' || *line > '9') return 0;
337 date.tm_min = strtoul(line, &eptr, 10);
338 if (*eptr == ':') {
339 //
340 // We have <seconds>, and
341 // probably also <year>.
342 //
343
344 of->tprec = 0;
345 line = ++eptr;
346 if (*line < '0' || *line > '9') return 0;
347 date.tm_sec = strtoul(line, &eptr, 10);
348 if (*eptr != ' ' && *eptr != '\t') return 0;
349 line = eptr;
350 if (next_field(&line)) return 0;
351 if (*line >= '0' && *line <= '9') {
352 psize = line;
353 next_field (&psize);
354 if (psize && *psize) {
355 date.tm_year =
356 strtoul(line, &eptr, 10);
357 if (*eptr == ' ' || *eptr == '\t')
358 line = psize;
359 else
360 date.tm_year = -1;
361 }
362 }
363 }
364 else {
365 //
366 // No seconds, no year.
367 //
368
369 of->tprec = 59;
370 date.tm_sec = 0;
371 line = eptr;
372 if (next_field(&line)) return 0;
373 }
374 if (date.tm_year < 0)
375 if (date.tm_mon > start.tm_mon)
376 date.tm_year = start.tm_year + 1899;
377 else
378 date.tm_year = start.tm_year + 1900;
379 }
380 else if (*eptr == ' ' || *eptr == '\t') {
381 //
382 // It is <year> only.
383 //
384
385 of->tprec = 86399;
386 date.tm_hour = date.tm_min = date.tm_sec = 0;
387 line = eptr;
388 if (next_field(&line)) return 0;
389 }
390 else
391 return 0;
392
393 //
394 // Try to be a bit clever about the year if it's
395 // only 2 digits. Also, change it to be the number
396 // of years since 1900.
397 //
398
399 if (date.tm_year < 1900)
400 if (date.tm_year < 69)
401 date.tm_year += 2000;
402 else
403 date.tm_year += 1900;
404 if (date.tm_year < 1970) {
405 //
406 // 1.1.19970 0:00 is the earliest date that
407 // we can represent.
408 //
409 date.tm_year = 1970;
410 date.tm_mon = date.tm_hour = date.tm_min = date.tm_sec = 0;
411 date.tm_mday = 1;
412 }
413 date.tm_year -= 1900;
414
415 if (cc->flags & MIRROR_LOCALTIME) {
416 date.tm_isdst = -1;
417 of->mtime = mktime(&date);
418 }
419 else
420 of->mtime = gmtmktime(&date);
421
422 //
423 // Now "line" should point to the filename.
424 //
425
426 i = strlen(line);
427 switch (of->mode & S_IFMT) {
428 case S_IFLNK:
429 eptr = strstr(line, " -> ");
430 if (!eptr)
431 return 0;
432 if (!(of->data = strndup(eptr + 4, i - (eptr - line) - 4)))
433 out_of_memory();
434 i = eptr - line;
435 break;
436 case S_IFDIR:
437 of->size = 0;
438 /* FALLTHROUGH */
439 default:
440 of->data = NULL;
441 }
442 if (!(of->name = strndup(line, i)))
443 out_of_memory();
444 return 1;
445 }
446
447 static omifile *
locate_dir_in_tree(char * name,omifile * root)448 locate_dir_in_tree (char *name, omifile *root)
449 {
450 unsigned int i;
451 omifile *of;
452
453 if (!name)
454 return root;
455 if (!*name || (*name == '.' && !name[1]))
456 return locate_dir_in_tree(strtok(NULL, "/"), root);
457 for (i = 0, of = (omifile *) root->data; i < root->size; i++, of++)
458 if ((of->mode & S_IFMT) == S_IFDIR && !strcmp(name, of->name))
459 return locate_dir_in_tree(strtok(NULL, "/"), of);
460 return NULL;
461 }
462
static char *
build_path (const char *root, const char* suffix)
{
	//
	//  Builds a newly allocated path out of `root' and
	//  `suffix'.  A leading slash is added if `root' does
	//  not start with one, exactly one slash is put
	//  between `root' and `suffix' (no matter whether
	//  `root' ends with one and/or `suffix' starts with
	//  one), and one trailing slash is removed from
	//  `suffix' and from the final result.
	//
	//  The returned path has to be free()ed when it is
	//  not needed anymore.
	//

	int root_has_lead, have_sep;
	int rlen, slen, total;
	char *result, *out;

	//
	//  First pass:  work out how many bytes are needed.
	//

	rlen = strlen(root);
	slen = strlen(suffix);
	total = rlen + slen;

	root_has_lead = (*root == '/');
	if (!root_has_lead)
		total++;

	if (rlen && root[rlen - 1] == '/') {
		//  The separator comes from `root'; drop the
		//  one from `suffix' if there is one, too.
		have_sep = 1;
		if (*suffix == '/') {
			suffix++;
			slen--;
			total--;
		}
	}
	else {
		have_sep = (*suffix == '/');
		if (!have_sep)
			total++;
	}

	if (slen && suffix[slen - 1] == '/') {
		slen--;
		total--;
	}

	//
	//  Second pass:  assemble the pieces.
	//

	out = result = tmalloc(total + 1);
	if (!root_has_lead)
		*out++ = '/';
	memcpy (out, root, rlen);
	out += rlen;
	if (!have_sep)
		*out++ = '/';
	memcpy (out, suffix, slen);
	out += slen;
	if (out[-1] == '/')
		out--;
	*out = '\0';
	return result;
}
516
517 static int
match_exclude(char * dir,omifile * of)518 match_exclude (char *dir, omifile *of)
519 {
520 int dirlen, namelen, result;
521 char *str;
522
523 dirlen = strlen(dir);
524 namelen = strlen(of->name);
525 str = tmalloc(dirlen + namelen + 3);
526 strcpy (str, dir);
527 str[dirlen] = '/';
528 strcpy (str + dirlen + 1, of->name);
529 if ((of->mode & S_IFMT) == S_IFDIR)
530 strcpy (str + dirlen + namelen + 1, "/");
531 if (!(result = simple_list_traverse(globalconfig.exclude,
532 (jobfunc *) regex_job, str, 0))) {
533 if (!(result = simple_list_traverse(ct->conf->exclude,
534 (jobfunc *) regex_job, str, 0))) {
535 if (!(result = simple_list_traverse(cc->exclude,
536 (jobfunc *) regex_job, str, 0))) {
537 if (cc->flags & MIRROR_EXCLUDEALL)
538 result = MATCH_DEFAULTEXCLUDE;
539 else
540 result = MATCH_DEFAULTINCLUDE;
541 }
542 }
543 }
544 if ((cc->debug & DEBUG_REMOTE_REGEX) && result)
545 fprintf (stderr, "remote %s: %s\n", matchdesc[result], str);
546 free (str);
547 return result;
548 }
549
550 //
551 // clean_excludes() removes all directories from the tree
552 // which are marked for exclusion and which don't contain
553 // any files to be mirrored.
554 //
555
556 static int
clean_excludes(omifile * omidir)557 clean_excludes (omifile *omidir)
558 {
559 omifile *of;
560 int i, size, subsize;
561
562 of = (omifile *) omidir->data;
563 size = omidir->size;
564 for (i = 0; i < omidir->size; i++, of++) {
565 if ((of->mode & S_IFMT) == S_IFDIR) {
566 subsize = clean_excludes(of);
567 if ((of->flags & OFLAG_EXCLUDE) && subsize)
568 of->flags &= ~(unsigned int) OFLAG_EXCLUDE;
569 else
570 size--;
571 }
572 }
573 return size;
574 }
575
576 //
577 // parse_lslr() returns the number of entries in the
578 // top-level directory that has been parsed.
579 //
580
581 int
parse_lslr(char * cptr,omifile * omidir,char * root)582 parse_lslr (char *cptr, omifile *omidir, char *root)
583 {
584 char *next, *path;
585 int clen, endcolon, rootcutlen, match;
586 omifile of, *od;
587
588 rootcutlen = -1;
589 od = omidir;
590 path = build_path(root, "");
591 while (cptr && *cptr) {
592 next = prepare_line(cptr, &clen);
593 if (!clen) {
594 // Ignore empty lines.
595 cptr = next;
596 continue;
597 }
598 endcolon = cptr[clen - 1] == ':'; // possibly a subdir
599 if (!strncasecmp(cptr, "total ", 6) && !endcolon) {
600 // Ignore "total *" lines.
601 cptr = next;
602 continue;
603 }
604 if (match_filentry(cptr, &of)) {
605 //
606 // This line is a normal ls entry.
607 // Add it to od->data. Note that we
608 // have to be careful to exclude "."
609 // and "..".
610 //
611
612 if (!od || (of.name[0] == '.' && (of.name[1] == '\0'
613 || (of.name[1] == '.' && of.name[2] == '\0')))
614 || (match = match_exclude(path, &of)) ==
615 MATCH_EXCLUDE || ((of.mode & S_IFMT) != S_IFDIR
616 && match == MATCH_DEFAULTEXCLUDE)) {
617 free (of.name);
618 cptr = next;
619 continue;
620 }
621 if (!(od->data = realloc(od->data,
622 (++od->size) * sizeof(omifile))))
623 out_of_memory();
624 if (match == MATCH_DEFAULTEXCLUDE
625 && (of.mode & S_IFMT) == S_IFDIR)
626 of.flags = OFLAG_EXCLUDE;
627 else
628 of.flags = 0;
629 memcpy ((omifile *) od->data + (od->size - 1),
630 &of, sizeof(omifile));
631 }
632 else if (endcolon && clen > 1) {
633 //
634 // If this line starts another
635 // subdirectory, find the entry in
636 // our existing tree and continue
637 // there.
638 //
639
640 cptr[--clen] = '\0'; // kill the trailing ':'
641 if (cptr[clen - 1] == '/') {
642 //
643 // If there's a trailing '/',
644 // then kill it, too.
645 //
646
647 cptr[--clen] = '\0';
648 }
649 if (rootcutlen < 0) {
650 char *slash;
651
652 if ((slash = strrchr(cptr, '/')))
653 rootcutlen = (slash - cptr) + 1;
654 else
655 rootcutlen = 0;
656 }
657 free (path);
658 path = build_path(root, cptr + rootcutlen);
659 od = locate_dir_in_tree
660 (strtok(cptr + rootcutlen, "/"), omidir);
661 }
662 cptr = next;
663 }
664 free (path);
665 clean_excludes (omidir);
666 return omidir->size;
667 }
668
669 void
free_tree(omifile * omidir)670 free_tree (omifile *omidir)
671 {
672 omifile *of;
673 int i;
674
675 of = (omifile *) omidir->data;
676 for (i = 0; i < omidir->size; i++, of++) {
677 if (of->name)
678 free (of->name);
679 switch (of->mode & S_IFMT) {
680 case S_IFDIR:
681 free_tree (of);
682 break;
683 case S_IFLNK:
684 if (of->data)
685 free (of->data);
686 }
687 }
688 if (omidir->data) {
689 free (omidir->data);
690 omidir->data = NULL;
691 }
692 }
693
694 void
debug_tree(char * rootname,omifile * omiroot)695 debug_tree (char *rootname, omifile *omiroot)
696 {
697 omifile *of;
698 char *n, type;
699 int i;
700
701 fprintf (stderr, "%s:\n", rootname);
702 of = (omifile *) omiroot->data;
703 for (i = 0; i < omiroot->size; i++, of++) {
704 switch (of->mode & S_IFMT) {
705 case S_IFDIR: type = 'd'; break;
706 case S_IFLNK: type = 'l'; break;
707 default: type = '-';
708 }
709 fprintf (stderr, "%c%8lu %9lu %s", type,
710 (unsigned long) of->size, (unsigned long) of->mtime,
711 of->name);
712 if (type == 'l')
713 fprintf (stderr, " -> %s\n", (char *) of->data);
714 else
715 fprintf (stderr, "\n");
716 }
717 fprintf (stderr, "\n");
718 of = (omifile *) omiroot->data;
719 for (i = 0; i < omiroot->size; i++, of++)
720 if ((of->mode & S_IFMT) == S_IFDIR) {
721 n = tmalloc(strlen(rootname) + strlen(of->name) + 2);
722 strcpy (n, rootname);
723 strcat (n, "/");
724 strcat (n, of->name);
725 debug_tree (n, of);
726 free (n);
727 }
728 }
729
730 //--
731