1 /* Parsing FTP `ls' output.
2 Copyright (C) 1996-2011, 2015, 2018-2021 Free Software Foundation,
3 Inc.
4
5 This file is part of GNU Wget.
6
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
19
20 Additional permission under GNU GPL version 3 section 7
21
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
30
31 #include "wget.h"
32
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 #include <errno.h>
38 #include <time.h>
39 #include "utils.h"
40 #include "ftp.h"
41 #include "url.h"
42 #include "convert.h" /* for html_quote_string prototype */
43 #include "retr.h" /* for output_stream */
44 #include "c-strcase.h"
45
/* Convert a symbolic permission string such as "rwxr-xr-x" to its
   numeric form (0755 for that example).

   S points at the nine permission characters, i.e. just past the
   file-type character of an `ls -l' mode field.  A string shorter than
   nine characters yields 0; anything after the ninth character (ACL
   markers and the like) is ignored.

   The setuid/setgid/sticky bits themselves are not reported, but the
   lowercase `s' and `t' markers count as execute permission, because
   `ls' prints them in place of `x' when the execute bit is set together
   with the special bit.  `t' is honoured only in the last ("other")
   triplet, which is the only place `ls' emits it.  The uppercase forms
   (`S', `T') mean "special bit without execute" and contribute nothing.  */
static int
symperms (const char *s)
{
  int perms = 0, i;

  if (strlen (s) < 9)
    return 0;
  for (i = 0; i < 3; i++, s += 3)
    {
      int exec = (s[2] == 'x' || s[2] == 's' || (i == 2 && s[2] == 't'));

      perms <<= 3;
      perms += ((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) + exec;
    }
  return perms;
}
64
65
/* Normalize one line of listing text in place so the parsers can treat
   it uniformly.

   LINE holds LEN bytes.  Every trailing <CR> or <LF> is overwritten
   with a NUL, after which each <TAB> up to the first NUL is replaced by
   a <SPACE>.  Returns the length of the line once the terminators have
   been removed; 0 when LEN is not positive or nothing but terminators
   was present.  */
static int
clean_line (char *line, int len)
{
  char *p;

  if (len <= 0)
    return 0;

  /* Chop line terminators off the end, one byte at a time.  */
  while (len > 0)
    {
      char last = line[len - 1];

      if (last != '\n' && last != '\r')
        break;
      line[--len] = '\0';
    }

  if (len == 0)
    return 0;

  /* Tabs and spaces are equivalent separators for the tokenizers.  */
  for (p = line; *p != '\0'; p++)
    {
      if (*p == '\t')
        *p = ' ';
    }

  return len;
}
84
85 /* Convert the Un*x-ish style directory listing stored in FILE to a
86 linked list of fileinfo (system-independent) entries. The contents
87 of FILE are considered to be produced by the standard Unix `ls -la'
88 output (whatever that might be). BSD (no group) and SYSV (with
89 group) listings are handled.
90
91 The time stamps are stored in a separate variable, time_t
92 compatible (I hope). The timezones are ignored. */
93 static struct fileinfo *
ftp_parse_unix_ls(FILE * fp,int ignore_perms)94 ftp_parse_unix_ls (FILE *fp, int ignore_perms)
95 {
96 static const char *months[] = {
97 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
98 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
99 };
100 int next, len, i, error, ignore;
101 int year, month, day; /* for time analysis */
102 int hour, min, sec, ptype;
103 struct tm timestruct, *tnow;
104 time_t timenow;
105 size_t bufsize = 0;
106
107 char *line = NULL, *tok, *ptok; /* tokenizer */
108 struct fileinfo *dir, *l, cur; /* list creation */
109
110 dir = l = NULL;
111
112 /* Line loop to end of file: */
113 while ((len = getline (&line, &bufsize, fp)) > 0)
114 {
115 len = clean_line (line, len);
116 /* Skip if total... */
117 if (!c_strncasecmp (line, "total", 5))
118 continue;
119 /* Get the first token (permissions). */
120 tok = strtok (line, " ");
121 if (!tok)
122 continue;
123
124 cur.name = NULL;
125 cur.linkto = NULL;
126
127 /* Decide whether we deal with a file or a directory. */
128 switch (*tok)
129 {
130 case '-':
131 cur.type = FT_PLAINFILE;
132 DEBUGP (("PLAINFILE; "));
133 break;
134 case 'd':
135 cur.type = FT_DIRECTORY;
136 DEBUGP (("DIRECTORY; "));
137 break;
138 case 'l':
139 cur.type = FT_SYMLINK;
140 DEBUGP (("SYMLINK; "));
141 break;
142 default:
143 cur.type = FT_UNKNOWN;
144 DEBUGP (("UNKNOWN; "));
145 break;
146 }
147
148 if (ignore_perms)
149 {
150 switch (cur.type)
151 {
152 case FT_PLAINFILE:
153 cur.perms = 0644;
154 break;
155 case FT_DIRECTORY:
156 cur.perms = 0755;
157 break;
158 default:
159 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */
160 cur.perms = 0644;
161 }
162 DEBUGP (("implicit perms %0o; ", (unsigned) cur.perms));
163 }
164 else
165 {
166 cur.perms = symperms (tok + 1);
167 DEBUGP (("perms %0o; ", (unsigned) cur.perms));
168 }
169
170 error = ignore = 0; /* Erroneous and ignoring entries are
171 treated equally for now. */
172 year = hour = min = sec = 0; /* Silence the compiler. */
173 month = day = 0;
174 ptype = TT_DAY;
175 next = -1;
176 /* While there are tokens on the line, parse them. Next is the
177 number of tokens left until the filename.
178
179 Use the month-name token as the "anchor" (the place where the
180 position wrt the file name is "known"). When a month name is
181 encountered, `next' is set to 5. Also, the preceding
182 characters are parsed to get the file size.
183
184 This tactic is quite dubious when it comes to
185 internationalization issues (non-English month names), but it
186 works for now. */
187 tok = line;
188 while (ptok = tok,
189 (tok = strtok (NULL, " ")) != NULL)
190 {
191 --next;
192 if (next < 0) /* a month name was not encountered */
193 {
194 for (i = 0; i < 12; i++)
195 if (!c_strcasecmp (tok, months[i]))
196 break;
197 /* If we got a month, it means the token before it is the
198 size, and the filename is three tokens away. */
199 if (i != 12)
200 {
201 wgint size;
202
203 /* Parse the previous token with str_to_wgint. */
204 if (ptok == line)
205 {
206 /* Something has gone wrong during parsing. */
207 error = 1;
208 break;
209 }
210 errno = 0;
211 size = str_to_wgint (ptok, NULL, 10);
212 if (size == WGINT_MAX && errno == ERANGE)
213 /* Out of range -- ignore the size. #### Should
214 we refuse to start the download. */
215 cur.size = 0;
216 else
217 cur.size = size;
218 DEBUGP (("size: %s; ", number_to_static_string(cur.size)));
219
220 month = i;
221 next = 5;
222 DEBUGP (("month: %s; ", months[month]));
223 }
224 }
225 else if (next == 4) /* days */
226 {
227 if (tok[1]) /* two-digit... */
228 day = 10 * (*tok - '0') + tok[1] - '0';
229 else /* ...or one-digit */
230 day = *tok - '0';
231 DEBUGP (("day: %d; ", day));
232 }
233 else if (next == 3)
234 {
235 /* This ought to be either the time, or the year. Let's
236 be flexible!
237
238 If we have a number x, it's a year. If we have x:y,
239 it's hours and minutes. If we have x:y:z, z are
240 seconds. */
241 year = 0;
242 min = hour = sec = 0;
243 /* We must deal with digits. */
244 if (c_isdigit (*tok))
245 {
246 /* Suppose it's year. Limit to year 99999 to avoid integer overflow. */
247 for (; c_isdigit (*tok) && year <= 99999; tok++)
248 year = (*tok - '0') + 10 * year;
249 if (*tok == ':')
250 {
251 int n;
252 /* This means these were hours! */
253 hour = year;
254 year = 0;
255 ptype = TT_HOUR_MIN;
256 ++tok;
257 /* Get the minutes... */
258 for (n = 0; c_isdigit (*tok) && n < 2; tok++, n++)
259 min = (*tok - '0') + 10 * min;
260 if (*tok == ':')
261 {
262 /* ...and the seconds. */
263 ++tok;
264 for (n = 0; c_isdigit (*tok) && n < 2; tok++, n++)
265 sec = (*tok - '0') + 10 * sec;
266 }
267 }
268 }
269 if (year)
270 DEBUGP (("year: %d (no tm); ", year));
271 else
272 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
273 }
274 else if (next == 2) /* The file name */
275 {
276 int fnlen;
277 char *p;
278
279 /* Since the file name may contain a SPC, it is possible
280 for strtok to handle it wrong. */
281 fnlen = strlen (tok);
282 if (fnlen < len - (tok - line))
283 {
284 /* So we have a SPC in the file name. Restore the
285 original. */
286 tok[fnlen] = ' ';
287 /* If the file is a symbolic link, it should have a
288 ` -> ' somewhere. */
289 if (cur.type == FT_SYMLINK)
290 {
291 p = strstr (tok, " -> ");
292 if (!p)
293 {
294 error = 1;
295 break;
296 }
297 cur.linkto = xstrdup (p + 4);
298 DEBUGP (("link to: %s\n", cur.linkto));
299 /* And separate it from the file name. */
300 *p = '\0';
301 }
302 }
303 /* If we have the filename, add it to the list of files or
304 directories. */
305 /* "." and ".." are an exception! */
306 if (!strcmp (tok, ".") || !strcmp (tok, ".."))
307 {
308 DEBUGP (("\nIgnoring `.' and `..'; "));
309 ignore = 1;
310 break;
311 }
312 /* Some FTP sites choose to have ls -F as their default
313 LIST output, which marks the symlinks with a trailing
314 `@', directory names with a trailing `/' and
315 executables with a trailing `*'. This is no problem
316 unless encountering a symbolic link ending with `@',
317 or an executable ending with `*' on a server without
318 default -F output. I believe these cases are very
319 rare. */
320 fnlen = strlen (tok); /* re-calculate `fnlen' */
321 cur.name = xmalloc (fnlen + 1);
322 memcpy (cur.name, tok, fnlen + 1);
323 if (fnlen)
324 {
325 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
326 {
327 cur.name[fnlen - 1] = '\0';
328 DEBUGP (("trailing `/' on dir.\n"));
329 }
330 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
331 {
332 cur.name[fnlen - 1] = '\0';
333 DEBUGP (("trailing `@' on link.\n"));
334 }
335 else if (cur.type == FT_PLAINFILE
336 && (cur.perms & 0111)
337 && cur.name[fnlen - 1] == '*')
338 {
339 cur.name[fnlen - 1] = '\0';
340 DEBUGP (("trailing `*' on exec.\n"));
341 }
342 } /* if (fnlen) */
343 else
344 error = 1;
345 break;
346 }
347 else
348 abort ();
349 } /* while */
350
351 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
352 error = 1;
353
354 DEBUGP (("%s\n", cur.name ? cur.name : ""));
355
356 if (error || ignore)
357 {
358 DEBUGP (("Skipping.\n"));
359 xfree (cur.name);
360 xfree (cur.linkto);
361 continue;
362 }
363
364 if (!dir)
365 {
366 l = dir = xnew (struct fileinfo);
367 memcpy (l, &cur, sizeof (cur));
368 l->prev = l->next = NULL;
369 }
370 else
371 {
372 cur.prev = l;
373 l->next = xnew (struct fileinfo);
374 l = l->next;
375 memcpy (l, &cur, sizeof (cur));
376 l->next = NULL;
377 }
378 /* Get the current time. */
379 timenow = time (NULL);
380 tnow = localtime (&timenow);
381 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */
382 timestruct.tm_sec = sec;
383 timestruct.tm_min = min;
384 timestruct.tm_hour = hour;
385 timestruct.tm_mday = day;
386 timestruct.tm_mon = month;
387 if (year == 0)
388 {
389 /* Some listings will not specify the year if it is "obvious"
390 that the file was from the previous year. E.g. if today
391 is 97-01-12, and you see a file of Dec 15th, its year is
392 1996, not 1997. Thanks to Vladimir Volovich for
393 mentioning this! */
394 if (month > tnow->tm_mon)
395 timestruct.tm_year = tnow->tm_year - 1;
396 else
397 timestruct.tm_year = tnow->tm_year;
398 }
399 else
400 timestruct.tm_year = year;
401 if (timestruct.tm_year >= 1900)
402 timestruct.tm_year -= 1900;
403 timestruct.tm_wday = 0;
404 timestruct.tm_yday = 0;
405 timestruct.tm_isdst = -1;
406 l->tstamp = mktime (×truct); /* store the time-stamp */
407 l->ptype = ptype;
408 }
409
410 xfree (line);
411 return dir;
412 }
413
414 static struct fileinfo *
ftp_parse_winnt_ls(FILE * fp)415 ftp_parse_winnt_ls (FILE *fp)
416 {
417 int len;
418 int year, month, day; /* for time analysis */
419 int hour, min;
420 size_t bufsize = 0;
421 struct tm timestruct;
422
423 char *line = NULL, *tok; /* tokenizer */
424 char *filename;
425 struct fileinfo *dir, *l, cur; /* list creation */
426
427 dir = l = NULL;
428 cur.name = NULL;
429
430 /* Line loop to end of file: */
431 while ((len = getline (&line, &bufsize, fp)) > 0)
432 {
433 len = clean_line (line, len);
434
435 /* Name begins at 39 column of the listing if date presented in `mm-dd-yy'
436 format or at 41 column if date presented in `mm-dd-yyyy' format. Thus,
437 we cannot extract name before we parse date. Using this information we
438 also can recognize filenames that begin with a series of space
439 characters (but who really wants to use such filenames anyway?). */
440 if (len < 40) continue;
441 filename = line + 39;
442
443 /* First column: mm-dd-yy or mm-dd-yyyy. Should atoi() on the month fail,
444 january will be assumed. */
445 tok = strtok(line, "-");
446 if (tok == NULL) continue;
447 month = atoi(tok);
448 if (month < 0) month = 0; else month--;
449 tok = strtok(NULL, "-");
450 if (tok == NULL) continue;
451 day = atoi(tok);
452 tok = strtok(NULL, " ");
453 if (tok == NULL) continue;
454 year = atoi(tok);
455 /* Assuming the epoch starting at 1.1.1970 */
456 if (year <= 70)
457 {
458 year += 100;
459 }
460 else if (year >= 1900)
461 {
462 year -= 1900;
463 if (len < 42) continue;
464 filename += 2;
465 }
466 /* Now it is possible to determine the position of the first symbol in
467 filename. */
468 xfree (cur.name);
469 memset(&cur, 0, sizeof (cur));
470 cur.name = xstrdup(filename);
471 DEBUGP (("Name: '%s'\n", cur.name));
472
473
474 /* Second column: hh:mm[AP]M, listing does not contain value for
475 seconds */
476 tok = strtok(NULL, ":");
477 if (tok == NULL) continue;
478 hour = atoi(tok);
479 tok = strtok(NULL, "M");
480 if (tok == NULL) continue;
481 min = atoi(tok);
482 /* Adjust hour from AM/PM. Just for the record, the sequence goes
483 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
484 if (tok[0] && tok[1]) tok+=2;
485 if (hour >= 12 || hour < 0) hour = 0;
486 if (*tok == 'P') hour += 12;
487
488 DEBUGP (("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
489 year+1900, month, day, hour, min));
490
491 /* Build the time-stamp (copy & paste from above) */
492 timestruct.tm_sec = 0;
493 timestruct.tm_min = min;
494 timestruct.tm_hour = hour;
495 timestruct.tm_mday = day;
496 timestruct.tm_mon = month;
497 timestruct.tm_year = year;
498 timestruct.tm_wday = 0;
499 timestruct.tm_yday = 0;
500 timestruct.tm_isdst = -1;
501 cur.tstamp = mktime (×truct); /* store the time-stamp */
502 cur.ptype = TT_HOUR_MIN;
503
504 DEBUGP (("Timestamp: %ld\n", cur.tstamp));
505
506 /* Third column: Either file length, or <DIR>. We also set the
507 permissions (guessed as 0644 for plain files and 0755 for
508 directories as the listing does not give us a clue) and filetype
509 here. */
510 tok = strtok(NULL, " ");
511 if (tok == NULL) continue;
512 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
513 if (tok == NULL) continue;
514 if (*tok == '<')
515 {
516 cur.type = FT_DIRECTORY;
517 cur.size = 0;
518 cur.perms = 0755;
519 DEBUGP (("Directory\n"));
520 }
521 else
522 {
523 wgint size;
524 cur.type = FT_PLAINFILE;
525 errno = 0;
526 size = str_to_wgint (tok, NULL, 10);
527 if (size == WGINT_MAX && errno == ERANGE)
528 cur.size = 0; /* overflow */
529 else
530 cur.size = size;
531 cur.perms = 0644;
532 DEBUGP (("File, size %s bytes\n", number_to_static_string (cur.size)));
533 }
534
535 cur.linkto = NULL;
536
537 /* And put everything into the linked list */
538 if (!dir)
539 {
540 l = dir = xnew (struct fileinfo);
541 memcpy (l, &cur, sizeof (cur));
542 l->prev = l->next = NULL;
543 }
544 else
545 {
546 cur.prev = l;
547 l->next = xnew (struct fileinfo);
548 l = l->next;
549 memcpy (l, &cur, sizeof (cur));
550 l->next = NULL;
551 }
552 cur.name = NULL;
553 }
554
555 xfree (cur.name);
556 xfree (line);
557 return dir;
558 }
559
560
561
562 /* Convert the VMS-style directory listing stored in "file" to a
563 linked list of fileinfo (system-independent) entries. The contents
564 of FILE are considered to be produced by the standard VMS
565 "DIRECTORY [/SIZE [= ALL]] /DATE [/OWNER] [/PROTECTION]" command,
566 more or less. (Different VMS FTP servers may have different headers,
567 and may not supply the same data, but all should be subsets of this.)
568
569 VMS normally provides local (server) time and date information.
570 Define the logical name or environment variable
571 "WGET_TIMEZONE_DIFFERENTIAL" (seconds) to adjust the receiving local
572 times if different from the remote local times.
573
574 2005-02-23 SMS.
575 Added code to eliminate "^" escape characters from ODS5 extended file
576 names. The TCPIP FTP server (V5.4) seems to prefer requests which do
577 not use the escaped names which it provides.
578 */
579
/* Permissions assumed for VMS entries until a protection field is
   seen in the listing: one value for plain files, one for directories. */
#define VMS_DEFAULT_PROT_FILE 0644
#define VMS_DEFAULT_PROT_DIR 0755
582
583 /* 2005-02-23 SMS.
584 eat_carets().
585
586 Delete ODS5 extended file name escape characters ("^") in the
587 original buffer.
588 Note that the current scheme does not handle all EFN cases, but it
589 could be made more complicated.
590 */
591
eat_carets(char * str)592 static void eat_carets( char *str)
593 /* char *str; Source pointer. */
594 {
595 char *strd; /* Destination pointer. */
596 char hdgt;
597 unsigned char uchr;
598
599 /* Skip ahead to the first "^", if any. */
600 while ((*str != '\0') && (*str != '^'))
601 str++;
602
603 /* If no caret was found, quit early. */
604 if (*str != '\0')
605 {
606 /* Shift characters leftward as carets are found. */
607 strd = str;
608 while (*str != '\0')
609 {
610 uchr = *str;
611 if (uchr == '^')
612 {
613 /* Found a caret. Skip it, and check the next character. */
614 if ((char_prop[(unsigned char) str[1]] & 64) && (char_prop[(unsigned char) str[2]] & 64))
615 {
616 /* Hex digit. Get char code from this and next hex digit. */
617 uchr = *(++str);
618 if (uchr <= '9')
619 {
620 hdgt = uchr - '0'; /* '0' - '9' -> 0 - 9. */
621 }
622 else
623 {
624 hdgt = ((uchr - 'A') & 7) + 10; /* [Aa] - [Ff] -> 10 - 15. */
625 }
626 hdgt <<= 4; /* X16. */
627 uchr = *(++str); /* Next char must be hex digit. */
628 if (uchr <= '9')
629 {
630 uchr = hdgt + uchr - '0';
631 }
632 else
633 {
634 uchr = hdgt + ((uchr - 'A') & 15) + 10;
635 }
636 }
637 else if (uchr == '_')
638 {
639 /* Convert escaped "_" to " ". */
640 uchr = ' ';
641 }
642 else if (uchr == '/')
643 {
644 /* Convert escaped "/" (invalid Zip) to "?" (invalid VMS). */
645 /* Note that this is a left-over from Info-ZIP code, and is
646 probably of little value here, except perhaps to avoid
647 directory confusion which an unconverted slash might cause.
648 */
649 uchr = '?';
650 }
651 /* Else, not a hex digit. Must be a simple escaped character
652 (or Unicode, which is not yet handled here).
653 */
654 }
655 /* Else, not a caret. Use as-is. */
656 *strd = uchr;
657
658 /* Advance destination and source pointers. */
659 strd++;
660 str++;
661 }
662 /* Terminate the destination string. */
663 *strd = '\0';
664 }
665 }
666
667
668 static struct fileinfo *
ftp_parse_vms_ls(FILE * fp)669 ftp_parse_vms_ls (FILE *fp)
670 {
671 int dt, i, j, len;
672 int perms;
673 size_t bufsize = 0;
674 time_t timenow;
675 struct tm *timestruct;
676 char date_str[32];
677
678 char *line = NULL, *tok; /* tokenizer */
679 struct fileinfo *dir, *l, cur; /* list creation */
680
681 dir = l = NULL;
682
683 /* Skip blank lines, Directory heading, and more blank lines. */
684
685 for (j = 0; (i = getline (&line, &bufsize, fp)) > 0; )
686 {
687 i = clean_line (line, i);
688 if (i <= 0)
689 continue; /* Ignore blank line. */
690
691 if ((j == 0) && (line[i - 1] == ']'))
692 {
693 /* Found Directory heading line. Next non-blank line
694 is significant. */
695 j = 1;
696 }
697 else if (!strncmp (line, "Total of ", 9))
698 {
699 /* Found "Total of ..." footing line. No valid data
700 will follow (empty directory). */
701 i = 0; /* Arrange for early exit. */
702 break;
703 }
704 else
705 {
706 break; /* Must be significant data. */
707 }
708 }
709
710 /* Read remainder of file until the next blank line or EOF. */
711
712 cur.name = NULL;
713 while (i > 0)
714 {
715 char *p;
716
717 /* The first token is the file name. After a long name, other
718 data may be on the following line. A valid directory name ends
719 in ".DIR;1" (any case), although some VMS FTP servers may omit
720 the version number (";1").
721 */
722
723 tok = strtok(line, " ");
724 if (tok == NULL) tok = line;
725 DEBUGP (("file name: '%s'\n", tok));
726
727 /* Stripping the version number on a VMS system would be wrong.
728 It may be foolish on a non-VMS system, too, but that's someone
729 else's problem. (Define PRESERVE_VMS_VERSIONS for proper
730 operation on other operating systems.)
731
732 2005-02-23 SMS.
733 ODS5 extended file names may contain escaped semi-colons, so
734 the version number is identified as right-side decimal digits
735 led by a non-escaped semi-colon. It may be absent.
736 */
737
738 #if (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS))
739 for (p = tok + strlen (tok); (--p > tok) && c_isdigit(*p); );
740 if (p > tok && (*p == ';') && (*(p - 1) != '^'))
741 {
742 *p = '\0';
743 }
744 #endif /* (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS)) */
745
746 /* 2005-02-23 SMS.
747 Eliminate "^" escape characters from ODS5 extended file name.
748 (A caret is invalid in an ODS2 name, so this is always safe.)
749 */
750 eat_carets (tok);
751 DEBUGP (("file name-^: '%s'\n", tok));
752
753 /* Differentiate between a directory and any other file. A VMS
754 listing may not include file protections (permissions). Set a
755 default permissions value (according to the file type), which
756 may be overwritten later. Store directory names without the
757 ".DIR;1" file type and version number, as the plain name is
758 what will work in a CWD command.
759 */
760 len = strlen (tok);
761 if (len >= 4 && !c_strncasecmp(tok + (len - 4), ".DIR", 4))
762 {
763 *(tok + (len - 4)) = '\0'; /* Discard ".DIR". */
764 cur.type = FT_DIRECTORY;
765 cur.perms = VMS_DEFAULT_PROT_DIR;
766 DEBUGP (("Directory (nv)\n"));
767 }
768 else if (len >= 6 && !c_strncasecmp (tok + len - 6, ".DIR;1", 6))
769 {
770 *(tok + (len - 6)) = '\0'; /* Discard ".DIR;1". */
771 cur.type = FT_DIRECTORY;
772 cur.perms = VMS_DEFAULT_PROT_DIR;
773 DEBUGP (("Directory (v)\n"));
774 }
775 else
776 {
777 cur.type = FT_PLAINFILE;
778 cur.perms = VMS_DEFAULT_PROT_FILE;
779 DEBUGP (("File\n"));
780 }
781 xfree (cur.name);
782 cur.name = xstrdup (tok);
783 DEBUGP (("Name: '%s'\n", cur.name));
784
785 /* Null the date and time string. */
786 *date_str = '\0';
787
788 /* VMS lacks symbolic links. */
789 cur.linkto = NULL;
790
791 /* VMS reports file sizes in (512-byte) disk blocks, not bytes,
792 hence useless for an integrity check based on byte-count.
793 Set size to unknown.
794 */
795 cur.size = 0;
796
797 /* Get token 2, if any. A long name may force all other data onto
798 a second line. If needed, read the second line.
799 */
800
801 tok = strtok (NULL, " ");
802 if (tok == NULL)
803 {
804 DEBUGP (("Getting additional line.\n"));
805 i = getline (&line, &bufsize, fp);
806 if (i <= 0)
807 {
808 DEBUGP (("EOF. Leaving listing parser.\n"));
809 break;
810 }
811
812 /* Second line must begin with " ". Otherwise, it's a first
813 line (and we may be confused).
814 */
815 i = clean_line (line, i);
816 if (i <= 0)
817 {
818 /* Blank line. End of significant file listing. */
819 DEBUGP (("Blank line. Leaving listing parser.\n"));
820 break;
821 }
822 else if (line[0] != ' ')
823 {
824 DEBUGP (("Non-blank in column 1. Must be a new file name?\n"));
825 continue;
826 }
827 else
828 {
829 tok = strtok (line, " ");
830 if (tok == NULL)
831 {
832 /* Unexpected non-empty but apparently blank line. */
833 DEBUGP (("Null token. Leaving listing parser.\n"));
834 break;
835 }
836 }
837 }
838
839 /* Analyze tokens. (Order is not significant, except date must
840 precede time.)
841
842 Size: ddd or ddd/ddd (where "ddd" is a decimal number)
843 Date: DD-MMM-YYYY
844 Time: HH:MM or HH:MM:SS or HH:MM:SS.CC
845 Owner: [user] or [user,group]
846 Protection: (ppp,ppp,ppp,ppp) (where "ppp" is "RWED" or some
847 subset thereof, for System, Owner, Group, World.
848
849 If permission is lacking, info may be replaced by the string:
850 "No privilege for attempted operation".
851 */
852 while (tok != NULL)
853 {
854 DEBUGP (("Token: >%s<: ", tok));
855
856 if ((strlen (tok) < 12) && (strchr( tok, '-') != NULL))
857 {
858 /* Date. */
859 DEBUGP (("Date.\n"));
860 snprintf(date_str, sizeof(date_str), "%s ", tok);
861 }
862 else if ((strlen (tok) < 12) && (strchr( tok, ':') != NULL))
863 {
864 /* Time. */
865 DEBUGP (("Time. "));
866 strncat( date_str,
867 tok,
868 (sizeof( date_str)- strlen (date_str) - 1));
869 DEBUGP (("Date time: >%s<\n", date_str));
870 }
871 else if (strchr (tok, '[') != NULL)
872 {
873 /* Owner. (Ignore.) */
874 DEBUGP (("Owner.\n"));
875 }
876 else if (strchr (tok, '(') != NULL)
877 {
878 /* Protections (permissions). */
879 perms = 0;
880 j = 0;
881 /*FIXME: Should not be using the variable like this. */
882 for (i = 0; i < (int) strlen(tok); i++)
883 {
884 switch (tok[ i])
885 {
886 case '(':
887 break;
888 case ')':
889 break;
890 case ',':
891 if (j == 0)
892 {
893 perms = 0;
894 }
895 else if (j < 4)
896 {
897 perms <<= 3;
898 }
899 j++;
900 break;
901 case 'R':
902 perms |= 4;
903 break;
904 case 'W':
905 perms |= 2;
906 break;
907 case 'E':
908 perms |= 1;
909 break;
910 case 'D':
911 perms |= 2;
912 break;
913 }
914 }
915 cur.perms = perms;
916 DEBUGP (("Prot. perms = %0o.\n", (unsigned) cur.perms));
917 }
918 else
919 {
920 /* Nondescript. Probably size(s), probably in blocks.
921 Could be "No privilege ..." message. (Ignore.)
922 */
923 DEBUGP (("Ignored (size?).\n"));
924 }
925
926 tok = strtok (NULL, " ");
927 }
928
929 /* Tokens exhausted. Interpret the data, and fill in the
930 structure.
931 */
932 /* Fill tm timestruct according to date-time string. Fractional
933 seconds are ignored. Default to current time, if conversion
934 fails.
935 */
936 timenow = time( NULL);
937 timestruct = localtime( &timenow );
938 strptime( date_str, "%d-%b-%Y %H:%M:%S", timestruct);
939
940 /* Convert struct tm local time to time_t local time. */
941 timenow = mktime (timestruct);
942 /* Offset local time according to environment variable (seconds). */
943 if ((tok = getenv ( "WGET_TIMEZONE_DIFFERENTIAL")) != NULL)
944 {
945 dt = atoi (tok);
946 DEBUGP (("Time differential = %d.\n", dt));
947 }
948 else
949 dt = 0;
950
951 if (dt >= 0)
952 timenow += dt;
953 else
954 timenow -= (-dt);
955
956 cur.tstamp = timenow; /* Store the time-stamp. */
957 DEBUGP (("Timestamp: %ld\n", cur.tstamp));
958 cur.ptype = TT_HOUR_MIN;
959
960 /* Add the data for this item to the linked list, */
961 if (!dir)
962 {
963 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
964 cur.prev = cur.next = NULL;
965 memcpy (l, &cur, sizeof (cur));
966 }
967 else
968 {
969 cur.prev = l;
970 cur.next = NULL;
971 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
972 l = l->next;
973 memcpy (l, &cur, sizeof (cur));
974 }
975 cur.name = NULL;
976
977 i = getline (&line, &bufsize, fp);
978 if (i > 0)
979 {
980 i = clean_line (line, i);
981 if (i <= 0)
982 {
983 /* Blank line. End of significant file listing. */
984 break;
985 }
986 }
987 }
988
989 xfree (cur.name);
990 xfree (line);
991 return dir;
992 }
993
994
995 /* This function switches between the correct parsing routine depending on
996 the SYSTEM_TYPE. The system type should be based on the result of the
997 "SYST" response of the FTP server. According to this response we will
998 use on of the three different listing parsers that cover the most of FTP
999 servers used nowadays. */
1000
1001 struct fileinfo *
ftp_parse_ls(const char * file,const enum stype system_type)1002 ftp_parse_ls (const char *file, const enum stype system_type)
1003 {
1004 FILE *fp;
1005 struct fileinfo *fi;
1006
1007 fp = fopen (file, "rb");
1008 if (!fp)
1009 {
1010 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1011 return NULL;
1012 }
1013
1014 fi = ftp_parse_ls_fp (fp, system_type);
1015 fclose(fp);
1016
1017 return fi;
1018 }
1019
1020 struct fileinfo *
ftp_parse_ls_fp(FILE * fp,const enum stype system_type)1021 ftp_parse_ls_fp (FILE *fp, const enum stype system_type)
1022 {
1023 switch (system_type)
1024 {
1025 case ST_UNIX:
1026 return ftp_parse_unix_ls (fp, 0);
1027 case ST_WINNT:
1028 {
1029 /* Detect whether the listing is simulating the UNIX format */
1030 int c = fgetc(fp);
1031 rewind(fp);
1032
1033 /* If the first character of the file is '0'-'9', it's WINNT
1034 format. */
1035 if (c >= '0' && c <='9')
1036 return ftp_parse_winnt_ls (fp);
1037 else
1038 return ftp_parse_unix_ls (fp, 1);
1039 }
1040 case ST_VMS:
1041 return ftp_parse_vms_ls (fp);
1042 case ST_MACOS:
1043 return ftp_parse_unix_ls (fp, 1);
1044 default:
1045 logprintf (LOG_NOTQUIET, _("\
1046 Unsupported listing type, trying Unix listing parser.\n"));
1047 return ftp_parse_unix_ls (fp, 0);
1048 }
1049 }
1050
1051 /* Stuff for creating FTP index. */
1052
1053 /* The function creates an HTML index containing references to given
1054 directories and files on the appropriate host. The references are
1055 FTP. */
1056 uerr_t
ftp_index(const char * file,struct url * u,struct fileinfo * f)1057 ftp_index (const char *file, struct url *u, struct fileinfo *f)
1058 {
1059 FILE *fp;
1060 char *upwd;
1061 char *htcldir; /* HTML-clean dir name */
1062 char *htclfile; /* HTML-clean file name */
1063 char *urlclfile; /* URL-clean file name */
1064
1065 if (!output_stream)
1066 {
1067 fp = fopen (file, "wb");
1068 if (!fp)
1069 {
1070 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1071 return FOPENERR;
1072 }
1073 }
1074 else
1075 fp = output_stream;
1076 if (u->user)
1077 {
1078 char *tmpu, *tmpp; /* temporary, clean user and passwd */
1079
1080 tmpu = url_escape (u->user);
1081 tmpp = u->passwd ? url_escape (u->passwd) : NULL;
1082 if (tmpp)
1083 upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
1084 else
1085 upwd = concat_strings (tmpu, "@", (char *) 0);
1086 xfree (tmpu);
1087 xfree (tmpp);
1088 }
1089 else
1090 upwd = xstrdup ("");
1091
1092 htcldir = html_quote_string (u->dir);
1093
1094 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
1095 fprintf (fp, "<html>\n<head>\n<title>");
1096 fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1097 fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
1098 fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1099 fprintf (fp, "</h1>\n<hr>\n<pre>\n");
1100
1101 while (f)
1102 {
1103 fprintf (fp, " ");
1104 if (f->tstamp != -1)
1105 {
1106 /* #### Should we translate the months? Or, even better, use
1107 ISO 8601 dates? */
1108 static const char *months[] = {
1109 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
1110 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
1111 };
1112 time_t tstamp = f->tstamp;
1113 struct tm *ptm = localtime (&tstamp);
1114
1115 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
1116 ptm->tm_mday);
1117 if (f->ptype == TT_HOUR_MIN)
1118 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
1119 else
1120 fprintf (fp, " ");
1121 }
1122 else
1123 fprintf (fp, _("time unknown "));
1124 switch (f->type)
1125 {
1126 case FT_PLAINFILE:
1127 fprintf (fp, _("File "));
1128 break;
1129 case FT_DIRECTORY:
1130 fprintf (fp, _("Directory "));
1131 break;
1132 case FT_SYMLINK:
1133 fprintf (fp, _("Link "));
1134 break;
1135 default:
1136 fprintf (fp, _("Not sure "));
1137 break;
1138 }
1139 htclfile = html_quote_string (f->name);
1140 urlclfile = url_escape_unsafe_and_reserved (f->name);
1141 fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
1142 if (*u->dir != '/')
1143 putc ('/', fp);
1144 /* XXX: Should probably URL-escape dir components here, rather
1145 * than just HTML-escape, for consistency with the next bit where
1146 * we use urlclfile for the file component. Anyway, this is safer
1147 * than what we had... */
1148 fprintf (fp, "%s", htcldir);
1149 if (*u->dir)
1150 putc ('/', fp);
1151 fprintf (fp, "%s", urlclfile);
1152 if (f->type == FT_DIRECTORY)
1153 putc ('/', fp);
1154 fprintf (fp, "\">%s", htclfile);
1155 if (f->type == FT_DIRECTORY)
1156 putc ('/', fp);
1157 fprintf (fp, "</a> ");
1158 if (f->type == FT_PLAINFILE)
1159 fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
1160 else if (f->type == FT_SYMLINK)
1161 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
1162 putc ('\n', fp);
1163 xfree (htclfile);
1164 xfree (urlclfile);
1165 f = f->next;
1166 }
1167 fprintf (fp, "</pre>\n</body>\n</html>\n");
1168 xfree (htcldir);
1169 xfree (upwd);
1170 if (!output_stream)
1171 fclose (fp);
1172 else
1173 fflush (fp);
1174 return FTPOK;
1175 }
1176