/* Parsing FTP `ls' output.
   Copyright (C) 1996-2011, 2015, 2018-2021 Free Software Foundation,
   Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget.  If not, see <http://www.gnu.org/licenses/>.

Additional permission under GNU GPL version 3 section 7

If you modify this program, or any covered work, by linking or
combining it with the OpenSSL project's OpenSSL library (or a
modified version of that library), containing parts covered by the
terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
grants you additional permission to convey the resulting work.
Corresponding Source for a non-source form of such a combination
shall include the source code for the parts of OpenSSL used as well
as that of the covered work.  */
30 
31 #include "wget.h"
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 #include <errno.h>
38 #include <time.h>
39 #include "utils.h"
40 #include "ftp.h"
41 #include "url.h"
42 #include "convert.h"            /* for html_quote_string prototype */
43 #include "retr.h"               /* for output_stream */
44 #include "c-strcase.h"
45 
/* Converts symbolic permissions to number-style ones, e.g. string
   rwxr-xr-x to 755.  S must point at the nine permission characters
   (the caller skips the leading file-type character); a string
   shorter than nine characters yields 0.

   A lowercase `s' in any triple and a lowercase `t' in the last
   ("other") triple are counted as the execute bit, since `ls' prints
   them in place of `x' when setuid/setgid/sticky is set together
   with execute.  The setuid/setgid/sticky bits themselves are not
   reported, and uppercase `S'/`T' leave the execute bit clear.
   Anything after the ninth character (e.g. ACL markers) is
   ignored.  */
static int
symperms (const char *s)
{
  int perms = 0, i;

  if (strlen (s) < 9)
    return 0;
  for (i = 0; i < 3; i++, s += 3)
    {
      /* `t' only ever appears in the last triple of a mode string.  */
      int exec = (s[2] == 'x' || s[2] == 's' || (i == 2 && s[2] == 't'));

      perms <<= 3;
      perms += ((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) + exec;
    }
  return perms;
}
64 
65 
/* Cleans a line of text so that it can be consistently parsed.
   LINE is a buffer holding LEN bytes of data followed by a
   terminating NUL.  Trailing <CR> and <LF> characters are overwritten
   with NUL, and every <TAB> within the remaining LEN bytes is
   replaced with <SPACE> (the whole length is scanned, so tabs that
   follow an embedded NUL are normalized too).

   Returns the length of the modified line, or 0 when LINE is NULL,
   LEN is not positive, or nothing but line terminators was present. */
static int
clean_line (char *line, int len)
{
  int i;

  if (!line || len <= 0)
    return 0;

  /* Strip the line terminator(s), shrinking LEN as we go.  */
  while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r'))
    line[--len] = '\0';

  for (i = 0; i < len; i++)
    if (line[i] == '\t')
      line[i] = ' ';

  return len;
}
84 
85 /* Convert the Un*x-ish style directory listing stored in FILE to a
86    linked list of fileinfo (system-independent) entries.  The contents
87    of FILE are considered to be produced by the standard Unix `ls -la'
88    output (whatever that might be).  BSD (no group) and SYSV (with
89    group) listings are handled.
90 
91    The time stamps are stored in a separate variable, time_t
92    compatible (I hope).  The timezones are ignored.  */
93 static struct fileinfo *
ftp_parse_unix_ls(FILE * fp,int ignore_perms)94 ftp_parse_unix_ls (FILE *fp, int ignore_perms)
95 {
96   static const char *months[] = {
97     "Jan", "Feb", "Mar", "Apr", "May", "Jun",
98     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
99   };
100   int next, len, i, error, ignore;
101   int year, month, day;         /* for time analysis */
102   int hour, min, sec, ptype;
103   struct tm timestruct, *tnow;
104   time_t timenow;
105   size_t bufsize = 0;
106 
107   char *line = NULL, *tok, *ptok;      /* tokenizer */
108   struct fileinfo *dir, *l, cur;       /* list creation */
109 
110   dir = l = NULL;
111 
112   /* Line loop to end of file: */
113   while ((len = getline (&line, &bufsize, fp)) > 0)
114     {
115       len = clean_line (line, len);
116       /* Skip if total...  */
117       if (!c_strncasecmp (line, "total", 5))
118         continue;
119       /* Get the first token (permissions).  */
120       tok = strtok (line, " ");
121       if (!tok)
122         continue;
123 
124       cur.name = NULL;
125       cur.linkto = NULL;
126 
127       /* Decide whether we deal with a file or a directory.  */
128       switch (*tok)
129         {
130         case '-':
131           cur.type = FT_PLAINFILE;
132           DEBUGP (("PLAINFILE; "));
133           break;
134         case 'd':
135           cur.type = FT_DIRECTORY;
136           DEBUGP (("DIRECTORY; "));
137           break;
138         case 'l':
139           cur.type = FT_SYMLINK;
140           DEBUGP (("SYMLINK; "));
141           break;
142         default:
143           cur.type = FT_UNKNOWN;
144           DEBUGP (("UNKNOWN; "));
145           break;
146         }
147 
148       if (ignore_perms)
149         {
150           switch (cur.type)
151             {
152             case FT_PLAINFILE:
153               cur.perms = 0644;
154               break;
155             case FT_DIRECTORY:
156               cur.perms = 0755;
157               break;
158             default:
159               /*cur.perms = 1023;*/     /* #### What is this?  --hniksic */
160               cur.perms = 0644;
161             }
162           DEBUGP (("implicit perms %0o; ", (unsigned) cur.perms));
163         }
164        else
165          {
166            cur.perms = symperms (tok + 1);
167            DEBUGP (("perms %0o; ", (unsigned) cur.perms));
168          }
169 
170       error = ignore = 0;       /* Erroneous and ignoring entries are
171                                    treated equally for now.  */
172       year = hour = min = sec = 0; /* Silence the compiler.  */
173       month = day = 0;
174       ptype = TT_DAY;
175       next = -1;
176       /* While there are tokens on the line, parse them.  Next is the
177          number of tokens left until the filename.
178 
179          Use the month-name token as the "anchor" (the place where the
180          position wrt the file name is "known").  When a month name is
181          encountered, `next' is set to 5.  Also, the preceding
182          characters are parsed to get the file size.
183 
184          This tactic is quite dubious when it comes to
185          internationalization issues (non-English month names), but it
186          works for now.  */
187       tok = line;
188       while (ptok = tok,
189              (tok = strtok (NULL, " ")) != NULL)
190         {
191           --next;
192           if (next < 0)         /* a month name was not encountered */
193             {
194               for (i = 0; i < 12; i++)
195                 if (!c_strcasecmp (tok, months[i]))
196                   break;
197               /* If we got a month, it means the token before it is the
198                  size, and the filename is three tokens away.  */
199               if (i != 12)
200                 {
201                   wgint size;
202 
203                   /* Parse the previous token with str_to_wgint.  */
204                   if (ptok == line)
205                     {
206                       /* Something has gone wrong during parsing. */
207                       error = 1;
208                       break;
209                     }
210                   errno = 0;
211                   size = str_to_wgint (ptok, NULL, 10);
212                   if (size == WGINT_MAX && errno == ERANGE)
213                     /* Out of range -- ignore the size.  #### Should
214                        we refuse to start the download.  */
215                     cur.size = 0;
216                   else
217                     cur.size = size;
218                   DEBUGP (("size: %s; ", number_to_static_string(cur.size)));
219 
220                   month = i;
221                   next = 5;
222                   DEBUGP (("month: %s; ", months[month]));
223                 }
224             }
225           else if (next == 4)   /* days */
226             {
227               if (tok[1])       /* two-digit... */
228                 day = 10 * (*tok - '0') + tok[1] - '0';
229               else              /* ...or one-digit */
230                 day = *tok - '0';
231               DEBUGP (("day: %d; ", day));
232             }
233           else if (next == 3)
234             {
235               /* This ought to be either the time, or the year.  Let's
236                  be flexible!
237 
238                  If we have a number x, it's a year.  If we have x:y,
239                  it's hours and minutes.  If we have x:y:z, z are
240                  seconds.  */
241               year = 0;
242               min = hour = sec = 0;
243               /* We must deal with digits.  */
244               if (c_isdigit (*tok))
245                 {
246                   /* Suppose it's year.  Limit to year 99999 to avoid integer overflow. */
247                   for (; c_isdigit (*tok) && year <= 99999; tok++)
248                     year = (*tok - '0') + 10 * year;
249                   if (*tok == ':')
250                     {
251                       int n;
252                       /* This means these were hours!  */
253                       hour = year;
254                       year = 0;
255                       ptype = TT_HOUR_MIN;
256                       ++tok;
257                       /* Get the minutes...  */
258                       for (n = 0; c_isdigit (*tok) && n < 2; tok++, n++)
259                         min = (*tok - '0') + 10 * min;
260                       if (*tok == ':')
261                         {
262                           /* ...and the seconds.  */
263                           ++tok;
264                           for (n = 0; c_isdigit (*tok) && n < 2; tok++, n++)
265                             sec = (*tok - '0') + 10 * sec;
266                         }
267                     }
268                 }
269               if (year)
270                 DEBUGP (("year: %d (no tm); ", year));
271               else
272                 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
273             }
274           else if (next == 2)    /* The file name */
275             {
276               int fnlen;
277               char *p;
278 
279               /* Since the file name may contain a SPC, it is possible
280                  for strtok to handle it wrong.  */
281               fnlen = strlen (tok);
282               if (fnlen < len - (tok - line))
283                 {
284                   /* So we have a SPC in the file name.  Restore the
285                      original.  */
286                   tok[fnlen] = ' ';
287                   /* If the file is a symbolic link, it should have a
288                      ` -> ' somewhere.  */
289                   if (cur.type == FT_SYMLINK)
290                     {
291                       p = strstr (tok, " -> ");
292                       if (!p)
293                         {
294                           error = 1;
295                           break;
296                         }
297                       cur.linkto = xstrdup (p + 4);
298                       DEBUGP (("link to: %s\n", cur.linkto));
299                       /* And separate it from the file name.  */
300                       *p = '\0';
301                     }
302                 }
303               /* If we have the filename, add it to the list of files or
304                  directories.  */
305               /* "." and ".." are an exception!  */
306               if (!strcmp (tok, ".") || !strcmp (tok, ".."))
307                 {
308                   DEBUGP (("\nIgnoring `.' and `..'; "));
309                   ignore = 1;
310                   break;
311                 }
312               /* Some FTP sites choose to have ls -F as their default
313                  LIST output, which marks the symlinks with a trailing
314                  `@', directory names with a trailing `/' and
315                  executables with a trailing `*'.  This is no problem
316                  unless encountering a symbolic link ending with `@',
317                  or an executable ending with `*' on a server without
318                  default -F output.  I believe these cases are very
319                  rare.  */
320               fnlen = strlen (tok); /* re-calculate `fnlen' */
321               cur.name = xmalloc (fnlen + 1);
322               memcpy (cur.name, tok, fnlen + 1);
323               if (fnlen)
324                 {
325                   if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
326                     {
327                       cur.name[fnlen - 1] = '\0';
328                       DEBUGP (("trailing `/' on dir.\n"));
329                     }
330                   else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
331                     {
332                       cur.name[fnlen - 1] = '\0';
333                       DEBUGP (("trailing `@' on link.\n"));
334                     }
335                   else if (cur.type == FT_PLAINFILE
336                            && (cur.perms & 0111)
337                            && cur.name[fnlen - 1] == '*')
338                     {
339                       cur.name[fnlen - 1] = '\0';
340                       DEBUGP (("trailing `*' on exec.\n"));
341                     }
342                 } /* if (fnlen) */
343               else
344                 error = 1;
345               break;
346             }
347           else
348             abort ();
349         } /* while */
350 
351       if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
352         error = 1;
353 
354       DEBUGP (("%s\n", cur.name ? cur.name : ""));
355 
356       if (error || ignore)
357         {
358           DEBUGP (("Skipping.\n"));
359           xfree (cur.name);
360           xfree (cur.linkto);
361           continue;
362         }
363 
364       if (!dir)
365         {
366           l = dir = xnew (struct fileinfo);
367           memcpy (l, &cur, sizeof (cur));
368           l->prev = l->next = NULL;
369         }
370       else
371         {
372           cur.prev = l;
373           l->next = xnew (struct fileinfo);
374           l = l->next;
375           memcpy (l, &cur, sizeof (cur));
376           l->next = NULL;
377         }
378       /* Get the current time.  */
379       timenow = time (NULL);
380       tnow = localtime (&timenow);
381       /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr).  */
382       timestruct.tm_sec   = sec;
383       timestruct.tm_min   = min;
384       timestruct.tm_hour  = hour;
385       timestruct.tm_mday  = day;
386       timestruct.tm_mon   = month;
387       if (year == 0)
388         {
389           /* Some listings will not specify the year if it is "obvious"
390              that the file was from the previous year.  E.g. if today
391              is 97-01-12, and you see a file of Dec 15th, its year is
392              1996, not 1997.  Thanks to Vladimir Volovich for
393              mentioning this!  */
394           if (month > tnow->tm_mon)
395             timestruct.tm_year = tnow->tm_year - 1;
396           else
397             timestruct.tm_year = tnow->tm_year;
398         }
399       else
400         timestruct.tm_year = year;
401       if (timestruct.tm_year >= 1900)
402         timestruct.tm_year -= 1900;
403       timestruct.tm_wday  = 0;
404       timestruct.tm_yday  = 0;
405       timestruct.tm_isdst = -1;
406       l->tstamp = mktime (&timestruct); /* store the time-stamp */
407       l->ptype = ptype;
408     }
409 
410   xfree (line);
411   return dir;
412 }
413 
414 static struct fileinfo *
ftp_parse_winnt_ls(FILE * fp)415 ftp_parse_winnt_ls (FILE *fp)
416 {
417   int len;
418   int year, month, day;         /* for time analysis */
419   int hour, min;
420   size_t bufsize = 0;
421   struct tm timestruct;
422 
423   char *line = NULL, *tok;             /* tokenizer */
424   char *filename;
425   struct fileinfo *dir, *l, cur; /* list creation */
426 
427   dir = l = NULL;
428   cur.name = NULL;
429 
430   /* Line loop to end of file: */
431   while ((len = getline (&line, &bufsize, fp)) > 0)
432     {
433       len = clean_line (line, len);
434 
435       /* Name begins at 39 column of the listing if date presented in `mm-dd-yy'
436          format or at 41 column if date presented in `mm-dd-yyyy' format. Thus,
437          we cannot extract name before we parse date. Using this information we
438          also can recognize filenames that begin with a series of space
439          characters (but who really wants to use such filenames anyway?). */
440       if (len < 40) continue;
441       filename = line + 39;
442 
443       /* First column: mm-dd-yy or mm-dd-yyyy. Should atoi() on the month fail,
444          january will be assumed.  */
445       tok = strtok(line, "-");
446       if (tok == NULL) continue;
447       month = atoi(tok);
448       if (month < 0) month = 0; else month--;
449       tok = strtok(NULL, "-");
450       if (tok == NULL) continue;
451       day = atoi(tok);
452       tok = strtok(NULL, " ");
453       if (tok == NULL) continue;
454       year = atoi(tok);
455       /* Assuming the epoch starting at 1.1.1970 */
456       if (year <= 70)
457         {
458           year += 100;
459         }
460       else if (year >= 1900)
461         {
462           year -= 1900;
463           if (len < 42) continue;
464           filename += 2;
465         }
466       /* Now it is possible to determine the position of the first symbol in
467          filename. */
468       xfree (cur.name);
469       memset(&cur, 0, sizeof (cur));
470       cur.name = xstrdup(filename);
471       DEBUGP (("Name: '%s'\n", cur.name));
472 
473 
474       /* Second column: hh:mm[AP]M, listing does not contain value for
475          seconds */
476       tok = strtok(NULL,  ":");
477       if (tok == NULL) continue;
478       hour = atoi(tok);
479       tok = strtok(NULL,  "M");
480       if (tok == NULL) continue;
481       min = atoi(tok);
482       /* Adjust hour from AM/PM. Just for the record, the sequence goes
483          11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
484       if (tok[0] && tok[1]) tok+=2;
485       if (hour >= 12 || hour < 0)  hour  = 0;
486       if (*tok == 'P') hour += 12;
487 
488       DEBUGP (("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
489               year+1900, month, day, hour, min));
490 
491       /* Build the time-stamp (copy & paste from above) */
492       timestruct.tm_sec   = 0;
493       timestruct.tm_min   = min;
494       timestruct.tm_hour  = hour;
495       timestruct.tm_mday  = day;
496       timestruct.tm_mon   = month;
497       timestruct.tm_year  = year;
498       timestruct.tm_wday  = 0;
499       timestruct.tm_yday  = 0;
500       timestruct.tm_isdst = -1;
501       cur.tstamp = mktime (&timestruct); /* store the time-stamp */
502       cur.ptype = TT_HOUR_MIN;
503 
504       DEBUGP (("Timestamp: %ld\n", cur.tstamp));
505 
506       /* Third column: Either file length, or <DIR>. We also set the
507          permissions (guessed as 0644 for plain files and 0755 for
508          directories as the listing does not give us a clue) and filetype
509          here. */
510       tok = strtok(NULL, " ");
511       if (tok == NULL) continue;
512       while ((tok != NULL) && (*tok == '\0'))  tok = strtok(NULL, " ");
513       if (tok == NULL) continue;
514       if (*tok == '<')
515         {
516           cur.type  = FT_DIRECTORY;
517           cur.size  = 0;
518           cur.perms = 0755;
519           DEBUGP (("Directory\n"));
520         }
521       else
522         {
523           wgint size;
524           cur.type  = FT_PLAINFILE;
525           errno = 0;
526           size = str_to_wgint (tok, NULL, 10);
527           if (size == WGINT_MAX && errno == ERANGE)
528             cur.size = 0;       /* overflow */
529           else
530             cur.size = size;
531           cur.perms = 0644;
532           DEBUGP (("File, size %s bytes\n", number_to_static_string (cur.size)));
533         }
534 
535       cur.linkto = NULL;
536 
537       /* And put everything into the linked list */
538       if (!dir)
539         {
540           l = dir = xnew (struct fileinfo);
541           memcpy (l, &cur, sizeof (cur));
542           l->prev = l->next = NULL;
543         }
544       else
545         {
546           cur.prev = l;
547           l->next = xnew (struct fileinfo);
548           l = l->next;
549           memcpy (l, &cur, sizeof (cur));
550           l->next = NULL;
551         }
552       cur.name = NULL;
553     }
554 
555   xfree (cur.name);
556   xfree (line);
557   return dir;
558 }
559 
560 
561 
/* Convert the VMS-style directory listing stored in "file" to a
   linked list of fileinfo (system-independent) entries.  The contents
   of FILE are considered to be produced by the standard VMS
   "DIRECTORY [/SIZE [= ALL]] /DATE [/OWNER] [/PROTECTION]" command,
   more or less.  (Different VMS FTP servers may have different headers,
   and may not supply the same data, but all should be subsets of this.)

   VMS normally provides local (server) time and date information.
   Define the logical name or environment variable
   "WGET_TIMEZONE_DIFFERENTIAL" (seconds) to adjust the receiving local
   times if different from the remote local times.

   2005-02-23 SMS.
   Added code to eliminate "^" escape characters from ODS5 extended file
   names.  The TCPIP FTP server (V5.4) seems to prefer requests which do
   not use the escaped names which it provides.
*/
579 
/* Permissions assumed for VMS entries until a protection token in the
   listing overrides them: plain files and directories respectively.  */
#define VMS_DEFAULT_PROT_FILE 0644
#define VMS_DEFAULT_PROT_DIR 0755
582 
/* Return the value (0-15) of hexadecimal digit C, or -1 if C is not a
   hexadecimal digit.  Plain comparisons are used, so the result does
   not depend on the locale.  */
static int
vms_hex_value (unsigned char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  return -1;
}

/* 2005-02-23 SMS.
   eat_carets().

   Delete ODS5 extended file name escape characters ("^") in the
   original buffer.  STR is rewritten in place and never grows:

     "^xx" (two hex digits)  ->  the character with that code
     "^_"                    ->  " "
     "^/"                    ->  "?"
     "^c"  (anything else)   ->  "c"

   A lone "^" at the very end of the string has nothing to escape and
   is kept as it is.

   Note that the current scheme does not handle all EFN cases (e.g.
   Unicode escapes), but it could be made more complicated.

   NOTE(review): hex digits used to be recognized through bit 64 of
   the project-wide char_prop[] table, which the old comments call the
   hex-digit flag; confirm that the table agrees with the explicit
   test used here.
*/
static void eat_carets( char *str)
/* char *str;      Source pointer. */
{
  char *strd;   /* Destination pointer. */
  unsigned char uchr;
  int hi, lo;   /* Values of the two hex digits, or -1. */

  /* Skip ahead to the first "^", if any. */
  while ((*str != '\0') && (*str != '^'))
     str++;

  /* If no caret was found, quit early. */
  if (*str == '\0')
    return;

  /* Shift characters leftward as carets are found. */
  strd = str;
  while (*str != '\0')
    {
      uchr = *str;
      if (uchr == '^')
        {
          /* Found a caret.  Look at what follows, taking care never
             to read past the terminating NUL. */
          hi = vms_hex_value ((unsigned char) str[1]);
          lo = (hi >= 0) ? vms_hex_value ((unsigned char) str[2]) : -1;
          if (lo >= 0)
            {
              /* Two hex digits.  Build the char code from them. */
              uchr = (unsigned char) ((hi << 4) + lo);
              str += 2;
            }
          else if (str[1] != '\0')
            {
              /* Simple escaped character: drop the caret and keep the
                 character behind it, with two special cases. */
              uchr = *(++str);
              if (uchr == '_')
                {
                  /* Convert escaped "_" to " ". */
                  uchr = ' ';
                }
              else if (uchr == '/')
                {
                  /* Convert escaped "/" (invalid Zip) to "?" (invalid VMS). */
                  /* Note that this is a left-over from Info-ZIP code, and is
                     probably of little value here, except perhaps to avoid
                     directory confusion which an unconverted slash might
                     cause.
                  */
                  uchr = '?';
                }
            }
          /* Else, a caret at end of string.  Use as-is. */
        }
      /* Else, not a caret.  Use as-is. */
      *strd = uchr;

      /* Advance destination and source pointers. */
      strd++;
      str++;
    }
  /* Terminate the destination string. */
  *strd = '\0';
}
666 
667 
668 static struct fileinfo *
ftp_parse_vms_ls(FILE * fp)669 ftp_parse_vms_ls (FILE *fp)
670 {
671   int dt, i, j, len;
672   int perms;
673   size_t bufsize = 0;
674   time_t timenow;
675   struct tm *timestruct;
676   char date_str[32];
677 
678   char *line = NULL, *tok; /* tokenizer */
679   struct fileinfo *dir, *l, cur; /* list creation */
680 
681   dir = l = NULL;
682 
683   /* Skip blank lines, Directory heading, and more blank lines. */
684 
685   for (j = 0; (i = getline (&line, &bufsize, fp)) > 0; )
686     {
687       i = clean_line (line, i);
688       if (i <= 0)
689         continue; /* Ignore blank line. */
690 
691       if ((j == 0) && (line[i - 1] == ']'))
692         {
693           /* Found Directory heading line.  Next non-blank line
694           is significant. */
695           j = 1;
696         }
697       else if (!strncmp (line, "Total of ", 9))
698         {
699           /* Found "Total of ..." footing line.  No valid data
700              will follow (empty directory). */
701           i = 0; /* Arrange for early exit. */
702           break;
703         }
704       else
705         {
706           break; /* Must be significant data. */
707         }
708     }
709 
710   /* Read remainder of file until the next blank line or EOF. */
711 
712   cur.name = NULL;
713   while (i > 0)
714     {
715       char *p;
716 
717       /* The first token is the file name.  After a long name, other
718          data may be on the following line.  A valid directory name ends
719          in ".DIR;1" (any case), although some VMS FTP servers may omit
720          the version number (";1").
721       */
722 
723       tok = strtok(line, " ");
724       if (tok == NULL) tok = line;
725       DEBUGP (("file name:   '%s'\n", tok));
726 
727       /* Stripping the version number on a VMS system would be wrong.
728          It may be foolish on a non-VMS system, too, but that's someone
729          else's problem.  (Define PRESERVE_VMS_VERSIONS for proper
730          operation on other operating systems.)
731 
732          2005-02-23 SMS.
733          ODS5 extended file names may contain escaped semi-colons, so
734          the version number is identified as right-side decimal digits
735          led by a non-escaped semi-colon.  It may be absent.
736       */
737 
738 #if (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS))
739       for (p = tok + strlen (tok); (--p > tok) && c_isdigit(*p); );
740       if (p > tok && (*p == ';') && (*(p - 1) != '^'))
741         {
742           *p = '\0';
743         }
744 #endif /* (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS)) */
745 
746       /* 2005-02-23 SMS.
747          Eliminate "^" escape characters from ODS5 extended file name.
748          (A caret is invalid in an ODS2 name, so this is always safe.)
749       */
750       eat_carets (tok);
751       DEBUGP (("file name-^: '%s'\n", tok));
752 
753       /* Differentiate between a directory and any other file.  A VMS
754          listing may not include file protections (permissions).  Set a
755          default permissions value (according to the file type), which
756          may be overwritten later.  Store directory names without the
757          ".DIR;1" file type and version number, as the plain name is
758          what will work in a CWD command.
759       */
760       len = strlen (tok);
761       if (len >= 4 && !c_strncasecmp(tok + (len - 4), ".DIR", 4))
762         {
763           *(tok + (len - 4)) = '\0'; /* Discard ".DIR". */
764           cur.type  = FT_DIRECTORY;
765           cur.perms = VMS_DEFAULT_PROT_DIR;
766           DEBUGP (("Directory (nv)\n"));
767         }
768       else if (len >= 6 && !c_strncasecmp (tok + len - 6, ".DIR;1", 6))
769         {
770           *(tok + (len - 6)) = '\0'; /* Discard ".DIR;1". */
771           cur.type  = FT_DIRECTORY;
772           cur.perms = VMS_DEFAULT_PROT_DIR;
773           DEBUGP (("Directory (v)\n"));
774         }
775       else
776         {
777           cur.type  = FT_PLAINFILE;
778           cur.perms = VMS_DEFAULT_PROT_FILE;
779           DEBUGP (("File\n"));
780         }
781       xfree (cur.name);
782       cur.name = xstrdup (tok);
783       DEBUGP (("Name: '%s'\n", cur.name));
784 
785       /* Null the date and time string. */
786       *date_str = '\0';
787 
788       /* VMS lacks symbolic links. */
789       cur.linkto = NULL;
790 
791       /* VMS reports file sizes in (512-byte) disk blocks, not bytes,
792          hence useless for an integrity check based on byte-count.
793          Set size to unknown.
794       */
795       cur.size = 0;
796 
797       /* Get token 2, if any.  A long name may force all other data onto
798          a second line.  If needed, read the second line.
799       */
800 
801       tok = strtok (NULL, " ");
802       if (tok == NULL)
803         {
804           DEBUGP (("Getting additional line.\n"));
805           i = getline (&line, &bufsize, fp);
806           if (i <= 0)
807             {
808               DEBUGP (("EOF.  Leaving listing parser.\n"));
809               break;
810             }
811 
812           /* Second line must begin with " ".  Otherwise, it's a first
813              line (and we may be confused).
814           */
815           i = clean_line (line, i);
816           if (i <= 0)
817             {
818               /* Blank line.  End of significant file listing. */
819               DEBUGP (("Blank line.  Leaving listing parser.\n"));
820               break;
821             }
822           else if (line[0] != ' ')
823             {
824               DEBUGP (("Non-blank in column 1.  Must be a new file name?\n"));
825               continue;
826             }
827           else
828             {
829               tok = strtok (line, " ");
830               if (tok == NULL)
831                 {
832                   /* Unexpected non-empty but apparently blank line. */
833                   DEBUGP (("Null token.  Leaving listing parser.\n"));
834                   break;
835                 }
836             }
837         }
838 
839       /* Analyze tokens.  (Order is not significant, except date must
840          precede time.)
841 
842          Size:       ddd or ddd/ddd (where "ddd" is a decimal number)
843          Date:       DD-MMM-YYYY
844          Time:       HH:MM or HH:MM:SS or HH:MM:SS.CC
845          Owner:      [user] or [user,group]
846          Protection: (ppp,ppp,ppp,ppp) (where "ppp" is "RWED" or some
847          subset thereof, for System, Owner, Group, World.
848 
849          If permission is lacking, info may be replaced by the string:
850          "No privilege for attempted operation".
851       */
852       while (tok != NULL)
853         {
854           DEBUGP (("Token: >%s<: ", tok));
855 
856           if ((strlen (tok) < 12) && (strchr( tok, '-') != NULL))
857             {
858               /* Date. */
859               DEBUGP (("Date.\n"));
860 				  snprintf(date_str, sizeof(date_str), "%s ", tok);
861             }
862           else if ((strlen (tok) < 12) && (strchr( tok, ':') != NULL))
863             {
864               /* Time. */
865               DEBUGP (("Time. "));
866               strncat( date_str,
867                        tok,
868                        (sizeof( date_str)- strlen (date_str) - 1));
869               DEBUGP (("Date time: >%s<\n", date_str));
870             }
871           else if (strchr (tok, '[') != NULL)
872             {
873               /* Owner.  (Ignore.) */
874               DEBUGP (("Owner.\n"));
875             }
876           else if (strchr (tok, '(') != NULL)
877             {
878               /* Protections (permissions). */
879               perms = 0;
880               j = 0;
881               /*FIXME: Should not be using the variable like this. */
882               for (i = 0; i < (int) strlen(tok); i++)
883                 {
884                   switch (tok[ i])
885                     {
886                     case '(':
887                       break;
888                     case ')':
889                       break;
890                     case ',':
891                       if (j == 0)
892                         {
893                           perms = 0;
894                         }
895                       else if (j < 4)
896                         {
897                           perms <<= 3;
898                         }
899                       j++;
900                       break;
901                     case 'R':
902                       perms |= 4;
903                       break;
904                     case 'W':
905                       perms |= 2;
906                       break;
907                     case 'E':
908                       perms |= 1;
909                       break;
910                     case 'D':
911                       perms |= 2;
912                       break;
913                     }
914                 }
915               cur.perms = perms;
916               DEBUGP (("Prot.  perms = %0o.\n", (unsigned) cur.perms));
917             }
918           else
919             {
920               /* Nondescript.  Probably size(s), probably in blocks.
921                  Could be "No privilege ..." message.  (Ignore.)
922               */
923               DEBUGP (("Ignored (size?).\n"));
924             }
925 
926           tok = strtok (NULL, " ");
927         }
928 
929       /* Tokens exhausted.  Interpret the data, and fill in the
930          structure.
931       */
932       /* Fill tm timestruct according to date-time string.  Fractional
933          seconds are ignored.  Default to current time, if conversion
934          fails.
935       */
936       timenow = time( NULL);
937       timestruct = localtime( &timenow );
938       strptime( date_str, "%d-%b-%Y %H:%M:%S", timestruct);
939 
940       /* Convert struct tm local time to time_t local time. */
941       timenow = mktime (timestruct);
942       /* Offset local time according to environment variable (seconds). */
943       if ((tok = getenv ( "WGET_TIMEZONE_DIFFERENTIAL")) != NULL)
944         {
945           dt = atoi (tok);
946           DEBUGP (("Time differential = %d.\n", dt));
947         }
948       else
949         dt = 0;
950 
951       if (dt >= 0)
952         timenow += dt;
953       else
954         timenow -= (-dt);
955 
956       cur.tstamp = timenow; /* Store the time-stamp. */
957       DEBUGP (("Timestamp: %ld\n", cur.tstamp));
958       cur.ptype = TT_HOUR_MIN;
959 
960       /* Add the data for this item to the linked list, */
961       if (!dir)
962         {
963           l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
964           cur.prev = cur.next = NULL;
965           memcpy (l, &cur, sizeof (cur));
966         }
967       else
968         {
969           cur.prev = l;
970 			 cur.next = NULL;
971           l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
972           l = l->next;
973           memcpy (l, &cur, sizeof (cur));
974         }
975       cur.name = NULL;
976 
977       i = getline (&line, &bufsize, fp);
978       if (i > 0)
979         {
980           i = clean_line (line, i);
981           if (i <= 0)
982             {
983               /* Blank line.  End of significant file listing. */
984               break;
985             }
986         }
987     }
988 
989   xfree (cur.name);
990   xfree (line);
991   return dir;
992 }
993 
994 
995 /* This function switches between the correct parsing routine depending on
996    the SYSTEM_TYPE. The system type should be based on the result of the
997    "SYST" response of the FTP server. According to this response we will
998    use on of the three different listing parsers that cover the most of FTP
999    servers used nowadays.  */
1000 
1001 struct fileinfo *
ftp_parse_ls(const char * file,const enum stype system_type)1002 ftp_parse_ls (const char *file, const enum stype system_type)
1003 {
1004   FILE *fp;
1005   struct fileinfo *fi;
1006 
1007   fp = fopen (file, "rb");
1008   if (!fp)
1009     {
1010       logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1011       return NULL;
1012     }
1013 
1014   fi = ftp_parse_ls_fp (fp, system_type);
1015   fclose(fp);
1016 
1017   return fi;
1018 }
1019 
1020 struct fileinfo *
ftp_parse_ls_fp(FILE * fp,const enum stype system_type)1021 ftp_parse_ls_fp (FILE *fp, const enum stype system_type)
1022 {
1023   switch (system_type)
1024     {
1025     case ST_UNIX:
1026       return ftp_parse_unix_ls (fp, 0);
1027     case ST_WINNT:
1028       {
1029         /* Detect whether the listing is simulating the UNIX format */
1030         int   c = fgetc(fp);
1031         rewind(fp);
1032 
1033         /* If the first character of the file is '0'-'9', it's WINNT
1034            format. */
1035         if (c >= '0' && c <='9')
1036           return ftp_parse_winnt_ls (fp);
1037         else
1038           return ftp_parse_unix_ls (fp, 1);
1039       }
1040     case ST_VMS:
1041       return ftp_parse_vms_ls (fp);
1042     case ST_MACOS:
1043       return ftp_parse_unix_ls (fp, 1);
1044     default:
1045       logprintf (LOG_NOTQUIET, _("\
1046 Unsupported listing type, trying Unix listing parser.\n"));
1047       return ftp_parse_unix_ls (fp, 0);
1048     }
1049 }
1050 
1051 /* Stuff for creating FTP index. */
1052 
1053 /* The function creates an HTML index containing references to given
1054    directories and files on the appropriate host.  The references are
1055    FTP.  */
1056 uerr_t
ftp_index(const char * file,struct url * u,struct fileinfo * f)1057 ftp_index (const char *file, struct url *u, struct fileinfo *f)
1058 {
1059   FILE *fp;
1060   char *upwd;
1061   char *htcldir;                /* HTML-clean dir name */
1062   char *htclfile;               /* HTML-clean file name */
1063   char *urlclfile;              /* URL-clean file name */
1064 
1065   if (!output_stream)
1066     {
1067       fp = fopen (file, "wb");
1068       if (!fp)
1069         {
1070           logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1071           return FOPENERR;
1072         }
1073     }
1074   else
1075     fp = output_stream;
1076   if (u->user)
1077     {
1078       char *tmpu, *tmpp;        /* temporary, clean user and passwd */
1079 
1080       tmpu = url_escape (u->user);
1081       tmpp = u->passwd ? url_escape (u->passwd) : NULL;
1082       if (tmpp)
1083         upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
1084       else
1085         upwd = concat_strings (tmpu, "@", (char *) 0);
1086       xfree (tmpu);
1087       xfree (tmpp);
1088     }
1089   else
1090     upwd = xstrdup ("");
1091 
1092   htcldir = html_quote_string (u->dir);
1093 
1094   fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
1095   fprintf (fp, "<html>\n<head>\n<title>");
1096   fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1097   fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
1098   fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1099   fprintf (fp, "</h1>\n<hr>\n<pre>\n");
1100 
1101   while (f)
1102     {
1103       fprintf (fp, "  ");
1104       if (f->tstamp != -1)
1105         {
1106           /* #### Should we translate the months?  Or, even better, use
1107              ISO 8601 dates?  */
1108           static const char *months[] = {
1109             "Jan", "Feb", "Mar", "Apr", "May", "Jun",
1110             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
1111           };
1112           time_t tstamp = f->tstamp;
1113           struct tm *ptm = localtime (&tstamp);
1114 
1115           fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
1116                   ptm->tm_mday);
1117           if (f->ptype == TT_HOUR_MIN)
1118             fprintf (fp, "%02d:%02d  ", ptm->tm_hour, ptm->tm_min);
1119           else
1120             fprintf (fp, "       ");
1121         }
1122       else
1123         fprintf (fp, _("time unknown       "));
1124       switch (f->type)
1125         {
1126         case FT_PLAINFILE:
1127           fprintf (fp, _("File        "));
1128           break;
1129         case FT_DIRECTORY:
1130           fprintf (fp, _("Directory   "));
1131           break;
1132         case FT_SYMLINK:
1133           fprintf (fp, _("Link        "));
1134           break;
1135         default:
1136           fprintf (fp, _("Not sure    "));
1137           break;
1138         }
1139       htclfile = html_quote_string (f->name);
1140       urlclfile = url_escape_unsafe_and_reserved (f->name);
1141       fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
1142       if (*u->dir != '/')
1143         putc ('/', fp);
1144       /* XXX: Should probably URL-escape dir components here, rather
1145        * than just HTML-escape, for consistency with the next bit where
1146        * we use urlclfile for the file component. Anyway, this is safer
1147        * than what we had... */
1148       fprintf (fp, "%s", htcldir);
1149       if (*u->dir)
1150         putc ('/', fp);
1151       fprintf (fp, "%s", urlclfile);
1152       if (f->type == FT_DIRECTORY)
1153         putc ('/', fp);
1154       fprintf (fp, "\">%s", htclfile);
1155       if (f->type == FT_DIRECTORY)
1156         putc ('/', fp);
1157       fprintf (fp, "</a> ");
1158       if (f->type == FT_PLAINFILE)
1159         fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
1160       else if (f->type == FT_SYMLINK)
1161         fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
1162       putc ('\n', fp);
1163       xfree (htclfile);
1164       xfree (urlclfile);
1165       f = f->next;
1166     }
1167   fprintf (fp, "</pre>\n</body>\n</html>\n");
1168   xfree (htcldir);
1169   xfree (upwd);
1170   if (!output_stream)
1171     fclose (fp);
1172   else
1173     fflush (fp);
1174   return FTPOK;
1175 }
1176