1 /* Various utility functions.
2    Copyright (C) 1996-2011, 2015, 2018-2021 Free Software Foundation,
3    Inc.
4 
5 This file is part of GNU Wget.
6 
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11 
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19 
20 Additional permission under GNU GPL version 3 section 7
21 
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work.  */
30 
31 #include "wget.h"
32 
33 #include "sha256.h"
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <time.h>
38 #include <unistd.h>
39 #ifdef HAVE_PROCESS_H
40 # include <process.h>  /* getpid() */
41 #endif
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <assert.h>
45 #include <stdarg.h>
46 #include <locale.h>
47 #include <errno.h>
48 #include <utime.h>
49 
50 #include <sys/time.h>
51 
52 #include <sys/stat.h>
53 
54 /* For TIOCGWINSZ and friends: */
55 #ifndef WINDOWS
56 # include <sys/ioctl.h>
57 # include <termios.h>
58 #endif
59 
60 /* Needed for Unix version of run_with_timeout. */
61 #include <signal.h>
62 #include <setjmp.h>
63 
64 #include <regex.h>
65 #ifdef HAVE_LIBPCRE2
66 # define PCRE2_CODE_UNIT_WIDTH 8
67 # include <pcre2.h>
68 #elif defined HAVE_LIBPCRE
69 # include <pcre.h>
70 #endif
71 
72 #ifndef HAVE_SIGSETJMP
73 /* If sigsetjmp is a macro, configure won't pick it up. */
74 # ifdef sigsetjmp
75 #  define HAVE_SIGSETJMP
76 # endif
77 #endif
78 
79 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
80 # define USE_SIGNAL_TIMEOUT
81 #endif
82 
83 /* Some systems (Linux libc5, "NCR MP-RAS 3.0", and others) don't
84    provide MAP_FAILED, a symbolic constant for the value returned by
85    mmap() when it doesn't work.  Usually, this constant should be -1.
86    This only makes sense for files that use mmap() and include
87    sys/mman.h *before* sysdep.h, but doesn't hurt others.  */
88 #ifdef HAVE_MMAP
89 # include <sys/mman.h>
90 # ifndef MAP_FAILED
91 #  define MAP_FAILED ((void *) -1)
92 # endif
93 #endif
94 
95 #include "utils.h"
96 #include "hash.h"
97 
98 #ifdef __VMS
99 #include "vms.h"
100 #endif /* def __VMS */
101 
102 #ifdef TESTING
103 #include "../tests/unit-tests.h"
104 #endif
105 
106 #include "exits.h"
107 #include "c-strcase.h"
108 
109 _Noreturn static void
memfatal(const char * context,long attempted_size)110 memfatal (const char *context, long attempted_size)
111 {
112   /* Make sure we don't try to store part of the log line, and thus
113      call malloc.  */
114   log_set_save_context (false);
115 
116   /* We have different log outputs in different situations:
117      1) output without bytes information
118      2) output with bytes information  */
119   if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
120     {
121       logprintf (LOG_ALWAYS,
122                  _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
123                  exec_name, context);
124     }
125   else
126     {
127       logprintf (LOG_ALWAYS,
128                  _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
129                  exec_name, context, attempted_size);
130     }
131 
132   exit (WGET_EXIT_GENERIC_ERROR);
133 }
134 
135 /* Character property table for (re-)escaping VMS ODS5 extended file
136    names.  Note that this table ignores Unicode.
137 
138    ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
139 
140    ODS5 Invalid characters:
141       C0 control codes (0x00 to 0x1F inclusive)
142       Asterisk (*)
143       Question mark (?)
144 
145    ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
146       Double quotation marks (")
147       Backslash (\)
148       Colon (:)
149       Left angle bracket (<)
150       Right angle bracket (>)
151       Slash (/)
152       Vertical bar (|)
153 
154    Characters escaped by "^":
155       SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
156        @  [  \  ]  ^  `  {  |  }  ~
157 
158    Either "^_" or "^ " is accepted as a space.  Period (.) is a special
159    case.  Note that un-escaped < and > can also confuse a directory
160    spec.
161 
162    Characters put out as ^xx:
163       7F (DEL)
164       80-9F (C1 control characters)
165       A0 (nonbreaking space)
166       FF (Latin small letter y diaeresis)
167 
168    Other cases:
169       Unicode: "^Uxxxx", where "xxxx" is four hex digits.
170 
171     Property table values:
172       Normal escape:    1
173       Space:            2
174       Dot:              4
175       Hex-hex escape:   8
176       ODS2 normal:     16
177       ODS2 lower case: 32
178       Hex digit:       64
179 */
180 
/* Indexed by character code (0-255), sixteen entries per row.  Each
   entry is the sum of the property table values (listed in the comment
   above) that apply to that character, e.g. 80 = hex digit (64) +
   ODS2 normal (16), 96 = hex digit (64) + ODS2 lower case (32),
   17 = normal escape (1) + ODS2 normal (16).  */
unsigned char char_prop[ 256] = {

/* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
    2,  1,  1,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,

/*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
   80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  1,  1,  1,  1,  1,  1,

/*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
    1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,

/*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
   16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  1,  1,  1, 16,

/*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
    1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,

/*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
   32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  1,  1, 17,  8,

/* 0x80 - 0x8F: hex-hex escape (8) */
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
/* 0x90 - 0x9F: hex-hex escape (8) */
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
/* 0xA0 - 0xAF: only 0xA0 carries the hex-hex escape (8) */
    8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xB0 - 0xBF: no properties */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xC0 - 0xCF: no properties */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xD0 - 0xDF: no properties */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xE0 - 0xEF: no properties */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xF0 - 0xFF: only 0xFF carries the hex-hex escape (8) */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
};
216 
217 /* Utility function: like xstrdup(), but also lowercases S.  */
218 
char *
xstrdup_lower (const char *s)
{
  /* Work on a private copy; S itself is never modified.  */
  char *lowered = xstrdup (s);
  size_t i;

  for (i = 0; lowered[i] != '\0'; i++)
    lowered[i] = c_tolower (lowered[i]);

  return lowered;
}
228 
229 /* Copy the string formed by two pointers (one on the beginning, other
230    on the char after the last char) to a new, malloc-ed location.
231    0-terminate it.
232    If both pointers are NULL, the function returns an empty string.  */
char *
strdupdelim (const char *beg, const char *end)
{
  size_t len;
  char *copy;

  /* No start pointer, or a range that runs backwards: the result is a
     freshly allocated empty string.  */
  if (!beg || beg > end)
    return xstrdup("");

  len = end - beg;
  copy = xmalloc (len + 1);
  memcpy (copy, beg, len);
  copy[len] = '\0';
  return copy;
}
246 
247 /* Parse a string containing comma-separated elements, and return a
248    vector of char pointers with the elements.  Spaces following the
249    commas are ignored.  */
char **
sepstring (const char *s)
{
  char **vec = NULL;
  const char *tok;              /* start of the element being scanned */
  int count = 0;                /* elements stored in VEC so far */

  /* NULL and the empty string both produce no vector at all.  */
  if (s == NULL || *s == '\0')
    return NULL;

  for (tok = s; *s != '\0'; )
    {
      if (*s != ',')
        {
          s++;
          continue;
        }

      /* A comma closes the current element.  Keep one spare slot so
         the vector is NULL-terminated after every append.  */
      vec = xrealloc (vec, (count + 2) * sizeof (char *));
      vec[count++] = strdupdelim (tok, s);
      vec[count] = NULL;

      /* Step over the comma and any whitespace following it.  */
      for (s++; c_isspace (*s); s++)
        ;
      tok = s;
    }

  /* Whatever follows the last comma (possibly nothing) is the final
     element.  */
  vec = xrealloc (vec, (count + 2) * sizeof (char *));
  vec[count] = strdupdelim (tok, s);
  vec[count + 1] = NULL;
  return vec;
}
282 
283 /* Like sprintf, but prints into a string of sufficient size freshly
284    allocated with malloc, which is returned.  If unable to print due
285    to invalid format, returns NULL.  Inability to allocate needed
286    memory results in abort, as with xmalloc.  This is in spirit
287    similar to the GNU/BSD extension asprintf, but somewhat easier to
288    use.
289 
290    Internally the function either calls vasprintf or loops around
291    vsnprintf until the correct size is found.  Since Wget also ships a
292    fallback implementation of vsnprintf, this should be portable.  */
293 
char *
aprintf (const char *fmt, ...)
{
#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
  /* Use vasprintf, which allocates a buffer of the right size itself. */
  int ret;
  va_list args;
  char *str;
  va_start (args, fmt);
  ret = vasprintf (&str, fmt, args);
  va_end (args);
  if (ret < 0 && errno == ENOMEM)
    memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
                                                      with xmalloc/xrealloc */
  else if (ret < 0)
    return NULL;                /* any other failure is reported as NULL */
  return str;
#else  /* not HAVE_VASPRINTF */

/* Upper bound for the text buffer in the doubling case below.  It
   applies when vasprintf is not available and vsnprintf returns -1 on
   truncation (old versions of glibc and other systems without C99
   semantics), so the needed size cannot be learned from the return
   value.  */

#define FMT_MAX_LENGTH 1048576

  /* vasprintf is unavailable.  snprintf into a small buffer and
     resize it as necessary. */
  int size = 32;
  char *str = xmalloc (size);

  /* If FMT makes vsnprintf return -1 consistently, the buffer is
     doubled on every pass until SIZE reaches FMT_MAX_LENGTH, at which
     point a message is logged and the program aborts.  */

  while (1)
    {
      int n;
      va_list args;

      /* ARGS is restarted on every pass because each vsnprintf call
         consumes it.  */
      va_start (args, fmt);
      n = vsnprintf (str, size, fmt, args);
      va_end (args);

      /* If the printing worked, return the string. */
      if (n > -1 && n < size)
        return str;

      /* Else try again with a larger buffer. */
      if (n > -1)               /* C99 */
        size = n + 1;           /* precisely what is needed */
      else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
        {                               /* maybe we have some wrong
                                           format string? */
          logprintf (LOG_ALWAYS,
                     _("%s: aprintf: text buffer is too big (%d bytes), "
                       "aborting.\n"),
                     exec_name, size);  /* printout a log message */
          abort ();                     /* and abort... */
        }
      else
        {
          /* else, we continue to grow our
           * buffer: Twice the old size. */
          size <<= 1;
        }
      str = xrealloc (str, size);
    }
#endif /* not HAVE_VASPRINTF */
}
363 
#ifndef HAVE_STRLCPY
/* Fallback for the BSD strlcpy().  Copies at most SIZE - 1 bytes of
 * SRC into DST and always NUL-terminates DST when SIZE is non-zero.
 * With SIZE == 0 nothing is written.  Returns strlen (SRC), so the
 * caller can detect truncation by comparing the result with SIZE. */

size_t
strlcpy (char *dst, const char *src, size_t size)
{
  size_t n = 0;

  if (size > 0)
    {
      /* Stop one short of SIZE so the terminator always fits. */
      for (; n + 1 < size && src[n] != '\0'; n++)
        dst[n] = src[n];
      dst[n] = '\0';
    }

  /* Walk the uncopied remainder of SRC to get its full length. */
  while (src[n] != '\0')
    n++;

  return n;
}
#endif
389 
390 /* Concatenate the NULL-terminated list of string arguments into
391    freshly allocated space.  */
392 
char *
concat_strings (const char *str0, ...)
{
  va_list ap;
  const char *piece;
  size_t total = 0;
  char *result;
  char *out;

  /* An empty argument list (NULL first argument) yields NULL, not "".  */
  if (str0 == NULL)
    return NULL;

  /* First pass: add up the lengths of all arguments up to the NULL
     that ends the list.  */
  va_start (ap, str0);
  piece = str0;
  while (piece != NULL)
    {
      total += strlen (piece);
      piece = va_arg (ap, const char *);
    }
  va_end (ap);

  result = xmalloc (total + 1);

  /* Second pass: lay the pieces down back to back.  */
  out = result;
  va_start (ap, str0);
  piece = str0;
  while (piece != NULL)
    {
      size_t n = strlen (piece);
      memcpy (out, piece, n);
      out += n;
      piece = va_arg (ap, const char *);
    }
  va_end (ap);
  *out = '\0';

  return result;
}
420 
421 /* Format the provided time according to the specified format.  The
422    format is a string with format elements supported by strftime.  */
423 
static char *
fmttime (time_t t, const char *fmt)
{
  /* Shared result buffer: every call overwrites the previous result.  */
  static char stamp[32];
  const struct tm *broken_down = localtime (&t);

  /* Abort if T cannot be converted, or if strftime reports 0 (the
     result did not fit into STAMP, or FMT produced no output).  */
  if (broken_down == NULL
      || strftime (stamp, sizeof (stamp), fmt, broken_down) == 0)
    abort ();

  return stamp;
}
435 
/* Return pointer to a static char[] buffer in which zero-terminated
   string-representation of T (in form hh:mm:ss) is printed.

   T is passed by value, so there is no NULL shortcut for "now": to
   format the current time, pass the result of time (NULL).  */
440 
char *
time_str (time_t t)
{
  /* strftime pattern for hh:mm:ss; the result lives in fmttime's
     static buffer.  */
  static const char hms_format[] = "%H:%M:%S";

  return fmttime (t, hms_format);
}
446 
447 /* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
448 
char *
datetime_str (time_t t)
{
  /* strftime pattern for YYYY-MM-DD hh:mm:ss; the result lives in
     fmttime's static buffer.  */
  static const char datetime_format[] = "%Y-%m-%d %H:%M:%S";

  return fmttime (t, datetime_format);
}
454 
455 /* The Windows versions of the following two functions are defined in
456    mswindows.c. On MSDOS this function should never be called. */
457 
#ifdef __VMS

/* VMS variant: never forks; always reports that no log file was set
   up.  */
bool
fork_to_background (void)
{
  return false;
}

#else /* def __VMS */

#if !defined(WINDOWS) && !defined(MSDOS)
/* Fork and let the child carry on in the background while the parent
   announces the child's pid and exits.  Returns (in the child only)
   true if this function created a log file and stored its name in
   opt.lfilename, false otherwise.  If fork() fails, the process exits
   with WGET_EXIT_GENERIC_ERROR.  */
bool
fork_to_background (void)
{
  pid_t pid;
  /* Whether we arrange our own version of opt.lfilename here.  */
  bool logfile_changed = false;

  /* Only pick a log file if the user named none and there would be
     something to log (not quiet, or server responses requested).  */
  if (!opt.lfilename && (!opt.quiet || opt.server_response))
    {
      /* We must create the file immediately to avoid either a race
         condition (which arises from using unique_name and failing to
         use fopen_excl) or lying to the user about the log file name
         (which arises from using unique_name, printing the name, and
         using fopen_excl later on.)  */
      FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
      if (new_log_fp)
        {
          logfile_changed = true;
          /* Only the file's existence and name matter here; the stream
             itself is not kept.  */
          fclose (new_log_fp);
        }
    }
  pid = fork ();
  if (pid < 0)
    {
      /* parent, error */
      perror ("fork");
      exit (WGET_EXIT_GENERIC_ERROR);
    }
  else if (pid != 0)
    {
      /* parent, no error */
      printf (_("Continuing in background, pid %d.\n"), (int) pid);
      if (logfile_changed)
        printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
      exit (WGET_EXIT_SUCCESS);                 /* #### should we use _exit()? */
    }

  /* child: start a new session and point the three standard streams
     at /dev/null, then keep running.  A failed redirection is only
     reported in debug output.  */
  setsid ();
  if (freopen ("/dev/null", "r", stdin) == NULL)
    DEBUGP (("Failed to redirect stdin to /dev/null.\n"));
  if (freopen ("/dev/null", "w", stdout) == NULL)
    DEBUGP (("Failed to redirect stdout to /dev/null.\n"));
  if (freopen ("/dev/null", "w", stderr) == NULL)
    DEBUGP (("Failed to redirect stderr to /dev/null.\n"));

  return logfile_changed;
}
#endif /* !WINDOWS && !MSDOS */

#endif /* def __VMS [else] */
520 
521 
522 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
523    specified with TM.  The atime ("access time") is set to the current
524    time.  */
525 
526 void
touch(const char * file,time_t tm)527 touch (const char *file, time_t tm)
528 {
529   struct utimbuf times;
530 
531   times.modtime = tm;
532   times.actime = time (NULL);
533 
534   if (utime (file, &times) == -1)
535     logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
536 }
537 
538 /* Checks if FILE is a symbolic link, and removes it if it is.  Does
539    nothing under MS-Windows.  */
540 int
remove_link(const char * file)541 remove_link (const char *file)
542 {
543   int err = 0;
544   struct stat st;
545 
546   if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
547     {
548       DEBUGP (("Unlinking %s (symlink).\n", file));
549       err = unlink (file);
550       if (err != 0)
551         logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
552                    quote (file), strerror (errno));
553     }
554   return err;
555 }
556 
557 /* Does FILENAME exist? */
558 bool
file_exists_p(const char * filename,file_stats_t * fstats)559 file_exists_p (const char *filename, file_stats_t *fstats)
560 {
561   struct stat buf;
562 
563   if (!filename)
564 	  return false;
565 
566 #if defined(WINDOWS) || defined(__VMS)
567     int ret = stat (filename, &buf);
568     if (ret >= 0)
569     {
570       if (fstats != NULL)
571         fstats->access_err = errno;
572     }
573     return ret >= 0;
574 #else
575   errno = 0;
576   if (stat (filename, &buf) == 0 && S_ISREG(buf.st_mode) &&
577               (((S_IRUSR & buf.st_mode) && (getuid() == buf.st_uid))  ||
578                ((S_IRGRP & buf.st_mode) && group_member(buf.st_gid))  ||
579                 (S_IROTH & buf.st_mode))) {
580     if (fstats != NULL)
581     {
582       fstats->access_err = 0;
583       fstats->st_ino = buf.st_ino;
584       fstats->st_dev = buf.st_dev;
585     }
586     return true;
587   }
588   else
589   {
590     if (fstats != NULL)
591       fstats->access_err = (errno == 0 ? EACCES : errno);
592     errno = 0;
593     return false;
594   }
595   /* NOTREACHED */
596 #endif
597 }
598 
599 /* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
600    Returns 0 on error.  */
bool
file_non_directory_p (const char *path)
{
  struct stat info;

  /* lstat(), not stat(): the link itself is examined, so a symbolic
     link that points to a directory is identified correctly.  A path
     that cannot be examined at all is reported as false.  */
  return lstat (path, &info) == 0 && !S_ISDIR (info.st_mode);
}
611 
612 /* Return the size of file named by FILENAME, or -1 if it cannot be
613    opened or sought into. */
614 wgint
file_size(const char * filename)615 file_size (const char *filename)
616 {
617 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
618   wgint size;
619   /* We use fseek rather than stat to determine the file size because
620      that way we can also verify that the file is readable without
621      explicitly checking for permissions.  Inspired by the POST patch
622      by Arnaud Wylie.  */
623   FILE *fp = fopen (filename, "rb");
624   if (!fp)
625     return -1;
626   fseeko (fp, 0, SEEK_END);
627   size = ftello (fp);
628   fclose (fp);
629   return size;
630 #else
631   struct stat st;
632   if (stat (filename, &st) < 0)
633     return -1;
634   return st.st_size;
635 #endif
636 }
637 
638 /* 2005-02-19 SMS.
639    If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
640    original name.  With the VMS file systems' versioning, everything
641    should be fine, and appending ".NN" just causes trouble.
642 */
643 
644 #ifdef UNIQ_SEP
645 
646 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
647    doesn't exist is found.  Return a freshly allocated copy of the
648    unused file name.  */
649 
650 static char *
unique_name_1(const char * prefix)651 unique_name_1 (const char *prefix)
652 {
653   int count = 1;
654   int plen = strlen (prefix);
655   char *template = xmalloc (plen + 1 + 24);
656   char *template_tail = template + plen;
657 
658   memcpy (template, prefix, plen);
659   *template_tail++ = UNIQ_SEP;
660 
661   do
662     number_to_string (template_tail, count++);
663   while (file_exists_p (template, NULL) && count < 999999);
664 
665   return template;
666 }
667 
668 /* Return a unique file name, based on FILE.
669 
670    More precisely, if FILE doesn't exist, it is returned unmodified.
671    If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
672    file name that doesn't exist is returned.
673 
674    2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
675 
676    The resulting file is not created, only verified that it didn't
677    exist at the point in time when the function was called.
678    Therefore, where security matters, don't rely that the file created
679    by this function exists until you open it with O_EXCL or
680    equivalent.
681 
682    unique_name() always returns a freshly allocated string.
683 
684    unique_name_passthrough() may return FILE if the file doesn't exist
685    (and therefore doesn't need changing).  */
686 
687 char *
unique_name_passthrough(const char * file)688 unique_name_passthrough (const char *file)
689 {
690   /* If the FILE itself doesn't exist, return it without
691      modification. Otherwise, find a numeric suffix that results in unused
692      file name and return it.  */
693   return file_exists_p (file, NULL) ? unique_name_1 (file) : (char *) file;
694 }
695 
696 char *
unique_name(const char * file)697 unique_name (const char *file)
698 {
699   /* If the FILE itself doesn't exist, return it without
700      modification. Otherwise, find a numeric suffix that results in unused
701      file name and return it.  */
702   return file_exists_p (file, NULL) ? unique_name_1 (file) : xstrdup (file);
703 }
704 
705 #else /* def UNIQ_SEP */
706 
707 /* Dummy unique_name() for VMS.  Return the original name as easily as
708    possible.
709 */
/* Takes exactly one argument, like the UNIQ_SEP definition of this
   function, so that both builds share one prototype.  */
char *
unique_name_passthrough (const char *file)
{
  /* Return the FILE pointer itself, unmodified and not copied,
     whether or not the file exists.  */
  return (char *) file;
}

char *
unique_name (const char *file)
{
  /* No suffix is ever added in this configuration; the name is only
     duplicated so that the result is freshly allocated.  */
  char *copy = xstrdup (file);

  return copy;
}
723 
724 #endif /* def UNIQ_SEP [else] */
725 
726 /* Create a file based on NAME, except without overwriting an existing
727    file with that name.  Providing O_EXCL is correctly implemented,
728    this function does not have the race condition associated with
729    opening the file returned by unique_name.  */
730 
FILE *
unique_create (const char *name, bool binary, char **opened_name)
{
  /* Candidate file name derived from NAME.  */
  char *candidate = unique_name (name);
  FILE *fp;

  for (;;)
    {
      fp = fopen_excl (candidate, binary);
      if (fp != NULL || errno != EEXIST)
        break;

      /* The candidate already exists (EEXIST from the exclusive
         open): discard it and compute a new one.  */
      xfree (candidate);
      candidate = unique_name (name);
    }

  if (opened_name == NULL)
    {
      /* The caller does not want the name.  */
      xfree (candidate);
    }
  else if (fp != NULL)
    {
      /* Success: ownership of the name passes to the caller.  */
      *opened_name = candidate;
    }
  else
    {
      *opened_name = NULL;
      xfree (candidate);
    }

  return fp;
}
756 
757 /* Open the file for writing, with the addition that the file is
758    opened "exclusively".  This means that, if the file already exists,
759    this function will *fail* and errno will be set to EEXIST.  If
760    BINARY is set, the file will be opened in binary mode, equivalent
761    to fopen's "wb".
762 
763    If opening the file fails for any reason, including the file having
764    previously existed, this function returns NULL and sets errno
765    appropriately.  */
766 
FILE *
fopen_excl (const char *fname, int binary)
{
#ifdef O_EXCL
  int fd;
  FILE *fp;

/* 2005-04-14 SMS.
   VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
   It also has file versions which obviate all the O_EXCL effort.
   O_TRUNC (something of a misnomer) requests a new version.
*/
# ifdef __VMS
/* Common open() optional arguments:
   sequential access only, access callback function.
*/
#  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id

  int open_id;
  int flags = O_WRONLY | O_CREAT | O_TRUNC;

  if (binary > 1)
    {
      open_id = 11;
      fd = open( fname,                 /* File name. */
       flags,                           /* Flags. */
       0777,                            /* Mode for default protection. */
       "ctx=bin,stm",                   /* Binary, stream access. */
       "rfm=stmlf",                     /* Stream_LF. */
       OPEN_OPT_ARGS);                  /* Access callback. */
    }
  else if (binary)
    {
      open_id = 12;
      fd = open( fname,                 /* File name. */
       flags,                           /* Flags. */
       0777,                            /* Mode for default protection. */
       "ctx=bin,stm",                   /* Binary, stream access. */
       "rfm=fix",                       /* Fixed-length, */
       "mrs=512",                       /* 512-byte records. */
       OPEN_OPT_ARGS);                  /* Access callback. */
    }
  else
    {
      open_id = 13;
      fd = open( fname,                 /* File name. */
       flags,                           /* Flags. */
       0777,                            /* Mode for default protection. */
       "rfm=stmlf",                     /* Stream_LF. */
       OPEN_OPT_ARGS);                  /* Access callback. */
    }
# else /* def __VMS */
  /* O_CREAT | O_EXCL makes open() itself fail with EEXIST if the file
     is already there, so there is no check-then-create window.  */
  int flags = O_WRONLY | O_CREAT | O_EXCL;
# ifdef O_BINARY
  if (binary)
    flags |= O_BINARY;
# endif
  fd = open (fname, flags, 0666);
# endif /* def __VMS [else] */

  if (fd < 0)
    return NULL;

  fp = fdopen (fd, binary ? "wb" : "w");
  if (fp == NULL)
    {
      /* fdopen() failed, so no stream owns the descriptor: close it
         here rather than leak it, and keep fdopen's errno for the
         caller.  */
      int saved_errno = errno;
      close (fd);
      errno = saved_errno;
    }
  return fp;
#else  /* not O_EXCL */
  /* Manually check whether the file exists.  This is prone to race
     conditions, but systems without O_EXCL haven't deserved
     better.  */
  if (file_exists_p (fname, NULL))
    {
      errno = EEXIST;
      return NULL;
    }
  return fopen (fname, binary ? "wb" : "w");
#endif /* not O_EXCL */
}
841 
842 /* fopen_stat() assumes that file_exists_p() was called earlier.
843    file_stats_t passed to this function was returned from file_exists_p()
844    This is to prevent TOCTTOU race condition.
845    Details : FIO45-C from https://www.securecoding.cert.org/
846    Note that for creating a new file, this check is not useful
847 
848    Input:
849      fname  => Name of file to open
850      mode   => File open mode
851      fstats => Saved file_stats_t about file that was checked for existence
852 
853    Returns:
854      NULL if there was an error
855      FILE * of opened file stream
856 */
857 FILE *
fopen_stat(const char * fname,const char * mode,file_stats_t * fstats)858 fopen_stat(const char *fname, const char *mode, file_stats_t *fstats)
859 {
860   int fd;
861   FILE *fp;
862   struct stat fdstats;
863 
864 #if defined FUZZING && defined TESTING
865   fp = fopen_wgetrc (fname, mode);
866   return fp;
867 #else
868   fp = fopen (fname, mode);
869 #endif
870   if (fp == NULL)
871   {
872     logprintf (LOG_NOTQUIET, _("Failed to Fopen file %s\n"), fname);
873     return NULL;
874   }
875   fd = fileno (fp);
876   if (fd < 0)
877   {
878     logprintf (LOG_NOTQUIET, _("Failed to get FD for file %s\n"), fname);
879     fclose (fp);
880     return NULL;
881   }
882   memset(&fdstats, 0, sizeof(fdstats));
883   if (fstat (fd, &fdstats) == -1)
884   {
885     logprintf (LOG_NOTQUIET, _("Failed to stat file %s, (check permissions)\n"), fname);
886     fclose (fp);
887     return NULL;
888   }
889 #if !(defined(WINDOWS) || defined(__VMS))
890   if (fstats != NULL &&
891       (fdstats.st_dev != fstats->st_dev ||
892        fdstats.st_ino != fstats->st_ino))
893   {
894     /* File changed since file_exists_p() : NOT SAFE */
895     logprintf (LOG_NOTQUIET, _("File %s changed since the last check. Security check failed.\n"), fname);
896     fclose (fp);
897     return NULL;
898   }
899 #endif
900 
901   return fp;
902 }
903 
904 /* open_stat assumes that file_exists_p() was called earlier to save file_stats
905    file_stats_t passed to this function was returned from file_exists_p()
906    This is to prevent TOCTTOU race condition.
907    Details : FIO45-C from https://www.securecoding.cert.org/
908    Note that for creating a new file, this check is not useful
909 
910 
911    Input:
912      fname  => Name of file to open
913      flags  => File open flags
914      mode   => File open mode
915      fstats => Saved file_stats_t about file that was checked for existence
916 
917    Returns:
918      -1 if there was an error
919      file descriptor of opened file stream
920 */
921 int
open_stat(const char * fname,int flags,mode_t mode,file_stats_t * fstats)922 open_stat(const char *fname, int flags, mode_t mode, file_stats_t *fstats)
923 {
924   int fd;
925   struct stat fdstats;
926 
927   fd = open (fname, flags, mode);
928   if (fd < 0)
929   {
930     logprintf (LOG_NOTQUIET, _("Failed to open file %s, reason :%s\n"), fname, strerror(errno));
931     return -1;
932   }
933   memset(&fdstats, 0, sizeof(fdstats));
934   if (fstat (fd, &fdstats) == -1)
935   {
936     logprintf (LOG_NOTQUIET, _("Failed to stat file %s, error: %s\n"), fname, strerror(errno));
937     close (fd);
938     return -1;
939   }
940 #if !(defined(WINDOWS) || defined(__VMS))
941   if (fstats != NULL &&
942       (fdstats.st_dev != fstats->st_dev ||
943        fdstats.st_ino != fstats->st_ino))
944   {
945     /* File changed since file_exists_p() : NOT SAFE */
946     logprintf (LOG_NOTQUIET, _("Trying to open file %s but it changed since last check. Security check failed.\n"), fname);
947     close (fd);
948     return -1;
949   }
950 #endif
951 
952   return fd;
953 }
954 
/* Create DIRECTORY.  If some of the pathname components of DIRECTORY
   are missing, create them first.  Failures on the intermediate
   components are ignored; the value returned is the status of the
   attempt on the final (complete) path only.  Returns 0 on successful
   completion.

   The behaviour of this function is meant to match the behaviour
   of `mkdir -p' on systems where mkdir supports the `-p' option.  */
int
make_directory (const char *directory)
{
  int i, ret, quit = 0;
  char buf[1024];
  char *dir;
  struct stat st;
  size_t len = strlen (directory);

  /* Make a copy of dir, to be able to write to it.  Otherwise, the
     function is unsafe if called with a read-only char *argument.
     Short paths use the stack buffer; longer ones are duplicated on
     the heap and released before returning.  */
  if (len < sizeof(buf))
    {
      memcpy(buf, directory, len + 1);
      dir = buf;
    }
  else
    dir = xstrdup(directory);

  /* If the first character of dir is '/', skip it (and thus enable
     creation of absolute-pathname directories.  */
  for (i = (*dir == '/'); 1; ++i)
    {
      /* Advance to the end of the current component and cut the path
         there for the duration of this iteration.  */
      for (; dir[i] && dir[i] != '/'; i++)
        ;
      if (!dir[i])
        quit = 1;
      dir[i] = '\0';
      /* Check whether the directory already exists.  The test has to
         be "is a directory", not file_exists_p(): that one accepts
         only readable regular files on POSIX systems, which made an
         existing directory look missing and a regular file in the way
         look like success.  Allow creation of intermediate directories
         to fail, as the initial path components are not necessarily
         directories!  */
      if (stat (dir, &st) == 0 && S_ISDIR (st.st_mode))
        ret = 0;
      else
        ret = mkdir (dir, 0777);
      if (quit)
        break;
      else
        dir[i] = '/';           /* restore the separator and go on */
    }

  if (dir != buf)
    xfree (dir);

  /* Status of the last component only (0, or mkdir's failure).  */
  return ret;
}
1006 
/* Combine the directory part of BASE with FILE and return the result
   as a freshly allocated string.  The directory part of BASE is
   everything up to its last '/'; when BASE has no '/', the result is
   simply a copy of FILE.

   file_merge("/foo/bar", "baz")  => "/foo/baz"
   file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
   file_merge("foo", "bar")       => "bar"

   Think of it as a simpler and gentler version of uri_merge.  */

char *
file_merge (const char *base, const char *file)
{
  const char *slash = strrchr (base, '/');
  size_t dirlen, filelen;
  char *merged;

  if (slash == NULL)
    return xstrdup (file);

  dirlen = slash - base;        /* directory part, without the '/' */
  filelen = strlen (file);

  /* Room for: directory + '/' + FILE + terminating NUL.  */
  merged = xmalloc (dirlen + 1 + filelen + 1);
  memcpy (merged, base, dirlen);
  merged[dirlen] = '/';
  memcpy (merged + dirlen + 1, file, filelen + 1);

  return merged;
}
1032 
/* Case-insensitive counterpart of fnmatch: same arguments and return
   value, but letter case is ignored while matching.  */

int
fnmatch_nocase (const char *pattern, const char *string, int flags)
{
  /* FNM_CASEFOLD began as a GNU extension and is nowadays found on
     *BSD and possibly elsewhere; Gnulib supplies it where the system
     does not.  */
  const int folded_flags = flags | FNM_CASEFOLD;

  return fnmatch (pattern, string, folded_flags);
}
1043 
1044 static bool in_acclist (const char *const *, const char *, bool);
1045 
1046 /* Determine whether a file is acceptable to be followed, according to
1047    lists of patterns to accept/reject.  */
1048 bool
acceptable(const char * s)1049 acceptable (const char *s)
1050 {
1051   const char *p;
1052 
1053   if (opt.output_document && strcmp (s, opt.output_document) == 0)
1054     return true;
1055 
1056   if ((p = strrchr (s, '/')))
1057     s = p + 1;
1058 
1059   if (opt.accepts)
1060     {
1061       if (opt.rejects)
1062         return (in_acclist ((const char *const *)opt.accepts, s, true)
1063                 && !in_acclist ((const char *const *)opt.rejects, s, true));
1064       else
1065         return in_acclist ((const char *const *)opt.accepts, s, true);
1066     }
1067   else if (opt.rejects)
1068     return !in_acclist ((const char *const *)opt.rejects, s, true);
1069 
1070   return true;
1071 }
1072 
1073 /* Determine whether an URL is acceptable to be followed, according to
1074    regex patterns to accept/reject.  */
1075 bool
accept_url(const char * s)1076 accept_url (const char *s)
1077 {
1078   if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s))
1079     return false;
1080   if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s))
1081     return false;
1082 
1083   return true;
1084 }
1085 
1086 /* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
1087    will return true if and only if D2 begins with `/something/' or is exactly
1088    '/something'.  */
1089 bool
subdir_p(const char * d1,const char * d2)1090 subdir_p (const char *d1, const char *d2)
1091 {
1092   if (*d1 == '\0')
1093     return true;
1094   if (!opt.ignore_case)
1095     for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
1096       ;
1097   else
1098     for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
1099       ;
1100 
1101   return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
1102 }
1103 
1104 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
1105    first element that matches DIR, through wildcards or front comparison (as
1106    appropriate).  */
1107 static bool
dir_matches_p(const char ** dirlist,const char * dir)1108 dir_matches_p (const char **dirlist, const char *dir)
1109 {
1110   const char **x;
1111   int (*matcher) (const char *, const char *, int)
1112     = opt.ignore_case ? fnmatch_nocase : fnmatch;
1113 
1114   for (x = dirlist; *x; x++)
1115     {
1116       /* Remove leading '/' */
1117       const char *p = *x + (**x == '/');
1118       if (has_wildcards_p (p))
1119         {
1120           if (matcher (p, dir, FNM_PATHNAME) == 0)
1121             break;
1122         }
1123       else
1124         {
1125           if (subdir_p (p, dir))
1126             break;
1127         }
1128     }
1129 
1130   return *x ? true : false;
1131 }
1132 
1133 /* Returns whether DIRECTORY is acceptable for download, wrt the
1134    include/exclude lists.
1135 
1136    The leading `/' is ignored in paths; relative and absolute paths
1137    may be freely intermixed.  */
1138 
1139 bool
accdir(const char * directory)1140 accdir (const char *directory)
1141 {
1142   /* Remove starting '/'.  */
1143   if (*directory == '/')
1144     ++directory;
1145   if (opt.includes)
1146     {
1147       if (!dir_matches_p (opt.includes, directory))
1148         return false;
1149     }
1150   if (opt.excludes)
1151     {
1152       if (dir_matches_p (opt.excludes, directory))
1153         return false;
1154     }
1155   return true;
1156 }
1157 
/* Return true if STRING ends with TAIL.  For instance:

   match_tail ("abc", "bc", false)  -> true
   match_tail ("abc", "ab", false)  -> false
   match_tail ("abc", "abc", false) -> true
   match_tail ("abc", "", false)    -> true  (empty TAIL always matches)

   If FOLD_CASE is true, the comparison is case-insensitive.

   The lengths are kept in size_t.  Narrowing them to int would give a
   wrong offset for strings of 2 GiB or more.  */

bool
match_tail (const char *string, const char *tail, bool fold_case)
{
  size_t string_len = strlen (string);
  size_t tail_len = strlen (tail);
  const char *start;

  if (tail_len > string_len)
    return false;  /* tail is longer than string.  */

  /* Compare the last TAIL_LEN characters of STRING against TAIL.  */
  start = string + (string_len - tail_len);

  if (fold_case)
    return strcasecmp (start, tail) == 0;
  return strcmp (start, tail) == 0;
}
1179 
1180 /* Checks whether string S matches each element of ACCEPTS.  A list
1181    element are matched either with fnmatch() or match_tail(),
1182    according to whether the element contains wildcards or not.
1183 
1184    If the BACKWARD is false, don't do backward comparison -- just compare
1185    them normally.  */
1186 static bool
in_acclist(const char * const * accepts,const char * s,bool backward)1187 in_acclist (const char *const *accepts, const char *s, bool backward)
1188 {
1189   for (; *accepts; accepts++)
1190     {
1191       if (has_wildcards_p (*accepts))
1192         {
1193           int res = opt.ignore_case
1194             ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1195           /* fnmatch returns 0 if the pattern *does* match the string.  */
1196           if (res == 0)
1197             return true;
1198         }
1199       else
1200         {
1201           if (backward)
1202             {
1203               if (match_tail (s, *accepts, opt.ignore_case))
1204                 return true;
1205             }
1206           else
1207             {
1208               int cmp = opt.ignore_case
1209                 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1210               if (cmp == 0)
1211                 return true;
1212             }
1213         }
1214     }
1215   return false;
1216 }
1217 
/* Return a pointer to the suffix (file extension) of STR, i.e. to the
   text following the last '.', provided no '/' comes after that dot.
   Return NULL when there is no such suffix.  Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  char *dot = strrchr (str, '.');

  if (dot == NULL)
    return NULL;

  /* A '/' after the dot means the dot belongs to a directory name.  */
  if (strchr (dot + 1, '/') != NULL)
    return NULL;

  return dot + 1;
}
1233 
/* Return true if S contains at least one globbing wildcard character
   (`*', `?', `[' or `]').  */

bool
has_wildcards_p (const char *s)
{
  /* strcspn yields the length of the leading run that is free of
     wildcards; S has a wildcard iff that run ends before the NUL.  */
  return s[strcspn (s, "*?[]")] != '\0';
}
1242 
/* Return true if FNAME ends with a typical HTML suffix.  These
   suffixes are recognized, regardless of letter case:

     html
     htm
     ?html (`?' stands for any single character, e.g. "shtml")

   #### CAVEAT.  The suffix is only a hint; it does not guarantee that
   FNAME really holds HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return false;

  return (c_strcasecmp (ext, "html") == 0
          || c_strcasecmp (ext, "htm") == 0
          || (ext[0] != '\0' && c_strcasecmp (ext + 1, "html") == 0));
}
1268 
/* Read FILE into memory.  A pointer to a `struct file_memory' is
   returned; use struct element `content' to access file contents, and
   the element `length' to know the file length.  `content' is *not*
   zero-terminated, and you should *not* read or write beyond the [0,
   length) range of characters.

   NULL is returned if FILE cannot be opened or if reading it fails.

   After you are done with the file contents, call wget_read_file_free to
   release the memory.

   Depending on the operating system and the type of file that is
   being read, wget_read_file() either mmap's the file into memory, or
   reads the file into the core using read().  The element `mmap_p'
   records which of the two methods was used.

   If file is named "-", fileno(stdin) is used for reading instead.
   If you want to read from a real file named "-", use "./-" instead.  */

struct file_memory *
wget_read_file (const char *file)
{
  int fd;
  struct file_memory *fm;
  long size;
  /* Set when FD is stdin's descriptor, which must be left open.  */
  bool inhibit_close = false;

  /* Some magic in the finest tradition of Perl and its kin: if FILE
     is "-", just use stdin.  */
#ifndef FUZZING
  if (HYPHENP (file))
    {
      fd = fileno (stdin);
      inhibit_close = true;
      /* Note that we don't inhibit mmap() in this case.  If stdin is
         redirected from a regular file, mmap() will still work.  */
    }
  else
#endif
    fd = open (file, O_RDONLY);
  if (fd < 0)
    return NULL;
  fm = xnew (struct file_memory);

#ifdef HAVE_MMAP
  {
    /* First choice: map the whole file.  Any failure in here jumps to
       mmap_lose, which continues with the read() loop below.  */
    struct stat buf;
    if (fstat (fd, &buf) < 0)
      goto mmap_lose;
    fm->length = buf.st_size;
    /* NOTE: As far as I know, the callers of this function never
       modify the file text.  Relying on this would enable us to
       specify PROT_READ and MAP_SHARED for a marginal gain in
       efficiency, but at some cost to generality.  */
    fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE, fd, 0);
    if (fm->content == (char *)MAP_FAILED)
      goto mmap_lose;
    /* The descriptor is not used again after the mapping is made.  */
    if (!inhibit_close)
      close (fd);

    fm->mmap_p = 1;
    return fm;
  }

 mmap_lose:
  /* The most common reason why mmap() fails is that FD does not point
     to a plain file.  However, it's also possible that mmap() doesn't
     work for a particular type of file.  Therefore, whenever mmap()
     fails, we just fall back to the regular method.  */
#endif /* HAVE_MMAP */

  /* Fallback: read the file into a heap buffer that grows on demand.
     LENGTH is reset here because the mmap attempt may have set it.  */
  fm->length = 0;
  size = 512;                   /* number of bytes fm->contents can
                                   hold at any given time. */
  fm->content = xmalloc (size);
  while (1)
    {
      wgint nread;
      if (fm->length > size / 2)
        {
          /* #### I'm not sure whether the whole exponential-growth
             thing makes sense with kernel read.  On Linux at least,
             read() refuses to read more than 4K from a file at a
             single chunk anyway.  But other Unixes might optimize it
             better, and it doesn't *hurt* anything, so I'm leaving
             it.  */

          /* Normally, we grow SIZE exponentially to make the number
             of calls to read() and realloc() logarithmic in relation
             to file size.  However, read() can read an amount of data
             smaller than requested, and it would be unreasonable to
             double SIZE every time *something* was read.  Therefore,
             we double SIZE only when the length exceeds half of the
             entire allocated size.  */
          size <<= 1;
          fm->content = xrealloc (fm->content, size);
        }
      /* Fill the unused tail of the buffer.  */
      nread = read (fd, fm->content + fm->length, size - fm->length);
      if (nread > 0)
        /* Successful read. */
        fm->length += nread;
      else if (nread < 0)
        /* Error. */
        goto lose;
      else
        /* EOF */
        break;
    }
  if (!inhibit_close)
    close (fd);
  /* An empty file keeps its initial buffer; it is not shrunk to zero
     bytes.  */
  if (size > fm->length && fm->length != 0)
    /* Due to exponential growth of fm->content, the allocated region
       might be much larger than what is actually needed.  */
    fm->content = xrealloc (fm->content, fm->length);
  fm->mmap_p = 0;
  return fm;

 lose:
  /* A read error occurred: release everything and report failure.  */
  if (!inhibit_close)
    close (fd);
  xfree (fm->content);
  xfree (fm);
  return NULL;
}
1391 
1392 /* Release the resources held by FM.  Specifically, this calls
1393    munmap() or xfree() on fm->content, depending whether mmap or
1394    malloc/read were used to read in the file.  It also frees the
1395    memory needed to hold the FM structure itself.  */
1396 
1397 void
wget_read_file_free(struct file_memory * fm)1398 wget_read_file_free (struct file_memory *fm)
1399 {
1400 #ifdef HAVE_MMAP
1401   if (fm->mmap_p)
1402     {
1403       munmap (fm->content, fm->length);
1404     }
1405   else
1406 #endif
1407     {
1408       xfree (fm->content);
1409     }
1410   xfree (fm);
1411 }
1412 
/* Free every string in the NULL-terminated vector VEC and then the
   vector itself.  A NULL VEC is silently accepted.  */
void
free_vec (char **vec)
{
  char **slot;

  if (!vec)
    return;

  for (slot = vec; *slot; slot++)
    xfree (*slot);

  xfree (vec);
}
1429 
/* Append the elements of vector V2 to vector V1 and return the
   result.  V1 is reallocated and the V2 array is freed (its strings
   now belong to the result), so neither pointer may be used after the
   call.  If V1 is NULL, V2 is returned; if V2 is NULL, V1 is
   returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int n1 = 0, n2 = 0;

  if (!v1)
    return v2;
  if (!v2)
    return v1;

  if (!*v2)
    {
      /* V2 holds no elements, so there is nothing to copy.  */
      xfree (v2);
      return v1;
    }

  /* Count the elements of both vectors.  */
  while (v1[n1])
    n1++;
  while (v2[n2])
    n2++;

  /* Enlarge V1 and copy V2's pointers, terminating NULL included.  */
  v1 = xrealloc (v1, (n1 + n2 + 1) * sizeof (char *));
  memcpy (v1 + n1, v2, (n2 + 1) * sizeof (char *));

  xfree (v2);
  return v1;
}
1460 
/* Add a newly allocated copy of STR at the end of the NULL-terminated
   vector VEC and return the (possibly moved) vector.  A NULL VEC
   stands for an empty vector; it is allocated here.  */

char **
vec_append (char **vec, const char *str)
{
  int used = 0;                 /* elements currently stored in VEC */

  if (vec != NULL)
    while (vec[used])
      used++;

  /* Make room for the new element plus the terminating NULL.  */
  vec = xrealloc (vec, (used + 2) * sizeof (char *));

  vec[used] = xstrdup (str);
  vec[used + 1] = NULL;
  return vec;
}
1484 
/* String "sets" are hash tables in which strings serve as keys and
   only their presence matters.  The routines below provide that
   abstraction on top of the generic hash table.  */

/* Add a copy of S to the string set HT, unless S is in it already.  */

void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Leaving an existing element untouched saves freeing the old key
     and duplicating a new one.

     The value stored is always the literal "1": it is a clear,
     arbitrary marker that takes no extra memory, because every entry
     of every set points to the same string.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1505 
/* Tell whether S is a member of the string set HT.  This merely
   forwards to hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);

  return present;
}
1513 
1514 /* Convert the specified string set to array.  ARRAY should be large
1515    enough to hold hash_table_count(ht) char pointers.  */
1516 
string_set_to_array(struct hash_table * ht,char ** array)1517 void string_set_to_array (struct hash_table *ht, char **array)
1518 {
1519   hash_table_iterator iter;
1520   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1521     *array++ = iter.key;
1522 }
1523 
1524 /* Free the string set.  This frees both the storage allocated for
1525    keys and the actual hash table.  (hash_table_destroy would only
1526    destroy the hash table.)  */
1527 
1528 void
string_set_free(struct hash_table * ht)1529 string_set_free (struct hash_table *ht)
1530 {
1531   hash_table_iterator iter;
1532   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1533     xfree (iter.key);
1534   hash_table_destroy (ht);
1535 }
1536 
1537 /* Utility function: simply call xfree() on all keys and values of HT.  */
1538 
1539 void
free_keys_and_values(struct hash_table * ht)1540 free_keys_and_values (struct hash_table *ht)
1541 {
1542   hash_table_iterator iter;
1543   for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1544     {
1545       xfree (iter.key);
1546       xfree (iter.value);
1547     }
1548 }
1549 
/* Store the thousands separator string in *SEP and the digit
   grouping description in *GROUPING.  Both are obtained from
   localeconv() on the first call and cached for all later calls.

   Locales that set no thousands separator (such as the "C" locale)
   get "," forced upon them, or "." if "," is their decimal point,
   together with groups of three digits.  Thousand separators are now
   shown in one place only, so this should not be a problem in
   practice.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static bool ready;
  static const char *sep_cache;
  static const char *grouping_cache;

  if (!ready)
    {
      /* Ask the locale.  */
      const struct lconv *lc = localeconv ();
      const char *locale_sep = lc->thousands_sep;
      const char *locale_grouping = lc->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* Column widths cannot be counted here, so a separator longer
         than one byte is discarded; the check below then chooses a
         single-byte replacement.  */
      if (strlen (locale_sep) > 1)
        locale_sep = "";
#endif

      if (*locale_sep == '\0')
        {
          /* Many locales (such as "C" or "hr_HR") specify no
             grouping, yet grouping is wanted for legibility.  Use
             ',' as the separator, or '.' where ',' already serves as
             the decimal point.  */
          locale_sep = (*lc->decimal_point == ',') ? "." : ",";
          locale_grouping = "\x03";
        }

      sep_cache = locale_sep;
      grouping_cache = locale_grouping;
      ready = true;
    }

  *sep = sep_cache;
  *grouping = grouping_cache;
}
1595 
1596 /* Return a printed representation of N with thousand separators.
1597    This should respect locale settings, with the exception of the "C"
1598    locale which mandates no separator, but we use one anyway.
1599 
1600    Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1601    the separators because it's too non-portable, and it's hard to test
1602    for this feature at configure time.  Besides, it wouldn't display
1603    separators in the "C" locale, still used by many Unix users.  */
1604 
1605 const char *
with_thousand_seps(wgint n)1606 with_thousand_seps (wgint n)
1607 {
1608   static char outbuf[48];
1609   char *p = outbuf + sizeof outbuf;
1610 
1611   /* Info received from locale */
1612   const char *grouping, *sep;
1613   int seplen;
1614 
1615   /* State information */
1616   int i = 0, groupsize;
1617   const char *atgroup;
1618 
1619   bool negative = n < 0;
1620 
1621   /* Initialize grouping data. */
1622   get_grouping_data (&sep, &grouping);
1623   seplen = strlen (sep);
1624   atgroup = grouping;
1625   groupsize = *atgroup++;
1626 
1627   /* This would overflow on WGINT_MIN, but printing negative numbers
1628      is not an important goal of this fuinction.  */
1629   if (negative)
1630     n = -n;
1631 
1632   /* Write the number into the buffer, backwards, inserting the
1633      separators as necessary.  */
1634   *--p = '\0';
1635   while (1)
1636     {
1637       *--p = n % 10 + '0';
1638       n /= 10;
1639       if (n == 0)
1640         break;
1641       /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1642       if (++i == groupsize)
1643         {
1644           if (seplen == 1)
1645             *--p = *sep;
1646           else
1647             memcpy (p -= seplen, sep, seplen);
1648           i = 0;
1649           if (*atgroup)
1650             groupsize = *atgroup++;
1651         }
1652     }
1653   if (negative)
1654     *--p = '-';
1655 
1656   return p;
1657 }
1658 
1659 /* N, a byte quantity, is converted to a human-readable abberviated
1660    form a la sizes printed by `ls -lh'.  The result is written to a
1661    static buffer, a pointer to which is returned.
1662 
1663    Unlike `with_thousand_seps', this approximates to the nearest unit.
1664    Quoting GNU libit: "Most people visually process strings of 3-4
1665    digits effectively, but longer strings of digits are more prone to
1666    misinterpretation.  Hence, converting to an abbreviated form
1667    usually improves readability."
1668 
1669    This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1670    original computer-related meaning of "powers of 1024".  We don't
1671    use the "*bibyte" names invented in 1998, and seldom used in
1672    practice.  Wikipedia's entry on "binary prefix" discusses this in
1673    some detail.  */
1674 
1675 char *
human_readable(wgint n,const int acc,const int decimals)1676 human_readable (wgint n, const int acc, const int decimals)
1677 {
1678   /* These suffixes are compatible with those of GNU `ls -lh'. */
1679   static char powers[] =
1680     {
1681       'K',                      /* kilobyte, 2^10 bytes */
1682       'M',                      /* megabyte, 2^20 bytes */
1683       'G',                      /* gigabyte, 2^30 bytes */
1684       'T',                      /* terabyte, 2^40 bytes */
1685       'P',                      /* petabyte, 2^50 bytes */
1686       'E',                      /* exabyte,  2^60 bytes */
1687     };
1688   static char buf[8];
1689   size_t i;
1690 
1691   /* If the quantity is smaller than 1K, just print it. */
1692   if (n < 1024)
1693     {
1694       snprintf (buf, sizeof (buf), "%d", (int) n);
1695       return buf;
1696     }
1697 
1698   /* Loop over powers, dividing N with 1024 in each iteration.  This
1699      works unchanged for all sizes of wgint, while still avoiding
1700      non-portable `long double' arithmetic.  */
1701   for (i = 0; i < countof (powers); i++)
1702     {
1703       /* At each iteration N is greater than the *subsequent* power.
1704          That way N/1024.0 produces a decimal number in the units of
1705          *this* power.  */
1706       if ((n / 1024) < 1024 || i == countof (powers) - 1)
1707         {
1708           double val = n / 1024.0;
1709           /* Print values smaller than the accuracy level (acc) with (decimal)
1710            * decimal digits, and others without any decimals.  */
1711           snprintf (buf, sizeof (buf), "%.*f%c",
1712                     val < acc ? decimals : 0, val, powers[i]);
1713           return buf;
1714         }
1715       n /= 1024;
1716     }
1717   return NULL;                  /* unreached */
1718 }
1719 
1720 /* Count the digits in the provided number.  Used to allocate space
1721    when printing numbers.  */
1722 
1723 int
numdigit(wgint number)1724 numdigit (wgint number)
1725 {
1726   int cnt = 1;
1727   if (number < 0)
1728     ++cnt;                      /* accommodate '-' */
1729   while ((number /= 10) != 0)
1730     ++cnt;
1731   return cnt;
1732 }
1733 
/* PR (mask) stores the character for the digit n/mask at *p and
   advances p.  It expects variables named `p' (output position) and
   `n' (value being printed) to be in scope where it is expanded, and
   n/mask to be a single digit.  */

#define PR(mask) *p++ = n / (mask) + '0'

/* DIGITS_<D> is used to print a D-digit number and should be called
   with mask==10^(D-1).  It prints n/mask (the first digit), reducing
   n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
   Recursively this continues until DIGITS_1 is invoked.

   Each macro expands to one comma expression, so an invocation can
   be used as a single statement.  */

#define DIGITS_1(mask) PR (mask)
#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit wgints. */

#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)

/* Shorthand for casting to wgint.  It is used below to build powers
   of ten too large for a plain int constant, e.g. 10*(W)1000000000.  */
#define W wgint
1766 
/* Print NUMBER to BUFFER in base 10.  This is equivalent to
   `sprintf(buffer, "%lld", (long long) number)', only typically much
   faster and portable to machines without long long.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in some versions of GNU libc, have been known
   to be quite slow when converting integers to strings.

   Return the pointer to the location where the terminating zero was
   printed.  (Equivalent to calling buffer+strlen(buffer) after the
   function is done.)

   BUFFER should be large enough to accept as many bytes as you expect
   the number to take up.  On machines with 64-bit wgints a buffer of
   24 bytes is always sufficient: the largest 64-bit number has 19
   digits, to which the `-' sign in case it's negative and the
   terminating '\0' are added.  */

char *
number_to_string (char *buffer, wgint number)
{
  char *p = buffer;
  wgint n = number;

  /* Character of the final digit when it had to be split off from N
     (see below); 0 when nothing was split off.  */
  int last_digit_char = 0;

  if (n < 0)
    {
      if (n < -WGINT_MAX)
        {
          /* n = -n would overflow because -n would evaluate to a
             wgint value larger than WGINT_MAX.  Need to make n
             smaller and handle the last digit separately.  */
          int last_digit = n % 10;
          /* The sign of n%10 is implementation-defined. */
          if (last_digit < 0)
            last_digit_char = '0' - last_digit;
          else
            last_digit_char = '0' + last_digit;
          /* After n is made smaller, -n will not overflow. */
          n /= 10;
        }

      *p++ = '-';
      n = -n;
    }

  /* Use the DIGITS_ macro appropriate for N's number of digits.  That
     way printing any N is fully open-coded without a loop or jump.
     (Also see description of DIGITS_*.)  */

  /* N is non-negative from here on.  Each test gives an exclusive
     upper bound, and the mask passed is the power of ten of N's
     leading digit.  */
  if      (n < 10)                       DIGITS_1 (1);
  else if (n < 100)                      DIGITS_2 (10);
  else if (n < 1000)                     DIGITS_3 (100);
  else if (n < 10000)                    DIGITS_4 (1000);
  else if (n < 100000)                   DIGITS_5 (10000);
  else if (n < 1000000)                  DIGITS_6 (100000);
  else if (n < 10000000)                 DIGITS_7 (1000000);
  else if (n < 100000000)                DIGITS_8 (10000000);
  else if (n < 1000000000)               DIGITS_9 (100000000);
  else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
  else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
  else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
  else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
  else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
  else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
  else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
  else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
  else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
  else                                   DIGITS_19 (1000000000*(W)1000000000);

  /* Append the digit that was split off to avoid the overflow.  */
  if (last_digit_char)
    *p++ = last_digit_char;

  *p = '\0';

  return p;
}
1846 
1847 #undef PR
1848 #undef W
1849 #undef SPRINTF_WGINT
1850 #undef DIGITS_1
1851 #undef DIGITS_2
1852 #undef DIGITS_3
1853 #undef DIGITS_4
1854 #undef DIGITS_5
1855 #undef DIGITS_6
1856 #undef DIGITS_7
1857 #undef DIGITS_8
1858 #undef DIGITS_9
1859 #undef DIGITS_10
1860 #undef DIGITS_11
1861 #undef DIGITS_12
1862 #undef DIGITS_13
1863 #undef DIGITS_14
1864 #undef DIGITS_15
1865 #undef DIGITS_16
1866 #undef DIGITS_17
1867 #undef DIGITS_18
1868 #undef DIGITS_19
1869 
1870 #define RING_SIZE 3
1871 
1872 /* Print NUMBER to a statically allocated string and return a pointer
1873    to the printed representation.
1874 
1875    This function is intended to be used in conjunction with printf.
1876    It is hard to portably print wgint values:
1877     a) you cannot use printf("%ld", number) because wgint can be long
1878        long on 32-bit machines with LFS.
1879     b) you cannot use printf("%lld", number) because NUMBER could be
1880        long on 32-bit machines without LFS, or on 64-bit machines,
1881        which do not require LFS.  Also, Windows doesn't support %lld.
1882     c) you cannot use printf("%j", (int_max_t) number) because not all
1883        versions of printf support "%j", the most notable being the one
1884        on Windows.
1885     d) you cannot #define WGINT_FMT to the appropriate format and use
1886        printf(WGINT_FMT, number) because that would break translations
1887        for user-visible messages, such as printf("Downloaded: %d
1888        bytes\n", number).
1889 
1890    What you should use instead is printf("%s", number_to_static_string
1891    (number)).
1892 
1893    CAVEAT: since the function returns pointers to static data, you
1894    must be careful to copy its result before calling it again.
1895    However, to make it more useful with printf, the function maintains
1896    an internal ring of static buffers to return.  That way things like
1897    printf("%s %s", number_to_static_string (num1),
1898    number_to_static_string (num2)) work as expected.  Three buffers
1899    are currently used, which means that "%s %s %s" will work, but "%s
1900    %s %s %s" won't.  If you need to print more than three wgints,
1901    bump the RING_SIZE (or rethink your message.)  */
1902 
1903 char *
number_to_static_string(wgint number)1904 number_to_static_string (wgint number)
1905 {
1906   static char ring[RING_SIZE][24];
1907   static int ringpos;
1908   char *buf = ring[ringpos];
1909   number_to_string (buf, number);
1910   ringpos = (ringpos + 1) % RING_SIZE;
1911   return buf;
1912 }
1913 
/* Convert a byte count to a bit count if the --report-bps option is
   enabled; otherwise return the count unchanged.  */
1916 wgint
convert_to_bits(wgint num)1917 convert_to_bits (wgint num)
1918 {
1919   if (opt.report_bps)
1920     return num * 8;
1921   return num;
1922 }
1923 
1924 
1925 /* Determine the width of the terminal we're running on.  If that's
1926    not possible, return 0.  */
1927 
1928 int
determine_screen_width(void)1929 determine_screen_width (void)
1930 {
1931   /* If there's a way to get the terminal size using POSIX
1932      tcgetattr(), somebody please tell me.  */
1933 #ifdef TIOCGWINSZ
1934   int fd;
1935   struct winsize wsz;
1936 
1937   if (opt.lfilename != NULL && opt.show_progress != 1)
1938     return 0;
1939 
1940   fd = fileno (stderr);
1941   if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1942     return 0;                   /* most likely ENOTTY */
1943 
1944   return wsz.ws_col;
1945 #elif defined(WINDOWS)
1946   CONSOLE_SCREEN_BUFFER_INFO csbi;
1947   if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1948     return 0;
1949   return csbi.dwSize.X;
1950 #else  /* neither TIOCGWINSZ nor WINDOWS */
1951   return 0;
1952 #endif /* neither TIOCGWINSZ nor WINDOWS */
1953 }
1954 
/* Whether the random number generator selected at build time
   (random, [dl]rand48 or rand) has been seeded.  */
static int rnd_seeded;

/* Return a random number between 0 and MAX-1, inclusive.

   If MAX is zero or negative there is no such number; 0 is returned
   in that case rather than dividing by zero.

   The generator is seeded automatically, from the current time and
   the process id, the first time a number is actually drawn.

   This uses random where available, else lrand48, else rand.  In the
   rand fallback the result is scaled from rand()'s output, so when
   MAX is greater than RAND_MAX+1 not every value below MAX can be
   produced.

   DO NOT use it for cryptography.  It is only meant to be used in
   situations where quality of the random numbers returned doesn't
   really matter.  */

int
random_number (int max)
{
  /* An empty range: "% max" below would be undefined for zero and
     the branches would disagree for negative values.  */
  if (max <= 0)
    return 0;

#ifdef HAVE_RANDOM
  if (!rnd_seeded)
    {
      srandom ((long) time (NULL) ^ (long) getpid ());
      rnd_seeded = 1;
    }
  return random () % max;
#elif defined HAVE_DRAND48
  if (!rnd_seeded)
    {
      srand48 ((long) time (NULL) ^ (long) getpid ());
      rnd_seeded = 1;
    }
  return lrand48 () % max;
#else  /* not HAVE_DRAND48 */

  double bounded;
  int rnd;
  if (!rnd_seeded)
    {
      srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
      rnd_seeded = 1;
    }
  rnd = rand ();

  /* Like rand() % max, but uses the high-order bits for better
     randomness on architectures where rand() is implemented using a
     simple congruential generator.  */

  bounded = (double) max * rnd / (RAND_MAX + 1.0);
  return (int) bounded;

#endif /* not HAVE_DRAND48 */
}
2008 
2009 /* Return a random uniformly distributed floating point number in the
2010    [0, 1) range.  Uses drand48 where available, and a really lame
2011    kludge elsewhere.  */
2012 
double
random_float (void)
{
#ifdef HAVE_RANDOM
  /* A draw below RAND_MAX divided by RAND_MAX stays below 1.  */
  int draw = random_number (RAND_MAX);
  return ((double) draw) / RAND_MAX;
#elif defined HAVE_DRAND48
  /* drand48 shares its seed state with the other *rand48 calls, so
     seed here too in case random_number has not run yet.  */
  if (!rnd_seeded)
    {
      srand48 ((long) time (NULL) ^ (long) getpid ());
      rnd_seeded = 1;
    }
  return drand48 ();
#else  /* not HAVE_DRAND48 */
  /* Assemble the fraction from four draws in [0, 10000), each
     supplying the next four decimal digits.  */
  double digits_1_4   = random_number (10000) / 10000.0;
  double digits_5_8   = random_number (10000) / (10000.0 * 10000.0);
  double digits_9_12  = random_number (10000) / (10000.0 * 10000.0 * 10000.0);
  double digits_13_16 = random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0);

  return digits_1_4 + digits_5_8 + digits_9_12 + digits_13_16;
#endif /* not HAVE_DRAND48 */
}
2032 
2033 /* Implementation of run_with_timeout, a generic timeout-forcing
2034    routine for systems with Unix-like signal handling.  */
2035 
2036 #ifdef USE_SIGNAL_TIMEOUT
2037 # ifdef HAVE_SIGSETJMP
2038 #  define SETJMP(env) sigsetjmp (env, 1)
2039 
2040 static sigjmp_buf run_with_timeout_env;
2041 
2042 _Noreturn static void
abort_run_with_timeout(int sig _GL_UNUSED)2043 abort_run_with_timeout (int sig _GL_UNUSED)
2044 {
2045   assert (sig == SIGALRM);
2046   siglongjmp (run_with_timeout_env, -1);
2047 }
2048 # else /* not HAVE_SIGSETJMP */
2049 #  define SETJMP(env) setjmp (env)
2050 
2051 static jmp_buf run_with_timeout_env;
2052 
2053 static void _Noreturn
abort_run_with_timeout(int sig _GL_UNUSED)2054 abort_run_with_timeout (int sig _GL_UNUSED)
2055 {
2056   assert (sig == SIGALRM);
2057   /* We don't have siglongjmp to preserve the set of blocked signals;
2058      if we longjumped out of the handler at this point, SIGALRM would
2059      remain blocked.  We must unblock it manually. */
2060   sigset_t set;
2061   sigemptyset (&set);
2062   sigaddset (&set, SIGALRM);
2063   sigprocmask (SIG_BLOCK, &set, NULL);
2064 
2065   /* Now it's safe to longjump. */
2066   longjmp (run_with_timeout_env, -1);
2067 }
2068 # endif /* not HAVE_SIGSETJMP */
2069 
2070 /* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
2071    setitimer where available, alarm otherwise.
2072 
2073    TIMEOUT should be non-zero.  If the timeout value is so small that
2074    it would be rounded to zero, it is rounded to the least legal value
2075    instead (1us for setitimer, 1s for alarm).  That ensures that
2076    SIGALRM will be delivered in all cases.  */
2077 
static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval timer;
  long whole_secs = (long) timeout;

  xzero (timer);
  /* Split TIMEOUT into whole seconds and leftover microseconds.  */
  timer.it_value.tv_sec = whole_secs;
  timer.it_value.tv_usec = 1000000 * (timeout - whole_secs);

  /* Ensure that we wait for at least the minimum interval.
     Specifying zero would mean "wait forever".  */
  if (timer.it_value.tv_sec == 0 && timer.it_value.tv_usec == 0)
    timer.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &timer, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface. */
  int secs = (int) timeout;

  /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
     because alarm(0) means "never deliver the alarm", i.e. "wait
     forever", which is not what someone who specifies a 0.5s
     timeout would expect.  */
  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
2104 
2105 /* Cancel the alarm set with alarm_set. */
2106 
static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  /* Installing an all-zero timer value switches the timer off.  */
  struct itimerval zeroed;

  xzero (zeroed);
  setitimer (ITIMER_REAL, &zeroed, NULL);
#else  /* not ITIMER_REAL */
  /* alarm(0) withdraws any pending alarm.  */
  alarm (0);
#endif /* not ITIMER_REAL */
}
2118 
2119 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
2120    seconds.  Returns true if the function was interrupted with a
2121    timeout, false otherwise.
2122 
2123    This works by setting up SIGALRM to be delivered in TIMEOUT seconds
2124    using setitimer() or alarm().  The timeout is enforced by
2125    longjumping out of the SIGALRM handler.  This has several
2126    advantages compared to the traditional approach of relying on
2127    signals causing system calls to exit with EINTR:
2128 
2129      * The callback function is *forcibly* interrupted after the
2130        timeout expires, (almost) regardless of what it was doing and
2131        whether it was in a syscall.  For example, a calculation that
2132        takes a long time is interrupted as reliably as an IO
2133        operation.
2134 
2135      * It works with both SYSV and BSD signals because it doesn't
2136        depend on the default setting of SA_RESTART.
2137 
2138      * It doesn't require special handler setup beyond a simple call
2139        to signal().  (It does use sigsetjmp/siglongjmp, but they're
2140        optional.)
2141 
2142    The only downside is that, if FUN allocates internal resources that
2143    are normally freed prior to exit from the functions, they will be
2144    lost in case of timeout.  */
2145 
2146 bool
run_with_timeout(double timeout,void (* fun)(void *),void * arg)2147 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2148 {
2149   int saved_errno;
2150 
2151   if (timeout == 0)
2152     {
2153       fun (arg);
2154       return false;
2155     }
2156 
2157   if (SETJMP (run_with_timeout_env) != 0)
2158     {
2159       /* Longjumped out of FUN with a timeout. */
2160       signal (SIGALRM, SIG_DFL);
2161       return true;
2162     }
2163   else
2164     {
2165       signal (SIGALRM, abort_run_with_timeout);
2166     }
2167   alarm_set (timeout);
2168   fun (arg);
2169 
2170   /* Preserve errno in case alarm() or signal() modifies it. */
2171   saved_errno = errno;
2172   alarm_cancel ();
2173   signal (SIGALRM, SIG_DFL);
2174   errno = saved_errno;
2175 
2176   return false;
2177 }
2178 
2179 #else  /* not USE_SIGNAL_TIMEOUT */
2180 
2181 #ifndef WINDOWS
2182 /* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
2183    define it under Windows, because Windows has its own version of
2184    run_with_timeout that uses threads.  */
2185 
bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  /* No timeout mechanism in this build: TIMEOUT is ignored and FUN
     simply runs to completion, so a timeout is never reported.  */
  (void) timeout;

  fun (arg);
  return false;
}
2192 #endif /* not WINDOWS */
2193 #endif /* not USE_SIGNAL_TIMEOUT */
2194 
2195 #ifndef WINDOWS
2196 
2197 /* Sleep the specified amount of seconds.  On machines without
2198    nanosleep(), this may sleep shorter if interrupted by signals.  */
2199 
#if defined FUZZING && defined TESTING
void
xsleep (double seconds)
{
  /* Don't wait when fuzzing: return immediately.  */
  (void) seconds;
}
#else
void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is the preferred interface because it offers high
     accuracy and, more importantly, because it allows us to reliably
     restart receiving a signal such as SIGWINCH.  (There was an
     actual Debian bug report about --limit-rate malfunctioning while
     the terminal was being resized.)  */
  struct timespec request, leftover;
  long whole_secs = (long) seconds;

  request.tv_sec = whole_secs;
  request.tv_nsec = 1000000000 * (seconds - whole_secs);

  /* If nanosleep has been interrupted by a signal, go back to sleep
     for whatever time it reports as still outstanding.  */
  while (nanosleep (&request, &leftover) < 0 && errno == EINTR)
    request = leftover;
#elif defined(HAVE_USLEEP)
  /* If usleep is available, use it in preference to select.  */
  if (seconds >= 1)
    {
      /* On some systems, usleep cannot handle values larger than
         1,000,000.  If the period is larger than that, use sleep
         first, then add usleep for subsecond accuracy.  */
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else /* fall back select */
  /* Note that, although Windows supports select, it can't be used to
     implement sleeping because Winsock's select doesn't implement
     timeout when it is passed NULL pointers for all fd sets.  (But it
     does under Cygwin, which implements Unix-compatible select.)  */
  struct timeval delay;

  delay.tv_sec = (long) seconds;
  delay.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &delay);
  /* If select returns -1 and errno is EINTR, it means we were
     interrupted by a signal.  But without knowing how long we've
     actually slept, we can't return to sleep.  Using gettimeofday to
     track sleeps is slow and unreliable due to clock skew.  */
#endif
}
#endif
2250 
2251 #endif /* not WINDOWS */
2252 
2253 /* Encode the octets in DATA of length LENGTH to base64 format,
2254    storing the result to DEST.  The output will be zero-terminated,
2255    and must point to a writable buffer of at least
2256    1+BASE64_LENGTH(length) bytes.  The function returns the length of
2257    the resulting base64 data, not counting the terminating zero.
2258 
2259    This implementation does not emit newlines after 76 characters of
2260    base64 data.  */
2261 
size_t
wget_base64_encode (const void *data, size_t length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  /* Input bytes not yet encoded.  Counting them, instead of
     comparing against an end pointer derived from LENGTH - 2, avoids
     ever forming a pointer in front of DATA for inputs shorter than
     two bytes.  */
  size_t left = length;
  char *p = dest;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (; left >= 3; left -= 3, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two trailing bytes, if any, and pad the final
     group to four characters...  */
  switch (left)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* LENGTH was a multiple of three: nothing left over.  */
      break;
    }
  /* ...and zero-terminate it.  */
  *p = '\0';

  /* Number of characters written, not counting the terminator.  */
  return (size_t) (p - dest);
}
2309 
/* Store in C the next non-whitespace character from the string at P,
   advancing P past it.  C is \0 when end of string is reached.  */
#define NEXT_CHAR(c, p) do {                    \
  c = (unsigned char) *p++;                     \
} while (c_isspace (c))

/* True when C has its high bit clear, i.e. when it can index a
   128-entry table.  */
#define IS_ASCII(c) (((c) & 0x80) == 0)

/* Decode data from BASE64 (a null-terminated string) into memory
   pointed to by DEST.  Whitespace anywhere in the input is skipped.
   The decoded data is guaranteed to be no more than
   3/4*strlen(base64) bytes long.

   At most SIZE bytes are stored to DEST.  Decoding carries on past
   that point without storing anything, so the return value is the
   full decoded length even when it exceeds SIZE.

   Since DEST is assumed to contain binary data, it is not
   NUL-terminated.  The function returns the length of the decoded
   data.  -1 is returned in case of error caused by malformed base64
   input.

   This function originates from Free Recode.  */

ssize_t
wget_base64_decode (const char *base64, void *dest, size_t size)
{
  /* Table of base64 values for first 128 characters.  Note that this
     assumes ASCII (but so does Wget in other places).  */
  static const signed char base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')

  const char *in = base64;
  unsigned char *out = dest;
  ssize_t decoded = 0;

  /* Each pass consumes one group of four input characters, which
     carries up to three output bytes.  */
  for (;;)
    {
      unsigned char c;
      unsigned long bits;

      /* First character of the group.  Running out of input here is
         the only clean way to finish.  */
      NEXT_CHAR (c, in);
      if (!c)
        break;
      if (c == '=' || !IS_BASE64 (c))
        return -1;              /* illegal char while decoding base64 */
      bits = BASE64_CHAR_TO_VALUE (c) << 18;

      /* Second character: premature end of input, padding and
         foreign characters are all errors.  With it the first output
         byte is complete.  */
      NEXT_CHAR (c, in);
      if (!c || c == '=' || !IS_BASE64 (c))
        return -1;
      bits |= BASE64_CHAR_TO_VALUE (c) << 12;
      if (size)
        {
          *out++ = bits >> 16;
          size--;
        }
      decoded++;

      /* Third character: either data or the start of "==".  */
      NEXT_CHAR (c, in);
      if (!c || !IS_BASE64 (c))
        return -1;              /* premature EOF or illegal char */

      if (c == '=')
        {
          /* The group held a single byte; a second `=' must follow
             (end of input also fails this test).  */
          NEXT_CHAR (c, in);
          if (c != '=')
            return -1;          /* padding `=' expected but not found */
          continue;
        }

      bits |= BASE64_CHAR_TO_VALUE (c) << 6;
      if (size)
        {
          *out++ = 0xff & bits >> 8;
          size--;
        }
      decoded++;

      /* Fourth character: either data or a single `='.  */
      NEXT_CHAR (c, in);
      if (!c)
        return -1;              /* premature EOF while decoding base64 */
      if (c == '=')
        continue;               /* the group held two bytes */
      if (!IS_BASE64 (c))
        return -1;              /* illegal char while decoding base64 */

      bits |= BASE64_CHAR_TO_VALUE (c);
      if (size)
        {
          *out++ = 0xff & bits;
          size--;
        }
      decoded++;
    }
#undef IS_BASE64
#undef BASE64_CHAR_TO_VALUE

  return decoded;
}
2432 
2433 #ifdef HAVE_LIBPCRE2
2434 /* Compiles the PCRE2 regex. */
2435 void *
compile_pcre2_regex(const char * str)2436 compile_pcre2_regex (const char *str)
2437 {
2438   int errornumber;
2439   PCRE2_SIZE erroroffset;
2440   pcre2_code *regex = pcre2_compile((PCRE2_SPTR) str, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, NULL);
2441   if (! regex)
2442     {
2443       fprintf (stderr, _("Invalid regular expression %s, PCRE2 error %d\n"),
2444                quote (str), errornumber);
2445     }
2446   return regex;
2447 }
2448 #endif
2449 
2450 #ifdef HAVE_LIBPCRE
2451 /* Compiles the PCRE regex. */
2452 void *
compile_pcre_regex(const char * str)2453 compile_pcre_regex (const char *str)
2454 {
2455   const char *errbuf;
2456   int erroffset;
2457   pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
2458   if (! regex)
2459     {
2460       fprintf (stderr, _("Invalid regular expression %s, %s\n"),
2461                quote (str), errbuf);
2462     }
2463   return regex;
2464 }
2465 #endif
2466 
2467 /* Compiles the POSIX regex. */
void *
compile_posix_regex (const char *str)
{
  regex_t *compiled = xmalloc (sizeof *compiled);
  int errcode;
  size_t msg_size;
  char *msg;

#ifdef TESTING
  /* regcomp might be *very* cpu+memory intensive,
   *  see https://sourceware.org/glibc/wiki/Security%20Exceptions */
  str = "a";
#endif

  errcode = regcomp (compiled, str, REG_EXTENDED | REG_NOSUB);
  if (errcode == 0)
    return compiled;

  /* Ask regerror for the size of the message first, then fetch it
     into a buffer of exactly that size.  */
  msg_size = regerror (errcode, compiled, NULL, 0);
  msg = xmalloc (msg_size);
  regerror (errcode, compiled, msg, msg_size);
  fprintf (stderr, _("Invalid regular expression %s, %s\n"),
           quote (str), msg);

  xfree (msg);
  xfree (compiled);
  return NULL;
}
2492 
2493 #ifdef HAVE_LIBPCRE2
2494 /* Matches a PCRE2 regex.  */
2495 bool
match_pcre2_regex(const void * regex,const char * str)2496 match_pcre2_regex (const void *regex, const char *str)
2497 {
2498   int rc;
2499   pcre2_match_data *match_data;
2500 
2501   match_data = pcre2_match_data_create_from_pattern(regex, NULL);
2502 
2503   if (match_data)
2504     {
2505       rc = pcre2_match(regex, (PCRE2_SPTR) str, strlen(str), 0, 0, match_data, NULL);
2506       pcre2_match_data_free(match_data);
2507     }
2508   else
2509 	  rc = PCRE2_ERROR_NOMEMORY;
2510 
2511   if (rc < 0 && rc != PCRE2_ERROR_NOMATCH)
2512     {
2513       logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2514                  quote (str), rc);
2515     }
2516 
2517   return rc >= 0;
2518 }
2519 #endif
2520 
2521 #ifdef HAVE_LIBPCRE
2522 #define OVECCOUNT 30
2523 /* Matches a PCRE regex.  */
2524 bool
match_pcre_regex(const void * regex,const char * str)2525 match_pcre_regex (const void *regex, const char *str)
2526 {
2527   size_t l = strlen (str);
2528   int ovector[OVECCOUNT];
2529 
2530   int rc = pcre_exec ((pcre *) regex, 0, str, (int) l, 0, 0, ovector, OVECCOUNT);
2531   if (rc == PCRE_ERROR_NOMATCH)
2532     return false;
2533   else if (rc < 0)
2534     {
2535       logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2536                  quote (str), rc);
2537       return false;
2538     }
2539   else
2540     return true;
2541 }
2542 #undef OVECCOUNT
2543 #endif
2544 
2545 /* Matches a POSIX regex.  */
2546 bool
match_posix_regex(const void * regex,const char * str)2547 match_posix_regex (const void *regex, const char *str)
2548 {
2549   int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
2550   if (rc == REG_NOMATCH)
2551     return false;
2552   else if (rc == 0)
2553     return true;
2554   else
2555     {
2556       size_t errbuf_size = regerror (rc, opt.acceptregex, NULL, 0);
2557       char *errbuf = xmalloc (errbuf_size);
2558       regerror (rc, opt.acceptregex, errbuf, errbuf_size);
2559       logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2560                  quote (str), rc);
2561       xfree (errbuf);
2562       return false;
2563     }
2564 }
2565 
2566 #undef IS_ASCII
2567 #undef NEXT_CHAR
2568 
2569 /* Simple merge sort for use by stable_sort.  Implementation courtesy
2570    Zeljko Vrba with additional debugging by Nenad Barbutov.  */
2571 
static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
  /* Address of element POS in ARRAY, whose elements are SIZE bytes
     wide.  */
#define SLOT(array, pos) ((char *)(array) + (pos) * size)
  size_t mid, left, right, out;

  /* Ranges of one element (or none) are already sorted.  */
  if (from >= to)
    return;

  /* Sort the two halves [FROM, MID] and [MID + 1, TO] in place.  */
  mid = (to + from) / 2;
  mergesort_internal (base, temp, size, from, mid, cmpfun);
  mergesort_internal (base, temp, size, mid + 1, to, cmpfun);

  /* Merge the halves into TEMP.  On ties the element from the left
     half goes first, which is what keeps the sort stable.  */
  left = from;
  right = mid + 1;
  out = from;
  while (left <= mid && right <= to)
    {
      if (cmpfun (SLOT (base, left), SLOT (base, right)) <= 0)
        memcpy (SLOT (temp, out), SLOT (base, left++), size);
      else
        memcpy (SLOT (temp, out), SLOT (base, right++), size);
      out++;
    }

  /* One half is used up; move over the remainder of the other.  */
  for (; left <= mid; left++, out++)
    memcpy (SLOT (temp, out), SLOT (base, left), size);
  for (; right <= to; right++, out++)
    memcpy (SLOT (temp, out), SLOT (base, right), size);

  /* Copy the merged range back into BASE in one go.  */
  memcpy (SLOT (base, from), SLOT (temp, from), (to - from + 1) * size);
#undef SLOT
}
2599 
2600 /* Stable sort with interface exactly like standard library's qsort.
2601    Uses mergesort internally. */
2602 
void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  void *temp;

  /* Fewer than two elements are already sorted, and zero-sized
     elements leave nothing to move.  Elements of size 1 (plain
     bytes) are sorted like any others.  */
  if (nmemb < 2 || size == 0)
    return;

  /* Scratch space as large as the array itself, used for merging.  */
  temp = xmalloc (nmemb * size);
  mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
  xfree (temp);
}
2614 
2615 /* Print a decimal number.  If it is equal to or larger than ten, the
2616    number is rounded.  Otherwise it is printed with one significant
2617    digit without trailing zeros and with no more than three fractional
2618    digits total.  For example, 0.1 is printed as "0.1", 0.035 is
2619    printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2620 
2621    This is useful for displaying durations because it provides
2622    order-of-magnitude information without unnecessary clutter --
2623    long-running downloads are shown without the fractional part, and
2624    short ones still retain one significant digit.  */
2625 
const char *
print_decimal (double number)
{
  /* The result lives in this static buffer and is overwritten by the
     next call.  */
  static char buf[32];
  double magnitude;

  /* The thresholds are compared against the absolute value; the sign
     is printed by the formats themselves.  */
  if (number >= 0)
    magnitude = number;
  else
    magnitude = -number;

  if (magnitude >= 9.95)
    {
      /* Cut off at 9.95 because the below %.1f would round 9.96 to
         "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
      snprintf (buf, sizeof buf, "%.0f", number);
      return buf;
    }

  if (magnitude >= 0.95)
    {
      snprintf (buf, sizeof buf, "%.1f", number);
      return buf;
    }

  if (magnitude >= 0.001)
    {
      /* one significant digit */
      snprintf (buf, sizeof buf, "%.1g", number);
      return buf;
    }

  if (magnitude >= 0.0005)
    {
      /* round [0.0005, 0.001) to 0.001 */
      snprintf (buf, sizeof buf, "%.3f", number);
      return buf;
    }

  /* print numbers close to 0 as 0, not 0.000 */
  strcpy (buf, "0");
  return buf;
}
2649 
2650 /* Get the maximum name length for the given path. */
2651 /* Return 0 if length is unknown. */
2652 long
get_max_length(const char * path,int length,int name)2653 get_max_length (const char *path, int length, int name)
2654 {
2655   long ret;
2656   char *p, *d;
2657 
2658   /* Make a copy of the path that we can modify. */
2659   p = path ? strdupdelim (path, path + length) : strdup ("");
2660 
2661   for (;;)
2662     {
2663       errno = 0;
2664       /* For an empty path query the current directory. */
2665 #if HAVE_PATHCONF
2666       ret = pathconf (*p ? p : ".", name);
2667       if (!(ret < 0 && errno == ENOENT))
2668         break;
2669 #else
2670       ret = PATH_MAX;
2671 #endif
2672 
2673       /* The path does not exist yet, but may be created. */
2674       /* Already at current or root directory, give up. */
2675       if (!*p || strcmp (p, "/") == 0)
2676         break;
2677 
2678       /* Remove one directory level and try again. */
2679       d = strrchr (p, '/');
2680       if (d == p)
2681         p[1] = '\0';  /* check root directory */
2682       else if (d)
2683         *d = '\0';  /* remove last directory part */
2684       else
2685         *p = '\0';  /* check current directory */
2686     }
2687 
2688   xfree (p);
2689 
2690   if (ret < 0)
2691     {
2692       /* pathconf() has a message for us. */
2693       if (errno != 0)
2694           perror ("pathconf");
2695 
2696       /* If (errno == 0) then there is no max length.
2697          Even on error return 0 so the caller can continue. */
2698       return 0;
2699     }
2700 
2701   return ret;
2702 }
2703 
/* Write the HEX_LEN bytes at HEX_BUFFER to STR_BUFFER as lowercase
   hexadecimal text, two characters per byte, followed by a
   terminating null.  STR_BUFFER must have room for 2 * HEX_LEN + 1
   characters.  */
void
wg_hex_to_string (char *str_buffer, const char *hex_buffer, size_t hex_len)
{
  static const char hex_digits[] = "0123456789abcdef";
  size_t pos;

  for (pos = 0; pos < hex_len; pos++)
    {
      /* Mask to 0..255 so a negative char does not widen.  */
      unsigned byte = (unsigned) (hex_buffer[pos] & 0xFF);

      /* Each byte takes 2 characters: high nibble, then low.  */
      str_buffer[2 * pos] = hex_digits[byte >> 4];
      str_buffer[2 * pos + 1] = hex_digits[byte & 0xF];
    }

  /* Null-terminate result.  */
  str_buffer[2 * hex_len] = '\0';
}
2718 
2719 #ifdef HAVE_SSL
2720 
2721 /*
2722  * Public key pem to der conversion
2723  */
2724 
/* Extract the body of the first "PUBLIC KEY" PEM block found in PEM
   and base64-decode it.

   On success returns true, stores a newly allocated buffer holding
   the decoded bytes in *DER (the caller frees it) and its length in
   *DER_LEN.  On any failure -- PEM is null, the markers are missing
   or misplaced, the base64 text is malformed or decodes to nothing
   -- returns false and leaves *DER null and *DER_LEN zero.  */
static bool
wg_pubkey_pem_to_der (const char *pem, unsigned char **der, size_t *der_len)
{
  const char *begin_pos, *end_pos;
  char *stripped_pem;
  size_t pem_count, stripped_pem_count = 0, pem_len;
  ssize_t size;
  unsigned char *base64data;

  *der = NULL;
  *der_len = 0;

  /* if no pem, exit. */
  if (!pem)
    return false;

  begin_pos = strstr (pem, "-----BEGIN PUBLIC KEY-----");
  if (!begin_pos)
    return false;

  pem_count = begin_pos - pem;
  /* Invalid if not at beginning AND not directly following \n */
  if (0 != pem_count && '\n' != pem[pem_count - 1])
    return false;

  /* 26 is length of "-----BEGIN PUBLIC KEY-----" */
  pem_count += 26;

  /* Invalid if not directly following \n */
  end_pos = strstr (pem + pem_count, "\n-----END PUBLIC KEY-----");
  if (!end_pos)
    return false;

  pem_len = end_pos - pem;

  stripped_pem = xmalloc (pem_len - pem_count + 1);

  /*
   * Here we loop through the pem array one character at a time between the
   * correct indices, and place each character that is not '\n' or '\r'
   * into the stripped_pem array, which should represent the raw base64 string
   */
  while (pem_count < pem_len) {
    if ('\n' != pem[pem_count] && '\r' != pem[pem_count])
      stripped_pem[stripped_pem_count++] = pem[pem_count];
    ++pem_count;
  }
  /* Place the null terminator in the correct place */
  stripped_pem[stripped_pem_count] = '\0';

  /* NOTE(review): BASE64_LENGTH presumably yields the *encoded*
     length for a given byte count, which makes this buffer larger
     than any decoding of the stripped text -- confirm against its
     definition.  */
  base64data = xmalloc (BASE64_LENGTH(stripped_pem_count));

  size = wget_base64_decode (stripped_pem, base64data, BASE64_LENGTH(stripped_pem_count));

  xfree (stripped_pem);

  if (size <= 0) {
    /* Malformed base64, or nothing to decode.  Release the buffer so
       that a false return never leaves an allocation in *DER.  */
    xfree (base64data);
    return false;
  }

  *der = base64data;
  *der_len = (size_t) size;

  return true;
}
2789 
2790 /*
2791  * Generic pinned public key check.
2792  */
2793 
2794 bool
wg_pin_peer_pubkey(const char * pinnedpubkey,const char * pubkey,size_t pubkeylen)2795 wg_pin_peer_pubkey (const char *pinnedpubkey, const char *pubkey, size_t pubkeylen)
2796 {
2797   struct file_memory *fm;
2798   unsigned char *buf = NULL, *pem_ptr = NULL;
2799   size_t size, pem_len;
2800   bool pem_read;
2801   bool result = false;
2802 
2803   size_t pinkeylen;
2804   ssize_t decoded_hash_length;
2805   char *pinkeycopy, *begin_pos, *end_pos;
2806   unsigned char *sha256sumdigest = NULL, *expectedsha256sumdigest = NULL;
2807 
2808   /* if a path wasn't specified, don't pin */
2809   if (!pinnedpubkey)
2810     return true;
2811   if (!pubkey || !pubkeylen)
2812     return result;
2813 
2814   /* only do this if pinnedpubkey starts with "sha256//", length 8 */
2815   if (strncmp (pinnedpubkey, "sha256//", 8) == 0)
2816     {
2817       /* compute sha256sum of public key */
2818       sha256sumdigest = xmalloc (SHA256_DIGEST_SIZE);
2819       sha256_buffer (pubkey, pubkeylen, sha256sumdigest);
2820       expectedsha256sumdigest = xmalloc (SHA256_DIGEST_SIZE);
2821 
2822       /* it starts with sha256//, copy so we can modify it */
2823       pinkeylen = strlen (pinnedpubkey) + 1;
2824       pinkeycopy = xmalloc (pinkeylen);
2825       memcpy (pinkeycopy, pinnedpubkey, pinkeylen);
2826 
2827       /* point begin_pos to the copy, and start extracting keys */
2828       begin_pos = pinkeycopy;
2829       do
2830         {
2831           end_pos = strstr (begin_pos, ";sha256//");
2832           /*
2833            * if there is an end_pos, null terminate,
2834            * otherwise it'll go to the end of the original string
2835            */
2836           if (end_pos)
2837             end_pos[0] = '\0';
2838 
2839           /* decode base64 pinnedpubkey, 8 is length of "sha256//" */
2840           decoded_hash_length = wget_base64_decode (begin_pos + 8, expectedsha256sumdigest, SHA256_DIGEST_SIZE);
2841 
2842           /* if valid base64, compare sha256 digests directly */
2843           if (SHA256_DIGEST_SIZE == decoded_hash_length)
2844             {
2845               if (!memcmp (sha256sumdigest, expectedsha256sumdigest, SHA256_DIGEST_SIZE))
2846                 {
2847                   result = true;
2848                   break;
2849                 }
2850             }
2851           else
2852             logprintf (LOG_VERBOSE, _ ("Skipping key with wrong size (%d/%d): %s\n"),
2853                        (int) (strlen (begin_pos + 8) * 3) / 4, SHA256_DIGEST_SIZE,
2854                        quote (begin_pos + 8));
2855 
2856           /*
2857            * change back the null-terminator we changed earlier,
2858            * and look for next begin
2859            */
2860           if (end_pos)
2861             {
2862               end_pos[0] = ';';
2863               begin_pos = strstr (end_pos, "sha256//");
2864             }
2865         }
2866       while (end_pos && begin_pos);
2867 
2868       xfree (sha256sumdigest);
2869       xfree (expectedsha256sumdigest);
2870       xfree (pinkeycopy);
2871 
2872       return result;
2873     }
2874 
2875   /* fall back to assuming this is a file path */
2876   fm = wget_read_file (pinnedpubkey);
2877   if (!fm)
2878     return result;
2879 
2880   /* Check the file's size */
2881   if (fm->length < 0 || fm->length > MAX_PINNED_PUBKEY_SIZE)
2882     goto cleanup;
2883 
2884   /*
2885    * if the size of our certificate is bigger than the file
2886    * size then it can't match
2887    */
2888   size = (size_t) fm->length;
2889   if (pubkeylen > size)
2890     goto cleanup;
2891 
2892   /* If the sizes are the same, it can't be base64 encoded, must be der */
2893   if (pubkeylen == size)
2894     {
2895       if (!memcmp (pubkey, fm->content, pubkeylen))
2896         result = true;
2897       goto cleanup;
2898     }
2899 
2900   /*
2901    * Otherwise we will assume it's PEM and try to decode it
2902    * after placing null terminator
2903    */
2904   buf = xmalloc (size + 1);
2905   memcpy (buf, fm->content, size);
2906   buf[size] = '\0';
2907 
2908   pem_read = wg_pubkey_pem_to_der ((const char *) buf, &pem_ptr, &pem_len);
2909   /* if it wasn't read successfully, exit */
2910   if (!pem_read)
2911     goto cleanup;
2912 
2913   /*
2914    * if the size of our certificate doesn't match the size of
2915    * the decoded file, they can't be the same, otherwise compare
2916    */
2917   if (pubkeylen == pem_len && !memcmp (pubkey, pem_ptr, pubkeylen))
2918     result = true;
2919 
2920 cleanup:
2921   xfree (buf);
2922   xfree (pem_ptr);
2923   wget_read_file_free (fm);
2924 
2925   return result;
2926 }
2927 
2928 #endif /* HAVE_SSL */
2929 
2930 #ifdef TESTING
2931 
/* Unit test: feed subdir_p a fixed table of argument pairs and compare
   each outcome with the expected value.  Returns NULL once all cases
   have been checked; a failing check is reported through mu_assert
   (defined elsewhere) with the message given below.  */
const char *
test_subdir_p(void)
{
  static const struct {
    const char *first;   /* first argument passed to subdir_p */
    const char *second;  /* second argument passed to subdir_p */
    bool expected;       /* value subdir_p has to yield */
  } cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  size_t n;

  for (n = 0; n < countof (cases); n++)
    {
      bool got = subdir_p (cases[n].first, cases[n].second);

      mu_assert ("test_subdir_p: wrong result", got == cases[n].expected);
    }

  return NULL;
}
2956 
/* Unit test: run dir_matches_p over a fixed table of NULL-terminated
   pattern lists and directory names (literal names, '*' wildcards,
   names with spaces or commas) and compare each outcome with the
   expected value.  Returns NULL once all cases have been checked; a
   failing check is reported through mu_assert (defined elsewhere) with
   the message given below.  */
const char *
test_dir_matches_p(void)
{
  /* Deliberately not const: the pattern array is handed to
     dir_matches_p exactly as a plain array of const char pointers.  */
  static struct {
    const char *patterns[3];  /* pattern list, NULL-terminated */
    const char *name;         /* directory name to look up */
    bool expected;            /* value dir_matches_p has to yield */
  } cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
  };
  size_t n;

  for (n = 0; n < countof (cases); n++)
    {
      bool got = dir_matches_p (cases[n].patterns, cases[n].name);

      mu_assert ("test_dir_matches_p: wrong result", got == cases[n].expected);
    }

  return NULL;
}
2993 
2994 #endif /* TESTING */
2995