1 /* Various utility functions.
2 Copyright (C) 1996-2011, 2015, 2018-2021 Free Software Foundation,
3 Inc.
4
5 This file is part of GNU Wget.
6
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
19
20 Additional permission under GNU GPL version 3 section 7
21
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
30
31 #include "wget.h"
32
33 #include "sha256.h"
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <time.h>
38 #include <unistd.h>
39 #ifdef HAVE_PROCESS_H
40 # include <process.h> /* getpid() */
41 #endif
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <assert.h>
45 #include <stdarg.h>
46 #include <locale.h>
47 #include <errno.h>
48 #include <utime.h>
49
50 #include <sys/time.h>
51
52 #include <sys/stat.h>
53
54 /* For TIOCGWINSZ and friends: */
55 #ifndef WINDOWS
56 # include <sys/ioctl.h>
57 # include <termios.h>
58 #endif
59
60 /* Needed for Unix version of run_with_timeout. */
61 #include <signal.h>
62 #include <setjmp.h>
63
64 #include <regex.h>
65 #ifdef HAVE_LIBPCRE2
66 # define PCRE2_CODE_UNIT_WIDTH 8
67 # include <pcre2.h>
68 #elif defined HAVE_LIBPCRE
69 # include <pcre.h>
70 #endif
71
72 #ifndef HAVE_SIGSETJMP
73 /* If sigsetjmp is a macro, configure won't pick it up. */
74 # ifdef sigsetjmp
75 # define HAVE_SIGSETJMP
76 # endif
77 #endif
78
79 #if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
80 # define USE_SIGNAL_TIMEOUT
81 #endif
82
83 /* Some systems (Linux libc5, "NCR MP-RAS 3.0", and others) don't
84 provide MAP_FAILED, a symbolic constant for the value returned by
85 mmap() when it doesn't work. Usually, this constant should be -1.
86 This only makes sense for files that use mmap() and include
87 sys/mman.h *before* sysdep.h, but doesn't hurt others. */
88 #ifdef HAVE_MMAP
89 # include <sys/mman.h>
90 # ifndef MAP_FAILED
91 # define MAP_FAILED ((void *) -1)
92 # endif
93 #endif
94
95 #include "utils.h"
96 #include "hash.h"
97
98 #ifdef __VMS
99 #include "vms.h"
100 #endif /* def __VMS */
101
102 #ifdef TESTING
103 #include "../tests/unit-tests.h"
104 #endif
105
106 #include "exits.h"
107 #include "c-strcase.h"
108
109 _Noreturn static void
memfatal(const char * context,long attempted_size)110 memfatal (const char *context, long attempted_size)
111 {
112 /* Make sure we don't try to store part of the log line, and thus
113 call malloc. */
114 log_set_save_context (false);
115
116 /* We have different log outputs in different situations:
117 1) output without bytes information
118 2) output with bytes information */
119 if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
120 {
121 logprintf (LOG_ALWAYS,
122 _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
123 exec_name, context);
124 }
125 else
126 {
127 logprintf (LOG_ALWAYS,
128 _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
129 exec_name, context, attempted_size);
130 }
131
132 exit (WGET_EXIT_GENERIC_ERROR);
133 }
134
135 /* Character property table for (re-)escaping VMS ODS5 extended file
136 names. Note that this table ignores Unicode.
137
138 ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
139
140 ODS5 Invalid characters:
141 C0 control codes (0x00 to 0x1F inclusive)
142 Asterisk (*)
143 Question mark (?)
144
145 ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
146 Double quotation marks (")
147 Backslash (\)
148 Colon (:)
149 Left angle bracket (<)
150 Right angle bracket (>)
151 Slash (/)
152 Vertical bar (|)
153
154 Characters escaped by "^":
155 SP ! " # % & ' ( ) + , . : ; =
156 @ [ \ ] ^ ` { | } ~
157
158 Either "^_" or "^ " is accepted as a space. Period (.) is a special
159 case. Note that un-escaped < and > can also confuse a directory
160 spec.
161
162 Characters put out as ^xx:
163 7F (DEL)
164 80-9F (C1 control characters)
165 A0 (nonbreaking space)
166 FF (Latin small letter y diaeresis)
167
168 Other cases:
169 Unicode: "^Uxxxx", where "xxxx" is four hex digits.
170
171 Property table values:
172 Normal escape: 1
173 Space: 2
174 Dot: 4
175 Hex-hex escape: 8
176 ODS2 normal: 16
177 ODS2 lower case: 32
178 Hex digit: 64
179 */
180
181 unsigned char char_prop[ 256] = {
182
183 /* NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI */
184 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
185
186 /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */
187 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
188
189 /* SP ! " # $ % & ' ( ) * + , - . / */
190 2, 1, 1, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0,
191
192 /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
193 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 1, 1, 1, 1, 1, 1,
194
195 /* @ A B C D E F G H I J K L M N O */
196 1, 80, 80, 80, 80, 80, 80, 16, 16, 16, 16, 16, 16, 16, 16, 16,
197
198 /* P Q R S T U V W X Y Z [ \ ] ^ _ */
199 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 16,
200
201 /* ` a b c d e f g h i j k l m n o */
202 1, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32,
203
204 /* p q r s t u v w x y z { | } ~ DEL */
205 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 1, 1, 17, 8,
206
207 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
208 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
209 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
213 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
214 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8
215 };
216
/* Return a freshly allocated copy of S (made with xstrdup) in which
   every character has been mapped through c_tolower.  S itself is not
   modified.  */

char *
xstrdup_lower (const char *s)
{
  char *lowered = xstrdup (s);
  size_t i;

  for (i = 0; lowered[i] != '\0'; i++)
    lowered[i] = c_tolower (lowered[i]);

  return lowered;
}
228
/* Duplicate the character range [BEG, END) into newly allocated,
   zero-terminated storage.  BEG points at the first character to copy
   and END at the character just past the last one.

   When BEG is NULL (in particular when both pointers are NULL), or
   when BEG lies beyond END, a freshly allocated empty string is
   returned instead.  */
char *
strdupdelim (const char *beg, const char *end)
{
  size_t len;
  char *copy;

  if (!beg || beg > end)
    return xstrdup ("");

  len = end - beg;
  copy = xmalloc (len + 1);
  memcpy (copy, beg, len);
  copy[len] = '\0';
  return copy;
}
246
/* Split the comma-separated string S into a NULL-terminated vector of
   freshly allocated strings and return the vector.  Whitespace that
   directly follows a comma is skipped.  A NULL or empty S yields
   NULL.  */
char **
sepstring (const char *s)
{
  char **vec = NULL;
  const char *start;
  int count = 0;

  if (s == NULL || *s == '\0')
    return NULL;

  start = s;
  while (*s != '\0')
    {
      if (*s != ',')
        {
          ++s;
          continue;
        }

      /* A separator: store the element that just ended, keeping the
         vector NULL-terminated at every step.  */
      vec = xrealloc (vec, (count + 2) * sizeof (char *));
      vec[count++] = strdupdelim (start, s);
      vec[count] = NULL;

      /* Step over the ',' and any blanks after it; the next element
         begins at the first non-blank character.  */
      for (++s; c_isspace (*s); ++s)
        ;
      start = s;
    }

  /* Whatever follows the last comma (possibly nothing) is the final
     element.  */
  vec = xrealloc (vec, (count + 2) * sizeof (char *));
  vec[count] = strdupdelim (start, s);
  vec[count + 1] = NULL;
  return vec;
}
282
283 /* Like sprintf, but prints into a string of sufficient size freshly
284 allocated with malloc, which is returned. If unable to print due
285 to invalid format, returns NULL. Inability to allocate needed
286 memory results in abort, as with xmalloc. This is in spirit
287 similar to the GNU/BSD extension asprintf, but somewhat easier to
288 use.
289
290 Internally the function either calls vasprintf or loops around
291 vsnprintf until the correct size is found. Since Wget also ships a
292 fallback implementation of vsnprintf, this should be portable. */
293
294 char *
aprintf(const char * fmt,...)295 aprintf (const char *fmt, ...)
296 {
297 #if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
298 /* Use vasprintf. */
299 int ret;
300 va_list args;
301 char *str;
302 va_start (args, fmt);
303 ret = vasprintf (&str, fmt, args);
304 va_end (args);
305 if (ret < 0 && errno == ENOMEM)
306 memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE); /* for consistency
307 with xmalloc/xrealloc */
308 else if (ret < 0)
309 return NULL;
310 return str;
311 #else /* not HAVE_VASPRINTF */
312
313 /* Constant is using for limits memory allocation for text buffer.
314 Applicable in situation when: vasprintf is not available in the system
315 and vsnprintf return -1 when long line is truncated (in old versions of
316 glibc and in other system where C99 doesn`t support) */
317
318 #define FMT_MAX_LENGTH 1048576
319
320 /* vasprintf is unavailable. snprintf into a small buffer and
321 resize it as necessary. */
322 int size = 32;
323 char *str = xmalloc (size);
324
325 /* #### This code will infloop and eventually abort in xrealloc if
326 passed a FMT that causes snprintf to consistently return -1. */
327
328 while (1)
329 {
330 int n;
331 va_list args;
332
333 va_start (args, fmt);
334 n = vsnprintf (str, size, fmt, args);
335 va_end (args);
336
337 /* If the printing worked, return the string. */
338 if (n > -1 && n < size)
339 return str;
340
341 /* Else try again with a larger buffer. */
342 if (n > -1) /* C99 */
343 size = n + 1; /* precisely what is needed */
344 else if (size >= FMT_MAX_LENGTH) /* We have a huge buffer, */
345 { /* maybe we have some wrong
346 format string? */
347 logprintf (LOG_ALWAYS,
348 _("%s: aprintf: text buffer is too big (%d bytes), "
349 "aborting.\n"),
350 exec_name, size); /* printout a log message */
351 abort (); /* and abort... */
352 }
353 else
354 {
355 /* else, we continue to grow our
356 * buffer: Twice the old size. */
357 size <<= 1;
358 }
359 str = xrealloc (str, size);
360 }
361 #endif /* not HAVE_VASPRINTF */
362 }
363
#ifndef HAVE_STRLCPY
/* Replacement for the BSD strlcpy() function.

   Copies at most SIZE - 1 characters of SRC into DST and, whenever
   SIZE is non-zero, zero-terminates DST.  Returns the length of SRC,
   so a return value >= SIZE tells the caller the copy was truncated.
   Gives the same result as snprintf(dst,size,"%s",src).  */

size_t
strlcpy (char *dst, const char *src, size_t size)
{
  size_t n = 0;

  if (size > 0)
    {
      /* Leave one byte for the terminator.  */
      const size_t room = size - 1;

      while (n < room && src[n] != '\0')
        {
          dst[n] = src[n];
          n++;
        }
      dst[n] = '\0';
    }

  /* Walk the rest of SRC so that the return value is its full
     length, even when nothing more is copied.  */
  while (src[n] != '\0')
    n++;

  return n;
}
#endif
389
/* Join STR0 and the string arguments that follow it into one freshly
   allocated string.  The argument list must end with a NULL pointer.
   Returns NULL when STR0 itself is NULL.  */

char *
concat_strings (const char *str0, ...)
{
  va_list ap;
  const char *piece;
  size_t total = 0;
  char *result;
  char *out;

  if (str0 == NULL)
    return NULL;

  /* First pass: add up the lengths so one allocation is enough.  */
  va_start (ap, str0);
  piece = str0;
  while (piece != NULL)
    {
      total += strlen (piece);
      piece = va_arg (ap, const char *);
    }
  va_end (ap);

  result = xmalloc (total + 1);

  /* Second pass: append each piece at the current write position.  */
  out = result;
  va_start (ap, str0);
  for (piece = str0; piece != NULL; piece = va_arg (ap, const char *))
    {
      const size_t n = strlen (piece);
      memcpy (out, piece, n);
      out += n;
    }
  va_end (ap);
  *out = '\0';

  return result;
}
420
/* Render T, converted to local time, through the strftime format FMT.

   The result lives in a single static 32-byte buffer, so it is
   overwritten by the next call.  The program aborts if localtime
   fails or if strftime returns 0 (for example because the output does
   not fit the buffer).  */

static char *
fmttime (time_t t, const char *fmt)
{
  static char output[32];
  const struct tm *broken_down = localtime (&t);

  if (broken_down == NULL
      || strftime (output, sizeof output, fmt, broken_down) == 0)
    abort ();

  return output;
}
435
/* Return the local time of day of T as a zero-terminated "hh:mm:ss"
   string.

   The string is stored in fmttime's static buffer: it must not be
   freed, and it is overwritten by the next call to time_str or
   datetime_str.  */

char *
time_str (time_t t)
{
  static const char clock_format[] = "%H:%M:%S";

  return fmttime (t, clock_format);
}
446
/* Return the local date and time of T as a zero-terminated
   "YYYY-MM-DD hh:mm:ss" string.  The string is stored in fmttime's
   static buffer and is overwritten by the next call that formats a
   time.  */

char *
datetime_str (time_t t)
{
  static const char stamp_format[] = "%Y-%m-%d %H:%M:%S";

  return fmttime (t, stamp_format);
}
454
455 /* The Windows versions of the following two functions are defined in
456 mswindows.c. On MSDOS this function should never be called. */
457
458 #ifdef __VMS
459
/* VMS variant: does nothing and always returns false, i.e. reports
   that no log file name was chosen here.  */
bool
fork_to_background (void)
{
  const bool logfile_changed = false;

  return logfile_changed;
}
465
466 #else /* def __VMS */
467
468 #if !defined(WINDOWS) && !defined(MSDOS)
469 bool
fork_to_background(void)470 fork_to_background (void)
471 {
472 pid_t pid;
473 /* Whether we arrange our own version of opt.lfilename here. */
474 bool logfile_changed = false;
475
476 if (!opt.lfilename && (!opt.quiet || opt.server_response))
477 {
478 /* We must create the file immediately to avoid either a race
479 condition (which arises from using unique_name and failing to
480 use fopen_excl) or lying to the user about the log file name
481 (which arises from using unique_name, printing the name, and
482 using fopen_excl later on.) */
483 FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
484 if (new_log_fp)
485 {
486 logfile_changed = true;
487 fclose (new_log_fp);
488 }
489 }
490 pid = fork ();
491 if (pid < 0)
492 {
493 /* parent, error */
494 perror ("fork");
495 exit (WGET_EXIT_GENERIC_ERROR);
496 }
497 else if (pid != 0)
498 {
499 /* parent, no error */
500 printf (_("Continuing in background, pid %d.\n"), (int) pid);
501 if (logfile_changed)
502 printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
503 exit (WGET_EXIT_SUCCESS); /* #### should we use _exit()? */
504 }
505
506 /* child: give up the privileges and keep running. */
507 setsid ();
508 if (freopen ("/dev/null", "r", stdin) == NULL)
509 DEBUGP (("Failed to redirect stdin to /dev/null.\n"));
510 if (freopen ("/dev/null", "w", stdout) == NULL)
511 DEBUGP (("Failed to redirect stdout to /dev/null.\n"));
512 if (freopen ("/dev/null", "w", stderr) == NULL)
513 DEBUGP (("Failed to redirect stderr to /dev/null.\n"));
514
515 return logfile_changed;
516 }
517 #endif /* !WINDOWS && !MSDOS */
518
519 #endif /* def __VMS [else] */
520
521
522 /* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
523 specified with TM. The atime ("access time") is set to the current
524 time. */
525
526 void
touch(const char * file,time_t tm)527 touch (const char *file, time_t tm)
528 {
529 struct utimbuf times;
530
531 times.modtime = tm;
532 times.actime = time (NULL);
533
534 if (utime (file, ×) == -1)
535 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
536 }
537
538 /* Checks if FILE is a symbolic link, and removes it if it is. Does
539 nothing under MS-Windows. */
540 int
remove_link(const char * file)541 remove_link (const char *file)
542 {
543 int err = 0;
544 struct stat st;
545
546 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
547 {
548 DEBUGP (("Unlinking %s (symlink).\n", file));
549 err = unlink (file);
550 if (err != 0)
551 logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
552 quote (file), strerror (errno));
553 }
554 return err;
555 }
556
557 /* Does FILENAME exist? */
558 bool
file_exists_p(const char * filename,file_stats_t * fstats)559 file_exists_p (const char *filename, file_stats_t *fstats)
560 {
561 struct stat buf;
562
563 if (!filename)
564 return false;
565
566 #if defined(WINDOWS) || defined(__VMS)
567 int ret = stat (filename, &buf);
568 if (ret >= 0)
569 {
570 if (fstats != NULL)
571 fstats->access_err = errno;
572 }
573 return ret >= 0;
574 #else
575 errno = 0;
576 if (stat (filename, &buf) == 0 && S_ISREG(buf.st_mode) &&
577 (((S_IRUSR & buf.st_mode) && (getuid() == buf.st_uid)) ||
578 ((S_IRGRP & buf.st_mode) && group_member(buf.st_gid)) ||
579 (S_IROTH & buf.st_mode))) {
580 if (fstats != NULL)
581 {
582 fstats->access_err = 0;
583 fstats->st_ino = buf.st_ino;
584 fstats->st_dev = buf.st_dev;
585 }
586 return true;
587 }
588 else
589 {
590 if (fstats != NULL)
591 fstats->access_err = (errno == 0 ? EACCES : errno);
592 errno = 0;
593 return false;
594 }
595 /* NOTREACHED */
596 #endif
597 }
598
/* Return true if PATH names something other than a directory (any
   kind of file), false if it is a directory.  Also returns false when
   PATH cannot be examined.  */
bool
file_non_directory_p (const char *path)
{
  struct stat info;

  /* lstat rather than stat: a symbolic link is judged by itself, so a
     link that points to a directory still counts as a
     non-directory.  */
  return lstat (path, &info) == 0 && !S_ISDIR (info.st_mode);
}
611
612 /* Return the size of file named by FILENAME, or -1 if it cannot be
613 opened or sought into. */
614 wgint
file_size(const char * filename)615 file_size (const char *filename)
616 {
617 #if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
618 wgint size;
619 /* We use fseek rather than stat to determine the file size because
620 that way we can also verify that the file is readable without
621 explicitly checking for permissions. Inspired by the POST patch
622 by Arnaud Wylie. */
623 FILE *fp = fopen (filename, "rb");
624 if (!fp)
625 return -1;
626 fseeko (fp, 0, SEEK_END);
627 size = ftello (fp);
628 fclose (fp);
629 return size;
630 #else
631 struct stat st;
632 if (stat (filename, &st) < 0)
633 return -1;
634 return st.st_size;
635 #endif
636 }
637
638 /* 2005-02-19 SMS.
639 If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
640 original name. With the VMS file systems' versioning, everything
641 should be fine, and appending ".NN" just causes trouble.
642 */
643
644 #ifdef UNIQ_SEP
645
646 /* stat file names named PREFIX.1, PREFIX.2, etc., until one that
647 doesn't exist is found. Return a freshly allocated copy of the
648 unused file name. */
649
650 static char *
unique_name_1(const char * prefix)651 unique_name_1 (const char *prefix)
652 {
653 int count = 1;
654 int plen = strlen (prefix);
655 char *template = xmalloc (plen + 1 + 24);
656 char *template_tail = template + plen;
657
658 memcpy (template, prefix, plen);
659 *template_tail++ = UNIQ_SEP;
660
661 do
662 number_to_string (template_tail, count++);
663 while (file_exists_p (template, NULL) && count < 999999);
664
665 return template;
666 }
667
668 /* Return a unique file name, based on FILE.
669
670 More precisely, if FILE doesn't exist, it is returned unmodified.
671 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number>
672 file name that doesn't exist is returned.
673
674 2005-02-19 SMS. "." is now UNIQ_SEP, and may be different.
675
676 The resulting file is not created, only verified that it didn't
677 exist at the point in time when the function was called.
678 Therefore, where security matters, don't rely that the file created
679 by this function exists until you open it with O_EXCL or
680 equivalent.
681
682 unique_name() always returns a freshly allocated string.
683
684 unique_name_passthrough() may return FILE if the file doesn't exist
685 (and therefore doesn't need changing). */
686
687 char *
unique_name_passthrough(const char * file)688 unique_name_passthrough (const char *file)
689 {
690 /* If the FILE itself doesn't exist, return it without
691 modification. Otherwise, find a numeric suffix that results in unused
692 file name and return it. */
693 return file_exists_p (file, NULL) ? unique_name_1 (file) : (char *) file;
694 }
695
696 char *
unique_name(const char * file)697 unique_name (const char *file)
698 {
699 /* If the FILE itself doesn't exist, return it without
700 modification. Otherwise, find a numeric suffix that results in unused
701 file name and return it. */
702 return file_exists_p (file, NULL) ? unique_name_1 (file) : xstrdup (file);
703 }
704
705 #else /* def UNIQ_SEP */
706
/* Dummy unique-name functions for builds without UNIQ_SEP (as on
   VMS).  They hand back the original name as easily as possible.

   unique_name_passthrough returns FILE itself, with constness cast
   away and nothing allocated.  It takes the same single argument as
   the UNIQ_SEP variant, so callers are identical in both
   configurations.  */
char *
unique_name_passthrough (const char *file)
{
  /* Return FILE itself, without modification, regardless.  */
  return (char *) file;
}
char *
unique_name (const char *file)
{
  /* No renaming in this configuration: the result is simply a freshly
     allocated copy of FILE.  */
  char *copy = xstrdup (file);

  return copy;
}
723
724 #endif /* def UNIQ_SEP [else] */
725
/* Create and open a new file whose name is based on NAME, never
   overwriting an existing file.  Provided O_EXCL is correctly
   implemented, this avoids the race inherent in opening a name
   previously returned by unique_name.

   BINARY is passed on to fopen_excl.  If OPENED_NAME is non-NULL it
   receives the freshly allocated name of the file that was opened, or
   NULL when opening failed.  Returns the open stream, or NULL on
   failure.  */

FILE *
unique_create (const char *name, bool binary, char **opened_name)
{
  char *uname = unique_name (name);
  FILE *fp = fopen_excl (uname, binary);

  /* Someone else created the file between the name check and the
     open: pick a new name and try again.  */
  while (fp == NULL && errno == EEXIST)
    {
      xfree (uname);
      uname = unique_name (name);
      fp = fopen_excl (uname, binary);
    }

  if (opened_name && fp)
    {
      /* The caller takes ownership of the name.  */
      *opened_name = uname;
    }
  else
    {
      if (opened_name)
        *opened_name = NULL;
      xfree (uname);
    }

  return fp;
}
756
/* Open the file for writing, with the addition that the file is
   opened "exclusively".  This means that, if the file already exists,
   this function will *fail* and errno will be set to EEXIST.  If
   BINARY is set, the file will be opened in binary mode, equivalent
   to fopen's "wb".

   If opening the file fails for any reason, including the file having
   previously existed, this function returns NULL and sets errno
   appropriately.  The underlying descriptor is closed again if no
   stream can be attached to it, so a failure never leaks it.  */

FILE *
fopen_excl (const char *fname, int binary)
{
#ifdef O_EXCL
  int fd;
  FILE *fp;

/* 2005-04-14 SMS.
   VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
   It also has file versions which obviate all the O_EXCL effort.
   O_TRUNC (something of a misnomer) requests a new version.
*/
# ifdef __VMS
/* Common open() optional arguments:
   sequential access only, access callback function.
*/
#  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id

  int open_id;
  int flags = O_WRONLY | O_CREAT | O_TRUNC;

  if (binary > 1)
    {
      open_id = 11;
      fd = open( fname,                 /* File name. */
       flags,                           /* Flags. */
       0777,                            /* Mode for default protection. */
       "ctx=bin,stm",                   /* Binary, stream access. */
       "rfm=stmlf",                     /* Stream_LF. */
       OPEN_OPT_ARGS);                  /* Access callback. */
    }
  else if (binary)
    {
      open_id = 12;
      fd = open( fname,                 /* File name. */
       flags,                           /* Flags. */
       0777,                            /* Mode for default protection. */
       "ctx=bin,stm",                   /* Binary, stream access. */
       "rfm=fix",                       /* Fixed-length, */
       "mrs=512",                       /* 512-byte records. */
       OPEN_OPT_ARGS);                  /* Access callback. */
    }
  else
    {
      open_id = 13;
      fd = open( fname,                 /* File name. */
       flags,                           /* Flags. */
       0777,                            /* Mode for default protection. */
       "rfm=stmlf",                     /* Stream_LF. */
       OPEN_OPT_ARGS);                  /* Access callback. */
    }
# else /* def __VMS */
  int flags = O_WRONLY | O_CREAT | O_EXCL;
# ifdef O_BINARY
  if (binary)
    flags |= O_BINARY;
# endif
  fd = open (fname, flags, 0666);
# endif /* def __VMS [else] */

  if (fd < 0)
    return NULL;

  fp = fdopen (fd, binary ? "wb" : "w");
  if (fp == NULL)
    {
      /* No stream owns the descriptor, so release it here.  Preserve
         the errno from fdopen for the caller, as close may change
         it.  */
      int saved_errno = errno;
      close (fd);
      errno = saved_errno;
    }
  return fp;
#else /* not O_EXCL */
  /* Manually check whether the file exists.  This is prone to race
     conditions, but systems without O_EXCL haven't deserved
     better.  */
  if (file_exists_p (fname, NULL))
    {
      errno = EEXIST;
      return NULL;
    }
  return fopen (fname, binary ? "wb" : "w");
#endif /* not O_EXCL */
}
841
842 /* fopen_stat() assumes that file_exists_p() was called earlier.
843 file_stats_t passed to this function was returned from file_exists_p()
844 This is to prevent TOCTTOU race condition.
845 Details : FIO45-C from https://www.securecoding.cert.org/
846 Note that for creating a new file, this check is not useful
847
848 Input:
849 fname => Name of file to open
850 mode => File open mode
851 fstats => Saved file_stats_t about file that was checked for existence
852
853 Returns:
854 NULL if there was an error
855 FILE * of opened file stream
856 */
857 FILE *
fopen_stat(const char * fname,const char * mode,file_stats_t * fstats)858 fopen_stat(const char *fname, const char *mode, file_stats_t *fstats)
859 {
860 int fd;
861 FILE *fp;
862 struct stat fdstats;
863
864 #if defined FUZZING && defined TESTING
865 fp = fopen_wgetrc (fname, mode);
866 return fp;
867 #else
868 fp = fopen (fname, mode);
869 #endif
870 if (fp == NULL)
871 {
872 logprintf (LOG_NOTQUIET, _("Failed to Fopen file %s\n"), fname);
873 return NULL;
874 }
875 fd = fileno (fp);
876 if (fd < 0)
877 {
878 logprintf (LOG_NOTQUIET, _("Failed to get FD for file %s\n"), fname);
879 fclose (fp);
880 return NULL;
881 }
882 memset(&fdstats, 0, sizeof(fdstats));
883 if (fstat (fd, &fdstats) == -1)
884 {
885 logprintf (LOG_NOTQUIET, _("Failed to stat file %s, (check permissions)\n"), fname);
886 fclose (fp);
887 return NULL;
888 }
889 #if !(defined(WINDOWS) || defined(__VMS))
890 if (fstats != NULL &&
891 (fdstats.st_dev != fstats->st_dev ||
892 fdstats.st_ino != fstats->st_ino))
893 {
894 /* File changed since file_exists_p() : NOT SAFE */
895 logprintf (LOG_NOTQUIET, _("File %s changed since the last check. Security check failed.\n"), fname);
896 fclose (fp);
897 return NULL;
898 }
899 #endif
900
901 return fp;
902 }
903
904 /* open_stat assumes that file_exists_p() was called earlier to save file_stats
905 file_stats_t passed to this function was returned from file_exists_p()
906 This is to prevent TOCTTOU race condition.
907 Details : FIO45-C from https://www.securecoding.cert.org/
908 Note that for creating a new file, this check is not useful
909
910
911 Input:
912 fname => Name of file to open
913 flags => File open flags
914 mode => File open mode
915 fstats => Saved file_stats_t about file that was checked for existence
916
917 Returns:
918 -1 if there was an error
919 file descriptor of opened file stream
920 */
921 int
open_stat(const char * fname,int flags,mode_t mode,file_stats_t * fstats)922 open_stat(const char *fname, int flags, mode_t mode, file_stats_t *fstats)
923 {
924 int fd;
925 struct stat fdstats;
926
927 fd = open (fname, flags, mode);
928 if (fd < 0)
929 {
930 logprintf (LOG_NOTQUIET, _("Failed to open file %s, reason :%s\n"), fname, strerror(errno));
931 return -1;
932 }
933 memset(&fdstats, 0, sizeof(fdstats));
934 if (fstat (fd, &fdstats) == -1)
935 {
936 logprintf (LOG_NOTQUIET, _("Failed to stat file %s, error: %s\n"), fname, strerror(errno));
937 close (fd);
938 return -1;
939 }
940 #if !(defined(WINDOWS) || defined(__VMS))
941 if (fstats != NULL &&
942 (fdstats.st_dev != fstats->st_dev ||
943 fdstats.st_ino != fstats->st_ino))
944 {
945 /* File changed since file_exists_p() : NOT SAFE */
946 logprintf (LOG_NOTQUIET, _("Trying to open file %s but it changed since last check. Security check failed.\n"), fname);
947 close (fd);
948 return -1;
949 }
950 #endif
951
952 return fd;
953 }
954
/* Create DIRECTORY, first creating any of its leading pathname
   components that file_exists_p does not report as existing.

   The value returned is that of the final component only: 0 if it was
   reported as existing or mkdir succeeded, otherwise mkdir's error
   status.  Failures on intermediate components are ignored.

   The behaviour of this function should be identical to the behaviour
   of `mkdir -p' on systems where mkdir supports the `-p' option.  */
int
make_directory (const char *directory)
{
  char buf[1024];
  char *dir;
  int i, ret;
  size_t len = strlen (directory);

  /* Work on a private, writable copy, because the path is cut
     temporarily at every '/'.  Short paths use the stack buffer,
     longer ones are duplicated on the heap.  */
  if (len < sizeof (buf))
    {
      memcpy (buf, directory, len + 1);
      dir = buf;
    }
  else
    dir = xstrdup (directory);

  /* Do not cut at a leading '/', so that absolute paths can be
     created.  */
  i = (*dir == '/') ? 1 : 0;
  for (;;)
    {
      bool at_end;

      /* Find the end of the current component.  */
      while (dir[i] != '\0' && dir[i] != '/')
        i++;
      at_end = (dir[i] == '\0');

      /* Terminate the path here and make sure this prefix exists.
         Intermediate failures are tolerated, because the leading
         components are not necessarily directories; RET is
         overwritten on every round.  */
      dir[i] = '\0';
      ret = file_exists_p (dir, NULL) ? 0 : mkdir (dir, 0777);

      if (at_end)
        break;

      /* Put the separator back and continue behind it.  */
      dir[i] = '/';
      i++;
    }

  if (dir != buf)
    xfree (dir);

  return ret;
}
1006
/* Merge BASE with FILE and return the result in freshly allocated
   memory.  Everything in BASE up to and including its last '/' is
   kept and FILE is appended to it; if BASE contains no '/', the
   result is just a copy of FILE.

     file_merge("/foo/bar", "baz")  => "/foo/baz"
     file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
     file_merge("foo", "bar")       => "bar"

   In other words, it's a simpler and gentler version of uri_merge.  */

char *
file_merge (const char *base, const char *file)
{
  const char *slash = strrchr (base, '/');
  size_t dirlen, filelen;
  char *merged;

  if (slash == NULL)
    return xstrdup (file);

  dirlen = slash - base + 1;            /* directory part, '/' included */
  filelen = strlen (file);

  merged = xmalloc (dirlen + filelen + 1);
  memcpy (merged, base, dirlen);
  memcpy (merged + dirlen, file, filelen + 1);  /* copies the '\0' too */

  return merged;
}
1032
/* Same as fnmatch, except that the match is case-insensitive: PATTERN
   and STRING are passed on with FNM_CASEFOLD added to FLAGS, and
   fnmatch's result is returned unchanged.  */

int
fnmatch_nocase (const char *pattern, const char *string, int flags)
{
  /* FNM_CASEFOLD began as a GNU extension but is now also present on
     *BSD platforms, and possibly elsewhere.  Gnulib provides the flag
     where it doesn't exist.  */
  const int folding_flags = flags | FNM_CASEFOLD;

  return fnmatch (pattern, string, folding_flags);
}
1043
1044 static bool in_acclist (const char *const *, const char *, bool);
1045
1046 /* Determine whether a file is acceptable to be followed, according to
1047 lists of patterns to accept/reject. */
1048 bool
acceptable(const char * s)1049 acceptable (const char *s)
1050 {
1051 const char *p;
1052
1053 if (opt.output_document && strcmp (s, opt.output_document) == 0)
1054 return true;
1055
1056 if ((p = strrchr (s, '/')))
1057 s = p + 1;
1058
1059 if (opt.accepts)
1060 {
1061 if (opt.rejects)
1062 return (in_acclist ((const char *const *)opt.accepts, s, true)
1063 && !in_acclist ((const char *const *)opt.rejects, s, true));
1064 else
1065 return in_acclist ((const char *const *)opt.accepts, s, true);
1066 }
1067 else if (opt.rejects)
1068 return !in_acclist ((const char *const *)opt.rejects, s, true);
1069
1070 return true;
1071 }
1072
1073 /* Determine whether an URL is acceptable to be followed, according to
1074 regex patterns to accept/reject. */
1075 bool
accept_url(const char * s)1076 accept_url (const char *s)
1077 {
1078 if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s))
1079 return false;
1080 if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s))
1081 return false;
1082
1083 return true;
1084 }
1085
1086 /* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p()
1087 will return true if and only if D2 begins with `/something/' or is exactly
1088 '/something'. */
1089 bool
subdir_p(const char * d1,const char * d2)1090 subdir_p (const char *d1, const char *d2)
1091 {
1092 if (*d1 == '\0')
1093 return true;
1094 if (!opt.ignore_case)
1095 for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
1096 ;
1097 else
1098 for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
1099 ;
1100
1101 return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
1102 }
1103
1104 /* Iterate through DIRLIST (which must be NULL-terminated), and return the
1105 first element that matches DIR, through wildcards or front comparison (as
1106 appropriate). */
1107 static bool
dir_matches_p(const char ** dirlist,const char * dir)1108 dir_matches_p (const char **dirlist, const char *dir)
1109 {
1110 const char **x;
1111 int (*matcher) (const char *, const char *, int)
1112 = opt.ignore_case ? fnmatch_nocase : fnmatch;
1113
1114 for (x = dirlist; *x; x++)
1115 {
1116 /* Remove leading '/' */
1117 const char *p = *x + (**x == '/');
1118 if (has_wildcards_p (p))
1119 {
1120 if (matcher (p, dir, FNM_PATHNAME) == 0)
1121 break;
1122 }
1123 else
1124 {
1125 if (subdir_p (p, dir))
1126 break;
1127 }
1128 }
1129
1130 return *x ? true : false;
1131 }
1132
1133 /* Returns whether DIRECTORY is acceptable for download, wrt the
1134 include/exclude lists.
1135
1136 The leading `/' is ignored in paths; relative and absolute paths
1137 may be freely intermixed. */
1138
1139 bool
accdir(const char * directory)1140 accdir (const char *directory)
1141 {
1142 /* Remove starting '/'. */
1143 if (*directory == '/')
1144 ++directory;
1145 if (opt.includes)
1146 {
1147 if (!dir_matches_p (opt.includes, directory))
1148 return false;
1149 }
1150 if (opt.excludes)
1151 {
1152 if (dir_matches_p (opt.excludes, directory))
1153 return false;
1154 }
1155 return true;
1156 }
1157
/* Return true if STRING ends with TAIL.  For instance:

   match_tail ("abc", "bc", false)  -> true
   match_tail ("abc", "ab", false)  -> false
   match_tail ("abc", "abc", false) -> true

   If FOLD_CASE is true, the comparison will be case-insensitive.  An
   empty TAIL matches every STRING.  */

bool
match_tail (const char *string, const char *tail, bool fold_case)
{
  /* Keep the lengths as size_t: narrowing them to int could truncate
     or overflow for very long strings and yield a bogus offset.  */
  size_t string_len = strlen (string);
  size_t tail_len = strlen (tail);
  const char *end;

  if (tail_len > string_len)
    return false;               /* tail is longer than string.  */

  /* Compare TAIL against the last TAIL_LEN characters of STRING.  */
  end = string + (string_len - tail_len);
  if (fold_case)
    return strcasecmp (end, tail) == 0;
  return strcmp (end, tail) == 0;
}
1179
1180 /* Checks whether string S matches each element of ACCEPTS. A list
1181 element are matched either with fnmatch() or match_tail(),
1182 according to whether the element contains wildcards or not.
1183
1184 If the BACKWARD is false, don't do backward comparison -- just compare
1185 them normally. */
1186 static bool
in_acclist(const char * const * accepts,const char * s,bool backward)1187 in_acclist (const char *const *accepts, const char *s, bool backward)
1188 {
1189 for (; *accepts; accepts++)
1190 {
1191 if (has_wildcards_p (*accepts))
1192 {
1193 int res = opt.ignore_case
1194 ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1195 /* fnmatch returns 0 if the pattern *does* match the string. */
1196 if (res == 0)
1197 return true;
1198 }
1199 else
1200 {
1201 if (backward)
1202 {
1203 if (match_tail (s, *accepts, opt.ignore_case))
1204 return true;
1205 }
1206 else
1207 {
1208 int cmp = opt.ignore_case
1209 ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1210 if (cmp == 0)
1211 return true;
1212 }
1213 }
1214 }
1215 return false;
1216 }
1217
/* Return a pointer to STR's suffix (file extension), i.e. to the
   character following the last '.' in STR, or NULL when there is no
   '.' or when a '/' follows the last '.'.  Examples:
     suffix ("foo.bar")       -> "bar"
     suffix ("foo.bar.baz")   -> "baz"
     suffix ("/foo/bar")      -> NULL
     suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  char *dot = strrchr (str, '.');

  if (dot == NULL)
    return NULL;

  /* A '/' after the last dot means the dot sits in a directory name,
     not in the file name.  */
  if (strchr (dot + 1, '/') != NULL)
    return NULL;

  return dot + 1;
}
1233
/* Return true if S contains at least one globbing wildcard character
   (`*', `?', `[' or `]').  */

bool
has_wildcards_p (const char *s)
{
  static const char glob_chars[] = "*?[]";

  return strpbrk (s, glob_chars) != NULL;
}
1242
/* Return true if FNAME ends with a typical HTML suffix.  The
   following suffixes, compared case-insensitively, are presumed to
   denote HTML files:

     html
     htm
     ?html (`?' matches one character)

   #### CAVEAT.  This is not necessarily a good indication that FNAME
   refers to a file that contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  char *ext = suffix (fname);

  if (ext == NULL)
    return false;

  /* Plain "html" or "htm".  */
  if (c_strcasecmp (ext, "html") == 0 || c_strcasecmp (ext, "htm") == 0)
    return true;

  /* Any single character followed by "html".  */
  return ext[0] != '\0' && c_strcasecmp (ext + 1, "html") == 0;
}
1268
/* Read FILE into memory.  A pointer to `struct file_memory' is
   returned; use struct element `content' to access file contents, and
   the element `length' to know the file length.  `content' is *not*
   zero-terminated, and you should *not* read or write beyond the [0,
   length) range of characters.

   NULL is returned when FILE cannot be opened, or when read() reports
   an error on the non-mmap path.

   After you are done with the file contents, call wget_read_file_free to
   release the memory.

   Depending on the operating system and the type of file that is
   being read, wget_read_file() either mmap's the file into memory, or
   reads the file into the core using read().

   If file is named "-", fileno(stdin) is used for reading instead.
   If you want to read from a real file named "-", use "./-" instead.
   (This special case is compiled out when FUZZING is defined.)  */

struct file_memory *
wget_read_file (const char *file)
{
  int fd;
  struct file_memory *fm;
  long size;
  /* Set when FD is stdin, which this function must not close.  */
  bool inhibit_close = false;

  /* Some magic in the finest tradition of Perl and its kin: if FILE
     is "-", just use stdin.  */
#ifndef FUZZING
  if (HYPHENP (file))
    {
      fd = fileno (stdin);
      inhibit_close = true;
      /* Note that we don't inhibit mmap() in this case.  If stdin is
         redirected from a regular file, mmap() will still work.  */
    }
  else
#endif
    fd = open (file, O_RDONLY);
  if (fd < 0)
    return NULL;
  fm = xnew (struct file_memory);

#ifdef HAVE_MMAP
  {
    struct stat buf;
    if (fstat (fd, &buf) < 0)
      goto mmap_lose;
    fm->length = buf.st_size;
    /* NOTE: As far as I know, the callers of this function never
       modify the file text.  Relying on this would enable us to
       specify PROT_READ and MAP_SHARED for a marginal gain in
       efficiency, but at some cost to generality.  */
    fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE, fd, 0);
    if (fm->content == (char *)MAP_FAILED)
      goto mmap_lose;
    /* The mapping has been established, so FD is no longer used
       here.  */
    if (!inhibit_close)
      close (fd);

    fm->mmap_p = 1;
    return fm;
  }

 mmap_lose:
  /* The most common reason why mmap() fails is that FD does not point
     to a plain file.  However, it's also possible that mmap() doesn't
     work for a particular type of file.  Therefore, whenever mmap()
     fails, we just fall back to the regular method.  */
#endif /* HAVE_MMAP */

  /* Regular method: read() the data into a growing heap buffer.
     fm->length is reset because the mmap attempt may have set it.  */
  fm->length = 0;
  size = 512;                   /* number of bytes fm->content can
                                   hold at any given time. */
  fm->content = xmalloc (size);
  while (1)
    {
      wgint nread;
      if (fm->length > size / 2)
        {
          /* #### I'm not sure whether the whole exponential-growth
             thing makes sense with kernel read.  On Linux at least,
             read() refuses to read more than 4K from a file at a
             single chunk anyway.  But other Unixes might optimize it
             better, and it doesn't *hurt* anything, so I'm leaving
             it.  */

          /* Normally, we grow SIZE exponentially to make the number
             of calls to read() and realloc() logarithmic in relation
             to file size.  However, read() can read an amount of data
             smaller than requested, and it would be unreasonable to
             double SIZE every time *something* was read.  Therefore,
             we double SIZE only when the length exceeds half of the
             entire allocated size.  */
          size <<= 1;
          fm->content = xrealloc (fm->content, size);
        }
      /* Ask for as many bytes as still fit in the buffer.  */
      nread = read (fd, fm->content + fm->length, size - fm->length);
      if (nread > 0)
        /* Successful read. */
        fm->length += nread;
      else if (nread < 0)
        /* Error. */
        goto lose;
      else
        /* EOF */
        break;
    }
  if (!inhibit_close)
    close (fd);
  if (size > fm->length && fm->length != 0)
    /* Due to exponential growth of fm->content, the allocated region
       might be much larger than what is actually needed.  The buffer
       is left at its current size when nothing at all was read.  */
    fm->content = xrealloc (fm->content, fm->length);
  fm->mmap_p = 0;
  return fm;

 lose:
  /* read() failed: release everything acquired so far.  */
  if (!inhibit_close)
    close (fd);
  xfree (fm->content);
  xfree (fm);
  return NULL;
}
1391
1392 /* Release the resources held by FM. Specifically, this calls
1393 munmap() or xfree() on fm->content, depending whether mmap or
1394 malloc/read were used to read in the file. It also frees the
1395 memory needed to hold the FM structure itself. */
1396
1397 void
wget_read_file_free(struct file_memory * fm)1398 wget_read_file_free (struct file_memory *fm)
1399 {
1400 #ifdef HAVE_MMAP
1401 if (fm->mmap_p)
1402 {
1403 munmap (fm->content, fm->length);
1404 }
1405 else
1406 #endif
1407 {
1408 xfree (fm->content);
1409 }
1410 xfree (fm);
1411 }
1412
/* Free every pointer stored in the NULL-terminated vector VEC, then
   free VEC itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (vec == NULL)
    return;

  for (cursor = vec; *cursor != NULL; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1429
/* Append vector V2 to vector V1 and return the merged vector.  The
   function frees V2 and reallocates V1, so neither pointer may be
   used after the call.  If V1 is NULL, V2 is returned; if V2 is NULL,
   V1 is returned.  Both vectors must be NULL-terminated.  */
char **
merge_vecs (char **v1, char **v2)
{
  int len1 = 0, len2 = 0;

  if (v1 == NULL)
    return v2;
  if (v2 == NULL)
    return v1;
  if (*v2 == NULL)
    {
      /* V2 is empty: nothing to append.  This also keeps LEN2 from
         being 0 below.  */
      xfree (v2);
      return v1;
    }

  /* Count the elements of both vectors, terminators excluded.  */
  while (v1[len1] != NULL)
    len1++;
  while (v2[len2] != NULL)
    len2++;

  /* Grow V1, then copy V2's elements along with its terminating
     NULL.  */
  v1 = xrealloc (v1, (len1 + len2 + 1) * sizeof (char *));
  memcpy (v1 + len1, v2, (len2 + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1460
/* Append a freshly allocated copy of STR to the NULL-terminated
   vector VEC.  If VEC is NULL, a new vector is allocated.  Return the
   new value of the vector.  */

char **
vec_append (char **vec, const char *str)
{
  int used = 0;                 /* elements already in VEC, not
                                   counting the terminating NULL */

  if (vec != NULL)
    while (vec[used] != NULL)
      used++;

  /* Make room for the new element plus the terminating NULL.  */
  vec = xrealloc (vec, (used + 2) * sizeof (char *));
  vec[used] = xstrdup (str);
  vec[used + 1] = NULL;
  return vec;
}
1484
/* Sometimes it's useful to create "sets" of strings, i.e. special
   hash tables in which strings are stored as keys and are merely
   queried for existence.  The utility routines below make that
   transparent.  */

/* Add a copy of S to the string set HT, unless HT already contains
   S.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Insert only when S is absent, so that there is never an old
     element to free() before strdup()ing a new one.

     "1" serves as the value.  It is a clear, arbitrary marker that
     consumes no memory -- the pointer to the same string "1" is
     shared by all the key-value pairs in all `set' hash tables.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1505
/* Tell whether the string set HT contains S.  This is merely a
   synonym for hash_table_contains.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);

  return present;
}
1513
1514 /* Convert the specified string set to array. ARRAY should be large
1515 enough to hold hash_table_count(ht) char pointers. */
1516
string_set_to_array(struct hash_table * ht,char ** array)1517 void string_set_to_array (struct hash_table *ht, char **array)
1518 {
1519 hash_table_iterator iter;
1520 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1521 *array++ = iter.key;
1522 }
1523
1524 /* Free the string set. This frees both the storage allocated for
1525 keys and the actual hash table. (hash_table_destroy would only
1526 destroy the hash table.) */
1527
1528 void
string_set_free(struct hash_table * ht)1529 string_set_free (struct hash_table *ht)
1530 {
1531 hash_table_iterator iter;
1532 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1533 xfree (iter.key);
1534 hash_table_destroy (ht);
1535 }
1536
1537 /* Utility function: simply call xfree() on all keys and values of HT. */
1538
1539 void
free_keys_and_values(struct hash_table * ht)1540 free_keys_and_values (struct hash_table *ht)
1541 {
1542 hash_table_iterator iter;
1543 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1544 {
1545 xfree (iter.key);
1546 xfree (iter.value);
1547 }
1548 }
1549
/* Get the digit grouping data for thousand separators by calling
   localeconv().  The data consists of the separator string, stored in
   *SEP, and the grouping info, stored in *GROUPING.  It is computed on
   the first call and cached for all later calls.

   In locales that don't set a thousand separator (such as the "C"
   locale), this forces it to be "," (or "." when "," is the decimal
   point).  We are now only showing thousand separators in one place,
   so this shouldn't be a problem in practice.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static const char *cached_sep;
  static const char *cached_grouping;
  static bool initialized;

  if (!initialized)
    {
      /* Start from what the locale reports.  */
      const struct lconv *loc = localeconv ();
      const char *locale_sep = loc->thousands_sep;
      const char *locale_grouping = loc->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* Column widths cannot be counted, so only a single-byte
       * separator is usable; the check below then picks the byte. */
      if (strlen (locale_sep) > 1)
        locale_sep = "";
#endif
      if (*locale_sep == '\0')
        {
          /* Many locales (such as "C" or "hr_HR") don't specify
             grouping, but we still want it for legibility.  In those
             locales use ',' as the separator, unless that character
             serves as the decimal point, in which case use '.'.
             Digits are then grouped by three.  */
          locale_sep = (*loc->decimal_point == ',') ? "." : ",";
          locale_grouping = "\x03";
        }

      cached_sep = locale_sep;
      cached_grouping = locale_grouping;
      initialized = true;
    }

  *sep = cached_sep;
  *grouping = cached_grouping;
}
1595
1596 /* Return a printed representation of N with thousand separators.
1597 This should respect locale settings, with the exception of the "C"
1598 locale which mandates no separator, but we use one anyway.
1599
1600 Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1601 the separators because it's too non-portable, and it's hard to test
1602 for this feature at configure time. Besides, it wouldn't display
1603 separators in the "C" locale, still used by many Unix users. */
1604
1605 const char *
with_thousand_seps(wgint n)1606 with_thousand_seps (wgint n)
1607 {
1608 static char outbuf[48];
1609 char *p = outbuf + sizeof outbuf;
1610
1611 /* Info received from locale */
1612 const char *grouping, *sep;
1613 int seplen;
1614
1615 /* State information */
1616 int i = 0, groupsize;
1617 const char *atgroup;
1618
1619 bool negative = n < 0;
1620
1621 /* Initialize grouping data. */
1622 get_grouping_data (&sep, &grouping);
1623 seplen = strlen (sep);
1624 atgroup = grouping;
1625 groupsize = *atgroup++;
1626
1627 /* This would overflow on WGINT_MIN, but printing negative numbers
1628 is not an important goal of this fuinction. */
1629 if (negative)
1630 n = -n;
1631
1632 /* Write the number into the buffer, backwards, inserting the
1633 separators as necessary. */
1634 *--p = '\0';
1635 while (1)
1636 {
1637 *--p = n % 10 + '0';
1638 n /= 10;
1639 if (n == 0)
1640 break;
1641 /* Prepend SEP to every groupsize'd digit and get new groupsize. */
1642 if (++i == groupsize)
1643 {
1644 if (seplen == 1)
1645 *--p = *sep;
1646 else
1647 memcpy (p -= seplen, sep, seplen);
1648 i = 0;
1649 if (*atgroup)
1650 groupsize = *atgroup++;
1651 }
1652 }
1653 if (negative)
1654 *--p = '-';
1655
1656 return p;
1657 }
1658
1659 /* N, a byte quantity, is converted to a human-readable abberviated
1660 form a la sizes printed by `ls -lh'. The result is written to a
1661 static buffer, a pointer to which is returned.
1662
1663 Unlike `with_thousand_seps', this approximates to the nearest unit.
1664 Quoting GNU libit: "Most people visually process strings of 3-4
1665 digits effectively, but longer strings of digits are more prone to
1666 misinterpretation. Hence, converting to an abbreviated form
1667 usually improves readability."
1668
1669 This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1670 original computer-related meaning of "powers of 1024". We don't
1671 use the "*bibyte" names invented in 1998, and seldom used in
1672 practice. Wikipedia's entry on "binary prefix" discusses this in
1673 some detail. */
1674
1675 char *
human_readable(wgint n,const int acc,const int decimals)1676 human_readable (wgint n, const int acc, const int decimals)
1677 {
1678 /* These suffixes are compatible with those of GNU `ls -lh'. */
1679 static char powers[] =
1680 {
1681 'K', /* kilobyte, 2^10 bytes */
1682 'M', /* megabyte, 2^20 bytes */
1683 'G', /* gigabyte, 2^30 bytes */
1684 'T', /* terabyte, 2^40 bytes */
1685 'P', /* petabyte, 2^50 bytes */
1686 'E', /* exabyte, 2^60 bytes */
1687 };
1688 static char buf[8];
1689 size_t i;
1690
1691 /* If the quantity is smaller than 1K, just print it. */
1692 if (n < 1024)
1693 {
1694 snprintf (buf, sizeof (buf), "%d", (int) n);
1695 return buf;
1696 }
1697
1698 /* Loop over powers, dividing N with 1024 in each iteration. This
1699 works unchanged for all sizes of wgint, while still avoiding
1700 non-portable `long double' arithmetic. */
1701 for (i = 0; i < countof (powers); i++)
1702 {
1703 /* At each iteration N is greater than the *subsequent* power.
1704 That way N/1024.0 produces a decimal number in the units of
1705 *this* power. */
1706 if ((n / 1024) < 1024 || i == countof (powers) - 1)
1707 {
1708 double val = n / 1024.0;
1709 /* Print values smaller than the accuracy level (acc) with (decimal)
1710 * decimal digits, and others without any decimals. */
1711 snprintf (buf, sizeof (buf), "%.*f%c",
1712 val < acc ? decimals : 0, val, powers[i]);
1713 return buf;
1714 }
1715 n /= 1024;
1716 }
1717 return NULL; /* unreached */
1718 }
1719
1720 /* Count the digits in the provided number. Used to allocate space
1721 when printing numbers. */
1722
1723 int
numdigit(wgint number)1724 numdigit (wgint number)
1725 {
1726 int cnt = 1;
1727 if (number < 0)
1728 ++cnt; /* accommodate '-' */
1729 while ((number /= 10) != 0)
1730 ++cnt;
1731 return cnt;
1732 }
1733
/* PR stores the character for the digit n/mask at *p and advances p.
   It relies on the variables `n' and `p' of the function it is
   expanded in.  */
#define PR(mask) *p++ = n / (mask) + '0'

/* DIGITS_<D> is used to print a D-digit number and should be called
   with mask==10^(D-1).  It prints n/mask (the first digit), reducing
   n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
   Recursively this continues until DIGITS_1 is invoked.  Each
   expansion is a single comma expression.  */

#define DIGITS_1(mask) PR (mask)
#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)

/* DIGITS_<11-20> are only used on machines with 64-bit wgints. */

#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)

/* Shorthand for casting to wgint.  It is used below to force the
   large power-of-ten constants to be computed in wgint.  */
#define W wgint

/* Print NUMBER to BUFFER in base 10.  This is equivalent to
   `sprintf(buffer, "%lld", (long long) number)', only typically much
   faster and portable to machines without long long.

   The speedup may make a difference in programs that frequently
   convert numbers to strings.  Some implementations of sprintf,
   particularly the one in some versions of GNU libc, have been known
   to be quite slow when converting integers to strings.

   Return the pointer to the location where the terminating zero was
   printed.  (Equivalent to calling buffer+strlen(buffer) after the
   function is done.)

   BUFFER should be large enough to accept as many bytes as you expect
   the number to take up.  On machines with 64-bit wgints the maximum
   needed size is 24 bytes.  That includes the digits needed for the
   largest 64-bit number, the `-' sign in case it's negative, and the
   terminating '\0'.  */

char *
number_to_string (char *buffer, wgint number)
{
  char *p = buffer;
  wgint n = number;

  /* Character of the lowest digit when it had to be split off below;
     0 when no digit was split off.  */
  int last_digit_char = 0;

  if (n < 0)
    {
      if (n < -WGINT_MAX)
        {
          /* n = -n would overflow because -n would evaluate to a
             wgint value larger than WGINT_MAX.  Need to make n
             smaller and handle the last digit separately.  */
          int last_digit = n % 10;
          /* The sign of n%10 is implementation-defined. */
          if (last_digit < 0)
            last_digit_char = '0' - last_digit;
          else
            last_digit_char = '0' + last_digit;
          /* After n is made smaller, -n will not overflow. */
          n /= 10;
        }

      *p++ = '-';
      n = -n;
    }

  /* Use the DIGITS_ macro appropriate for N's number of digits.  That
     way printing any N is fully open-coded without a loop or jump.
     (Also see description of DIGITS_*.)  */

  if      (n < 10)                       DIGITS_1 (1);
  else if (n < 100)                      DIGITS_2 (10);
  else if (n < 1000)                     DIGITS_3 (100);
  else if (n < 10000)                    DIGITS_4 (1000);
  else if (n < 100000)                   DIGITS_5 (10000);
  else if (n < 1000000)                  DIGITS_6 (100000);
  else if (n < 10000000)                 DIGITS_7 (1000000);
  else if (n < 100000000)                DIGITS_8 (10000000);
  else if (n < 1000000000)               DIGITS_9 (100000000);
  else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
  else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
  else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
  else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
  else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
  else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
  else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
  else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
  else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
  else                                   DIGITS_19 (1000000000*(W)1000000000);

  /* Append the digit that was split off to avoid the overflow.  */
  if (last_digit_char)
    *p++ = last_digit_char;

  *p = '\0';

  return p;
}

/* The helper macros are private to number_to_string; remove them.
   NOTE(review): SPRINTF_WGINT is not defined in the part of the file
   shown here, so its #undef looks like a harmless leftover --
   confirm.  */
#undef PR
#undef W
#undef SPRINTF_WGINT
#undef DIGITS_1
#undef DIGITS_2
#undef DIGITS_3
#undef DIGITS_4
#undef DIGITS_5
#undef DIGITS_6
#undef DIGITS_7
#undef DIGITS_8
#undef DIGITS_9
#undef DIGITS_10
#undef DIGITS_11
#undef DIGITS_12
#undef DIGITS_13
#undef DIGITS_14
#undef DIGITS_15
#undef DIGITS_16
#undef DIGITS_17
#undef DIGITS_18
#undef DIGITS_19
1869
1870 #define RING_SIZE 3
1871
1872 /* Print NUMBER to a statically allocated string and return a pointer
1873 to the printed representation.
1874
1875 This function is intended to be used in conjunction with printf.
1876 It is hard to portably print wgint values:
1877 a) you cannot use printf("%ld", number) because wgint can be long
1878 long on 32-bit machines with LFS.
1879 b) you cannot use printf("%lld", number) because NUMBER could be
1880 long on 32-bit machines without LFS, or on 64-bit machines,
1881 which do not require LFS. Also, Windows doesn't support %lld.
1882 c) you cannot use printf("%j", (int_max_t) number) because not all
1883 versions of printf support "%j", the most notable being the one
1884 on Windows.
1885 d) you cannot #define WGINT_FMT to the appropriate format and use
1886 printf(WGINT_FMT, number) because that would break translations
1887 for user-visible messages, such as printf("Downloaded: %d
1888 bytes\n", number).
1889
1890 What you should use instead is printf("%s", number_to_static_string
1891 (number)).
1892
1893 CAVEAT: since the function returns pointers to static data, you
1894 must be careful to copy its result before calling it again.
1895 However, to make it more useful with printf, the function maintains
1896 an internal ring of static buffers to return. That way things like
1897 printf("%s %s", number_to_static_string (num1),
1898 number_to_static_string (num2)) work as expected. Three buffers
1899 are currently used, which means that "%s %s %s" will work, but "%s
1900 %s %s %s" won't. If you need to print more than three wgints,
1901 bump the RING_SIZE (or rethink your message.) */
1902
1903 char *
number_to_static_string(wgint number)1904 number_to_static_string (wgint number)
1905 {
1906 static char ring[RING_SIZE][24];
1907 static int ringpos;
1908 char *buf = ring[ringpos];
1909 number_to_string (buf, number);
1910 ringpos = (ringpos + 1) % RING_SIZE;
1911 return buf;
1912 }
1913
1914 /* Converts the byte to bits format if --report-bps option is enabled
1915 */
1916 wgint
convert_to_bits(wgint num)1917 convert_to_bits (wgint num)
1918 {
1919 if (opt.report_bps)
1920 return num * 8;
1921 return num;
1922 }
1923
1924
/* Determine the width, in columns, of the terminal attached to
   stderr.  If that's not possible, return 0.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  /* NOTE(review): presumably "output goes to a log file and the
     progress display was not explicitly requested" -- confirm against
     the definitions of these options.  */
  if (opt.lfilename != NULL && opt.show_progress != 1)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &wsz) < 0)
    return 0;                   /* most likely ENOTTY */

  return wsz.ws_col;
#elif defined(WINDOWS)
  CONSOLE_SCREEN_BUFFER_INFO csbi;

  if (GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
    return csbi.dwSize.X;
  return 0;
#else  /* neither TIOCGWINSZ nor WINDOWS */
  return 0;
#endif /* neither TIOCGWINSZ nor WINDOWS */
}
1954
/* Non-zero once the random number generator in use (random, the
   [dl]rand48 family, or rand) has been seeded.  */
static int rnd_seeded;

/* Return a random number between 0 and MAX-1, inclusive.

   If the system supports neither random nor lrand48 and MAX is
   greater than the value of RAND_MAX+1 on the system, the returned
   value will be in the range [0, RAND_MAX].  This may be fixed in a
   future release.  The random number generator is seeded
   automatically, from the current time and the process id, the first
   time it is called.

   This uses random where available, then lrand48, and rand
   elsewhere.  DO NOT use it for cryptography.  It is only meant to be
   used in situations where quality of the random numbers returned
   doesn't really matter.  */

int
random_number (int max)
{
#ifdef HAVE_RANDOM
  if (rnd_seeded == 0)
    {
      const long seed = (long) time (NULL) ^ (long) getpid ();
      srandom (seed);
      rnd_seeded = 1;
    }
  return random () % max;
#elif defined HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      const long seed = (long) time (NULL) ^ (long) getpid ();
      srand48 (seed);
      rnd_seeded = 1;
    }
  return lrand48 () % max;
#else /* not HAVE_DRAND48 */

  if (rnd_seeded == 0)
    {
      srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
      rnd_seeded = 1;
    }

  /* Like rand() % max, but uses the high-order bits for better
     randomness on architectures where rand() is implemented using a
     simple congruential generator.  */
  return (int) ((double) max * rand () / (RAND_MAX + 1.0));

#endif /* not HAVE_DRAND48 */
}
2008
/* Return a random uniformly distributed floating point number in the
   [0, 1) range.  Uses random (through random_number) or drand48
   where available, and a really lame kludge elsewhere.  */

double
random_float (void)
{
#ifdef HAVE_RANDOM
  const int r = random_number (RAND_MAX);

  return (double) r / RAND_MAX;
#elif defined HAVE_DRAND48
  if (rnd_seeded == 0)
    {
      const long seed = (long) time (NULL) ^ (long) getpid ();
      srand48 (seed);
      rnd_seeded = 1;
    }
  return drand48 ();
#else /* not HAVE_DRAND48 */
  /* Assemble the result from four random numbers below 10000, each
     one supplying the next four decimal places.  */
  double result = random_number (10000) / 10000.0;

  result += random_number (10000) / (10000.0 * 10000.0);
  result += random_number (10000) / (10000.0 * 10000.0 * 10000.0);
  result += random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0);
  return result;
#endif /* not HAVE_DRAND48 */
}
2032
2033 /* Implementation of run_with_timeout, a generic timeout-forcing
2034 routine for systems with Unix-like signal handling. */
2035
2036 #ifdef USE_SIGNAL_TIMEOUT
2037 # ifdef HAVE_SIGSETJMP
2038 # define SETJMP(env) sigsetjmp (env, 1)
2039
2040 static sigjmp_buf run_with_timeout_env;
2041
/* Signal handler variant for systems with sigsetjmp; SIG is expected
   to be SIGALRM.  It never returns: it jumps back to the context
   saved in run_with_timeout_env, where the SETJMP call then yields
   -1.  Because SETJMP is sigsetjmp (env, 1) here, the jump also
   restores the signal mask saved at that point.  */
_Noreturn static void
abort_run_with_timeout (int sig _GL_UNUSED)
{
  assert (sig == SIGALRM);
  siglongjmp (run_with_timeout_env, -1);
}
2048 # else /* not HAVE_SIGSETJMP */
2049 # define SETJMP(env) setjmp (env)
2050
2051 static jmp_buf run_with_timeout_env;
2052
2053 static void _Noreturn
abort_run_with_timeout(int sig _GL_UNUSED)2054 abort_run_with_timeout (int sig _GL_UNUSED)
2055 {
2056 assert (sig == SIGALRM);
2057 /* We don't have siglongjmp to preserve the set of blocked signals;
2058 if we longjumped out of the handler at this point, SIGALRM would
2059 remain blocked. We must unblock it manually. */
2060 sigset_t set;
2061 sigemptyset (&set);
2062 sigaddset (&set, SIGALRM);
2063 sigprocmask (SIG_BLOCK, &set, NULL);
2064
2065 /* Now it's safe to longjump. */
2066 longjmp (run_with_timeout_env, -1);
2067 }
2068 # endif /* not HAVE_SIGSETJMP */
2069
/* Schedule delivery of SIGALRM TIMEOUT seconds from now, through
   setitimer when ITIMER_REAL is defined and through alarm otherwise.

   TIMEOUT is expected to be non-zero.  A value so small that it
   would truncate to zero is bumped to the smallest value the
   interface accepts (1us for setitimer, 1s for alarm), because a zero
   request would mean "no alarm at all" rather than "right away".  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Preferred interface: sub-second resolution. */
  struct itimerval timer;
  const long whole_secs = (long) timeout;

  xzero (timer);
  timer.it_value.tv_sec = whole_secs;
  timer.it_value.tv_usec = 1000000 * (timeout - whole_secs);

  /* An all-zero it_value would mean "wait forever"; wait for the
     minimum interval instead.  */
  if (timer.it_value.tv_sec == 0 && timer.it_value.tv_usec == 0)
    timer.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &timer, NULL);
#else  /* not ITIMER_REAL */
  /* Legacy interface: whole seconds only.  alarm(0) means "never
     deliver the alarm", which is not what a caller asking for, say,
     0.5s expects, so round such values up to one second.  */
  int secs = (int) timeout;

  alarm (secs == 0 ? 1 : secs);
#endif /* not ITIMER_REAL */
}
2104
/* Undo alarm_set: install an all-zero interval timer when
   ITIMER_REAL is defined, or call alarm (0) otherwise.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  struct itimerval zero_timer;

  xzero (zero_timer);
  setitimer (ITIMER_REAL, &zero_timer, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
2118
2119 /* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
2120 seconds. Returns true if the function was interrupted with a
2121 timeout, false otherwise.
2122
2123 This works by setting up SIGALRM to be delivered in TIMEOUT seconds
2124 using setitimer() or alarm(). The timeout is enforced by
2125 longjumping out of the SIGALRM handler. This has several
2126 advantages compared to the traditional approach of relying on
2127 signals causing system calls to exit with EINTR:
2128
2129 * The callback function is *forcibly* interrupted after the
2130 timeout expires, (almost) regardless of what it was doing and
2131 whether it was in a syscall. For example, a calculation that
2132 takes a long time is interrupted as reliably as an IO
2133 operation.
2134
2135 * It works with both SYSV and BSD signals because it doesn't
2136 depend on the default setting of SA_RESTART.
2137
2138 * It doesn't require special handler setup beyond a simple call
2139 to signal(). (It does use sigsetjmp/siglongjmp, but they're
2140 optional.)
2141
2142 The only downside is that, if FUN allocates internal resources that
2143 are normally freed prior to exit from the functions, they will be
2144 lost in case of timeout. */
2145
2146 bool
run_with_timeout(double timeout,void (* fun)(void *),void * arg)2147 run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2148 {
2149 int saved_errno;
2150
2151 if (timeout == 0)
2152 {
2153 fun (arg);
2154 return false;
2155 }
2156
2157 if (SETJMP (run_with_timeout_env) != 0)
2158 {
2159 /* Longjumped out of FUN with a timeout. */
2160 signal (SIGALRM, SIG_DFL);
2161 return true;
2162 }
2163 else
2164 {
2165 signal (SIGALRM, abort_run_with_timeout);
2166 }
2167 alarm_set (timeout);
2168 fun (arg);
2169
2170 /* Preserve errno in case alarm() or signal() modifies it. */
2171 saved_errno = errno;
2172 alarm_cancel ();
2173 signal (SIGALRM, SIG_DFL);
2174 errno = saved_errno;
2175
2176 return false;
2177 }
2178
2179 #else /* not USE_SIGNAL_TIMEOUT */
2180
2181 #ifndef WINDOWS
/* Fallback run_with_timeout for builds without signal-based
   timeouts: TIMEOUT is ignored, FUN(ARG) is called directly and the
   result is always false ("not interrupted").  Windows is excluded
   because it supplies its own thread-based run_with_timeout.  */

bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (void) timeout;               /* no way to enforce it here */

  fun (arg);
  return false;
}
2192 #endif /* not WINDOWS */
2193 #endif /* not USE_SIGNAL_TIMEOUT */
2194
2195 #ifndef WINDOWS
2196
/* Sleep for the specified number of seconds.  On machines without
   nanosleep(), the sleep may end early if interrupted by a signal.  */
2199
2200 #if defined FUZZING && defined TESTING
void
xsleep (double seconds)
{
  /* Fuzzing builds must not wait, so the request is dropped.  */
  (void) seconds;
}
2206 #else
void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is preferred: it is accurate and, more importantly,
     reports the unslept time, so the sleep can be resumed reliably
     after a signal such as SIGWINCH.  (A real Debian bug report
     described --limit-rate misbehaving while the terminal was being
     resized.)  */
  struct timespec request, leftover;
  const long whole_secs = (long) seconds;

  request.tv_sec = whole_secs;
  request.tv_nsec = 1000000000 * (seconds - whole_secs);

  /* Each time a signal interrupts the sleep, go back to sleep for
     whatever time is left.  */
  while (nanosleep (&request, &leftover) < 0 && errno == EINTR)
    request = leftover;
#elif defined(HAVE_USLEEP)
  /* Second choice: usleep, in preference to select.  Some systems'
     usleep cannot take values above 1,000,000, so sleep() handles the
     whole seconds and usleep only the sub-second remainder.  */
  if (seconds >= 1)
    {
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else /* fall back select */
  /* Last resort: select with no descriptors and a timeout.  This
     cannot be used with Winsock's select, which does not honor the
     timeout when every fd set is NULL (Cygwin's Unix-compatible
     select does).  */
  struct timeval delay;

  delay.tv_sec = (long) seconds;
  delay.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &delay);
  /* A -1 return with errno EINTR means a signal cut the sleep short,
     but without knowing how long we actually slept there is no way
     to resume.  Tracking it with gettimeofday would be slow and
     unreliable because of clock skew.  */
#endif
}
2249 #endif
2250
2251 #endif /* not WINDOWS */
2252
/* Encode the LENGTH octets at DATA as base64 and store the text in
   DEST.  DEST must point to a writable buffer of at least
   1+BASE64_LENGTH(length) bytes; the output is zero-terminated.  The
   return value is the length of the base64 text, not counting the
   terminating zero.

   DATA is only dereferenced when LENGTH is non-zero.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

size_t
wget_base64_encode (const void *data, size_t length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set.  */
  const unsigned char *s = data;
  /* Count the bytes still to encode instead of comparing against an
     "end - 2" pointer: for LENGTH < 2 that pointer would lie before
     the start of the buffer, which is undefined behavior.  */
  size_t left = length;
  char *p = dest;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (; left >= 3; left -= 3, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the 1- or 2-byte tail, padding the quadruplet with '='...  */
  switch (left)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* LENGTH was a multiple of three: nothing left to pad.  */
      break;
    }
  /* ...and zero-terminate it.  */
  *p = '\0';

  return (size_t) (p - dest);
}
2309
/* Load into C the next character of P that is not whitespace,
   advancing P past it.  C ends up as \0 once the end of the string
   is reached.  */
#define NEXT_CHAR(c, p) do {                    \
  c = (unsigned char) *p++;                     \
} while (c_isspace (c))

#define IS_ASCII(c) (((c) & 0x80) == 0)

/* Decode the null-terminated base64 text BASE64 into the memory at
   DEST.  At most SIZE bytes are stored; the decoded data never
   exceeds 3/4*strlen(base64) bytes.  Whitespace between the base64
   characters is skipped.

   The output is binary data and is not NUL-terminated.  The return
   value is the number of bytes decoded.  Bytes beyond the first SIZE
   are still counted even though they are not stored.  -1 is returned
   when the input is malformed.

   This function originates from Free Recode.  */

ssize_t
wget_base64_decode (const char *base64, void *dest, size_t size)
{
  /* Base64 value of each of the first 128 character codes, -1 for
     characters outside the alphabet.  This assumes ASCII (but so does
     Wget in other places).  */
  static const signed char base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')

  const char *in = base64;
  unsigned char *out = dest;
  ssize_t decoded = 0;

  for (;;)
    {
      unsigned char ch;
      unsigned long bits;

      /* First character of a quadruplet.  A clean end of input is
         only acceptable here.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0')
        break;
      if (ch == '=' || !IS_BASE64 (ch))
        return -1;              /* illegal char while decoding base64 */
      bits = BASE64_CHAR_TO_VALUE (ch) << 18;

      /* Second character: must be a real base64 digit; together with
         the first it yields the first output byte.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0' || ch == '=' || !IS_BASE64 (ch))
        return -1;              /* premature EOF or illegal char */
      bits |= BASE64_CHAR_TO_VALUE (ch) << 12;
      if (size > 0)
        {
          *out++ = bits >> 16;
          --size;
        }
      ++decoded;

      /* Third character: a digit, or the first of two '=' pads.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0' || !IS_BASE64 (ch))
        return -1;              /* premature EOF or illegal char */

      if (ch == '=')
        {
          /* The quadruplet must then end in a second '='; anything
             else, including end of input, is an error.  */
          NEXT_CHAR (ch, in);
          if (ch != '=')
            return -1;          /* padding `=' expected but not found */
          continue;
        }

      bits |= BASE64_CHAR_TO_VALUE (ch) << 6;
      if (size > 0)
        {
          *out++ = 0xff & bits >> 8;
          --size;
        }
      ++decoded;

      /* Fourth character: a digit, or a single '=' pad.  */
      NEXT_CHAR (ch, in);
      if (ch == '\0')
        return -1;              /* premature EOF while decoding base64 */
      if (ch == '=')
        continue;
      if (!IS_BASE64 (ch))
        return -1;              /* illegal char while decoding base64 */

      bits |= BASE64_CHAR_TO_VALUE (ch);
      if (size > 0)
        {
          *out++ = 0xff & bits;
          --size;
        }
      ++decoded;
    }
#undef IS_BASE64
#undef BASE64_CHAR_TO_VALUE

  return decoded;
}
2432
2433 #ifdef HAVE_LIBPCRE2
2434 /* Compiles the PCRE2 regex. */
2435 void *
compile_pcre2_regex(const char * str)2436 compile_pcre2_regex (const char *str)
2437 {
2438 int errornumber;
2439 PCRE2_SIZE erroroffset;
2440 pcre2_code *regex = pcre2_compile((PCRE2_SPTR) str, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, NULL);
2441 if (! regex)
2442 {
2443 fprintf (stderr, _("Invalid regular expression %s, PCRE2 error %d\n"),
2444 quote (str), errornumber);
2445 }
2446 return regex;
2447 }
2448 #endif
2449
2450 #ifdef HAVE_LIBPCRE
2451 /* Compiles the PCRE regex. */
2452 void *
compile_pcre_regex(const char * str)2453 compile_pcre_regex (const char *str)
2454 {
2455 const char *errbuf;
2456 int erroffset;
2457 pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
2458 if (! regex)
2459 {
2460 fprintf (stderr, _("Invalid regular expression %s, %s\n"),
2461 quote (str), errbuf);
2462 }
2463 return regex;
2464 }
2465 #endif
2466
/* Compile STR as a POSIX extended regex (REG_EXTENDED | REG_NOSUB).
   Returns a heap-allocated regex_t on success.  On failure the
   regerror text is printed to stderr, the allocation is released and
   NULL is returned.  */
void *
compile_posix_regex (const char *str)
{
  regex_t *compiled = xmalloc (sizeof (regex_t));
  int status;
  size_t msg_size;
  char *msg;

#ifdef TESTING
  /* regcomp might be *very* cpu+memory intensive,
   * see https://sourceware.org/glibc/wiki/Security%20Exceptions */
  str = "a";
#endif

  status = regcomp (compiled, str, REG_EXTENDED | REG_NOSUB);
  if (status == 0)
    return compiled;

  /* Ask regerror for the required buffer size first, then for the
     message itself.  */
  msg_size = regerror (status, compiled, NULL, 0);
  msg = xmalloc (msg_size);
  regerror (status, compiled, msg, msg_size);
  fprintf (stderr, _("Invalid regular expression %s, %s\n"),
           quote (str), msg);
  xfree (msg);
  xfree (compiled);
  return NULL;
}
2492
2493 #ifdef HAVE_LIBPCRE2
2494 /* Matches a PCRE2 regex. */
2495 bool
match_pcre2_regex(const void * regex,const char * str)2496 match_pcre2_regex (const void *regex, const char *str)
2497 {
2498 int rc;
2499 pcre2_match_data *match_data;
2500
2501 match_data = pcre2_match_data_create_from_pattern(regex, NULL);
2502
2503 if (match_data)
2504 {
2505 rc = pcre2_match(regex, (PCRE2_SPTR) str, strlen(str), 0, 0, match_data, NULL);
2506 pcre2_match_data_free(match_data);
2507 }
2508 else
2509 rc = PCRE2_ERROR_NOMEMORY;
2510
2511 if (rc < 0 && rc != PCRE2_ERROR_NOMATCH)
2512 {
2513 logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2514 quote (str), rc);
2515 }
2516
2517 return rc >= 0;
2518 }
2519 #endif
2520
2521 #ifdef HAVE_LIBPCRE
2522 #define OVECCOUNT 30
2523 /* Matches a PCRE regex. */
2524 bool
match_pcre_regex(const void * regex,const char * str)2525 match_pcre_regex (const void *regex, const char *str)
2526 {
2527 size_t l = strlen (str);
2528 int ovector[OVECCOUNT];
2529
2530 int rc = pcre_exec ((pcre *) regex, 0, str, (int) l, 0, 0, ovector, OVECCOUNT);
2531 if (rc == PCRE_ERROR_NOMATCH)
2532 return false;
2533 else if (rc < 0)
2534 {
2535 logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2536 quote (str), rc);
2537 return false;
2538 }
2539 else
2540 return true;
2541 }
2542 #undef OVECCOUNT
2543 #endif
2544
2545 /* Matches a POSIX regex. */
2546 bool
match_posix_regex(const void * regex,const char * str)2547 match_posix_regex (const void *regex, const char *str)
2548 {
2549 int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
2550 if (rc == REG_NOMATCH)
2551 return false;
2552 else if (rc == 0)
2553 return true;
2554 else
2555 {
2556 size_t errbuf_size = regerror (rc, opt.acceptregex, NULL, 0);
2557 char *errbuf = xmalloc (errbuf_size);
2558 regerror (rc, opt.acceptregex, errbuf, errbuf_size);
2559 logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2560 quote (str), rc);
2561 xfree (errbuf);
2562 return false;
2563 }
2564 }
2565
2566 #undef IS_ASCII
2567 #undef NEXT_CHAR
2568
/* Recursive merge sort behind stable_sort.  Sorts the elements of
   BASE with indices FROM..TO (both inclusive), each SIZE bytes wide,
   ordering them with CMPFUN.  TEMP is scratch space addressed with
   the same indices as BASE.  When two elements compare equal the one
   from the left half is taken first, which is what makes the sort
   stable.  Implementation courtesy Zeljko Vrba with additional
   debugging by Nenad Barbutov.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
  char *const items = base;
  char *const scratch = temp;
  size_t mid, left, right, out;

  /* Zero or one element: already sorted.  */
  if (from >= to)
    return;

  mid = (to + from) / 2;
  mergesort_internal (base, temp, size, from, mid, cmpfun);
  mergesort_internal (base, temp, size, mid + 1, to, cmpfun);

  /* Merge the two sorted halves into the scratch buffer.  */
  left = from;
  right = mid + 1;
  out = from;
  while (left <= mid && right <= to)
    {
      const char *pick;

      if (cmpfun (items + left * size, items + right * size) <= 0)
        pick = items + left++ * size;
      else
        pick = items + right++ * size;
      memcpy (scratch + out++ * size, pick, size);
    }

  /* One half is exhausted; append what remains of the other.  */
  while (left <= mid)
    memcpy (scratch + out++ * size, items + left++ * size, size);
  while (right <= to)
    memcpy (scratch + out++ * size, items + right++ * size, size);

  /* Copy the merged run back over the original range.  */
  memcpy (items + from * size, scratch + from * size,
          (to - from + 1) * size);
}
2599
/* Stable sort with interface exactly like standard library's qsort:
   BASE points to NMEMB elements of SIZE bytes each, ordered by
   CMPFUN.  Uses mergesort internally, with a temporary buffer as
   large as the array itself.

   Arrays with fewer than two elements, or with zero-sized elements,
   are left untouched.  Elements of size 1 are sorted like any other;
   an earlier "size > 1" guard silently skipped them.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  void *temp;

  if (nmemb < 2 || size == 0)
    return;

  temp = xmalloc (nmemb * size);
  mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
  xfree (temp);
}
2614
/* Format NUMBER compactly and return a pointer to a static buffer
   holding the text (overwritten by the next call).

   Magnitudes of ten and above are rounded to an integer.  Smaller
   ones keep a single significant digit, never more than three
   fractional digits, and no trailing zeros: 0.1 gives "0.1", 0.035
   gives "0.04", 0.0091 gives "0.009", and 0.0003 gives just "0".

   This suits durations: long-running downloads are shown without a
   fractional part, while short ones still convey their order of
   magnitude.  */

const char *
print_decimal (double number)
{
  static char text[32];
  const double magnitude = number < 0 ? -number : number;

  /* Anything that is not at least 0.0005 prints as a plain "0"
     rather than "0.000".  */
  if (!(magnitude >= 0.0005))
    {
      strcpy (text, "0");
      return text;
    }

  if (magnitude < 0.001)
    /* [0.0005, 0.001) rounds up to 0.001.  */
    snprintf (text, sizeof text, "%.3f", number);
  else if (magnitude < 0.95)
    snprintf (text, sizeof text, "%.1g", number);
  else if (magnitude < 9.95)
    /* The boundary is 9.95 because %.1f would turn 9.96 into "10.0"
       instead of "10", while 9.94 still prints as "9.9".  */
    snprintf (text, sizeof text, "%.1f", number);
  else
    snprintf (text, sizeof text, "%.0f", number);

  return text;
}
2649
2650 /* Get the maximum name length for the given path. */
2651 /* Return 0 if length is unknown. */
2652 long
get_max_length(const char * path,int length,int name)2653 get_max_length (const char *path, int length, int name)
2654 {
2655 long ret;
2656 char *p, *d;
2657
2658 /* Make a copy of the path that we can modify. */
2659 p = path ? strdupdelim (path, path + length) : strdup ("");
2660
2661 for (;;)
2662 {
2663 errno = 0;
2664 /* For an empty path query the current directory. */
2665 #if HAVE_PATHCONF
2666 ret = pathconf (*p ? p : ".", name);
2667 if (!(ret < 0 && errno == ENOENT))
2668 break;
2669 #else
2670 ret = PATH_MAX;
2671 #endif
2672
2673 /* The path does not exist yet, but may be created. */
2674 /* Already at current or root directory, give up. */
2675 if (!*p || strcmp (p, "/") == 0)
2676 break;
2677
2678 /* Remove one directory level and try again. */
2679 d = strrchr (p, '/');
2680 if (d == p)
2681 p[1] = '\0'; /* check root directory */
2682 else if (d)
2683 *d = '\0'; /* remove last directory part */
2684 else
2685 *p = '\0'; /* check current directory */
2686 }
2687
2688 xfree (p);
2689
2690 if (ret < 0)
2691 {
2692 /* pathconf() has a message for us. */
2693 if (errno != 0)
2694 perror ("pathconf");
2695
2696 /* If (errno == 0) then there is no max length.
2697 Even on error return 0 so the caller can continue. */
2698 return 0;
2699 }
2700
2701 return ret;
2702 }
2703
/* Write the HEX_LEN bytes at HEX_BUFFER to STR_BUFFER as lowercase
   hexadecimal text, two characters per byte, followed by a
   terminating NUL.  STR_BUFFER must hold 2 * HEX_LEN + 1 bytes.  */
void
wg_hex_to_string (char *str_buffer, const char *hex_buffer, size_t hex_len)
{
  static const char digits[] = "0123456789abcdef";
  char *out = str_buffer;
  size_t i;

  for (i = 0; i < hex_len; i++)
    {
      /* Mask to 0..255 so a negative char does not sign-extend.  */
      const unsigned byte = (unsigned) (hex_buffer[i] & 0xFF);

      *out++ = digits[byte >> 4];
      *out++ = digits[byte & 0x0F];
    }

  /* Null-terminate result. */
  *out = '\0';
}
2718
2719 #ifdef HAVE_SSL
2720
/*
 * Public key PEM to DER conversion.
 *
 * Finds the "-----BEGIN PUBLIC KEY-----" ... "-----END PUBLIC KEY-----"
 * section of PEM, strips its line breaks and base64-decodes the rest.
 * On success *DER receives a newly allocated buffer and *DER_LEN its
 * length.  Returns true only when at least one byte was decoded.
 * *DER and *DER_LEN are reset to NULL/0 on entry.
 */

static bool
wg_pubkey_pem_to_der (const char *pem, unsigned char **der, size_t *der_len)
{
  static const char header[] = "-----BEGIN PUBLIC KEY-----";
  static const char footer[] = "\n-----END PUBLIC KEY-----";
  const char *head, *tail, *src;
  char *b64, *dst;
  size_t body_start, b64_len;
  unsigned char *decoded;
  ssize_t decoded_len;

  *der = NULL;
  *der_len = 0;

  /* Nothing to convert. */
  if (pem == NULL)
    return false;

  head = strstr (pem, header);
  if (head == NULL)
    return false;

  /* The header must open the buffer or directly follow a \n. */
  if (head != pem && head[-1] != '\n')
    return false;

  /* Offset of the first character after the header. */
  body_start = (size_t) (head - pem) + (sizeof header - 1);

  /* The footer must directly follow a \n. */
  tail = strstr (pem + body_start, footer);
  if (tail == NULL)
    return false;

  b64 = xmalloc ((size_t) (tail - (pem + body_start)) + 1);

  /*
   * Copy everything between header and footer except '\n' and '\r',
   * which leaves what should be the raw base64 string.
   */
  dst = b64;
  for (src = pem + body_start; src < tail; src++)
    if (*src != '\n' && *src != '\r')
      *dst++ = *src;
  *dst = '\0';
  b64_len = (size_t) (dst - b64);

  decoded = xmalloc (BASE64_LENGTH(b64_len));
  decoded_len = wget_base64_decode (b64, decoded, BASE64_LENGTH(b64_len));

  if (decoded_len >= 0)
    {
      *der = decoded;
      *der_len = (size_t) decoded_len;
    }
  else
    xfree (decoded);            /* malformed base64 from server */

  xfree (b64);

  return *der_len > 0;
}
2789
2790 /*
2791 * Generic pinned public key check.
2792 */
2793
2794 bool
wg_pin_peer_pubkey(const char * pinnedpubkey,const char * pubkey,size_t pubkeylen)2795 wg_pin_peer_pubkey (const char *pinnedpubkey, const char *pubkey, size_t pubkeylen)
2796 {
2797 struct file_memory *fm;
2798 unsigned char *buf = NULL, *pem_ptr = NULL;
2799 size_t size, pem_len;
2800 bool pem_read;
2801 bool result = false;
2802
2803 size_t pinkeylen;
2804 ssize_t decoded_hash_length;
2805 char *pinkeycopy, *begin_pos, *end_pos;
2806 unsigned char *sha256sumdigest = NULL, *expectedsha256sumdigest = NULL;
2807
2808 /* if a path wasn't specified, don't pin */
2809 if (!pinnedpubkey)
2810 return true;
2811 if (!pubkey || !pubkeylen)
2812 return result;
2813
2814 /* only do this if pinnedpubkey starts with "sha256//", length 8 */
2815 if (strncmp (pinnedpubkey, "sha256//", 8) == 0)
2816 {
2817 /* compute sha256sum of public key */
2818 sha256sumdigest = xmalloc (SHA256_DIGEST_SIZE);
2819 sha256_buffer (pubkey, pubkeylen, sha256sumdigest);
2820 expectedsha256sumdigest = xmalloc (SHA256_DIGEST_SIZE);
2821
2822 /* it starts with sha256//, copy so we can modify it */
2823 pinkeylen = strlen (pinnedpubkey) + 1;
2824 pinkeycopy = xmalloc (pinkeylen);
2825 memcpy (pinkeycopy, pinnedpubkey, pinkeylen);
2826
2827 /* point begin_pos to the copy, and start extracting keys */
2828 begin_pos = pinkeycopy;
2829 do
2830 {
2831 end_pos = strstr (begin_pos, ";sha256//");
2832 /*
2833 * if there is an end_pos, null terminate,
2834 * otherwise it'll go to the end of the original string
2835 */
2836 if (end_pos)
2837 end_pos[0] = '\0';
2838
2839 /* decode base64 pinnedpubkey, 8 is length of "sha256//" */
2840 decoded_hash_length = wget_base64_decode (begin_pos + 8, expectedsha256sumdigest, SHA256_DIGEST_SIZE);
2841
2842 /* if valid base64, compare sha256 digests directly */
2843 if (SHA256_DIGEST_SIZE == decoded_hash_length)
2844 {
2845 if (!memcmp (sha256sumdigest, expectedsha256sumdigest, SHA256_DIGEST_SIZE))
2846 {
2847 result = true;
2848 break;
2849 }
2850 }
2851 else
2852 logprintf (LOG_VERBOSE, _ ("Skipping key with wrong size (%d/%d): %s\n"),
2853 (int) (strlen (begin_pos + 8) * 3) / 4, SHA256_DIGEST_SIZE,
2854 quote (begin_pos + 8));
2855
2856 /*
2857 * change back the null-terminator we changed earlier,
2858 * and look for next begin
2859 */
2860 if (end_pos)
2861 {
2862 end_pos[0] = ';';
2863 begin_pos = strstr (end_pos, "sha256//");
2864 }
2865 }
2866 while (end_pos && begin_pos);
2867
2868 xfree (sha256sumdigest);
2869 xfree (expectedsha256sumdigest);
2870 xfree (pinkeycopy);
2871
2872 return result;
2873 }
2874
2875 /* fall back to assuming this is a file path */
2876 fm = wget_read_file (pinnedpubkey);
2877 if (!fm)
2878 return result;
2879
2880 /* Check the file's size */
2881 if (fm->length < 0 || fm->length > MAX_PINNED_PUBKEY_SIZE)
2882 goto cleanup;
2883
2884 /*
2885 * if the size of our certificate is bigger than the file
2886 * size then it can't match
2887 */
2888 size = (size_t) fm->length;
2889 if (pubkeylen > size)
2890 goto cleanup;
2891
2892 /* If the sizes are the same, it can't be base64 encoded, must be der */
2893 if (pubkeylen == size)
2894 {
2895 if (!memcmp (pubkey, fm->content, pubkeylen))
2896 result = true;
2897 goto cleanup;
2898 }
2899
2900 /*
2901 * Otherwise we will assume it's PEM and try to decode it
2902 * after placing null terminator
2903 */
2904 buf = xmalloc (size + 1);
2905 memcpy (buf, fm->content, size);
2906 buf[size] = '\0';
2907
2908 pem_read = wg_pubkey_pem_to_der ((const char *) buf, &pem_ptr, &pem_len);
2909 /* if it wasn't read successfully, exit */
2910 if (!pem_read)
2911 goto cleanup;
2912
2913 /*
2914 * if the size of our certificate doesn't match the size of
2915 * the decoded file, they can't be the same, otherwise compare
2916 */
2917 if (pubkeylen == pem_len && !memcmp (pubkey, pem_ptr, pubkeylen))
2918 result = true;
2919
2920 cleanup:
2921 xfree (buf);
2922 xfree (pem_ptr);
2923 wget_read_file_free (fm);
2924
2925 return result;
2926 }
2927
2928 #endif /* HAVE_SSL */
2929
2930 #ifdef TESTING
2931
/* Unit test for subdir_p: runs each (d1, d2) pair in the table
   through it and compares with the expected result.  Returns NULL
   when every case passes; mu_assert handles a failing case.  */
const char *
test_subdir_p(void)
{
  static const struct {
    const char *d1;
    const char *d2;
    bool result;
  } cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  unsigned idx = 0;

  while (idx < countof(cases))
    {
      const bool got = subdir_p (cases[idx].d1, cases[idx].d2);

      mu_assert ("test_subdir_p: wrong result",
                 got == cases[idx].result);
      ++idx;
    }

  return NULL;
}
2956
/* Unit test for dir_matches_p: runs each NULL-terminated pattern
   list and directory in the table through it and compares with the
   expected result.  Returns NULL when every case passes; mu_assert
   handles a failing case.  */
const char *
test_dir_matches_p(void)
{
  static struct {
    const char *dirlist[3];
    const char *dir;
    bool result;
  } cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
  };
  unsigned idx = 0;

  while (idx < countof(cases))
    {
      const bool got = dir_matches_p (cases[idx].dirlist, cases[idx].dir);

      mu_assert ("test_dir_matches_p: wrong result",
                 got == cases[idx].result);
      ++idx;
    }

  return NULL;
}
2993
2994 #endif /* TESTING */
2995