1 /* Miscellaneous declarations.
2    Copyright (C) 1996-2011, 2015, 2018-2021 Free Software Foundation,
3    Inc.
4 
5 This file is part of GNU Wget.
6 
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11 
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19 
20 Additional permission under GNU GPL version 3 section 7
21 
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work.  */
30 
31 /* This file contains declarations that are universally useful and
32    those that don't fit elsewhere.  It also includes sysdep.h which
33    includes some often-needed system includes, like the obnoxious
34    <time.h> inclusion.  */
35 
36 #ifndef WGET_H
37 #define WGET_H
38 
39 #include "config.h"
40 
/* WINDOWS is defined for _WIN32/__WIN32__ builds, except under Cygwin.  */
#if !defined __CYGWIN__ && (defined _WIN32 || defined __WIN32__)
# define WINDOWS
#endif
44 
45 /* Include these, so random files need not include them.  */
46 #include "sysdep.h"
47 
/* Disable assertions when debug support is not compiled in.  NDEBUG is
   only defined here if the build has not already defined it.  */
#if !defined ENABLE_DEBUG && !defined NDEBUG
# define NDEBUG
#endif
54 
/* Is OpenSSL or GNUTLS available?  Any one of the library macros turns
   on HAVE_SSL.  HAVE_HSTS is switched on together with it, because
   there's no sense in enabling HSTS without SSL.  */
#if defined (HAVE_LIBSSL) || defined (HAVE_LIBSSL32) || defined (HAVE_LIBGNUTLS)
# define HAVE_SSL
# define HAVE_HSTS
#endif
60 
61 /* `gettext (FOO)' is long to write, so we use `_(FOO)'.  If NLS is
62    unavailable, _(STRING) simply returns STRING.  */
63 #include "gettext.h"
64 #define _(STRING) gettext(STRING)
65 
/* A pseudo function call that serves as a marker for the automated
   extraction of messages, but does not call gettext().  The run-time
   translation is done at a different place in the code.  The purpose
   of the N_("...") call is to make the message snarfer aware that the
   "..." string needs to be translated.  MSGID should be a string
   literal.  Concatenated strings and other string expressions won't
   work.  The macro's expansion is not parenthesized, so that it is
   suitable as initializer for static 'char[]' or 'const char[]'
   variables.  -- explanation partly taken from GNU make.  */
#define N_(msgid) msgid
76 
/* USE_NLS_PROGRESS_BAR is set to 1 only when both HAVE_WCWIDTH and
   HAVE_MBTOWC are true.  Otherwise it is explicitly undefined, just to
   be a little paranoid about an earlier definition.  */
#if !(HAVE_WCWIDTH && HAVE_MBTOWC)
# undef  USE_NLS_PROGRESS_BAR
#else
# define USE_NLS_PROGRESS_BAR 1
#endif
83 
84 /* I18N NOTE: You will notice that none of the DEBUGP messages are
85    marked as translatable.  This is intentional, for a few reasons:
86 
87    1) The debug messages are not meant for the users to look at, but
88    for the developers; as such, they should be considered more like
89    source comments than real program output.
90 
91    2) The messages are numerous, and yet they are random and frivolous
92    ("double yuck!" and such).  There would be a lot of work with no
93    gain.
94 
95    3) Finally, the debug messages are meant to be a clue for me to
96    debug problems with Wget.  If I get them in a language I don't
97    understand, debugging will become a new challenge of its own!  */
98 
99 /* locale independent replacement for ctype.h */
100 #include "c-ctype.h"
101 
/* Conditionalize the use of GCC's __attribute__((format)) and
   __builtin_expect features using macros.

   GCC_FORMAT_ATTR (a, b) expands to a printf-style format attribute
   with arguments A and B on GCC >= 3, and to nothing elsewhere.

   LIKELY (exp) and UNLIKELY (exp) are branch hints for use in
   conditions.  __builtin_expect takes a `long', so EXP is first
   normalized to 0 or 1 with `!!'.  That lets pointers be tested
   directly and keeps a value wider than `long' from being truncated
   (possibly to zero) before it is tested.  On other compilers both
   macros are simply (exp).  */

#if defined(__GNUC__) && __GNUC__ >= 3
# define GCC_FORMAT_ATTR(a, b) __attribute__ ((format (printf, a, b)))
# define LIKELY(exp)   __builtin_expect (!!(exp), 1)
# define UNLIKELY(exp) __builtin_expect (!!(exp), 0)
#else
# define GCC_FORMAT_ATTR(a, b)
# define LIKELY(exp)   (exp)
# define UNLIKELY(exp) (exp)
#endif
114 
/* IF_DEBUG prefixes a statement that should run only when debugging is
   both compiled in (ENABLE_DEBUG) and requested at run-time
   (opt.debug); otherwise the statement is guarded by `if (0)' and is
   a no-op.  */

#if defined ENABLE_DEBUG
# define IF_DEBUG if (UNLIKELY (opt.debug))
#else
# define IF_DEBUG if (0)
#endif

/* Pass ARGS to debug_logprintf if debugging is enabled and requested,
   otherwise do nothing.  ARGS are like arguments to printf and must be
   wrapped in an extra level of parentheses, i.e. DEBUGP (("%d\n", n)),
   because it's not possible to pass a variable number of arguments to
   a macro (in portable C89).  */

#define DEBUGP(args)            \
  do                            \
    {                           \
      IF_DEBUG                  \
        debug_logprintf args;   \
    }                           \
  while (0)
130 
131 /* Pick an integer type large enough for file sizes, content lengths,
132    and such.  Because today's files can be very large, it should be a
133    signed integer at least 64 bits wide.  This can't be typedeffed to
134    off_t because: a) off_t is always 32-bit on Windows, and b) we
135    don't necessarily want to tie having a 64-bit type for internal
136    calculations to having LFS support.  */
137 
138 /* Gnulib's stdint.h module essentially guarantees the existence of int64_t.
139  * Thus we can simply assume it always exists and use it.
140  */
141 #include <stdint.h>
142 
/* wgint is exactly int64_t, so its limits are the int64_t limits.  */
typedef int64_t wgint;
#define WGINT_MAX INT64_MAX
#define WGINT_MIN INT64_MIN

/* Text-to-wgint conversion; takes the same arguments as strtoll.  */
#define str_to_wgint strtoll
148 
149 #include "options.h"
150 
/* Everything uses this, so include it here directly.  */
152 #ifdef __cplusplus
153 #  undef _Noreturn
154 #endif
155 #include "xalloc.h"
156 
157 /* Likewise for logging functions.  */
158 #include "log.h"
159 
160 /* Likewise for quoting functions.  */
161 #include "quote.h"
162 #include "quotearg.h"
163 
164 /* Likewise for struct iri definition */
165 #include "iri.h"
166 
167 /* Useful macros used across the code: */
168 
/* Element count of an array (ARR must be a real array, not a pointer).
   For example:
   static char a[] = "foo";     -- countof (a) == 4 (note terminating \0)
   int a[5] = {1, 2};           -- countof (a) == 5
   char *a[] = {                -- countof (a) == 3
     "foo", "bar", "baz"
   }; */
#define countof(arr) (sizeof (arr) / sizeof (*(arr)))
176 
/* Zero out a value: every byte of OBJ (sizeof (OBJ) of them) is set to
   0.  OBJ must be something whose address can be taken.  */
#define xzero(obj) memset (&(obj), 0, sizeof (obj))
179 
/* Convert an ASCII hex digit to the corresponding number between 0
   and 15.  C should be a hexadecimal digit that satisfies c_isxdigit;
   otherwise, the result is undefined (no validation is done here).  */
static inline unsigned char _unhex(unsigned char c)
{
  /* The ranges are told apart by their upper bounds only, relying on
     '0'..'9' < 'A'..'F' < 'a'..'f' in ASCII.  */
  if (c <= '9')
    return (unsigned char) (c - '0');
  if (c <= 'F')
    return (unsigned char) (c - 'A' + 10);
  return (unsigned char) (c - 'a' + 10);
}

/* Combine two hex digits into one number: H1 supplies the high four
   bits and H2 the low four bits.  */
#define X2DIGITS_TO_NUM(h1, h2) ((_unhex (h1) << 4) + _unhex (h2))
188 
/* The reverse of X2DIGITS_TO_NUM's per-digit step: convert a number in
   the [0, 16) range to the ASCII representation of the corresponding
   hexadecimal digit.  XNUM_TO_DIGIT yields upper-case digits,
   XNUM_TO_digit lower-case ones.  The `0 +' is there so you can't
   accidentally use the result as an lvalue.  */
#define XNUM_TO_DIGIT(x) (0 + "0123456789ABCDEF"[x])
#define XNUM_TO_digit(x) (0 + "0123456789abcdef"[x])
194 
/* Return non-zero if the string bounded between BEG and END is equal
   to STRING_LITERAL.  The lengths are compared first (sizeof minus the
   terminating \0), then the bytes with memcmp, so the comparison is
   case-sensitive.  */
#define BOUNDED_EQUAL(beg, end, string_literal)                         \
  ((end) - (beg) == sizeof (string_literal) - 1                         \
   && memcmp ((beg), (string_literal), sizeof (string_literal) - 1) == 0)
200 
/* Return non-zero if the string bounded between BEG and END matches
   STRING_LITERAL.  Works like BOUNDED_EQUAL, except that the bytes are
   compared with c_strncasecmp, so the comparison is case-insensitive.  */
#define BOUNDED_EQUAL_NO_CASE(beg, end, string_literal)                 \
  ((end) - (beg) == sizeof (string_literal) - 1                         \
   && c_strncasecmp ((beg), (string_literal),                           \
                     sizeof (string_literal) - 1) == 0)
205 
/* Generally useful if you want to avoid arbitrary size limits but
   don't need a full dynamic array.  Assumes that BASEVAR points to a
   malloced array of TYPE objects (or possibly a NULL pointer, if
   SIZEVAR is 0), with the total size stored in SIZEVAR.  This macro
   will realloc BASEVAR as necessary so that it can hold at least
   NEEDED_SIZE objects.  The reallocing is done by doubling, which
   ensures constant amortized time per element.

   Details:
   - SIZEVAR is doubled until it reaches NEEDED_SIZE, never going
     below 16 once growth is needed, and is updated in place.
   - BASEVAR is reassigned from xrealloc only if SIZEVAR actually grew.
   - NEEDED_SIZE is evaluated once; BASEVAR and SIZEVAR are evaluated
     several times and must be side-effect-free lvalues.
   - Every macro argument is parenthesized where it is used, so that
     lvalue expressions such as `*sizep' expand safely.  */

#define DO_REALLOC(basevar, sizevar, needed_size, type) do {            \
  long DR_needed_size = (needed_size);                                  \
  long DR_newsize = 0;                                                  \
  while ((sizevar) < (DR_needed_size)) {                                \
    DR_newsize = (sizevar) << 1;                                        \
    if (DR_newsize < 16)                                                \
      DR_newsize = 16;                                                  \
    (sizevar) = DR_newsize;                                             \
  }                                                                     \
  if (DR_newsize)                                                       \
    (basevar) = xrealloc ((basevar), DR_newsize * sizeof (type));       \
} while (0)
226 
/* Used to print pointers (usually for debugging).  Expands to the two
   arguments expected by a "0x%0*lx" conversion, as in
   printf ("0x%0*lx", PTR_FORMAT (p)): a field width of two hex digits
   per byte of `void *', followed by PTR cast to unsigned long.  (%p is
   too unpredictable; some implementations prepend 0x, while some
   don't, and most don't 0-pad the address.)  */
#define PTR_FORMAT(ptr) (int) (sizeof (void *) * 2), (unsigned long) (ptr)
232 
/* Find the maximum buffer length needed to print an integer of type `x'
   in base 10.  Each byte contributes 8*log_{10}(2) decimal digits,
   approximated here as 24082 / 10000; 2 more characters are added on
   top (presumably for a sign and the terminating \0).  */
#define MAX_INT_TO_STRING_LEN(x) (2 + (sizeof (x) * 24082 / 10000))
236 
/* Find the minimum or maximum of two provided values.  Each argument
   may be evaluated twice, so do not pass expressions with side
   effects.  Any MIN/MAX already defined (some system headers provide
   their own) is dropped first, so these definitions never trigger a
   macro-redefinition diagnostic.  */
#undef MIN
#undef MAX
#define MIN(i, j) ((i) <= (j) ? (i) : (j))
#define MAX(i, j) ((i) >= (j) ? (i) : (j))
240 
241 
242 extern const char *exec_name;
243 extern const char *program_name;
244 extern const char *program_argstring;
245 
/* Document type ("dt") flags.  Each flag occupies its own bit, so
   flags can be combined with `|' and tested with `&'.  */
enum
{
  TEXTHTML             = 1 << 0,        /* document is of type text/html
                                           or application/xhtml+xml */
  RETROKF              = 1 << 1,        /* retrieval was OK */
  HEAD_ONLY            = 1 << 2,        /* only send the HEAD request */
  SEND_NOCACHE         = 1 << 3,        /* send Cache-Control: no-cache and
                                           Pragma: no-cache directive */
  ACCEPTRANGES         = 1 << 4,        /* Accept-ranges header was found */
  ADDED_HTML_EXTENSION = 1 << 5,        /* added ".html" extension due to -E */
  TEXTCSS              = 1 << 6,        /* document is of type text/css */
  IF_MODIFIED_SINCE    = 1 << 7,        /* use if-modified-since header */
  METALINK_METADATA    = 1 << 8         /* use HTTP response for Metalink
                                           metadata */
};
260 
/* Universal error type -- used almost everywhere.  Error reporting of
   this detail is not generally used or needed and should be
   simplified.

   The enumerators get consecutive values in declaration order,
   starting from 0, so inserting or reordering entries changes the
   numeric value of every entry that follows.  */
typedef enum
{
  NOCONERROR = 0,
  HOSTERR,
  CONSOCKERR,
  CONERROR,
  CONSSLERR,
  CONIMPOSSIBLE,
  NEWLOCATION,
  FTPOK,
  FTPLOGINC,
  FTPLOGREFUSED,
  FTPPORTERR,
  FTPSYSERR,
  FTPNSFOD,
  FTPUNKNOWNTYPE,
  FTPRERR,
  FTPSRVERR,
  FTPRETRINT,
  FTPRESTFAIL,
  URLERROR,
  FOPENERR,
  FOPEN_EXCL_ERR,
  FWRITEERR,
  HEOF,
  GATEWAYTIMEOUT,
  HERR,
  RETROK,
  RECLEVELEXC,
  WRONGCODE,
  FTPINVPASV,
  FTPNOPASV,
  FTPNOPBSZ,
  FTPNOPROT,
  FTPNOAUTH,
  CONTNOTSUPPORTED,
  RETRUNNEEDED,
  RETRFINISHED,
  READERR,
  TRYLIMEXC,
  FILEBADFILE,
  RANGEERR,
  RETRBADPATTERN,
  PROXERR,
  AUTHFAILED,
  QUOTEXC,
  WRITEFAILED,
  SSLINITFAILED,
  VERIFCERTERR,
  UNLINKERR,
  NEWLOCATION_KEEP_POST,
  CLOSEFAILED,
  ATTRMISSING,
  UNKNOWNATTR,
  WARC_ERR,
  WARC_TMP_FOPENERR,
  WARC_TMP_FWRITEERR,
  TIMECONV_ERR,
  METALINK_PARSE_ERROR,
  METALINK_RETR_ERROR,
  METALINK_CHKSUM_ERROR,
  METALINK_SIG_ERROR,
  METALINK_MISSING_RESOURCE,
  RETR_WITH_METALINK,
  METALINK_SIZE_ERROR
} uerr_t;
286 
287 /* 2005-02-19 SMS.
288    Select an appropriate "orig" suffix and a separator character for
289    adding a unique suffix to a file name.
290 
291    A VMS ODS2 file system can't tolerate multiple dots.  An ODS5 file
292    system can, but even there not all dots are equal, and heroic effort
293    would be needed to get ".html^.orig" rather than (the less desirable)
294    "^.html.orig".  It's more satisfactory always to use "_orig" on VMS
295    (rather than including "vms.h", testing "ods5_dest", and acting
296    accordingly).
297 
298    Note that code in various places assumes that this string is five
299    characters long.
300 */
#if !defined __VMS
# define ORIG_SFX ".orig"
#else /* __VMS: a dot is avoided in the suffix */
# define ORIG_SFX "_orig"
#endif /* !__VMS [else] */
306 
307 /* ".NNN" unique-ifying suffix separator character for unique_name() in
308    url.c (and anywhere else).  Note that on VMS, the file system's
309    version numbers solve the problem that unique_name() is designed to
310    handle, obviating this whole exercise.  Other systems may specify a
311    character different from "." here, if desired.
312 */
#if !defined __VMS
# define UNIQ_SEP '.'   /* left undefined on VMS */
#endif /* !__VMS */
316 
#if defined TESTING && defined FUZZING
/* Fuzzing build: rename fopen so we can have our own version in
   fuzz/main.c to not create random files.  exit is redirected to
   exit_wget in the same way.  */
# define fopen(pathname, mode) fopen_wget (pathname, mode)
# define exit(status) exit_wget (status)

/* In run_wgetrc() we call fopen_wgetrc() instead of fopen, so we can
   catch the call in our fuzzers.  */
FILE *fopen_wget (const char *pathname, const char *mode);
FILE *fopen_wgetrc (const char *pathname, const char *mode);
void exit_wget (int status);
#else
/* When not fuzzing, fopen_wgetrc() is just another spelling of
   fopen().  */
# define fopen_wgetrc(pathname, mode) fopen (pathname, mode)
#endif /* TESTING && FUZZING */
332 
333 #endif /* WGET_H */
334