1 /* HTTP support.
2    Copyright (C) 1996-2012, 2014-2015, 2018-2021 Free Software
3    Foundation, Inc.
4 
5 This file is part of GNU Wget.
6 
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version.
11 
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19 
20 Additional permission under GNU GPL version 3 section 7
21 
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work.  */
30 
31 #include "wget.h"
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 #include <assert.h>
38 #include <errno.h>
39 #include <time.h>
40 #include <locale.h>
41 #include <fcntl.h>
42 
43 #include "hash.h"
44 #include "http.h"
45 #include "hsts.h"
46 #include "utils.h"
47 #include "url.h"
48 #include "host.h"
49 #include "retr.h"
50 #include "connect.h"
51 #include "netrc.h"
52 #ifdef HAVE_SSL
53 # include "ssl.h"
54 #endif
55 #ifdef ENABLE_NTLM
56 # include "http-ntlm.h"
57 #endif
58 #include "cookies.h"
59 #include "md5.h"
60 #include "convert.h"
61 #include "spider.h"
62 #include "warc.h"
63 #include "c-strcase.h"
64 #include "version.h"
65 #include "xstrndup.h"
66 #ifdef HAVE_METALINK
67 # include "metalink.h"
68 #endif
69 #ifdef ENABLE_XATTR
70 #include "xattr.h"
71 #endif
72 
73 #ifdef TESTING
74 #include "../tests/unit-tests.h"
75 #endif
76 
77 #ifdef __VMS
78 # include "vms.h"
79 #endif /* def __VMS */
80 
81 
/* Forward declarations, so that the code below can refer to these
   helpers ahead of their definitions.  `struct http_stat' is declared
   as an incomplete type only because it appears in the prototype of
   ensure_extension.  */
struct http_stat;
static char *create_authorization_line (const char *, const char *,
                                        const char *, const char *,
                                        const char *, bool *, uerr_t *);
/* Called by maybe_send_basic_creds with a user name and a password;
   the result is installed as the value of the "Authorization"
   header.  */
static char *basic_authentication_encode (const char *, const char *);
static bool known_authentication_scheme_p (const char *, const char *);
static void ensure_extension (struct http_stat *, const char *, int *);
static void load_cookies (void);

/* File-scope cookie state.  NOTE(review): presumably cookies_loaded_p
   records whether load_cookies has already run and wget_cookie_jar is
   the jar it fills -- confirm against load_cookies.  */
static bool cookies_loaded_p;
static struct cookie_jar *wget_cookie_jar;
94 
/* Content-type strings, spelled once here so that the rest of the
   file can refer to them by name.  */
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
#define TEXTCSS_S "text/css"

/* Some status code validation macros.  Each takes a numeric status
   code X and evaluates to non-zero if X belongs to the class.  X is
   evaluated more than once, so do not pass an expression with side
   effects.  The HTTP_STATUS_* names used here are defined further
   down; that works because a macro body is only expanded at the
   point where the macro is used.  */
#define H_10X(x)        (((x) >= 100) && ((x) < 200))
#define H_20X(x)        (((x) >= 200) && ((x) < 300))
#define H_PARTIAL(x)    ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY          \
                         || (x) == HTTP_STATUS_MOVED_TEMPORARILY       \
                         || (x) == HTTP_STATUS_SEE_OTHER               \
                         || (x) == HTTP_STATUS_TEMPORARY_REDIRECT      \
                         || (x) == HTTP_STATUS_PERMANENT_REDIRECT)

/* HTTP/1.0 status codes from RFC1945, provided for reference.  A few
   later additions are included too; they are marked individually.  */
/* Successful 2xx.  */
#define HTTP_STATUS_OK                    200
#define HTTP_STATUS_CREATED               201
#define HTTP_STATUS_ACCEPTED              202
#define HTTP_STATUS_NO_CONTENT            204
#define HTTP_STATUS_PARTIAL_CONTENTS      206

/* Redirection 3xx.  Note that 300 and 304 are deliberately not part
   of H_REDIRECTED above.  */
#define HTTP_STATUS_MULTIPLE_CHOICES      300
#define HTTP_STATUS_MOVED_PERMANENTLY     301
#define HTTP_STATUS_MOVED_TEMPORARILY     302
#define HTTP_STATUS_SEE_OTHER             303 /* from HTTP/1.1 */
#define HTTP_STATUS_NOT_MODIFIED          304
#define HTTP_STATUS_TEMPORARY_REDIRECT    307 /* from HTTP/1.1 */
#define HTTP_STATUS_PERMANENT_REDIRECT    308 /* from HTTP/1.1 */

/* Client error 4xx.  */
#define HTTP_STATUS_BAD_REQUEST           400
#define HTTP_STATUS_UNAUTHORIZED          401
#define HTTP_STATUS_FORBIDDEN             403
#define HTTP_STATUS_NOT_FOUND             404
#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416

/* Server errors 5xx.  */
#define HTTP_STATUS_INTERNAL              500
#define HTTP_STATUS_NOT_IMPLEMENTED       501
#define HTTP_STATUS_BAD_GATEWAY           502
#define HTTP_STATUS_UNAVAILABLE           503
#define HTTP_STATUS_GATEWAY_TIMEOUT       504
139 
/* Release policy of a request header: which of its two strings
   release_header must free.  rel_none frees nothing, rel_name frees
   the name, rel_value frees the value and rel_both frees both.  */
enum rp {
  rel_none, rel_name, rel_value, rel_both
};
143 
/* An HTTP request under construction: the pieces of the request line
   plus a growable array of headers.  Created by request_new and
   destroyed by request_free.  */
struct request {
  const char *method;           /* request method; never freed */
  char *arg;                    /* request argument; freed by request_free */

  /* One "NAME: VALUE" header.  RELEASE_POLICY tells release_header
     which of NAME and VALUE it has to free.  */
  struct request_header {
    char *name, *value;
    enum rp release_policy;
  } *headers;                   /* array with room for HCAPACITY headers */
  int hcount, hcapacity;        /* headers in use / headers allocated */
};
154 
155 
156 /* Create a new, empty request. Set the request's method and its
157    arguments.  METHOD should be a literal string (or it should outlive
158    the request) because it will not be freed.  ARG will be freed by
159    request_free.  */
160 
161 static struct request *
request_new(const char * method,char * arg)162 request_new (const char *method, char *arg)
163 {
164   struct request *req = xnew0 (struct request);
165   req->hcapacity = 8;
166   req->headers = xnew_array (struct request_header, req->hcapacity);
167   req->method = method;
168   req->arg = arg;
169   return req;
170 }
171 
172 /* Return the method string passed with the last call to
173    request_set_method.  */
174 
175 static const char *
request_method(const struct request * req)176 request_method (const struct request *req)
177 {
178   return req->method;
179 }
180 
181 /* Free one header according to the release policy specified with
182    request_set_header.  */
183 
184 static void
release_header(struct request_header * hdr)185 release_header (struct request_header *hdr)
186 {
187   switch (hdr->release_policy)
188     {
189     case rel_none:
190       break;
191     case rel_name:
192       xfree (hdr->name);
193       break;
194     case rel_value:
195       xfree (hdr->value);
196       break;
197     case rel_both:
198       xfree (hdr->name);
199       xfree (hdr->value);
200       break;
201     }
202 }
203 
204 /* Set the request named NAME to VALUE.  Specifically, this means that
205    a "NAME: VALUE\r\n" header line will be used in the request.  If a
206    header with the same name previously existed in the request, its
207    value will be replaced by this one.  A NULL value means do nothing.
208 
209    RELEASE_POLICY determines whether NAME and VALUE should be released
210    (freed) with request_free.  Allowed values are:
211 
212     - rel_none     - don't free NAME or VALUE
213     - rel_name     - free NAME when done
214     - rel_value    - free VALUE when done
215     - rel_both     - free both NAME and VALUE when done
216 
217    Setting release policy is useful when arguments come from different
218    sources.  For example:
219 
220      // Don't free literal strings!
221      request_set_header (req, "Pragma", "no-cache", rel_none);
222 
223      // Don't free a global variable, we'll need it later.
224      request_set_header (req, "Referer", opt.referer, rel_none);
225 
226      // Value freshly allocated, free it when done.
227      request_set_header (req, "Range",
228                          aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
229                          rel_value);
230    */
231 
232 static void
request_set_header(struct request * req,const char * name,const char * value,enum rp release_policy)233 request_set_header (struct request *req, const char *name, const char *value,
234                     enum rp release_policy)
235 {
236   struct request_header *hdr;
237   int i;
238 
239   if (!value)
240     {
241       /* A NULL value is a no-op; if freeing the name is requested,
242          free it now to avoid leaks.  */
243       if (release_policy == rel_name || release_policy == rel_both)
244         xfree (name);
245       return;
246     }
247 
248   for (i = 0; i < req->hcount; i++)
249     {
250       hdr = &req->headers[i];
251       if (0 == c_strcasecmp (name, hdr->name))
252         {
253           /* Replace existing header. */
254           release_header (hdr);
255           hdr->name = (void *)name;
256           hdr->value = (void *)value;
257           hdr->release_policy = release_policy;
258           return;
259         }
260     }
261 
262   /* Install new header. */
263 
264   if (req->hcount >= req->hcapacity)
265     {
266       req->hcapacity <<= 1;
267       req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
268     }
269   hdr = &req->headers[req->hcount++];
270   hdr->name = (void *)name;
271   hdr->value = (void *)value;
272   hdr->release_policy = release_policy;
273 }
274 
275 /* Like request_set_header, but sets the whole header line, as
276    provided by the user using the `--header' option.  For example,
277    request_set_user_header (req, "Foo: bar") works just like
278    request_set_header (req, "Foo", "bar").  */
279 
280 static void
request_set_user_header(struct request * req,const char * header)281 request_set_user_header (struct request *req, const char *header)
282 {
283   const char *name, *p;
284 
285   if (!(p = strchr (header, ':')))
286     return;
287 
288   name = xstrndup(header, p - header);
289 
290   ++p;
291   while (c_isspace (*p))
292     ++p;
293 
294   request_set_header (req, name, p, rel_name);
295 }
296 
297 /* Remove the header with specified name from REQ.  Returns true if
298    the header was actually removed, false otherwise.  */
299 
300 static bool
request_remove_header(struct request * req,const char * name)301 request_remove_header (struct request *req, const char *name)
302 {
303   int i;
304   for (i = 0; i < req->hcount; i++)
305     {
306       struct request_header *hdr = &req->headers[i];
307       if (0 == c_strcasecmp (name, hdr->name))
308         {
309           release_header (hdr);
310           /* Move the remaining headers by one. */
311           if (i < req->hcount - 1)
312             memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
313           --req->hcount;
314           return true;
315         }
316     }
317   return false;
318 }
319 
320 #define APPEND(p, str) do {                     \
321   int A_len = strlen (str);                     \
322   memcpy (p, str, A_len);                       \
323   p += A_len;                                   \
324 } while (0)
325 
326 /* Construct the request and write it to FD using fd_write.
327    If warc_tmp is set to a file pointer, the request string will
328    also be written to that file. */
329 
330 static int
request_send(const struct request * req,int fd,FILE * warc_tmp)331 request_send (const struct request *req, int fd, FILE *warc_tmp)
332 {
333   char *request_string, *p;
334   int i, size, write_error;
335 
336   /* Count the request size. */
337   size = 0;
338 
339   /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
340   size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
341 
342   for (i = 0; i < req->hcount; i++)
343     {
344       struct request_header *hdr = &req->headers[i];
345       /* NAME ": " VALUE "\r\n" */
346       size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
347     }
348 
349   /* "\r\n\0" */
350   size += 3;
351 
352   p = request_string = xmalloc (size);
353 
354   /* Generate the request. */
355 
356   APPEND (p, req->method); *p++ = ' ';
357   APPEND (p, req->arg);    *p++ = ' ';
358   memcpy (p, "HTTP/1.1\r\n", 10); p += 10;
359 
360   for (i = 0; i < req->hcount; i++)
361     {
362       struct request_header *hdr = &req->headers[i];
363       APPEND (p, hdr->name);
364       *p++ = ':', *p++ = ' ';
365       APPEND (p, hdr->value);
366       *p++ = '\r', *p++ = '\n';
367     }
368 
369   *p++ = '\r', *p++ = '\n', *p++ = '\0';
370   assert (p - request_string == size);
371 
372 #undef APPEND
373 
374   DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
375 
376   /* Send the request to the server. */
377 
378   write_error = fd_write (fd, request_string, size - 1, -1);
379   if (write_error < 0)
380     logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
381                fd_errstr (fd));
382   else if (warc_tmp != NULL)
383     {
384       /* Write a copy of the data to the WARC record. */
385       int warc_tmp_written = fwrite (request_string, 1, size - 1, warc_tmp);
386       if (warc_tmp_written != size - 1)
387         write_error = -2;
388     }
389   xfree (request_string);
390   return write_error;
391 }
392 
393 /* Release the resources used by REQ.
394    It is safe to call it with a valid pointer to a NULL pointer.
395    It is not safe to call it with an invalid or NULL pointer.  */
396 
397 static void
request_free(struct request ** req_ref)398 request_free (struct request **req_ref)
399 {
400   int i;
401   struct request *req = *req_ref;
402 
403   if (!req)
404     return;
405 
406   xfree (req->arg);
407   for (i = 0; i < req->hcount; i++)
408     release_header (&req->headers[i]);
409   xfree (req->headers);
410   xfree (req);
411   *req_ref = NULL;
412 }
413 
414 static struct hash_table *basic_authed_hosts;
415 
416 /* Find out if this host has issued a Basic challenge yet; if so, give
417  * it the username, password. A temporary measure until we can get
418  * proper authentication in place. */
419 
420 static bool
maybe_send_basic_creds(const char * hostname,const char * user,const char * passwd,struct request * req)421 maybe_send_basic_creds (const char *hostname, const char *user,
422                         const char *passwd, struct request *req)
423 {
424   bool do_challenge = false;
425 
426   if (opt.auth_without_challenge)
427     {
428       DEBUGP (("Auth-without-challenge set, sending Basic credentials.\n"));
429       do_challenge = true;
430     }
431   else if (basic_authed_hosts
432       && hash_table_contains (basic_authed_hosts, hostname))
433     {
434       DEBUGP (("Found %s in basic_authed_hosts.\n", quote (hostname)));
435       do_challenge = true;
436     }
437   else
438     {
439       DEBUGP (("Host %s has not issued a general basic challenge.\n",
440               quote (hostname)));
441     }
442   if (do_challenge)
443     {
444       request_set_header (req, "Authorization",
445                           basic_authentication_encode (user, passwd),
446                           rel_value);
447     }
448   return do_challenge;
449 }
450 
451 static void
register_basic_auth_host(const char * hostname)452 register_basic_auth_host (const char *hostname)
453 {
454   if (!basic_authed_hosts)
455     {
456       basic_authed_hosts = make_nocase_string_hash_table (1);
457     }
458   if (!hash_table_contains (basic_authed_hosts, hostname))
459     {
460       hash_table_put (basic_authed_hosts, xstrdup (hostname), NULL);
461       DEBUGP (("Inserted %s into basic_authed_hosts\n", quote (hostname)));
462     }
463 }
464 
465 /* Send the contents of FILE_NAME to SOCK.  Make sure that exactly
466    PROMISED_SIZE bytes are sent over the wire -- if the file is
467    longer, read only that much; if the file is shorter, report an error.
468    If warc_tmp is set to a file pointer, the post data will
469    also be written to that file.  */
470 
471 static int
body_file_send(int sock,const char * file_name,wgint promised_size,FILE * warc_tmp)472 body_file_send (int sock, const char *file_name, wgint promised_size, FILE *warc_tmp)
473 {
474   static char chunk[8192];
475   wgint written = 0;
476   int write_error;
477   FILE *fp;
478 
479   DEBUGP (("[writing BODY file %s ... ", file_name));
480 
481   fp = fopen (file_name, "rb");
482   if (!fp)
483     return -1;
484   while (!feof (fp) && written < promised_size)
485     {
486       int towrite;
487       int length = fread (chunk, 1, sizeof (chunk), fp);
488       if (length == 0)
489         break;
490       towrite = MIN (promised_size - written, length);
491       write_error = fd_write (sock, chunk, towrite, -1);
492       if (write_error < 0)
493         {
494           fclose (fp);
495           return -1;
496         }
497       if (warc_tmp != NULL)
498         {
499           /* Write a copy of the data to the WARC record. */
500           int warc_tmp_written = fwrite (chunk, 1, towrite, warc_tmp);
501           if (warc_tmp_written != towrite)
502             {
503               fclose (fp);
504               return -2;
505             }
506         }
507       written += towrite;
508     }
509   fclose (fp);
510 
511   /* If we've written less than was promised, report a (probably
512      nonsensical) error rather than break the promise.  */
513   if (written < promised_size)
514     {
515       errno = EINVAL;
516       return -1;
517     }
518 
519   assert (written == promised_size);
520   DEBUGP (("done]\n"));
521   return 0;
522 }
523 
/* Determine whether [START, PEEKED + PEEKLEN) contains an empty line,
   i.e. the end of the response head.  If so, return the position just
   after that line, otherwise return NULL.  This is used as callback to
   fd_read_hunk.  The data between START and PEEKED has been read and
   cannot be "unread"; the PEEKLEN bytes from PEEKED on have only been
   peeked.

   As a special case, when nothing has been read yet and the peeked
   data does not begin with "HTTP", START itself is returned, so that
   the caller ends up with an empty head (see below).  */

static const char *
response_head_terminator (const char *start, const char *peeked, int peeklen)
{
  const char *p, *end;

  /* If at first peek, verify whether HUNK starts with "HTTP".  If
     not, this is a HTTP/0.9 response and we must bail out without
     reading anything.  Only as many bytes as have been peeked (at
     most four) are compared.  */
  if (start == peeked
      && 0 != memcmp (start, "HTTP", peeklen < 4 ? peeklen : 4))
    return start;

  /* Look for "\n[\r]\n", and return the following position if found.
     Start two chars before the current to cover the possibility that
     part of the terminator (e.g. "\n\r") arrived in the previous
     batch.  */
  p = peeked - start < 2 ? start : peeked - 2;
  end = peeked + peeklen;

  /* Check for \n\r\n or \n\n wherever at least three bytes are left.
     The bound is written as a difference so that no pointer before
     START is ever formed when fewer than two bytes are available.  */
  for (; end - p > 2; p++)
    if (*p == '\n')
      {
        if (p[1] == '\r' && p[2] == '\n')
          return p + 3;
        if (p[1] == '\n')
          return p + 2;
      }

  /* Exactly two bytes left: check for \n\n directly preceding END.
     Testing the distance rather than PEEKLEN also catches a final
     \n that was peeked on its own, right after a \n that had already
     been read.  */
  if (end - p >= 2 && p[0] == '\n' && p[1] == '\n')
    return p + 2;

  return NULL;
}
563 
/* The maximum size of a single HTTP response head we care to read;
   read_http_response_head passes it to fd_read_hunk as the size
   limit.  Rather than being a limit of the reader implementation,
   this limit prevents Wget from slurping all available memory upon
   encountering malicious or buggy server output, thus protecting the
   user.  Define it to 0 to remove the limit.  */

#define HTTP_RESPONSE_MAX_SIZE 65536
571 
572 /* Read the HTTP request head from FD and return it.  The error
573    conditions are the same as with fd_read_hunk.
574 
575    To support HTTP/0.9 responses, this function tries to make sure
576    that the data begins with "HTTP".  If this is not the case, no data
577    is read and an empty request is returned, so that the remaining
578    data can be treated as body.  */
579 
580 static char *
read_http_response_head(int fd)581 read_http_response_head (int fd)
582 {
583   return fd_read_hunk (fd, response_head_terminator, 512,
584                        HTTP_RESPONSE_MAX_SIZE);
585 }
586 
/* A parsed HTTP response head.  Built by resp_new, queried with the
   resp_* functions and destroyed by resp_free.  */
struct response {
  /* The response data.  This is the HEAD string given to resp_new;
     resp_free does not free it.  */
  const char *data;

  /* The array of pointers that indicate where each header starts.
     For example, given this HTTP response:

       HTTP/1.0 200 Ok
       Description: some
        text
       Etag: x

     The headers are located like this:

     "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
     ^                   ^                             ^          ^
     headers[0]          headers[1]                    headers[2] headers[3]

     I.e. headers[0] points to the beginning of the response (the
     status line), headers[1] points to the end of the status line and
     the beginning of the first header, etc.  The last pointer marks
     the empty line that ends the head, and the array is terminated by
     a NULL entry.  HEADERS is left NULL for an empty (HTTP/0.9)
     head.  */

  const char **headers;
};
611 
612 /* Create a new response object from the text of the HTTP response,
613    available in HEAD.  That text is automatically split into
614    constituent header lines for fast retrieval using
615    resp_header_*.  */
616 
617 static struct response *
resp_new(char * head)618 resp_new (char *head)
619 {
620   char *hdr;
621   int count, size;
622 
623   struct response *resp = xnew0 (struct response);
624   resp->data = head;
625 
626   if (*head == '\0')
627     {
628       /* Empty head means that we're dealing with a headerless
629          (HTTP/0.9) response.  In that case, don't set HEADERS at
630          all.  */
631       return resp;
632     }
633 
634   /* Split HEAD into header lines, so that resp_header_* functions
635      don't need to do this over and over again.  */
636 
637   size = count = 0;
638   hdr = head;
639   while (1)
640     {
641       DO_REALLOC (resp->headers, size, count + 1, const char *);
642       resp->headers[count++] = hdr;
643 
644       /* Break upon encountering an empty line. */
645       if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
646         break;
647 
648       /* Find the end of HDR, including continuations. */
649       for (;;)
650         {
651           char *end = strchr (hdr, '\n');
652 
653           if (!end)
654             {
655               hdr += strlen (hdr);
656               break;
657             }
658 
659           hdr = end + 1;
660 
661           if (*hdr != ' ' && *hdr != '\t')
662             break;
663 
664           // continuation, transform \r and \n into spaces
665           *end = ' ';
666           if (end > head && end[-1] == '\r')
667             end[-1] = ' ';
668         }
669     }
670   DO_REALLOC (resp->headers, size, count + 1, const char *);
671   resp->headers[count] = NULL;
672 
673   return resp;
674 }
675 
676 /* Locate the header named NAME in the request data, starting with
677    position START.  This allows the code to loop through the request
678    data, filtering for all requests of a given name.  Returns the
679    found position, or -1 for failure.  The code that uses this
680    function typically looks like this:
681 
682      for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
683        ... do something with header ...
684 
685    If you only care about one header, use resp_header_get instead of
686    this function.  */
687 
688 static int
resp_header_locate(const struct response * resp,const char * name,int start,const char ** begptr,const char ** endptr)689 resp_header_locate (const struct response *resp, const char *name, int start,
690                     const char **begptr, const char **endptr)
691 {
692   int i;
693   const char **headers = resp->headers;
694   int name_len;
695 
696   if (!headers || !headers[1])
697     return -1;
698 
699   name_len = strlen (name);
700   if (start > 0)
701     i = start;
702   else
703     i = 1;
704 
705   for (; headers[i + 1]; i++)
706     {
707       const char *b = headers[i];
708       const char *e = headers[i + 1];
709       if (e - b > name_len
710           && b[name_len] == ':'
711           && 0 == c_strncasecmp (b, name, name_len))
712         {
713           b += name_len + 1;
714           while (b < e && c_isspace (*b))
715             ++b;
716           while (b < e && c_isspace (e[-1]))
717             --e;
718           *begptr = b;
719           *endptr = e;
720           return i;
721         }
722     }
723   return -1;
724 }
725 
/* Find the first header named NAME in the response RESP.  If it is
   found, set *BEGPTR to the start and *ENDPTR to the end of its value
   and return true.  Otherwise return false.

   This function is used as a building block for resp_header_copy
   and resp_header_strdup.  */

static bool
resp_header_get (const struct response *resp, const char *name,
                 const char **begptr, const char **endptr)
{
  return resp_header_locate (resp, name, 0, begptr, endptr) != -1;
}
740 
/* Copy the value of the response header named NAME to the buffer BUF,
   which is BUFSIZE bytes large (the terminating 0 included).  A value
   that does not fit is truncated.  Returns true if the header exists,
   false otherwise.  If there should be no limit on the size of the
   value, use resp_header_strdup instead.

   If BUFSIZE is 0, BUF is not touched, but the return value still
   tells whether the header is present.  */

static bool
resp_header_copy (const struct response *resp, const char *name,
                  char *buf, int bufsize)
{
  const char *val, *val_end;
  int ncopy;

  if (!resp_header_get (resp, name, &val, &val_end))
    return false;

  if (bufsize == 0)
    return true;

  /* Leave room for the terminating 0.  */
  ncopy = MIN (val_end - val, bufsize - 1);
  memcpy (buf, val, ncopy);
  buf[ncopy] = '\0';
  return true;
}
764 
/* Return a freshly allocated copy of the value of the header named
   NAME in RESP; the caller is responsible for freeing it.  If RESP
   has no such header, return NULL.  */

static char *
resp_header_strdup (const struct response *resp, const char *name)
{
  const char *val, *val_end;

  return resp_header_get (resp, name, &val, &val_end)
    ? strdupdelim (val, val_end)
    : NULL;
}
776 
777 /* Parse the HTTP status line, which is of format:
778 
779    HTTP-Version SP Status-Code SP Reason-Phrase
780 
781    The function returns the status-code, or -1 if the status line
782    appears malformed.  The pointer to "reason-phrase" message is
783    returned in *MESSAGE.  */
784 
785 static int
resp_status(const struct response * resp,char ** message)786 resp_status (const struct response *resp, char **message)
787 {
788   int status;
789   const char *p, *end;
790 
791   if (!resp->headers)
792     {
793       /* For a HTTP/0.9 response, assume status 200. */
794       if (message)
795         *message = xstrdup (_("No headers, assuming HTTP/0.9"));
796       return 200;
797     }
798 
799   p = resp->headers[0];
800   end = resp->headers[1];
801 
802   if (!end)
803     return -1;
804 
805   /* "HTTP" */
806   if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
807     return -1;
808   p += 4;
809 
810   /* Match the HTTP version.  This is optional because Gnutella
811      servers have been reported to not specify HTTP version.  */
812   if (p < end && *p == '/')
813     {
814       ++p;
815       while (p < end && c_isdigit (*p))
816         ++p;
817       if (p < end && *p == '.')
818         ++p;
819       while (p < end && c_isdigit (*p))
820         ++p;
821     }
822 
823   while (p < end && c_isspace (*p))
824     ++p;
825   if (end - p < 3 || !c_isdigit (p[0]) || !c_isdigit (p[1]) || !c_isdigit (p[2]))
826     return -1;
827 
828   status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
829   p += 3;
830 
831   if (message)
832     {
833       while (p < end && c_isspace (*p))
834         ++p;
835       while (p < end && c_isspace (end[-1]))
836         --end;
837       *message = strdupdelim (p, end);
838     }
839 
840   return status;
841 }
842 
843 /* Release the resources used by RESP.
844    It is safe to call it with a valid pointer to a NULL pointer.
845    It is not safe to call it with a invalid or NULL pointer.  */
846 
847 static void
resp_free(struct response ** resp_ref)848 resp_free (struct response **resp_ref)
849 {
850   struct response *resp = *resp_ref;
851 
852   if (!resp)
853     return;
854 
855   xfree (resp->headers);
856   xfree (resp);
857 
858   *resp_ref = NULL;
859 }
860 
861 /* Print a single line of response, the characters [b, e).  We tried
862    getting away with
863       logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
864    but that failed to escape the non-printable characters and, in fact,
865    caused crashes in UTF-8 locales.  */
866 
867 static void
print_response_line(const char * prefix,const char * b,const char * e)868 print_response_line (const char *prefix, const char *b, const char *e)
869 {
870   char buf[1024], *copy;
871   size_t len = e - b;
872 
873   if (len < sizeof (buf))
874     copy = buf;
875   else
876     copy = xmalloc(len + 1);
877 
878   memcpy(copy, b, len);
879   copy[len] = 0;
880 
881   logprintf (LOG_ALWAYS, "%s%s\n", prefix,
882              quotearg_style (escape_quoting_style, copy));
883 
884   if (copy != buf)
885     xfree (copy);
886 }
887 
888 /* Print the server response, line by line, omitting the trailing CRLF
889    from individual header lines, and prefixed with PREFIX.  */
890 
891 static void
print_server_response(const struct response * resp,const char * prefix)892 print_server_response (const struct response *resp, const char *prefix)
893 {
894   int i;
895   if (!resp->headers)
896     return;
897   for (i = 0; resp->headers[i + 1]; i++)
898     {
899       const char *b = resp->headers[i];
900       const char *e = resp->headers[i + 1];
901       /* Skip CRLF */
902       if (b < e && e[-1] == '\n')
903         --e;
904       if (b < e && e[-1] == '\r')
905         --e;
906       print_response_line (prefix, b, e);
907     }
908 }
909 
910 /* Parse the `Content-Range' header and extract the information it
911    contains.  Returns true if successful, false otherwise.  */
912 static bool
parse_content_range(const char * hdr,wgint * first_byte_ptr,wgint * last_byte_ptr,wgint * entity_length_ptr)913 parse_content_range (const char *hdr, wgint *first_byte_ptr,
914                      wgint *last_byte_ptr, wgint *entity_length_ptr)
915 {
916   wgint num;
917 
918   /* Ancient versions of Netscape proxy server, presumably predating
919      rfc2068, sent out `Content-Range' without the "bytes"
920      specifier.  */
921   if (0 == strncasecmp (hdr, "bytes", 5))
922     {
923       hdr += 5;
924       /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
925          HTTP spec. */
926       if (*hdr == ':')
927         ++hdr;
928       while (c_isspace (*hdr))
929         ++hdr;
930       if (!*hdr)
931         return false;
932     }
933   if (!c_isdigit (*hdr))
934     return false;
935   for (num = 0; c_isdigit (*hdr); hdr++)
936     num = 10 * num + (*hdr - '0');
937   if (*hdr != '-' || !c_isdigit (*(hdr + 1)))
938     return false;
939   *first_byte_ptr = num;
940   ++hdr;
941   for (num = 0; c_isdigit (*hdr); hdr++)
942     num = 10 * num + (*hdr - '0');
943   if (*hdr != '/')
944     return false;
945   *last_byte_ptr = num;
946   if (!(c_isdigit (*(hdr + 1)) || *(hdr + 1) == '*'))
947     return false;
948   if (*last_byte_ptr < *first_byte_ptr)
949     return false;
950   ++hdr;
951   if (*hdr == '*')
952     num = -1;
953   else
954     for (num = 0; c_isdigit (*hdr); hdr++)
955       num = 10 * num + (*hdr - '0');
956   *entity_length_ptr = num;
957   if ((*entity_length_ptr <= *last_byte_ptr) && *entity_length_ptr != -1)
958     return false;
959   return true;
960 }
961 
962 /* Read the body of the request, but don't store it anywhere and don't
963    display a progress gauge.  This is useful for reading the bodies of
964    administrative responses to which we will soon issue another
965    request.  The response is not useful to the user, but reading it
966    allows us to continue using the same connection to the server.
967 
968    If reading fails, false is returned, true otherwise.  In debug
969    mode, the body is displayed for debugging purposes.  */
970 
971 static bool
skip_short_body(int fd,wgint contlen,bool chunked)972 skip_short_body (int fd, wgint contlen, bool chunked)
973 {
974   enum {
975     SKIP_SIZE = 512,                /* size of the download buffer */
976     SKIP_THRESHOLD = 4096        /* the largest size we read */
977   };
978   wgint remaining_chunk_size = 0;
979   char dlbuf[SKIP_SIZE + 1];
980   dlbuf[SKIP_SIZE] = '\0';        /* so DEBUGP can safely print it */
981 
982   /* If the body is too large, it makes more sense to simply close the
983      connection than to try to read the body.  */
984   if (contlen > SKIP_THRESHOLD)
985     return false;
986 
987   while (contlen > 0 || chunked)
988     {
989       int ret;
990       if (chunked)
991         {
992           if (remaining_chunk_size == 0)
993             {
994               char *line = fd_read_line (fd);
995               char *endl;
996               if (line == NULL)
997                 break;
998 
999               remaining_chunk_size = strtol (line, &endl, 16);
1000               xfree (line);
1001 
1002               if (remaining_chunk_size < 0)
1003                 return false;
1004 
1005               if (remaining_chunk_size == 0)
1006                 {
1007                   line = fd_read_line (fd);
1008                   xfree (line);
1009                   break;
1010                 }
1011             }
1012 
1013           contlen = MIN (remaining_chunk_size, SKIP_SIZE);
1014         }
1015 
1016       DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
1017 
1018       ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
1019       if (ret <= 0)
1020         {
1021           /* Don't normally report the error since this is an
1022              optimization that should be invisible to the user.  */
1023           DEBUGP (("] aborting (%s).\n",
1024                    ret < 0 ? fd_errstr (fd) : "EOF received"));
1025           return false;
1026         }
1027       contlen -= ret;
1028 
1029       if (chunked)
1030         {
1031           remaining_chunk_size -= ret;
1032           if (remaining_chunk_size == 0)
1033             {
1034               char *line = fd_read_line (fd);
1035               if (line == NULL)
1036                 return false;
1037               else
1038                 xfree (line);
1039             }
1040         }
1041 
1042       /* Safe even if %.*s bogusly expects terminating \0 because
1043          we've zero-terminated dlbuf above.  */
1044       DEBUGP (("%.*s", ret, dlbuf));
1045     }
1046 
1047   DEBUGP (("] done.\n"));
1048   return true;
1049 }
1050 
1051 #define NOT_RFC2231 0
1052 #define RFC2231_NOENCODING 1
1053 #define RFC2231_ENCODING 2
1054 
1055 /* extract_param extracts the parameter name into NAME.
1056    However, if the parameter name is in RFC2231 format then
1057    this function adjusts NAME by stripping of the trailing
1058    characters that are not part of the name but are present to
1059    indicate the presence of encoding information in the value
1060    or a fragment of a long parameter value
1061 */
1062 static int
modify_param_name(param_token * name)1063 modify_param_name (param_token *name)
1064 {
1065   const char *delim1 = memchr (name->b, '*', name->e - name->b);
1066   const char *delim2 = memrchr (name->b, '*', name->e - name->b);
1067 
1068   int result;
1069 
1070   if (delim1 == NULL)
1071     {
1072       result = NOT_RFC2231;
1073     }
1074   else if (delim1 == delim2)
1075     {
1076       if ((name->e - 1) == delim1)
1077         {
1078           result = RFC2231_ENCODING;
1079         }
1080       else
1081         {
1082           result = RFC2231_NOENCODING;
1083         }
1084       name->e = delim1;
1085     }
1086   else
1087     {
1088       name->e = delim1;
1089       result = RFC2231_ENCODING;
1090     }
1091   return result;
1092 }
1093 
1094 /* extract_param extract the parameter value into VALUE.
1095    Like modify_param_name this function modifies VALUE by
1096    stripping off the encoding information from the actual value
1097 */
1098 static void
modify_param_value(param_token * value,int encoding_type)1099 modify_param_value (param_token *value, int encoding_type )
1100 {
1101   if (encoding_type == RFC2231_ENCODING)
1102     {
1103       const char *delim = memrchr (value->b, '\'', value->e - value->b);
1104       if (delim != NULL)
1105         {
1106           value->b = (delim+1);
1107         }
1108     }
1109 }
1110 
1111 /* Extract a parameter from the string (typically an HTTP header) at
1112    **SOURCE and advance SOURCE to the next parameter.  Return false
1113    when there are no more parameters to extract.  The name of the
1114    parameter is returned in NAME, and the value in VALUE.  If the
1115    parameter has no value, the token's value is zeroed out.
1116 
1117    For example, if *SOURCE points to the string "attachment;
1118    filename=\"foo bar\"", the first call to this function will return
1119    the token named "attachment" and no value, and the second call will
1120    return the token named "filename" and value "foo bar".  The third
1121    call will return false, indicating no more valid tokens.
1122 
1123    is_url_encoded is an out parameter. If not NULL, a boolean value will be
1124    stored into it, letting the caller know whether or not the extracted value is
1125    URL-encoded. The caller can then decode it with url_unescape(), which however
1126    performs decoding in-place. URL-encoding is used by RFC 2231 to support
1127    non-US-ASCII characters in HTTP header values.  */
1128 
1129 bool
extract_param(const char ** source,param_token * name,param_token * value,char separator,bool * is_url_encoded)1130 extract_param (const char **source, param_token *name, param_token *value,
1131                char separator, bool *is_url_encoded)
1132 {
1133   const char *p = *source;
1134   int param_type;
1135   if (is_url_encoded)
1136     *is_url_encoded = false;   /* initializing the out parameter */
1137 
1138   while (c_isspace (*p)) ++p;
1139   if (!*p)
1140     {
1141       *source = p;
1142       return false;             /* no error; nothing more to extract */
1143     }
1144 
1145   /* Extract name. */
1146   name->b = p;
1147   while (*p && !c_isspace (*p) && *p != '=' && *p != separator) ++p;
1148   name->e = p;
1149   if (name->b == name->e)
1150     return false;               /* empty name: error */
1151   while (c_isspace (*p)) ++p;
1152   if (*p == separator || !*p)           /* no value */
1153     {
1154       xzero (*value);
1155       if (*p == separator) ++p;
1156       *source = p;
1157       return true;
1158     }
1159   if (*p != '=')
1160     return false;               /* error */
1161 
1162   /* *p is '=', extract value */
1163   ++p;
1164   while (c_isspace (*p)) ++p;
1165   if (*p == '"')                /* quoted */
1166     {
1167       value->b = ++p;
1168       while (*p && *p != '"') ++p;
1169       if (!*p)
1170         return false;
1171       value->e = p++;
1172       /* Currently at closing quote; find the end of param. */
1173       while (c_isspace (*p)) ++p;
1174       while (*p && *p != separator) ++p;
1175       if (*p == separator)
1176         ++p;
1177       else if (*p)
1178         /* garbage after closed quote, e.g. foo="bar"baz */
1179         return false;
1180     }
1181   else                          /* unquoted */
1182     {
1183       value->b = p;
1184       while (*p && *p != separator) ++p;
1185       value->e = p;
1186       while (value->e != value->b && c_isspace (value->e[-1]))
1187         --value->e;
1188       if (*p == separator) ++p;
1189     }
1190   *source = p;
1191 
1192   param_type = modify_param_name (name);
1193   if (param_type != NOT_RFC2231)
1194     {
1195       if (param_type == RFC2231_ENCODING && is_url_encoded)
1196         *is_url_encoded = true;
1197       modify_param_value (value, param_type);
1198     }
1199   return true;
1200 }
1201 
1202 #undef NOT_RFC2231
1203 #undef RFC2231_NOENCODING
1204 #undef RFC2231_ENCODING
1205 
1206 /* Appends the string represented by VALUE to FILENAME */
1207 
1208 static void
append_value_to_filename(char ** filename,param_token const * const value,bool is_url_encoded)1209 append_value_to_filename (char **filename, param_token const * const value,
1210                           bool is_url_encoded)
1211 {
1212   int original_length = strlen (*filename);
1213   int new_length = strlen (*filename) + (value->e - value->b);
1214   *filename = xrealloc (*filename, new_length+1);
1215   memcpy (*filename + original_length, value->b, (value->e - value->b));
1216   (*filename)[new_length] = '\0';
1217   if (is_url_encoded)
1218     url_unescape (*filename + original_length);
1219 }
1220 
1221 /* Parse the contents of the `Content-Disposition' header, extracting
1222    the information useful to Wget.  Content-Disposition is a header
1223    borrowed from MIME; when used in HTTP, it typically serves for
1224    specifying the desired file name of the resource.  For example:
1225 
1226        Content-Disposition: attachment; filename="flora.jpg"
1227 
1228    Wget will skip the tokens it doesn't care about, such as
1229    "attachment" in the previous example; it will also skip other
1230    unrecognized params.  If the header is syntactically correct and
1231    contains a file name, a copy of the file name is stored in
1232    *filename and true is returned.  Otherwise, the function returns
1233    false.
1234 
1235    The file name is stripped of directory components and must not be
1236    empty.
1237 
1238    Historically, this function returned filename prefixed with opt.dir_prefix,
1239    now that logic is handled by the caller, new code should pay attention,
1240    changed by crq, Sep 2010.
1241 
1242 */
1243 static bool
parse_content_disposition(const char * hdr,char ** filename)1244 parse_content_disposition (const char *hdr, char **filename)
1245 {
1246   param_token name, value;
1247   bool is_url_encoded = false;
1248 
1249   char *encodedFilename = NULL;
1250   char *unencodedFilename = NULL;
1251   for ( ; extract_param (&hdr, &name, &value, ';', &is_url_encoded);
1252         is_url_encoded = false)
1253     {
1254       int isFilename = BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename");
1255       if ( isFilename && value.b != NULL)
1256         {
1257           /* Make the file name begin at the last slash or backslash. */
1258           bool isEncodedFilename;
1259           char **outFilename;
1260           const char *last_slash = memrchr (value.b, '/', value.e - value.b);
1261           const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
1262           if (last_slash && last_bs)
1263             value.b = 1 + MAX (last_slash, last_bs);
1264           else if (last_slash || last_bs)
1265             value.b = 1 + (last_slash ? last_slash : last_bs);
1266           if (value.b == value.e)
1267             continue;
1268 
1269           /* Check if the name is "filename*" as specified in RFC 6266.
1270            * Since "filename" could be broken up as "filename*N" (RFC 2231),
1271            * a check is needed to make sure this is not the case */
1272           isEncodedFilename = *name.e == '*' && !c_isdigit (*(name.e + 1));
1273           outFilename = isEncodedFilename ? &encodedFilename
1274             : &unencodedFilename;
1275           if (*outFilename)
1276             append_value_to_filename (outFilename, &value, is_url_encoded);
1277           else
1278             {
1279               *outFilename = strdupdelim (value.b, value.e);
1280               if (is_url_encoded)
1281                 url_unescape (*outFilename);
1282             }
1283         }
1284     }
1285   if (encodedFilename)
1286     {
1287       xfree (unencodedFilename);
1288       *filename = encodedFilename;
1289     }
1290   else
1291     {
1292       xfree (encodedFilename);
1293       *filename = unencodedFilename;
1294     }
1295   if (*filename)
1296     return true;
1297   else
1298     return false;
1299 }
1300 
#ifdef HAVE_HSTS
/* Parse the value of a `Strict-Transport-Security' header, HEADER.

   The directives are matched case-insensitively.  If a "max-age"
   directive with a value is present, its value is stored in *MAX_AGE
   and the presence of "includeSubDomains" in *INCLUDE_SUBDOMAINS
   (either pointer may be NULL), and true is returned.  When "max-age"
   occurs more than once, the last occurrence that has a value wins.

   False is returned, and a verbose log message is emitted, when
   HEADER has no usable "max-age".  False is also returned, silently,
   when HEADER is NULL.  */
static bool
parse_strict_transport_security (const char *header, time_t *max_age, bool *include_subdomains)
{
  param_token name, value;
  char *c_max_age = NULL;
  bool is = false; /* includeSubDomains */
  bool is_url_encoded = false;

  if (!header)
    return false;

  /* Process the STS header. Keys should be matched case-insensitively. */
  for (; extract_param (&header, &name, &value, ';', &is_url_encoded); is_url_encoded = false)
    {
      if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "max-age"))
        {
          /* extract_param zeroes VALUE for a directive without a
             value.  Such a "max-age" is unusable, so skip it instead
             of handing NULL pointers to strdupdelim.  */
          if (value.b != NULL)
            {
              xfree (c_max_age);
              c_max_age = strdupdelim (value.b, value.e);
            }
        }
      else if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "includeSubDomains"))
        is = true;
    }

  if (!c_max_age)
    {
      /* something weird happened */
      logprintf (LOG_VERBOSE, "Could not parse Strict-Transport-Security header\n");
      return false;
    }

  /* Pass the parsed values over.
     If the string value goes out of a long's bounds, strtol() will return LONG_MIN or LONG_MAX.
     In theory, the HSTS engine should be able to handle it.
     Also, time_t is normally defined as a long, so this should not break.  */
  if (max_age)
    *max_age = (time_t) strtol (c_max_age, NULL, 10);
  if (include_subdomains)
    *include_subdomains = is;

  DEBUGP (("Parsed Strict-Transport-Security max-age = %s, includeSubDomains = %s\n",
         c_max_age, (is ? "true" : "false")));

  xfree (c_max_age);
  return true;
}
#endif
1354 
/* Persistent connections.  Only the most recently used connection is
   cached, and only if the HTTP server agreed to keep it open.  The
   state of that connection lives in the two variables below.
   Ideally, it should be possible to cache an arbitrary fixed number
   of these connections.  */

/* True while a persistent connection is registered in `pconn'.  */
static bool pconn_active;

/* Description of the registered persistent connection.  */
static struct {
  int socket;                   /* the socket of the connection */

  char *host;                   /* host the connection was registered for */
  int port;                     /* port the connection was registered for */

  bool ssl;                     /* whether a ssl handshake has occurred
                                   on this connection */

  bool authorized;              /* whether the connection was authorized.
                                   This is only done by NTLM, which
                                   authorizes *connections* rather than
                                   individual requests.  (That practice
                                   is peculiar for HTTP, but it is a
                                   useful optimization.)  */

#ifdef ENABLE_NTLM
  struct ntlmdata ntlm;         /* NTLM data of the current connection */
#endif
} pconn;
1386 
1387 /* Mark the persistent connection as invalid and free the resources it
1388    uses.  This is used by the CLOSE_* macros after they forcefully
1389    close a registered persistent connection.  */
1390 
1391 static void
invalidate_persistent(void)1392 invalidate_persistent (void)
1393 {
1394   DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
1395   pconn_active = false;
1396   fd_close (pconn.socket);
1397   xfree (pconn.host);
1398   xzero (pconn);
1399 }
1400 
1401 /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
1402    persistent.  This will enable someone to use the same connection
1403    later.  In the context of HTTP, this must be called only AFTER the
1404    response has been received and the server has promised that the
1405    connection will remain alive.
1406 
1407    If a previous connection was persistent, it is closed. */
1408 
1409 static void
register_persistent(const char * host,int port,int fd,bool ssl)1410 register_persistent (const char *host, int port, int fd, bool ssl)
1411 {
1412   if (pconn_active)
1413     {
1414       if (pconn.socket == fd)
1415         {
1416           /* The connection FD is already registered. */
1417           return;
1418         }
1419       else
1420         {
1421           /* The old persistent connection is still active; close it
1422              first.  This situation arises whenever a persistent
1423              connection exists, but we then connect to a different
1424              host, and try to register a persistent connection to that
1425              one.  */
1426           invalidate_persistent ();
1427         }
1428     }
1429 
1430   pconn_active = true;
1431   pconn.socket = fd;
1432   pconn.host = xstrdup (host);
1433   pconn.port = port;
1434   pconn.ssl = ssl;
1435   pconn.authorized = false;
1436 
1437   DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
1438 }
1439 
1440 /* Return true if a persistent connection is available for connecting
1441    to HOST:PORT.  */
1442 
1443 static bool
persistent_available_p(const char * host,int port,bool ssl,bool * host_lookup_failed)1444 persistent_available_p (const char *host, int port, bool ssl,
1445                         bool *host_lookup_failed)
1446 {
1447   /* First, check whether a persistent connection is active at all.  */
1448   if (!pconn_active)
1449     return false;
1450 
1451   /* If we want SSL and the last connection wasn't or vice versa,
1452      don't use it.  Checking for host and port is not enough because
1453      HTTP and HTTPS can apparently coexist on the same port.  */
1454   if (ssl != pconn.ssl)
1455     return false;
1456 
1457   /* If we're not connecting to the same port, we're not interested. */
1458   if (port != pconn.port)
1459     return false;
1460 
1461   /* If the host is the same, we're in business.  If not, there is
1462      still hope -- read below.  */
1463   if (0 != strcasecmp (host, pconn.host))
1464     {
1465       /* Check if pconn.socket is talking to HOST under another name.
1466          This happens often when both sites are virtual hosts
1467          distinguished only by name and served by the same network
1468          interface, and hence the same web server (possibly set up by
1469          the ISP and serving many different web sites).  This
1470          admittedly unconventional optimization does not contradict
1471          HTTP and works well with popular server software.  */
1472 
1473       bool found;
1474       ip_address ip;
1475       struct address_list *al;
1476 
1477       if (ssl)
1478         /* Don't try to talk to two different SSL sites over the same
1479            secure connection!  (Besides, it's not clear that
1480            name-based virtual hosting is even possible with SSL.)  */
1481         return false;
1482 
1483       /* If pconn.socket's peer is one of the IP addresses HOST
1484          resolves to, pconn.socket is for all intents and purposes
1485          already talking to HOST.  */
1486 
1487       if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
1488         {
1489           /* Can't get the peer's address -- something must be very
1490              wrong with the connection.  */
1491           invalidate_persistent ();
1492           return false;
1493         }
1494       al = lookup_host (host, 0);
1495       if (!al)
1496         {
1497           *host_lookup_failed = true;
1498           return false;
1499         }
1500 
1501       found = address_list_contains (al, &ip);
1502       address_list_release (al);
1503 
1504       if (!found)
1505         return false;
1506 
1507       /* The persistent connection's peer address was found among the
1508          addresses HOST resolved to; therefore, pconn.sock is in fact
1509          already talking to HOST -- no need to reconnect.  */
1510     }
1511 
1512   /* Finally, check whether the connection is still open.  This is
1513      important because most servers implement liberal (short) timeout
1514      on persistent connections.  Wget can of course always reconnect
1515      if the connection doesn't work out, but it's nicer to know in
1516      advance.  This test is a logical followup of the first test, but
1517      is "expensive" and therefore placed at the end of the list.
1518 
1519      (Current implementation of test_socket_open has a nice side
1520      effect that it treats sockets with pending data as "closed".
1521      This is exactly what we want: if a broken server sends message
1522      body in response to HEAD, or if it sends more than conent-length
1523      data, we won't reuse the corrupted connection.)  */
1524 
1525   if (!test_socket_open (pconn.socket))
1526     {
1527       /* Oops, the socket is no longer open.  Now that we know that,
1528          let's invalidate the persistent connection before returning
1529          0.  */
1530       invalidate_persistent ();
1531       return false;
1532     }
1533 
1534   return true;
1535 }
1536 
/* Two ways of closing a connection, for two different situations:
   CLOSE_FINISH is for when the job we've been doing is finished and
   we want to leave, CLOSE_INVALIDATE for when something is seriously
   wrong and we're closing the connection as part of cleanup.

   With keep_alive set, CLOSE_FINISH leaves the connection open (and
   FD untouched), while CLOSE_INVALIDATE still closes it.  Whenever a
   connection is closed, FD is set to -1; if it was the registered
   persistent connection, it is closed through invalidate_persistent.

   Note that the semantics of the flag `keep_alive' is "this
   connection *will* be reused (the server has promised not to close
   the connection once we're done)", while the semantics of
   `pconn_active && (fd) == pconn.socket' is "we're *now* using an
   active, registered connection".

   FD must be an assignable expression and is evaluated more than
   once.  CLOSE_FINISH reads a variable named `keep_alive' from the
   scope it is expanded in.  */

#define CLOSE_INVALIDATE(fd) do {               \
  if (pconn_active && (fd) == pconn.socket)     \
    invalidate_persistent ();                   \
  else                                          \
    fd_close (fd);                              \
  fd = -1;                                      \
} while (0)

#define CLOSE_FINISH(fd) do {                   \
  if (!keep_alive)                              \
    {                                           \
      CLOSE_INVALIDATE (fd);                    \
    }                                           \
} while (0)
1570 
/* Content encodings distinguished by the HTTP code.  */
typedef enum
{
  ENC_INVALID  = -1,            /* invalid encoding */
  ENC_NONE     = 0,             /* no special encoding */
  ENC_GZIP     = 1,             /* gzip compression */
  ENC_DEFLATE  = 2,             /* deflate compression */
  ENC_COMPRESS = 3,             /* compress compression */
  ENC_BROTLI   = 4              /* brotli compression */
} encoding_t;
1580 
1581 struct http_stat
1582 {
1583   wgint len;                    /* received length */
1584   wgint contlen;                /* expected length */
1585   wgint restval;                /* the restart value */
1586   int res;                      /* the result of last read */
1587   char *rderrmsg;               /* error message from read error */
1588   char *newloc;                 /* new location (redirection) */
1589   char *remote_time;            /* remote time-stamp string */
1590   char *error;                  /* textual HTTP error */
1591   int statcode;                 /* status code */
1592   char *message;                /* status message */
1593   wgint rd_size;                /* amount of data read from socket */
1594   double dltime;                /* time it took to download the data */
1595   const char *referer;          /* value of the referer header. */
1596   char *local_file;             /* local file name. */
1597   bool existence_checked;       /* true if we already checked for a file's
1598                                    existence after having begun to download
1599                                    (needed in gethttp for when connection is
1600                                    interrupted/restarted. */
1601   bool timestamp_checked;       /* true if pre-download time-stamping checks
1602                                  * have already been performed */
1603   char *orig_file_name;         /* name of file to compare for time-stamping
1604                                  * (might be != local_file if -K is set) */
1605   wgint orig_file_size;         /* size of file to compare for time-stamping */
1606   time_t orig_file_tstamp;      /* time-stamp of file to compare for
1607                                  * time-stamping */
1608 #ifdef HAVE_METALINK
1609   metalink_t *metalink;
1610 #endif
1611 
1612   encoding_t local_encoding;    /* the encoding of the local file */
1613   encoding_t remote_encoding;   /* the encoding of the remote file */
1614 
1615   bool temporary;               /* downloading a temporary file */
1616 };
1617 
1618 static void
free_hstat(struct http_stat * hs)1619 free_hstat (struct http_stat *hs)
1620 {
1621   xfree (hs->newloc);
1622   xfree (hs->remote_time);
1623   xfree (hs->error);
1624   xfree (hs->rderrmsg);
1625   xfree (hs->local_file);
1626   xfree (hs->orig_file_name);
1627   xfree (hs->message);
1628 #ifdef HAVE_METALINK
1629   metalink_delete (hs->metalink);
1630   hs->metalink = NULL;
1631 #endif
1632 }
1633 
1634 static void
get_file_flags(const char * filename,int * dt)1635 get_file_flags (const char *filename, int *dt)
1636 {
1637   logprintf (LOG_VERBOSE, _("\
1638 File %s already there; not retrieving.\n\n"), quote (filename));
1639   /* If the file is there, we suppose it's retrieved OK.  */
1640   *dt |= RETROKF;
1641 
1642   /* #### Bogusness alert.  */
1643   /* If its suffix is "html" or "htm" or similar, assume text/html.  */
1644   if (has_html_suffix_p (filename))
1645     *dt |= TEXTHTML;
1646 }
1647 
1648 /* Download the response body from the socket and writes it to
1649    an output file.  The headers have already been read from the
1650    socket.  If WARC is enabled, the response body will also be
1651    written to a WARC response record.
1652 
1653    hs, contlen, contrange, chunked_transfer_encoding and url are
1654    parameters from the gethttp method.  fp is a pointer to the
1655    output file.
1656 
1657    url, warc_timestamp_str, warc_request_uuid, warc_ip, type
1658    and statcode will be saved in the headers of the WARC record.
1659    The head parameter contains the HTTP headers of the response.
1660 
1661    If fp is NULL and WARC is enabled, the response body will be
1662    written only to the WARC file.  If WARC is disabled and fp
1663    is a file pointer, the data will be written to the file.
1664    If fp is a file pointer and WARC is enabled, the body will
1665    be written to both destinations.
1666 
1667    Returns the error code.   */
1668 static int
read_response_body(struct http_stat * hs,int sock,FILE * fp,wgint contlen,wgint contrange,bool chunked_transfer_encoding,char * url,char * warc_timestamp_str,char * warc_request_uuid,ip_address * warc_ip,char * type,int statcode,char * head)1669 read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
1670                     wgint contrange, bool chunked_transfer_encoding,
1671                     char *url, char *warc_timestamp_str, char *warc_request_uuid,
1672                     ip_address *warc_ip, char *type, int statcode, char *head)
1673 {
1674   int warc_payload_offset = 0;
1675   FILE *warc_tmp = NULL;
1676   int warcerr = 0;
1677   int flags = 0;
1678 
1679   if (opt.warc_filename != NULL)
1680     {
1681       /* Open a temporary file where we can write the response before we
1682          add it to the WARC record.  */
1683       warc_tmp = warc_tempfile ();
1684       if (warc_tmp == NULL)
1685         warcerr = WARC_TMP_FOPENERR;
1686 
1687       if (warcerr == 0)
1688         {
1689           /* We should keep the response headers for the WARC record.  */
1690           int head_len = strlen (head);
1691           int warc_tmp_written = fwrite (head, 1, head_len, warc_tmp);
1692           if (warc_tmp_written != head_len)
1693             warcerr = WARC_TMP_FWRITEERR;
1694           warc_payload_offset = head_len;
1695         }
1696 
1697       if (warcerr != 0)
1698         {
1699           if (warc_tmp != NULL)
1700             fclose (warc_tmp);
1701           return warcerr;
1702         }
1703     }
1704 
1705   if (fp != NULL)
1706     {
1707       /* This confuses the timestamping code that checks for file size.
1708          #### The timestamping code should be smarter about file size.  */
1709       if (opt.save_headers && hs->restval == 0)
1710         fwrite (head, 1, strlen (head), fp);
1711     }
1712 
1713   /* Read the response body.  */
1714   if (contlen != -1)
1715     /* If content-length is present, read that much; otherwise, read
1716        until EOF.  The HTTP spec doesn't require the server to
1717        actually close the connection when it's done sending data. */
1718     flags |= rb_read_exactly;
1719   if (fp != NULL && hs->restval > 0 && contrange == 0)
1720     /* If the server ignored our range request, instruct fd_read_body
1721        to skip the first RESTVAL bytes of body.  */
1722     flags |= rb_skip_startpos;
1723   if (chunked_transfer_encoding)
1724     flags |= rb_chunked_transfer_encoding;
1725 
1726   if (hs->remote_encoding == ENC_GZIP)
1727     flags |= rb_compressed_gzip;
1728 
1729   hs->len = hs->restval;
1730   hs->rd_size = 0;
1731   /* Download the response body and write it to fp.
1732      If we are working on a WARC file, we simultaneously write the
1733      response body to warc_tmp.  */
1734   hs->res = fd_read_body (hs->local_file, sock, fp, contlen != -1 ? contlen : 0,
1735                           hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
1736                           flags, warc_tmp);
1737   if (hs->res >= 0)
1738     {
1739       if (warc_tmp != NULL)
1740         {
1741           /* Create a response record and write it to the WARC file.
1742              Note: per the WARC standard, the request and response should share
1743              the same date header.  We re-use the timestamp of the request.
1744              The response record should also refer to the uuid of the request.  */
1745           bool r = warc_write_response_record (url, warc_timestamp_str,
1746                                                warc_request_uuid, warc_ip,
1747                                                warc_tmp, warc_payload_offset,
1748                                                type, statcode, hs->newloc);
1749 
1750           /* warc_write_response_record has closed warc_tmp. */
1751 
1752           if (! r)
1753             return WARC_ERR;
1754         }
1755 
1756       return RETRFINISHED;
1757     }
1758 
1759   if (warc_tmp != NULL)
1760     fclose (warc_tmp);
1761 
1762   if (hs->res == -2)
1763     {
1764       /* Error while writing to fd. */
1765       return FWRITEERR;
1766     }
1767   else if (hs->res == -3)
1768     {
1769       /* Error while writing to warc_tmp. */
1770       return WARC_TMP_FWRITEERR;
1771     }
1772   else
1773     {
1774       /* A read error! */
1775       xfree (hs->rderrmsg);
1776       hs->rderrmsg = xstrdup (fd_errstr (sock));
1777       return RETRFINISHED;
1778     }
1779 }
1780 
/* Evaluate to non-zero if LINE starts with STRING_CONSTANT (compared
   with c_strncasecmp) and the match is immediately followed by
   whitespace or by the terminating NUL.

   STRING_CONSTANT must be a string literal, because its length is
   taken with sizeof.  LINE is evaluated more than once, so it must
   not have side effects; it is parenthesized so that arbitrary
   pointer expressions can be passed safely.  */
#define BEGINS_WITH(line, string_constant)                                 \
  (!c_strncasecmp ((line), string_constant, sizeof (string_constant) - 1)  \
   && (c_isspace ((line)[sizeof (string_constant) - 1])                    \
       || !(line)[sizeof (string_constant) - 1]))
1785 
/* Add a User-Agent header to REQ according to opt.useragent:
   - not set at all: send "Wget/<version_string>";
   - set to a non-empty string: send that string verbatim;
   - set to the empty string: add no User-Agent header.  */
#define SET_USER_AGENT(req)                                              \
  do                                                                     \
    {                                                                    \
      if (opt.useragent == NULL)                                         \
        request_set_header (req, "User-Agent",                           \
                            aprintf ("Wget/%s", version_string),         \
                            rel_value);                                  \
      else if (opt.useragent[0] != '\0')                                 \
        request_set_header (req, "User-Agent", opt.useragent, rel_none); \
    }                                                                    \
  while (0)
1795 
1796 /*
1797    Convert time_t to one of valid HTTP date formats
1798    ie. rfc1123-date.
1799 
1800    HTTP-date    = rfc1123-date | rfc850-date | asctime-date
1801    rfc1123-date = wkday "," SP date1 SP time SP "GMT"
1802    rfc850-date  = weekday "," SP date2 SP time SP "GMT"
1803    asctime-date = wkday SP date3 SP time SP 4DIGIT
1804    date1        = 2DIGIT SP month SP 4DIGIT
1805                   ; day month year (e.g., 02 Jun 1982)
1806    date2        = 2DIGIT "-" month "-" 2DIGIT
1807                   ; day-month-year (e.g., 02-Jun-82)
1808    date3        = month SP ( 2DIGIT | ( SP 1DIGIT ))
1809                   ; month day (e.g., Jun  2)
1810    time         = 2DIGIT ":" 2DIGIT ":" 2DIGIT
1811                   ; 00:00:00 - 23:59:59
1812    wkday        = "Mon" | "Tue" | "Wed"
1813                 | "Thu" | "Fri" | "Sat" | "Sun"
1814    weekday      = "Monday" | "Tuesday" | "Wednesday"
1815                 | "Thursday" | "Friday" | "Saturday" | "Sunday"
1816    month        = "Jan" | "Feb" | "Mar" | "Apr"
1817                 | "May" | "Jun" | "Jul" | "Aug"
1818                 | "Sep" | "Oct" | "Nov" | "Dec"
1819 
1820    source: RFC2616  */
1821 static uerr_t
time_to_rfc1123(time_t time,char * buf,size_t bufsize)1822 time_to_rfc1123 (time_t time, char *buf, size_t bufsize)
1823 {
1824   static const char *wkday[] = { "Sun", "Mon", "Tue", "Wed",
1825                                  "Thu", "Fri", "Sat" };
1826   static const char *month[] = { "Jan", "Feb", "Mar", "Apr",
1827                                  "May", "Jun", "Jul", "Aug",
1828                                  "Sep", "Oct", "Nov", "Dec" };
1829   /* rfc1123 example: Thu, 01 Jan 1998 22:12:57 GMT  */
1830   static const char *time_format = "%s, %02d %s %04d %02d:%02d:%02d GMT";
1831 
1832   struct tm *gtm = gmtime (&time);
1833   if (!gtm)
1834     {
1835       logprintf (LOG_NOTQUIET,
1836                  _("gmtime failed. This is probably a bug.\n"));
1837       return TIMECONV_ERR;
1838     }
1839 
1840   snprintf (buf, bufsize, time_format, wkday[gtm->tm_wday],
1841             gtm->tm_mday, month[gtm->tm_mon],
1842             gtm->tm_year + 1900, gtm->tm_hour,
1843             gtm->tm_min, gtm->tm_sec);
1844 
1845   return RETROK;
1846 }
1847 
1848 static struct request *
initialize_request(const struct url * u,struct http_stat * hs,int * dt,struct url * proxy,bool inhibit_keep_alive,bool * basic_auth_finished,wgint * body_data_size,char ** user,char ** passwd,uerr_t * ret)1849 initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
1850                     bool inhibit_keep_alive, bool *basic_auth_finished,
1851                     wgint *body_data_size, char **user, char **passwd, uerr_t *ret)
1852 {
1853   bool head_only = !!(*dt & HEAD_ONLY);
1854   struct request *req;
1855 
1856   /* Prepare the request to send. */
1857   {
1858     char *meth_arg;
1859     const char *meth = "GET";
1860     if (head_only)
1861       meth = "HEAD";
1862     else if (opt.method)
1863       meth = opt.method;
1864     /* Use the full path, i.e. one that includes the leading slash and
1865        the query string.  E.g. if u->path is "foo/bar" and u->query is
1866        "param=value", full_path will be "/foo/bar?param=value".  */
1867     if (proxy
1868 #ifdef HAVE_SSL
1869         /* When using SSL over proxy, CONNECT establishes a direct
1870            connection to the HTTPS server.  Therefore use the same
1871            argument as when talking to the server directly. */
1872         && u->scheme != SCHEME_HTTPS
1873 #endif
1874         )
1875       meth_arg = xstrdup (u->url);
1876     else
1877       meth_arg = url_full_path (u);
1878     req = request_new (meth, meth_arg);
1879   }
1880 
1881   /* Generate the Host header, HOST:PORT.  Take into account that:
1882 
1883      - Broken server-side software often doesn't recognize the PORT
1884        argument, so we must generate "Host: www.server.com" instead of
1885        "Host: www.server.com:80" (and likewise for https port).
1886 
1887      - IPv6 addresses contain ":", so "Host: 3ffe:8100:200:2::2:1234"
1888        becomes ambiguous and needs to be rewritten as "Host:
1889        [3ffe:8100:200:2::2]:1234".  */
1890   {
1891     /* Formats arranged for hfmt[add_port][add_squares].  */
1892     static const char *hfmt[][2] = {
1893       { "%s", "[%s]" }, { "%s:%d", "[%s]:%d" }
1894     };
1895     int add_port = u->port != scheme_default_port (u->scheme);
1896     int add_squares = strchr (u->host, ':') != NULL;
1897     request_set_header (req, "Host",
1898                         aprintf (hfmt[add_port][add_squares], u->host, u->port),
1899                         rel_value);
1900   }
1901 
1902   request_set_header (req, "Referer", hs->referer, rel_none);
1903   if (*dt & SEND_NOCACHE)
1904     {
1905       /* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms...  */
1906       request_set_header (req, "Cache-Control", "no-cache", rel_none);
1907 
1908       /* ... but some HTTP/1.0 caches doesn't implement Cache-Control.  */
1909       request_set_header (req, "Pragma", "no-cache", rel_none);
1910     }
1911   if (*dt & IF_MODIFIED_SINCE)
1912     {
1913       char strtime[32];
1914       uerr_t err = time_to_rfc1123 (hs->orig_file_tstamp, strtime, countof (strtime));
1915 
1916       if (err != RETROK)
1917         {
1918           logputs (LOG_VERBOSE, _("Cannot convert timestamp to http format. "
1919                                   "Falling back to time 0 as last modification "
1920                                   "time.\n"));
1921           strcpy (strtime, "Thu, 01 Jan 1970 00:00:00 GMT");
1922         }
1923       request_set_header (req, "If-Modified-Since", xstrdup (strtime), rel_value);
1924     }
1925   if (hs->restval)
1926     request_set_header (req, "Range",
1927                         aprintf ("bytes=%s-",
1928                                  number_to_static_string (hs->restval)),
1929                         rel_value);
1930   SET_USER_AGENT (req);
1931   request_set_header (req, "Accept", "*/*", rel_none);
1932 #ifdef HAVE_LIBZ
1933   if (opt.compression != compression_none)
1934     request_set_header (req, "Accept-Encoding", "gzip", rel_none);
1935   else
1936 #endif
1937     request_set_header (req, "Accept-Encoding", "identity", rel_none);
1938 
1939   /* Find the username with priority */
1940   if (u->user)
1941     *user = u->user;
1942   else if (opt.user && (opt.use_askpass || opt.ask_passwd))
1943     *user = opt.user;
1944   else if (opt.http_user)
1945     *user = opt.http_user;
1946   else if (opt.user)
1947     *user = opt.user;
1948   else
1949     *user = NULL;
1950 
1951   /* Find the password with priority */
1952   if (u->passwd)
1953     *passwd = u->passwd;
1954   else if (opt.passwd && (opt.use_askpass || opt.ask_passwd))
1955     *passwd = opt.passwd;
1956   else if (opt.http_passwd)
1957     *passwd = opt.http_passwd;
1958   else if (opt.passwd)
1959     *passwd = opt.passwd;
1960   else
1961     *passwd = NULL;
1962 
1963   /* Check for ~/.netrc if none of the above match */
1964   if (opt.netrc && (!*user || !*passwd))
1965     search_netrc (u->host, (const char **) user, (const char **) passwd, 0, NULL);
1966 
1967   /* We only do "site-wide" authentication with "global" user/password
1968    * values unless --auth-no-challenge has been requested; URL user/password
1969    * info overrides. */
1970   if (*user && *passwd && (!u->user || opt.auth_without_challenge))
1971     {
1972       /* If this is a host for which we've already received a Basic
1973        * challenge, we'll go ahead and send Basic authentication creds. */
1974       *basic_auth_finished = maybe_send_basic_creds (u->host, *user, *passwd, req);
1975     }
1976 
1977   if (inhibit_keep_alive)
1978     request_set_header (req, "Connection", "Close", rel_none);
1979   else
1980     {
1981       request_set_header (req, "Connection", "Keep-Alive", rel_none);
1982       if (proxy)
1983         request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none);
1984     }
1985 
1986   if (opt.method)
1987     {
1988 
1989       if (opt.body_data || opt.body_file)
1990         {
1991           request_set_header (req, "Content-Type",
1992                               "application/x-www-form-urlencoded", rel_none);
1993 
1994           if (opt.body_data)
1995             *body_data_size = strlen (opt.body_data);
1996           else
1997             {
1998               *body_data_size = file_size (opt.body_file);
1999               if (*body_data_size == -1)
2000                 {
2001                   logprintf (LOG_NOTQUIET, _("BODY data file %s missing: %s\n"),
2002                              quote (opt.body_file), strerror (errno));
2003                   request_free (&req);
2004                   *ret = FILEBADFILE;
2005                   return NULL;
2006                 }
2007             }
2008           request_set_header (req, "Content-Length",
2009                               xstrdup (number_to_static_string (*body_data_size)),
2010                               rel_value);
2011         }
2012       else if (c_strcasecmp (opt.method, "post") == 0
2013                || c_strcasecmp (opt.method, "put") == 0
2014                || c_strcasecmp (opt.method, "patch") == 0)
2015         request_set_header (req, "Content-Length", "0", rel_none);
2016     }
2017   return req;
2018 }
2019 
2020 static void
initialize_proxy_configuration(const struct url * u,struct request * req,struct url * proxy,char ** proxyauth)2021 initialize_proxy_configuration (const struct url *u, struct request *req,
2022                                 struct url *proxy, char **proxyauth)
2023 {
2024   char *proxy_user, *proxy_passwd;
2025   /* For normal username and password, URL components override
2026      command-line/wgetrc parameters.  With proxy
2027      authentication, it's the reverse, because proxy URLs are
2028      normally the "permanent" ones, so command-line args
2029      should take precedence.  */
2030   if (opt.proxy_user && opt.proxy_passwd)
2031     {
2032       proxy_user = opt.proxy_user;
2033       proxy_passwd = opt.proxy_passwd;
2034     }
2035   else
2036     {
2037       proxy_user = proxy->user;
2038       proxy_passwd = proxy->passwd;
2039     }
2040   /* #### This does not appear right.  Can't the proxy request,
2041      say, `Digest' authentication?  */
2042   if (proxy_user && proxy_passwd)
2043     *proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
2044 
2045   /* Proxy authorization over SSL is handled below. */
2046 #ifdef HAVE_SSL
2047   if (u->scheme != SCHEME_HTTPS)
2048 #endif
2049     request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value);
2050 }
2051 
2052 static uerr_t
establish_connection(const struct url * u,const struct url ** conn_ref,struct http_stat * hs,struct url * proxy,char ** proxyauth,struct request ** req_ref,bool * using_ssl,bool inhibit_keep_alive,int * sock_ref)2053 establish_connection (const struct url *u, const struct url **conn_ref,
2054                       struct http_stat *hs, struct url *proxy,
2055                       char **proxyauth,
2056                       struct request **req_ref, bool *using_ssl,
2057                       bool inhibit_keep_alive,
2058                       int *sock_ref)
2059 {
2060   bool host_lookup_failed = false;
2061   int sock = *sock_ref;
2062   struct request *req = *req_ref;
2063   const struct url *conn = *conn_ref;
2064   struct response *resp;
2065   int write_error;
2066   int statcode;
2067 
2068   if (! inhibit_keep_alive)
2069     {
2070       /* Look for a persistent connection to target host, unless a
2071          proxy is used.  The exception is when SSL is in use, in which
2072          case the proxy is nothing but a passthrough to the target
2073          host, registered as a connection to the latter.  */
2074       const struct url *relevant = conn;
2075 #ifdef HAVE_SSL
2076       if (u->scheme == SCHEME_HTTPS)
2077         relevant = u;
2078 #endif
2079 
2080       if (persistent_available_p (relevant->host, relevant->port,
2081 #ifdef HAVE_SSL
2082                                   relevant->scheme == SCHEME_HTTPS,
2083 #else
2084                                   0,
2085 #endif
2086                                   &host_lookup_failed))
2087         {
2088           int family = socket_family (pconn.socket, ENDPOINT_PEER);
2089           sock = pconn.socket;
2090           *using_ssl = pconn.ssl;
2091 #if ENABLE_IPV6
2092           if (family == AF_INET6)
2093              logprintf (LOG_VERBOSE, _("Reusing existing connection to [%s]:%d.\n"),
2094                         quotearg_style (escape_quoting_style, pconn.host),
2095                          pconn.port);
2096           else
2097 #endif
2098              logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
2099                         quotearg_style (escape_quoting_style, pconn.host),
2100                         pconn.port);
2101           DEBUGP (("Reusing fd %d.\n", sock));
2102           if (pconn.authorized)
2103             /* If the connection is already authorized, the "Basic"
2104                authorization added by code above is unnecessary and
2105                only hurts us.  */
2106             request_remove_header (req, "Authorization");
2107         }
2108       else if (host_lookup_failed)
2109         {
2110           logprintf(LOG_NOTQUIET,
2111                     _("%s: unable to resolve host address %s\n"),
2112                     exec_name, quote (relevant->host));
2113           return HOSTERR;
2114         }
2115       else if (sock != -1)
2116         {
2117           sock = -1;
2118         }
2119     }
2120 
2121   if (sock < 0)
2122     {
2123       sock = connect_to_host (conn->host, conn->port);
2124       if (sock == E_HOST)
2125         return HOSTERR;
2126       else if (sock < 0)
2127         return (retryable_socket_connect_error (errno)
2128                 ? CONERROR : CONIMPOSSIBLE);
2129 
2130 #ifdef HAVE_SSL
2131       if (proxy && u->scheme == SCHEME_HTTPS)
2132         {
2133           char *head;
2134           char *message;
2135           /* When requesting SSL URLs through proxies, use the
2136              CONNECT method to request passthrough.  */
2137           struct request *connreq = request_new ("CONNECT",
2138                               aprintf ("%s:%d", u->host, u->port));
2139           SET_USER_AGENT (connreq);
2140           if (proxyauth)
2141             {
2142               request_set_header (connreq, "Proxy-Authorization",
2143                                   *proxyauth, rel_value);
2144               /* Now that PROXYAUTH is part of the CONNECT request,
2145                  zero it out so we don't send proxy authorization with
2146                  the regular request below.  */
2147               *proxyauth = NULL;
2148             }
2149           request_set_header (connreq, "Host",
2150                               aprintf ("%s:%d", u->host, u->port),
2151                               rel_value);
2152 
2153           write_error = request_send (connreq, sock, 0);
2154           request_free (&connreq);
2155           if (write_error < 0)
2156             {
2157               CLOSE_INVALIDATE (sock);
2158               return WRITEFAILED;
2159             }
2160 
2161           head = read_http_response_head (sock);
2162           if (!head)
2163             {
2164               logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
2165                          fd_errstr (sock));
2166               CLOSE_INVALIDATE (sock);
2167               return HERR;
2168             }
2169           message = NULL;
2170           if (!*head)
2171             {
2172               xfree (head);
2173               goto failed_tunnel;
2174             }
2175           DEBUGP (("proxy responded with: [%s]\n", head));
2176 
2177           resp = resp_new (head);
2178           statcode = resp_status (resp, &message);
2179           if (statcode < 0)
2180             {
2181               char *tms = datetime_str (time (NULL));
2182               logprintf (LOG_VERBOSE, "%d\n", statcode);
2183               logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
2184                          quotearg_style (escape_quoting_style,
2185                                          _("Malformed status line")));
2186               xfree (head);
2187               return HERR;
2188             }
2189           xfree (hs->message);
2190           hs->message = xstrdup (message);
2191           resp_free (&resp);
2192           xfree (head);
2193           if (statcode != 200)
2194             {
2195             failed_tunnel:
2196               logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
2197                          message ? quotearg_style (escape_quoting_style, message) : "?");
2198               xfree (message);
2199               return CONSSLERR;
2200             }
2201           xfree (message);
2202 
2203           /* SOCK is now *really* connected to u->host, so update CONN
2204              to reflect this.  That way register_persistent will
2205              register SOCK as being connected to u->host:u->port.  */
2206           conn = u;
2207         }
2208 
2209       if (conn->scheme == SCHEME_HTTPS)
2210         {
2211           if (!ssl_connect_wget (sock, u->host, NULL))
2212             {
2213               CLOSE_INVALIDATE (sock);
2214               return CONSSLERR;
2215             }
2216           else if (!ssl_check_certificate (sock, u->host))
2217             {
2218               CLOSE_INVALIDATE (sock);
2219               return VERIFCERTERR;
2220             }
2221           *using_ssl = true;
2222         }
2223 #endif /* HAVE_SSL */
2224     }
2225   *conn_ref = conn;
2226   *req_ref = req;
2227   *sock_ref = sock;
2228   return RETROK;
2229 }
2230 
2231 static uerr_t
set_file_timestamp(struct http_stat * hs)2232 set_file_timestamp (struct http_stat *hs)
2233 {
2234   bool local_dot_orig_file_exists = false;
2235   char *local_filename = NULL;
2236   struct stat st;
2237   char buf[1024];
2238 
2239   if (opt.backup_converted)
2240     /* If -K is specified, we'll act on the assumption that it was specified
2241         last time these files were downloaded as well, and instead of just
2242         comparing local file X against server file X, we'll compare local
2243         file X.orig (if extant, else X) against server file X.  If -K
2244         _wasn't_ specified last time, or the server contains files called
2245         *.orig, -N will be back to not operating correctly with -k. */
2246     {
2247       size_t filename_len = strlen (hs->local_file);
2248       char *filename_plus_orig_suffix;
2249 
2250       if (filename_len + sizeof (ORIG_SFX) > sizeof (buf))
2251         filename_plus_orig_suffix = xmalloc (filename_len + sizeof (ORIG_SFX));
2252       else
2253         filename_plus_orig_suffix = buf;
2254 
2255       /* Would a single s[n]printf() call be faster?  --dan
2256 
2257           Definitely not.  sprintf() is horribly slow.  It's a
2258           different question whether the difference between the two
2259           affects a program.  Usually I'd say "no", but at one
2260           point I profiled Wget, and found that a measurable and
2261           non-negligible amount of time was lost calling sprintf()
2262           in url.c.  Replacing sprintf with inline calls to
2263           strcpy() and number_to_string() made a difference.
2264           --hniksic */
2265       memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
2266       memcpy (filename_plus_orig_suffix + filename_len,
2267               ORIG_SFX, sizeof (ORIG_SFX));
2268 
2269       /* Try to stat() the .orig file. */
2270       if (stat (filename_plus_orig_suffix, &st) == 0)
2271         {
2272           local_dot_orig_file_exists = true;
2273           local_filename = filename_plus_orig_suffix;
2274         }
2275     }
2276 
2277   if (!local_dot_orig_file_exists)
2278     /* Couldn't stat() <file>.orig, so try to stat() <file>. */
2279     if (stat (hs->local_file, &st) == 0)
2280       {
2281         if (local_filename != buf)
2282           xfree (local_filename);
2283         local_filename = hs->local_file;
2284       }
2285 
2286   if (local_filename != NULL)
2287     /* There was a local file, so we'll check later to see if the version
2288         the server has is the same version we already have, allowing us to
2289         skip a download. */
2290     {
2291       if (local_filename == buf || local_filename == hs->local_file)
2292         hs->orig_file_name = xstrdup (local_filename); // on stack or a copy, make a heap copy
2293       else
2294         hs->orig_file_name = local_filename; // was previously malloc'ed
2295       hs->orig_file_size = st.st_size;
2296       hs->orig_file_tstamp = st.st_mtime;
2297 #ifdef WINDOWS
2298       /* Modification time granularity is 2 seconds for Windows, so
2299           increase local time by 1 second for later comparison. */
2300       ++hs->orig_file_tstamp;
2301 #endif
2302       hs->timestamp_checked = true;
2303     }
2304 
2305   return RETROK;
2306 }
2307 
2308 static uerr_t
check_file_output(const struct url * u,struct http_stat * hs,struct response * resp,char * hdrval,size_t hdrsize)2309 check_file_output (const struct url *u, struct http_stat *hs,
2310                    struct response *resp, char *hdrval, size_t hdrsize)
2311 {
2312   /* Determine the local filename if needed. Notice that if -O is used
2313    * hstat.local_file is set by http_loop to the argument of -O. */
2314   if (!hs->local_file)
2315     {
2316       char *local_file = NULL;
2317 
2318       /* Honor Content-Disposition whether possible. */
2319       if (!opt.content_disposition
2320           || !resp_header_copy (resp, "Content-Disposition",
2321                                 hdrval, hdrsize)
2322           || !parse_content_disposition (hdrval, &local_file))
2323         {
2324           /* The Content-Disposition header is missing or broken.
2325            * Choose unique file name according to given URL. */
2326           hs->local_file = url_file_name (u, NULL);
2327         }
2328       else
2329         {
2330           DEBUGP (("Parsed filename from Content-Disposition: %s\n",
2331                   local_file));
2332           hs->local_file = url_file_name (u, local_file);
2333         }
2334 
2335       xfree (local_file);
2336     }
2337 
2338   hs->temporary = opt.delete_after || opt.spider || !acceptable (hs->local_file);
2339   if (hs->temporary)
2340     {
2341       char *tmp = aprintf ("%s.tmp", hs->local_file);
2342       xfree (hs->local_file);
2343       hs->local_file = tmp;
2344     }
2345 
2346   /* TODO: perform this check only once. */
2347   if (!hs->existence_checked && file_exists_p (hs->local_file, NULL))
2348     {
2349       if (opt.noclobber && !opt.output_document)
2350         {
2351           /* If opt.noclobber is turned on and file already exists, do not
2352              retrieve the file. But if the output_document was given, then this
2353              test was already done and the file didn't exist. Hence the !opt.output_document */
2354           return RETRUNNEEDED;
2355         }
2356       else if (!ALLOW_CLOBBER)
2357         {
2358           char *unique = unique_name_passthrough (hs->local_file);
2359           if (unique != hs->local_file)
2360             xfree (hs->local_file);
2361           hs->local_file = unique;
2362         }
2363     }
2364   hs->existence_checked = true;
2365 
2366   /* Support timestamping */
2367   if (opt.timestamping && !hs->timestamp_checked)
2368     {
2369       uerr_t timestamp_err = set_file_timestamp (hs);
2370       if (timestamp_err != RETROK)
2371         return timestamp_err;
2372     }
2373   return RETROK;
2374 }
2375 
2376 static uerr_t
check_auth(const struct url * u,char * user,char * passwd,struct response * resp,struct request * req,bool * ntlm_seen_ref,bool * retry,bool * basic_auth_finished_ref,bool * auth_finished_ref)2377 check_auth (const struct url *u, char *user, char *passwd, struct response *resp,
2378             struct request *req, bool *ntlm_seen_ref, bool *retry,
2379             bool *basic_auth_finished_ref, bool *auth_finished_ref)
2380 {
2381   uerr_t auth_err = RETROK;
2382   bool basic_auth_finished = *basic_auth_finished_ref;
2383   bool auth_finished = *auth_finished_ref;
2384   bool ntlm_seen = *ntlm_seen_ref;
2385   char buf[256], *tmp = NULL;
2386 
2387   *retry = false;
2388 
2389   if (!auth_finished && (user && passwd))
2390     {
2391       /* IIS sends multiple copies of WWW-Authenticate, one with
2392          the value "negotiate", and other(s) with data.  Loop over
2393          all the occurrences and pick the one we recognize.  */
2394       int wapos;
2395       const char *www_authenticate = NULL;
2396       const char *wabeg, *waend;
2397       const char *digest = NULL, *basic = NULL, *ntlm = NULL;
2398 
2399       for (wapos = 0; !ntlm
2400              && (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
2401                                              &wabeg, &waend)) != -1;
2402            ++wapos)
2403         {
2404           param_token name, value;
2405           size_t len = waend - wabeg;
2406 
2407           if (tmp != buf)
2408             xfree (tmp);
2409 
2410           if (len < sizeof (buf))
2411             tmp = buf;
2412           else
2413             tmp = xmalloc (len + 1);
2414 
2415           memcpy (tmp, wabeg, len);
2416           tmp[len] = 0;
2417 
2418           www_authenticate = tmp;
2419 
2420           for (;!ntlm;)
2421             {
2422               /* extract the auth-scheme */
2423               while (c_isspace (*www_authenticate)) www_authenticate++;
2424               name.e = name.b = www_authenticate;
2425               while (*name.e && !c_isspace (*name.e)) name.e++;
2426 
2427               if (name.b == name.e)
2428                 break;
2429 
2430               DEBUGP (("Auth scheme found '%.*s'\n", (int) (name.e - name.b), name.b));
2431 
2432               if (known_authentication_scheme_p (name.b, name.e))
2433                 {
2434                   if (BEGINS_WITH (name.b, "NTLM"))
2435                     {
2436                       ntlm = name.b;
2437                       break; /* this is the most secure challenge, stop here */
2438                     }
2439                   else if (!digest && BEGINS_WITH (name.b, "Digest"))
2440                     digest = name.b;
2441                   else if (!basic && BEGINS_WITH (name.b, "Basic"))
2442                     basic = name.b;
2443                 }
2444 
2445               /* now advance over the auth-params */
2446               www_authenticate = name.e;
2447               DEBUGP (("Auth param list '%s'\n", www_authenticate));
2448               while (extract_param (&www_authenticate, &name, &value, ',', NULL) && name.b && value.b)
2449                 {
2450                   DEBUGP (("Auth param %.*s=%.*s\n",
2451                            (int) (name.e - name.b), name.b, (int) (value.e - value.b), value.b));
2452                 }
2453             }
2454         }
2455 
2456       if (!basic && !digest && !ntlm)
2457         {
2458           /* If the authentication header is missing or
2459              unrecognized, there's no sense in retrying.  */
2460           logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
2461         }
2462       else if (!basic_auth_finished
2463                || !basic)
2464         {
2465           char *pth = url_full_path (u);
2466           const char *value;
2467           uerr_t *auth_stat;
2468           auth_stat = xmalloc (sizeof (uerr_t));
2469           *auth_stat = RETROK;
2470 
2471           if (ntlm)
2472             www_authenticate = ntlm;
2473           else if (digest)
2474             www_authenticate = digest;
2475           else
2476             www_authenticate = basic;
2477 
2478           logprintf (LOG_NOTQUIET, _("Authentication selected: %s\n"), www_authenticate);
2479 
2480           value =  create_authorization_line (www_authenticate,
2481                                               user, passwd,
2482                                               request_method (req),
2483                                               pth,
2484                                               &auth_finished,
2485                                               auth_stat);
2486 
2487           auth_err = *auth_stat;
2488           xfree (auth_stat);
2489           xfree (pth);
2490           if (auth_err == RETROK)
2491             {
2492               request_set_header (req, "Authorization", value, rel_value);
2493 
2494               if (BEGINS_WITH (www_authenticate, "NTLM"))
2495                 ntlm_seen = true;
2496               else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
2497                 {
2498                   /* Need to register this host as using basic auth,
2499                    * so we automatically send creds next time. */
2500                   register_basic_auth_host (u->host);
2501                 }
2502 
2503               *retry = true;
2504               goto cleanup;
2505             }
2506           else
2507             {
2508               /* Creating the Authorization header went wrong */
2509               xfree (value);
2510             }
2511         }
2512       else
2513         {
2514           /* We already did Basic auth, and it failed. Gotta
2515            * give up. */
2516         }
2517     }
2518 
2519  cleanup:
2520    if (tmp != buf)
2521      xfree (tmp);
2522   *ntlm_seen_ref = ntlm_seen;
2523   *basic_auth_finished_ref = basic_auth_finished;
2524   *auth_finished_ref = auth_finished;
2525   return auth_err;
2526 }
2527 
2528 static uerr_t
open_output_stream(struct http_stat * hs,int count,FILE ** fp)2529 open_output_stream (struct http_stat *hs, int count, FILE **fp)
2530 {
2531 /* 2005-06-17 SMS.
2532    For VMS, define common fopen() optional arguments.
2533 */
2534 #ifdef __VMS
2535 # define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
2536 # define FOPEN_BIN_FLAG 3
2537 #else /* def __VMS */
2538 # define FOPEN_BIN_FLAG true
2539 #endif /* def __VMS [else] */
2540 
2541   /* Open the local file.  */
2542   if (!output_stream)
2543     {
2544       mkalldirs (hs->local_file);
2545       if (opt.backups)
2546         rotate_backups (hs->local_file);
2547       if (hs->restval)
2548         {
2549 #ifdef __VMS
2550           int open_id;
2551 
2552           open_id = 21;
2553           *fp = fopen (hs->local_file, "ab", FOPEN_OPT_ARGS);
2554 #else /* def __VMS */
2555           *fp = fopen (hs->local_file, "ab");
2556 #endif /* def __VMS [else] */
2557         }
2558       else if (ALLOW_CLOBBER || count > 0)
2559         {
2560           if (opt.unlink_requested && file_exists_p (hs->local_file, NULL))
2561             {
2562               if (unlink (hs->local_file) < 0)
2563                 {
2564                   logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file,
2565                              strerror (errno));
2566                   return UNLINKERR;
2567                 }
2568             }
2569 
2570 #ifdef __VMS
2571           int open_id;
2572 
2573           open_id = 22;
2574           *fp = fopen (hs->local_file, "wb", FOPEN_OPT_ARGS);
2575 #else /* def __VMS */
2576           if (hs->temporary)
2577             {
2578               *fp = fdopen (open (hs->local_file, O_BINARY | O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR | S_IWUSR), "wb");
2579             }
2580           else
2581             {
2582               *fp = fopen (hs->local_file, "wb");
2583             }
2584 
2585 #endif /* def __VMS [else] */
2586         }
2587       else
2588         {
2589           *fp = fopen_excl (hs->local_file, FOPEN_BIN_FLAG);
2590           if (!*fp && errno == EEXIST)
2591             {
2592               /* We cannot just invent a new name and use it (which is
2593                  what functions like unique_create typically do)
2594                  because we told the user we'd use this name.
2595                  Instead, return and retry the download.  */
2596               logprintf (LOG_NOTQUIET,
2597                          _("%s has sprung into existence.\n"),
2598                          hs->local_file);
2599               return FOPEN_EXCL_ERR;
2600             }
2601         }
2602       if (!*fp)
2603         {
2604           logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
2605           return FOPENERR;
2606         }
2607     }
2608   else
2609     *fp = output_stream;
2610 
2611   /* Print fetch message, if opt.verbose.  */
2612   logprintf (LOG_VERBOSE, _("Saving to: %s\n"),
2613              HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
2614 
2615   return RETROK;
2616 }
2617 
2618 /* Set proper type flags based on type string.  */
2619 static void
set_content_type(int * dt,const char * type)2620 set_content_type (int *dt, const char *type)
2621 {
2622   /* If content-type is not given, assume text/html.  This is because
2623      of the multitude of broken CGI's that "forget" to generate the
2624      content-type.  */
2625   if (!type ||
2626       0 == c_strcasecmp (type, TEXTHTML_S) ||
2627       0 == c_strcasecmp (type, TEXTXHTML_S))
2628     *dt |= TEXTHTML;
2629   else
2630     *dt &= ~TEXTHTML;
2631 
2632   if (type &&
2633       0 == c_strcasecmp (type, TEXTCSS_S))
2634     *dt |= TEXTCSS;
2635   else
2636     *dt &= ~TEXTCSS;
2637 }
2638 
2639 #ifdef HAVE_METALINK
2640 /* Will return proper metalink_t structure if enough data was found in
2641    http response resp. Otherwise returns NULL.
2642    Two exit points: one for success and one for failure.  */
2643 static metalink_t *
metalink_from_http(const struct response * resp,const struct http_stat * hs,const struct url * u)2644 metalink_from_http (const struct response *resp, const struct http_stat *hs,
2645                     const struct url *u)
2646 {
2647   metalink_t *metalink = NULL;
2648   metalink_file_t *mfile = xnew0 (metalink_file_t);
2649   const char *val_beg, *val_end;
2650   int res_count = 0, meta_count = 0, hash_count = 0, sig_count = 0, i;
2651 
2652   DEBUGP (("Checking for Metalink in HTTP response\n"));
2653 
2654   /* Initialize metalink file for our simple use case.  */
2655   if (hs->local_file)
2656     mfile->name = xstrdup (hs->local_file);
2657   else
2658     mfile->name = url_file_name (u, NULL);
2659 
2660   /* Begin with 1-element array (for 0-termination). */
2661   mfile->checksums = xnew0 (metalink_checksum_t *);
2662   mfile->resources = xnew0 (metalink_resource_t *);
2663   mfile->metaurls = xnew0 (metalink_metaurl_t *);
2664 
2665   /* Process the Content-Type header.  */
2666   if (resp_header_locate (resp, "Content-Type", 0, &val_beg, &val_end) != -1)
2667     {
2668       metalink_metaurl_t murl = {0};
2669 
2670       const char *type_beg, *type_end;
2671       char *typestr = NULL;
2672       char *namestr = NULL;
2673       size_t type_len;
2674 
2675       DEBUGP (("Processing Content-Type header...\n"));
2676 
2677       /* Find beginning of type.  */
2678       type_beg = val_beg;
2679       while (type_beg < val_end && c_isspace (*type_beg))
2680         type_beg++;
2681 
2682       /* Find end of type.  */
2683       type_end = type_beg + 1;
2684       while (type_end < val_end &&
2685              *type_end != ';' &&
2686              *type_end != ' ' &&
2687              *type_end != '\r' &&
2688              *type_end != '\n')
2689         type_end++;
2690 
2691       if (type_beg >= val_end || type_end > val_end)
2692         {
2693           DEBUGP (("Invalid Content-Type header. Ignoring.\n"));
2694           goto skip_content_type;
2695         }
2696 
2697       type_len = type_end - type_beg;
2698       typestr = xstrndup (type_beg, type_len);
2699 
2700       DEBUGP (("Content-Type: %s\n", typestr));
2701 
2702       if (strcmp (typestr, "application/metalink4+xml"))
2703         {
2704           xfree (typestr);
2705           goto skip_content_type;
2706         }
2707 
2708       /*
2709         Valid ranges for the "pri" attribute are from
2710         1 to 999999.  Mirror servers with a lower value of the "pri"
2711         attribute have a higher priority, while mirrors with an undefined
2712         "pri" attribute are considered to have a value of 999999, which is
2713         the lowest priority.
2714 
2715         rfc6249 section 3.1
2716       */
2717       murl.priority = DEFAULT_PRI;
2718 
2719       murl.mediatype = typestr;
2720       typestr = NULL;
2721 
2722       if (opt.content_disposition
2723           && resp_header_locate (resp, "Content-Disposition", 0, &val_beg, &val_end) != -1)
2724         {
2725           find_key_value (val_beg, val_end, "filename", &namestr);
2726           murl.name = namestr;
2727           namestr = NULL;
2728         }
2729 
2730       murl.url = xstrdup (u->url);
2731 
2732       DEBUGP (("URL=%s\n", murl.url));
2733       DEBUGP (("MEDIATYPE=%s\n", murl.mediatype));
2734       DEBUGP (("NAME=%s\n", murl.name ? murl.name : ""));
2735       DEBUGP (("PRIORITY=%d\n", murl.priority));
2736 
2737       /* 1 slot from new resource, 1 slot for null-termination.  */
2738       mfile->metaurls = xrealloc (mfile->metaurls,
2739                                   sizeof (metalink_metaurl_t *) * (meta_count + 2));
2740       mfile->metaurls[meta_count] = xnew0 (metalink_metaurl_t);
2741       *mfile->metaurls[meta_count] = murl;
2742       meta_count++;
2743     }
2744 skip_content_type:
2745 
2746   /* Find all Link headers.  */
2747   for (i = 0;
2748        (i = resp_header_locate (resp, "Link", i, &val_beg, &val_end)) != -1;
2749        i++)
2750     {
2751       char *rel = NULL, *reltype = NULL;
2752       char *urlstr = NULL;
2753       const char *url_beg, *url_end, *attrs_beg;
2754       size_t url_len;
2755 
2756       /* Sample Metalink Link headers:
2757 
2758            Link: <http://www2.example.com/dir1/dir2/dir3/dir4/dir5/example.ext>;
2759            rel=duplicate; pri=1; pref; geo=gb; depth=4
2760 
2761            Link: <http://example.com/example.ext.asc>; rel=describedby;
2762            type="application/pgp-signature"
2763        */
2764 
2765       /* Find beginning of URL.  */
2766       url_beg = val_beg;
2767       while (url_beg < val_end - 1 && c_isspace (*url_beg))
2768         url_beg++;
2769 
2770       /* Find end of URL.  */
2771       /* The convention here is that end ptr points to one element after
2772          end of string. In this case, it should be pointing to the '>', which
2773          is one element after end of actual URL. Therefore, it should never point
2774          to val_end, which is one element after entire header value string.  */
2775       url_end = url_beg + 1;
2776       while (url_end < val_end - 1 && *url_end != '>')
2777         url_end++;
2778 
2779       if (url_beg >= val_end || url_end >= val_end ||
2780           *url_beg != '<' || *url_end != '>')
2781         {
2782           DEBUGP (("This is not a valid Link header. Ignoring.\n"));
2783           continue;
2784         }
2785 
2786       /* Skip <.  */
2787       url_beg++;
2788       url_len = url_end - url_beg;
2789 
2790       /* URL found. Now handle the attributes.  */
2791       attrs_beg = url_end + 1;
2792 
2793       /* First we need to find out what type of link it is. Currently, we
2794          support rel=duplicate and rel=describedby.  */
2795       if (!find_key_value (attrs_beg, val_end, "rel", &rel))
2796         {
2797           DEBUGP (("No rel value in Link header, skipping.\n"));
2798           continue;
2799         }
2800 
2801       urlstr = xstrndup (url_beg, url_len);
2802       DEBUGP (("URL=%s\n", urlstr));
2803       DEBUGP (("rel=%s\n", rel));
2804 
2805       if (!strcmp (rel, "describedby"))
2806         find_key_value (attrs_beg, val_end, "type", &reltype);
2807 
2808       /* Handle signatures.
2809          Libmetalink only supports one signature per file. Therefore we stop
2810          as soon as we successfully get first supported signature.  */
2811       if (sig_count == 0 &&
2812           reltype && !strcmp (reltype, "application/pgp-signature"))
2813         {
2814           /* Download the signature to a temporary file.  */
2815           FILE *_output_stream = output_stream;
2816           bool _output_stream_regular = output_stream_regular;
2817 
2818           output_stream = tmpfile ();
2819           if (output_stream)
2820             {
2821               struct iri *iri = iri_new ();
2822               struct url *url;
2823               int url_err;
2824 
2825               set_uri_encoding (iri, opt.locale, true);
2826               url = url_parse (urlstr, &url_err, iri, false);
2827 
2828               if (!url)
2829                 {
2830                   char *error = url_error (urlstr, url_err);
2831                   logprintf (LOG_NOTQUIET, _("When downloading signature:\n"
2832                                              "%s: %s.\n"), urlstr, error);
2833                   xfree (error);
2834                   iri_free (iri);
2835                 }
2836               else
2837                 {
2838                   /* Avoid recursive Metalink from HTTP headers.  */
2839                   bool _metalink_http = opt.metalink_over_http;
2840                   uerr_t retr_err;
2841 
2842                   opt.metalink_over_http = false;
2843                   retr_err = retrieve_url (url, urlstr, NULL, NULL,
2844                                            NULL, NULL, false, iri, false);
2845                   opt.metalink_over_http = _metalink_http;
2846 
2847                   url_free (url);
2848                   iri_free (iri);
2849 
2850                   if (retr_err == RETROK)
2851                     {
2852                       /* Signature is in the temporary file. Read it into
2853                          metalink resource structure.  */
2854                       metalink_signature_t msig;
2855                       size_t siglen;
2856 
2857                       fseek (output_stream, 0, SEEK_END);
2858                       siglen = ftell (output_stream);
2859                       fseek (output_stream, 0, SEEK_SET);
2860 
2861                       DEBUGP (("siglen=%lu\n", siglen));
2862 
2863                       msig.signature = xmalloc (siglen + 1);
2864                       if (fread (msig.signature, siglen, 1, output_stream) != 1)
2865                         {
2866                           logputs (LOG_NOTQUIET,
2867                                    _("Unable to read signature content from "
2868                                      "temporary file. Skipping.\n"));
2869                           xfree (msig.signature);
2870                         }
2871                       else
2872                         {
2873                           msig.signature[siglen] = '\0'; /* Just in case.  */
2874                           msig.mediatype = xstrdup ("application/pgp-signature");
2875 
2876                           DEBUGP (("Signature (%s):\n%s\n",
2877                                    msig.mediatype, msig.signature));
2878 
2879                           mfile->signature = xnew (metalink_signature_t);
2880                           *mfile->signature = msig;
2881 
2882                           sig_count++;
2883                         }
2884                     }
2885                 }
2886               fclose (output_stream);
2887             }
2888           else
2889             {
2890               logputs (LOG_NOTQUIET, _("Could not create temporary file. "
2891                                        "Skipping signature download.\n"));
2892             }
2893           output_stream_regular = _output_stream_regular;
2894           output_stream = _output_stream;
2895         } /* Iterate over signatures.  */
2896 
2897         /* Handle Metalink resources.  */
2898       else if (!strcmp (rel, "duplicate"))
2899         {
2900           metalink_resource_t mres = {0};
2901           char *pristr;
2902 
2903           /*
2904              Valid ranges for the "pri" attribute are from
2905              1 to 999999.  Mirror servers with a lower value of the "pri"
2906              attribute have a higher priority, while mirrors with an undefined
2907              "pri" attribute are considered to have a value of 999999, which is
2908              the lowest priority.
2909 
2910              rfc6249 section 3.1
2911            */
2912           mres.priority = DEFAULT_PRI;
2913           if (find_key_value (url_end, val_end, "pri", &pristr))
2914             {
2915               long pri;
2916               char *end_pristr;
2917               /* Do not care for errno since 0 is error in this case.  */
2918               pri = strtol (pristr, &end_pristr, 10);
2919               if (end_pristr != pristr + strlen (pristr) ||
2920                   !VALID_PRI_RANGE (pri))
2921                 {
2922                   /* This is against the specification, so let's inform the user.  */
2923                   logprintf (LOG_NOTQUIET,
2924                              _("Invalid pri value. Assuming %d.\n"),
2925                              DEFAULT_PRI);
2926                 }
2927               else
2928                 mres.priority = pri;
2929               xfree (pristr);
2930             }
2931 
2932           switch (url_scheme (urlstr))
2933             {
2934             case SCHEME_HTTP:
2935               mres.type = xstrdup ("http");
2936               break;
2937 #ifdef HAVE_SSL
2938             case SCHEME_HTTPS:
2939               mres.type = xstrdup ("https");
2940               break;
2941             case SCHEME_FTPS:
2942               mres.type = xstrdup ("ftps");
2943               break;
2944 #endif
2945             case SCHEME_FTP:
2946               mres.type = xstrdup ("ftp");
2947               break;
2948             default:
2949               DEBUGP (("Unsupported url scheme in %s. Skipping resource.\n", urlstr));
2950             }
2951 
2952           if (mres.type)
2953             {
2954               DEBUGP (("TYPE=%s\n", mres.type));
2955 
2956               /* At this point we have validated the new resource.  */
2957 
2958               find_key_value (url_end, val_end, "geo", &mres.location);
2959 
2960               mres.url = urlstr;
2961               urlstr = NULL;
2962 
2963               mres.preference = 0;
2964               if (has_key (url_end, val_end, "pref"))
2965                 {
2966                   DEBUGP (("This resource has preference\n"));
2967                   mres.preference = 1;
2968                 }
2969 
2970               /* 1 slot from new resource, 1 slot for null-termination.  */
2971               mfile->resources = xrealloc (mfile->resources,
2972                                            sizeof (metalink_resource_t *) * (res_count + 2));
2973               mfile->resources[res_count] = xnew0 (metalink_resource_t);
2974               *mfile->resources[res_count] = mres;
2975               res_count++;
2976             }
2977         } /* Handle resource link (rel=duplicate).  */
2978 
2979       /* Handle Metalink/XML resources.  */
2980       else if (reltype && !strcmp (reltype, "application/metalink4+xml"))
2981         {
2982           metalink_metaurl_t murl = {0};
2983           char *pristr;
2984 
2985           /*
2986              Valid ranges for the "pri" attribute are from
2987              1 to 999999.  Mirror servers with a lower value of the "pri"
2988              attribute have a higher priority, while mirrors with an undefined
2989              "pri" attribute are considered to have a value of 999999, which is
2990              the lowest priority.
2991 
2992              rfc6249 section 3.1
2993            */
2994           murl.priority = DEFAULT_PRI;
2995           if (find_key_value (url_end, val_end, "pri", &pristr))
2996             {
2997               long pri;
2998               char *end_pristr;
2999               /* Do not care for errno since 0 is error in this case.  */
3000               pri = strtol (pristr, &end_pristr, 10);
3001               if (end_pristr != pristr + strlen (pristr) ||
3002                   !VALID_PRI_RANGE (pri))
3003                 {
3004                   /* This is against the specification, so let's inform the user.  */
3005                   logprintf (LOG_NOTQUIET,
3006                              _("Invalid pri value. Assuming %d.\n"),
3007                              DEFAULT_PRI);
3008                 }
3009               else
3010                 murl.priority = pri;
3011               xfree (pristr);
3012             }
3013 
3014           murl.mediatype = xstrdup (reltype);
3015 
3016           DEBUGP (("MEDIATYPE=%s\n", murl.mediatype));
3017 
3018           /* At this point we have validated the new resource.  */
3019 
3020           find_key_value (url_end, val_end, "name", &murl.name);
3021 
3022           murl.url = urlstr;
3023           urlstr = NULL;
3024 
3025           /* 1 slot from new resource, 1 slot for null-termination.  */
3026           mfile->metaurls = xrealloc (mfile->metaurls,
3027                                        sizeof (metalink_metaurl_t *) * (meta_count + 2));
3028           mfile->metaurls[meta_count] = xnew0 (metalink_metaurl_t);
3029           *mfile->metaurls[meta_count] = murl;
3030           meta_count++;
3031         } /* Handle resource link (rel=describedby).  */
3032       else
3033         DEBUGP (("This link header was not used for Metalink\n"));
3034 
3035       xfree (urlstr);
3036       xfree (reltype);
3037       xfree (rel);
3038     } /* Iterate over link headers.  */
3039 
3040   /* Null-terminate resources array.  */
3041   mfile->resources[res_count] = 0;
3042   mfile->metaurls[meta_count] = 0;
3043 
3044   if (res_count == 0 && meta_count == 0)
3045     {
3046       DEBUGP (("No valid metalink references found.\n"));
3047       goto fail;
3048     }
3049 
3050   /* Find all Digest headers.  */
3051   for (i = 0;
3052        (i = resp_header_locate (resp, "Digest", i, &val_beg, &val_end)) != -1;
3053        i++)
3054     {
3055       const char *dig_pos;
3056       char *dig_type, *dig_hash;
3057 
3058       /* Each Digest header can include multiple hashes. Example:
3059            Digest: SHA=thvDyvhfIqlvFe+A9MYgxAfm1q5=,unixsum=30637
3060            Digest: md5=HUXZLQLMuI/KZ5KDcJPcOA==
3061        */
3062       for (dig_pos = val_beg;
3063            (dig_pos = find_key_values (dig_pos, val_end, &dig_type, &dig_hash));
3064            dig_pos++)
3065         {
3066           /* The hash here is assumed to be base64. We need the hash in hex.
3067              Therefore we convert: base64 -> binary -> hex.  */
3068           const size_t dig_hash_str_len = strlen (dig_hash);
3069           char bin_hash[256];
3070           ssize_t hash_bin_len;
3071 
3072           // there is no hash with that size
3073           if (dig_hash_str_len >= sizeof (bin_hash))
3074             {
3075               DEBUGP (("Hash too long, ignored.\n"));
3076               xfree (dig_type);
3077               xfree (dig_hash);
3078               continue;
3079             }
3080 
3081           hash_bin_len = wget_base64_decode (dig_hash, bin_hash, dig_hash_str_len * 3 / 4 + 1);
3082 
3083           /* Detect malformed base64 input.  */
3084           if (hash_bin_len < 0)
3085             {
3086               DEBUGP (("Malformed base64 input, ignored.\n"));
3087               xfree (dig_type);
3088               xfree (dig_hash);
3089               continue;
3090             }
3091 
3092           /* One slot for me, one for zero-termination.  */
3093           mfile->checksums =
3094                   xrealloc (mfile->checksums,
3095                             sizeof (metalink_checksum_t *) * (hash_count + 2));
3096           mfile->checksums[hash_count] = xnew (metalink_checksum_t);
3097           mfile->checksums[hash_count]->type = dig_type;
3098 
3099           mfile->checksums[hash_count]->hash = xmalloc ((size_t)hash_bin_len * 2 + 1);
3100           wg_hex_to_string (mfile->checksums[hash_count]->hash, bin_hash, (size_t)hash_bin_len);
3101 
3102           xfree (dig_hash);
3103 
3104           hash_count++;
3105         }
3106     }
3107 
3108   /* Zero-terminate checksums array.  */
3109   mfile->checksums[hash_count] = 0;
3110 
3111   /*
3112     If Instance Digests are not provided by the Metalink servers, the
3113     Link header fields pertaining to this specification MUST be ignored.
3114 
3115     rfc6249 section 6
3116    */
3117   if (res_count && hash_count == 0)
3118     {
3119       logputs (LOG_VERBOSE,
3120                _("Could not find acceptable digest for Metalink resources.\n"
3121                  "Ignoring them.\n"));
3122       goto fail;
3123     }
3124 
3125   /* Metalink data is OK. Now we just need to sort the resources based
3126      on their priorities, preference, and perhaps location.  */
3127   stable_sort (mfile->resources, res_count, sizeof (metalink_resource_t *), metalink_res_cmp);
3128   stable_sort (mfile->metaurls, meta_count, sizeof (metalink_metaurl_t *), metalink_meta_cmp);
3129 
3130   /* Restore sensible preference values (in case someone cares to look).  */
3131   for (i = 0; i < res_count; ++i)
3132     mfile->resources[i]->preference = 1000000 - mfile->resources[i]->priority;
3133 
3134   metalink = xnew0 (metalink_t);
3135   metalink->files = xmalloc (sizeof (metalink_file_t *) * 2);
3136   metalink->files[0] = mfile;
3137   metalink->files[1] = 0;
3138   metalink->origin = xstrdup (u->url);
3139   metalink->version = METALINK_VERSION_4;
3140   /* Leave other fields set to 0.  */
3141 
3142   return metalink;
3143 
3144 fail:
3145   /* Free all allocated memory.  */
3146   if (metalink)
3147     metalink_delete (metalink);
3148   else
3149     metalink_file_delete (mfile);
3150   return NULL;
3151 }
3152 #endif /* HAVE_METALINK */
3153 
3154 /* Retrieve a document through HTTP protocol.  It recognizes status
3155    code, and correctly handles redirections.  It closes the network
3156    socket.  If it receives an error from the functions below it, it
3157    will print it if there is enough information to do so (almost
3158    always), returning the error to the caller (i.e. http_loop).
3159 
3160    Various HTTP parameters are stored to hs.
3161 
3162    If PROXY is non-NULL, the connection will be made to the proxy
3163    server, and u->url will be requested.  */
3164 static uerr_t
gethttp(const struct url * u,struct url * original_url,struct http_stat * hs,int * dt,struct url * proxy,struct iri * iri,int count)3165 gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
3166          int *dt, struct url *proxy, struct iri *iri, int count)
3167 {
3168   struct request *req = NULL;
3169 
3170   char *type = NULL;
3171   char *user, *passwd;
3172   char *proxyauth;
3173   int statcode;
3174   int write_error;
3175   wgint contlen, contrange;
3176   const struct url *conn;
3177   FILE *fp;
3178   int err;
3179   uerr_t retval;
3180 #ifdef HAVE_HSTS
3181 #ifdef TESTING
3182   /* we don't link against main.o when we're testing */
3183   hsts_store_t hsts_store = NULL;
3184 #else
3185   extern hsts_store_t hsts_store;
3186 #endif
3187   const char *hsts_params;
3188   time_t max_age;
3189   bool include_subdomains;
3190 #endif
3191 
3192   int sock = -1;
3193 
3194   /* Set to 1 when the authorization has already been sent and should
3195      not be tried again. */
3196   bool auth_finished = false;
3197 
3198   /* Set to 1 when just globally-set Basic authorization has been sent;
3199    * should prevent further Basic negotiations, but not other
3200    * mechanisms. */
3201   bool basic_auth_finished = false;
3202 
3203   /* Whether NTLM authentication is used for this request. */
3204   bool ntlm_seen = false;
3205 
3206   /* Whether our connection to the remote host is through SSL.  */
3207   bool using_ssl = false;
3208 
3209   /* Whether a HEAD request will be issued (as opposed to GET or
3210      POST). */
3211   bool head_only = !!(*dt & HEAD_ONLY);
3212 
3213   /* Whether conditional get request will be issued.  */
3214   bool cond_get = !!(*dt & IF_MODIFIED_SINCE);
3215 
3216 #ifdef HAVE_METALINK
3217   /* Are we looking for metalink info in HTTP headers?  */
3218   bool metalink = !!(*dt & METALINK_METADATA);
3219 #endif
3220 
3221   char *head = NULL;
3222   struct response *resp = NULL;
3223   char hdrval[512];
3224   char *message = NULL;
3225 
3226   /* Declare WARC variables. */
3227   bool warc_enabled = (opt.warc_filename != NULL);
3228   FILE *warc_tmp = NULL;
3229   char warc_timestamp_str [21];
3230   char warc_request_uuid [48];
3231   ip_address warc_ip_buf, *warc_ip = NULL;
3232   off_t warc_payload_offset = -1;
3233 
3234   /* Whether this connection will be kept alive after the HTTP request
3235      is done. */
3236   bool keep_alive;
3237 
3238   /* Is the server using the chunked transfer encoding?  */
3239   bool chunked_transfer_encoding = false;
3240 
3241   /* Whether keep-alive should be inhibited.  */
3242   bool inhibit_keep_alive =
3243     !opt.http_keep_alive || opt.ignore_length;
3244 
3245   /* Headers sent when using POST. */
3246   wgint body_data_size = 0;
3247 
3248 #ifdef HAVE_SSL
3249   if (u->scheme == SCHEME_HTTPS)
3250     {
3251       /* Initialize the SSL context.  After this has once been done,
3252          it becomes a no-op.  */
3253       if (!ssl_init ())
3254         {
3255           scheme_disable (SCHEME_HTTPS);
3256           logprintf (LOG_NOTQUIET,
3257                      _("Disabling SSL due to encountered errors.\n"));
3258           retval = SSLINITFAILED;
3259           goto cleanup;
3260         }
3261     }
3262 #endif /* HAVE_SSL */
3263 
3264   /* Initialize certain elements of struct http_stat.
3265    * Since this function is called in a loop, we have to xfree certain
3266    * members. */
3267   hs->len = 0;
3268   hs->contlen = -1;
3269   hs->res = -1;
3270   xfree (hs->rderrmsg);
3271   xfree (hs->newloc);
3272   xfree (hs->remote_time);
3273   xfree (hs->error);
3274   xfree (hs->message);
3275   hs->local_encoding = ENC_NONE;
3276   hs->remote_encoding = ENC_NONE;
3277 
3278   conn = u;
3279 
3280   {
3281     uerr_t ret;
3282     req = initialize_request (u, hs, dt, proxy, inhibit_keep_alive,
3283                               &basic_auth_finished, &body_data_size,
3284                               &user, &passwd, &ret);
3285     if (req == NULL)
3286       {
3287         retval = ret;
3288         goto cleanup;
3289       }
3290   }
3291  retry_with_auth:
3292   /* We need to come back here when the initial attempt to retrieve
3293      without authorization header fails.  (Expected to happen at least
3294      for the Digest authorization scheme.)  */
3295 
3296   if (opt.cookies)
3297     request_set_header (req, "Cookie",
3298                         cookie_header (wget_cookie_jar,
3299                                        u->host, u->port, u->path,
3300 #ifdef HAVE_SSL
3301                                        u->scheme == SCHEME_HTTPS
3302 #else
3303                                        0
3304 #endif
3305                                        ),
3306                         rel_value);
3307 
3308   /* Add the user headers. */
3309   if (opt.user_headers)
3310     {
3311       int i;
3312       for (i = 0; opt.user_headers[i]; i++)
3313         request_set_user_header (req, opt.user_headers[i]);
3314     }
3315 
3316   proxyauth = NULL;
3317   if (proxy)
3318     {
3319       conn = proxy;
3320       initialize_proxy_configuration (u, req, proxy, &proxyauth);
3321     }
3322   keep_alive = true;
3323 
3324   /* Establish the connection.  */
3325   if (inhibit_keep_alive)
3326     keep_alive = false;
3327 
3328   {
3329     uerr_t conn_err = establish_connection (u, &conn, hs, proxy, &proxyauth, &req,
3330                                             &using_ssl, inhibit_keep_alive, &sock);
3331     if (conn_err != RETROK)
3332       {
3333         retval = conn_err;
3334         goto cleanup;
3335       }
3336   }
3337 
3338   /* Open the temporary file where we will write the request. */
3339   if (warc_enabled)
3340     {
3341       warc_tmp = warc_tempfile ();
3342       if (warc_tmp == NULL)
3343         {
3344           CLOSE_INVALIDATE (sock);
3345           retval = WARC_TMP_FOPENERR;
3346           goto cleanup;
3347         }
3348 
3349       if (! proxy)
3350         {
3351           warc_ip = &warc_ip_buf;
3352           socket_ip_address (sock, warc_ip, ENDPOINT_PEER);
3353         }
3354     }
3355 
3356   /* Send the request to server.  */
3357   write_error = request_send (req, sock, warc_tmp);
3358 
3359   if (write_error >= 0)
3360     {
3361       if (opt.body_data)
3362         {
3363           DEBUGP (("[BODY data: %s]\n", opt.body_data));
3364           write_error = fd_write (sock, opt.body_data, body_data_size, -1);
3365           if (write_error >= 0 && warc_tmp != NULL)
3366             {
3367               int warc_tmp_written;
3368 
3369               /* Remember end of headers / start of payload. */
3370               warc_payload_offset = ftello (warc_tmp);
3371 
3372               /* Write a copy of the data to the WARC record. */
3373               warc_tmp_written = fwrite (opt.body_data, 1, body_data_size, warc_tmp);
3374               if (warc_tmp_written != body_data_size)
3375                 write_error = -2;
3376             }
3377          }
3378       else if (opt.body_file && body_data_size != 0)
3379         {
3380           if (warc_tmp != NULL)
3381             /* Remember end of headers / start of payload */
3382             warc_payload_offset = ftello (warc_tmp);
3383 
3384           write_error = body_file_send (sock, opt.body_file, body_data_size, warc_tmp);
3385         }
3386     }
3387 
3388   if (write_error < 0)
3389     {
3390       CLOSE_INVALIDATE (sock);
3391 
3392       if (warc_tmp != NULL)
3393         fclose (warc_tmp);
3394 
3395       if (write_error == -2)
3396         retval = WARC_TMP_FWRITEERR;
3397       else
3398         retval = WRITEFAILED;
3399       goto cleanup;
3400     }
3401   logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
3402              proxy ? "Proxy" : "HTTP");
3403   contlen = -1;
3404   contrange = 0;
3405   *dt &= ~RETROKF;
3406 
3407 
3408   if (warc_enabled)
3409     {
3410       bool warc_result;
3411 
3412       /* Generate a timestamp and uuid for this request. */
3413       warc_timestamp (warc_timestamp_str, sizeof (warc_timestamp_str));
3414       warc_uuid_str (warc_request_uuid, sizeof (warc_request_uuid));
3415 
3416       /* Create a request record and store it in the WARC file. */
3417       warc_result = warc_write_request_record (u->url, warc_timestamp_str,
3418                                                warc_request_uuid, warc_ip,
3419                                                warc_tmp, warc_payload_offset);
3420       if (! warc_result)
3421         {
3422           CLOSE_INVALIDATE (sock);
3423           retval = WARC_ERR;
3424           goto cleanup;
3425         }
3426 
3427       /* warc_write_request_record has also closed warc_tmp. */
3428     }
3429 
3430   /* Repeat while we receive a 10x response code.  */
3431   {
3432     bool _repeat;
3433 
3434     do
3435       {
3436         head = read_http_response_head (sock);
3437         if (!head)
3438           {
3439             if (errno == 0)
3440               {
3441                 logputs (LOG_NOTQUIET, _("No data received.\n"));
3442                 CLOSE_INVALIDATE (sock);
3443                 retval = HEOF;
3444               }
3445             else
3446               {
3447                 logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
3448                            fd_errstr (sock));
3449                 CLOSE_INVALIDATE (sock);
3450                 retval = HERR;
3451               }
3452             goto cleanup;
3453           }
3454         DEBUGP (("\n---response begin---\n%s---response end---\n", head));
3455 
3456         resp = resp_new (head);
3457 
3458         /* Check for status line.  */
3459         xfree (message);
3460         statcode = resp_status (resp, &message);
3461         if (statcode < 0)
3462           {
3463             char *tms = datetime_str (time (NULL));
3464             logprintf (LOG_VERBOSE, "%d\n", statcode);
3465             logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
3466                        quotearg_style (escape_quoting_style,
3467                                        _("Malformed status line")));
3468             CLOSE_INVALIDATE (sock);
3469             retval = HERR;
3470             goto cleanup;
3471           }
3472 
3473         if (H_10X (statcode))
3474           {
3475             xfree (head);
3476             resp_free (&resp);
3477             _repeat = true;
3478             DEBUGP (("Ignoring response\n"));
3479           }
3480         else
3481           {
3482             _repeat = false;
3483           }
3484       }
3485     while (_repeat);
3486   }
3487 
3488   xfree (hs->message);
3489   hs->message = xstrdup (message);
3490   if (!opt.server_response)
3491     logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
3492                message ? quotearg_style (escape_quoting_style, message) : "");
3493   else
3494     {
3495       logprintf (LOG_VERBOSE, "\n");
3496       print_server_response (resp, "  ");
3497     }
3498 
3499   if (!opt.ignore_length
3500       && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
3501     {
3502       wgint parsed;
3503       errno = 0;
3504       parsed = str_to_wgint (hdrval, NULL, 10);
3505       if (parsed == WGINT_MAX && errno == ERANGE)
3506         {
3507           /* Out of range.
3508              #### If Content-Length is out of range, it most likely
3509              means that the file is larger than 2G and that we're
3510              compiled without LFS.  In that case we should probably
3511              refuse to even attempt to download the file.  */
3512           contlen = -1;
3513         }
3514       else if (parsed < 0)
3515         {
3516           /* Negative Content-Length; nonsensical, so we can't
3517              assume any information about the content to receive. */
3518           contlen = -1;
3519         }
3520       else
3521         contlen = parsed;
3522     }
3523 
3524   /* Check for keep-alive related responses. */
3525   if (!inhibit_keep_alive)
3526     {
3527       if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
3528         {
3529           if (0 == c_strcasecmp (hdrval, "Close"))
3530             keep_alive = false;
3531         }
3532     }
3533 
3534   chunked_transfer_encoding = false;
3535   if (resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval))
3536       && 0 == c_strcasecmp (hdrval, "chunked"))
3537     chunked_transfer_encoding = true;
3538 
3539   /* Handle (possibly multiple instances of) the Set-Cookie header. */
3540   if (opt.cookies)
3541     {
3542       int scpos;
3543       const char *scbeg, *scend;
3544       /* The jar should have been created by now. */
3545       assert (wget_cookie_jar != NULL);
3546       for (scpos = 0;
3547            (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
3548                                         &scbeg, &scend)) != -1;
3549            ++scpos)
3550         {
3551           char buf[1024], *set_cookie;
3552           size_t len = scend - scbeg;
3553 
3554           if (len < sizeof (buf))
3555             set_cookie = buf;
3556           else
3557             set_cookie = xmalloc (len + 1);
3558 
3559           memcpy (set_cookie, scbeg, len);
3560           set_cookie[len] = 0;
3561 
3562           cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
3563                                     u->path, set_cookie);
3564 
3565           if (set_cookie != buf)
3566             xfree (set_cookie);
3567         }
3568     }
3569 
3570   if (keep_alive)
3571     /* The server has promised that it will not close the connection
3572        when we're done.  This means that we can register it.  */
3573     register_persistent (conn->host, conn->port, sock, using_ssl);
3574 
3575 #ifdef HAVE_METALINK
3576   /* We need to check for the Metalink data in the very first response
3577      we get from the server (before redirections, authorization, etc.).  */
3578   if (metalink)
3579     {
3580       hs->metalink = metalink_from_http (resp, hs, u);
3581       /* Bugfix: hs->local_file is NULL (opt.content_disposition).  */
3582       if (!hs->local_file && hs->metalink && hs->metalink->origin)
3583         hs->local_file = xstrdup (hs->metalink->origin);
3584       xfree (hs->message);
3585       retval = RETR_WITH_METALINK;
3586       CLOSE_FINISH (sock);
3587       goto cleanup;
3588     }
3589 #endif
3590 
3591   if (statcode == HTTP_STATUS_UNAUTHORIZED)
3592     {
3593       /* Authorization is required.  */
3594       uerr_t auth_err = RETROK;
3595       bool retry;
3596       /* Normally we are not interested in the response body.
3597          But if we are writing a WARC file we are: we like to keep everything.  */
3598       if (warc_enabled)
3599         {
3600           int _err;
3601           type = resp_header_strdup (resp, "Content-Type");
3602           _err = read_response_body (hs, sock, NULL, contlen, 0,
3603                                     chunked_transfer_encoding,
3604                                     u->url, warc_timestamp_str,
3605                                     warc_request_uuid, warc_ip, type,
3606                                     statcode, head);
3607           xfree (type);
3608 
3609           if (_err != RETRFINISHED || hs->res < 0)
3610             {
3611               CLOSE_INVALIDATE (sock);
3612               retval = _err;
3613               goto cleanup;
3614             }
3615           else
3616             CLOSE_FINISH (sock);
3617         }
3618       else
3619         {
3620           /* Since WARC is disabled, we are not interested in the response body.  */
3621           if (keep_alive && !head_only
3622               && skip_short_body (sock, contlen, chunked_transfer_encoding))
3623             CLOSE_FINISH (sock);
3624           else
3625             CLOSE_INVALIDATE (sock);
3626         }
3627 
3628       pconn.authorized = false;
3629 
3630       {
3631         auth_err = check_auth (u, user, passwd, resp, req,
3632                                &ntlm_seen, &retry,
3633                                &basic_auth_finished,
3634                                &auth_finished);
3635         if (auth_err == RETROK && retry)
3636           {
3637             resp_free (&resp);
3638             xfree (message);
3639             xfree (head);
3640             goto retry_with_auth;
3641           }
3642       }
3643       if (auth_err == RETROK)
3644         retval = AUTHFAILED;
3645       else
3646         retval = auth_err;
3647       goto cleanup;
3648     }
3649   else /* statcode != HTTP_STATUS_UNAUTHORIZED */
3650     {
3651       /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
3652       if (ntlm_seen)
3653         pconn.authorized = true;
3654     }
3655 
3656   {
3657     uerr_t ret = check_file_output (u, hs, resp, hdrval, sizeof hdrval);
3658     if (ret != RETROK)
3659       {
3660         retval = ret;
3661         goto cleanup;
3662       }
3663   }
3664 
3665   hs->statcode = statcode;
3666   xfree (hs->error);
3667   if (statcode == -1)
3668     hs->error = xstrdup (_("Malformed status line"));
3669   else if (!message || !*message)
3670     hs->error = xstrdup (_("(no description)"));
3671   else
3672     hs->error = xstrdup (message);
3673 
3674 #ifdef HAVE_HSTS
3675   if (opt.hsts && hsts_store)
3676     {
3677       hsts_params = resp_header_strdup (resp, "Strict-Transport-Security");
3678       if (parse_strict_transport_security (hsts_params, &max_age, &include_subdomains))
3679         {
3680           /* process strict transport security */
3681           if (hsts_store_entry (hsts_store, u->scheme, u->host, u->port, max_age, include_subdomains))
3682             DEBUGP(("Added new HSTS host: %s:%u (max-age: %lu, includeSubdomains: %s)\n",
3683                    u->host,
3684                    (unsigned) u->port,
3685                    (unsigned long) max_age,
3686                    (include_subdomains ? "true" : "false")));
3687           else
3688             DEBUGP(("Updated HSTS host: %s:%u (max-age: %lu, includeSubdomains: %s)\n",
3689                    u->host,
3690                    (unsigned) u->port,
3691                    (unsigned long) max_age,
3692                    (include_subdomains ? "true" : "false")));
3693         }
3694       xfree (hsts_params);
3695     }
3696 #endif
3697 
3698   type = resp_header_strdup (resp, "Content-Type");
3699   if (type)
3700     {
3701       char *tmp = strchr (type, ';');
3702       if (tmp)
3703         {
3704 #ifdef ENABLE_IRI
3705           /* sXXXav: only needed if IRI support is enabled */
3706           char *tmp2 = tmp + 1;
3707 #endif
3708 
3709           while (tmp > type && c_isspace (tmp[-1]))
3710             --tmp;
3711           *tmp = '\0';
3712 
3713 #ifdef ENABLE_IRI
3714           /* Try to get remote encoding if needed */
3715           if (opt.enable_iri && !opt.encoding_remote)
3716             {
3717               tmp = parse_charset (tmp2);
3718               if (tmp)
3719                 set_content_encoding (iri, tmp);
3720               xfree (tmp);
3721             }
3722 #endif
3723         }
3724     }
3725   xfree (hs->newloc);
3726   hs->newloc = resp_header_strdup (resp, "Location");
3727   xfree (hs->remote_time);
3728   hs->remote_time = resp_header_strdup (resp, "Last-Modified");
3729   if (!hs->remote_time) // now look for the Wayback Machine's timestamp
3730     hs->remote_time = resp_header_strdup (resp, "X-Archive-Orig-last-modified");
3731 
3732   if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
3733     {
3734       wgint first_byte_pos, last_byte_pos, entity_length;
3735       if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
3736                                &entity_length))
3737         {
3738           contrange = first_byte_pos;
3739           contlen = last_byte_pos - first_byte_pos + 1;
3740         }
3741     }
3742 
3743   if (resp_header_copy (resp, "Content-Encoding", hdrval, sizeof (hdrval)))
3744     {
3745       hs->local_encoding = ENC_INVALID;
3746 
3747       switch (hdrval[0])
3748         {
3749         case 'b': case 'B':
3750           if (0 == c_strcasecmp(hdrval, "br"))
3751             hs->local_encoding = ENC_BROTLI;
3752           break;
3753         case 'c': case 'C':
3754           if (0 == c_strcasecmp(hdrval, "compress"))
3755             hs->local_encoding = ENC_COMPRESS;
3756           break;
3757         case 'd': case 'D':
3758           if (0 == c_strcasecmp(hdrval, "deflate"))
3759             hs->local_encoding = ENC_DEFLATE;
3760           break;
3761         case 'g': case 'G':
3762           if (0 == c_strcasecmp(hdrval, "gzip"))
3763             hs->local_encoding = ENC_GZIP;
3764           break;
3765         case 'i': case 'I':
3766           if (0 == c_strcasecmp(hdrval, "identity"))
3767             hs->local_encoding = ENC_NONE;
3768           break;
3769         case 'x': case 'X':
3770           if (0 == c_strcasecmp(hdrval, "x-compress"))
3771             hs->local_encoding = ENC_COMPRESS;
3772           else if (0 == c_strcasecmp(hdrval, "x-gzip"))
3773             hs->local_encoding = ENC_GZIP;
3774           break;
3775         case '\0':
3776           hs->local_encoding = ENC_NONE;
3777         }
3778 
3779       if (hs->local_encoding == ENC_INVALID)
3780         {
3781           DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval));
3782           hs->local_encoding = ENC_NONE;
3783         }
3784 #ifdef HAVE_LIBZ
3785       else if (hs->local_encoding == ENC_GZIP
3786                && opt.compression != compression_none)
3787         {
3788           const char *p;
3789 
3790           /* Make sure the Content-Type is not gzip before decompressing */
3791           if (type)
3792             {
3793               p = strchr (type, '/');
3794               if (p == NULL)
3795                 {
3796                   hs->remote_encoding = ENC_GZIP;
3797                   hs->local_encoding = ENC_NONE;
3798                 }
3799               else
3800                 {
3801                   p++;
3802                   if (c_tolower(p[0]) == 'x' && p[1] == '-')
3803                     p += 2;
3804                   if (0 != c_strcasecmp (p, "gzip"))
3805                     {
3806                       hs->remote_encoding = ENC_GZIP;
3807                       hs->local_encoding = ENC_NONE;
3808                     }
3809                 }
3810             }
3811           else
3812             {
3813                hs->remote_encoding = ENC_GZIP;
3814                hs->local_encoding = ENC_NONE;
3815             }
3816 
3817           /* don't uncompress if a file ends with '.gz' or '.tgz' */
3818           if (hs->remote_encoding == ENC_GZIP
3819               && (p = strrchr(u->file, '.'))
3820               && (c_strcasecmp(p, ".gz") == 0 || c_strcasecmp(p, ".tgz") == 0))
3821             {
3822                DEBUGP (("Enabling broken server workaround. Will not decompress this GZip file.\n"));
3823                hs->remote_encoding = ENC_NONE;
3824             }
3825         }
3826 #endif
3827     }
3828 
3829   /* 20x responses are counted among successful by default.  */
3830   if (H_20X (statcode))
3831     *dt |= RETROKF;
3832 
3833   if (statcode == HTTP_STATUS_NO_CONTENT)
3834     {
3835       /* 204 response has no body (RFC 2616, 4.3) */
3836 
3837       /* In case the caller cares to look...  */
3838       hs->len = 0;
3839       hs->res = 0;
3840       hs->restval = 0;
3841 
3842       CLOSE_FINISH (sock);
3843 
3844       retval = RETRFINISHED;
3845       goto cleanup;
3846     }
3847 
3848   /* Return if redirected.  */
3849   if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
3850     {
3851       /* RFC2068 says that in case of the 300 (multiple choices)
3852          response, the server can output a preferred URL through
3853          `Location' header; otherwise, the request should be treated
3854          like GET.  So, if the location is set, it will be a
3855          redirection; otherwise, just proceed normally.  */
3856       if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
3857         *dt |= RETROKF;
3858       else
3859         {
3860           logprintf (LOG_VERBOSE,
3861                      _("Location: %s%s\n"),
3862                      hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
3863                      hs->newloc ? _(" [following]") : "");
3864 
3865           /* In case the caller cares to look...  */
3866           hs->len = 0;
3867           hs->res = 0;
3868           hs->restval = 0;
3869 
3870           /* Normally we are not interested in the response body of a redirect.
3871              But if we are writing a WARC file we are: we like to keep everything.  */
3872           if (warc_enabled)
3873             {
3874               int _err = read_response_body (hs, sock, NULL, contlen, 0,
3875                                             chunked_transfer_encoding,
3876                                             u->url, warc_timestamp_str,
3877                                             warc_request_uuid, warc_ip, type,
3878                                             statcode, head);
3879 
3880               if (_err != RETRFINISHED || hs->res < 0)
3881                 {
3882                   CLOSE_INVALIDATE (sock);
3883                   retval = _err;
3884                   goto cleanup;
3885                 }
3886               else
3887                 CLOSE_FINISH (sock);
3888             }
3889           else
3890             {
3891               /* Since WARC is disabled, we are not interested in the response body.  */
3892               if (keep_alive && !head_only
3893                   && skip_short_body (sock, contlen, chunked_transfer_encoding))
3894                 CLOSE_FINISH (sock);
3895               else
3896                 CLOSE_INVALIDATE (sock);
3897             }
3898 
3899           /* From RFC2616: The status codes 303 and 307 have
3900              been added for servers that wish to make unambiguously
3901              clear which kind of reaction is expected of the client.
3902 
3903              A 307 should be redirected using the same method,
3904              in other words, a POST should be preserved and not
3905              converted to a GET in that case.
3906 
3907              With strict adherence to RFC2616, POST requests are not
3908              converted to a GET request on 301 Permanent Redirect
3909              or 302 Temporary Redirect.
3910 
3911              A switch may be provided later based on the HTTPbis draft
3912              that allows clients to convert POST requests to GET
3913              requests on 301 and 302 response codes. */
3914           switch (statcode)
3915             {
3916             case HTTP_STATUS_TEMPORARY_REDIRECT:
3917             case HTTP_STATUS_PERMANENT_REDIRECT:
3918               retval = NEWLOCATION_KEEP_POST;
3919               goto cleanup;
3920             case HTTP_STATUS_MOVED_PERMANENTLY:
3921               if (opt.method && c_strcasecmp (opt.method, "post") != 0)
3922                 {
3923                   retval = NEWLOCATION_KEEP_POST;
3924                   goto cleanup;
3925                 }
3926               break;
3927             case HTTP_STATUS_MOVED_TEMPORARILY:
3928               if (opt.method && c_strcasecmp (opt.method, "post") != 0)
3929                 {
3930                   retval = NEWLOCATION_KEEP_POST;
3931                   goto cleanup;
3932                 }
3933               break;
3934             }
3935           retval = NEWLOCATION;
3936           goto cleanup;
3937         }
3938     }
3939 
3940   if (cond_get)
3941     {
3942       if (statcode == HTTP_STATUS_NOT_MODIFIED)
3943         {
3944           logprintf (LOG_VERBOSE,
3945                      _ ("File %s not modified on server. Omitting download.\n\n"),
3946                      quote (hs->local_file));
3947           *dt |= RETROKF;
3948           CLOSE_FINISH (sock);
3949           retval = RETRUNNEEDED;
3950           goto cleanup;
3951         }
3952     }
3953 
3954   set_content_type (dt, type);
3955 
3956   if (opt.adjust_extension)
3957     {
3958       const char *encoding_ext = NULL;
3959       switch (hs->local_encoding)
3960         {
3961         case ENC_INVALID:
3962         case ENC_NONE:
3963           break;
3964         case ENC_BROTLI:
3965           encoding_ext = ".br";
3966           break;
3967         case ENC_COMPRESS:
3968           encoding_ext = ".Z";
3969           break;
3970         case ENC_DEFLATE:
3971           encoding_ext = ".zlib";
3972           break;
3973         case ENC_GZIP:
3974           encoding_ext = ".gz";
3975           break;
3976         default:
3977           DEBUGP (("No extension found for encoding %d\n",
3978                    hs->local_encoding));
3979       }
3980       if (encoding_ext != NULL)
3981         {
3982           char *file_ext = strrchr (hs->local_file, '.');
3983           /* strip Content-Encoding extension (it will be re-added later) */
3984           if (file_ext != NULL && 0 == strcasecmp (file_ext, encoding_ext))
3985             *file_ext = '\0';
3986         }
3987       if (*dt & TEXTHTML)
3988         /* -E / --adjust-extension / adjust_extension = on was specified,
3989            and this is a text/html file.  If some case-insensitive
3990            variation on ".htm[l]" isn't already the file's suffix,
3991            tack on ".html". */
3992         {
3993           ensure_extension (hs, ".html", dt);
3994         }
3995       else if (*dt & TEXTCSS)
3996         {
3997           ensure_extension (hs, ".css", dt);
3998         }
3999       if (encoding_ext != NULL)
4000         {
4001           ensure_extension (hs, encoding_ext, dt);
4002         }
4003     }
4004 
4005   if (cond_get)
4006     {
4007       /* Handle the case when server ignores If-Modified-Since header.  */
4008       if (statcode == HTTP_STATUS_OK && hs->remote_time)
4009         {
4010           time_t tmr = http_atotm (hs->remote_time);
4011 
4012           /* Check if the local file is up-to-date based on Last-Modified header
4013              and content length.  */
4014           if (tmr != (time_t) - 1 && tmr <= hs->orig_file_tstamp
4015               && (contlen == -1 || contlen == hs->orig_file_size))
4016             {
4017               logprintf (LOG_VERBOSE,
4018                          _("Server ignored If-Modified-Since header for file %s.\n"
4019                            "You might want to add --no-if-modified-since option."
4020                            "\n\n"),
4021                          quote (hs->local_file));
4022               *dt |= RETROKF;
4023               CLOSE_INVALIDATE (sock);
4024               retval = RETRUNNEEDED;
4025               goto cleanup;
4026             }
4027         }
4028     }
4029 
4030   if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
4031       || (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK
4032           && contrange == 0 && contlen >= 0 && hs->restval >= contlen))
4033     {
4034       /* If `-c' is in use and the file has been fully downloaded (or
4035          the remote file has shrunk), Wget effectively requests bytes
4036          after the end of file and the server response with 416
4037          (or 200 with a <= Content-Length.  */
4038       logputs (LOG_VERBOSE, _("\
4039 \n    The file is already fully retrieved; nothing to do.\n\n"));
4040       /* In case the caller inspects. */
4041       hs->len = contlen;
4042       hs->res = 0;
4043       /* Mark as successfully retrieved. */
4044       *dt |= RETROKF;
4045 
4046       /* Try to maintain the keep-alive connection. It is often cheaper to
4047        * consume some bytes which have already been sent than to negotiate
4048        * a new connection. However, if the body is too large, or we don't
4049        * care about keep-alive, then simply terminate the connection */
4050       if (keep_alive &&
4051           skip_short_body (sock, contlen, chunked_transfer_encoding))
4052         CLOSE_FINISH (sock);
4053       else
4054         CLOSE_INVALIDATE (sock);
4055       retval = RETRUNNEEDED;
4056       goto cleanup;
4057     }
4058   if ((contrange != 0 && contrange != hs->restval)
4059       || (H_PARTIAL (statcode) && !contrange && hs->restval))
4060     {
4061       /* The Range request was somehow misunderstood by the server.
4062          Bail out.  */
4063       CLOSE_INVALIDATE (sock);
4064       retval = RANGEERR;
4065       goto cleanup;
4066     }
4067   if (contlen == -1)
4068     hs->contlen = -1;
4069   /* If the response is gzipped, the uncompressed size is unknown. */
4070   else if (hs->remote_encoding == ENC_GZIP)
4071     hs->contlen = -1;
4072   else
4073     hs->contlen = contlen + contrange;
4074 
4075   if (opt.verbose)
4076     {
4077       if (*dt & RETROKF)
4078         {
4079           /* No need to print this output if the body won't be
4080              downloaded at all, or if the original server response is
4081              printed.  */
4082           logputs (LOG_VERBOSE, _("Length: "));
4083           if (contlen != -1)
4084             {
4085               logputs (LOG_VERBOSE, number_to_static_string (contlen + contrange));
4086               if (contlen + contrange >= 1024)
4087                 logprintf (LOG_VERBOSE, " (%s)",
4088                            human_readable (contlen + contrange, 10, 1));
4089               if (contrange)
4090                 {
4091                   if (contlen >= 1024)
4092                     logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
4093                                number_to_static_string (contlen),
4094                                human_readable (contlen, 10, 1));
4095                   else
4096                     logprintf (LOG_VERBOSE, _(", %s remaining"),
4097                                number_to_static_string (contlen));
4098                 }
4099             }
4100           else
4101             logputs (LOG_VERBOSE,
4102                      opt.ignore_length ? _("ignored") : _("unspecified"));
4103           if (type)
4104             logprintf (LOG_VERBOSE, " [%s]\n", quotearg_style (escape_quoting_style, type));
4105           else
4106             logputs (LOG_VERBOSE, "\n");
4107         }
4108     }
4109 
4110   /* Return if we have no intention of further downloading.  */
4111   if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only || (opt.spider && !opt.recursive))
4112     {
4113       /* In case the caller cares to look...  */
4114       hs->len = 0;
4115       hs->res = 0;
4116       hs->restval = 0;
4117 
4118       /* Normally we are not interested in the response body of a error responses.
4119          But if we are writing a WARC file we are: we like to keep everything.  */
4120       if (warc_enabled)
4121         {
4122           int _err = read_response_body (hs, sock, NULL, contlen, 0,
4123                                         chunked_transfer_encoding,
4124                                         u->url, warc_timestamp_str,
4125                                         warc_request_uuid, warc_ip, type,
4126                                         statcode, head);
4127 
4128           if (_err != RETRFINISHED || hs->res < 0)
4129             {
4130               CLOSE_INVALIDATE (sock);
4131               retval = _err;
4132               goto cleanup;
4133             }
4134 
4135           CLOSE_FINISH (sock);
4136         }
4137       else
4138         {
4139           /* Since WARC is disabled, we are not interested in the response body.  */
4140           if (head_only)
4141             /* Pre-1.10 Wget used CLOSE_INVALIDATE here.  Now we trust the
4142                servers not to send body in response to a HEAD request, and
4143                those that do will likely be caught by test_socket_open.
4144                If not, they can be worked around using
4145                `--no-http-keep-alive'.  */
4146             CLOSE_FINISH (sock);
4147           else if (opt.spider && !opt.recursive)
4148             /* we just want to see if the page exists - no downloading required */
4149             CLOSE_INVALIDATE (sock);
4150           else if (keep_alive
4151                    && skip_short_body (sock, contlen, chunked_transfer_encoding))
4152             /* Successfully skipped the body; also keep using the socket. */
4153             CLOSE_FINISH (sock);
4154           else
4155             CLOSE_INVALIDATE (sock);
4156         }
4157 
4158       if (statcode == HTTP_STATUS_GATEWAY_TIMEOUT)
4159         retval = GATEWAYTIMEOUT;
4160       else
4161         retval = RETRFINISHED;
4162 
4163       goto cleanup;
4164     }
4165 
4166   err = open_output_stream (hs, count, &fp);
4167   if (err != RETROK)
4168     {
4169       CLOSE_INVALIDATE (sock);
4170       retval = err;
4171       goto cleanup;
4172     }
4173 
4174 #ifdef ENABLE_XATTR
4175   if (opt.enable_xattr)
4176     {
4177       if (original_url != u)
4178         set_file_metadata (u, original_url, fp);
4179       else
4180         set_file_metadata (u, NULL, fp);
4181     }
4182 #endif
4183 
4184   err = read_response_body (hs, sock, fp, contlen, contrange,
4185                             chunked_transfer_encoding,
4186                             u->url, warc_timestamp_str,
4187                             warc_request_uuid, warc_ip, type,
4188                             statcode, head);
4189 
4190   if (hs->res >= 0)
4191     CLOSE_FINISH (sock);
4192   else
4193     CLOSE_INVALIDATE (sock);
4194 
4195   if (!output_stream)
4196     fclose (fp);
4197 
4198   retval = err;
4199 
4200   cleanup:
4201   xfree (head);
4202   xfree (type);
4203   xfree (message);
4204   resp_free (&resp);
4205   request_free (&req);
4206 
4207   return retval;
4208 }
4209 
4210 /* Check whether the supplied HTTP status code is among those
4211    listed for the --retry-on-http-error option. */
4212 static bool
check_retry_on_http_error(const int statcode)4213 check_retry_on_http_error (const int statcode)
4214 {
4215   const char *tok = opt.retry_on_http_error;
4216   while (tok && *tok)
4217     {
4218       if (atoi (tok) == statcode)
4219         return true;
4220       if ((tok = strchr (tok, ',')))
4221         ++tok;
4222     }
4223   return false;
4224 }
4225 
4226 /* The genuine HTTP loop!  This is the part where the retrieval is
4227    retried, and retried, and retried, and...  */
4228 uerr_t
http_loop(const struct url * u,struct url * original_url,char ** newloc,char ** local_file,const char * referer,int * dt,struct url * proxy,struct iri * iri)4229 http_loop (const struct url *u, struct url *original_url, char **newloc,
4230            char **local_file, const char *referer, int *dt, struct url *proxy,
4231            struct iri *iri)
4232 {
4233   int count;
4234   bool got_head = false;         /* used for time-stamping and filename detection */
4235   bool time_came_from_head = false;
4236   bool got_name = false;
4237   char *tms;
4238   const char *tmrate;
4239   uerr_t err, ret = TRYLIMEXC;
4240   time_t tmr = -1;               /* remote time-stamp */
4241   struct http_stat hstat;        /* HTTP status */
4242   struct stat st;
4243   bool send_head_first = true;
4244   bool force_full_retrieve = false;
4245 
4246 
4247   /* If we are writing to a WARC file: always retrieve the whole file. */
4248   if (opt.warc_filename != NULL)
4249     force_full_retrieve = true;
4250 
4251 
4252   /* Assert that no value for *LOCAL_FILE was passed. */
4253   assert (local_file == NULL || *local_file == NULL);
4254 
4255   /* Set LOCAL_FILE parameter. */
4256   if (local_file && opt.output_document)
4257     *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
4258 
4259   /* Reset NEWLOC parameter. */
4260   *newloc = NULL;
4261 
4262   /* This used to be done in main, but it's a better idea to do it
4263      here so that we don't go through the hoops if we're just using
4264      FTP or whatever. */
4265   if (opt.cookies)
4266     load_cookies ();
4267 
4268   /* Warn on (likely bogus) wildcard usage in HTTP. */
4269   if (opt.ftp_glob && has_wildcards_p (u->path))
4270     logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
4271 
4272   /* Setup hstat struct. */
4273   xzero (hstat);
4274   hstat.referer = referer;
4275 
4276   if (opt.output_document)
4277     {
4278       hstat.local_file = xstrdup (opt.output_document);
4279       got_name = true;
4280     }
4281   else if (!opt.content_disposition)
4282     {
4283       hstat.local_file =
4284         url_file_name (opt.trustservernames ? u : original_url, NULL);
4285       got_name = true;
4286     }
4287 
4288   if (got_name && file_exists_p (hstat.local_file, NULL) && opt.noclobber && !opt.output_document)
4289     {
4290       /* If opt.noclobber is turned on and file already exists, do not
4291          retrieve the file. But if the output_document was given, then this
4292          test was already done and the file didn't exist. Hence the !opt.output_document */
4293       get_file_flags (hstat.local_file, dt);
4294       ret = RETROK;
4295       goto exit;
4296     }
4297 
4298   /* Reset the counter. */
4299   count = 0;
4300 
4301   /* Reset the document type. */
4302   *dt = 0;
4303 
4304   /* Skip preliminary HEAD request if we're not in spider mode.  */
4305   if (!opt.spider)
4306     send_head_first = false;
4307 
4308   /* Send preliminary HEAD request if --content-disposition and -c are used
4309      together.  */
4310   if (opt.content_disposition && opt.always_rest)
4311     send_head_first = true;
4312 
4313 #ifdef HAVE_METALINK
4314   if (opt.metalink_over_http)
4315     {
4316       *dt |= METALINK_METADATA;
4317       send_head_first = true;
4318     }
4319 #endif
4320 
4321   if (opt.timestamping)
4322     {
4323       /* Use conditional get request if requested
4324        * and if timestamp is known at this moment.  */
4325       if (opt.if_modified_since && !send_head_first && got_name && file_exists_p (hstat.local_file, NULL))
4326         {
4327           *dt |= IF_MODIFIED_SINCE;
4328           {
4329             uerr_t timestamp_err = set_file_timestamp (&hstat);
4330             if (timestamp_err != RETROK)
4331               return timestamp_err;
4332           }
4333         }
4334         /* Send preliminary HEAD request if -N is given and we have existing
4335          * destination file or content disposition is enabled.  */
4336       else if (opt.content_disposition || file_exists_p (hstat.local_file, NULL))
4337         send_head_first = true;
4338     }
4339 
4340   /* THE loop */
4341   do
4342     {
4343       /* Increment the pass counter.  */
4344       ++count;
4345       sleep_between_retrievals (count);
4346 
4347       /* Get the current time string.  */
4348       tms = datetime_str (time (NULL));
4349 
4350       if (opt.spider && !got_head)
4351         logprintf (LOG_VERBOSE,
4352 			  _("Spider mode enabled. Check if remote file exists.\n"));
4353 
4354       /* Print fetch message, if opt.verbose.  */
4355       if (opt.verbose)
4356         {
4357           char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
4358 
4359           if (count > 1)
4360             {
4361               char tmp[256];
4362               sprintf (tmp, _("(try:%2d)"), count);
4363               logprintf (LOG_NOTQUIET, "--%s--  %s  %s\n",
4364                          tms, tmp, hurl);
4365             }
4366           else
4367             {
4368               logprintf (LOG_NOTQUIET, "--%s--  %s\n",
4369                          tms, hurl);
4370             }
4371 
4372 #ifdef WINDOWS
4373           ws_changetitle (hurl);
4374 #endif
4375           xfree (hurl);
4376         }
4377 
4378       /* Default document type is empty.  However, if spider mode is
4379          on or time-stamping is employed, HEAD_ONLY commands is
4380          encoded within *dt.  */
4381       if (send_head_first && !got_head)
4382         *dt |= HEAD_ONLY;
4383       else
4384         *dt &= ~HEAD_ONLY;
4385 
4386       /* Decide whether or not to restart.  */
4387       if (force_full_retrieve)
4388         hstat.restval = hstat.len;
4389       else if (opt.start_pos >= 0)
4390         hstat.restval = opt.start_pos;
4391       else if (opt.always_rest
4392           && got_name
4393           && stat (hstat.local_file, &st) == 0
4394           && S_ISREG (st.st_mode))
4395         /* When -c is used, continue from on-disk size.  (Can't use
4396            hstat.len even if count>1 because we don't want a failed
4397            first attempt to clobber existing data.)  */
4398         hstat.restval = st.st_size;
4399       else if (count > 1)
4400         {
4401           /* otherwise, continue where the previous try left off */
4402           if (hstat.len < hstat.restval)
4403             hstat.restval -= hstat.len;
4404           else
4405             hstat.restval = hstat.len;
4406         }
4407       else
4408         hstat.restval = 0;
4409 
4410       /* Decide whether to send the no-cache directive.  We send it in
4411          two cases:
4412            a) we're using a proxy, and we're past our first retrieval.
4413               Some proxies are notorious for caching incomplete data, so
4414               we require a fresh get.
4415            b) caching is explicitly inhibited. */
4416       if ((proxy && count > 1)        /* a */
4417           || !opt.allow_cache)        /* b */
4418         *dt |= SEND_NOCACHE;
4419       else
4420         *dt &= ~SEND_NOCACHE;
4421 
4422       /* Try fetching the document, or at least its head.  */
4423       err = gethttp (u, original_url, &hstat, dt, proxy, iri, count);
4424 
4425       /* Time?  */
4426       tms = datetime_str (time (NULL));
4427 
4428       /* Get the new location (with or without the redirection).  */
4429       if (hstat.newloc)
4430         *newloc = xstrdup (hstat.newloc);
4431 
4432       switch (err)
4433         {
4434         case HERR: case HEOF: case CONSOCKERR:
4435         case CONERROR: case READERR: case WRITEFAILED:
4436         case RANGEERR: case FOPEN_EXCL_ERR: case GATEWAYTIMEOUT:
4437           /* Non-fatal errors continue executing the loop, which will
4438              bring them to "while" statement at the end, to judge
4439              whether the number of tries was exceeded.  */
4440           printwhat (count, opt.ntry);
4441           continue;
4442         case FWRITEERR: case FOPENERR:
4443           /* Another fatal error.  */
4444           logputs (LOG_VERBOSE, "\n");
4445           logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
4446                      quote (hstat.local_file), strerror (errno));
4447           ret = err;
4448           goto exit;
4449         case HOSTERR:
4450           /* Fatal unless option set otherwise. */
4451           if ( opt.retry_on_host_error )
4452             {
4453               printwhat (count, opt.ntry);
4454               continue;
4455             }
4456           ret = err;
4457           goto exit;
4458         case CONIMPOSSIBLE: case PROXERR: case SSLINITFAILED:
4459         case CONTNOTSUPPORTED: case VERIFCERTERR: case FILEBADFILE:
4460         case UNKNOWNATTR:
4461           /* Fatal errors just return from the function.  */
4462           ret = err;
4463           goto exit;
4464         case ATTRMISSING:
4465           /* A missing attribute in a Header is a fatal Protocol error. */
4466           logputs (LOG_VERBOSE, "\n");
4467           logprintf (LOG_NOTQUIET, _("Required attribute missing from Header received.\n"));
4468           ret = err;
4469           goto exit;
4470         case AUTHFAILED:
4471           logputs (LOG_VERBOSE, "\n");
4472           logprintf (LOG_NOTQUIET, _("Username/Password Authentication Failed.\n"));
4473           ret = err;
4474           goto exit;
4475         case WARC_ERR:
4476           /* A fatal WARC error. */
4477           logputs (LOG_VERBOSE, "\n");
4478           logprintf (LOG_NOTQUIET, _("Cannot write to WARC file.\n"));
4479           ret = err;
4480           goto exit;
4481         case WARC_TMP_FOPENERR: case WARC_TMP_FWRITEERR:
4482           /* A fatal WARC error. */
4483           logputs (LOG_VERBOSE, "\n");
4484           logprintf (LOG_NOTQUIET, _("Cannot write to temporary WARC file.\n"));
4485           ret = err;
4486           goto exit;
4487         case CONSSLERR:
4488           /* Another fatal error.  */
4489           logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
4490           ret = err;
4491           goto exit;
4492         case UNLINKERR:
4493           /* Another fatal error.  */
4494           logputs (LOG_VERBOSE, "\n");
4495           logprintf (LOG_NOTQUIET, _("Cannot unlink %s (%s).\n"),
4496                      quote (hstat.local_file), strerror (errno));
4497           ret = err;
4498           goto exit;
4499         case NEWLOCATION:
4500         case NEWLOCATION_KEEP_POST:
4501           /* Return the new location to the caller.  */
4502           if (!*newloc)
4503             {
4504               logprintf (LOG_NOTQUIET,
4505                          _("ERROR: Redirection (%d) without location.\n"),
4506                          hstat.statcode);
4507               ret = WRONGCODE;
4508             }
4509           else
4510             {
4511               ret = err;
4512             }
4513           goto exit;
4514         case RETRUNNEEDED:
4515           /* The file was already fully retrieved. */
4516           ret = RETROK;
4517           goto exit;
4518         case RETRFINISHED:
4519           /* Deal with you later.  */
4520           break;
4521 #ifdef HAVE_METALINK
4522         case RETR_WITH_METALINK:
4523           {
4524             if (hstat.metalink == NULL)
4525               {
4526                 logputs (LOG_NOTQUIET,
4527                          _("Could not find Metalink data in HTTP response. "
4528                            "Downloading file using HTTP GET.\n"));
4529                 *dt &= ~METALINK_METADATA;
4530                 *dt &= ~HEAD_ONLY;
4531                 got_head = true;
4532                 continue;
4533               }
4534 
4535             logputs (LOG_VERBOSE,
4536                      _("Metalink headers found. "
4537                        "Switching to Metalink mode.\n"));
4538 
4539             ret = retrieve_from_metalink (hstat.metalink);
4540             goto exit;
4541           }
4542           break;
4543 #endif
4544         default:
4545           /* All possibilities should have been exhausted.  */
4546           abort ();
4547         }
4548 
4549       if (!(*dt & RETROKF))
4550         {
4551           char *hurl = NULL;
4552           if (!opt.verbose)
4553             {
4554               /* #### Ugly ugly ugly! */
4555               hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
4556               logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
4557             }
4558 
4559           /* Fall back to GET if HEAD fails with a 500 or 501 error code. */
4560           if (*dt & HEAD_ONLY
4561               && (hstat.statcode == 500 || hstat.statcode == 501))
4562             {
4563               got_head = true;
4564               xfree (hurl);
4565               continue;
4566             }
4567           /* Maybe we should always keep track of broken links, not just in
4568            * spider mode.
4569            * Don't log error if it was UTF-8 encoded because we will try
4570            * once unencoded. */
4571           else if (opt.spider && !iri->utf8_encode)
4572             {
4573               /* #### Again: ugly ugly ugly! */
4574               if (!hurl)
4575                 hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
4576               nonexisting_url (hurl);
4577               logprintf (LOG_NOTQUIET, _("\
4578 Remote file does not exist -- broken link!!!\n"));
4579             }
4580           else if (check_retry_on_http_error (hstat.statcode))
4581             {
4582               printwhat (count, opt.ntry);
4583               xfree (hurl);
4584               continue;
4585             }
4586           else
4587             {
4588               logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
4589                          tms, hstat.statcode,
4590                          quotearg_style (escape_quoting_style, hstat.error));
4591             }
4592           logputs (LOG_VERBOSE, "\n");
4593           ret = WRONGCODE;
4594           xfree (hurl);
4595           goto exit;
4596         }
4597 
4598       /* Did we get the time-stamp? */
4599       if (!got_head || (opt.spider && !opt.recursive))
4600         {
4601           got_head = true;    /* no more time-stamping */
4602 
4603           if (opt.timestamping && !hstat.remote_time)
4604             {
4605               logputs (LOG_NOTQUIET, _("\
4606 Last-modified header missing -- time-stamps turned off.\n"));
4607             }
4608           else if (hstat.remote_time)
4609             {
4610               /* Convert the date-string into struct tm.  */
4611               tmr = http_atotm (hstat.remote_time);
4612               if (tmr == (time_t) (-1))
4613                 logputs (LOG_VERBOSE, _("\
4614 Last-modified header invalid -- time-stamp ignored.\n"));
4615               if (*dt & HEAD_ONLY)
4616                 time_came_from_head = true;
4617             }
4618 
4619           if (send_head_first)
4620             {
4621               /* The time-stamping section.  */
4622               if (opt.timestamping)
4623                 {
4624                   if (hstat.orig_file_name) /* Perform the following
4625                                                checks only if the file
4626                                                we're supposed to
4627                                                download already exists.  */
4628                     {
4629                       if (hstat.remote_time &&
4630                           tmr != (time_t) (-1))
4631                         {
4632                           /* Now time-stamping can be used validly.
4633                              Time-stamping means that if the sizes of
4634                              the local and remote file match, and local
4635                              file is newer than the remote file, it will
4636                              not be retrieved.  Otherwise, the normal
4637                              download procedure is resumed.  */
4638                           if (hstat.orig_file_tstamp >= tmr)
4639                             {
4640                               if (hstat.contlen == -1
4641                                   || hstat.orig_file_size == hstat.contlen)
4642                                 {
4643                                   logprintf (LOG_VERBOSE, _("\
4644 Server file no newer than local file %s -- not retrieving.\n\n"),
4645                                              quote (hstat.orig_file_name));
4646                                   ret = RETROK;
4647                                   goto exit;
4648                                 }
4649                               else
4650                                 {
4651                                   logprintf (LOG_VERBOSE, _("\
4652 The sizes do not match (local %s) -- retrieving.\n"),
4653                                              number_to_static_string (hstat.orig_file_size));
4654                                 }
4655                             }
4656                           else
4657                             {
4658                               force_full_retrieve = true;
4659                               logputs (LOG_VERBOSE,
4660                                        _("Remote file is newer, retrieving.\n"));
4661                             }
4662 
4663                           logputs (LOG_VERBOSE, "\n");
4664                         }
4665                     }
4666 
4667                   /* free_hstat (&hstat); */
4668                   hstat.timestamp_checked = true;
4669                 }
4670 
4671               if (opt.spider)
4672                 {
4673                   bool finished = true;
4674                   if (opt.recursive)
4675                     {
4676                       if ((*dt & TEXTHTML) || (*dt & TEXTCSS))
4677                         {
4678                           logputs (LOG_VERBOSE, _("\
4679 Remote file exists and could contain links to other resources -- retrieving.\n\n"));
4680                           finished = false;
4681                         }
4682                       else
4683                         {
4684                           logprintf (LOG_VERBOSE, _("\
4685 Remote file exists but does not contain any link -- not retrieving.\n\n"));
4686                           ret = RETROK; /* RETRUNNEEDED is not for caller. */
4687                         }
4688                     }
4689                   else
4690                     {
4691                       if ((*dt & TEXTHTML) || (*dt & TEXTCSS))
4692                         {
4693                           logprintf (LOG_VERBOSE, _("\
4694 Remote file exists and could contain further links,\n\
4695 but recursion is disabled -- not retrieving.\n\n"));
4696                         }
4697                       else
4698                         {
4699                           logprintf (LOG_VERBOSE, _("\
4700 Remote file exists.\n\n"));
4701                         }
4702                       ret = RETROK; /* RETRUNNEEDED is not for caller. */
4703                     }
4704 
4705                   if (finished)
4706                     {
4707                       logprintf (LOG_NONVERBOSE,
4708                                  _("%s URL: %s %2d %s\n"),
4709                                  tms, u->url, hstat.statcode,
4710                                  hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : "");
4711                       goto exit;
4712                     }
4713                 }
4714 
4715               got_name = true;
4716               *dt &= ~HEAD_ONLY;
4717               count = 0;          /* the retrieve count for HEAD is reset */
4718               continue;
4719             } /* send_head_first */
4720         } /* !got_head */
4721 
4722       if (opt.useservertimestamps
4723           && (tmr != (time_t) (-1))
4724           && ((hstat.len == hstat.contlen) ||
4725               ((hstat.res == 0) && (hstat.contlen == -1))))
4726         {
4727           const char *fl = NULL;
4728           set_local_file (&fl, hstat.local_file);
4729           if (fl)
4730             {
4731               time_t newtmr = -1;
4732               /* Reparse time header, in case it's changed. */
4733               if (time_came_from_head
4734                   && hstat.remote_time && hstat.remote_time[0])
4735                 {
4736                   newtmr = http_atotm (hstat.remote_time);
4737                   if (newtmr != (time_t)-1)
4738                     tmr = newtmr;
4739                 }
4740               touch (fl, tmr);
4741             }
4742         }
4743       /* End of time-stamping section. */
4744 
4745       tmrate = retr_rate (hstat.rd_size, hstat.dltime);
4746       total_download_time += hstat.dltime;
4747 
4748       if (hstat.len == hstat.contlen)
4749         {
4750           if (*dt & RETROKF || opt.content_on_error)
4751             {
4752               bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
4753 
4754               logprintf (LOG_VERBOSE,
4755                          write_to_stdout
4756                          ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
4757                          : _("%s (%s) - %s saved [%s/%s]\n\n"),
4758                          tms, tmrate,
4759                          write_to_stdout ? "" : quote (hstat.local_file),
4760                          number_to_static_string (hstat.len),
4761                          number_to_static_string (hstat.contlen));
4762               logprintf (LOG_NONVERBOSE,
4763                          "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
4764                          tms, u->url,
4765                          number_to_static_string (hstat.len),
4766                          number_to_static_string (hstat.contlen),
4767                          hstat.local_file, count);
4768             }
4769           ++numurls;
4770           total_downloaded_bytes += hstat.rd_size;
4771 
4772           /* Remember that we downloaded the file for later ".orig" code. */
4773           if (*dt & ADDED_HTML_EXTENSION)
4774             downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
4775           else
4776             downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
4777 
4778           ret = RETROK;
4779           goto exit;
4780         }
4781       else if (hstat.res == 0) /* No read error */
4782         {
4783           if (hstat.contlen == -1)  /* We don't know how much we were supposed
4784                                        to get, so assume we succeeded. */
4785             {
4786               if (*dt & RETROKF || opt.content_on_error)
4787                 {
4788                   bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
4789 
4790                   logprintf (LOG_VERBOSE,
4791                              write_to_stdout
4792                              ? _("%s (%s) - written to stdout %s[%s]\n\n")
4793                              : _("%s (%s) - %s saved [%s]\n\n"),
4794                              tms, tmrate,
4795                              write_to_stdout ? "" : quote (hstat.local_file),
4796                              number_to_static_string (hstat.len));
4797                   logprintf (LOG_NONVERBOSE,
4798                              "%s URL:%s [%s] -> \"%s\" [%d]\n",
4799                              tms, u->url, number_to_static_string (hstat.len),
4800                              hstat.local_file, count);
4801                 }
4802               ++numurls;
4803               total_downloaded_bytes += hstat.rd_size;
4804 
4805               /* Remember that we downloaded the file for later ".orig" code. */
4806               if (*dt & ADDED_HTML_EXTENSION)
4807                 downloaded_file (FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
4808               else
4809                 downloaded_file (FILE_DOWNLOADED_NORMALLY, hstat.local_file);
4810 
4811               ret = RETROK;
4812               goto exit;
4813             }
4814           else if (hstat.len < hstat.contlen) /* meaning we lost the
4815                                                  connection too soon */
4816             {
4817               logprintf (LOG_VERBOSE,
4818                          _("%s (%s) - Connection closed at byte %s. "),
4819                          tms, tmrate, number_to_static_string (hstat.len));
4820               printwhat (count, opt.ntry);
4821               continue;
4822             }
4823           else if (hstat.len != hstat.restval)
4824             /* Getting here would mean reading more data than
4825                requested with content-length, which we never do.  */
4826             abort ();
4827           else
4828             {
4829               /* Getting here probably means that the content-length was
4830                * _less_ than the original, local size. We should probably
4831                * truncate or re-read, or something. FIXME */
4832               ret = RETROK;
4833               goto exit;
4834             }
4835         }
4836       else /* from now on hstat.res can only be -1 */
4837         {
4838           if (hstat.contlen == -1)
4839             {
4840               logprintf (LOG_VERBOSE,
4841                          _("%s (%s) - Read error at byte %s (%s)."),
4842                          tms, tmrate, number_to_static_string (hstat.len),
4843                          hstat.rderrmsg);
4844               printwhat (count, opt.ntry);
4845               continue;
4846             }
4847           else /* hstat.res == -1 and contlen is given */
4848             {
4849               logprintf (LOG_VERBOSE,
4850                          _("%s (%s) - Read error at byte %s/%s (%s). "),
4851                          tms, tmrate,
4852                          number_to_static_string (hstat.len),
4853                          number_to_static_string (hstat.contlen),
4854                          hstat.rderrmsg);
4855               printwhat (count, opt.ntry);
4856               continue;
4857             }
4858         }
4859       /* not reached */
4860     }
4861   while (!opt.ntry || (count < opt.ntry));
4862 
4863 exit:
4864   if ((ret == RETROK || opt.content_on_error) && local_file)
4865     {
4866       xfree (*local_file);
4867       /* Bugfix: Prevent SIGSEGV when hstat.local_file was left NULL
4868          (i.e. due to opt.content_disposition).  */
4869       if (hstat.local_file)
4870         {
4871           *local_file = hstat.local_file;
4872           hstat.local_file = NULL;
4873         }
4874     }
4875   free_hstat (&hstat);
4876 
4877   return ret;
4878 }
4879 
/* Decide whether strptime() consumed a complete time specification.

   P is the value strptime() returned, i.e. a pointer to the first
   character it did not parse, or NULL when parsing failed.  After
   skipping leading whitespace, the remainder is accepted when it is
   empty, starts with `GMT', or starts with a sign followed by a digit.

   In extended regexp parlance: true if P matches
   "^ *(GMT|[+-][0-9]|$)", false otherwise (including P == NULL).  */
static bool
check_end (const char *p)
{
  const char *rest = p;

  /* strptime() reported a parse failure.  */
  if (rest == NULL)
    return false;

  for (; c_isspace (*rest); rest++)
    ;

  /* Nothing left over at all.  */
  if (*rest == '\0')
    return true;

  /* A literal "GMT" zone designator.  */
  if (strncmp (rest, "GMT", 3) == 0)
    return true;

  /* The start of a numeric zone offset such as "+0100".  */
  if ((*rest == '+' || *rest == '-') && c_isdigit (rest[1]))
    return true;

  return false;
}
4902 
/* Convert the textual specification of time in TIME_STRING to the
   number of seconds since the Epoch.

   TIME_STRING can be in any of the three formats RFC2616 allows the
   HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date,
   as well as the time format used in the Set-Cookie header.
   Timezones are ignored, and should be GMT.

   Return the computed time_t representation, or -1 if the conversion
   fails.

   LC_TIME is switched to "C" while parsing and restored to its
   previous value before returning.

   This function uses strptime with various string formats for parsing
   TIME_STRING.  This results in a parser that is not as lenient in
   interpreting TIME_STRING as I would like it to be.  Being based on
   strptime, it always allows shortened months, one-digit days, etc.,
   but due to the multitude of formats in which time can be
   represented, an ideal HTTP time parser would be even more
   forgiving.  It should completely ignore things like week days and
   concentrate only on the various forms of representing years,
   months, days, hours, minutes, and seconds.  For example, it would
   be nice if it accepted ISO 8601 out of the box.

   I've investigated free and PD code for this purpose, but none was
   usable.  getdate was big and unwieldy, and had potential copyright
   issues, or so I was informed.  Dr. Marcus Hennecke's atotm(),
   distributed with phttpd, is excellent, but we cannot use it because
   it is not assigned to the FSF.  So I stuck it with strptime.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations I've tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  const char *oldlocale;
  char savedlocale_buf[256];
  char *savedlocale = NULL;     /* private copy of the old locale name */
  size_t i;
  time_t ret = (time_t) -1;

  /* Solaris strptime fails to recognize English month names in
     non-English locales, which we work around by temporarily setting
     locale to C before invoking strptime.

     The string returned by setlocale may be overwritten by the next
     setlocale call, so keep a private copy.  The common case fits the
     stack buffer; an unusually long name falls back to the heap so
     that the locale is always restored instead of being silently left
     at "C".  */
  oldlocale = setlocale (LC_TIME, NULL);
  if (oldlocale)
    {
      size_t l = strlen (oldlocale) + 1;
      if (l <= sizeof savedlocale_buf)
        savedlocale = savedlocale_buf;
      else
        savedlocale = xmalloc (l);
      memcpy (savedlocale, oldlocale, l);
    }

  setlocale (LC_TIME, "C");

  for (i = 0; i < countof (time_formats); i++)
    {
      struct tm t;

      /* Some versions of strptime use the existing contents of struct
         tm to recalculate the date according to format.  Zero it out
         to prevent stack garbage from influencing strptime.  */
      xzero (t);

      if (check_end (strptime (time_string, time_formats[i], &t)))
        {
          ret = timegm (&t);
          break;
        }
    }

  /* Restore the previous locale. */
  if (savedlocale)
    {
      if (savedlocale[0])
        setlocale (LC_TIME, savedlocale);
      if (savedlocale != savedlocale_buf)
        xfree (savedlocale);
    }

  return ret;
}
4990 
4991 /* Authorization support: We support three authorization schemes:
4992 
4993    * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
4994 
4995    * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
4996    consisting of answering to the server's challenge with the proper
4997    MD5 digests.
4998 
4999    * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
5000    Stenberg for libcurl.  Like digest, NTLM is based on a
5001    challenge-response mechanism, but unlike digest, it is non-standard
5002    (authenticates TCP connections rather than requests), undocumented
5003    and Microsoft-specific.  */
5004 
/* Build the value of an authorization header for the `Basic' scheme:
   the string "USER:PASSWD" is base64-encoded and the result is
   appended to the literal "Basic ".

   Returns the string produced by concat_strings; small inputs are
   assembled in stack buffers, larger ones in temporary heap storage
   that is released before returning.  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  char plain_stack[256], encoded_stack[256];
  char *plain, *encoded, *header;
  const size_t user_len = strlen (user);
  const size_t passwd_len = strlen (passwd);
  const size_t plain_len = user_len + 1 + passwd_len;   /* +1 for ':' */

  /* Pick stack storage when the text (plus its terminator) fits,
     otherwise allocate exactly what is needed.  */
  plain = plain_stack;
  if (plain_len >= sizeof (plain_stack))
    plain = xmalloc (plain_len + 1);

  encoded = encoded_stack;
  if (BASE64_LENGTH (plain_len) >= sizeof (encoded_stack))
    encoded = xmalloc (BASE64_LENGTH (plain_len) + 1);

  /* Assemble "USER:PASSWD", copying PASSWD's terminator as well.  */
  memcpy (plain, user, user_len);
  plain[user_len] = ':';
  memcpy (plain + user_len + 1, passwd, passwd_len + 1);

  wget_base64_encode (plain, plain_len, encoded);

  header = concat_strings ("Basic ", encoded, (char *) 0);

  /* Only heap-allocated scratch space needs releasing.  */
  if (plain != plain_stack)
    xfree (plain);

  if (encoded != encoded_stack)
    xfree (encoded);

  return header;
}
5039 
/* Advance the pointer lvalue X past any leading characters for which
   c_isspace is true.  X is evaluated repeatedly, so it must not have
   side effects.  */
#define SKIP_WS(x) do {                         \
  for (; c_isspace (*(x)); ++(x))               \
    ;                                           \
} while (0)
5044 
5045 #ifdef ENABLE_DIGEST
5046 /* Dump the hexadecimal representation of HASH to BUF.  HASH should be
5047    an array of 16 bytes containing the hash keys, and BUF should be a
5048    buffer of 33 writable characters (32 for hex digits plus one for
5049    zero termination).  */
5050 static void
dump_hash(char * buf,const unsigned char * hash)5051 dump_hash (char *buf, const unsigned char *hash)
5052 {
5053   int i;
5054 
5055   for (i = 0; i < MD5_DIGEST_SIZE; i++, hash++)
5056     {
5057       *buf++ = XNUM_TO_digit (*hash >> 4);
5058       *buf++ = XNUM_TO_digit (*hash & 0xf);
5059     }
5060   *buf = '\0';
5061 }
5062 
5063 /* Take the line apart to find the challenge, and compose a digest
5064    authorization header.  See RFC2069 section 2.1.2.  */
5065 static char *
digest_authentication_encode(const char * au,const char * user,const char * passwd,const char * method,const char * path,uerr_t * auth_err)5066 digest_authentication_encode (const char *au, const char *user,
5067                               const char *passwd, const char *method,
5068                               const char *path, uerr_t *auth_err)
5069 {
5070   static char *realm, *opaque, *nonce, *qop, *algorithm;
5071   static struct {
5072     const char *name;
5073     char **variable;
5074   } options[] = {
5075     { "realm", &realm },
5076     { "opaque", &opaque },
5077     { "nonce", &nonce },
5078     { "qop", &qop },
5079     { "algorithm", &algorithm }
5080   };
5081   char cnonce[16] = "";
5082   char *res = NULL;
5083   int res_len;
5084   size_t res_size;
5085   param_token name, value;
5086 
5087 
5088   realm = opaque = nonce = algorithm = qop = NULL;
5089 
5090   au += 6;                      /* skip over `Digest' */
5091   while (extract_param (&au, &name, &value, ',', NULL))
5092     {
5093       size_t i;
5094       size_t namelen = name.e - name.b;
5095       for (i = 0; i < countof (options); i++)
5096         if (namelen == strlen (options[i].name)
5097             && 0 == strncmp (name.b, options[i].name,
5098                              namelen))
5099           {
5100             *options[i].variable = strdupdelim (value.b, value.e);
5101             break;
5102           }
5103     }
5104 
5105   if (qop && strcmp (qop, "auth"))
5106     {
5107       logprintf (LOG_NOTQUIET, _("Unsupported quality of protection '%s'.\n"), qop);
5108       xfree (qop); /* force freeing mem and continue */
5109     }
5110   else if (algorithm && strcmp (algorithm,"MD5") && strcmp (algorithm,"MD5-sess"))
5111     {
5112       logprintf (LOG_NOTQUIET, _("Unsupported algorithm '%s'.\n"), algorithm);
5113       xfree (algorithm); /* force freeing mem and continue */
5114     }
5115 
5116   if (!realm || !nonce || !user || !passwd || !path || !method)
5117     {
5118       *auth_err = ATTRMISSING;
5119       goto cleanup;
5120    }
5121 
5122   /* Calculate the digest value.  */
5123   {
5124     struct md5_ctx ctx;
5125     unsigned char hash[MD5_DIGEST_SIZE];
5126     char a1buf[MD5_DIGEST_SIZE * 2 + 1], a2buf[MD5_DIGEST_SIZE * 2 + 1];
5127     char response_digest[MD5_DIGEST_SIZE * 2 + 1];
5128 
5129     /* A1BUF = H(user ":" realm ":" password) */
5130     md5_init_ctx (&ctx);
5131     md5_process_bytes ((unsigned char *)user, strlen (user), &ctx);
5132     md5_process_bytes ((unsigned char *)":", 1, &ctx);
5133     md5_process_bytes ((unsigned char *)realm, strlen (realm), &ctx);
5134     md5_process_bytes ((unsigned char *)":", 1, &ctx);
5135     md5_process_bytes ((unsigned char *)passwd, strlen (passwd), &ctx);
5136     md5_finish_ctx (&ctx, hash);
5137 
5138     dump_hash (a1buf, hash);
5139 
5140     if (algorithm && !strcmp (algorithm, "MD5-sess"))
5141       {
5142         /* A1BUF = H( H(user ":" realm ":" password) ":" nonce ":" cnonce ) */
5143         snprintf (cnonce, sizeof (cnonce), "%08x",
5144           (unsigned) random_number (INT_MAX));
5145 
5146         md5_init_ctx (&ctx);
5147         /* md5_process_bytes (hash, MD5_DIGEST_SIZE, &ctx); */
5148         md5_process_bytes (a1buf, MD5_DIGEST_SIZE * 2, &ctx);
5149         md5_process_bytes ((unsigned char *)":", 1, &ctx);
5150         md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
5151         md5_process_bytes ((unsigned char *)":", 1, &ctx);
5152         md5_process_bytes ((unsigned char *)cnonce, strlen (cnonce), &ctx);
5153         md5_finish_ctx (&ctx, hash);
5154 
5155         dump_hash (a1buf, hash);
5156       }
5157 
5158     /* A2BUF = H(method ":" path) */
5159     md5_init_ctx (&ctx);
5160     md5_process_bytes ((unsigned char *)method, strlen (method), &ctx);
5161     md5_process_bytes ((unsigned char *)":", 1, &ctx);
5162     md5_process_bytes ((unsigned char *)path, strlen (path), &ctx);
5163     md5_finish_ctx (&ctx, hash);
5164     dump_hash (a2buf, hash);
5165 
5166     if (qop && !strcmp (qop, "auth"))
5167       {
5168         /* RFC 2617 Digest Access Authentication */
5169         /* generate random hex string */
5170         if (!*cnonce)
5171           snprintf (cnonce, sizeof (cnonce), "%08x",
5172             (unsigned) random_number (INT_MAX));
5173 
5174         /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" noncecount ":" clientnonce ":" qop ": " A2BUF) */
5175         md5_init_ctx (&ctx);
5176         md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
5177         md5_process_bytes ((unsigned char *)":", 1, &ctx);
5178         md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
5179         md5_process_bytes ((unsigned char *)":", 1, &ctx);
5180         md5_process_bytes ((unsigned char *)"00000001", 8, &ctx); /* TODO: keep track of server nonce values */
5181         md5_process_bytes ((unsigned char *)":", 1, &ctx);
5182         md5_process_bytes ((unsigned char *)cnonce, strlen (cnonce), &ctx);
5183         md5_process_bytes ((unsigned char *)":", 1, &ctx);
5184         md5_process_bytes ((unsigned char *)qop, strlen (qop), &ctx);
5185         md5_process_bytes ((unsigned char *)":", 1, &ctx);
5186         md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
5187         md5_finish_ctx (&ctx, hash);
5188       }
5189     else
5190       {
5191         /* RFC 2069 Digest Access Authentication */
5192         /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
5193         md5_init_ctx (&ctx);
5194         md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
5195         md5_process_bytes ((unsigned char *)":", 1, &ctx);
5196         md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
5197         md5_process_bytes ((unsigned char *)":", 1, &ctx);
5198         md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
5199         md5_finish_ctx (&ctx, hash);
5200       }
5201 
5202     dump_hash (response_digest, hash);
5203 
5204     res_size = strlen (user)
5205              + strlen (realm)
5206              + strlen (nonce)
5207              + strlen (path)
5208              + 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
5209              + (opaque ? strlen (opaque) : 0)
5210              + (algorithm ? strlen (algorithm) : 0)
5211              + (qop ? 128: 0)
5212              + strlen (cnonce)
5213              + 128;
5214 
5215     res = xmalloc (res_size);
5216 
5217     if (qop && !strcmp (qop, "auth"))
5218       {
5219         res_len = snprintf (res, res_size, "Digest "\
5220                 "username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\""\
5221                 ", qop=auth, nc=00000001, cnonce=\"%s\"",
5222                   user, realm, nonce, path, response_digest, cnonce);
5223 
5224       }
5225     else
5226       {
5227         res_len = snprintf (res, res_size, "Digest "\
5228                 "username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
5229                   user, realm, nonce, path, response_digest);
5230       }
5231 
5232     if (opaque)
5233       {
5234         res_len += snprintf (res + res_len, res_size - res_len, ", opaque=\"%s\"", opaque);
5235       }
5236 
5237     if (algorithm)
5238       {
5239         snprintf (res + res_len, res_size - res_len, ", algorithm=\"%s\"", algorithm);
5240       }
5241   }
5242 
5243 cleanup:
5244   xfree (realm);
5245   xfree (opaque);
5246   xfree (nonce);
5247   xfree (qop);
5248   xfree (algorithm);
5249 
5250   return res;
5251 }
5252 #endif /* ENABLE_DIGEST */
5253 
/* Computing the size of a string literal must take into account that
   value returned by sizeof includes the terminating \0.  */
#define STRSIZE(literal) (sizeof (literal) - 1)

/* Whether chars in [b, e) begin with the literal string provided as
   first argument and are followed either by whitespace or by the end
   of the range.  The comparison is case-insensitive.

   B and E are parenthesized everywhere in the expansion, so pointer
   expressions such as `p + 1' are accepted.  Both are evaluated more
   than once and therefore must not have side effects.  */
#define STARTS(literal, b, e)                                   \
  (((e) > (b))                                                  \
   && ((size_t) ((e) - (b))) >= STRSIZE (literal)               \
   && 0 == c_strncasecmp ((b), literal, STRSIZE (literal))      \
   && ((size_t) ((e) - (b)) == STRSIZE (literal)                \
       || c_isspace ((b)[STRSIZE (literal)])))
5267 
/* Return true if the header text in [HDRBEG, HDREND) starts with the
   name of an authentication scheme this build can handle: `Basic'
   always, `Digest' when ENABLE_DIGEST is defined and `NTLM' when
   ENABLE_NTLM is defined.  */
static bool
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  if (STARTS ("Basic", hdrbeg, hdrend))
    return true;
#ifdef ENABLE_DIGEST
  if (STARTS ("Digest", hdrbeg, hdrend))
    return true;
#endif
#ifdef ENABLE_NTLM
  if (STARTS ("NTLM", hdrbeg, hdrend))
    return true;
#endif
  return false;
}
5280 
5281 #undef STARTS
5282 
5283 /* Create the HTTP authorization request header.  When the
5284    `WWW-Authenticate' response header is seen, according to the
5285    authorization scheme specified in that header (`Basic' and `Digest'
5286    are supported by the current implementation), produce an
5287    appropriate HTTP authorization request header.  */
5288 static char *
create_authorization_line(const char * au,const char * user,const char * passwd,const char * method,const char * path,bool * finished,uerr_t * auth_err)5289 create_authorization_line (const char *au, const char *user,
5290                            const char *passwd, const char *method,
5291                            const char *path, bool *finished, uerr_t *auth_err)
5292 {
5293   /* We are called only with known schemes, so we can dispatch on the
5294      first letter. */
5295   switch (c_toupper (*au))
5296     {
5297     case 'B':                   /* Basic */
5298       *finished = true;
5299       return basic_authentication_encode (user, passwd);
5300 #ifdef ENABLE_DIGEST
5301     case 'D':                   /* Digest */
5302       *finished = true;
5303       return digest_authentication_encode (au, user, passwd, method, path, auth_err);
5304 #endif
5305 #ifdef ENABLE_NTLM
5306     case 'N':                   /* NTLM */
5307       if (!ntlm_input (&pconn.ntlm, au))
5308         {
5309           *finished = true;
5310           return NULL;
5311         }
5312       return ntlm_output (&pconn.ntlm, user, passwd, finished);
5313 #endif
5314     default:
5315       /* We shouldn't get here -- this function should be only called
5316          with values approved by known_authentication_scheme_p.  */
5317       abort ();
5318     }
5319 }
5320 
5321 static void
load_cookies(void)5322 load_cookies (void)
5323 {
5324   if (!wget_cookie_jar)
5325     wget_cookie_jar = cookie_jar_new ();
5326   if (opt.cookies_input && !cookies_loaded_p)
5327     {
5328       cookie_jar_load (wget_cookie_jar, opt.cookies_input);
5329       cookies_loaded_p = true;
5330     }
5331 }
5332 
5333 void
save_cookies(void)5334 save_cookies (void)
5335 {
5336   if (wget_cookie_jar)
5337     cookie_jar_save (wget_cookie_jar, opt.cookies_output);
5338 }
5339 
#if defined DEBUG_MALLOC || defined TESTING
/* Release the global state held by this module: invalidate an active
   persistent connection, delete the cookie jar, and free every key of
   the basic_authed_hosts table before destroying the table itself.
   Only compiled for DEBUG_MALLOC or TESTING builds.  */
void
http_cleanup (void)
{
  if (pconn_active)
    invalidate_persistent ();

  if (wget_cookie_jar != NULL)
    {
      cookie_jar_delete (wget_cookie_jar);
      wget_cookie_jar = NULL;
    }

  if (basic_authed_hosts != NULL)
    {
      hash_table_iterator it;

      /* Only the keys are released here; the values are left alone.  */
      hash_table_iterate (basic_authed_hosts, &it);
      while (hash_table_iter_next (&it))
        {
          xfree (it.key);
        }

      hash_table_destroy (basic_authed_hosts);
      basic_authed_hosts = NULL;
    }
}
#endif
5365 
5366 void
ensure_extension(struct http_stat * hs,const char * ext,int * dt)5367 ensure_extension (struct http_stat *hs, const char *ext, int *dt)
5368 {
5369   char *last_period_in_local_filename = strrchr (hs->local_file, '.');
5370   char shortext[8];
5371   int len;
5372   shortext[0] = '\0';
5373   len = strlen (ext);
5374   if (len == 5)
5375     {
5376       memcpy (shortext, ext, len - 1);
5377       shortext[len - 1] = '\0';
5378     }
5379 
5380   if (last_period_in_local_filename == NULL
5381       || !(0 == strcasecmp (last_period_in_local_filename, shortext)
5382            || 0 == strcasecmp (last_period_in_local_filename, ext)))
5383     {
5384       int local_filename_len = strlen (hs->local_file);
5385       /* Resize the local file, allowing for ".html" preceded by
5386          optional ".NUMBER".  */
5387       hs->local_file = xrealloc (hs->local_file,
5388                                  local_filename_len + 24 + len);
5389       strcpy (hs->local_file + local_filename_len, ext);
5390       /* If clobbering is not allowed and the file, as named,
5391          exists, tack on ".NUMBER.html" instead. */
5392       if (!ALLOW_CLOBBER && file_exists_p (hs->local_file, NULL))
5393         {
5394           int ext_num = 1;
5395           do
5396             sprintf (hs->local_file + local_filename_len,
5397                      ".%d%s", ext_num++, ext);
5398           while (file_exists_p (hs->local_file, NULL));
5399         }
5400       *dt |= ADDED_HTML_EXTENSION;
5401     }
5402 }
5403 
5404 #ifdef TESTING
5405 
5406 const char *
test_parse_range_header(void)5407 test_parse_range_header (void)
5408 {
5409   unsigned i;
5410   static const struct {
5411     const char * rangehdr;
5412     const wgint firstbyte;
5413     const wgint lastbyte;
5414     const wgint length;
5415     const bool shouldPass;
5416   } test_array[] = {
5417       { "bytes 0-1000/1000", 0, 1000, 1000, false },
5418       { "bytes 0-999/1000", 0, 999, 1000, true },
5419       { "bytes 100-99/1000", 100, 99, 1000, false },
5420       { "bytes 100-100/1000", 100, 100, 1000, true },
5421       { "bytes 0-1000/100000000", 0, 1000, 100000000, true },
5422       { "bytes 1-999/1000", 1, 999, 1000, true },
5423       { "bytes 42-1233/1234", 42, 1233, 1234, true },
5424       { "bytes 42-1233/*", 42, 1233, -1, true },
5425       { "bytes 0-2147483648/2147483649", 0, 2147483648U, 2147483649U, true },
5426       { "bytes 2147483648-4294967296/4294967297", 2147483648U, 4294967296ULL, 4294967297ULL, true },
5427   };
5428 
5429   wgint firstbyteptr[sizeof(wgint)];
5430   wgint lastbyteptr[sizeof(wgint)];
5431   wgint lengthptr[sizeof(wgint)];
5432   bool result;
5433   for (i = 0; i < countof (test_array); i++)
5434     {
5435       result = parse_content_range (test_array[i].rangehdr, firstbyteptr, lastbyteptr, lengthptr);
5436 #if 0
5437       printf ("%ld %ld\n", test_array[i].firstbyte, *firstbyteptr);
5438       printf ("%ld %ld\n", test_array[i].lastbyte, *lastbyteptr);
5439       printf ("%ld %ld\n", test_array[i].length, *lengthptr);
5440       printf ("\n");
5441 #endif
5442       mu_assert ("test_parse_range_header: False Negative", result == test_array[i].shouldPass);
5443       mu_assert ("test_parse_range_header: Bad parse", test_array[i].firstbyte == *firstbyteptr &&
5444                                                        test_array[i].lastbyte == *lastbyteptr &&
5445                                                        test_array[i].length == *lengthptr);
5446     }
5447 
5448   return NULL;
5449 }
5450 
/* Unit test for parse_content_disposition.  Each case gives a header
   value, the filename expected from it, and the expected return
   value; the filename is compared only when the parse reports
   success.  Returns NULL on success or the message of the failed
   assertion.  */
const char *
test_parse_content_disposition (void)
{
  static const struct disposition_case {
    const char *hdrval;
    const char *filename;
    bool result;
  } cases[] = {
    { "filename=\"file.ext\"", "file.ext", true },
    { "attachment; filename=\"file.ext\"", "file.ext", true },
    { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
    { "attachment", NULL, false },
    { "attachment; filename*=UTF-8'en-US'hello.txt", "hello.txt", true },
    { "attachment; filename*0=\"hello\"; filename*1=\"world.txt\"",
      "helloworld.txt", true },
    { "attachment; filename=\"A.ext\"; filename*=\"B.ext\"", "B.ext", true },
    { "attachment; filename*=\"A.ext\"; filename*0=\"B\"; filename*1=\"B.ext\"",
      "A.ext", true },
    { "filename**0=\"A\"; filename**1=\"A.ext\"; filename*0=\"B\";"
      "filename*1=\"B\"", "AA.ext", true },
  };
  const struct disposition_case *tc;

  for (tc = cases; tc != cases + countof (cases); ++tc)
    {
      char *filename;
      bool res = parse_content_disposition (tc->hdrval, &filename);

      mu_assert ("test_parse_content_disposition: wrong result",
                 res == tc->result
                 && (res == false
                     || 0 == strcmp (tc->filename, filename)));
      xfree (filename);
    }

  return NULL;
}
5490 
5491 #endif /* TESTING */
5492 
5493 /*
5494  * vim: et sts=2 sw=2 cino+={s
5495  */
5496