1 /* File retrieval.
2    Copyright (C) 1996-2011, 2014-2015, 2018-2021 Free Software
3    Foundation, Inc.
4 
5 This file is part of GNU Wget.
6 
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
11 
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19 
20 Additional permission under GNU GPL version 3 section 7
21 
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work.  */
30 
31 #include "wget.h"
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <unistd.h>
36 #include <errno.h>
37 #include <string.h>
38 #include <assert.h>
39 #ifdef VMS
40 # include <unixio.h>            /* For delete(). */
41 #endif
42 
43 #ifdef HAVE_LIBZ
44 # include <zlib.h>
45 #endif
46 
47 #include "exits.h"
48 #include "utils.h"
49 #include "retr.h"
50 #include "progress.h"
51 #include "url.h"
52 #include "recur.h"
53 #include "ftp.h"
54 #include "http.h"
55 #include "host.h"
56 #include "connect.h"
57 #include "hash.h"
58 #include "convert.h"
59 #include "ptimer.h"
60 #include "html-url.h"
61 #include "iri.h"
62 #include "hsts.h"
63 
64 /* Total size of downloaded files.  Used to enforce quota.  */
65 wgint total_downloaded_bytes;
66 
67 /* Total download time in seconds. */
68 double total_download_time;
69 
70 /* If non-NULL, the stream to which output should be written.  This
71    stream is initialized when `-O' is used.  */
72 FILE *output_stream;
73 
74 /* Whether output_document is a regular file we can manipulate,
75    i.e. not `-' or a device file. */
76 bool output_stream_regular;
77 
/* Bookkeeping for --limit-rate throttling.  Updated by
   limit_bandwidth and cleared by limit_bandwidth_reset.  */
static struct {
  /* Bytes received since chunk_start that have not yet been
     accounted for by a sleep.  */
  wgint chunk_bytes;
  /* Timer reading (ptimer_read) taken when the current chunk began.  */
  double chunk_start;
  /* Difference between the requested and the measured duration of
     the previous sleep; added to the next sleep to compensate.
     Clamped to [-0.5, 0.5] by limit_bandwidth.  */
  double sleep_adjust;
} limit_data;
83 
/* Clear the throttling state in limit_data.  fd_read_body calls this
   before each download when opt.limit_rate is set.  */
static void
limit_bandwidth_reset (void)
{
  xzero (limit_data);
}
89 
90 #ifdef HAVE_LIBZ
91 static voidpf
zalloc(voidpf opaque,unsigned int items,unsigned int size)92 zalloc (voidpf opaque, unsigned int items, unsigned int size)
93 {
94   (void) opaque;
95   return (voidpf) xcalloc (items, size);
96 }
97 
98 static void
zfree(voidpf opaque,voidpf address)99 zfree (voidpf opaque, voidpf address)
100 {
101   (void) opaque;
102   xfree (address);
103 }
104 #endif
105 
106 /* Limit the bandwidth by pausing the download for an amount of time.
107    BYTES is the number of bytes received from the network, and TIMER
108    is the timer that started at the beginning of download.  */
109 
110 static void
limit_bandwidth(wgint bytes,struct ptimer * timer)111 limit_bandwidth (wgint bytes, struct ptimer *timer)
112 {
113   double delta_t = ptimer_read (timer) - limit_data.chunk_start;
114   double expected;
115 
116   limit_data.chunk_bytes += bytes;
117 
118   /* Calculate the amount of time we expect downloading the chunk
119      should take.  If in reality it took less time, sleep to
120      compensate for the difference.  */
121   expected = (double) limit_data.chunk_bytes / opt.limit_rate;
122 
123   if (expected > delta_t)
124     {
125       double slp = expected - delta_t + limit_data.sleep_adjust;
126       double t0, t1;
127       if (slp < 0.2)
128         {
129           DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
130                    slp * 1000, number_to_static_string (limit_data.chunk_bytes),
131                    delta_t));
132           return;
133         }
134       DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
135                slp * 1000, number_to_static_string (limit_data.chunk_bytes),
136                limit_data.sleep_adjust));
137 
138       t0 = ptimer_read (timer);
139       xsleep (slp);
140       t1 = ptimer_measure (timer);
141 
142       /* Due to scheduling, we probably slept slightly longer (or
143          shorter) than desired.  Calculate the difference between the
144          desired and the actual sleep, and adjust the next sleep by
145          that amount.  */
146       limit_data.sleep_adjust = slp - (t1 - t0);
147       /* If sleep_adjust is very large, it's likely due to suspension
148          and not clock inaccuracy.  Don't enforce those.  */
149       if (limit_data.sleep_adjust > 0.5)
150         limit_data.sleep_adjust = 0.5;
151       else if (limit_data.sleep_adjust < -0.5)
152         limit_data.sleep_adjust = -0.5;
153     }
154 
155   limit_data.chunk_bytes = 0;
156   limit_data.chunk_start = ptimer_read (timer);
157 }
158 
159 /* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
160    amount of data and decrease SKIP.  Increment *TOTAL by the amount
161    of data written.  If OUT2 is not NULL, also write BUF to OUT2.
162    In case of error writing to OUT, -2 is returned.  In case of error
163    writing to OUT2, -3 is returned.  Return 1 if the whole BUF was
164    skipped.  */
165 
166 static int
write_data(FILE * out,FILE * out2,const char * buf,int bufsize,wgint * skip,wgint * written)167 write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
168             wgint *skip, wgint *written)
169 {
170   if (out == NULL && out2 == NULL)
171     return 1;
172 
173   if (skip)
174     {
175       if (*skip > bufsize)
176         {
177           *skip -= bufsize;
178           return 1;
179         }
180       if (*skip)
181         {
182           buf += *skip;
183           bufsize -= *skip;
184           *skip = 0;
185           if (bufsize == 0)
186             return 1;
187         }
188     }
189 
190   if (out)
191     fwrite (buf, 1, bufsize, out);
192   if (out2)
193     fwrite (buf, 1, bufsize, out2);
194 
195   if (written)
196     *written += bufsize;
197 
198   /* Immediately flush the downloaded data.  This should not hinder
199      performance: fast downloads will arrive in large 16K chunks
200      (which stdio would write out immediately anyway), and slow
201      downloads wouldn't be limited by disk speed.  */
202 
203   /* 2005-04-20 SMS.
204      Perhaps it shouldn't hinder performance, but it sure does, at least
205      on VMS (more than 2X).  Rather than speculate on what it should or
206      shouldn't do, it might make more sense to test it.  Even better, it
207      might be nice to explain what possible benefit it could offer, as
208      it appears to be a clear invitation to poor performance with no
209      actual justification.  (Also, why 16K?  Anyone test other values?)
210   */
211 #ifndef __VMS
212   if (out)
213     fflush (out);
214   if (out2)
215     fflush (out2);
216 #endif /* ndef __VMS */
217 
218   if (out && ferror (out))
219     return -2;
220   else if (out2 && ferror (out2))
221     return -3;
222 
223   return 0;
224 }
225 
/* Read the contents of file descriptor FD until the connection
   terminates or a read error occurs.  The data is read in portions of
   at most max (BUFSIZ, 8K) bytes (fewer when opt.limit_rate is smaller
   than that) and written to OUT as it arrives.  If opt.show_progress
   is set, the progress is shown, labelled with DOWNLOADED_FILENAME
   (with the leading opt.dir_prefix part stepped over when
   opt.dir_prefix is set).

   TOREAD is the amount of data expected to arrive, normally only used
   by the progress gauge.

   STARTPOS is the position from which the download starts, used by
   the progress gauge.  If QTYREAD is non-NULL, the value it points to
   is incremented by the amount of data read from the network.  If
   QTYWRITTEN is non-NULL, the value it points to is incremented by
   the amount of data written to disk.  The time it took to download
   the data is stored to ELAPSED (if ELAPSED is non-NULL).

   FLAGS is a bit mask.  The bits examined here are:

     rb_read_exactly              stop once TOREAD bytes have been read;
                                  TOREAD == 0 then means "read nothing".
     rb_skip_startpos             do not write the first STARTPOS bytes
                                  of the data (see write_data's SKIP).
     rb_chunked_transfer_encoding the body is a sequence of chunks, each
                                  preceded by a line giving its size in
                                  hexadecimal.
     rb_compressed_gzip           (only with HAVE_LIBZ) the data is
                                  inflated with zlib before being
                                  written to OUT.

   If OUT2 is non-NULL, the contents is also written to OUT2.
   OUT2 will get an exact copy of the response: if this is a chunked
   response, everything -- including the chunk headers -- is written
   to OUT2.  (OUT will only get the unchunked response.)  Likewise,
   with rb_compressed_gzip OUT2 gets the data as received and OUT the
   inflated data.

   In case of error while reading data, -1 is returned.  In case of
   error while writing data to OUT, -2 is returned.  In case of error
   while writing data to OUT2, -3 is returned.

   NOTE: on success the return value is the result of the last read
   (0 when the loop ended at EOF or at the final chunk, and always 0
   when rb_compressed_gzip is in effect), not a running total.  The
   totals are reported through QTYREAD and QTYWRITTEN.  */

int
fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread, wgint startpos,

              wgint *qtyread, wgint *qtywritten, double *elapsed, int flags,
              FILE *out2)
{
  int ret = 0;
  /* Local helper; note that it stays defined for the rest of the
     file, as it is never #undef'd again.  */
#undef max
#define max(a,b) ((a) > (b) ? (a) : (b))
  int dlbufsize = max (BUFSIZ, 8 * 1024);
  /* Receive buffer for fd_read; released just before returning.  */
  char *dlbuf = xmalloc (dlbufsize);

  struct ptimer *timer = NULL;
  /* Timer reading at the most recent read that returned data.  Used
     in interactive mode to keep the total idle time within
     opt.read_timeout.  */
  double last_successful_read_tm = 0;

  /* The progress gauge, set according to the user preferences. */
  void *progress = NULL;

  /* Non-zero if the progress gauge is interactive, i.e. if it can
     continually update the display.  When true, smaller timeout
     values are used so that the gauge can update the display when
     data arrives slowly. */
  bool progress_interactive = false;

  bool exact = !!(flags & rb_read_exactly);

  /* Used only by HTTP/HTTPS chunked transfer encoding.  */
  bool chunked = flags & rb_chunked_transfer_encoding;
  /* Number of leading bytes write_data should still drop.  */
  wgint skip = 0;

  /* How much data we've read/written.  */
  wgint sum_read = 0;
  wgint sum_written = 0;
  /* Bytes of the current chunk that have not been read yet (chunked
     mode only); 0 means a new chunk-size line is due.  */
  wgint remaining_chunk_size = 0;

#ifdef HAVE_LIBZ
  /* try to minimize the number of calls to inflate() and write_data() per
     call to fd_read() */
  unsigned int gzbufsize = dlbufsize * 4;
  /* Output buffer for inflate().  Non-NULL also serves as the "gzip
     decoding is active" flag below and in the cleanup code.  */
  char *gzbuf = NULL;
  z_stream gzstream;

  if (flags & rb_compressed_gzip)
    {
      gzbuf = xmalloc (gzbufsize);
      /* NOTE(review): the else branch below is only reachable if
         xmalloc can return NULL -- presumably it aborts instead;
         confirm.  */
      if (gzbuf != NULL)
        {
          gzstream.zalloc = zalloc;
          gzstream.zfree = zfree;
          gzstream.opaque = Z_NULL;
          gzstream.next_in = Z_NULL;
          gzstream.avail_in = 0;

          #define GZIP_DETECT 32 /* gzip format detection */
          #define GZIP_WINDOW 15 /* logarithmic window size (default: 15) */
          ret = inflateInit2 (&gzstream, GZIP_DETECT | GZIP_WINDOW);
          if (ret != Z_OK)
            {
              /* NOTE(review): the cleanup at `out' tests gzbuf !=
                 NULL before calling inflateEnd and xfree, so this
                 path relies on xfree leaving gzbuf set to NULL --
                 confirm against xfree's definition.  */
              xfree (gzbuf);
              errno = (ret == Z_MEM_ERROR) ? ENOMEM : EINVAL;
              ret = -1;
              goto out;
            }
        }
      else
        {
          errno = ENOMEM;
          ret = -1;
          goto out;
        }
    }
#endif

  if (flags & rb_skip_startpos)
    skip = startpos;

  if (opt.show_progress)
    {
      const char *filename_progress;
      /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
         argument to progress_create because the indicator doesn't
         (yet) know about "skipping" data.  */
      wgint start = skip ? 0 : startpos;
      /* NOTE(review): this assumes DOWNLOADED_FILENAME begins with
         opt.dir_prefix followed by one separator character; nothing
         here verifies that -- confirm against the callers.  */
      if (opt.dir_prefix)
        filename_progress = downloaded_filename + strlen (opt.dir_prefix) + 1;
      else
        filename_progress = downloaded_filename;
      progress = progress_create (filename_progress, start, start + toread);
      progress_interactive = progress_interactive_p (progress);
    }

  if (opt.limit_rate)
    limit_bandwidth_reset ();

  /* A timer is needed for tracking progress, for throttling, and for
     tracking elapsed time.  If either of these are requested, start
     the timer.  */
  if (progress || opt.limit_rate || elapsed)
    {
      timer = ptimer_new ();
      last_successful_read_tm = 0;
    }

  /* Use a smaller buffer for low requested bandwidths.  For example,
     with --limit-rate=2k, it doesn't make sense to slurp in 16K of
     data and then sleep for 8s.  With buffer size equal to the limit,
     we never have to sleep for more than one second.  */
  if (opt.limit_rate && opt.limit_rate < dlbufsize)
    dlbufsize = opt.limit_rate;

  /* Read from FD while there is data to read.  Normally toread==0
     means that it is unknown how much data is to arrive.  However, if
     EXACT is set, then toread==0 means what it says: that no data
     should be read.  */
  while (!exact || (sum_read < toread))
    {
      int rdsize;
      double tmout = opt.read_timeout;

      if (chunked)
        {
          if (remaining_chunk_size == 0)
            {
              /* Start of a new chunk: read the chunk-size line.  */
              char *line = fd_read_line (fd);
              char *endl;
              if (line == NULL)
                {
                  ret = -1;
                  break;
                }
              else if (out2 != NULL)
                fwrite (line, 1, strlen (line), out2);

              /* NOTE(review): ENDL is never examined, so a line that
                 does not start with hex digits yields 0 and is
                 handled as the terminating chunk below.  */
              remaining_chunk_size = strtol (line, &endl, 16);
              xfree (line);

              if (remaining_chunk_size < 0)
                {
                  ret = -1;
                  break;
                }

              if (remaining_chunk_size == 0)
                {
                  /* Zero-sized chunk: end of the body.  Consume one
                     more line after it and stop reading.  */
                  ret = 0;
                  line = fd_read_line (fd);
                  if (line == NULL)
                    ret = -1;
                  else
                    {
                      if (out2 != NULL)
                        fwrite (line, 1, strlen (line), out2);
                      xfree (line);
                    }
                  break;
                }
            }

          /* Never read past the end of the current chunk.  */
          rdsize = MIN (remaining_chunk_size, dlbufsize);
        }
      else
        rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;

      if (progress_interactive)
        {
          /* For interactive progress gauges, always specify a ~1s
             timeout, so that the gauge can be updated regularly even
             when the data arrives very slowly or stalls.  */
          tmout = 0.95;
          /* avoid wrong 'interactive timeout' */
          errno = 0;
          if (opt.read_timeout)
            {
              double waittm;
              waittm = ptimer_read (timer) - last_successful_read_tm;
              if (waittm + tmout > opt.read_timeout)
                {
                  /* Don't let total idle time exceed read timeout. */
                  tmout = opt.read_timeout - waittm;
                  /* if 0 fd_read can be 'blocked read' */
                  if (tmout <= 0)
                    {
                      /* We've already exceeded the timeout. */
                      ret = -1, errno = ETIMEDOUT;
                      break;
                    }
                }
            }
        }
      ret = fd_read (fd, dlbuf, rdsize, tmout);

      if (progress_interactive && ret < 0 && errno == ETIMEDOUT)
        ret = 0;                /* interactive timeout, handled above */
      else if (ret <= 0)
        break;                  /* EOF or read error */

      if (progress || opt.limit_rate || elapsed)
        {
          ptimer_measure (timer);
          if (ret > 0)
            last_successful_read_tm = ptimer_read (timer);
        }

      if (ret > 0)
        {
          int write_res;

          sum_read += ret;

#ifdef HAVE_LIBZ
          if (gzbuf != NULL)
            {
              int err;
              int towrite;

              /* Write original data to WARC file */
              write_res = write_data (NULL, out2, dlbuf, ret, NULL, NULL);
              if (write_res < 0)
                {
                  ret = write_res;
                  goto out;
                }

              gzstream.avail_in = ret;
              gzstream.next_in = (unsigned char *) dlbuf;

              /* Inflate the block just read, writing the output to
                 OUT one GZBUF-full at a time.  */
              do
                {
                  gzstream.avail_out = gzbufsize;
                  gzstream.next_out = (unsigned char *) gzbuf;

                  err = inflate (&gzstream, Z_NO_FLUSH);

                  switch (err)
                    {
                    case Z_MEM_ERROR:
                      errno = ENOMEM;
                      ret = -1;
                      goto out;
                    case Z_NEED_DICT:
                    case Z_DATA_ERROR:
                      errno = EINVAL;
                      ret = -1;
                      goto out;
                    case Z_STREAM_END:
                      /* Only logged; whatever was produced is still
                         written out below.  */
                      if (exact && sum_read != toread)
                        {
                          DEBUGP(("zlib stream ended unexpectedly after %"PRId64"/%"PRId64
                                  " bytes\n", sum_read, toread));
                        }
                    }

                  towrite = gzbufsize - gzstream.avail_out;
                  write_res = write_data (out, NULL, gzbuf, towrite, &skip,
                                          &sum_written);
                  if (write_res < 0)
                    {
                      ret = write_res;
                      goto out;
                    }
                }
              /* A completely filled output buffer means there may be
                 more output pending; go round again.  */
              while (gzstream.avail_out == 0);
            }
          else
#endif
            {
              write_res = write_data (out, out2, dlbuf, ret, &skip,
                                      &sum_written);
              if (write_res < 0)
                {
                  ret = write_res;
                  goto out;
                }
            }

          if (chunked)
            {
              remaining_chunk_size -= ret;
              if (remaining_chunk_size == 0)
                {
                  /* The chunk's data has been read in full; consume
                     the line that follows it.  */
                  char *line = fd_read_line (fd);
                  if (line == NULL)
                    {
                      ret = -1;
                      break;
                    }
                  else
                    {
                      if (out2 != NULL)
                        fwrite (line, 1, strlen (line), out2);
                      xfree (line);
                    }
                }
            }
        }

      /* RET is 0 here after an interactive timeout, in which case
         these calls just account for the elapsed time.  */
      if (opt.limit_rate)
        limit_bandwidth (ret, timer);

      if (progress)
        progress_update (progress, ret, ptimer_read (timer));
#ifdef WINDOWS
      if (toread > 0 && opt.show_progress)
        ws_percenttitle (100.0 *
                         (startpos + sum_read) / (startpos + toread));
#endif
    }
  /* Any error the loop ended with is reported as a read error.  The
     write errors (-2 and -3) do not pass through here: they jump
     straight to `out' and keep their value.  */
  if (ret < -1)
    ret = -1;

 out:
  if (progress)
    progress_finish (progress, ptimer_read (timer));

  if (timer)
    {
      if (elapsed)
        *elapsed = ptimer_read (timer);
      ptimer_destroy (timer);
    }

#ifdef HAVE_LIBZ
  if (gzbuf != NULL)
    {
      int err = inflateEnd (&gzstream);
      if (ret >= 0)
        {
          /* with compression enabled, ret must be 0 if successful */
          if (err == Z_OK)
            ret = 0;
          else
            {
              errno = EINVAL;
              ret = -1;
            }
        }
      xfree (gzbuf);

      if (gzstream.total_in != (uLong) sum_read)
        {
          DEBUGP(("zlib read size differs from raw read size (%lu/%"PRId64")\n",
                  gzstream.total_in, sum_read));
        }
    }
#endif

  /* The totals are reported even when the transfer failed.  */
  if (qtyread)
    *qtyread += sum_read;
  if (qtywritten)
    *qtywritten += sum_written;

  xfree (dlbuf);

  return ret;
}
606 
607 /* Read a hunk of data from FD, up until a terminator.  The hunk is
608    limited by whatever the TERMINATOR callback chooses as its
609    terminator.  For example, if terminator stops at newline, the hunk
610    will consist of a line of data; if terminator stops at two
611    newlines, it can be used to read the head of an HTTP response.
612    Upon determining the boundary, the function returns the data (up to
613    the terminator) in malloc-allocated storage.
614 
615    In case of read error, NULL is returned.  In case of EOF and no
616    data read, NULL is returned and errno set to 0.  In case of having
617    read some data, but encountering EOF before seeing the terminator,
618    the data that has been read is returned, but it will (obviously)
619    not contain the terminator.
620 
621    The TERMINATOR function is called with three arguments: the
622    beginning of the data read so far, the beginning of the current
623    block of peeked-at data, and the length of the current block.
624    Depending on its needs, the function is free to choose whether to
625    analyze all data or just the newly arrived data.  If TERMINATOR
626    returns NULL, it means that the terminator has not been seen.
   Otherwise it should return a pointer to the character immediately
628    following the terminator.
629 
630    The idea is to be able to read a line of input, or otherwise a hunk
631    of text, such as the head of an HTTP request, without crossing the
632    boundary, so that the next call to fd_read etc. reads the data
633    after the hunk.  To achieve that, this function does the following:
634 
635    1. Peek at incoming data.
636 
637    2. Determine whether the peeked data, along with the previously
638       read data, includes the terminator.
639 
640       2a. If yes, read the data until the end of the terminator, and
641           exit.
642 
643       2b. If no, read the peeked data and goto 1.
644 
645    The function is careful to assume as little as possible about the
646    implementation of peeking.  For example, every peek is followed by
647    a read.  If the read returns a different amount of data, the
648    process is retried until all data arrives safely.
649 
650    SIZEHINT is the buffer size sufficient to hold all the data in the
651    typical case (it is used as the initial buffer size).  MAXSIZE is
652    the maximum amount of memory this function is allowed to allocate,
653    or 0 if no upper limit is to be enforced.
654 
655    This function should be used as a building block for other
656    functions -- see fd_read_line as a simple example.  */
657 
658 char *
fd_read_hunk(int fd,hunk_terminator_t terminator,long sizehint,long maxsize)659 fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
660 {
661   long bufsize = sizehint;
662   char *hunk = xmalloc (bufsize);
663   int tail = 0;                 /* tail position in HUNK */
664 
665   assert (!maxsize || maxsize >= bufsize);
666 
667   while (1)
668     {
669       const char *end;
670       int pklen, rdlen, remain;
671 
672       /* First, peek at the available data. */
673 
674       pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
675       if (pklen < 0)
676         {
677           xfree (hunk);
678           return NULL;
679         }
680       end = terminator (hunk, hunk + tail, pklen);
681       if (end)
682         {
683           /* The data contains the terminator: we'll drain the data up
684              to the end of the terminator.  */
685           remain = end - (hunk + tail);
686           assert (remain >= 0);
687           if (remain == 0)
688             {
689               /* No more data needs to be read. */
690               hunk[tail] = '\0';
691               return hunk;
692             }
693           if (bufsize - 1 < tail + remain)
694             {
695               bufsize = tail + remain + 1;
696               hunk = xrealloc (hunk, bufsize);
697             }
698         }
699       else
700         /* No terminator: simply read the data we know is (or should
701            be) available.  */
702         remain = pklen;
703 
704       /* Now, read the data.  Note that we make no assumptions about
705          how much data we'll get.  (Some TCP stacks are notorious for
706          read returning less data than the previous MSG_PEEK.)  */
707 
708       rdlen = fd_read (fd, hunk + tail, remain, 0);
709       if (rdlen < 0)
710         {
711           xfree (hunk);
712           return NULL;
713         }
714       tail += rdlen;
715       hunk[tail] = '\0';
716 
717       if (rdlen == 0)
718         {
719           if (tail == 0)
720             {
721               /* EOF without anything having been read */
722               xfree (hunk);
723               errno = 0;
724               return NULL;
725             }
726           else
727             /* EOF seen: return the data we've read. */
728             return hunk;
729         }
730       if (end && rdlen == remain)
731         /* The terminator was seen and the remaining data drained --
732            we got what we came for.  */
733         return hunk;
734 
735       /* Keep looping until all the data arrives. */
736 
737       if (tail == bufsize - 1)
738         {
739           /* Double the buffer size, but refuse to allocate more than
740              MAXSIZE bytes.  */
741           if (maxsize && bufsize >= maxsize)
742             {
743               xfree (hunk);
744               errno = ENOMEM;
745               return NULL;
746             }
747           bufsize <<= 1;
748           if (maxsize && bufsize > maxsize)
749             bufsize = maxsize;
750           hunk = xrealloc (hunk, bufsize);
751         }
752     }
753 }
754 
755 static const char *
line_terminator(const char * start _GL_UNUSED,const char * peeked,int peeklen)756 line_terminator (const char *start _GL_UNUSED, const char *peeked, int peeklen)
757 {
758   const char *p = memchr (peeked, '\n', peeklen);
759   if (p)
760     /* p+1 because the line must include '\n' */
761     return p + 1;
762   return NULL;
763 }
764 
765 /* The maximum size of the single line we agree to accept.  This is
766    not meant to impose an arbitrary limit, but to protect the user
767    from Wget slurping up available memory upon encountering malicious
768    or buggy server output.  Define it to 0 to remove the limit.  */
769 #define FD_READ_LINE_MAX 4096
770 
771 /* Read one line from FD and return it.  The line is allocated using
772    malloc, but is never larger than FD_READ_LINE_MAX.
773 
774    If an error occurs, or if no data can be read, NULL is returned.
775    In the former case errno indicates the error condition, and in the
776    latter case, errno is NULL.  */
777 
778 char *
fd_read_line(int fd)779 fd_read_line (int fd)
780 {
781   return fd_read_hunk (fd, line_terminator, 128, FD_READ_LINE_MAX);
782 }
783 
784 /* Return a printed representation of the download rate, along with
785    the units appropriate for the download speed.  */
786 
787 const char *
retr_rate(wgint bytes,double secs)788 retr_rate (wgint bytes, double secs)
789 {
790   static char res[20];
791   static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
792   static const char *rate_names_bits[] = {"b/s", "Kb/s", "Mb/s", "Gb/s" };
793   int units;
794 
795   double dlrate = calc_rate (bytes, secs, &units);
796   /* Use more digits for smaller numbers (regardless of unit used),
797      e.g. "1022", "247", "12.5", "2.38".  */
798   snprintf (res, sizeof(res), "%.*f %s",
799            dlrate >= 99.95 ? 0 : dlrate >= 9.995 ? 1 : 2,
800            dlrate, !opt.report_bps ? rate_names[units]: rate_names_bits[units]);
801 
802   return res;
803 }
804 
805 /* Calculate the download rate and trim it as appropriate for the
806    speed.  Appropriate means that if rate is greater than 1K/s,
807    kilobytes are used, and if rate is greater than 1MB/s, megabytes
808    are used.
809 
810    UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
811    GB/s.  */
812 
813 double
calc_rate(wgint bytes,double secs,int * units)814 calc_rate (wgint bytes, double secs, int *units)
815 {
816   double dlrate;
817   double bibyte;
818 
819   if (!opt.report_bps)
820     bibyte = 1024.0;
821   else
822     bibyte = 1000.0;
823 
824   if (secs == 0)
825     /* If elapsed time is exactly zero, it means we're under the
826        resolution of the timer.  This can easily happen on systems
827        that use time() for the timer.  Since the interval lies between
828        0 and the timer's resolution, assume half the resolution.  */
829     secs = ptimer_resolution () / 2.0;
830 
831   dlrate = secs ? convert_to_bits (bytes) / secs : 0;
832   if (dlrate < bibyte)
833     *units = 0;
834   else if (dlrate < (bibyte * bibyte))
835     *units = 1, dlrate /= bibyte;
836   else if (dlrate < (bibyte * bibyte * bibyte))
837     *units = 2, dlrate /= (bibyte * bibyte);
838   else if (dlrate < (bibyte * bibyte * bibyte * bibyte))
839     *units = 3, dlrate /= (bibyte * bibyte * bibyte);
840   else {
841     *units = 4, dlrate /= (bibyte * bibyte * bibyte * bibyte);
842     if (dlrate > 99.99)
843 		 dlrate = 99.99; // upper limit 99.99TB/s
844   }
845 
846   return dlrate;
847 }
848 
849 
/* Temporarily remove the request method and body from the options.
   Copies opt.body_data, opt.body_file and opt.method into
   saved_body_data, saved_body_file_name and saved_method, sets the
   three options to NULL and sets method_suspended.  The saved_* and
   method_suspended variables must be declared in the scope where the
   macro is used.  RESTORE_METHOD puts the values back.  */
#define SUSPEND_METHOD do {                     \
  method_suspended = true;                      \
  saved_body_data = opt.body_data;              \
  saved_body_file_name = opt.body_file;         \
  saved_method = opt.method;                    \
  opt.body_data = NULL;                         \
  opt.body_file = NULL;                         \
  opt.method = NULL;                            \
} while (0)
859 
/* Put back the option values saved by SUSPEND_METHOD and clear
   method_suspended.  Does nothing if method_suspended is false, so
   it may be used whether or not a suspension took place.  Uses the
   same variables of the enclosing scope as SUSPEND_METHOD.  */
#define RESTORE_METHOD do {                             \
  if (method_suspended)                                 \
    {                                                   \
      opt.body_data = saved_body_data;                  \
      opt.body_file = saved_body_file_name;             \
      opt.method = saved_method;                        \
      method_suspended = false;                         \
    }                                                   \
} while (0)
869 
870 static char *getproxy (struct url *);
871 
/* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
   proxy, etc.  */
874 
875 /* #### This function should be rewritten so it doesn't return from
876    multiple points. */
877 
878 uerr_t
retrieve_url(struct url * orig_parsed,const char * origurl,char ** file,char ** newloc,const char * refurl,int * dt,bool recursive,struct iri * iri,bool register_status)879 retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
880               char **newloc, const char *refurl, int *dt, bool recursive,
881               struct iri *iri, bool register_status)
882 {
883   uerr_t result;
884   char *url;
885   bool location_changed;
886   bool iri_fallbacked = 0;
887   int dummy;
888   char *mynewloc, *proxy;
889   struct url *u = orig_parsed, *proxy_url;
890   int up_error_code;            /* url parse error code */
891   char *local_file = NULL;
892   int redirection_count = 0;
893 
894   bool method_suspended = false;
895   char *saved_body_data = NULL;
896   char *saved_method = NULL;
897   char *saved_body_file_name = NULL;
898 
899   /* If dt is NULL, use local storage.  */
900   if (!dt)
901     {
902       dt = &dummy;
903       dummy = 0;
904     }
905   url = xstrdup (origurl);
906   if (newloc)
907     *newloc = NULL;
908   if (file)
909     *file = NULL;
910 
911   if (!refurl)
912     refurl = opt.referer;
913 
914  redirected:
915   /* (also for IRI fallbacking) */
916 
917   result = NOCONERROR;
918   mynewloc = NULL;
919   xfree(local_file);
920   proxy_url = NULL;
921 
922   proxy = getproxy (u);
923   if (proxy)
924     {
925       struct iri *pi = iri_new ();
926       set_uri_encoding (pi, opt.locale, true);
927       pi->utf8_encode = false;
928 
929       /* Parse the proxy URL.  */
930       proxy_url = url_parse (proxy, &up_error_code, pi, true);
931       if (!proxy_url)
932         {
933           char *error = url_error (proxy, up_error_code);
934           logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
935                      proxy, error);
936           xfree (url);
937           xfree (error);
938           xfree (proxy);
939           iri_free (pi);
940           RESTORE_METHOD;
941           result = PROXERR;
942           if (orig_parsed != u)
943             url_free (u);
944           goto bail;
945         }
946       if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
947         {
948           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
949           url_free (proxy_url);
950           xfree (url);
951           xfree (proxy);
952           iri_free (pi);
953           RESTORE_METHOD;
954           result = PROXERR;
955           if (orig_parsed != u)
956             url_free (u);
957           goto bail;
958         }
959       iri_free(pi);
960       xfree (proxy);
961     }
962 
963   if (u->scheme == SCHEME_HTTP
964 #ifdef HAVE_SSL
965       || u->scheme == SCHEME_HTTPS
966 #endif
967       || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
968     {
969 #ifdef HAVE_HSTS
970 #ifdef TESTING
971       /* we don't link against main.o when we're testing */
972       hsts_store_t hsts_store = NULL;
973 #else
974       extern hsts_store_t hsts_store;
975 #endif
976 
977       if (opt.hsts && hsts_store)
978 	{
979 	  if (hsts_match (hsts_store, u))
980 	    logprintf (LOG_VERBOSE, "URL transformed to HTTPS due to an HSTS policy\n");
981 	}
982 #endif
983       result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt,
984                           proxy_url, iri);
985     }
986   else if (u->scheme == SCHEME_FTP
987 #ifdef HAVE_SSL
988       || u->scheme == SCHEME_FTPS
989 #endif
990       )
991     {
992       /* If this is a redirection, temporarily turn off opt.ftp_glob
993          and opt.recursive, both being undesirable when following
994          redirects.  */
995       bool oldrec = recursive, glob = opt.ftp_glob;
996       if (redirection_count)
997         oldrec = glob = false;
998 
999       result = ftp_loop (u, orig_parsed, &local_file, dt, proxy_url,
1000                          recursive, glob);
1001       recursive = oldrec;
1002 
1003       /* There is a possibility of having HTTP being redirected to
1004          FTP.  In these cases we must decide whether the text is HTML
1005          according to the suffix.  The HTML suffixes are `.html',
1006          `.htm' and a few others, case-insensitive.  */
1007       if (redirection_count && local_file && (u->scheme == SCHEME_FTP
1008 #ifdef HAVE_SSL
1009           || u->scheme == SCHEME_FTPS
1010 #endif
1011           ))
1012         {
1013           if (has_html_suffix_p (local_file))
1014             *dt |= TEXTHTML;
1015         }
1016     }
1017 
1018   if (proxy_url)
1019     {
1020       url_free (proxy_url);
1021       proxy_url = NULL;
1022     }
1023 
1024   location_changed = (result == NEWLOCATION || result == NEWLOCATION_KEEP_POST);
1025   if (location_changed)
1026     {
1027       char *construced_newloc;
1028       struct url *newloc_parsed;
1029 
1030       assert (mynewloc != NULL);
1031 
1032       xfree (local_file);
1033 
1034       /* The HTTP specs only allow absolute URLs to appear in
1035          redirects, but a ton of boneheaded webservers and CGIs out
1036          there break the rules and use relative URLs, and popular
1037          browsers are lenient about this, so wget should be too. */
1038       construced_newloc = uri_merge (url, mynewloc ? mynewloc : "");
1039       xfree (mynewloc);
1040       mynewloc = construced_newloc;
1041 
1042 #ifdef ENABLE_IRI
1043       /* Reset UTF-8 encoding state, set the URI encoding and reset
1044          the content encoding. */
1045       iri->utf8_encode = opt.enable_iri;
1046       if (opt.encoding_remote)
1047        set_uri_encoding (iri, opt.encoding_remote, true);
1048       set_content_encoding (iri, NULL);
1049       xfree (iri->orig_url);
1050 #endif
1051 
1052       /* Now, see if this new location makes sense. */
1053       newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
1054       if (!newloc_parsed)
1055         {
1056           char *error = url_error (mynewloc, up_error_code);
1057           logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
1058                      error);
1059           if (orig_parsed != u)
1060             {
1061               url_free (u);
1062             }
1063           xfree (url);
1064           xfree (mynewloc);
1065           xfree (error);
1066           RESTORE_METHOD;
1067           goto bail;
1068         }
1069 
1070       /* Now mynewloc will become newloc_parsed->url, because if the
1071          Location contained relative paths like .././something, we
1072          don't want that propagating as url.  */
1073       xfree (mynewloc);
1074       mynewloc = xstrdup (newloc_parsed->url);
1075 
1076       /* Check for max. number of redirections.  */
1077       if (++redirection_count > opt.max_redirect)
1078         {
1079           logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
1080                      opt.max_redirect);
1081           url_free (newloc_parsed);
1082           if (orig_parsed != u)
1083             {
1084               url_free (u);
1085             }
1086           xfree (url);
1087           xfree (mynewloc);
1088           RESTORE_METHOD;
1089           result = WRONGCODE;
1090           goto bail;
1091         }
1092 
1093       xfree (url);
1094       url = mynewloc;
1095       if (orig_parsed != u)
1096         {
1097           url_free (u);
1098         }
1099       u = newloc_parsed;
1100 
1101       /* If we're being redirected from POST, and we received a
1102          redirect code different than 307, we don't want to POST
1103          again.  Many requests answer POST with a redirection to an
1104          index page; that redirection is clearly a GET.  We "suspend"
1105          POST data for the duration of the redirections, and restore
1106          it when we're done.
1107 
1108          RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
1109          specifically to preserve the method of the request.
1110      */
1111       if (result != NEWLOCATION_KEEP_POST && !method_suspended)
1112         SUSPEND_METHOD;
1113 
1114       goto redirected;
1115     }
1116   else
1117     {
1118       xfree(mynewloc);
1119     }
1120 
1121   /* Try to not encode in UTF-8 if fetching failed */
1122   if (!(*dt & RETROKF) && iri->utf8_encode)
1123     {
1124       iri->utf8_encode = false;
1125       if (orig_parsed != u)
1126         {
1127           url_free (u);
1128         }
1129       u = url_parse (origurl, NULL, iri, true);
1130       if (u)
1131         {
1132           if (strcmp(u->url, orig_parsed->url))
1133             {
1134               DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
1135               xfree (url);
1136               url = xstrdup (u->url);
1137               iri_fallbacked = 1;
1138               goto redirected;
1139             }
1140           else
1141               DEBUGP (("[Needn't fallback to non-utf8 for %s\n", quote (url)));
1142         }
1143       else
1144           DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
1145     }
1146 
1147   if (local_file && u && (*dt & RETROKF || opt.content_on_error))
1148     {
1149       register_download (u->url, local_file);
1150 
1151       if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url))
1152         register_redirection (origurl, u->url);
1153 
1154       if (*dt & TEXTHTML)
1155         register_html (local_file);
1156 
1157       if (*dt & TEXTCSS)
1158         register_css (local_file);
1159     }
1160 
1161   if (file)
1162     *file = local_file ? local_file : NULL;
1163   else
1164     xfree (local_file);
1165 
1166   if (orig_parsed != u)
1167     url_free (u);
1168 
1169   if (redirection_count || iri_fallbacked)
1170     {
1171       if (newloc)
1172         *newloc = url;
1173       else
1174         xfree (url);
1175     }
1176   else
1177     {
1178       if (newloc)
1179         *newloc = NULL;
1180       xfree (url);
1181     }
1182 
1183   RESTORE_METHOD;
1184 
1185 bail:
1186   if (register_status)
1187     inform_exit_status (result);
1188 
1189   return result;
1190 }
1191 
1192 /* Find the URLs in the file and call retrieve_url() for each of them.
1193    If HTML is true, treat the file as HTML, and construct the URLs
1194    accordingly.
1195 
1196    If opt.recursive is set, call retrieve_tree() for each file.  */
1197 
1198 uerr_t
retrieve_from_file(const char * file,bool html,int * count)1199 retrieve_from_file (const char *file, bool html, int *count)
1200 {
1201   uerr_t status;
1202   struct urlpos *url_list, *cur_url;
1203   struct iri *iri = iri_new();
1204 
1205   char *input_file, *url_file = NULL;
1206   const char *url = file;
1207 
1208   status = RETROK;             /* Suppose everything is OK.  */
1209   *count = 0;                  /* Reset the URL count.  */
1210 
1211   /* sXXXav : Assume filename and links in the file are in the locale */
1212   set_uri_encoding (iri, opt.locale, true);
1213   set_content_encoding (iri, opt.locale);
1214 
1215   if (url_valid_scheme (url))
1216     {
1217       int dt,url_err;
1218       struct url *url_parsed = url_parse (url, &url_err, iri, true);
1219       if (!url_parsed)
1220         {
1221           char *error = url_error (url, url_err);
1222           logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
1223           xfree (error);
1224           iri_free (iri);
1225           return URLERROR;
1226         }
1227 
1228       if (!opt.base_href)
1229         opt.base_href = xstrdup (url);
1230 
1231       status = retrieve_url (url_parsed, url, &url_file, NULL, NULL, &dt,
1232                              false, iri, true);
1233       url_free (url_parsed);
1234 
1235       if (!url_file || (status != RETROK))
1236         return status;
1237 
1238       if (dt & TEXTHTML)
1239         html = true;
1240 
1241 #ifdef ENABLE_IRI
1242       /* If we have a found a content encoding, use it.
1243        * ( == is okay, because we're checking for identical object) */
1244       if (iri->content_encoding != opt.locale)
1245           set_uri_encoding (iri, iri->content_encoding, false);
1246 #endif
1247 
1248       /* Reset UTF-8 encode status */
1249       iri->utf8_encode = opt.enable_iri;
1250       xfree (iri->orig_url);
1251 
1252       input_file = url_file;
1253     }
1254   else
1255     input_file = (char *) file;
1256 
1257   url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
1258               : get_urls_file (input_file));
1259 
1260   xfree (url_file);
1261 
1262   for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
1263     {
1264       char *filename = NULL, *new_file = NULL, *proxy;
1265       int dt = 0;
1266       struct iri *tmpiri = iri_dup (iri);
1267       struct url *parsed_url = NULL;
1268 
1269       if (cur_url->ignore_when_downloading)
1270         continue;
1271 
1272       if (opt.quota && total_downloaded_bytes > opt.quota)
1273         {
1274           status = QUOTEXC;
1275           break;
1276         }
1277 
1278       parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);
1279 
1280       proxy = getproxy (cur_url->url);
1281       if ((opt.recursive || opt.page_requisites)
1282           && ((cur_url->url->scheme != SCHEME_FTP
1283 #ifdef HAVE_SSL
1284           && cur_url->url->scheme != SCHEME_FTPS
1285 #endif
1286           ) || proxy))
1287         {
1288           int old_follow_ftp = opt.follow_ftp;
1289 
1290           /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
1291           if (cur_url->url->scheme == SCHEME_FTP
1292 #ifdef HAVE_SSL
1293               || cur_url->url->scheme == SCHEME_FTPS
1294 #endif
1295               )
1296             opt.follow_ftp = 1;
1297 
1298           status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
1299                                   tmpiri);
1300 
1301           opt.follow_ftp = old_follow_ftp;
1302         }
1303       else
1304         status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
1305                                cur_url->url->url, &filename,
1306                                &new_file, NULL, &dt, opt.recursive, tmpiri,
1307                                true);
1308       xfree (proxy);
1309 
1310       if (parsed_url)
1311           url_free (parsed_url);
1312 
1313       if (filename && opt.delete_after && file_exists_p (filename, NULL))
1314         {
1315           DEBUGP (("\
1316 Removing file due to --delete-after in retrieve_from_file():\n"));
1317           logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
1318           if (unlink (filename))
1319             logprintf (LOG_NOTQUIET, "Failed to unlink %s: (%d) %s\n", filename, errno, strerror (errno));
1320           dt &= ~RETROKF;
1321         }
1322 
1323       xfree (new_file);
1324       xfree (filename);
1325       iri_free (tmpiri);
1326     }
1327 
1328   /* Free the linked list of URL-s.  */
1329   free_urlpos (url_list);
1330 
1331   iri_free (iri);
1332 
1333   return status;
1334 }
1335 
1336 /* Print `giving up', or `retrying', depending on the impending
1337    action.  N1 and N2 are the attempt number and the attempt limit.  */
1338 void
printwhat(int n1,int n2)1339 printwhat (int n1, int n2)
1340 {
1341   logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
1342 }
1343 
1344 /* If opt.wait or opt.waitretry are specified, and if certain
1345    conditions are met, sleep the appropriate number of seconds.  See
1346    the documentation of --wait and --waitretry for more information.
1347 
1348    COUNT is the count of current retrieval, beginning with 1. */
1349 
1350 void
sleep_between_retrievals(int count)1351 sleep_between_retrievals (int count)
1352 {
1353   static bool first_retrieval = true;
1354 
1355   if (first_retrieval)
1356     {
1357       /* Don't sleep before the very first retrieval. */
1358       first_retrieval = false;
1359       return;
1360     }
1361 
1362   if (opt.waitretry && count > 1)
1363     {
1364       /* If opt.waitretry is specified and this is a retry, wait for
1365          COUNT-1 number of seconds, or for opt.waitretry seconds.  */
1366       if (count <= opt.waitretry)
1367         xsleep (count - 1);
1368       else
1369         xsleep (opt.waitretry);
1370     }
1371   else if (opt.wait)
1372     {
1373       if (!opt.random_wait || count > 1)
1374         /* If random-wait is not specified, or if we are sleeping
1375            between retries of the same download, sleep the fixed
1376            interval.  */
1377         xsleep (opt.wait);
1378       else
1379         {
1380           /* Sleep a random amount of time averaging in opt.wait
1381              seconds.  The sleeping amount ranges from 0.5*opt.wait to
1382              1.5*opt.wait.  */
1383           double waitsecs = (0.5 + random_float ()) * opt.wait;
1384           DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
1385                    opt.wait, waitsecs));
1386           xsleep (waitsecs);
1387         }
1388     }
1389 }
1390 
1391 /* Free the linked list of urlpos.  */
1392 void
free_urlpos(struct urlpos * l)1393 free_urlpos (struct urlpos *l)
1394 {
1395   while (l)
1396     {
1397       struct urlpos *next = l->next;
1398       if (l->url)
1399         url_free (l->url);
1400       xfree (l->local_name);
1401       xfree (l);
1402       l = next;
1403     }
1404 }
1405 
1406 /* Rotate FNAME opt.backups times */
1407 void
rotate_backups(const char * fname)1408 rotate_backups(const char *fname)
1409 {
1410 #ifdef __VMS
1411 # define SEP "_"
1412 # define AVS ";*"                       /* All-version suffix. */
1413 # define AVSL (sizeof (AVS) - 1)
1414 #else
1415 # define SEP "."
1416 # define AVSL 0
1417 #endif
1418 #define FILE_BUF_SIZE 1024
1419 
1420   /* avoid alloca() here */
1421   char from[FILE_BUF_SIZE], to[FILE_BUF_SIZE];
1422   struct stat sb;
1423   bool overflow;
1424   int i;
1425 
1426   if (stat (fname, &sb) == 0)
1427     if (S_ISREG (sb.st_mode) == 0)
1428       return;
1429 
1430   for (i = opt.backups; i > 1; i--)
1431     {
1432 #ifdef VMS
1433       /* Delete (all versions of) any existing max-suffix file, to avoid
1434        * creating multiple versions of it.  (On VMS, rename() will
1435        * create a new version of an existing destination file, not
1436        * destroy/overwrite it.)
1437        */
1438       if (i == opt.backups)
1439         {
1440           if (((unsigned) snprintf (to, sizeof (to), "%s%s%d%s", fname, SEP, i, AVS)) >= sizeof (to))
1441             logprintf (LOG_NOTQUIET, "Failed to delete %s: File name truncation\n", to);
1442           else
1443             delete (to);
1444         }
1445 #endif
1446       overflow = (unsigned) snprintf (to, FILE_BUF_SIZE, "%s%s%d", fname, SEP, i) >= FILE_BUF_SIZE;
1447       overflow |= (unsigned) snprintf (from, FILE_BUF_SIZE, "%s%s%d", fname, SEP, i - 1) >= FILE_BUF_SIZE;
1448 
1449       if (overflow)
1450           errno = ENAMETOOLONG;
1451       if (overflow || rename (from, to))
1452         logprintf (LOG_NOTQUIET, "Failed to rename %s to %s: (%d) %s\n",
1453                    from, to, errno, strerror (errno));
1454     }
1455 
1456   overflow = (unsigned) snprintf (to, FILE_BUF_SIZE, "%s%s%d", fname, SEP, 1) >= FILE_BUF_SIZE;
1457   if (overflow)
1458     errno = ENAMETOOLONG;
1459   if (overflow || rename(fname, to))
1460     logprintf (LOG_NOTQUIET, "Failed to rename %s to %s: (%d) %s\n",
1461                fname, to, errno, strerror (errno));
1462 
1463 #undef FILE_BUF_SIZE
1464 }
1465 
1466 static bool no_proxy_match (const char *, const char **);
1467 
1468 /* Return the URL of the proxy appropriate for url U.  */
1469 
1470 static char *
getproxy(struct url * u)1471 getproxy (struct url *u)
1472 {
1473   char *proxy = NULL;
1474   char *rewritten_url;
1475 
1476   if (!opt.use_proxy)
1477     return NULL;
1478   if (no_proxy_match (u->host, (const char **)opt.no_proxy))
1479     return NULL;
1480 
1481   switch (u->scheme)
1482     {
1483     case SCHEME_HTTP:
1484       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
1485       break;
1486 #ifdef HAVE_SSL
1487     case SCHEME_HTTPS:
1488       proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
1489       break;
1490     case SCHEME_FTPS:
1491       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftps_proxy");
1492       break;
1493 #endif
1494     case SCHEME_FTP:
1495       proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
1496       break;
1497     case SCHEME_INVALID:
1498       break;
1499     }
1500   if (!proxy || !*proxy)
1501     return NULL;
1502 
1503   /* Handle shorthands.  `rewritten_storage' is a kludge to allow
1504      getproxy() to return static storage. */
1505   rewritten_url = rewrite_shorthand_url (proxy);
1506   if (rewritten_url)
1507     return rewritten_url;
1508 
1509   return strdup(proxy);
1510 }
1511 
/* Returns true if the URL U would be downloaded through a proxy,
   i.e. if getproxy() yields a proxy for it.  A NULL U yields false.  */

bool
url_uses_proxy (struct url * u)
{
  char *proxy;

  if (u == NULL)
    return false;

  proxy = getproxy (u);
  if (proxy == NULL)
    return false;

  /* Only the existence of a proxy matters; discard the string.  */
  xfree (proxy);
  return true;
}
1527 
/* Return true if HOST is matched by the NO_PROXY list, as decided by
   sufmatch().  A NULL list matches nothing.  */
static bool
no_proxy_match (const char *host, const char **no_proxy)
{
  return no_proxy != NULL && sufmatch (no_proxy, host);
}
1537 
1538 /* Set the file parameter to point to the local file string.  */
1539 void
set_local_file(const char ** file,const char * default_file)1540 set_local_file (const char **file, const char *default_file)
1541 {
1542   if (opt.output_document)
1543     {
1544       if (output_stream_regular)
1545         *file = opt.output_document;
1546     }
1547   else
1548     *file = default_file;
1549 }
1550 
/* Return true for an input file's own URL, false otherwise.  True is
   returned at most once: for the first call whose INPUT_FILE is
   non-NULL and has a URL scheme.  */
bool
input_file_url (const char *input_file)
{
  static bool first = true;

  if (!input_file)
    return false;
  if (!url_has_scheme (input_file))
    return false;
  if (!first)
    return false;

  first = false;
  return true;
}
1567