1 /* Establishing and handling network connections.
2    Copyright (C) 1995-2011, 2015, 2018-2021 Free Software Foundation,
3    Inc.
4 
5 This file is part of GNU Wget.
6 
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version.
11 
12 GNU Wget is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19 
20 Additional permission under GNU GPL version 3 section 7
21 
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work.  */
30 
31 #include "wget.h"
32 
33 #include "exits.h"
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <unistd.h>
37 #include <assert.h>
38 
39 #include <sys/socket.h>
40 #include <sys/select.h>
41 
42 #ifndef WINDOWS
43 # ifdef __VMS
44 #  include "vms_ip.h"
45 # else /* def __VMS */
46 #  include <netdb.h>
47 # endif /* def __VMS [else] */
48 # include <netinet/in.h>
49 # ifndef __BEOS__
50 #  include <arpa/inet.h>
51 # endif
52 #endif /* not WINDOWS */
53 
54 #include <errno.h>
55 #include <string.h>
56 #include <sys/time.h>
57 
58 #include "utils.h"
59 #include "host.h"
60 #include "connect.h"
61 #include "hash.h"
62 
63 #include <stdint.h>
64 
65 /* Define sockaddr_storage where unavailable (presumably on IPv4-only
66    hosts).  */
67 
68 #ifndef ENABLE_IPV6
69 # ifndef HAVE_STRUCT_SOCKADDR_STORAGE
70 #  define sockaddr_storage sockaddr_in
71 # endif
72 #endif /* ENABLE_IPV6 */
73 
74 /* Fill SA as per the data in IP and PORT.  SA should point to struct
75    sockaddr_storage if ENABLE_IPV6 is defined, to struct sockaddr_in
76    otherwise.  */
77 
78 static void
sockaddr_set_data(struct sockaddr * sa,const ip_address * ip,int port)79 sockaddr_set_data (struct sockaddr *sa, const ip_address *ip, int port)
80 {
81   switch (ip->family)
82     {
83     case AF_INET:
84       {
85         struct sockaddr_in *sin = (struct sockaddr_in *)sa;
86         xzero (*sin);
87         sin->sin_family = AF_INET;
88         sin->sin_port = htons (port);
89         sin->sin_addr = ip->data.d4;
90         break;
91       }
92 #ifdef ENABLE_IPV6
93     case AF_INET6:
94       {
95         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
96         xzero (*sin6);
97         sin6->sin6_family = AF_INET6;
98         sin6->sin6_port = htons (port);
99         sin6->sin6_addr = ip->data.d6;
100 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
101         sin6->sin6_scope_id = ip->ipv6_scope;
102 #endif
103         break;
104       }
105 #endif /* ENABLE_IPV6 */
106     default:
107       abort ();
108     }
109 }
110 
111 /* Get the data of SA, specifically the IP address and the port.  If
112    you're not interested in one or the other information, pass NULL as
113    the pointer.  */
114 
115 static void
sockaddr_get_data(const struct sockaddr * sa,ip_address * ip,int * port)116 sockaddr_get_data (const struct sockaddr *sa, ip_address *ip, int *port)
117 {
118   switch (sa->sa_family)
119     {
120     case AF_INET:
121       {
122         struct sockaddr_in *sin = (struct sockaddr_in *)sa;
123         if (ip)
124           {
125             ip->family = AF_INET;
126             ip->data.d4 = sin->sin_addr;
127           }
128         if (port)
129           *port = ntohs (sin->sin_port);
130         break;
131       }
132 #ifdef ENABLE_IPV6
133     case AF_INET6:
134       {
135         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
136         if (ip)
137           {
138             ip->family = AF_INET6;
139             ip->data.d6 = sin6->sin6_addr;
140 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
141             ip->ipv6_scope = sin6->sin6_scope_id;
142 #endif
143           }
144         if (port)
145           *port = ntohs (sin6->sin6_port);
146         break;
147       }
148 #endif
149     default:
150       abort ();
151     }
152 }
153 
/* Return the size of the concrete sockaddr structure that matches the
   address family stored in SA.  Aborts on an address family other
   than AF_INET (or AF_INET6, when IPv6 is enabled).  */

static socklen_t
sockaddr_size (const struct sockaddr *sa)
{
  if (sa->sa_family == AF_INET)
    return sizeof (struct sockaddr_in);
#ifdef ENABLE_IPV6
  if (sa->sa_family == AF_INET6)
    return sizeof (struct sockaddr_in6);
#endif
  abort ();
}
172 
173 /* Resolve the bind address specified via --bind-address and store it
174    to SA.  The resolved value is stored in a static variable and
175    reused after the first invocation of this function.
176 
177    Returns true on success, false on failure.  */
178 
179 static bool
resolve_bind_address(struct sockaddr * sa)180 resolve_bind_address (struct sockaddr *sa)
181 {
182   struct address_list *al;
183 
184   /* Make sure this is called only once.  opt.bind_address doesn't
185      change during a Wget run.  */
186   static bool called, should_bind;
187   static ip_address ip;
188   if (called)
189     {
190       if (should_bind)
191         sockaddr_set_data (sa, &ip, 0);
192       return should_bind;
193     }
194   called = true;
195 
196   al = lookup_host (opt.bind_address, LH_BIND | LH_SILENT);
197   if (!al)
198     {
199       /* #### We should be able to print the error message here. */
200       logprintf (LOG_NOTQUIET,
201                  _("%s: unable to resolve bind address %s; disabling bind.\n"),
202                  exec_name, quote (opt.bind_address));
203       should_bind = false;
204       return false;
205     }
206 
207   /* Pick the first address in the list and use it as bind address.
208      Perhaps we should try multiple addresses in succession, but I
209      don't think that's necessary in practice.  */
210   ip = *address_list_address_at (al, 0);
211   address_list_release (al);
212 
213   sockaddr_set_data (sa, &ip, 0);
214   should_bind = true;
215   return true;
216 }
217 
/* Arguments and result of a connect() call, bundled so that they can
   travel through the single void pointer accepted by the
   run_with_timeout callback.  */
struct cwt_context {
  int fd;                       /* socket to connect */
  const struct sockaddr *addr;  /* remote address */
  socklen_t addrlen;            /* size of *ADDR */
  int result;                   /* return value of connect() */
};

/* Callback for run_with_timeout: perform the connect() described by
   ARG (a struct cwt_context) and record its return value there.  */

static void
connect_with_timeout_callback (void *arg)
{
  struct cwt_context *request = arg;
  request->result = connect (request->fd, request->addr, request->addrlen);
}

/* Like connect, but with a timeout.  If run_with_timeout reports that
   the TIMEOUT (in seconds) expired, -1 is returned and errno is set
   to ETIMEDOUT.  A connect that fails with EINTR is reported as
   ETIMEDOUT as well.  Otherwise the return value of connect is passed
   through unchanged.  */

static int
connect_with_timeout (int fd, const struct sockaddr *addr, socklen_t addrlen,
                      double timeout)
{
  struct cwt_context request;

  request.fd = fd;
  request.addr = addr;
  request.addrlen = addrlen;

  if (run_with_timeout (timeout, connect_with_timeout_callback, &request))
    {
      errno = ETIMEDOUT;
      return -1;
    }

  if (request.result == -1 && errno == EINTR)
    errno = ETIMEDOUT;
  return request.result;
}
254 
255 /* Connect via TCP to the specified address and port.
256 
257    If PRINT is non-NULL, it is the host name to print that we're
258    connecting to.  */
259 
260 int
connect_to_ip(const ip_address * ip,int port,const char * print)261 connect_to_ip (const ip_address *ip, int port, const char *print)
262 {
263   struct sockaddr_storage ss;
264   struct sockaddr *sa = (struct sockaddr *)&ss;
265   int sock;
266 
267   /* If PRINT is non-NULL, print the "Connecting to..." line, with
268      PRINT being the host name we're connecting to.  */
269   if (print)
270     {
271       const char *txt_addr = print_address (ip);
272       if (0 != strcmp (print, txt_addr))
273         {
274           char *str = NULL, *name;
275 
276           if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL)
277             {
278               str = aprintf ("%s (%s)", name, print);
279               xfree (name);
280             }
281 
282           logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
283                      str ? str : escnonprint_uri (print), txt_addr, port);
284 
285           xfree (str);
286         }
287       else
288         {
289            if (ip->family == AF_INET)
290                logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
291 #ifdef ENABLE_IPV6
292            else if (ip->family == AF_INET6)
293                logprintf (LOG_VERBOSE, _("Connecting to [%s]:%d... "), txt_addr, port);
294 #endif
295         }
296     }
297 
298   /* Store the sockaddr info to SA.  */
299   sockaddr_set_data (sa, ip, port);
300 
301   /* Create the socket of the family appropriate for the address.  */
302   sock = socket (sa->sa_family, SOCK_STREAM, 0);
303   if (sock < 0)
304     goto err;
305 
306 #if defined(ENABLE_IPV6) && defined(IPV6_V6ONLY)
307   if (opt.ipv6_only) {
308     int on = 1;
309     /* In case of error, we will go on anyway... */
310     int err = setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof (on));
311     IF_DEBUG
312       if (err < 0)
313         DEBUGP (("Failed setting IPV6_V6ONLY: %s", strerror (errno)));
314   }
315 #endif
316 
317   /* For very small rate limits, set the buffer size (and hence,
318      hopefully, the kernel's TCP window size) to the per-second limit.
319      That way we should never have to sleep for more than 1s between
320      network reads.  */
321   if (opt.limit_rate && opt.limit_rate < 8192)
322     {
323       int bufsize = opt.limit_rate;
324       if (bufsize < 512)
325         bufsize = 512;          /* avoid pathologically small values */
326 #ifdef SO_RCVBUF
327       if (setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
328                   (void *) &bufsize, (socklen_t) sizeof (bufsize)))
329         logprintf (LOG_NOTQUIET, _("setsockopt SO_RCVBUF failed: %s\n"),
330                    strerror (errno));
331 #endif
332       /* When we add limit_rate support for writing, which is useful
333          for POST, we should also set SO_SNDBUF here.  */
334     }
335 
336   if (opt.bind_address)
337     {
338       /* Bind the client side of the socket to the requested
339          address.  */
340       struct sockaddr_storage bind_ss;
341       struct sockaddr *bind_sa = (struct sockaddr *)&bind_ss;
342       if (resolve_bind_address (bind_sa))
343         {
344           if (bind (sock, bind_sa, sockaddr_size (bind_sa)) < 0)
345             goto err;
346         }
347     }
348 
349   /* Connect the socket to the remote endpoint.  */
350   if (connect_with_timeout (sock, sa, sockaddr_size (sa),
351                             opt.connect_timeout) < 0)
352     goto err;
353 
354   /* Success. */
355   assert (sock >= 0);
356   if (print)
357     logprintf (LOG_VERBOSE, _("connected.\n"));
358   DEBUGP (("Created socket %d.\n", sock));
359   return sock;
360 
361  err:
362   {
363     /* Protect errno from possible modifications by close and
364        logprintf.  */
365     int save_errno = errno;
366     if (sock >= 0)
367       {
368 #ifdef WIN32
369 	/* If the connection timed out, fd_close will hang in Gnulib's
370 	   close_fd_maybe_socket, inside the call to WSAEnumNetworkEvents.  */
371 	if (errno != ETIMEDOUT)
372 #endif
373 	  fd_close (sock);
374       }
375     if (print)
376       logprintf (LOG_NOTQUIET, _("failed: %s.\n"), strerror (errno));
377     errno = save_errno;
378     return -1;
379   }
380 }
381 
382 /* Connect via TCP to a remote host on the specified port.
383 
384    HOST is resolved as an Internet host name.  If HOST resolves to
385    more than one IP address, they are tried in the order returned by
386    DNS until connecting to one of them succeeds.  */
387 
388 int
connect_to_host(const char * host,int port)389 connect_to_host (const char *host, int port)
390 {
391   int i, start, end;
392   int sock;
393 
394   struct address_list *al = lookup_host (host, 0);
395 
396  retry:
397   if (!al)
398     {
399       logprintf (LOG_NOTQUIET,
400                  _("%s: unable to resolve host address %s\n"),
401                  exec_name, quote (host));
402       return E_HOST;
403     }
404 
405   address_list_get_bounds (al, &start, &end);
406   for (i = start; i < end; i++)
407     {
408       const ip_address *ip = address_list_address_at (al, i);
409       sock = connect_to_ip (ip, port, host);
410       if (sock >= 0)
411         {
412           /* Success. */
413           address_list_set_connected (al);
414           address_list_release (al);
415           return sock;
416         }
417 
418       /* The attempt to connect has failed.  Continue with the loop
419          and try next address. */
420 
421       address_list_set_faulty (al, i);
422     }
423 
424   /* Failed to connect to any of the addresses in AL. */
425 
426   if (address_list_connected_p (al))
427     {
428       /* We connected to AL before, but cannot do so now.  That might
429          indicate that our DNS cache entry for HOST has expired.  */
430       address_list_release (al);
431       al = lookup_host (host, LH_REFRESH);
432       goto retry;
433     }
434   address_list_release (al);
435 
436   return -1;
437 }
438 
439 /* Create a socket, bind it to local interface BIND_ADDRESS on port
440    *PORT, set up a listen backlog, and return the resulting socket, or
441    -1 in case of error.
442 
443    BIND_ADDRESS is the address of the interface to bind to.  If it is
444    NULL, the socket is bound to the default address.  PORT should
445    point to the port number that will be used for the binding.  If
446    that number is 0, the system will choose a suitable port, and the
447    chosen value will be written to *PORT.
448 
449    Calling accept() on such a socket waits for and accepts incoming
450    TCP connections.  */
451 
452 int
bind_local(const ip_address * bind_address,int * port)453 bind_local (const ip_address *bind_address, int *port)
454 {
455   int sock;
456   struct sockaddr_storage ss;
457   struct sockaddr *sa = (struct sockaddr *)&ss;
458 
459   /* For setting options with setsockopt. */
460   int setopt_val = 1;
461   void *setopt_ptr = (void *)&setopt_val;
462   socklen_t setopt_size = sizeof (setopt_val);
463 
464   sock = socket (bind_address->family, SOCK_STREAM, 0);
465   if (sock < 0)
466     return -1;
467 
468 #ifdef SO_REUSEADDR
469   if (setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, setopt_ptr, setopt_size))
470     logprintf (LOG_NOTQUIET, _("setsockopt SO_REUSEADDR failed: %s\n"),
471                strerror (errno));
472 #endif
473 
474   xzero (ss);
475   sockaddr_set_data (sa, bind_address, *port);
476   if (bind (sock, sa, sockaddr_size (sa)) < 0)
477     {
478       fd_close (sock);
479       return -1;
480     }
481   DEBUGP (("Local socket fd %d bound.\n", sock));
482 
483   /* If *PORT is 0, find out which port we've bound to.  */
484   if (*port == 0)
485     {
486       socklen_t addrlen = sockaddr_size (sa);
487       if (getsockname (sock, sa, &addrlen) < 0)
488         {
489           /* If we can't find out the socket's local address ("name"),
490              something is seriously wrong with the socket, and it's
491              unusable for us anyway because we must know the chosen
492              port.  */
493           fd_close (sock);
494           return -1;
495         }
496       sockaddr_get_data (sa, NULL, port);
497       DEBUGP (("binding to address %s using port %i.\n",
498                print_address (bind_address), *port));
499     }
500   if (listen (sock, 1) < 0)
501     {
502       fd_close (sock);
503       return -1;
504     }
505   return sock;
506 }
507 
508 /* Like a call to accept(), but with the added check for timeout.
509 
510    In other words, accept a client connection on LOCAL_SOCK, and
511    return the new socket used for communication with the client.
512    LOCAL_SOCK should have been bound, e.g. using bind_local().
513 
514    The caller is blocked until a connection is established.  If no
515    connection is established for opt.connect_timeout seconds, the
516    function exits with an error status.  */
517 
518 int
accept_connection(int local_sock)519 accept_connection (int local_sock)
520 {
521   int sock;
522 
523   /* We don't need the values provided by accept, but accept
524      apparently requires them to be present.  */
525   struct sockaddr_storage ss;
526   struct sockaddr *sa = (struct sockaddr *)&ss;
527   socklen_t addrlen = sizeof (ss);
528 
529   if (opt.connect_timeout)
530     {
531       int test = select_fd (local_sock, opt.connect_timeout, WAIT_FOR_READ);
532       if (test == 0)
533         errno = ETIMEDOUT;
534       if (test <= 0)
535         return -1;
536     }
537   sock = accept (local_sock, sa, &addrlen);
538   DEBUGP (("Accepted client at socket %d.\n", sock));
539   return sock;
540 }
541 
542 /* Get the IP address associated with the connection on FD and store
543    it to IP.  Return true on success, false otherwise.
544 
545    If ENDPOINT is ENDPOINT_LOCAL, it returns the address of the local
546    (client) side of the socket.  Else if ENDPOINT is ENDPOINT_PEER, it
547    returns the address of the remote (peer's) side of the socket.  */
548 
549 bool
socket_ip_address(int sock,ip_address * ip,int endpoint)550 socket_ip_address (int sock, ip_address *ip, int endpoint)
551 {
552   struct sockaddr_storage storage;
553   struct sockaddr *sockaddr = (struct sockaddr *) &storage;
554   socklen_t addrlen = sizeof (storage);
555   int ret;
556 
557   memset (sockaddr, 0, addrlen);
558   if (endpoint == ENDPOINT_LOCAL)
559     ret = getsockname (sock, sockaddr, &addrlen);
560   else if (endpoint == ENDPOINT_PEER)
561     ret = getpeername (sock, sockaddr, &addrlen);
562   else
563     abort ();
564   if (ret < 0)
565     return false;
566 
567   memset(ip, 0, sizeof(ip_address));
568   ip->family = sockaddr->sa_family;
569   switch (sockaddr->sa_family)
570     {
571 #ifdef ENABLE_IPV6
572     case AF_INET6:
573       {
574         struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&storage;
575         ip->data.d6 = sa6->sin6_addr;
576 #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
577         ip->ipv6_scope = sa6->sin6_scope_id;
578 #endif
579         DEBUGP (("conaddr is: %s\n", print_address (ip)));
580         return true;
581       }
582 #endif
583     case AF_INET:
584       {
585         struct sockaddr_in *sa = (struct sockaddr_in *)&storage;
586         ip->data.d4 = sa->sin_addr;
587         DEBUGP (("conaddr is: %s\n", print_address (ip)));
588         return true;
589       }
590     default:
591       abort ();
592     }
593 }
594 
595 /* Get the socket family of connection on FD and store
596    Return family type on success, -1 otherwise.
597 
598    If ENDPOINT is ENDPOINT_LOCAL, it returns the sock family of the local
599    (client) side of the socket.  Else if ENDPOINT is ENDPOINT_PEER, it
600    returns the sock family of the remote (peer's) side of the socket.  */
601 
602 int
socket_family(int sock,int endpoint)603 socket_family (int sock, int endpoint)
604 {
605   struct sockaddr_storage storage;
606   struct sockaddr *sockaddr = (struct sockaddr *) &storage;
607   socklen_t addrlen = sizeof (storage);
608   int ret;
609 
610   memset (sockaddr, 0, addrlen);
611 
612   if (endpoint == ENDPOINT_LOCAL)
613     ret = getsockname (sock, sockaddr, &addrlen);
614   else if (endpoint == ENDPOINT_PEER)
615     ret = getpeername (sock, sockaddr, &addrlen);
616   else
617     abort ();
618 
619   if (ret < 0)
620     return -1;
621 
622   return sockaddr->sa_family;
623 }
624 
625 /* Return true if the error from the connect code can be considered
626    retryable.  Wget normally retries after errors, but the exception
627    are the "unsupported protocol" type errors (possible on IPv4/IPv6
628    dual family systems) and "connection refused".  */
629 
630 bool
retryable_socket_connect_error(int err)631 retryable_socket_connect_error (int err)
632 {
633   /* Have to guard against some of these values not being defined.
634      Cannot use a switch statement because some of the values might be
635      equal.  */
636   if (false
637 #ifdef EAFNOSUPPORT
638       || err == EAFNOSUPPORT
639 #endif
640 #ifdef EPFNOSUPPORT
641       || err == EPFNOSUPPORT
642 #endif
643 #ifdef ESOCKTNOSUPPORT          /* no, "sockt" is not a typo! */
644       || err == ESOCKTNOSUPPORT
645 #endif
646 #ifdef EPROTONOSUPPORT
647       || err == EPROTONOSUPPORT
648 #endif
649 #ifdef ENOPROTOOPT
650       || err == ENOPROTOOPT
651 #endif
652       /* Apparently, older versions of Linux and BSD used EINVAL
653          instead of EAFNOSUPPORT and such.  */
654       || err == EINVAL
655       )
656     return false;
657 
658   if (!opt.retry_connrefused)
659     if (err == ECONNREFUSED
660 #ifdef ENETUNREACH
661         || err == ENETUNREACH   /* network is unreachable */
662 #endif
663 #ifdef EHOSTUNREACH
664         || err == EHOSTUNREACH  /* host is unreachable */
665 #endif
666         )
667       return false;
668 
669   return true;
670 }
671 
672 /* Wait for a single descriptor to become available, timing out after
673    MAXTIME seconds.  Returns 1 if FD is available, 0 for timeout and
674    -1 for error.  The argument WAIT_FOR can be a combination of
675    WAIT_FOR_READ and WAIT_FOR_WRITE.
676 
677    This is a mere convenience wrapper around the select call, and
678    should be taken as such (for example, it doesn't implement Wget's
679    0-timeout-means-no-timeout semantics.)  */
680 
681 static int
select_fd_internal(int fd,double maxtime,int wait_for,bool convert_back _GL_UNUSED)682 select_fd_internal (int fd, double maxtime, int wait_for, bool convert_back _GL_UNUSED)
683 {
684   fd_set fdset;
685   fd_set *rd = NULL, *wr = NULL;
686   struct timeval tmout;
687   int result;
688 
689   if (fd < 0)
690     return -1;
691 
692   if (fd >= FD_SETSIZE)
693     {
694       logprintf (LOG_NOTQUIET, _("Too many fds open.  Cannot use select on a fd >= %d\n"), FD_SETSIZE);
695       exit (WGET_EXIT_GENERIC_ERROR);
696     }
697   FD_ZERO (&fdset);
698   FD_SET (fd, &fdset);
699   if (wait_for & WAIT_FOR_READ)
700     rd = &fdset;
701   if (wait_for & WAIT_FOR_WRITE)
702     wr = &fdset;
703 
704   tmout.tv_sec = (long) maxtime;
705   tmout.tv_usec = 1000000 * (maxtime - (long) maxtime);
706 
707   do
708   {
709     result = select (fd + 1, rd, wr, NULL, &tmout);
710 #ifdef WINDOWS
711     /* gnulib select() converts blocking sockets to nonblocking in windows.
712        wget uses blocking sockets so we must convert them back to blocking.  */
713     if (convert_back)
714       set_windows_fd_as_blocking_socket (fd);
715 #endif
716   }
717   while (result < 0 && errno == EINTR);
718 
719   return result;
720 }
721 
/* Wait up to MAXTIME seconds for FD to become available for the
   operations in WAIT_FOR.  Same as select_fd_internal with
   CONVERT_BACK set to true; see there for the return values.  */

int
select_fd (int fd, double maxtime, int wait_for)
{
  const bool convert_back = true;
  return select_fd_internal (fd, maxtime, wait_for, convert_back);
}
727 
#ifdef WINDOWS
/* Windows-only variant of select_fd: same as select_fd_internal with
   CONVERT_BACK set to false, i.e. the socket is not switched back to
   blocking mode after select.  */

int
select_fd_nb (int fd, double maxtime, int wait_for)
{
  const bool convert_back = false;
  return select_fd_internal (fd, maxtime, wait_for, convert_back);
}
#endif
735 
736 /* Return true if the connection to the remote site established
737    through SOCK is still open.
738 
739    Specifically, this function returns true if SOCK is not ready for
740    reading.  This is because, when the connection closes, the socket
741    is ready for reading because EOF is about to be delivered.  A side
742    effect of this method is that sockets that have pending data are
743    considered non-open.  This is actually a good thing for callers of
744    this function, where such pending data can only be unwanted
745    leftover from a previous request.  */
746 
747 bool
test_socket_open(int sock)748 test_socket_open (int sock)
749 {
750   fd_set check_set;
751   struct timeval to;
752   int ret = 0;
753 
754   if (sock >= FD_SETSIZE)
755     {
756       logprintf (LOG_NOTQUIET, _("Too many fds open.  Cannot use select on a fd >= %d\n"), FD_SETSIZE);
757       exit (WGET_EXIT_GENERIC_ERROR);
758     }
759   /* Check if we still have a valid (non-EOF) connection.  From Andrew
760    * Maholski's code in the Unix Socket FAQ.  */
761 
762   FD_ZERO (&check_set);
763   FD_SET (sock, &check_set);
764 
765   /* Wait one microsecond */
766   to.tv_sec = 0;
767   to.tv_usec = 1;
768 
769   ret = select (sock + 1, &check_set, NULL, NULL, &to);
770 #ifdef WINDOWS
771 /* gnulib select() converts blocking sockets to nonblocking in windows.
772 wget uses blocking sockets so we must convert them back to blocking
773 */
774   set_windows_fd_as_blocking_socket ( sock );
775 #endif
776 
777   if ( !ret )
778     /* We got a timeout, it means we're still connected. */
779     return true;
780   else
781     /* Read now would not wait, it means we have either pending data
782        or EOF/error. */
783     return false;
784 }
785 
786 /* Basic socket operations, mostly EINTR wrappers.  */
787 
/* Read up to BUFSIZE bytes from FD into BUF, restarting the read
   whenever it is interrupted by a signal (EINTR).  Returns what read
   returns.  */

static int
sock_read (int fd, char *buf, int bufsize)
{
  for (;;)
    {
      int nread = read (fd, buf, bufsize);
      if (nread != -1 || errno != EINTR)
        return nread;
    }
}
797 
/* Write up to BUFSIZE bytes from BUF to FD, restarting the write
   whenever it is interrupted by a signal (EINTR).  Returns what write
   returns, which may be less than BUFSIZE.  */

static int
sock_write (int fd, char *buf, int bufsize)
{
  for (;;)
    {
      int nwritten = write (fd, buf, bufsize);
      if (nwritten != -1 || errno != EINTR)
        return nwritten;
    }
}
807 
/* Wait up to TIMEOUT seconds for FD to become available for the
   operations in WAIT_FOR.  Plain-socket poller: simply defers to
   select_fd and returns its result.  */

static int
sock_poll (int fd, double timeout, int wait_for)
{
  int ready = select_fd (fd, timeout, wait_for);
  return ready;
}
813 
/* Copy up to BUFSIZE bytes of the data pending on FD into BUF without
   consuming it (recv with MSG_PEEK), restarting the call whenever it
   is interrupted by a signal (EINTR).  Returns what recv returns.  */

static int
sock_peek (int fd, char *buf, int bufsize)
{
  for (;;)
    {
      int npeeked = recv (fd, buf, bufsize, MSG_PEEK);
      if (npeeked != -1 || errno != EINTR)
        return npeeked;
    }
}
823 
/* Close FD and note that in the debug log.  The return value of close
   is not examined.  */

static void
sock_close (int fd)
{
  (void) close (fd);
  DEBUGP (("Closed fd %d\n", fd));
}
830 #undef read
831 #undef write
832 #undef close
833 
834 /* Reading and writing from the network.  We build around the socket
835    (file descriptor) API, but support "extended" operations for things
836    that are not mere file descriptors under the hood, such as SSL
837    sockets.
838 
839    That way the user code can call fd_read(fd, ...) and we'll run read
840    or SSL_read or whatever is necessary.  */
841 
842 static struct hash_table *transport_map;
843 static unsigned int transport_map_modified_tick;
844 
/* What is stored in transport_map for a registered descriptor: the
   set of transport operations and the opaque context that is passed
   back to those operations.  */
struct transport_info {
  struct transport_implementation *imp; /* transport operations */
  void *ctx;                            /* context handed to IMP's functions */
};
849 
850 /* Register the transport layer operations that will be used when
851    reading, writing, and polling FD.
852 
853    This should be used for transport layers like SSL that piggyback on
854    sockets.  FD should otherwise be a real socket, on which you can
855    call getpeername, etc.  */
856 
857 void
fd_register_transport(int fd,struct transport_implementation * imp,void * ctx)858 fd_register_transport (int fd, struct transport_implementation *imp, void *ctx)
859 {
860   struct transport_info *info;
861 
862   /* The file descriptor must be non-negative to be registered.
863      Negative values are ignored by fd_close(), and -1 cannot be used as
864      hash key.  */
865   assert (fd >= 0);
866 
867   info = xnew (struct transport_info);
868   info->imp = imp;
869   info->ctx = ctx;
870   if (!transport_map)
871     transport_map = hash_table_new (0, NULL, NULL);
872   hash_table_put (transport_map, (void *)(intptr_t) fd, info);
873   ++transport_map_modified_tick;
874 }
875 
876 /* Return context of the transport registered with
877    fd_register_transport.  This assumes fd_register_transport was
878    previously called on FD.  */
879 
880 void *
fd_transport_context(int fd)881 fd_transport_context (int fd)
882 {
883   struct transport_info *info = hash_table_get (transport_map, (void *)(intptr_t) fd);
884   return info ? info->ctx : NULL;
885 }
886 
/* Set INFO to the transport_info registered for the descriptor named
   `fd' in the expanding function, or to NULL if there is none.

   Functions such as fd_read are called many times in a loop, so the
   result of the last lookup is remembered instead of being fetched
   every time.  Comparing `fd' to the remembered descriptor is not
   enough, because the descriptor might have been closed and reopened
   in the meantime; transport_map_modified_tick is compared as well so
   that changes to transport_map do not go unnoticed.

   This is a macro because the static storage variables are meant to
   be per-function.  */

#define LAZY_RETRIEVE_INFO(info) do {                                   \
  static struct transport_info *lri_info;                               \
  static int lri_fd = -1;                                               \
  static unsigned int lri_tick;                                         \
  if (transport_map == NULL)                                            \
    info = NULL;                                                        \
  else                                                                  \
    {                                                                   \
      if (lri_fd != fd || lri_tick != transport_map_modified_tick)      \
        {                                                               \
          lri_info = hash_table_get (transport_map,                     \
                                     (void *)(intptr_t) fd);            \
          lri_fd = fd;                                                  \
          lri_tick = transport_map_modified_tick;                       \
        }                                                               \
      info = lri_info;                                                  \
    }                                                                   \
} while (0)
912 
913 static bool
poll_internal(int fd,struct transport_info * info,int wf,double timeout)914 poll_internal (int fd, struct transport_info *info, int wf, double timeout)
915 {
916   if (timeout == -1)
917     timeout = opt.read_timeout;
918   if (timeout)
919     {
920       int test;
921       if (info && info->imp->poller)
922         test = info->imp->poller (fd, timeout, wf, info->ctx);
923       else
924         test = sock_poll (fd, timeout, wf);
925       if (test == 0)
926         errno = ETIMEDOUT;
927       if (test <= 0)
928         return false;
929     }
930   return true;
931 }
932 
933 /* Read no more than BUFSIZE bytes of data from FD, storing them to
934    BUF.  If TIMEOUT is non-zero, the operation aborts if no data is
935    received after that many seconds.  If TIMEOUT is -1, the value of
936    opt.timeout is used for TIMEOUT.  */
937 
938 int
fd_read(int fd,char * buf,int bufsize,double timeout)939 fd_read (int fd, char *buf, int bufsize, double timeout)
940 {
941   struct transport_info *info;
942   LAZY_RETRIEVE_INFO (info);
943 
944   /* let imp->reader take care about timeout.
945      (or in worst case timeout can be 2*timeout) */
946   if (info && info->imp->reader)
947     return info->imp->reader (fd, buf, bufsize, info->ctx, timeout);
948 
949   if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
950     return -1;
951   return sock_read (fd, buf, bufsize);
952 }
953 
954 /* Like fd_read, except it provides a "preview" of the data that will
955    be read by subsequent calls to fd_read.  Specifically, it copies no
956    more than BUFSIZE bytes of the currently available data to BUF and
957    returns the number of bytes copied.  Return values and timeout
958    semantics are the same as those of fd_read.
959 
960    CAVEAT: Do not assume that the first subsequent call to fd_read
961    will retrieve the same amount of data.  Reading can return more or
962    less data, depending on the TCP implementation and other
963    circumstances.  However, barring an error, it can be expected that
964    all the peeked data will eventually be read by fd_read.  */
965 
966 int
fd_peek(int fd,char * buf,int bufsize,double timeout)967 fd_peek (int fd, char *buf, int bufsize, double timeout)
968 {
969   struct transport_info *info;
970   LAZY_RETRIEVE_INFO (info);
971 
972   if (info && info->imp->peeker)
973     return info->imp->peeker (fd, buf, bufsize, info->ctx, timeout);
974 
975   if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
976     return -1;
977   return sock_peek (fd, buf, bufsize);
978 }
979 
980 /* Write the entire contents of BUF to FD.  If TIMEOUT is non-zero,
981    the operation aborts if no data is received after that many
982    seconds.  If TIMEOUT is -1, the value of opt.timeout is used for
983    TIMEOUT.  */
984 
985 int
fd_write(int fd,char * buf,int bufsize,double timeout)986 fd_write (int fd, char *buf, int bufsize, double timeout)
987 {
988   int res;
989   struct transport_info *info;
990   LAZY_RETRIEVE_INFO (info);
991 
992   /* `write' may write less than LEN bytes, thus the loop keeps trying
993      it until all was written, or an error occurred.  */
994   res = 0;
995   while (bufsize > 0)
996     {
997       if (!poll_internal (fd, info, WAIT_FOR_WRITE, timeout))
998         return -1;
999       if (info && info->imp->writer)
1000         res = info->imp->writer (fd, buf, bufsize, info->ctx);
1001       else
1002         res = sock_write (fd, buf, bufsize);
1003       if (res <= 0)
1004         break;
1005       buf += res;
1006       bufsize -= res;
1007     }
1008   return res;
1009 }
1010 
1011 /* Report the most recent error(s) on FD.  This should only be called
1012    after fd_* functions, such as fd_read and fd_write, and only if
1013    they return a negative result.  For errors coming from other calls
1014    such as setsockopt or fopen, strerror should continue to be
1015    used.
1016 
1017    If the transport doesn't support error messages or doesn't supply
1018    one, strerror(errno) is returned.  The returned error message
1019    should not be used after fd_close has been called.  */
1020 
1021 const char *
fd_errstr(int fd)1022 fd_errstr (int fd)
1023 {
1024   /* Don't bother with LAZY_RETRIEVE_INFO, as this will only be called
1025      in case of error, never in a tight loop.  */
1026   struct transport_info *info = NULL;
1027 
1028   if (transport_map)
1029     info = hash_table_get (transport_map, (void *)(intptr_t) fd);
1030 
1031   if (info && info->imp->errstr)
1032     {
1033       const char *err = info->imp->errstr (fd, info->ctx);
1034       if (err)
1035         return err;
1036       /* else, fall through and print the system error. */
1037     }
1038   return strerror (errno);
1039 }
1040 
1041 /* Close the file descriptor FD.  */
1042 
1043 void
fd_close(int fd)1044 fd_close (int fd)
1045 {
1046   struct transport_info *info;
1047   if (fd < 0)
1048     return;
1049 
1050   /* Don't use LAZY_RETRIEVE_INFO because fd_close() is only called once
1051      per socket, so that particular optimization wouldn't work.  */
1052   info = NULL;
1053   if (transport_map)
1054     info = hash_table_get (transport_map, (void *)(intptr_t) fd);
1055 
1056   if (info && info->imp->closer)
1057     info->imp->closer (fd, info->ctx);
1058   else
1059     sock_close (fd);
1060 
1061   if (info)
1062     {
1063       hash_table_remove (transport_map, (void *)(intptr_t) fd);
1064       xfree (info);
1065       ++transport_map_modified_tick;
1066     }
1067 }
1068 
1069 #if defined DEBUG_MALLOC || defined TESTING
1070 void
connect_cleanup(void)1071 connect_cleanup(void)
1072 {
1073   if (transport_map)
1074     {
1075       hash_table_iterator iter;
1076       for (hash_table_iterate (transport_map, &iter); hash_table_iter_next (&iter); )
1077         {
1078           xfree (iter.value);
1079         }
1080       hash_table_destroy (transport_map);
1081       transport_map = NULL;
1082     }
1083 }
1084 #endif
1085