1 /* nbdkit
2  * Copyright (C) 2013-2020 Red Hat Inc.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the distribution.
14  *
15  * * Neither the name of Red Hat nor the names of its contributors may be
16  * used to endorse or promote products derived from this software without
17  * specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <config.h>
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdbool.h>
38 #include <inttypes.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <poll.h>
42 #include <errno.h>
43 #include <assert.h>
44 #include <sys/types.h>
45 #include <sys/socket.h>
46 #include <sys/un.h>
47 #include <netinet/in.h>
48 #include <netinet/tcp.h>
49 #include <netdb.h>
50 
51 #ifdef HAVE_LINUX_VM_SOCKETS_H
52 #include <linux/vm_sockets.h>
53 #endif
54 
55 #ifdef HAVE_LIBSELINUX
56 #include <selinux/selinux.h>
57 #endif
58 
59 #include <pthread.h>
60 
61 #include "internal.h"
62 #include "utils.h"
63 #include "vector.h"
64 
65 static void
set_selinux_label(void)66 set_selinux_label (void)
67 {
68   if (selinux_label) {
69 #ifdef HAVE_LIBSELINUX
70     if (setsockcreatecon_raw (selinux_label) == -1) {
71       perror ("selinux-label: setsockcreatecon_raw");
72       exit (EXIT_FAILURE);
73     }
74 #else
75     fprintf (stderr,
76              "%s: --selinux-label option used, but "
77              "this binary was compiled without SELinux support\n",
78              program_name);
79     exit (EXIT_FAILURE);
80 #endif
81   }
82 }
83 
/* Counterpart of set_selinux_label: reset the SELinux socket creation
 * context by passing NULL to setsockcreatecon_raw.
 *
 * This is a no-op if no label was requested, or if the binary was
 * built without libselinux.  Failure to reset is fatal.
 */
static void
clear_selinux_label (void)
{
#ifdef HAVE_LIBSELINUX
  if (!selinux_label)
    return;

  if (setsockcreatecon_raw (NULL) == -1) {
    perror ("selinux-label: setsockcreatecon_raw(NULL)");
    exit (EXIT_FAILURE);
  }
#endif
}
96 
97 void
bind_unix_socket(sockets * socks)98 bind_unix_socket (sockets *socks)
99 {
100   size_t len;
101   int sock;
102   struct sockaddr_un addr;
103 
104   assert (unixsocket);
105   assert (unixsocket[0] == '/');
106 
107   len = strlen (unixsocket);
108   if (len >= UNIX_PATH_MAX) {
109     fprintf (stderr, "%s: -U: path too long: length %zu > max %d bytes\n",
110              program_name, len, UNIX_PATH_MAX-1);
111     exit (EXIT_FAILURE);
112   }
113 
114   set_selinux_label ();
115 
116 #ifdef SOCK_CLOEXEC
117   sock = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
118 #else
119   /* Fortunately, this code is only run at startup, so there is no
120    * risk of the fd leaking to a plugin's fork()
121    */
122   sock = set_cloexec (socket (AF_UNIX, SOCK_STREAM, 0));
123 #endif
124   if (sock == -1) {
125     perror ("bind_unix_socket: socket");
126     exit (EXIT_FAILURE);
127   }
128 
129   addr.sun_family = AF_UNIX;
130   memcpy (addr.sun_path, unixsocket, len+1 /* trailing \0 */);
131 
132   if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
133     perror (unixsocket);
134     exit (EXIT_FAILURE);
135   }
136 
137   if (listen (sock, SOMAXCONN) == -1) {
138     perror ("listen");
139     exit (EXIT_FAILURE);
140   }
141 
142   clear_selinux_label ();
143 
144   if (sockets_append (socks, sock) == -1) {
145     perror ("realloc");
146     exit (EXIT_FAILURE);
147   }
148 
149   debug ("bound to unix socket %s", unixsocket);
150 }
151 
152 void
bind_tcpip_socket(sockets * socks)153 bind_tcpip_socket (sockets *socks)
154 {
155   struct addrinfo *ai = NULL;
156   struct addrinfo hints;
157   struct addrinfo *a;
158   int err, opt;
159   bool addr_in_use = false;
160 
161   if (port == NULL)
162     port = "10809";
163 
164   memset (&hints, 0, sizeof hints);
165   hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
166   hints.ai_socktype = SOCK_STREAM;
167 
168   err = getaddrinfo (ipaddr, port, &hints, &ai);
169   if (err != 0) {
170     fprintf (stderr, "%s: getaddrinfo: %s: %s: %s",
171              program_name,
172              ipaddr ? ipaddr : "<any>",
173              port,
174              gai_strerror (err));
175     exit (EXIT_FAILURE);
176   }
177 
178   for (a = ai; a != NULL; a = a->ai_next) {
179     int sock;
180 
181     set_selinux_label ();
182 
183 #ifdef SOCK_CLOEXEC
184     sock = socket (a->ai_family, a->ai_socktype | SOCK_CLOEXEC, a->ai_protocol);
185 #else
186     /* Fortunately, this code is only run at startup, so there is no
187      * risk of the fd leaking to a plugin's fork()
188      */
189     sock = set_cloexec (socket (a->ai_family, a->ai_socktype, a->ai_protocol));
190 #endif
191     if (sock == -1) {
192       perror ("bind_tcpip_socket: socket");
193       exit (EXIT_FAILURE);
194     }
195 
196     opt = 1;
197     if (setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof opt) == -1)
198       perror ("setsockopt: SO_REUSEADDR");
199 
200 #ifdef IPV6_V6ONLY
201     if (a->ai_family == PF_INET6) {
202       if (setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof opt) == -1)
203         perror ("setsockopt: IPv6 only");
204     }
205 #endif
206 
207     if (bind (sock, a->ai_addr, a->ai_addrlen) == -1) {
208       if (errno == EADDRINUSE) {
209         addr_in_use = true;
210         close (sock);
211         continue;
212       }
213       perror ("bind");
214       exit (EXIT_FAILURE);
215     }
216 
217     if (listen (sock, SOMAXCONN) == -1) {
218       perror ("listen");
219       exit (EXIT_FAILURE);
220     }
221 
222     clear_selinux_label ();
223 
224     if (sockets_append (socks, sock) == -1) {
225       perror ("realloc");
226       exit (EXIT_FAILURE);
227     }
228   }
229 
230   freeaddrinfo (ai);
231 
232   if (socks->size == 0 && addr_in_use) {
233     fprintf (stderr, "%s: unable to bind to any sockets: %s\n",
234              program_name, strerror (EADDRINUSE));
235     exit (EXIT_FAILURE);
236   }
237 
238   debug ("bound to IP address %s:%s (%zu socket(s))",
239          ipaddr ? ipaddr : "<any>", port, socks->size);
240 }
241 
242 void
bind_vsock(sockets * socks)243 bind_vsock (sockets *socks)
244 {
245 #ifdef AF_VSOCK
246   uint32_t vsock_port;
247   int sock;
248   struct sockaddr_vm addr;
249 
250   if (port == NULL)
251     vsock_port = 10809;
252   else {
253     /* --port parameter must be numeric for vsock, unless
254      * /etc/services is extended but that seems unlikely. XXX
255      */
256     if (nbdkit_parse_uint32_t ("port", port, &vsock_port) == -1)
257       exit (EXIT_FAILURE);
258   }
259 
260   /* Any platform with AF_VSOCK also supports SOCK_CLOEXEC so there is
261    * no fallback path.
262    */
263   sock = socket (AF_VSOCK, SOCK_STREAM|SOCK_CLOEXEC, 0);
264   if (sock == -1) {
265     perror ("bind_vsock: socket");
266     exit (EXIT_FAILURE);
267   }
268 
269   memset (&addr, 0, sizeof addr);
270   addr.svm_family = AF_VSOCK;
271   addr.svm_cid = VMADDR_CID_ANY;
272   addr.svm_port = vsock_port;
273 
274   if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
275     perror (unixsocket);
276     exit (EXIT_FAILURE);
277   }
278 
279   if (listen (sock, SOMAXCONN) == -1) {
280     perror ("listen");
281     exit (EXIT_FAILURE);
282   }
283 
284   if (sockets_append (socks, sock) == -1) {
285     perror ("realloc");
286     exit (EXIT_FAILURE);
287   }
288 
289   /* It's not easy to get the actual CID here.
290    * IOCTL_VM_SOCKETS_GET_LOCAL_CID is documented, but requires
291    * opening /dev/vsock which is not accessible to non-root users.
292    * bind above doesn't update the sockaddr.  Using getsockname
293    * doesn't work.
294    */
295   debug ("bound to vsock any:%" PRIu32, addr.svm_port);
296 
297 #else
298   /* Can't happen because main() checks if AF_VSOCK is defined and
299    * prevents vsock from being set, so this function can never be
300    * called.
301    */
302   abort ();
303 #endif
304 }
305 
306 /* This counts the number of connection threads running (note: not the
307  * number of worker threads, each connection thread will start many
308  * worker independent threads in the current implementation).  The
309  * purpose of this is so we can wait for all the connection threads to
310  * exit before we return from accept_incoming_connections, so that
311  * unload-time actions happen with no connections open.
312  */
313 static pthread_mutex_t count_mutex = PTHREAD_MUTEX_INITIALIZER;
314 static pthread_cond_t count_cond = PTHREAD_COND_INITIALIZER;
315 static unsigned count = 0;
316 
317 struct thread_data {
318   int sock;
319   size_t instance_num;
320 };
321 
322 static void *
start_thread(void * datav)323 start_thread (void *datav)
324 {
325   struct thread_data *data = datav;
326 
327   debug ("accepted connection");
328 
329   pthread_mutex_lock (&count_mutex);
330   count++;
331   pthread_mutex_unlock (&count_mutex);
332 
333   /* Set thread-local data. */
334   threadlocal_new_server_thread ();
335   threadlocal_set_instance_num (data->instance_num);
336 
337   handle_single_connection (data->sock, data->sock);
338 
339   free (data);
340 
341   pthread_mutex_lock (&count_mutex);
342   count--;
343   pthread_cond_signal (&count_cond);
344   pthread_mutex_unlock (&count_mutex);
345 
346   return NULL;
347 }
348 
349 static void
accept_connection(int listen_sock)350 accept_connection (int listen_sock)
351 {
352   int err;
353   pthread_attr_t attrs;
354   pthread_t thread;
355   struct thread_data *thread_data;
356   static size_t instance_num = 1;
357   const int flag = 1;
358 
359   thread_data = malloc (sizeof *thread_data);
360   if (unlikely (!thread_data)) {
361     perror ("malloc");
362     return;
363   }
364 
365   thread_data->instance_num = instance_num++;
366  again:
367 #ifdef HAVE_ACCEPT4
368   thread_data->sock = accept4 (listen_sock, NULL, NULL, SOCK_CLOEXEC);
369 #else
370   /* If we were fully parallel, then this function could be accepting
371    * connections in one thread while another thread could be in a
372    * plugin trying to fork.  But plugins.c forced thread_model to
373    * serialize_all_requests when it detects a lack of atomic CLOEXEC,
374    * at which point, we can use a mutex to ensure we aren't accepting
375    * until the plugin is not running, making non-atomicity okay.
376    */
377   assert (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS);
378   lock_request ();
379   thread_data->sock = set_cloexec (accept (listen_sock, NULL, NULL));
380   unlock_request ();
381 #endif
382   if (thread_data->sock == -1) {
383     if (errno == EINTR || errno == EAGAIN)
384       goto again;
385     perror ("accept");
386     free (thread_data);
387     return;
388   }
389 
390   /* Disable Nagle's algorithm on this socket.  However we don't want
391    * to fail if this doesn't work.
392    */
393   setsockopt (thread_data->sock, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof flag);
394 
395   /* Start a thread to handle this connection.  Note we always do this
396    * even for non-threaded plugins.  There are mutexes in plugins.c
397    * which ensure that non-threaded plugins are handled correctly.
398    */
399   pthread_attr_init (&attrs);
400   pthread_attr_setdetachstate (&attrs, PTHREAD_CREATE_DETACHED);
401   err = pthread_create (&thread, &attrs, start_thread, thread_data);
402   pthread_attr_destroy (&attrs);
403   if (unlikely (err != 0)) {
404     fprintf (stderr, "%s: pthread_create: %s\n", program_name, strerror (err));
405     close (thread_data->sock);
406     free (thread_data);
407     return;
408   }
409 
410   /* If the thread starts successfully, then it is responsible for
411    * closing the socket and freeing thread_data.
412    */
413 }
414 
415 /* Check the list of sockets plus quit_fd until a POLLIN event occurs
416  * on any of them.
417  *
418  * If POLLIN occurs on quit_fd do nothing except returning early
419  * (don't call accept_connection in this case).
420  *
421  * If POLLIN occurs on one of the sockets, call
422  * accept_connection (socks.ptr[i]) on each of them.
423  */
424 static void
check_sockets_and_quit_fd(const sockets * socks)425 check_sockets_and_quit_fd (const sockets *socks)
426 {
427   const size_t nr_socks = socks->size;
428   size_t i;
429   int r;
430 
431   CLEANUP_FREE struct pollfd *fds =
432     malloc (sizeof (struct pollfd) * (nr_socks+1));
433   if (fds == NULL) {
434     perror ("malloc");
435     exit (EXIT_FAILURE);
436   }
437 
438   for (i = 0; i < nr_socks; ++i) {
439     fds[i].fd = socks->ptr[i];
440     fds[i].events = POLLIN;
441     fds[i].revents = 0;
442   }
443   fds[nr_socks].fd = quit_fd;
444   fds[nr_socks].events = POLLIN;
445   fds[nr_socks].revents = 0;
446 
447   r = poll (fds, nr_socks + 1, -1);
448   if (r == -1) {
449     if (errno == EINTR || errno == EAGAIN)
450       return;
451     perror ("poll");
452     exit (EXIT_FAILURE);
453   }
454 
455   /* We don't even have to read quit_fd - just knowing that it has
456    * data means the signal handler ran, so we are ready to quit the
457    * loop.
458    */
459   if (fds[nr_socks].revents & POLLIN)
460     return;
461 
462   for (i = 0; i < nr_socks; ++i) {
463     if (fds[i].revents & POLLIN)
464       accept_connection (socks->ptr[i]);
465   }
466 }
467 
468 void
accept_incoming_connections(const sockets * socks)469 accept_incoming_connections (const sockets *socks)
470 {
471   size_t i;
472   int err;
473 
474   while (!quit)
475     check_sockets_and_quit_fd (socks);
476 
477   /* Wait for all threads to exit. */
478   pthread_mutex_lock (&count_mutex);
479   for (;;) {
480     if (count == 0)
481       break;
482     err = pthread_cond_wait (&count_cond, &count_mutex);
483     if (err != 0) {
484       errno = err;
485       perror ("pthread_cond_wait");
486     }
487   }
488   pthread_mutex_unlock (&count_mutex);
489 
490   for (i = 0; i < socks->size; ++i)
491     close (socks->ptr[i]);
492   free (socks->ptr);
493 }
494