1 /*
2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 #include <assert.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <signal.h>
31 #include <pthread.h>
32 #include <sys/types.h>
33 #include <sys/socket.h>
34 #include <sys/time.h>
35 #include <sys/resource.h>
36 #include <sys/uio.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <sys/poll.h>
40 
/*
 * Record describing one thread that is inside a blocking operation on a
 * file descriptor. The callers in this file declare it as a local variable
 * and link it into the owning fd's thread list for the duration of the call.
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread (set from pthread_self() in startOp) */
    struct threadEntry *next;           /* next thread in the same fd's list */
    int intr;                           /* set to 1 by closefd when the fd is closed/dup'ed */
} threadEntry_t;
49 
/*
 * Per-file-descriptor bookkeeping - one entry per fd. Entries are heap
 * allocated: the base table during initialization, overflow slabs on demand.
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock; guards the threads list */
    threadEntry_t *threads;             /* threads blocked on fd (singly linked, newest first) */
} fdEntry_t;
57 
/*
 * Signal used to unblock a thread that is inside a blocking call on a
 * file descriptor that is being closed or dup'ed. It is delivered with
 * pthread_kill() and its handler does nothing; the only purpose is to
 * interrupt the system call.
 * NOTE(review): __SIGRTMAX is an implementation-provided macro, presumably
 * the highest real-time signal number - confirm for the target libc.
 */
static int sigWakeup = (__SIGRTMAX - 2);
62 
63 /*
64  * fdTable holds one entry per file descriptor, up to a certain
65  * maximum.
66  * Theoretically, the number of possible file descriptors can get
67  * large, though usually it does not. Entries for small value file
68  * descriptors are kept in a simple table, which covers most scenarios.
69  * Entries for large value file descriptors are kept in an overflow
70  * table, which is organized as a sparse two dimensional array whose
71  * slabs are allocated on demand. This covers all corner cases while
72  * keeping memory consumption reasonable.
73  */
74 
75 /* Base table for low value file descriptors */
76 static fdEntry_t* fdTable = NULL;
77 /* Maximum size of base table (in number of entries). */
78 static const int fdTableMaxSize = 0x1000; /* 4K */
79 /* Actual size of base table (in number of entries) */
80 static int fdTableLen = 0;
81 /* Max. theoretical number of file descriptors on system. */
82 static int fdLimit = 0;
83 
84 /* Overflow table, should base table not be large enough. Organized as
85  *   an array of n slabs, each holding 64k entries.
86  */
87 static fdEntry_t** fdOverflowTable = NULL;
88 /* Number of slabs in the overflow table */
89 static int fdOverflowTableLen = 0;
90 /* Number of entries in one slab */
91 static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
92 pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;
93 
/*
 * Handler for the wakeup signal. It deliberately does nothing: delivering
 * the signal is enough to interrupt a blocking system call.
 */
static void sig_wakeup(int sig) {
    (void) sig;     /* unused by design */
}
99 
100 /*
101  * Initialization routine (executed when library is loaded)
102  * Allocate fd tables and sets up signal handler.
103  */
init()104 static void __attribute((constructor)) init() {
105     struct rlimit nbr_files;
106     sigset_t sigset;
107     struct sigaction sa;
108     int i = 0;
109 
110     /* Determine the maximum number of possible file descriptors. */
111     if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
112         fprintf(stderr, "library initialization failed - "
113                 "unable to get max # of allocated fds\n");
114         abort();
115     }
116     if (nbr_files.rlim_max != RLIM_INFINITY) {
117         fdLimit = nbr_files.rlim_max;
118     } else {
119         /* We just do not know. */
120         fdLimit = INT_MAX;
121     }
122 
123     /* Allocate table for low value file descriptors. */
124     fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
125     fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));
126     if (fdTable == NULL) {
127         fprintf(stderr, "library initialization failed - "
128                 "unable to allocate file descriptor table - out of memory");
129         abort();
130     } else {
131         for (i = 0; i < fdTableLen; i ++) {
132             pthread_mutex_init(&fdTable[i].lock, NULL);
133         }
134     }
135 
136     /* Allocate overflow table, if needed */
137     if (fdLimit > fdTableMaxSize) {
138         fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
139         fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
140         if (fdOverflowTable == NULL) {
141             fprintf(stderr, "library initialization failed - "
142                     "unable to allocate file descriptor overflow table - out of memory");
143             abort();
144         }
145     }
146 
147     /*
148      * Setup the signal handler
149      */
150     sa.sa_handler = sig_wakeup;
151     sa.sa_flags   = 0;
152     sigemptyset(&sa.sa_mask);
153     sigaction(sigWakeup, &sa, NULL);
154 
155     sigemptyset(&sigset);
156     sigaddset(&sigset, sigWakeup);
157     sigprocmask(SIG_UNBLOCK, &sigset, NULL);
158 }
159 
160 /*
161  * Return the fd table for this fd.
162  */
getFdEntry(int fd)163 static inline fdEntry_t *getFdEntry(int fd)
164 {
165     fdEntry_t* result = NULL;
166 
167     if (fd < 0) {
168         return NULL;
169     }
170 
171     /* This should not happen. If it does, our assumption about
172      * max. fd value was wrong. */
173     assert(fd < fdLimit);
174 
175     if (fd < fdTableMaxSize) {
176         /* fd is in base table. */
177         assert(fd < fdTableLen);
178         result = &fdTable[fd];
179     } else {
180         /* fd is in overflow table. */
181         const int indexInOverflowTable = fd - fdTableMaxSize;
182         const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
183         const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
184         fdEntry_t* slab = NULL;
185         assert(rootindex < fdOverflowTableLen);
186         assert(slabindex < fdOverflowTableSlabSize);
187         pthread_mutex_lock(&fdOverflowTableLock);
188         /* Allocate new slab in overflow table if needed */
189         if (fdOverflowTable[rootindex] == NULL) {
190             fdEntry_t* const newSlab =
191                 (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
192             if (newSlab == NULL) {
193                 fprintf(stderr, "Unable to allocate file descriptor overflow"
194                         " table slab - out of memory");
195                 pthread_mutex_unlock(&fdOverflowTableLock);
196                 abort();
197             } else {
198                 int i;
199                 for (i = 0; i < fdOverflowTableSlabSize; i ++) {
200                     pthread_mutex_init(&newSlab[i].lock, NULL);
201                 }
202                 fdOverflowTable[rootindex] = newSlab;
203             }
204         }
205         pthread_mutex_unlock(&fdOverflowTableLock);
206         slab = fdOverflowTable[rootindex];
207         result = &slab[slabindex];
208     }
209 
210     return result;
211 
212 }
213 
214 /*
215  * Start a blocking operation :-
216  *    Insert thread onto thread list for the fd.
217  */
startOp(fdEntry_t * fdEntry,threadEntry_t * self)218 static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
219 {
220     self->thr = pthread_self();
221     self->intr = 0;
222 
223     pthread_mutex_lock(&(fdEntry->lock));
224     {
225         self->next = fdEntry->threads;
226         fdEntry->threads = self;
227     }
228     pthread_mutex_unlock(&(fdEntry->lock));
229 }
230 
231 /*
232  * End a blocking operation :-
233  *     Remove thread from thread list for the fd
234  *     If fd has been interrupted then set errno to EBADF
235  */
endOp(fdEntry_t * fdEntry,threadEntry_t * self)236 static inline void endOp
237     (fdEntry_t *fdEntry, threadEntry_t *self)
238 {
239     int orig_errno = errno;
240     pthread_mutex_lock(&(fdEntry->lock));
241     {
242         threadEntry_t *curr, *prev=NULL;
243         curr = fdEntry->threads;
244         while (curr != NULL) {
245             if (curr == self) {
246                 if (curr->intr) {
247                     orig_errno = EBADF;
248                 }
249                 if (prev == NULL) {
250                     fdEntry->threads = curr->next;
251                 } else {
252                     prev->next = curr->next;
253                 }
254                 break;
255             }
256             prev = curr;
257             curr = curr->next;
258         }
259     }
260     pthread_mutex_unlock(&(fdEntry->lock));
261     errno = orig_errno;
262 }
263 
264 /*
265  * Close or dup2 a file descriptor ensuring that all threads blocked on
266  * the file descriptor are notified via a wakeup signal.
267  *
268  *      fd1 < 0    => close(fd2)
269  *      fd1 >= 0   => dup2(fd1, fd2)
270  *
271  * Returns -1 with errno set if operation fails.
272  */
closefd(int fd1,int fd2)273 static int closefd(int fd1, int fd2) {
274     int rv, orig_errno;
275     fdEntry_t *fdEntry = getFdEntry(fd2);
276     if (fdEntry == NULL) {
277         errno = EBADF;
278         return -1;
279     }
280 
281     /*
282      * Lock the fd to hold-off additional I/O on this fd.
283      */
284     pthread_mutex_lock(&(fdEntry->lock));
285 
286     {
287         /*
288          * And close/dup the file descriptor
289          * (restart if interrupted by signal)
290          */
291         do {
292             if (fd1 < 0) {
293                 rv = close(fd2);
294             } else {
295                 rv = dup2(fd1, fd2);
296             }
297         } while (rv == -1 && errno == EINTR);
298 
299         /*
300          * Send a wakeup signal to all threads blocked on this
301          * file descriptor.
302          */
303         threadEntry_t *curr = fdEntry->threads;
304         while (curr != NULL) {
305             curr->intr = 1;
306             pthread_kill( curr->thr, sigWakeup );
307             curr = curr->next;
308         }
309     }
310 
311     /*
312      * Unlock without destroying errno
313      */
314     orig_errno = errno;
315     pthread_mutex_unlock(&(fdEntry->lock));
316     errno = orig_errno;
317 
318     return rv;
319 }
320 
/*
 * dup2 replacement: behaves like the dup2 system call, and additionally
 * any thread blocked in an I/O system call on fd2 is preempted and sees
 * -1/EBADF. A negative source descriptor is rejected with EBADF.
 */
int NET_Dup2(int fd, int fd2) {
    if (fd >= 0) {
        return closefd(fd, fd2);
    }
    errno = EBADF;
    return -1;
}
333 
/*
 * Wrapper for close - same semantics as close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 *
 * Implemented as closefd(-1, fd); the negative first argument selects
 * the close (rather than dup2) path.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}
342 
343 /************** Basic I/O operations here ***************/
344 
/*
 * Macro to perform a blocking IO operation. Restarts
 * automatically if interrupted by signal (other than
 * our wakeup signal).
 *
 *   FD   - file descriptor whose table entry the thread is registered on
 *   FUNC - expression performing the I/O; its value is assigned to an int
 *
 * The expansion is a complete function body fragment: it RETURNS from the
 * enclosing function in every path (-1 with errno EBADF when no table
 * entry exists for FD, otherwise the value of FUNC). Statements placed
 * after a use of this macro are therefore never executed.
 *
 * The thread is registered before and deregistered after every attempt;
 * endOp replaces errno with EBADF when the thread was marked interrupted,
 * which ends the retry loop.
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
    int ret;                                    \
    threadEntry_t self;                         \
    fdEntry_t *fdEntry = getFdEntry(FD);        \
    if (fdEntry == NULL) {                      \
        errno = EBADF;                          \
        return -1;                              \
    }                                           \
    do {                                        \
        startOp(fdEntry, &self);                \
        ret = FUNC;                             \
        endOp(fdEntry, &self);                  \
    } while (ret == -1 && errno == EINTR);      \
    return ret;                                 \
}
365 
/*
 * recv() on s with flags 0, run through BLOCKING_IO_RETURN_INT: restarted
 * on EINTR, and failing with -1/EBADF when no fd table entry exists for s
 * or the thread was marked interrupted during the call.
 * Returns the value of recv() converted to int.
 */
int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}
369 
/*
 * Same as NET_Read but passes MSG_DONTWAIT to recv(), requesting a
 * non-blocking receive for this call only. Still registered on the fd and
 * restarted on EINTR by BLOCKING_IO_RETURN_INT.
 */
int NET_NonBlockingRead(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, MSG_DONTWAIT) );
}
373 
/*
 * readv() on s into the count buffers described by vector, run through
 * BLOCKING_IO_RETURN_INT (EINTR restart, -1/EBADF when interrupted by a
 * close of s or when no fd table entry exists).
 */
int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}
377 
NET_RecvFrom(int s,void * buf,int len,unsigned int flags,struct sockaddr * from,int * fromlen)378 int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
379        struct sockaddr *from, int *fromlen) {
380     socklen_t socklen = *fromlen;
381     BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, &socklen) );
382     *fromlen = socklen;
383 }
384 
/*
 * send() of len bytes from msg on s with the given flags, run through
 * BLOCKING_IO_RETURN_INT (EINTR restart, -1/EBADF when interrupted by a
 * close of s or when no fd table entry exists).
 */
int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}
388 
/*
 * writev() on s from the count buffers described by vector, run through
 * BLOCKING_IO_RETURN_INT (EINTR restart, -1/EBADF when interrupted by a
 * close of s or when no fd table entry exists).
 */
int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}
392 
/*
 * sendto() of len bytes from msg on s to the address to/tolen, run
 * through BLOCKING_IO_RETURN_INT (EINTR restart, -1/EBADF when
 * interrupted by a close of s or when no fd table entry exists).
 */
int NET_SendTo(int s, const void *msg, int len,  unsigned  int
       flags, const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}
397 
NET_Accept(int s,struct sockaddr * addr,int * addrlen)398 int NET_Accept(int s, struct sockaddr *addr, int *addrlen) {
399     socklen_t socklen = *addrlen;
400     BLOCKING_IO_RETURN_INT( s, accept(s, addr, &socklen) );
401     *addrlen = socklen;
402 }
403 
/*
 * connect() of s to addr/addrlen, run through BLOCKING_IO_RETURN_INT
 * (restarted when it fails with EINTR, -1/EBADF when interrupted by a
 * close of s or when no fd table entry exists).
 */
int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
}
407 
408 #ifndef USE_SELECT
/*
 * poll() on the nfds entries of ufds with the given timeout, run through
 * BLOCKING_IO_RETURN_INT. The thread is registered only on the descriptor
 * of the FIRST entry (ufds[0].fd), so only a close/dup2 of that descriptor
 * wakes the call with -1/EBADF.
 * NOTE(review): ufds[0] is read unconditionally - callers presumably
 * always pass nfds >= 1; confirm.
 */
int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}
412 #else
/*
 * select() with s as its first (nfds) argument, run through
 * BLOCKING_IO_RETURN_INT. The thread is registered on descriptor s-1,
 * so only a close/dup2 of that descriptor wakes the call with -1/EBADF.
 */
int NET_Select(int s, fd_set *readfds, fd_set *writefds,
               fd_set *exceptfds, struct timeval *timeout) {
    BLOCKING_IO_RETURN_INT( s-1,
                            select(s, readfds, writefds, exceptfds, timeout) );
}
418 #endif
419 
420 /*
421  * Wrapper for poll(s, timeout).
422  * Auto restarts with adjusted timeout if interrupted by
423  * signal other than our wakeup signal.
424  */
NET_Timeout0(int s,long timeout,long currentTime)425 int NET_Timeout0(int s, long timeout, long currentTime) {
426     long prevtime = currentTime, newtime;
427     struct timeval t;
428     fdEntry_t *fdEntry = getFdEntry(s);
429 
430     /*
431      * Check that fd hasn't been closed.
432      */
433     if (fdEntry == NULL) {
434         errno = EBADF;
435         return -1;
436     }
437 
438     for(;;) {
439         struct pollfd pfd;
440         int rv;
441         threadEntry_t self;
442 
443         /*
444          * Poll the fd. If interrupted by our wakeup signal
445          * errno will be set to EBADF.
446          */
447         pfd.fd = s;
448         pfd.events = POLLIN | POLLERR;
449 
450         startOp(fdEntry, &self);
451         rv = poll(&pfd, 1, timeout);
452         endOp(fdEntry, &self);
453 
454         /*
455          * If interrupted then adjust timeout. If timeout
456          * has expired return 0 (indicating timeout expired).
457          */
458         if (rv < 0 && errno == EINTR) {
459             if (timeout > 0) {
460                 gettimeofday(&t, NULL);
461                 newtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
462                 timeout -= newtime - prevtime;
463                 if (timeout <= 0) {
464                     return 0;
465                 }
466                 prevtime = newtime;
467             }
468         } else {
469             return rv;
470         }
471 
472     }
473 }
474