1 /*
2 * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 #include <assert.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <signal.h>
31 #include <pthread.h>
32 #include <sys/types.h>
33 #include <sys/socket.h>
34 #include <sys/time.h>
35 #include <sys/resource.h>
36 #include <sys/uio.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <sys/poll.h>
40
41 /*
42 * Stack allocated by thread when doing blocking operation
43 */
/*
 * Stack allocated by thread when doing blocking operation.
 * One entry is pushed onto the fd's thread list for the duration of a
 * blocking syscall so that closefd() can find and signal the thread.
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread blocked on same fd */
    int intr;                           /* set to 1 when fd is closed under us */
} threadEntry_t;
49
/*
 * Heap allocated during initialization - one entry per fd.
 * The lock serializes close/dup2 against new blocking operations on
 * the same fd; threads is the list of threads currently blocked on it.
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* threads blocked on fd */
} fdEntry_t;
57
58 /*
59 * Signal to unblock thread
60 */
61 static int sigWakeup = (__SIGRTMAX - 2);
62
63 /*
64 * fdTable holds one entry per file descriptor, up to a certain
65 * maximum.
66 * Theoretically, the number of possible file descriptors can get
67 * large, though usually it does not. Entries for small value file
68 * descriptors are kept in a simple table, which covers most scenarios.
69 * Entries for large value file descriptors are kept in an overflow
70 * table, which is organized as a sparse two dimensional array whose
71 * slabs are allocated on demand. This covers all corner cases while
72 * keeping memory consumption reasonable.
73 */
74
75 /* Base table for low value file descriptors */
76 static fdEntry_t* fdTable = NULL;
77 /* Maximum size of base table (in number of entries). */
78 static const int fdTableMaxSize = 0x1000; /* 4K */
79 /* Actual size of base table (in number of entries) */
80 static int fdTableLen = 0;
81 /* Max. theoretical number of file descriptors on system. */
82 static int fdLimit = 0;
83
84 /* Overflow table, should base table not be large enough. Organized as
85 * an array of n slabs, each holding 64k entries.
86 */
87 static fdEntry_t** fdOverflowTable = NULL;
88 /* Number of slabs in the overflow table */
89 static int fdOverflowTableLen = 0;
90 /* Number of entries in one slab */
91 static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
92 pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;
93
/*
 * Null signal handler: the wakeup signal exists only so that its
 * delivery interrupts a blocking syscall with EINTR; nothing else
 * needs to happen here.
 */
static void sig_wakeup(int sig) {
}
99
100 /*
101 * Initialization routine (executed when library is loaded)
102 * Allocate fd tables and sets up signal handler.
103 */
init()104 static void __attribute((constructor)) init() {
105 struct rlimit nbr_files;
106 sigset_t sigset;
107 struct sigaction sa;
108 int i = 0;
109
110 /* Determine the maximum number of possible file descriptors. */
111 if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
112 fprintf(stderr, "library initialization failed - "
113 "unable to get max # of allocated fds\n");
114 abort();
115 }
116 if (nbr_files.rlim_max != RLIM_INFINITY) {
117 fdLimit = nbr_files.rlim_max;
118 } else {
119 /* We just do not know. */
120 fdLimit = INT_MAX;
121 }
122
123 /* Allocate table for low value file descriptors. */
124 fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
125 fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));
126 if (fdTable == NULL) {
127 fprintf(stderr, "library initialization failed - "
128 "unable to allocate file descriptor table - out of memory");
129 abort();
130 } else {
131 for (i = 0; i < fdTableLen; i ++) {
132 pthread_mutex_init(&fdTable[i].lock, NULL);
133 }
134 }
135
136 /* Allocate overflow table, if needed */
137 if (fdLimit > fdTableMaxSize) {
138 fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
139 fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
140 if (fdOverflowTable == NULL) {
141 fprintf(stderr, "library initialization failed - "
142 "unable to allocate file descriptor overflow table - out of memory");
143 abort();
144 }
145 }
146
147 /*
148 * Setup the signal handler
149 */
150 sa.sa_handler = sig_wakeup;
151 sa.sa_flags = 0;
152 sigemptyset(&sa.sa_mask);
153 sigaction(sigWakeup, &sa, NULL);
154
155 sigemptyset(&sigset);
156 sigaddset(&sigset, sigWakeup);
157 sigprocmask(SIG_UNBLOCK, &sigset, NULL);
158 }
159
/*
 * Return the fd table entry for this fd, or NULL if fd is negative.
 * Entries for fds below fdTableMaxSize live in the preallocated base
 * table; larger fds are mapped into an overflow slab that is allocated
 * lazily (under fdOverflowTableLock) on first use.
 */
static inline fdEntry_t *getFdEntry(int fd)
{
    fdEntry_t* result = NULL;

    if (fd < 0) {
        return NULL;
    }

    /* This should not happen. If it does, our assumption about
     * max. fd value was wrong. */
    assert(fd < fdLimit);

    if (fd < fdTableMaxSize) {
        /* fd is in base table. */
        assert(fd < fdTableLen);
        result = &fdTable[fd];
    } else {
        /* fd is in overflow table. */
        const int indexInOverflowTable = fd - fdTableMaxSize;
        const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
        const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
        fdEntry_t* slab = NULL;
        assert(rootindex < fdOverflowTableLen);
        assert(slabindex < fdOverflowTableSlabSize);
        pthread_mutex_lock(&fdOverflowTableLock);
        /* Allocate new slab in overflow table if needed */
        if (fdOverflowTable[rootindex] == NULL) {
            fdEntry_t* const newSlab =
                (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
            if (newSlab == NULL) {
                fprintf(stderr, "Unable to allocate file descriptor overflow"
                        " table slab - out of memory");
                pthread_mutex_unlock(&fdOverflowTableLock);
                abort();
            } else {
                int i;
                for (i = 0; i < fdOverflowTableSlabSize; i ++) {
                    pthread_mutex_init(&newSlab[i].lock, NULL);
                }
                /* Publish the slab only after it is fully initialized. */
                fdOverflowTable[rootindex] = newSlab;
            }
        }
        pthread_mutex_unlock(&fdOverflowTableLock);
        /* Safe to read without the lock: slabs, once published, are
         * never freed or replaced. */
        slab = fdOverflowTable[rootindex];
        result = &slab[slabindex];
    }

    return result;

}
213
214 /*
215 * Start a blocking operation :-
216 * Insert thread onto thread list for the fd.
217 */
startOp(fdEntry_t * fdEntry,threadEntry_t * self)218 static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
219 {
220 self->thr = pthread_self();
221 self->intr = 0;
222
223 pthread_mutex_lock(&(fdEntry->lock));
224 {
225 self->next = fdEntry->threads;
226 fdEntry->threads = self;
227 }
228 pthread_mutex_unlock(&(fdEntry->lock));
229 }
230
231 /*
232 * End a blocking operation :-
233 * Remove thread from thread list for the fd
234 * If fd has been interrupted then set errno to EBADF
235 */
endOp(fdEntry_t * fdEntry,threadEntry_t * self)236 static inline void endOp
237 (fdEntry_t *fdEntry, threadEntry_t *self)
238 {
239 int orig_errno = errno;
240 pthread_mutex_lock(&(fdEntry->lock));
241 {
242 threadEntry_t *curr, *prev=NULL;
243 curr = fdEntry->threads;
244 while (curr != NULL) {
245 if (curr == self) {
246 if (curr->intr) {
247 orig_errno = EBADF;
248 }
249 if (prev == NULL) {
250 fdEntry->threads = curr->next;
251 } else {
252 prev->next = curr->next;
253 }
254 break;
255 }
256 prev = curr;
257 curr = curr->next;
258 }
259 }
260 pthread_mutex_unlock(&(fdEntry->lock));
261 errno = orig_errno;
262 }
263
264 /*
265 * Close or dup2 a file descriptor ensuring that all threads blocked on
266 * the file descriptor are notified via a wakeup signal.
267 *
268 * fd1 < 0 => close(fd2)
269 * fd1 >= 0 => dup2(fd1, fd2)
270 *
271 * Returns -1 with errno set if operation fails.
272 */
closefd(int fd1,int fd2)273 static int closefd(int fd1, int fd2) {
274 int rv, orig_errno;
275 fdEntry_t *fdEntry = getFdEntry(fd2);
276 if (fdEntry == NULL) {
277 errno = EBADF;
278 return -1;
279 }
280
281 /*
282 * Lock the fd to hold-off additional I/O on this fd.
283 */
284 pthread_mutex_lock(&(fdEntry->lock));
285
286 {
287 /*
288 * And close/dup the file descriptor
289 * (restart if interrupted by signal)
290 */
291 do {
292 if (fd1 < 0) {
293 rv = close(fd2);
294 } else {
295 rv = dup2(fd1, fd2);
296 }
297 } while (rv == -1 && errno == EINTR);
298
299 /*
300 * Send a wakeup signal to all threads blocked on this
301 * file descriptor.
302 */
303 threadEntry_t *curr = fdEntry->threads;
304 while (curr != NULL) {
305 curr->intr = 1;
306 pthread_kill( curr->thr, sigWakeup );
307 curr = curr->next;
308 }
309 }
310
311 /*
312 * Unlock without destroying errno
313 */
314 orig_errno = errno;
315 pthread_mutex_unlock(&(fdEntry->lock));
316 errno = orig_errno;
317
318 return rv;
319 }
320
/*
 * Wrapper for dup2 - same semantics as the dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF.
 */
int NET_Dup2(int fd, int fd2) {
    if (fd >= 0) {
        return closefd(fd, fd2);
    }
    errno = EBADF;
    return -1;
}
333
/*
 * Wrapper for close - same semantics as close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 * A negative first argument selects the close(fd) path in closefd().
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}
342
/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation. Restarts
 * automatically if interrupted by signal (other than
 * our wakeup signal, which makes endOp() turn errno into EBADF
 * and so breaks the loop).
 *
 * NOTE: this macro RETURNS from the enclosing function - any
 * statement placed after it in the caller is unreachable.
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) { \
    int ret; \
    threadEntry_t self; \
    fdEntry_t *fdEntry = getFdEntry(FD); \
    if (fdEntry == NULL) { \
        errno = EBADF; \
        return -1; \
    } \
    do { \
        startOp(fdEntry, &self); \
        ret = FUNC; \
        endOp(fdEntry, &self); \
    } while (ret == -1 && errno == EINTR); \
    return ret; \
}
365
/* Blocking recv(); interrupted with -1/EBADF if the socket is closed. */
int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}

/* Non-blocking recv() (MSG_DONTWAIT), still registered for wakeup. */
int NET_NonBlockingRead(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, MSG_DONTWAIT) );
}

/* Blocking scatter read; same interruption semantics as NET_Read. */
int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}
377
NET_RecvFrom(int s,void * buf,int len,unsigned int flags,struct sockaddr * from,int * fromlen)378 int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
379 struct sockaddr *from, int *fromlen) {
380 socklen_t socklen = *fromlen;
381 BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, &socklen) );
382 *fromlen = socklen;
383 }
384
/* Blocking send(); interrupted with -1/EBADF if the socket is closed. */
int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}

/* Blocking gather write; same interruption semantics as NET_Send. */
int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}

/* Blocking sendto(); same interruption semantics as NET_Send. */
int NET_SendTo(int s, const void *msg, int len, unsigned int
       flags, const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}
397
NET_Accept(int s,struct sockaddr * addr,int * addrlen)398 int NET_Accept(int s, struct sockaddr *addr, int *addrlen) {
399 socklen_t socklen = *addrlen;
400 BLOCKING_IO_RETURN_INT( s, accept(s, addr, &socklen) );
401 *addrlen = socklen;
402 }
403
/* Blocking connect(); interrupted with -1/EBADF if the socket is closed. */
int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
}

#ifndef USE_SELECT
/* Blocking poll(); registered for wakeup on the first fd in ufds.
 * NOTE(review): assumes nfds >= 1 - ufds[0] is read unconditionally;
 * verify callers never pass an empty array. */
int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}
#else
/* Blocking select(); registered for wakeup on fd s-1 (s is the
 * conventional "highest fd + 1" select argument). */
int NET_Select(int s, fd_set *readfds, fd_set *writefds,
               fd_set *exceptfds, struct timeval *timeout) {
    BLOCKING_IO_RETURN_INT( s-1,
                            select(s, readfds, writefds, exceptfds, timeout) );
}
#endif
419
/*
 * Wrapper for poll(s, timeout).
 * Auto restarts with adjusted timeout if interrupted by
 * signal other than our wakeup signal.
 *
 * s           - fd to poll for POLLIN | POLLERR
 * timeout     - remaining timeout in milliseconds (<= 0 means poll's
 *               own semantics for non-positive timeouts apply)
 * currentTime - caller-supplied current time in milliseconds, used as
 *               the base for timeout adjustment on EINTR
 *
 * Returns poll()'s result; 0 if the adjusted timeout expires; -1 with
 * errno = EBADF if the fd is invalid or was closed while blocked.
 *
 * NOTE(review): newtime = tv_sec * 1000 can overflow a 32-bit long;
 * presumably acceptable on the intended (64-bit) targets - confirm.
 */
int NET_Timeout0(int s, long timeout, long currentTime) {
    long prevtime = currentTime, newtime;
    struct timeval t;
    fdEntry_t *fdEntry = getFdEntry(s);

    /*
     * Check that fd hasn't been closed.
     */
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    for(;;) {
        struct pollfd pfd;
        int rv;
        threadEntry_t self;

        /*
         * Poll the fd. If interrupted by our wakeup signal
         * errno will be set to EBADF (by endOp).
         */
        pfd.fd = s;
        pfd.events = POLLIN | POLLERR;

        startOp(fdEntry, &self);
        rv = poll(&pfd, 1, timeout);
        endOp(fdEntry, &self);

        /*
         * If interrupted then adjust timeout. If timeout
         * has expired return 0 (indicating timeout expired).
         */
        if (rv < 0 && errno == EINTR) {
            if (timeout > 0) {
                gettimeofday(&t, NULL);
                /* Milliseconds since the epoch; subtract elapsed time
                 * from the remaining timeout before retrying. */
                newtime = t.tv_sec * 1000 + t.tv_usec / 1000;
                timeout -= newtime - prevtime;
                if (timeout <= 0) {
                    return 0;
                }
                prevtime = newtime;
            }
        } else {
            return rv;
        }

    }
}
474