1 /*
2  * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package sun.nio.ch;
27 
28 import java.io.IOException;
29 import java.security.AccessController;
30 import java.util.BitSet;
31 import java.util.HashMap;
32 import java.util.Map;
33 import sun.security.action.GetIntegerAction;
34 
35 /**
36  * Manipulates a native array of epoll_event structs on Linux:
37  *
38  * typedef union epoll_data {
39  *     void *ptr;
40  *     int fd;
41  *     __uint32_t u32;
42  *     __uint64_t u64;
43  *  } epoll_data_t;
44  *
45  * struct epoll_event {
46  *     __uint32_t events;
47  *     epoll_data_t data;
48  * };
49  *
50  * The system call to wait for I/O events is epoll_wait(2). It populates an
51  * array of epoll_event structures that are passed to the call. The data
52  * member of the epoll_event structure contains the same data as was set
53  * when the file descriptor was registered to epoll via epoll_ctl(2). In
54  * this implementation we set data.fd to be the file descriptor that we
55  * register. That way, we have the file descriptor available when we
56  * process the events.
57  */
58 
59 class EPollArrayWrapper {
60     // EPOLL_EVENTS
61     private static final int EPOLLIN      = 0x001;
62 
63     // opcodes
64     private static final int EPOLL_CTL_ADD      = 1;
65     private static final int EPOLL_CTL_DEL      = 2;
66     private static final int EPOLL_CTL_MOD      = 3;
67 
68     // Miscellaneous constants
69     private static final int SIZE_EPOLLEVENT  = sizeofEPollEvent();
70     private static final int EVENT_OFFSET     = 0;
71     private static final int DATA_OFFSET      = offsetofData();
72     private static final int FD_OFFSET        = DATA_OFFSET;
73     private static final int OPEN_MAX         = IOUtil.fdLimit();
74     private static final int NUM_EPOLLEVENTS  = Math.min(OPEN_MAX, 8192);
75 
76     // Special value to indicate that an update should be ignored
77     private static final byte  KILLED = (byte)-1;
78 
79     // Initial size of arrays for fd registration changes
80     private static final int INITIAL_PENDING_UPDATE_SIZE = 64;
81 
82     // maximum size of updatesLow
83     private static final int MAX_UPDATE_ARRAY_SIZE = AccessController.doPrivileged(
84         new GetIntegerAction("sun.nio.ch.maxUpdateArraySize", Math.min(OPEN_MAX, 64*1024)));
85 
86     // The fd of the epoll driver
87     private final int epfd;
88 
89      // The epoll_event array for results from epoll_wait
90     private final AllocatedNativeObject pollArray;
91 
92     // Base address of the epoll_event array
93     private final long pollArrayAddress;
94 
95     // The fd of the interrupt line going out
96     private int outgoingInterruptFD;
97 
98     // The fd of the interrupt line coming in
99     private int incomingInterruptFD;
100 
101     // The index of the interrupt FD
102     private int interruptedIndex;
103 
104     // Number of updated pollfd entries
105     int updated;
106 
107     // object to synchronize fd registration changes
108     private final Object updateLock = new Object();
109 
110     // number of file descriptors with registration changes pending
111     private int updateCount;
112 
113     // file descriptors with registration changes pending
114     private int[] updateDescriptors = new int[INITIAL_PENDING_UPDATE_SIZE];
115 
116     // events for file descriptors with registration changes pending, indexed
117     // by file descriptor and stored as bytes for efficiency reasons. For
118     // file descriptors higher than MAX_UPDATE_ARRAY_SIZE (unlimited case at
119     // least) then the update is stored in a map.
120     private final byte[] eventsLow = new byte[MAX_UPDATE_ARRAY_SIZE];
121     private Map<Integer,Byte> eventsHigh;
122 
123     // Used by release and updateRegistrations to track whether a file
124     // descriptor is registered with epoll.
125     private final BitSet registered = new BitSet();
126 
127 
EPollArrayWrapper()128     EPollArrayWrapper() throws IOException {
129         // creates the epoll file descriptor
130         epfd = epollCreate();
131 
132         // the epoll_event array passed to epoll_wait
133         int allocationSize = NUM_EPOLLEVENTS * SIZE_EPOLLEVENT;
134         pollArray = new AllocatedNativeObject(allocationSize, true);
135         pollArrayAddress = pollArray.address();
136 
137         // eventHigh needed when using file descriptors > 64k
138         if (OPEN_MAX > MAX_UPDATE_ARRAY_SIZE)
139             eventsHigh = new HashMap<>();
140     }
141 
initInterrupt(int fd0, int fd1)142     void initInterrupt(int fd0, int fd1) {
143         outgoingInterruptFD = fd1;
144         incomingInterruptFD = fd0;
145         epollCtl(epfd, EPOLL_CTL_ADD, fd0, EPOLLIN);
146     }
147 
putEventOps(int i, int event)148     void putEventOps(int i, int event) {
149         int offset = SIZE_EPOLLEVENT * i + EVENT_OFFSET;
150         pollArray.putInt(offset, event);
151     }
152 
putDescriptor(int i, int fd)153     void putDescriptor(int i, int fd) {
154         int offset = SIZE_EPOLLEVENT * i + FD_OFFSET;
155         pollArray.putInt(offset, fd);
156     }
157 
getEventOps(int i)158     int getEventOps(int i) {
159         int offset = SIZE_EPOLLEVENT * i + EVENT_OFFSET;
160         return pollArray.getInt(offset);
161     }
162 
getDescriptor(int i)163     int getDescriptor(int i) {
164         int offset = SIZE_EPOLLEVENT * i + FD_OFFSET;
165         return pollArray.getInt(offset);
166     }
167 
168     /**
169      * Returns {@code true} if updates for the given key (file
170      * descriptor) are killed.
171      */
isEventsHighKilled(Integer key)172     private boolean isEventsHighKilled(Integer key) {
173         assert key >= MAX_UPDATE_ARRAY_SIZE;
174         Byte value = eventsHigh.get(key);
175         return (value != null && value == KILLED);
176     }
177 
178     /**
179      * Sets the pending update events for the given file descriptor. This
180      * method has no effect if the update events is already set to KILLED,
181      * unless {@code force} is {@code true}.
182      */
setUpdateEvents(int fd, byte events, boolean force)183     private void setUpdateEvents(int fd, byte events, boolean force) {
184         if (fd < MAX_UPDATE_ARRAY_SIZE) {
185             if ((eventsLow[fd] != KILLED) || force) {
186                 eventsLow[fd] = events;
187             }
188         } else {
189             Integer key = Integer.valueOf(fd);
190             if (!isEventsHighKilled(key) || force) {
191                 eventsHigh.put(key, Byte.valueOf(events));
192             }
193         }
194     }
195 
196     /**
197      * Returns the pending update events for the given file descriptor.
198      */
getUpdateEvents(int fd)199     private byte getUpdateEvents(int fd) {
200         if (fd < MAX_UPDATE_ARRAY_SIZE) {
201             return eventsLow[fd];
202         } else {
203             Byte result = eventsHigh.get(Integer.valueOf(fd));
204             // result should never be null
205             return result.byteValue();
206         }
207     }
208 
209     /**
210      * Update the events for a given file descriptor
211      */
setInterest(int fd, int mask)212     void setInterest(int fd, int mask) {
213         synchronized (updateLock) {
214             // record the file descriptor and events
215             int oldCapacity = updateDescriptors.length;
216             if (updateCount == oldCapacity) {
217                 int newCapacity = oldCapacity + INITIAL_PENDING_UPDATE_SIZE;
218                 int[] newDescriptors = new int[newCapacity];
219                 System.arraycopy(updateDescriptors, 0, newDescriptors, 0, oldCapacity);
220                 updateDescriptors = newDescriptors;
221             }
222             updateDescriptors[updateCount++] = fd;
223 
224             // events are stored as bytes for efficiency reasons
225             byte b = (byte)mask;
226             assert (b == mask) && (b != KILLED);
227             setUpdateEvents(fd, b, false);
228         }
229     }
230 
231     /**
232      * Add a file descriptor
233      */
add(int fd)234     void add(int fd) {
235         // force the initial update events to 0 as it may be KILLED by a
236         // previous registration.
237         synchronized (updateLock) {
238             assert !registered.get(fd);
239             setUpdateEvents(fd, (byte)0, true);
240         }
241     }
242 
243     /**
244      * Remove a file descriptor
245      */
remove(int fd)246     void remove(int fd) {
247         synchronized (updateLock) {
248             // kill pending and future update for this file descriptor
249             setUpdateEvents(fd, KILLED, false);
250 
251             // remove from epoll
252             if (registered.get(fd)) {
253                 epollCtl(epfd, EPOLL_CTL_DEL, fd, 0);
254                 registered.clear(fd);
255             }
256         }
257     }
258 
259     /**
260      * Close epoll file descriptor and free poll array
261      */
closeEPollFD()262     void closeEPollFD() throws IOException {
263         FileDispatcherImpl.closeIntFD(epfd);
264         pollArray.free();
265     }
266 
poll(long timeout)267     int poll(long timeout) throws IOException {
268         updateRegistrations();
269         updated = epollWait(pollArrayAddress, NUM_EPOLLEVENTS, timeout, epfd);
270         for (int i=0; i<updated; i++) {
271             if (getDescriptor(i) == incomingInterruptFD) {
272                 interruptedIndex = i;
273                 interrupted = true;
274                 break;
275             }
276         }
277         return updated;
278     }
279 
280     /**
281      * Update the pending registrations.
282      */
updateRegistrations()283     private void updateRegistrations() {
284         synchronized (updateLock) {
285             int j = 0;
286             while (j < updateCount) {
287                 int fd = updateDescriptors[j];
288                 short events = getUpdateEvents(fd);
289                 boolean isRegistered = registered.get(fd);
290                 int opcode = 0;
291 
292                 if (events != KILLED) {
293                     if (isRegistered) {
294                         opcode = (events != 0) ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
295                     } else {
296                         opcode = (events != 0) ? EPOLL_CTL_ADD : 0;
297                     }
298                     if (opcode != 0) {
299                         epollCtl(epfd, opcode, fd, events);
300                         if (opcode == EPOLL_CTL_ADD) {
301                             registered.set(fd);
302                         } else if (opcode == EPOLL_CTL_DEL) {
303                             registered.clear(fd);
304                         }
305                     }
306                 }
307                 j++;
308             }
309             updateCount = 0;
310         }
311     }
312 
313     // interrupt support
314     private boolean interrupted = false;
315 
interrupt()316     public void interrupt() {
317         interrupt(outgoingInterruptFD);
318     }
319 
interruptedIndex()320     public int interruptedIndex() {
321         return interruptedIndex;
322     }
323 
interrupted()324     boolean interrupted() {
325         return interrupted;
326     }
327 
clearInterrupted()328     void clearInterrupted() {
329         interrupted = false;
330     }
331 
332     static {
IOUtil.load()333         IOUtil.load();
init()334         init();
335     }
336 
epollCreate()337     private native int epollCreate();
epollCtl(int epfd, int opcode, int fd, int events)338     private native void epollCtl(int epfd, int opcode, int fd, int events);
epollWait(long pollAddress, int numfds, long timeout, int epfd)339     private native int epollWait(long pollAddress, int numfds, long timeout,
340                                  int epfd) throws IOException;
sizeofEPollEvent()341     private static native int sizeofEPollEvent();
offsetofData()342     private static native int offsetofData();
interrupt(int fd)343     private static native void interrupt(int fd);
init()344     private static native void init();
345 }
346