1 /*
2  * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /* DEBUG: section 05    Socket Functions */
10 
11 /*
12  * The idea for this came from these two websites:
13  * http://www.xmailserver.org/linux-patches/nio-improve.html
14  * http://www.kegel.com/c10k.html
15  *
16  * This is to support the epoll syscall being added to the linux 2.5
17  * kernel tree.  The new sys_epoll is an event based poller without
18  * most of the fuss of rtsignals.
19  *
20  * -- David Nicklay <dnicklay@web.turner.com>
21  */
22 
23 /*
24  * XXX Currently not implemented / supported by this module XXX
25  *
26  * - delay pools
27  * - deferred reads
28  *
29  */
30 
31 #include "squid.h"
32 
33 #if USE_EPOLL
34 
35 #include "comm/Loops.h"
36 #include "fde.h"
37 #include "globals.h"
38 #include "mgr/Registration.h"
39 #include "profiler/Profiler.h"
40 #include "SquidTime.h"
41 #include "StatCounters.h"
42 #include "StatHist.h"
43 #include "Store.h"
44 
45 #define DEBUG_EPOLL 0
46 
47 #include <cerrno>
48 #if HAVE_SYS_EPOLL_H
49 #include <sys/epoll.h>
50 #endif
51 
/* Forward declaration: registers this module's cache manager report. */
static void commEPollRegisterWithCacheManager(void);

/* epoll descriptor; stays -1 until Comm::SelectLoopInit() stores the epoll_create() result */
static int kdpfd = -1;

/* ceiling applied to the msec argument of Comm::DoSelect(); Comm::QuickPollRequired() lowers it to 10 */
static int max_poll_time = 1000;

/* event array handed to epoll_wait(); allocated in Comm::SelectLoopInit() with SQUID_MAXFD entries */
static struct epoll_event *pevents;
58 
59 /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
60 /* Public functions */
61 
62 /*
63  * This is a needed exported function which will be called to initialise
64  * the network loop code.
65  */
66 void
SelectLoopInit(void)67 Comm::SelectLoopInit(void)
68 {
69     pevents = (struct epoll_event *) xmalloc(SQUID_MAXFD * sizeof(struct epoll_event));
70 
71     if (!pevents) {
72         int xerrno = errno;
73         fatalf("comm_select_init: xmalloc() failed: %s\n", xstrerr(xerrno));
74     }
75 
76     kdpfd = epoll_create(SQUID_MAXFD);
77 
78     if (kdpfd < 0) {
79         int xerrno = errno;
80         fatalf("comm_select_init: epoll_create(): %s\n", xstrerr(xerrno));
81     }
82 
83     commEPollRegisterWithCacheManager();
84 }
85 
/*
 * Translates an epoll_ctl(2) operation code into its symbolic name for
 * debugging output. Codes other than ADD, DEL and MOD yield
 * "UNKNOWN_EPOLLCTL_OP".
 */
static const char *
epolltype_atoi(int x)
{
    if (x == EPOLL_CTL_ADD)
        return "EPOLL_CTL_ADD";

    if (x == EPOLL_CTL_DEL)
        return "EPOLL_CTL_DEL";

    if (x == EPOLL_CTL_MOD)
        return "EPOLL_CTL_MOD";

    return "UNKNOWN_EPOLLCTL_OP";
}
103 
104 /**
105  * This is a needed exported function which will be called to register
106  * and deregister interest in a pending IO state for a given FD.
107  */
108 void
SetSelect(int fd,unsigned int type,PF * handler,void * client_data,time_t timeout)109 Comm::SetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout)
110 {
111     fde *F = &fd_table[fd];
112     int epoll_ctl_type = 0;
113 
114     assert(fd >= 0);
115     debugs(5, 5, HERE << "FD " << fd << ", type=" << type <<
116            ", handler=" << handler << ", client_data=" << client_data <<
117            ", timeout=" << timeout);
118 
119     struct epoll_event ev;
120     memset(&ev, 0, sizeof(ev));
121     ev.data.fd = fd;
122 
123     if (!F->flags.open) {
124         epoll_ctl(kdpfd, EPOLL_CTL_DEL, fd, &ev);
125         return;
126     }
127 
128     // If read is an interest
129 
130     if (type & COMM_SELECT_READ) {
131         if (handler) {
132             // Hack to keep the events flowing if there is data immediately ready
133             if (F->flags.read_pending)
134                 ev.events |= EPOLLOUT;
135             ev.events |= EPOLLIN;
136         }
137 
138         F->read_handler = handler;
139 
140         F->read_data = client_data;
141 
142         // Otherwise, use previously stored value
143     } else if (F->epoll_state & EPOLLIN) {
144         ev.events |= EPOLLIN;
145     }
146 
147     // If write is an interest
148     if (type & COMM_SELECT_WRITE) {
149         if (handler)
150             ev.events |= EPOLLOUT;
151 
152         F->write_handler = handler;
153 
154         F->write_data = client_data;
155 
156         // Otherwise, use previously stored value
157     } else if (F->epoll_state & EPOLLOUT) {
158         ev.events |= EPOLLOUT;
159     }
160 
161     if (ev.events)
162         ev.events |= EPOLLHUP | EPOLLERR;
163 
164     if (ev.events != F->epoll_state) {
165         if (F->epoll_state) // already monitoring something.
166             epoll_ctl_type = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
167         else
168             epoll_ctl_type = EPOLL_CTL_ADD;
169 
170         F->epoll_state = ev.events;
171 
172         if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
173             int xerrno = errno;
174             debugs(5, DEBUG_EPOLL ? 0 : 8, "epoll_ctl(," << epolltype_atoi(epoll_ctl_type) <<
175                    ",,): failed on FD " << fd << ": " << xstrerr(xerrno));
176         }
177     }
178 
179     if (timeout)
180         F->timeout = squid_curtime + timeout;
181 }
182 
183 static void commIncomingStats(StoreEntry * sentry);
184 
185 static void
commEPollRegisterWithCacheManager(void)186 commEPollRegisterWithCacheManager(void)
187 {
188     Mgr::RegisterAction("comm_epoll_incoming",
189                         "comm_incoming() stats",
190                         commIncomingStats, 0, 1);
191 }
192 
193 static void
commIncomingStats(StoreEntry * sentry)194 commIncomingStats(StoreEntry * sentry)
195 {
196     StatCounters *f = &statCounter;
197     storeAppendPrintf(sentry, "Total number of epoll(2) loops: %ld\n", statCounter.select_loops);
198     storeAppendPrintf(sentry, "Histogram of returned filedescriptors\n");
199     f->select_fds_hist.dump(sentry, statHistIntDumper);
200 }
201 
202 /**
203  * Check all connections for new connections and input data that is to be
204  * processed. Also check for connections with data queued and whether we can
205  * write it out.
206  *
207  * Called to do the new-style IO, courtesy of of squid (like most of this
208  * new IO code). This routine handles the stuff we've hidden in
209  * comm_setselect and fd_table[] and calls callbacks for IO ready
210  * events.
211  */
212 Comm::Flag
DoSelect(int msec)213 Comm::DoSelect(int msec)
214 {
215     int num, i,fd;
216     fde *F;
217     PF *hdl;
218 
219     struct epoll_event *cevents;
220 
221     PROF_start(comm_check_incoming);
222 
223     if (msec > max_poll_time)
224         msec = max_poll_time;
225 
226     for (;;) {
227         num = epoll_wait(kdpfd, pevents, SQUID_MAXFD, msec);
228         ++ statCounter.select_loops;
229 
230         if (num >= 0)
231             break;
232 
233         if (ignoreErrno(errno))
234             break;
235 
236         getCurrentTime();
237 
238         PROF_stop(comm_check_incoming);
239 
240         return Comm::COMM_ERROR;
241     }
242 
243     PROF_stop(comm_check_incoming);
244     getCurrentTime();
245 
246     statCounter.select_fds_hist.count(num);
247 
248     if (num == 0)
249         return Comm::TIMEOUT;       /* No error.. */
250 
251     PROF_start(comm_handle_ready_fd);
252 
253     for (i = 0, cevents = pevents; i < num; ++i, ++cevents) {
254         fd = cevents->data.fd;
255         F = &fd_table[fd];
256         debugs(5, DEBUG_EPOLL ? 0 : 8, HERE << "got FD " << fd << " events=" <<
257                std::hex << cevents->events << " monitoring=" << F->epoll_state <<
258                " F->read_handler=" << F->read_handler << " F->write_handler=" << F->write_handler);
259 
260         // TODO: add EPOLLPRI??
261 
262         if (cevents->events & (EPOLLIN|EPOLLHUP|EPOLLERR) || F->flags.read_pending) {
263             if ((hdl = F->read_handler) != NULL) {
264                 debugs(5, DEBUG_EPOLL ? 0 : 8, HERE << "Calling read handler on FD " << fd);
265                 PROF_start(comm_write_handler);
266                 F->flags.read_pending = 0;
267                 F->read_handler = NULL;
268                 hdl(fd, F->read_data);
269                 PROF_stop(comm_write_handler);
270                 ++ statCounter.select_fds;
271             } else {
272                 debugs(5, DEBUG_EPOLL ? 0 : 8, HERE << "no read handler for FD " << fd);
273                 // remove interest since no handler exist for this event.
274                 SetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
275             }
276         }
277 
278         if (cevents->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) {
279             if ((hdl = F->write_handler) != NULL) {
280                 debugs(5, DEBUG_EPOLL ? 0 : 8, HERE << "Calling write handler on FD " << fd);
281                 PROF_start(comm_read_handler);
282                 F->write_handler = NULL;
283                 hdl(fd, F->write_data);
284                 PROF_stop(comm_read_handler);
285                 ++ statCounter.select_fds;
286             } else {
287                 debugs(5, DEBUG_EPOLL ? 0 : 8, HERE << "no write handler for FD " << fd);
288                 // remove interest since no handler exist for this event.
289                 SetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
290             }
291         }
292     }
293 
294     PROF_stop(comm_handle_ready_fd);
295 
296     return Comm::OK;
297 }
298 
299 void
QuickPollRequired(void)300 Comm::QuickPollRequired(void)
301 {
302     max_poll_time = 10;
303 }
304 
305 #endif /* USE_EPOLL */
306 
307