/*
 * include/proto/fd.h
 * File descriptor states.
 *
 * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _PROTO_FD_H
#define _PROTO_FD_H

#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <common/config.h>
#include <common/ticks.h>
#include <common/time.h>
#include <types/fd.h>
#include <proto/activity.h>

/* public variables */

extern volatile struct fdlist fd_cache;
extern volatile struct fdlist fd_cache_local[MAX_THREADS];

extern volatile struct fdlist update_list;

extern unsigned long *polled_mask;

extern unsigned long fd_cache_mask; // Mask of threads with events in the cache

extern THREAD_LOCAL int *fd_updt;   // FD updates list
extern THREAD_LOCAL int fd_nbupdt;  // number of updates in the list

extern int poller_wr_pipe[MAX_THREADS];

extern volatile int ha_used_fds; // Number of FDs we're currently using

__decl_hathreads(extern HA_RWLOCK_T __attribute__((aligned(64))) fdcache_lock); /* global lock to protect fd_cache array */

/* Deletes an FD from the fdsets.
 * The file descriptor is also closed.
 */
void fd_delete(int fd);

/* Deletes an FD from the fdsets.
 * The file descriptor is kept open.
 */
void fd_remove(int fd);

/* close all FDs starting from <start> */
void my_closefrom(int start);

/* disable the specified poller */
void disable_poller(const char *poller_name);

void poller_pipe_io_handler(int fd);

/*
 * Initializes the pollers until the best one is found.
 * Returns 0 if none works, otherwise 1.
 * The pollers register themselves just before main() is called.
 */
int init_pollers();

/*
 * Deinitializes the pollers.
 */
void deinit_pollers();

/*
 * Some pollers may lose their connection after a fork(). It may be necessary
 * to re-create or re-initialize part of them. Returns 0 in case of failure,
 * otherwise 1. The fork() function may be NULL if unused. In case of error,
 * the current poller is destroyed and the caller is responsible for trying
 * another one by calling init_pollers() again.
 */
int fork_poller();

/*
 * Lists the known pollers on <out>.
 * Should be performed only before initialization.
 */
int list_pollers(FILE *out);

/*
 * Runs the polling loop
 */
void run_poller();

/* Scan and process the cached events. This should be called right after
 * the poller.
 */
void fd_process_cached_events();

void fd_add_to_fd_list(volatile struct fdlist *list, int fd, int off);
void fd_rm_from_fd_list(volatile struct fdlist *list, int fd, int off);

/* Mark fd <fd> as updated for polling and allocate an entry in the update list
 * for this if it was not already there. This can be done at any time.
 */
static inline void updt_fd_polling(const int fd)
{
        if ((fdtab[fd].thread_mask & all_threads_mask) == tid_bit) {

                /* note: we don't have a test-and-set yet in hathreads */

                if (HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid))
                        return;

                fd_updt[fd_nbupdt++] = fd;
        } else {
                unsigned long update_mask = fdtab[fd].update_mask;
                do {
                        if (update_mask == fdtab[fd].thread_mask)
                                return;
                } while (!_HA_ATOMIC_CAS(&fdtab[fd].update_mask, &update_mask,
                                         fdtab[fd].thread_mask));
                fd_add_to_fd_list(&update_list, fd, offsetof(struct fdtab, update));
        }
}

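/* Illustrative sketch (an assumption, not code from this file): the
 * thread-local list filled above by updt_fd_polling() is typically drained
 * by the poller at the start of each round, roughly as follows; the actual
 * (un)programming step depends on the poller in use:
 *
 *        int updt_idx, fd;
 *
 *        for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
 *                fd = fd_updt[updt_idx];
 *                _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
 *                if (!fdtab[fd].owner)
 *                        continue; // FD was closed in the meantime
 *                // program or unprogram <fd> in the kernel poller
 *                // according to its FD_EV_POLLED_* bits
 *        }
 *        fd_nbupdt = 0;
 */
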
/* Called from the poller to acknowledge that we read an entry from the global
 * update list, to remove our bit from the update_mask, and to remove the FD
 * from the list if we were the last one.
 */
static inline void done_update_polling(int fd)
{
        unsigned long update_mask;

        update_mask = _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
        while ((update_mask & all_threads_mask) == 0) {
                /* If we were the last one that had to update that entry, remove it from the list */
                fd_rm_from_fd_list(&update_list, fd, offsetof(struct fdtab, update));
                update_mask = (volatile unsigned long)fdtab[fd].update_mask;
                if ((update_mask & all_threads_mask) != 0) {
                        /* Maybe it's been re-updated in the meanwhile, and we
                         * wrongly removed it from the list; if so, re-add it
                         */
                        fd_add_to_fd_list(&update_list, fd, offsetof(struct fdtab, update));
                        update_mask = (volatile unsigned long)(fdtab[fd].update_mask);
                        /* And then check again, just in case after all it
                         * should be removed, even if it's very unlikely, given
                         * the current thread wouldn't have been able to take
                         * care of it yet */
                } else
                        break;
        }
}

/* Allocates a cache entry for a file descriptor if it does not yet have one.
 * This can be done at any time.
 */
static inline void fd_alloc_cache_entry(const int fd)
{
        _HA_ATOMIC_OR(&fd_cache_mask, fdtab[fd].thread_mask);
        if (!(fdtab[fd].thread_mask & (fdtab[fd].thread_mask - 1)))
                fd_add_to_fd_list(&fd_cache_local[my_ffsl(fdtab[fd].thread_mask) - 1], fd, offsetof(struct fdtab, cache));
        else
                fd_add_to_fd_list(&fd_cache, fd, offsetof(struct fdtab, cache));
}

/* Removes the entry used by fd <fd> from the FD cache.
 * If the fd has no entry assigned, it returns immediately.
 */
static inline void fd_release_cache_entry(const int fd)
{
        if (!(fdtab[fd].thread_mask & (fdtab[fd].thread_mask - 1)))
                fd_rm_from_fd_list(&fd_cache_local[my_ffsl(fdtab[fd].thread_mask) - 1], fd, offsetof(struct fdtab, cache));
        else
                fd_rm_from_fd_list(&fd_cache, fd, offsetof(struct fdtab, cache));
}

/* This function automatically enables/disables caching for an entry depending
 * on its state. It is only called on state changes.
 */
static inline void fd_update_cache(int fd)
{
        /* only READY and ACTIVE states (the two with both flags set) require a cache entry */
        if (((fdtab[fd].state & (FD_EV_READY_R | FD_EV_ACTIVE_R)) == (FD_EV_READY_R | FD_EV_ACTIVE_R)) ||
            ((fdtab[fd].state & (FD_EV_READY_W | FD_EV_ACTIVE_W)) == (FD_EV_READY_W | FD_EV_ACTIVE_W))) {
                fd_alloc_cache_entry(fd);
        }
        else {
                fd_release_cache_entry(fd);
        }
}

/*
 * returns the FD's recv state (FD_EV_*)
 */
static inline int fd_recv_state(const int fd)
{
        return ((unsigned)fdtab[fd].state >> (4 * DIR_RD)) & FD_EV_STATUS;
}

/*
 * returns true if the FD is active for recv
 */
static inline int fd_recv_active(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_R;
}

/*
 * returns true if the FD is ready for recv
 */
static inline int fd_recv_ready(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_READY_R;
}

/*
 * returns true if the FD is polled for recv
 */
static inline int fd_recv_polled(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_POLLED_R;
}

/*
 * returns the FD's send state (FD_EV_*)
 */
static inline int fd_send_state(const int fd)
{
        return ((unsigned)fdtab[fd].state >> (4 * DIR_WR)) & FD_EV_STATUS;
}

/*
 * returns true if the FD is active for send
 */
static inline int fd_send_active(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_W;
}

/*
 * returns true if the FD is ready for send
 */
static inline int fd_send_ready(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_READY_W;
}

/*
 * returns true if the FD is polled for send
 */
static inline int fd_send_polled(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_POLLED_W;
}

/*
 * returns true if the FD is active for recv or send
 */
static inline int fd_active(const int fd)
{
        return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_RW;
}

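/* Illustrative sketch (hypothetical handler, not part of this API): an I/O
 * callback registered via fd_insert() below may use the accessors above to
 * decide what work is pending:
 *
 *        static void my_sock_iocb(int fd)
 *        {
 *                if (fd_recv_active(fd) && fd_recv_ready(fd))
 *                        ; // try recv() and consume incoming data
 *                if (fd_send_active(fd) && fd_send_ready(fd))
 *                        ; // try send() to flush pending output
 *        }
 */
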
/* Disable processing recv events on fd <fd> */
static inline void fd_stop_recv(int fd)
{
        unsigned char old, new;
        unsigned long locked;

        old = fdtab[fd].state;
        do {
                if (!(old & FD_EV_ACTIVE_R))
                        return;
                new = old & ~FD_EV_ACTIVE_R;
                new &= ~FD_EV_POLLED_R;
        } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));

        if ((old ^ new) & FD_EV_POLLED_R)
                updt_fd_polling(fd);

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

/* Disable processing send events on fd <fd> */
static inline void fd_stop_send(int fd)
{
        unsigned char old, new;
        unsigned long locked;

        old = fdtab[fd].state;
        do {
                if (!(old & FD_EV_ACTIVE_W))
                        return;
                new = old & ~FD_EV_ACTIVE_W;
                new &= ~FD_EV_POLLED_W;
        } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));

        if ((old ^ new) & FD_EV_POLLED_W)
                updt_fd_polling(fd);

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

/* Disable processing of events on fd <fd> for both directions. */
static inline void fd_stop_both(int fd)
{
        unsigned char old, new;
        unsigned long locked;

        old = fdtab[fd].state;
        do {
                if (!(old & FD_EV_ACTIVE_RW))
                        return;
                new = old & ~FD_EV_ACTIVE_RW;
                new &= ~FD_EV_POLLED_RW;
        } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));

        if ((old ^ new) & FD_EV_POLLED_RW)
                updt_fd_polling(fd);

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

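/* Illustrative sketch (an assumed call sequence, for orientation only): the
 * fd_want_*() functions below and the fd_stop_*() functions above are
 * typically paired over a connection's life:
 *
 *        fd_want_recv(fd);  // after accept(): subscribe to incoming data
 *        ...
 *        fd_stop_recv(fd);  // pause reads while input buffers are full
 *        ...
 *        fd_stop_both(fd);  // quiesce the FD before fd_delete()
 */
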
/* Report that FD <fd> cannot receive anymore without polling (EAGAIN detected). */
static inline void fd_cant_recv(const int fd)
{
        unsigned char old, new;
        unsigned long locked;

        old = fdtab[fd].state;
        do {
                if (!(old & FD_EV_READY_R))
                        return;
                new = old & ~FD_EV_READY_R;
                if (new & FD_EV_ACTIVE_R)
                        new |= FD_EV_POLLED_R;
        } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));

        if ((old ^ new) & FD_EV_POLLED_R)
                updt_fd_polling(fd);

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

/* Report that FD <fd> may receive again without polling. */
static inline void fd_may_recv(const int fd)
{
        unsigned long locked;

        /* marking ready never changes polled status */
        if ((fdtab[fd].state & FD_EV_READY_R) ||
            HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT))
                return;

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

/* Disable readiness when polled. This is useful to interrupt reading when it
 * is suspected that the end of data might have been reached (eg: short read).
 * This can only be done using level-triggered pollers, so if any edge-triggered
 * one is ever implemented, a test will have to be added here.
 */
static inline void fd_done_recv(const int fd)
{
        unsigned char old, new;
        unsigned long locked;

        old = fdtab[fd].state;
        do {
                if ((old & (FD_EV_POLLED_R|FD_EV_READY_R)) != (FD_EV_POLLED_R|FD_EV_READY_R))
                        return;
                new = old & ~FD_EV_READY_R;
                if (new & FD_EV_ACTIVE_R)
                        new |= FD_EV_POLLED_R;
        } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));

        if ((old ^ new) & FD_EV_POLLED_R)
                updt_fd_polling(fd);

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

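/* Illustrative sketch (an assumption, not from this file): the recv-side
 * helpers map onto the return values of recv() like this:
 *
 *        ret = recv(fd, buf, count, 0);
 *        if (ret < 0 && errno == EAGAIN)
 *                fd_cant_recv(fd); // exhausted: rely on polling again
 *        else if (ret > 0 && ret < count)
 *                fd_done_recv(fd); // short read: probably no more data
 *
 * fd_may_recv() itself is normally invoked by the poller through
 * fd_update_events() once the FD is reported readable again.
 */
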
/* Report that FD <fd> cannot send anymore without polling (EAGAIN detected). */
static inline void fd_cant_send(const int fd)
{
        unsigned char old, new;
        unsigned long locked;

        old = fdtab[fd].state;
        do {
                if (!(old & FD_EV_READY_W))
                        return;
                new = old & ~FD_EV_READY_W;
                if (new & FD_EV_ACTIVE_W)
                        new |= FD_EV_POLLED_W;
        } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));

        if ((old ^ new) & FD_EV_POLLED_W)
                updt_fd_polling(fd);

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

/* Report that FD <fd> may send again without polling (EAGAIN not detected). */
static inline void fd_may_send(const int fd)
{
        unsigned long locked;

        /* marking ready never changes polled status */
        if ((fdtab[fd].state & FD_EV_READY_W) ||
            HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT))
                return;

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

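/* Illustrative sketch (an assumption, mirroring the recv side): a send path
 * usually reports EAGAIN via fd_cant_send() and subscribes to write events:
 *
 *        ret = send(fd, buf, count, MSG_NOSIGNAL);
 *        if (ret < 0 && errno == EAGAIN) {
 *                fd_cant_send(fd); // kernel buffers are full
 *                fd_want_send(fd); // get notified when writable again
 *        }
 */
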
/* Prepare FD <fd> to try to receive */
static inline void fd_want_recv(int fd)
{
        unsigned char old, new;
        unsigned long locked;

        old = fdtab[fd].state;
        do {
                if (old & FD_EV_ACTIVE_R)
                        return;
                new = old | FD_EV_ACTIVE_R;
                if (!(new & FD_EV_READY_R))
                        new |= FD_EV_POLLED_R;
        } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));

        if ((old ^ new) & FD_EV_POLLED_R)
                updt_fd_polling(fd);

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

/* Prepare FD <fd> to try to send */
static inline void fd_want_send(int fd)
{
        unsigned char old, new;
        unsigned long locked;

        old = fdtab[fd].state;
        do {
                if (old & FD_EV_ACTIVE_W)
                        return;
                new = old | FD_EV_ACTIVE_W;
                if (!(new & FD_EV_READY_W))
                        new |= FD_EV_POLLED_W;
        } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));

        if ((old ^ new) & FD_EV_POLLED_W)
                updt_fd_polling(fd);

        locked = atleast2(fdtab[fd].thread_mask);
        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fd_update_cache(fd); /* need an update entry to change the state */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
}

/* Update events seen for FD <fd> and its state if needed. This should be
 * called by the poller to set FD_POLL_* flags.
 */
static inline void fd_update_events(int fd, int evts)
{
        unsigned long locked = atleast2(fdtab[fd].thread_mask);
        unsigned char old, new;

        old = fdtab[fd].ev;
        new = (old & FD_POLL_STICKY) | evts;

        if (unlikely(locked)) {
                /* Locked FDs (those shared by at least two threads) are atomically updated */
                while (unlikely(new != old && !_HA_ATOMIC_CAS(&fdtab[fd].ev, &old, new)))
                        new = (old & FD_POLL_STICKY) | evts;
        } else {
                if (new != old)
                        fdtab[fd].ev = new;
        }

        if (fdtab[fd].ev & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR))
                fd_may_recv(fd);

        if (fdtab[fd].ev & (FD_POLL_OUT | FD_POLL_ERR))
                fd_may_send(fd);
}

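/* Illustrative sketch (an assumption; epoll is only one of the supported
 * pollers): a Linux poller would translate kernel events into FD_POLL_*
 * flags before calling fd_update_events():
 *
 *        uint32_t e  = epoll_events[i].events;   // hypothetical locals
 *        int      fd = epoll_events[i].data.fd;
 *        int n = ((e & EPOLLIN)  ? FD_POLL_IN  : 0) |
 *                ((e & EPOLLOUT) ? FD_POLL_OUT : 0) |
 *                ((e & EPOLLERR) ? FD_POLL_ERR : 0) |
 *                ((e & EPOLLHUP) ? FD_POLL_HUP : 0);
 *        fd_update_events(fd, n);
 */
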
/* Prepares <fd> for being polled */
static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned long thread_mask)
{
        unsigned long locked = atleast2(thread_mask);

        if (locked)
                HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
        fdtab[fd].owner = owner;
        fdtab[fd].iocb = iocb;
        fdtab[fd].ev = 0;
        fdtab[fd].linger_risk = 0;
        fdtab[fd].cloned = 0;
        fdtab[fd].thread_mask = thread_mask;
        /* note: do not reset polled_mask here as it indicates which poller
         * still knows this FD from a possible previous round.
         */
        if (locked)
                HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
        _HA_ATOMIC_ADD(&ha_used_fds, 1);
}

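/* Illustrative sketch (assumption; <my_sock_iocb> and <conn> are
 * hypothetical): registering a freshly accepted non-blocking socket for the
 * current thread only, then expressing interest in reads:
 *
 *        fd_insert(cfd, conn, my_sock_iocb, tid_bit);
 *        fd_want_recv(cfd);
 */
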
/* Computes the bounded poll() timeout based on the next expiration timer <next>
 * by bounding it to MAX_DELAY_MS. <next> may equal TICK_ETERNITY. The pollers
 * just need to call this function right before polling to get their timeout
 * value. Timeouts that are already expired (possibly due to a pending event)
 * are accounted for in activity.poll_exp.
 */
static inline int compute_poll_timeout(int next)
{
        int wait_time;

        if (!tick_isset(next))
                wait_time = MAX_DELAY_MS;
        else if (tick_is_expired(next, now_ms)) {
                activity[tid].poll_exp++;
                wait_time = 0;
        }
        else {
                wait_time = TICKS_TO_MS(tick_remain(now_ms, next)) + 1;
                if (wait_time > MAX_DELAY_MS)
                        wait_time = MAX_DELAY_MS;
        }
        return wait_time;
}

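/* Illustrative sketch (an assumption; <epoll_fd>, <events> and <maxevents>
 * are hypothetical poller-local variables): called right before sleeping,
 *
 *        int timeout = compute_poll_timeout(next_expiration);
 *        int nevts = epoll_wait(epoll_fd, events, maxevents, timeout);
 *
 * so the loop never sleeps past the nearest timer nor longer than
 * MAX_DELAY_MS.
 */
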
/* These are replacements for FD_SET, FD_CLR and FD_ISSET, working on uints */
static inline void hap_fd_set(int fd, unsigned int *evts)
{
        _HA_ATOMIC_OR(&evts[fd / (8*sizeof(*evts))], 1U << (fd & (8*sizeof(*evts) - 1)));
}

static inline void hap_fd_clr(int fd, unsigned int *evts)
{
        _HA_ATOMIC_AND(&evts[fd / (8*sizeof(*evts))], ~(1U << (fd & (8*sizeof(*evts) - 1))));
}

static inline unsigned int hap_fd_isset(int fd, unsigned int *evts)
{
        return evts[fd / (8*sizeof(*evts))] & (1U << (fd & (8*sizeof(*evts) - 1)));
}

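/* Illustrative sketch (assumption; <fd_evts> and <maxfd> are hypothetical):
 * unlike FD_SET and friends, these operate on plain unsigned int arrays and
 * are updated atomically, so a select/poll-style poller can share one bitmap
 * between threads:
 *
 *        unsigned int *fd_evts;
 *
 *        fd_evts = calloc((maxfd + 8 * sizeof(*fd_evts) - 1) /
 *                         (8 * sizeof(*fd_evts)), sizeof(*fd_evts));
 *        hap_fd_set(fd, fd_evts);         // mark interest in <fd>
 *        if (hap_fd_isset(fd, fd_evts))
 *                hap_fd_clr(fd, fd_evts); // and drop it again
 */
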
static inline void wake_thread(int tid)
{
        char c = 'c';

        shut_your_big_mouth_gcc(write(poller_wr_pipe[tid], &c, 1));
}


#endif /* _PROTO_FD_H */

/*
 * Local variables:
 * c-indent-level: 8
 * c-basic-offset: 8
 * End:
 */