xref: /illumos-gate/usr/src/uts/common/fs/portfs/port_fd.c (revision 09295472)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stat.h>
32 #include <sys/errno.h>
33 #include <sys/kmem.h>
34 #include <sys/sysmacros.h>
35 #include <sys/debug.h>
36 #include <sys/poll_impl.h>
37 #include <sys/port_impl.h>
38 
39 #define	PORTHASH_START	256	/* start cache space for events */
40 #define	PORTHASH_MULT	2	/* growth threshold and factor */
41 
42 /* local functions */
43 static int	port_fd_callback(void *, int *, pid_t, int, void *);
44 static int	port_bind_pollhead(pollhead_t **, polldat_t *, short *);
45 static void	port_remove_fd_local(portfd_t *, port_fdcache_t *);
46 static void	port_close_sourcefd(void *, int, pid_t, int);
47 static void	port_cache_insert_fd(port_fdcache_t *, polldat_t *);
48 
49 /*
50  * port_fd_callback()
51  * The event port framework uses callback functions to notify associated
52  * event sources about actions on source specific objects.
53  * The source itself defines the "arg" required to identify the object with
54  * events. In the port_fd_callback() case the "arg" is a pointer to portfd_t
55  * structure. The portfd_t structure is specific for PORT_SOURCE_FD source.
56  * The port_fd_callback() function is notified in three cases:
57  * - PORT_CALLBACK_DEFAULT
58  *	The object (fd) will be delivered to the application.
59  * - PORT_CALLBACK_DISSOCIATE
60  *	The object (fd) will be dissociated from  the port.
61  * - PORT_CALLBACK_CLOSE
62  *	The object (fd) will be dissociated from the port because the port
63  *	is being closed.
64  * A fd is shareable between processes only when
65  * - processes have the same fd id and
66  * - processes have the same fp.
67  * A fd becomes shareable:
68  * - on fork() across parent and child process and
69  * - when I_SENDFD is used to pass file descriptors between parent and child
70  *   immediately after fork() (the sender and receiver must get the same
71  *   file descriptor id).
72  * If a fd is shared between processes, all involved processes will get
73  * the same rights related to re-association of the fd with the port and
74  * retrieve of events from that fd.
75  * The process which associated the fd with a port for the first time
76  * becomes also the owner of the association. Only the owner of the
77  * association is allowed to dissociate the fd from the port.
78  */
79 /* ARGSUSED */
80 static int
81 port_fd_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
82 {
83 	portfd_t	*pfd = (portfd_t *)arg;
84 	polldat_t	*pdp = PFTOD(pfd);
85 	port_fdcache_t	*pcp;
86 	file_t		*fp;
87 	int		error;
88 
89 	ASSERT((pdp != NULL) && (events != NULL));
90 	switch (flag) {
91 	case PORT_CALLBACK_DEFAULT:
92 		if (curproc->p_pid != pid) {
93 			/*
94 			 * Check if current process is allowed to retrieve
95 			 * events from this fd.
96 			 */
97 			fp = getf(pdp->pd_fd);
98 			if (fp == NULL) {
99 				error = EACCES; /* deny delivery of events */
100 				break;
101 			}
102 			releasef(pdp->pd_fd);
103 			if (fp != pdp->pd_fp) {
104 				error = EACCES; /* deny delivery of events */
105 				break;
106 			}
107 		}
108 		*events = pdp->pd_portev->portkev_events; /* update events */
109 		error = 0;
110 		break;
111 	case PORT_CALLBACK_DISSOCIATE:
112 		error = 0;
113 		break;
114 	case PORT_CALLBACK_CLOSE:
115 		/* remove polldat/portfd struct */
116 		pdp->pd_portev = NULL;
117 		pcp = (port_fdcache_t *)pdp->pd_pcache;
118 		mutex_enter(&pcp->pc_lock);
119 		pdp->pd_fp = NULL;
120 		pdp->pd_events = 0;
121 		if (pdp->pd_php != NULL) {
122 			pollhead_delete(pdp->pd_php, pdp);
123 			pdp->pd_php = NULL;
124 		}
125 		port_pcache_remove_fd(pcp, pfd);
126 		mutex_exit(&pcp->pc_lock);
127 		error = 0;
128 		break;
129 	default:
130 		error = EINVAL;
131 		break;
132 	}
133 	return (error);
134 }
135 
136 /*
137  * This routine returns a pointer to a cached poll fd entry, or NULL if it
138  * does not find it in the hash table.
139  * The fd is used as index.
140  * The fd and the fp are used to detect a valid entry.
141  * This function returns a pointer to a valid portfd_t structure only when
142  * the fd and the fp in the args match the entries in polldat_t.
143  */
144 portfd_t *
145 port_cache_lookup_fp(port_fdcache_t *pcp, int fd, file_t *fp)
146 {
147 	polldat_t	*pdp;
148 	portfd_t	**bucket;
149 
150 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
151 	bucket = PORT_FD_BUCKET(pcp, fd);
152 	pdp = PFTOD(*bucket);
153 	while (pdp != NULL) {
154 		if (pdp->pd_fd == fd && pdp->pd_fp == fp)
155 			break;
156 		pdp = pdp->pd_hashnext;
157 	}
158 	return (PDTOF(pdp));
159 }
160 
161 /*
162  * port_associate_fd()
163  * This function associates new file descriptors with a port or
164  * reactivate already associated file descriptors.
165  * The reactivation also updates the events types to be checked and the
166  * attached user pointer.
167  * Per port a cache is used to store associated file descriptors.
168  * Internally the VOP_POLL interface is used to poll for existing events.
169  * The VOP_POLL interface can also deliver a pointer to a pollhead_t structure
170  * which is used to enqueue polldat_t structures with pending events.
171  * If VOP_POLL immediately returns valid events (revents) then those events
172  * will be submitted to the event port with port_send_event().
173  * Otherwise VOP_POLL does not return events but it delivers a pointer to a
174  * pollhead_t structure. In such a case the corresponding file system behind
175  * VOP_POLL will use the pollwakeup() function to notify about exisiting
176  * events.
177  */
178 int
179 port_associate_fd(port_t *pp, int source, uintptr_t object, int events,
180     void *user)
181 {
182 	port_fdcache_t	*pcp;
183 	int		fd;
184 	struct pollhead	*php = NULL;
185 	portfd_t	*pfd;
186 	polldat_t	*pdp;
187 	file_t		*fp;
188 	port_kevent_t	*pkevp;
189 	short		revents;
190 	int		error = 0;
191 
192 	pcp = pp->port_queue.portq_pcp;
193 	if (object > (uintptr_t)INT_MAX)
194 		return (EBADFD);
195 
196 	fd = object;
197 
198 	if ((fp = getf(fd)) == NULL)
199 		return (EBADFD);
200 
201 	mutex_enter(&pcp->pc_lock);
202 	if (pcp->pc_hash == NULL) {
203 		/*
204 		 * This is the first time that a fd is being associated with
205 		 * the current port:
206 		 * - create PORT_SOURCE_FD cache
207 		 * - associate PORT_SOURCE_FD source with the port
208 		 */
209 		error = port_associate_ksource(pp->port_fd, PORT_SOURCE_FD,
210 		    NULL, port_close_sourcefd, pp, NULL);
211 		if (error) {
212 			mutex_exit(&pcp->pc_lock);
213 			releasef(fd);
214 			return (error);
215 		}
216 
217 		/* create polldat cache */
218 		pcp->pc_hashsize = PORTHASH_START;
219 		pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize *
220 		    sizeof (portfd_t *), KM_SLEEP);
221 		pfd = NULL;
222 	} else {
223 		/* Check if the fd/fp is already associated with the port */
224 		pfd = port_cache_lookup_fp(pcp, fd, fp);
225 	}
226 
227 	if (pfd == NULL) {
228 		/*
229 		 * new entry
230 		 * Allocate a polldat_t structure per fd
231 		 * The use of the polldat_t structure to cache file descriptors
232 		 * is required to be able to share the pollwakeup() function
233 		 * with poll(2) and devpoll(7d).
234 		 */
235 		pfd = kmem_zalloc(sizeof (portfd_t), KM_SLEEP);
236 		pdp = PFTOD(pfd);
237 		pdp->pd_fd = fd;
238 		pdp->pd_fp = fp;
239 		pdp->pd_pcache = (void *)pcp;
240 
241 		/* Allocate a port event structure per fd */
242 		error = port_alloc_event_local(pp, source, PORT_ALLOC_CACHED,
243 		    &pdp->pd_portev);
244 		if (error) {
245 			kmem_free(pfd, sizeof (portfd_t));
246 			releasef(fd);
247 			mutex_exit(&pcp->pc_lock);
248 			return (error);
249 		}
250 		pkevp = pdp->pd_portev;
251 		pkevp->portkev_callback = port_fd_callback;
252 		pkevp->portkev_arg = pfd;
253 
254 		/* add portfd_t entry  to the cache */
255 		port_cache_insert_fd(pcp, pdp);
256 		pkevp->portkev_object = fd;
257 		pkevp->portkev_user = user;
258 
259 		/*
260 		 * Add current port to the file descriptor interested list
261 		 * The members of the list are notified when the file descriptor
262 		 * is closed.
263 		 */
264 		addfd_port(fd, pfd);
265 	} else {
266 		/*
267 		 * The file descriptor is already associated with the port
268 		 */
269 		pdp = PFTOD(pfd);
270 		pkevp = pdp->pd_portev;
271 
272 		/*
273 		 * Check if the re-association happens before the last
274 		 * submitted event of the file descriptor was retrieved.
275 		 * Clear the PORT_KEV_VALID flag if set. No new events
276 		 * should get submitted after this flag is cleared.
277 		 */
278 		mutex_enter(&pkevp->portkev_lock);
279 		if (pkevp->portkev_flags & PORT_KEV_VALID) {
280 			pkevp->portkev_flags &= ~PORT_KEV_VALID;
281 		}
282 		if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
283 			mutex_exit(&pkevp->portkev_lock);
284 			/*
285 			 * Remove any events that where already fired
286 			 * for this fd and are still in the port queue.
287 			 */
288 			port_remove_done_event(pkevp);
289 		} else {
290 			mutex_exit(&pkevp->portkev_lock);
291 		}
292 		pkevp->portkev_user = user;
293 	}
294 
295 	mutex_enter(&pkevp->portkev_lock);
296 	pkevp->portkev_events = 0;	/* no fired events */
297 	pdp->pd_events = events;	/* events associated */
298 	/*
299 	 * allow new events.
300 	 */
301 	pkevp->portkev_flags |= PORT_KEV_VALID;
302 	mutex_exit(&pkevp->portkev_lock);
303 
304 	/*
305 	 * do VOP_POLL and cache this poll fd.
306 	 *
307 	 * XXX - pollrelock() logic needs to know
308 	 * which pollcache lock to grab. It'd be a
309 	 * cleaner solution if we could pass pcp as
310 	 * an arguement in VOP_POLL interface instead
311 	 * of implicitly passing it using thread_t
312 	 * struct. On the other hand, changing VOP_POLL
313 	 * interface will require all driver/file system
314 	 * poll routine to change.
315 	 */
316 	curthread->t_pollcache = (pollcache_t *)pcp;
317 	error = VOP_POLL(fp->f_vnode, events, 0, &revents, &php);
318 	curthread->t_pollcache = NULL;
319 
320 	/*
321 	 * To keep synchronization between VOP_POLL above and
322 	 * pollhead_insert below, it is necessary to
323 	 * call VOP_POLL() again (see port_bind_pollhead()).
324 	 */
325 	if (error) {
326 		/* dissociate the fd from the port */
327 		delfd_port(fd, pfd);
328 		port_remove_fd_local(pfd, pcp);
329 		releasef(fd);
330 		mutex_exit(&pcp->pc_lock);
331 		return (error);
332 	}
333 
334 	if (php != NULL) {
335 		/*
336 		 * No events delivered yet.
337 		 * Bind pollhead pointer with current polldat_t structure.
338 		 * Sub-system will call pollwakeup() later with php as
339 		 * argument.
340 		 */
341 		error = port_bind_pollhead(&php, pdp, &revents);
342 		if (error) {
343 			delfd_port(fd, pfd);
344 			port_remove_fd_local(pfd, pcp);
345 			releasef(fd);
346 			mutex_exit(&pcp->pc_lock);
347 			return (error);
348 		}
349 	}
350 
351 	/*
352 	 * Check if new events where detected and no events have been
353 	 * delivered. The revents was already set after the VOP_POLL
354 	 * above or it was updated in port_bind_pollhead().
355 	 */
356 	mutex_enter(&pkevp->portkev_lock);
357 	if (revents && (pkevp->portkev_flags & PORT_KEV_VALID)) {
358 		ASSERT((pkevp->portkev_flags & PORT_KEV_DONEQ) == 0);
359 		pkevp->portkev_flags &= ~PORT_KEV_VALID;
360 		revents = revents & (pdp->pd_events | POLLHUP | POLLERR);
361 		/* send events to the event port */
362 		pkevp->portkev_events = revents;
363 		/*
364 		 * port_send_event will release the portkev_lock mutex.
365 		 */
366 		port_send_event(pkevp);
367 	} else {
368 		mutex_exit(&pkevp->portkev_lock);
369 	}
370 
371 	releasef(fd);
372 	mutex_exit(&pcp->pc_lock);
373 	return (error);
374 }
375 
376 /*
377  * The port_dissociate_fd() function dissociates the delivered file
378  * descriptor from the event port and removes already fired events.
379  * If a fd is shared between processes, all involved processes will get
380  * the same rights related to re-association of the fd with the port and
381  * retrieve of events from that fd.
382  * The process which associated the fd with a port for the first time
383  * becomes also the owner of the association. Only the owner of the
384  * association is allowed to dissociate the fd from the port.
385  */
386 int
387 port_dissociate_fd(port_t *pp, uintptr_t object)
388 {
389 	int		fd;
390 	port_fdcache_t	*pcp;
391 	portfd_t	*pfd;
392 	file_t		*fp;
393 
394 	if (object > (uintptr_t)INT_MAX)
395 		return (EBADFD);
396 
397 	fd = object;
398 	pcp = pp->port_queue.portq_pcp;
399 
400 	mutex_enter(&pcp->pc_lock);
401 	if (pcp->pc_hash == NULL) {
402 		/* no file descriptor cache available */
403 		mutex_exit(&pcp->pc_lock);
404 		return (0);
405 	}
406 	if ((fp = getf(fd)) == NULL) {
407 		mutex_exit(&pcp->pc_lock);
408 		return (EBADFD);
409 	}
410 	pfd = port_cache_lookup_fp(pcp, fd, fp);
411 	if (pfd == NULL) {
412 		releasef(fd);
413 		mutex_exit(&pcp->pc_lock);
414 		return (0);
415 	}
416 	/* only association owner is allowed to remove the association */
417 	if (curproc->p_pid != PFTOD(pfd)->pd_portev->portkev_pid) {
418 		releasef(fd);
419 		mutex_exit(&pcp->pc_lock);
420 		return (EACCES);
421 	}
422 
423 	/* remove port from the file descriptor interested list */
424 	delfd_port(fd, pfd);
425 	releasef(fd);
426 
427 	/* remove polldat & port event structure */
428 	port_remove_fd_object(pfd, pp, pcp);
429 	mutex_exit(&pcp->pc_lock);
430 	return (0);
431 }
432 
433 /*
434  * Remove the fd from the event port cache.
435  */
436 static void
437 port_remove_fd_local(portfd_t *pfd, port_fdcache_t *pcp)
438 {
439 	polldat_t	*pdp = PFTOD(pfd);
440 
441 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
442 	pdp->pd_fp = NULL;
443 	if (pdp->pd_php != NULL) {
444 		pollhead_delete(pdp->pd_php, pdp);
445 		pdp->pd_php = NULL;
446 	}
447 	port_free_event_local(pdp->pd_portev, 0);
448 	/* remove polldat struct */
449 	port_pcache_remove_fd(pcp, pfd);
450 }
451 
452 /*
453  * Associate event port polldat_t structure with sub-system pointer to
454  * a polhead_t structure.
455  */
456 static int
457 port_bind_pollhead(pollhead_t **php, polldat_t *pdp, short *revents)
458 {
459 	int		error;
460 	file_t		*fp;
461 
462 	/*
463 	 * During re-association of a fd with a port the pd_php pointer
464 	 * is still the same as at the first association time.
465 	 */
466 	if (pdp->pd_php == *php)
467 		return (0);		/* already associated */
468 
469 	/* polldat_t associated with another pollhead_t pointer */
470 	if (pdp->pd_php != NULL)
471 		pollhead_delete(pdp->pd_php, pdp);
472 
473 	/*
474 	 * Before pollhead_insert() pollwakeup() will not detect a polldat
475 	 * entry in the ph_list and the event notification will disappear.
476 	 * This happens because polldat_t is still not associated with
477 	 * the pointer to the pollhead_t structure.
478 	 */
479 	pollhead_insert(*php, pdp);
480 
481 	/*
482 	 * From now on event notification can be detected in pollwakeup(),
483 	 * Use VOP_POLL() again to check the current status of the event.
484 	 */
485 	pdp->pd_php = *php;
486 	fp = pdp->pd_fp;
487 	curthread->t_pollcache = (pollcache_t *)pdp->pd_pcache;
488 	error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0, revents, php);
489 	curthread->t_pollcache = NULL;
490 	return (error);
491 }
492 
493 /*
494  * Grow the hash table. Rehash all the elements on the hash table.
495  */
496 static void
497 port_cache_grow_hashtbl(port_fdcache_t *pcp)
498 {
499 	portfd_t	**oldtbl;
500 	polldat_t	*pdp;
501 	portfd_t	*pfd;
502 	polldat_t	*pdp1;
503 	int		oldsize;
504 	int		i;
505 
506 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
507 	oldsize = pcp->pc_hashsize;
508 	oldtbl = pcp->pc_hash;
509 	pcp->pc_hashsize *= PORTHASH_MULT;
510 	pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (portfd_t *),
511 	    KM_SLEEP);
512 	/*
513 	 * rehash existing elements
514 	 */
515 	pcp->pc_fdcount = 0;
516 	for (i = 0; i < oldsize; i++) {
517 		pfd = oldtbl[i];
518 		pdp = PFTOD(pfd);
519 		while (pdp != NULL) {
520 			pdp1 = pdp->pd_hashnext;
521 			port_cache_insert_fd(pcp, pdp);
522 			pdp = pdp1;
523 		}
524 	}
525 	kmem_free(oldtbl, oldsize * sizeof (portfd_t *));
526 }
527 /*
528  * This routine inserts a polldat into the portcache's hash table. It
529  * may be necessary to grow the size of the hash table.
530  */
531 static void
532 port_cache_insert_fd(port_fdcache_t *pcp, polldat_t *pdp)
533 {
534 	portfd_t	**bucket;
535 
536 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
537 	if (pcp->pc_fdcount > (pcp->pc_hashsize * PORTHASH_MULT))
538 		port_cache_grow_hashtbl(pcp);
539 	bucket = PORT_FD_BUCKET(pcp, pdp->pd_fd);
540 	pdp->pd_hashnext = PFTOD(*bucket);
541 	*bucket = PDTOF(pdp);
542 	pcp->pc_fdcount++;
543 }
544 
545 
546 /*
547  * The port_remove_portfd() function dissociates the port from the fd
548  * and vive versa.
549  */
550 static void
551 port_remove_portfd(polldat_t *pdp, port_fdcache_t *pcp)
552 {
553 	port_t	*pp;
554 	file_t	*fp;
555 
556 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
557 	pp = pdp->pd_portev->portkev_port;
558 	fp = getf(pdp->pd_fd);
559 	/*
560 	 * If we did not get the fp for pd_fd but its portfd_t
561 	 * still exist in the cache, it means the pd_fd is being
562 	 * closed by some other thread which will also free the portfd_t.
563 	 */
564 	if (fp != NULL) {
565 		delfd_port(pdp->pd_fd, PDTOF(pdp));
566 		releasef(pdp->pd_fd);
567 		port_remove_fd_object(PDTOF(pdp), pp, pcp);
568 	}
569 }
570 
571 /*
572  * This function is used by port_close_sourcefd() to destroy the cache
573  * on last close.
574  */
575 static void
576 port_pcache_destroy(port_fdcache_t *pcp)
577 {
578 	ASSERT(pcp->pc_fdcount == 0);
579 	kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize);
580 	mutex_destroy(&pcp->pc_lock);
581 	kmem_free(pcp, sizeof (port_fdcache_t));
582 }
583 
584 /*
585  * port_close() calls this function to request the PORT_SOURCE_FD source
586  * to remove/free all resources allocated and associated with the port.
587  */
588 /* ARGSUSED */
589 static void
590 port_close_sourcefd(void *arg, int port, pid_t pid, int lastclose)
591 {
592 	port_t		*pp = arg;
593 	port_fdcache_t	*pcp;
594 	portfd_t	**hashtbl;
595 	polldat_t	*pdp;
596 	polldat_t	*pdpnext;
597 	int		index;
598 
599 	pcp = pp->port_queue.portq_pcp;
600 	if (pcp == NULL)
601 		/* no cache available -> nothing to do */
602 		return;
603 
604 	mutex_enter(&pcp->pc_lock);
605 	/*
606 	 * Scan the cache and free all allocated portfd_t and port_kevent_t
607 	 * structures.
608 	 */
609 	hashtbl = pcp->pc_hash;
610 	for (index = 0; index < pcp->pc_hashsize; index++) {
611 		for (pdp = PFTOD(hashtbl[index]); pdp != NULL; pdp = pdpnext) {
612 			pdpnext = pdp->pd_hashnext;
613 			if (pid == pdp->pd_portev->portkev_pid) {
614 				/*
615 				 * remove polldat + port_event_t from cache
616 				 * only when current process did the
617 				 * association.
618 				 */
619 				port_remove_portfd(pdp, pcp);
620 			}
621 		}
622 	}
623 	if (lastclose) {
624 		/*
625 		 * Wait for all the portfd's to be freed.
626 		 * The remaining portfd_t's are the once we did not
627 		 * free in port_remove_portfd since some other thread
628 		 * is closing the fd. These threads will free the portfd_t's
629 		 * once we drop the pc_lock mutex.
630 		 */
631 		while (pcp->pc_fdcount) {
632 			(void) cv_wait_sig(&pcp->pc_lclosecv, &pcp->pc_lock);
633 		}
634 		/* event port vnode will be destroyed -> remove everything */
635 		pp->port_queue.portq_pcp = NULL;
636 	}
637 	mutex_exit(&pcp->pc_lock);
638 	/*
639 	 * last close:
640 	 * pollwakeup() can not further interact with this cache
641 	 * (all polldat structs are removed from pollhead entries).
642 	 */
643 	if (lastclose)
644 		port_pcache_destroy(pcp);
645 }
646