xref: /illumos-gate/usr/src/uts/common/fs/portfs/port_fd.c (revision 80ab886d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stat.h>
32 #include <sys/errno.h>
33 #include <sys/kmem.h>
34 #include <sys/sysmacros.h>
35 #include <sys/debug.h>
36 #include <sys/poll_impl.h>
37 #include <sys/port_impl.h>
38 
39 #define	PORTHASH_START	256	/* start cache space for events */
40 #define	PORTHASH_MULT	2	/* growth threshold and factor */
41 
42 /* local functions */
43 static int	port_fd_callback(void *, int *, pid_t, int, void *);
44 static int	port_bind_pollhead(pollhead_t **, polldat_t *, short *);
45 static void	port_remove_fd_local(portfd_t *, port_fdcache_t *);
46 static void	port_close_sourcefd(void *, int, pid_t, int);
47 static void	port_cache_insert_fd(port_fdcache_t *, polldat_t *);
48 
49 /*
50  * port_fd_callback()
51  * The event port framework uses callback functions to notify associated
52  * event sources about actions on source specific objects.
53  * The source itself defines the "arg" required to identify the object with
54  * events. In the port_fd_callback() case the "arg" is a pointer to portfd_t
55  * structure. The portfd_t structure is specific for PORT_SOURCE_FD source.
56  * The port_fd_callback() function is notified in three cases:
57  * - PORT_CALLBACK_DEFAULT
58  *	The object (fd) will be delivered to the application.
59  * - PORT_CALLBACK_DISSOCIATE
60  *	The object (fd) will be dissociated from  the port.
61  * - PORT_CALLBACK_CLOSE
62  *	The object (fd) will be dissociated from the port because the port
63  *	is being closed.
64  * A fd is shareable between processes only when
65  * - processes have the same fd id and
66  * - processes have the same fp.
67  * A fd becomes shareable:
68  * - on fork() across parent and child process and
69  * - when I_SENDFD is used to pass file descriptors between parent and child
70  *   immediately after fork() (the sender and receiver must get the same
71  *   file descriptor id).
72  * If a fd is shared between processes, all involved processes will get
73  * the same rights related to re-association of the fd with the port and
74  * retrieve of events from that fd.
75  * The process which associated the fd with a port for the first time
76  * becomes also the owner of the association. Only the owner of the
77  * association is allowed to dissociate the fd from the port.
78  */
79 /* ARGSUSED */
80 static int
81 port_fd_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
82 {
83 	portfd_t	*pfd = (portfd_t *)arg;
84 	polldat_t	*pdp = PFTOD(pfd);
85 	port_fdcache_t	*pcp;
86 	file_t		*fp;
87 	int		error;
88 
89 	ASSERT((pdp != NULL) && (events != NULL));
90 	switch (flag) {
91 	case PORT_CALLBACK_DEFAULT:
92 		if (curproc->p_pid != pid) {
93 			/*
94 			 * Check if current process is allowed to retrieve
95 			 * events from this fd.
96 			 */
97 			fp = getf(pdp->pd_fd);
98 			if (fp == NULL) {
99 				error = EACCES; /* deny delivery of events */
100 				break;
101 			}
102 			releasef(pdp->pd_fd);
103 			if (fp != pdp->pd_fp) {
104 				error = EACCES; /* deny delivery of events */
105 				break;
106 			}
107 		}
108 		*events = pdp->pd_portev->portkev_events; /* update events */
109 		error = 0;
110 		break;
111 	case PORT_CALLBACK_DISSOCIATE:
112 		error = 0;
113 		break;
114 	case PORT_CALLBACK_CLOSE:
115 		/* remove polldat/portfd struct */
116 		pdp->pd_portev = NULL;
117 		pcp = (port_fdcache_t *)pdp->pd_pcache;
118 		mutex_enter(&pcp->pc_lock);
119 		pdp->pd_fp = NULL;
120 		pdp->pd_events = 0;
121 		if (pdp->pd_php != NULL) {
122 			pollhead_delete(pdp->pd_php, pdp);
123 			pdp->pd_php = NULL;
124 		}
125 		port_pcache_remove_fd(pcp, pfd);
126 		mutex_exit(&pcp->pc_lock);
127 		error = 0;
128 		break;
129 	default:
130 		error = EINVAL;
131 		break;
132 	}
133 	return (error);
134 }
135 
136 /*
137  * This routine returns a pointer to a cached poll fd entry, or NULL if it
138  * does not find it in the hash table.
139  * The fd is used as index.
140  * The fd and the fp are used to detect a valid entry.
141  * This function returns a pointer to a valid portfd_t structure only when
142  * the fd and the fp in the args match the entries in polldat_t.
143  */
144 portfd_t *
145 port_cache_lookup_fp(port_fdcache_t *pcp, int fd, file_t *fp)
146 {
147 	polldat_t	*pdp;
148 	portfd_t	**bucket;
149 
150 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
151 	bucket = PORT_FD_BUCKET(pcp, fd);
152 	pdp = PFTOD(*bucket);
153 	while (pdp != NULL) {
154 		if (pdp->pd_fd == fd && pdp->pd_fp == fp)
155 			break;
156 		pdp = pdp->pd_hashnext;
157 	}
158 	return (PDTOF(pdp));
159 }
160 
161 /*
162  * port_associate_fd()
163  * This function associates new file descriptors with a port or
164  * reactivate already associated file descriptors.
165  * The reactivation also updates the events types to be checked and the
166  * attached user pointer.
167  * Per port a cache is used to store associated file descriptors.
168  * Internally the VOP_POLL interface is used to poll for existing events.
169  * The VOP_POLL interface can also deliver a pointer to a pollhead_t structure
170  * which is used to enqueue polldat_t structures with pending events.
171  * If VOP_POLL immediately returns valid events (revents) then those events
172  * will be submitted to the event port with port_send_event().
173  * Otherwise VOP_POLL does not return events but it delivers a pointer to a
174  * pollhead_t structure. In such a case the corresponding file system behind
175  * VOP_POLL will use the pollwakeup() function to notify about exisiting
176  * events.
177  */
178 int
179 port_associate_fd(port_t *pp, int source, uintptr_t object, int events,
180     void *user)
181 {
182 	port_fdcache_t	*pcp;
183 	int		fd;
184 	struct pollhead	*php = NULL;
185 	portfd_t	*pfd;
186 	polldat_t	*pdp;
187 	file_t		*fp;
188 	port_kevent_t	*pkevp;
189 	short		revents;
190 	int		error = 0;
191 
192 	pcp = pp->port_queue.portq_pcp;
193 	if (object > (uintptr_t)INT_MAX)
194 		return (EBADFD);
195 
196 	fd = object;
197 
198 	if ((fp = getf(fd)) == NULL)
199 		return (EBADFD);
200 
201 	mutex_enter(&pcp->pc_lock);
202 	if (pcp->pc_hash == NULL) {
203 		/*
204 		 * This is the first time that a fd is being associated with
205 		 * the current port:
206 		 * - create PORT_SOURCE_FD cache
207 		 * - associate PORT_SOURCE_FD source with the port
208 		 */
209 		error = port_associate_ksource(pp->port_fd, PORT_SOURCE_FD,
210 		    NULL, port_close_sourcefd, pp, NULL);
211 		if (error) {
212 			mutex_exit(&pcp->pc_lock);
213 			releasef(fd);
214 			return (error);
215 		}
216 
217 		/* create polldat cache */
218 		pcp->pc_hashsize = PORTHASH_START;
219 		pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize *
220 		    sizeof (portfd_t *), KM_SLEEP);
221 		pfd = NULL;
222 	} else {
223 		/* Check if the fd/fp is already associated with the port */
224 		pfd = port_cache_lookup_fp(pcp, fd, fp);
225 	}
226 
227 	if (pfd == NULL) {
228 		/*
229 		 * new entry
230 		 * Allocate a polldat_t structure per fd
231 		 * The use of the polldat_t structure to cache file descriptors
232 		 * is required to be able to share the pollwakeup() function
233 		 * with poll(2) and devpoll(7d).
234 		 */
235 		pfd = kmem_zalloc(sizeof (portfd_t), KM_SLEEP);
236 		pdp = PFTOD(pfd);
237 		pdp->pd_fd = fd;
238 		pdp->pd_fp = fp;
239 		pdp->pd_pcache = (void *)pcp;
240 
241 		/* Allocate a port event structure per fd */
242 		error = port_alloc_event_local(pp, source, PORT_ALLOC_CACHED,
243 		    &pdp->pd_portev);
244 		if (error) {
245 			kmem_free(pfd, sizeof (portfd_t));
246 			releasef(fd);
247 			mutex_exit(&pcp->pc_lock);
248 			return (error);
249 		}
250 		pkevp = pdp->pd_portev;
251 		pkevp->portkev_callback = port_fd_callback;
252 		pkevp->portkev_arg = pfd;
253 
254 		/* add portfd_t entry  to the cache */
255 		port_cache_insert_fd(pcp, pdp);
256 		pkevp->portkev_object = fd;
257 		pkevp->portkev_user = user;
258 
259 		/*
260 		 * Add current port to the file descriptor interested list
261 		 * The members of the list are notified when the file descriptor
262 		 * is closed.
263 		 */
264 		addfd_port(fd, pfd);
265 	} else {
266 		/*
267 		 * The file descriptor is already associated with the port
268 		 */
269 		pdp = PFTOD(pfd);
270 		pkevp = pdp->pd_portev;
271 
272 		/*
273 		 * Check if the re-association happens before the last
274 		 * submitted event of the file descriptor was retrieved.
275 		 * Clear the PORT_KEV_VALID flag if set. No new events
276 		 * should get submitted after this flag is cleared.
277 		 */
278 		mutex_enter(&pkevp->portkev_lock);
279 		if (pkevp->portkev_flags & PORT_KEV_VALID) {
280 			pkevp->portkev_flags &= ~PORT_KEV_VALID;
281 		}
282 		if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
283 			mutex_exit(&pkevp->portkev_lock);
284 			/*
285 			 * Remove any events that where already fired
286 			 * for this fd and are still in the port queue.
287 			 */
288 			port_remove_done_event(pkevp);
289 		} else {
290 			mutex_exit(&pkevp->portkev_lock);
291 		}
292 		pkevp->portkev_user = user;
293 	}
294 
295 	pkevp->portkev_events = 0;	/* no fired events */
296 	pdp->pd_events = events;	/* events associated */
297 
298 	/*
299 	 * do VOP_POLL and cache this poll fd.
300 	 *
301 	 * XXX - pollrelock() logic needs to know
302 	 * which pollcache lock to grab. It'd be a
303 	 * cleaner solution if we could pass pcp as
304 	 * an arguement in VOP_POLL interface instead
305 	 * of implicitly passing it using thread_t
306 	 * struct. On the other hand, changing VOP_POLL
307 	 * interface will require all driver/file system
308 	 * poll routine to change.
309 	 */
310 	curthread->t_pollcache = (pollcache_t *)pcp;
311 	error = VOP_POLL(fp->f_vnode, events, 0, &revents, &php);
312 	curthread->t_pollcache = NULL;
313 
314 	/*
315 	 * To keep synchronization between VOP_POLL above and
316 	 * pollhead_insert below, it is necessary to
317 	 * call VOP_POLL() again (see port_bind_pollhead()).
318 	 */
319 	if (error) {
320 		/* dissociate the fd from the port */
321 		delfd_port(fd, pfd);
322 		port_remove_fd_local(pfd, pcp);
323 		releasef(fd);
324 		mutex_exit(&pcp->pc_lock);
325 		return (error);
326 	}
327 
328 	if (php != NULL) {
329 		/*
330 		 * No events delivered yet.
331 		 * Bind pollhead pointer with current polldat_t structure.
332 		 * Sub-system will call pollwakeup() later with php as
333 		 * argument.
334 		 */
335 		error = port_bind_pollhead(&php, pdp, &revents);
336 		if (error) {
337 			delfd_port(fd, pfd);
338 			port_remove_fd_local(pfd, pcp);
339 			releasef(fd);
340 			mutex_exit(&pcp->pc_lock);
341 			return (error);
342 		}
343 	}
344 
345 	/*
346 	 * Check if events detected.
347 	 * revents was already set after the VOP_POLL above or
348 	 * it was updated in port_bind_pollhead().
349 	 */
350 	mutex_enter(&pkevp->portkev_lock);
351 	if (revents) {
352 		ASSERT((pkevp->portkev_flags & PORT_KEV_DONEQ) == 0);
353 		revents = revents & (pdp->pd_events | POLLHUP | POLLERR);
354 		/* send events to the event port */
355 		pkevp->portkev_events = revents;
356 		/*
357 		 * port_send_event will release the portkev_lock mutex.
358 		 */
359 		(void) port_send_event(pkevp);
360 	} else {
361 		/*
362 		 * events can be submitted
363 		 */
364 		pkevp->portkev_flags |= PORT_KEV_VALID;
365 		mutex_exit(&pkevp->portkev_lock);
366 	}
367 
368 	releasef(fd);
369 	mutex_exit(&pcp->pc_lock);
370 	return (error);
371 }
372 
373 /*
374  * The port_dissociate_fd() function dissociates the delivered file
375  * descriptor from the event port and removes already fired events.
376  * If a fd is shared between processes, all involved processes will get
377  * the same rights related to re-association of the fd with the port and
378  * retrieve of events from that fd.
379  * The process which associated the fd with a port for the first time
380  * becomes also the owner of the association. Only the owner of the
381  * association is allowed to dissociate the fd from the port.
382  */
383 int
384 port_dissociate_fd(port_t *pp, uintptr_t object)
385 {
386 	int		fd;
387 	port_fdcache_t	*pcp;
388 	portfd_t	*pfd;
389 	file_t		*fp;
390 
391 	if (object > (uintptr_t)INT_MAX)
392 		return (EBADFD);
393 
394 	fd = object;
395 	pcp = pp->port_queue.portq_pcp;
396 
397 	mutex_enter(&pcp->pc_lock);
398 	if (pcp->pc_hash == NULL) {
399 		/* no file descriptor cache available */
400 		mutex_exit(&pcp->pc_lock);
401 		return (0);
402 	}
403 	if ((fp = getf(fd)) == NULL) {
404 		mutex_exit(&pcp->pc_lock);
405 		return (EBADFD);
406 	}
407 	pfd = port_cache_lookup_fp(pcp, fd, fp);
408 	if (pfd == NULL) {
409 		releasef(fd);
410 		mutex_exit(&pcp->pc_lock);
411 		return (0);
412 	}
413 	/* only association owner is allowed to remove the association */
414 	if (curproc->p_pid != PFTOD(pfd)->pd_portev->portkev_pid) {
415 		releasef(fd);
416 		mutex_exit(&pcp->pc_lock);
417 		return (EACCES);
418 	}
419 
420 	/* remove port from the file descriptor interested list */
421 	delfd_port(fd, pfd);
422 	releasef(fd);
423 
424 	/* remove polldat & port event structure */
425 	port_remove_fd_object(pfd, pp, pcp);
426 	mutex_exit(&pcp->pc_lock);
427 	return (0);
428 }
429 
430 /*
431  * Remove the fd from the event port cache.
432  */
433 static void
434 port_remove_fd_local(portfd_t *pfd, port_fdcache_t *pcp)
435 {
436 	polldat_t	*pdp = PFTOD(pfd);
437 
438 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
439 	pdp->pd_fp = NULL;
440 	if (pdp->pd_php != NULL) {
441 		pollhead_delete(pdp->pd_php, pdp);
442 		pdp->pd_php = NULL;
443 	}
444 	port_free_event_local(pdp->pd_portev, 0);
445 	/* remove polldat struct */
446 	port_pcache_remove_fd(pcp, pfd);
447 }
448 
449 /*
450  * Associate event port polldat_t structure with sub-system pointer to
451  * a polhead_t structure.
452  */
453 static int
454 port_bind_pollhead(pollhead_t **php, polldat_t *pdp, short *revents)
455 {
456 	int		error;
457 	file_t		*fp;
458 
459 	/*
460 	 * During re-association of a fd with a port the pd_php pointer
461 	 * is still the same as at the first association time.
462 	 */
463 	if (pdp->pd_php == *php)
464 		return (0);		/* already associated */
465 
466 	/* polldat_t associated with another pollhead_t pointer */
467 	if (pdp->pd_php != NULL)
468 		pollhead_delete(pdp->pd_php, pdp);
469 
470 	/*
471 	 * Before pollhead_insert() pollwakeup() will not detect a polldat
472 	 * entry in the ph_list and the event notification will disappear.
473 	 * This happens because polldat_t is still not associated with
474 	 * the pointer to the pollhead_t structure.
475 	 */
476 	pollhead_insert(*php, pdp);
477 
478 	/*
479 	 * From now on event notification can be detected in pollwakeup(),
480 	 * Use VOP_POLL() again to check the current status of the event.
481 	 */
482 	pdp->pd_php = *php;
483 	fp = pdp->pd_fp;
484 	curthread->t_pollcache = (pollcache_t *)pdp->pd_pcache;
485 	error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0, revents, php);
486 	curthread->t_pollcache = NULL;
487 	return (error);
488 }
489 
490 /*
491  * Grow the hash table. Rehash all the elements on the hash table.
492  */
493 static void
494 port_cache_grow_hashtbl(port_fdcache_t *pcp)
495 {
496 	portfd_t	**oldtbl;
497 	polldat_t	*pdp;
498 	portfd_t	*pfd;
499 	polldat_t	*pdp1;
500 	int		oldsize;
501 	int		i;
502 
503 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
504 	oldsize = pcp->pc_hashsize;
505 	oldtbl = pcp->pc_hash;
506 	pcp->pc_hashsize *= PORTHASH_MULT;
507 	pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (portfd_t *),
508 	    KM_SLEEP);
509 	/*
510 	 * rehash existing elements
511 	 */
512 	pcp->pc_fdcount = 0;
513 	for (i = 0; i < oldsize; i++) {
514 		pfd = oldtbl[i];
515 		pdp = PFTOD(pfd);
516 		while (pdp != NULL) {
517 			pdp1 = pdp->pd_hashnext;
518 			port_cache_insert_fd(pcp, pdp);
519 			pdp = pdp1;
520 		}
521 	}
522 	kmem_free(oldtbl, oldsize * sizeof (portfd_t *));
523 }
524 /*
525  * This routine inserts a polldat into the portcache's hash table. It
526  * may be necessary to grow the size of the hash table.
527  */
528 static void
529 port_cache_insert_fd(port_fdcache_t *pcp, polldat_t *pdp)
530 {
531 	portfd_t	**bucket;
532 
533 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
534 	if (pcp->pc_fdcount > (pcp->pc_hashsize * PORTHASH_MULT))
535 		port_cache_grow_hashtbl(pcp);
536 	bucket = PORT_FD_BUCKET(pcp, pdp->pd_fd);
537 	pdp->pd_hashnext = PFTOD(*bucket);
538 	*bucket = PDTOF(pdp);
539 	pcp->pc_fdcount++;
540 }
541 
542 
543 /*
544  * The port_remove_portfd() function dissociates the port from the fd
545  * and vive versa.
546  */
547 static void
548 port_remove_portfd(polldat_t *pdp, port_fdcache_t *pcp)
549 {
550 	port_t	*pp;
551 	file_t	*fp;
552 
553 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
554 	pp = pdp->pd_portev->portkev_port;
555 	fp = getf(pdp->pd_fd);
556 	/*
557 	 * If we did not get the fp for pd_fd but its portfd_t
558 	 * still exist in the cache, it means the pd_fd is being
559 	 * closed by some other thread which will also free the portfd_t.
560 	 */
561 	if (fp != NULL) {
562 		delfd_port(pdp->pd_fd, PDTOF(pdp));
563 		releasef(pdp->pd_fd);
564 		port_remove_fd_object(PDTOF(pdp), pp, pcp);
565 	}
566 }
567 
568 /*
569  * This function is used by port_close_sourcefd() to destroy the cache
570  * on last close.
571  */
572 static void
573 port_pcache_destroy(port_fdcache_t *pcp)
574 {
575 	ASSERT(pcp->pc_fdcount == 0);
576 	kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize);
577 	mutex_destroy(&pcp->pc_lock);
578 	kmem_free(pcp, sizeof (port_fdcache_t));
579 }
580 
581 /*
582  * port_close() calls this function to request the PORT_SOURCE_FD source
583  * to remove/free all resources allocated and associated with the port.
584  */
585 /* ARGSUSED */
586 static void
587 port_close_sourcefd(void *arg, int port, pid_t pid, int lastclose)
588 {
589 	port_t		*pp = arg;
590 	port_fdcache_t	*pcp;
591 	portfd_t	**hashtbl;
592 	polldat_t	*pdp;
593 	polldat_t	*pdpnext;
594 	int		index;
595 
596 	pcp = pp->port_queue.portq_pcp;
597 	if (pcp == NULL)
598 		/* no cache available -> nothing to do */
599 		return;
600 
601 	mutex_enter(&pcp->pc_lock);
602 	/*
603 	 * Scan the cache and free all allocated portfd_t and port_kevent_t
604 	 * structures.
605 	 */
606 	hashtbl = pcp->pc_hash;
607 	for (index = 0; index < pcp->pc_hashsize; index++) {
608 		for (pdp = PFTOD(hashtbl[index]); pdp != NULL; pdp = pdpnext) {
609 			pdpnext = pdp->pd_hashnext;
610 			if (pid == pdp->pd_portev->portkev_pid) {
611 				/*
612 				 * remove polldat + port_event_t from cache
613 				 * only when current process did the
614 				 * association.
615 				 */
616 				port_remove_portfd(pdp, pcp);
617 			}
618 		}
619 	}
620 	if (lastclose) {
621 		/*
622 		 * Wait for all the portfd's to be freed.
623 		 * The remaining portfd_t's are the once we did not
624 		 * free in port_remove_portfd since some other thread
625 		 * is closing the fd. These threads will free the portfd_t's
626 		 * once we drop the pc_lock mutex.
627 		 */
628 		while (pcp->pc_fdcount) {
629 			(void) cv_wait_sig(&pcp->pc_lclosecv, &pcp->pc_lock);
630 		}
631 		/* event port vnode will be destroyed -> remove everything */
632 		pp->port_queue.portq_pcp = NULL;
633 	}
634 	mutex_exit(&pcp->pc_lock);
635 	/*
636 	 * last close:
637 	 * pollwakeup() can not further interact with this cache
638 	 * (all polldat structs are removed from pollhead entries).
639 	 */
640 	if (lastclose)
641 		port_pcache_destroy(pcp);
642 }
643