1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stat.h> 32 #include <sys/errno.h> 33 #include <sys/kmem.h> 34 #include <sys/sysmacros.h> 35 #include <sys/debug.h> 36 #include <sys/poll_impl.h> 37 #include <sys/port_impl.h> 38 39 #define PORTHASH_START 256 /* start cache space for events */ 40 #define PORTHASH_MULT 2 /* growth threshold and factor */ 41 42 /* local functions */ 43 static int port_fd_callback(void *, int *, pid_t, int, void *); 44 static int port_bind_pollhead(pollhead_t **, polldat_t *, short *); 45 static void port_remove_fd_local(portfd_t *, port_fdcache_t *); 46 static void port_close_sourcefd(void *, int, pid_t, int); 47 static void port_cache_insert_fd(port_fdcache_t *, polldat_t *); 48 49 /* 50 * port_fd_callback() 51 * The event port framework uses callback functions to notify associated 52 * event sources about actions on source specific objects. 53 * The source itself defines the "arg" required to identify the object with 54 * events. In the port_fd_callback() case the "arg" is a pointer to portfd_t 55 * structure. The portfd_t structure is specific for PORT_SOURCE_FD source. 56 * The port_fd_callback() function is notified in three cases: 57 * - PORT_CALLBACK_DEFAULT 58 * The object (fd) will be delivered to the application. 59 * - PORT_CALLBACK_DISSOCIATE 60 * The object (fd) will be dissociated from the port. 61 * - PORT_CALLBACK_CLOSE 62 * The object (fd) will be dissociated from the port because the port 63 * is being closed. 64 * A fd is shareable between processes only when 65 * - processes have the same fd id and 66 * - processes have the same fp. 67 * A fd becomes shareable: 68 * - on fork() across parent and child process and 69 * - when I_SENDFD is used to pass file descriptors between parent and child 70 * immediately after fork() (the sender and receiver must get the same 71 * file descriptor id). 72 * If a fd is shared between processes, all involved processes will get 73 * the same rights related to re-association of the fd with the port and 74 * retrieve of events from that fd. 75 * The process which associated the fd with a port for the first time 76 * becomes also the owner of the association. Only the owner of the 77 * association is allowed to dissociate the fd from the port. 78 */ 79 /* ARGSUSED */ 80 static int 81 port_fd_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 82 { 83 portfd_t *pfd = (portfd_t *)arg; 84 polldat_t *pdp = PFTOD(pfd); 85 port_fdcache_t *pcp; 86 file_t *fp; 87 int error; 88 89 ASSERT((pdp != NULL) && (events != NULL)); 90 switch (flag) { 91 case PORT_CALLBACK_DEFAULT: 92 if (curproc->p_pid != pid) { 93 /* 94 * Check if current process is allowed to retrieve 95 * events from this fd. 96 */ 97 fp = getf(pdp->pd_fd); 98 if (fp == NULL) { 99 error = EACCES; /* deny delivery of events */ 100 break; 101 } 102 releasef(pdp->pd_fd); 103 if (fp != pdp->pd_fp) { 104 error = EACCES; /* deny delivery of events */ 105 break; 106 } 107 } 108 *events = pdp->pd_portev->portkev_events; /* update events */ 109 error = 0; 110 break; 111 case PORT_CALLBACK_DISSOCIATE: 112 error = 0; 113 break; 114 case PORT_CALLBACK_CLOSE: 115 /* remove polldat/portfd struct */ 116 pdp->pd_portev = NULL; 117 pcp = (port_fdcache_t *)pdp->pd_pcache; 118 mutex_enter(&pcp->pc_lock); 119 pdp->pd_fp = NULL; 120 pdp->pd_events = 0; 121 if (pdp->pd_php != NULL) { 122 pollhead_delete(pdp->pd_php, pdp); 123 pdp->pd_php = NULL; 124 } 125 port_pcache_remove_fd(pcp, pfd); 126 mutex_exit(&pcp->pc_lock); 127 error = 0; 128 break; 129 default: 130 error = EINVAL; 131 break; 132 } 133 return (error); 134 } 135 136 /* 137 * This routine returns a pointer to a cached poll fd entry, or NULL if it 138 * does not find it in the hash table. 139 * The fd is used as index. 140 * The fd and the fp are used to detect a valid entry. 141 * This function returns a pointer to a valid portfd_t structure only when 142 * the fd and the fp in the args match the entries in polldat_t. 143 */ 144 portfd_t * 145 port_cache_lookup_fp(port_fdcache_t *pcp, int fd, file_t *fp) 146 { 147 polldat_t *pdp; 148 portfd_t **bucket; 149 150 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 151 bucket = PORT_FD_BUCKET(pcp, fd); 152 pdp = PFTOD(*bucket); 153 while (pdp != NULL) { 154 if (pdp->pd_fd == fd && pdp->pd_fp == fp) 155 break; 156 pdp = pdp->pd_hashnext; 157 } 158 return (PDTOF(pdp)); 159 } 160 161 /* 162 * port_associate_fd() 163 * This function associates new file descriptors with a port or 164 * reactivate already associated file descriptors. 165 * The reactivation also updates the events types to be checked and the 166 * attached user pointer. 167 * Per port a cache is used to store associated file descriptors. 168 * Internally the VOP_POLL interface is used to poll for existing events. 169 * The VOP_POLL interface can also deliver a pointer to a pollhead_t structure 170 * which is used to enqueue polldat_t structures with pending events. 171 * If VOP_POLL immediately returns valid events (revents) then those events 172 * will be submitted to the event port with port_send_event(). 173 * Otherwise VOP_POLL does not return events but it delivers a pointer to a 174 * pollhead_t structure. In such a case the corresponding file system behind 175 * VOP_POLL will use the pollwakeup() function to notify about exisiting 176 * events. 177 */ 178 int 179 port_associate_fd(port_t *pp, int source, uintptr_t object, int events, 180 void *user) 181 { 182 port_fdcache_t *pcp; 183 int fd; 184 struct pollhead *php = NULL; 185 portfd_t *pfd; 186 polldat_t *pdp; 187 file_t *fp; 188 port_kevent_t *pkevp; 189 short revents; 190 int error = 0; 191 192 pcp = pp->port_queue.portq_pcp; 193 if (object > (uintptr_t)INT_MAX) 194 return (EBADFD); 195 196 fd = object; 197 198 if ((fp = getf(fd)) == NULL) 199 return (EBADFD); 200 201 mutex_enter(&pcp->pc_lock); 202 if (pcp->pc_hash == NULL) { 203 /* 204 * This is the first time that a fd is being associated with 205 * the current port: 206 * - create PORT_SOURCE_FD cache 207 * - associate PORT_SOURCE_FD source with the port 208 */ 209 error = port_associate_ksource(pp->port_fd, PORT_SOURCE_FD, 210 NULL, port_close_sourcefd, pp, NULL); 211 if (error) { 212 mutex_exit(&pcp->pc_lock); 213 releasef(fd); 214 return (error); 215 } 216 217 /* create polldat cache */ 218 pcp->pc_hashsize = PORTHASH_START; 219 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * 220 sizeof (portfd_t *), KM_SLEEP); 221 pfd = NULL; 222 } else { 223 /* Check if the fd/fp is already associated with the port */ 224 pfd = port_cache_lookup_fp(pcp, fd, fp); 225 } 226 227 if (pfd == NULL) { 228 /* 229 * new entry 230 * Allocate a polldat_t structure per fd 231 * The use of the polldat_t structure to cache file descriptors 232 * is required to be able to share the pollwakeup() function 233 * with poll(2) and devpoll(7d). 234 */ 235 pfd = kmem_zalloc(sizeof (portfd_t), KM_SLEEP); 236 pdp = PFTOD(pfd); 237 pdp->pd_fd = fd; 238 pdp->pd_fp = fp; 239 pdp->pd_pcache = (void *)pcp; 240 241 /* Allocate a port event structure per fd */ 242 error = port_alloc_event_local(pp, source, PORT_ALLOC_CACHED, 243 &pdp->pd_portev); 244 if (error) { 245 kmem_free(pfd, sizeof (portfd_t)); 246 releasef(fd); 247 mutex_exit(&pcp->pc_lock); 248 return (error); 249 } 250 pkevp = pdp->pd_portev; 251 pkevp->portkev_callback = port_fd_callback; 252 pkevp->portkev_arg = pfd; 253 254 /* add portfd_t entry to the cache */ 255 port_cache_insert_fd(pcp, pdp); 256 pkevp->portkev_object = fd; 257 pkevp->portkev_user = user; 258 259 /* 260 * Add current port to the file descriptor interested list 261 * The members of the list are notified when the file descriptor 262 * is closed. 263 */ 264 addfd_port(fd, pfd); 265 } else { 266 /* 267 * The file descriptor is already associated with the port 268 */ 269 pdp = PFTOD(pfd); 270 pkevp = pdp->pd_portev; 271 272 /* 273 * Check if the re-association happens before the last 274 * submitted event of the file descriptor was retrieved. 275 * Clear the PORT_KEV_VALID flag if set. No new events 276 * should get submitted after this flag is cleared. 277 */ 278 mutex_enter(&pkevp->portkev_lock); 279 if (pkevp->portkev_flags & PORT_KEV_VALID) { 280 pkevp->portkev_flags &= ~PORT_KEV_VALID; 281 } 282 if (pkevp->portkev_flags & PORT_KEV_DONEQ) { 283 mutex_exit(&pkevp->portkev_lock); 284 /* 285 * Remove any events that where already fired 286 * for this fd and are still in the port queue. 287 */ 288 port_remove_done_event(pkevp); 289 } else { 290 mutex_exit(&pkevp->portkev_lock); 291 } 292 pkevp->portkev_user = user; 293 } 294 295 pkevp->portkev_events = 0; /* no fired events */ 296 pdp->pd_events = events; /* events associated */ 297 298 /* 299 * do VOP_POLL and cache this poll fd. 300 * 301 * XXX - pollrelock() logic needs to know 302 * which pollcache lock to grab. It'd be a 303 * cleaner solution if we could pass pcp as 304 * an arguement in VOP_POLL interface instead 305 * of implicitly passing it using thread_t 306 * struct. On the other hand, changing VOP_POLL 307 * interface will require all driver/file system 308 * poll routine to change. 309 */ 310 curthread->t_pollcache = (pollcache_t *)pcp; 311 error = VOP_POLL(fp->f_vnode, events, 0, &revents, &php); 312 curthread->t_pollcache = NULL; 313 314 /* 315 * To keep synchronization between VOP_POLL above and 316 * pollhead_insert below, it is necessary to 317 * call VOP_POLL() again (see port_bind_pollhead()). 318 */ 319 if (error) { 320 /* dissociate the fd from the port */ 321 delfd_port(fd, pfd); 322 port_remove_fd_local(pfd, pcp); 323 releasef(fd); 324 mutex_exit(&pcp->pc_lock); 325 return (error); 326 } 327 328 if (php != NULL) { 329 /* 330 * No events delivered yet. 331 * Bind pollhead pointer with current polldat_t structure. 332 * Sub-system will call pollwakeup() later with php as 333 * argument. 334 */ 335 error = port_bind_pollhead(&php, pdp, &revents); 336 if (error) { 337 delfd_port(fd, pfd); 338 port_remove_fd_local(pfd, pcp); 339 releasef(fd); 340 mutex_exit(&pcp->pc_lock); 341 return (error); 342 } 343 } 344 345 /* 346 * Check if events detected. 347 * revents was already set after the VOP_POLL above or 348 * it was updated in port_bind_pollhead(). 349 */ 350 mutex_enter(&pkevp->portkev_lock); 351 if (revents) { 352 ASSERT((pkevp->portkev_flags & PORT_KEV_DONEQ) == 0); 353 revents = revents & (pdp->pd_events | POLLHUP | POLLERR); 354 /* send events to the event port */ 355 pkevp->portkev_events = revents; 356 /* 357 * port_send_event will release the portkev_lock mutex. 358 */ 359 (void) port_send_event(pkevp); 360 } else { 361 /* 362 * events can be submitted 363 */ 364 pkevp->portkev_flags |= PORT_KEV_VALID; 365 mutex_exit(&pkevp->portkev_lock); 366 } 367 368 releasef(fd); 369 mutex_exit(&pcp->pc_lock); 370 return (error); 371 } 372 373 /* 374 * The port_dissociate_fd() function dissociates the delivered file 375 * descriptor from the event port and removes already fired events. 376 * If a fd is shared between processes, all involved processes will get 377 * the same rights related to re-association of the fd with the port and 378 * retrieve of events from that fd. 379 * The process which associated the fd with a port for the first time 380 * becomes also the owner of the association. Only the owner of the 381 * association is allowed to dissociate the fd from the port. 382 */ 383 int 384 port_dissociate_fd(port_t *pp, uintptr_t object) 385 { 386 int fd; 387 port_fdcache_t *pcp; 388 portfd_t *pfd; 389 file_t *fp; 390 391 if (object > (uintptr_t)INT_MAX) 392 return (EBADFD); 393 394 fd = object; 395 pcp = pp->port_queue.portq_pcp; 396 397 mutex_enter(&pcp->pc_lock); 398 if (pcp->pc_hash == NULL) { 399 /* no file descriptor cache available */ 400 mutex_exit(&pcp->pc_lock); 401 return (0); 402 } 403 if ((fp = getf(fd)) == NULL) { 404 mutex_exit(&pcp->pc_lock); 405 return (EBADFD); 406 } 407 pfd = port_cache_lookup_fp(pcp, fd, fp); 408 if (pfd == NULL) { 409 releasef(fd); 410 mutex_exit(&pcp->pc_lock); 411 return (0); 412 } 413 /* only association owner is allowed to remove the association */ 414 if (curproc->p_pid != PFTOD(pfd)->pd_portev->portkev_pid) { 415 releasef(fd); 416 mutex_exit(&pcp->pc_lock); 417 return (EACCES); 418 } 419 420 /* remove port from the file descriptor interested list */ 421 delfd_port(fd, pfd); 422 releasef(fd); 423 424 /* remove polldat & port event structure */ 425 port_remove_fd_object(pfd, pp, pcp); 426 mutex_exit(&pcp->pc_lock); 427 return (0); 428 } 429 430 /* 431 * Remove the fd from the event port cache. 432 */ 433 static void 434 port_remove_fd_local(portfd_t *pfd, port_fdcache_t *pcp) 435 { 436 polldat_t *pdp = PFTOD(pfd); 437 438 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 439 pdp->pd_fp = NULL; 440 if (pdp->pd_php != NULL) { 441 pollhead_delete(pdp->pd_php, pdp); 442 pdp->pd_php = NULL; 443 } 444 port_free_event_local(pdp->pd_portev, 0); 445 /* remove polldat struct */ 446 port_pcache_remove_fd(pcp, pfd); 447 } 448 449 /* 450 * Associate event port polldat_t structure with sub-system pointer to 451 * a polhead_t structure. 452 */ 453 static int 454 port_bind_pollhead(pollhead_t **php, polldat_t *pdp, short *revents) 455 { 456 int error; 457 file_t *fp; 458 459 /* 460 * During re-association of a fd with a port the pd_php pointer 461 * is still the same as at the first association time. 462 */ 463 if (pdp->pd_php == *php) 464 return (0); /* already associated */ 465 466 /* polldat_t associated with another pollhead_t pointer */ 467 if (pdp->pd_php != NULL) 468 pollhead_delete(pdp->pd_php, pdp); 469 470 /* 471 * Before pollhead_insert() pollwakeup() will not detect a polldat 472 * entry in the ph_list and the event notification will disappear. 473 * This happens because polldat_t is still not associated with 474 * the pointer to the pollhead_t structure. 475 */ 476 pollhead_insert(*php, pdp); 477 478 /* 479 * From now on event notification can be detected in pollwakeup(), 480 * Use VOP_POLL() again to check the current status of the event. 481 */ 482 pdp->pd_php = *php; 483 fp = pdp->pd_fp; 484 curthread->t_pollcache = (pollcache_t *)pdp->pd_pcache; 485 error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0, revents, php); 486 curthread->t_pollcache = NULL; 487 return (error); 488 } 489 490 /* 491 * Grow the hash table. Rehash all the elements on the hash table. 492 */ 493 static void 494 port_cache_grow_hashtbl(port_fdcache_t *pcp) 495 { 496 portfd_t **oldtbl; 497 polldat_t *pdp; 498 portfd_t *pfd; 499 polldat_t *pdp1; 500 int oldsize; 501 int i; 502 503 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 504 oldsize = pcp->pc_hashsize; 505 oldtbl = pcp->pc_hash; 506 pcp->pc_hashsize *= PORTHASH_MULT; 507 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (portfd_t *), 508 KM_SLEEP); 509 /* 510 * rehash existing elements 511 */ 512 pcp->pc_fdcount = 0; 513 for (i = 0; i < oldsize; i++) { 514 pfd = oldtbl[i]; 515 pdp = PFTOD(pfd); 516 while (pdp != NULL) { 517 pdp1 = pdp->pd_hashnext; 518 port_cache_insert_fd(pcp, pdp); 519 pdp = pdp1; 520 } 521 } 522 kmem_free(oldtbl, oldsize * sizeof (portfd_t *)); 523 } 524 /* 525 * This routine inserts a polldat into the portcache's hash table. It 526 * may be necessary to grow the size of the hash table. 527 */ 528 static void 529 port_cache_insert_fd(port_fdcache_t *pcp, polldat_t *pdp) 530 { 531 portfd_t **bucket; 532 533 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 534 if (pcp->pc_fdcount > (pcp->pc_hashsize * PORTHASH_MULT)) 535 port_cache_grow_hashtbl(pcp); 536 bucket = PORT_FD_BUCKET(pcp, pdp->pd_fd); 537 pdp->pd_hashnext = PFTOD(*bucket); 538 *bucket = PDTOF(pdp); 539 pcp->pc_fdcount++; 540 } 541 542 543 /* 544 * The port_remove_portfd() function dissociates the port from the fd 545 * and vive versa. 546 */ 547 static void 548 port_remove_portfd(polldat_t *pdp, port_fdcache_t *pcp) 549 { 550 port_t *pp; 551 file_t *fp; 552 553 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 554 pp = pdp->pd_portev->portkev_port; 555 fp = getf(pdp->pd_fd); 556 /* 557 * If we did not get the fp for pd_fd but its portfd_t 558 * still exist in the cache, it means the pd_fd is being 559 * closed by some other thread which will also free the portfd_t. 560 */ 561 if (fp != NULL) { 562 delfd_port(pdp->pd_fd, PDTOF(pdp)); 563 releasef(pdp->pd_fd); 564 port_remove_fd_object(PDTOF(pdp), pp, pcp); 565 } 566 } 567 568 /* 569 * This function is used by port_close_sourcefd() to destroy the cache 570 * on last close. 571 */ 572 static void 573 port_pcache_destroy(port_fdcache_t *pcp) 574 { 575 ASSERT(pcp->pc_fdcount == 0); 576 kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize); 577 mutex_destroy(&pcp->pc_lock); 578 kmem_free(pcp, sizeof (port_fdcache_t)); 579 } 580 581 /* 582 * port_close() calls this function to request the PORT_SOURCE_FD source 583 * to remove/free all resources allocated and associated with the port. 584 */ 585 /* ARGSUSED */ 586 static void 587 port_close_sourcefd(void *arg, int port, pid_t pid, int lastclose) 588 { 589 port_t *pp = arg; 590 port_fdcache_t *pcp; 591 portfd_t **hashtbl; 592 polldat_t *pdp; 593 polldat_t *pdpnext; 594 int index; 595 596 pcp = pp->port_queue.portq_pcp; 597 if (pcp == NULL) 598 /* no cache available -> nothing to do */ 599 return; 600 601 mutex_enter(&pcp->pc_lock); 602 /* 603 * Scan the cache and free all allocated portfd_t and port_kevent_t 604 * structures. 605 */ 606 hashtbl = pcp->pc_hash; 607 for (index = 0; index < pcp->pc_hashsize; index++) { 608 for (pdp = PFTOD(hashtbl[index]); pdp != NULL; pdp = pdpnext) { 609 pdpnext = pdp->pd_hashnext; 610 if (pid == pdp->pd_portev->portkev_pid) { 611 /* 612 * remove polldat + port_event_t from cache 613 * only when current process did the 614 * association. 615 */ 616 port_remove_portfd(pdp, pcp); 617 } 618 } 619 } 620 if (lastclose) { 621 /* 622 * Wait for all the portfd's to be freed. 623 * The remaining portfd_t's are the once we did not 624 * free in port_remove_portfd since some other thread 625 * is closing the fd. These threads will free the portfd_t's 626 * once we drop the pc_lock mutex. 627 */ 628 while (pcp->pc_fdcount) { 629 (void) cv_wait_sig(&pcp->pc_lclosecv, &pcp->pc_lock); 630 } 631 /* event port vnode will be destroyed -> remove everything */ 632 pp->port_queue.portq_pcp = NULL; 633 } 634 mutex_exit(&pcp->pc_lock); 635 /* 636 * last close: 637 * pollwakeup() can not further interact with this cache 638 * (all polldat structs are removed from pollhead entries). 639 */ 640 if (lastclose) 641 port_pcache_destroy(pcp); 642 } 643