1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stat.h> 32 #include <sys/errno.h> 33 #include <sys/kmem.h> 34 #include <sys/sysmacros.h> 35 #include <sys/debug.h> 36 #include <sys/poll_impl.h> 37 #include <sys/port_impl.h> 38 39 #define PORTHASH_START 256 /* start cache space for events */ 40 #define PORTHASH_MULT 2 /* growth threshold and factor */ 41 42 /* local functions */ 43 static int port_fd_callback(void *, int *, pid_t, int, void *); 44 static int port_bind_pollhead(pollhead_t **, polldat_t *, short *); 45 static void port_close_sourcefd(void *, int, pid_t, int); 46 static void port_cache_insert_fd(port_fdcache_t *, polldat_t *); 47 48 /* 49 * port_fd_callback() 50 * The event port framework uses callback functions to notify associated 51 * event sources about actions on source specific objects. 52 * The source itself defines the "arg" required to identify the object with 53 * events. In the port_fd_callback() case the "arg" is a pointer to portfd_t 54 * structure. The portfd_t structure is specific for PORT_SOURCE_FD source. 55 * The port_fd_callback() function is notified in three cases: 56 * - PORT_CALLBACK_DEFAULT 57 * The object (fd) will be delivered to the application. 58 * - PORT_CALLBACK_DISSOCIATE 59 * The object (fd) will be dissociated from the port. 60 * - PORT_CALLBACK_CLOSE 61 * The object (fd) will be dissociated from the port because the port 62 * is being closed. 63 * A fd is shareable between processes only when 64 * - processes have the same fd id and 65 * - processes have the same fp. 66 * A fd becomes shareable: 67 * - on fork() across parent and child process and 68 * - when I_SENDFD is used to pass file descriptors between parent and child 69 * immediately after fork() (the sender and receiver must get the same 70 * file descriptor id). 71 * If a fd is shared between processes, all involved processes will get 72 * the same rights related to re-association of the fd with the port and 73 * retrieve of events from that fd. 74 * The process which associated the fd with a port for the first time 75 * becomes also the owner of the association. Only the owner of the 76 * association is allowed to dissociate the fd from the port. 77 */ 78 /* ARGSUSED */ 79 static int 80 port_fd_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 81 { 82 portfd_t *pfd = (portfd_t *)arg; 83 polldat_t *pdp = PFTOD(pfd); 84 port_fdcache_t *pcp; 85 file_t *fp; 86 int error; 87 88 ASSERT((pdp != NULL) && (events != NULL)); 89 switch (flag) { 90 case PORT_CALLBACK_DEFAULT: 91 if (curproc->p_pid != pid) { 92 /* 93 * Check if current process is allowed to retrieve 94 * events from this fd. 95 */ 96 fp = getf(pdp->pd_fd); 97 if (fp == NULL) { 98 error = EACCES; /* deny delivery of events */ 99 break; 100 } 101 releasef(pdp->pd_fd); 102 if (fp != pdp->pd_fp) { 103 error = EACCES; /* deny delivery of events */ 104 break; 105 } 106 } 107 *events = pdp->pd_portev->portkev_events; /* update events */ 108 error = 0; 109 break; 110 case PORT_CALLBACK_DISSOCIATE: 111 error = 0; 112 break; 113 case PORT_CALLBACK_CLOSE: 114 /* remove polldat/portfd struct */ 115 pdp->pd_portev = NULL; 116 pcp = (port_fdcache_t *)pdp->pd_pcache; 117 mutex_enter(&pcp->pc_lock); 118 pdp->pd_fp = NULL; 119 pdp->pd_events = 0; 120 if (pdp->pd_php != NULL) { 121 pollhead_delete(pdp->pd_php, pdp); 122 pdp->pd_php = NULL; 123 } 124 port_pcache_remove_fd(pcp, pfd); 125 mutex_exit(&pcp->pc_lock); 126 error = 0; 127 break; 128 default: 129 error = EINVAL; 130 break; 131 } 132 return (error); 133 } 134 135 /* 136 * This routine returns a pointer to a cached poll fd entry, or NULL if it 137 * does not find it in the hash table. 138 * The fd is used as index. 139 * The fd and the fp are used to detect a valid entry. 140 * This function returns a pointer to a valid portfd_t structure only when 141 * the fd and the fp in the args match the entries in polldat_t. 142 */ 143 portfd_t * 144 port_cache_lookup_fp(port_fdcache_t *pcp, int fd, file_t *fp) 145 { 146 polldat_t *pdp; 147 portfd_t **bucket; 148 149 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 150 bucket = PORT_FD_BUCKET(pcp, fd); 151 pdp = PFTOD(*bucket); 152 while (pdp != NULL) { 153 if (pdp->pd_fd == fd && pdp->pd_fp == fp) 154 break; 155 pdp = pdp->pd_hashnext; 156 } 157 return (PDTOF(pdp)); 158 } 159 160 /* 161 * port_associate_fd() 162 * This function associates new file descriptors with a port or 163 * reactivate already associated file descriptors. 164 * The reactivation also updates the events types to be checked and the 165 * attached user pointer. 166 * Per port a cache is used to store associated file descriptors. 167 * Internally the VOP_POLL interface is used to poll for existing events. 168 * The VOP_POLL interface can also deliver a pointer to a pollhead_t structure 169 * which is used to enqueue polldat_t structures with pending events. 170 * If VOP_POLL immediately returns valid events (revents) then those events 171 * will be submitted to the event port with port_send_event(). 172 * Otherwise VOP_POLL does not return events but it delivers a pointer to a 173 * pollhead_t structure. In such a case the corresponding file system behind 174 * VOP_POLL will use the pollwakeup() function to notify about existing 175 * events. 176 */ 177 int 178 port_associate_fd(port_t *pp, int source, uintptr_t object, int events, 179 void *user) 180 { 181 port_fdcache_t *pcp; 182 int fd; 183 struct pollhead *php = NULL; 184 portfd_t *pfd; 185 polldat_t *pdp; 186 file_t *fp; 187 port_kevent_t *pkevp; 188 short revents; 189 int error = 0; 190 int active; 191 192 pcp = pp->port_queue.portq_pcp; 193 if (object > (uintptr_t)INT_MAX) 194 return (EBADFD); 195 196 fd = object; 197 198 if ((fp = getf(fd)) == NULL) 199 return (EBADFD); 200 201 mutex_enter(&pcp->pc_lock); 202 if (pcp->pc_hash == NULL) { 203 /* 204 * This is the first time that a fd is being associated with 205 * the current port: 206 * - create PORT_SOURCE_FD cache 207 * - associate PORT_SOURCE_FD source with the port 208 */ 209 error = port_associate_ksource(pp->port_fd, PORT_SOURCE_FD, 210 NULL, port_close_sourcefd, pp, NULL); 211 if (error) { 212 mutex_exit(&pcp->pc_lock); 213 releasef(fd); 214 return (error); 215 } 216 217 /* create polldat cache */ 218 pcp->pc_hashsize = PORTHASH_START; 219 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * 220 sizeof (portfd_t *), KM_SLEEP); 221 pfd = NULL; 222 } else { 223 /* Check if the fd/fp is already associated with the port */ 224 pfd = port_cache_lookup_fp(pcp, fd, fp); 225 } 226 227 if (pfd == NULL) { 228 /* 229 * new entry 230 * Allocate a polldat_t structure per fd 231 * The use of the polldat_t structure to cache file descriptors 232 * is required to be able to share the pollwakeup() function 233 * with poll(2) and devpoll(7d). 234 */ 235 pfd = kmem_zalloc(sizeof (portfd_t), KM_SLEEP); 236 pdp = PFTOD(pfd); 237 pdp->pd_fd = fd; 238 pdp->pd_fp = fp; 239 pdp->pd_pcache = (void *)pcp; 240 241 /* Allocate a port event structure per fd */ 242 error = port_alloc_event_local(pp, source, PORT_ALLOC_CACHED, 243 &pdp->pd_portev); 244 if (error) { 245 kmem_free(pfd, sizeof (portfd_t)); 246 releasef(fd); 247 mutex_exit(&pcp->pc_lock); 248 return (error); 249 } 250 pkevp = pdp->pd_portev; 251 pkevp->portkev_callback = port_fd_callback; 252 pkevp->portkev_arg = pfd; 253 254 /* add portfd_t entry to the cache */ 255 port_cache_insert_fd(pcp, pdp); 256 pkevp->portkev_object = fd; 257 pkevp->portkev_user = user; 258 259 /* 260 * Add current port to the file descriptor interested list 261 * The members of the list are notified when the file descriptor 262 * is closed. 263 */ 264 addfd_port(fd, pfd); 265 } else { 266 /* 267 * The file descriptor is already associated with the port 268 */ 269 pdp = PFTOD(pfd); 270 pkevp = pdp->pd_portev; 271 272 /* 273 * Check if the re-association happens before the last 274 * submitted event of the file descriptor was retrieved. 275 * Clear the PORT_KEV_VALID flag if set. No new events 276 * should get submitted after this flag is cleared. 277 */ 278 mutex_enter(&pkevp->portkev_lock); 279 if (pkevp->portkev_flags & PORT_KEV_VALID) { 280 pkevp->portkev_flags &= ~PORT_KEV_VALID; 281 } 282 if (pkevp->portkev_flags & PORT_KEV_DONEQ) { 283 mutex_exit(&pkevp->portkev_lock); 284 /* 285 * Remove any events that where already fired 286 * for this fd and are still in the port queue. 287 */ 288 (void) port_remove_done_event(pkevp); 289 } else { 290 mutex_exit(&pkevp->portkev_lock); 291 } 292 pkevp->portkev_user = user; 293 } 294 295 mutex_enter(&pkevp->portkev_lock); 296 pkevp->portkev_events = 0; /* no fired events */ 297 pdp->pd_events = events; /* events associated */ 298 /* 299 * allow new events. 300 */ 301 pkevp->portkev_flags |= PORT_KEV_VALID; 302 mutex_exit(&pkevp->portkev_lock); 303 304 /* 305 * do VOP_POLL and cache this poll fd. 306 * 307 * XXX - pollrelock() logic needs to know 308 * which pollcache lock to grab. It'd be a 309 * cleaner solution if we could pass pcp as 310 * an arguement in VOP_POLL interface instead 311 * of implicitly passing it using thread_t 312 * struct. On the other hand, changing VOP_POLL 313 * interface will require all driver/file system 314 * poll routine to change. 315 */ 316 curthread->t_pollcache = (pollcache_t *)pcp; 317 error = VOP_POLL(fp->f_vnode, events, 0, &revents, &php, NULL); 318 curthread->t_pollcache = NULL; 319 320 /* 321 * To keep synchronization between VOP_POLL above and 322 * pollhead_insert below, it is necessary to 323 * call VOP_POLL() again (see port_bind_pollhead()). 324 */ 325 if (error) { 326 goto errout; 327 } 328 329 if (php != NULL) { 330 /* 331 * No events delivered yet. 332 * Bind pollhead pointer with current polldat_t structure. 333 * Sub-system will call pollwakeup() later with php as 334 * argument. 335 */ 336 error = port_bind_pollhead(&php, pdp, &revents); 337 if (error) { 338 goto errout; 339 } 340 } 341 342 /* 343 * Check if new events where detected and no events have been 344 * delivered. The revents was already set after the VOP_POLL 345 * above or it was updated in port_bind_pollhead(). 346 */ 347 mutex_enter(&pkevp->portkev_lock); 348 if (revents && (pkevp->portkev_flags & PORT_KEV_VALID)) { 349 ASSERT((pkevp->portkev_flags & PORT_KEV_DONEQ) == 0); 350 pkevp->portkev_flags &= ~PORT_KEV_VALID; 351 revents = revents & (pdp->pd_events | POLLHUP | POLLERR); 352 /* send events to the event port */ 353 pkevp->portkev_events = revents; 354 /* 355 * port_send_event will release the portkev_lock mutex. 356 */ 357 port_send_event(pkevp); 358 } else { 359 mutex_exit(&pkevp->portkev_lock); 360 } 361 362 releasef(fd); 363 mutex_exit(&pcp->pc_lock); 364 return (error); 365 366 errout: 367 delfd_port(fd, pfd); 368 /* 369 * If the portkev is not valid, then an event was 370 * delivered. 371 * 372 * If an event was delivered and got picked up, then 373 * we return error = 0 treating this as a successful 374 * port associate call. The thread which received 375 * the event gets control of the object. 376 */ 377 active = 0; 378 mutex_enter(&pkevp->portkev_lock); 379 if (pkevp->portkev_flags & PORT_KEV_VALID) { 380 pkevp->portkev_flags &= ~PORT_KEV_VALID; 381 active = 1; 382 } 383 mutex_exit(&pkevp->portkev_lock); 384 385 if (!port_remove_fd_object(pfd, pp, pcp) && !active) { 386 error = 0; 387 } 388 releasef(fd); 389 mutex_exit(&pcp->pc_lock); 390 return (error); 391 } 392 393 /* 394 * The port_dissociate_fd() function dissociates the delivered file 395 * descriptor from the event port and removes already fired events. 396 * If a fd is shared between processes, all involved processes will get 397 * the same rights related to re-association of the fd with the port and 398 * retrieve of events from that fd. 399 * The process which associated the fd with a port for the first time 400 * becomes also the owner of the association. Only the owner of the 401 * association is allowed to dissociate the fd from the port. 402 */ 403 int 404 port_dissociate_fd(port_t *pp, uintptr_t object) 405 { 406 int fd; 407 port_fdcache_t *pcp; 408 portfd_t *pfd; 409 file_t *fp; 410 int active; 411 port_kevent_t *pkevp; 412 413 if (object > (uintptr_t)INT_MAX) 414 return (EBADFD); 415 416 fd = object; 417 pcp = pp->port_queue.portq_pcp; 418 419 mutex_enter(&pcp->pc_lock); 420 if (pcp->pc_hash == NULL) { 421 /* no file descriptor cache available */ 422 mutex_exit(&pcp->pc_lock); 423 return (ENOENT); 424 } 425 if ((fp = getf(fd)) == NULL) { 426 mutex_exit(&pcp->pc_lock); 427 return (EBADFD); 428 } 429 pfd = port_cache_lookup_fp(pcp, fd, fp); 430 if (pfd == NULL) { 431 releasef(fd); 432 mutex_exit(&pcp->pc_lock); 433 return (ENOENT); 434 } 435 /* only association owner is allowed to remove the association */ 436 if (curproc->p_pid != PFTOD(pfd)->pd_portev->portkev_pid) { 437 releasef(fd); 438 mutex_exit(&pcp->pc_lock); 439 return (EACCES); 440 } 441 442 /* remove port from the file descriptor interested list */ 443 delfd_port(fd, pfd); 444 releasef(fd); 445 446 /* 447 * Deactivate the association. No events get posted after 448 * this. 449 */ 450 pkevp = PFTOD(pfd)->pd_portev; 451 mutex_enter(&pkevp->portkev_lock); 452 if (pkevp->portkev_flags & PORT_KEV_VALID) { 453 pkevp->portkev_flags &= ~PORT_KEV_VALID; 454 active = 1; 455 } else { 456 active = 0; 457 } 458 mutex_exit(&pkevp->portkev_lock); 459 460 /* remove polldat & port event structure */ 461 if (port_remove_fd_object(pfd, pp, pcp)) { 462 /* 463 * An event was found and removed from the 464 * port done queue. This means the event has not yet 465 * been retrived. In this case we treat this as an active 466 * association. 467 */ 468 ASSERT(active == 0); 469 active = 1; 470 } 471 mutex_exit(&pcp->pc_lock); 472 473 /* 474 * Return ENOENT if there was no active association. 475 */ 476 return ((active ? 0 : ENOENT)); 477 } 478 479 /* 480 * Associate event port polldat_t structure with sub-system pointer to 481 * a polhead_t structure. 482 */ 483 static int 484 port_bind_pollhead(pollhead_t **php, polldat_t *pdp, short *revents) 485 { 486 int error; 487 file_t *fp; 488 489 /* 490 * During re-association of a fd with a port the pd_php pointer 491 * is still the same as at the first association time. 492 */ 493 if (pdp->pd_php == *php) 494 return (0); /* already associated */ 495 496 /* polldat_t associated with another pollhead_t pointer */ 497 if (pdp->pd_php != NULL) 498 pollhead_delete(pdp->pd_php, pdp); 499 500 /* 501 * Before pollhead_insert() pollwakeup() will not detect a polldat 502 * entry in the ph_list and the event notification will disappear. 503 * This happens because polldat_t is still not associated with 504 * the pointer to the pollhead_t structure. 505 */ 506 pollhead_insert(*php, pdp); 507 508 /* 509 * From now on event notification can be detected in pollwakeup(), 510 * Use VOP_POLL() again to check the current status of the event. 511 */ 512 pdp->pd_php = *php; 513 fp = pdp->pd_fp; 514 curthread->t_pollcache = (pollcache_t *)pdp->pd_pcache; 515 error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0, revents, php, NULL); 516 curthread->t_pollcache = NULL; 517 return (error); 518 } 519 520 /* 521 * Grow the hash table. Rehash all the elements on the hash table. 522 */ 523 static void 524 port_cache_grow_hashtbl(port_fdcache_t *pcp) 525 { 526 portfd_t **oldtbl; 527 polldat_t *pdp; 528 portfd_t *pfd; 529 polldat_t *pdp1; 530 int oldsize; 531 int i; 532 533 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 534 oldsize = pcp->pc_hashsize; 535 oldtbl = pcp->pc_hash; 536 pcp->pc_hashsize *= PORTHASH_MULT; 537 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (portfd_t *), 538 KM_SLEEP); 539 /* 540 * rehash existing elements 541 */ 542 pcp->pc_fdcount = 0; 543 for (i = 0; i < oldsize; i++) { 544 pfd = oldtbl[i]; 545 pdp = PFTOD(pfd); 546 while (pdp != NULL) { 547 pdp1 = pdp->pd_hashnext; 548 port_cache_insert_fd(pcp, pdp); 549 pdp = pdp1; 550 } 551 } 552 kmem_free(oldtbl, oldsize * sizeof (portfd_t *)); 553 } 554 /* 555 * This routine inserts a polldat into the portcache's hash table. It 556 * may be necessary to grow the size of the hash table. 557 */ 558 static void 559 port_cache_insert_fd(port_fdcache_t *pcp, polldat_t *pdp) 560 { 561 portfd_t **bucket; 562 563 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 564 if (pcp->pc_fdcount > (pcp->pc_hashsize * PORTHASH_MULT)) 565 port_cache_grow_hashtbl(pcp); 566 bucket = PORT_FD_BUCKET(pcp, pdp->pd_fd); 567 pdp->pd_hashnext = PFTOD(*bucket); 568 *bucket = PDTOF(pdp); 569 pcp->pc_fdcount++; 570 } 571 572 573 /* 574 * The port_remove_portfd() function dissociates the port from the fd 575 * and vive versa. 576 */ 577 static void 578 port_remove_portfd(polldat_t *pdp, port_fdcache_t *pcp) 579 { 580 port_t *pp; 581 file_t *fp; 582 583 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 584 pp = pdp->pd_portev->portkev_port; 585 fp = getf(pdp->pd_fd); 586 /* 587 * If we did not get the fp for pd_fd but its portfd_t 588 * still exist in the cache, it means the pd_fd is being 589 * closed by some other thread which will also free the portfd_t. 590 */ 591 if (fp != NULL) { 592 delfd_port(pdp->pd_fd, PDTOF(pdp)); 593 releasef(pdp->pd_fd); 594 (void) port_remove_fd_object(PDTOF(pdp), pp, pcp); 595 } 596 } 597 598 /* 599 * This function is used by port_close_sourcefd() to destroy the cache 600 * on last close. 601 */ 602 static void 603 port_pcache_destroy(port_fdcache_t *pcp) 604 { 605 ASSERT(pcp->pc_fdcount == 0); 606 kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize); 607 mutex_destroy(&pcp->pc_lock); 608 kmem_free(pcp, sizeof (port_fdcache_t)); 609 } 610 611 /* 612 * port_close() calls this function to request the PORT_SOURCE_FD source 613 * to remove/free all resources allocated and associated with the port. 614 */ 615 /* ARGSUSED */ 616 static void 617 port_close_sourcefd(void *arg, int port, pid_t pid, int lastclose) 618 { 619 port_t *pp = arg; 620 port_fdcache_t *pcp; 621 portfd_t **hashtbl; 622 polldat_t *pdp; 623 polldat_t *pdpnext; 624 int index; 625 626 pcp = pp->port_queue.portq_pcp; 627 if (pcp == NULL) 628 /* no cache available -> nothing to do */ 629 return; 630 631 mutex_enter(&pcp->pc_lock); 632 /* 633 * Scan the cache and free all allocated portfd_t and port_kevent_t 634 * structures. 635 */ 636 hashtbl = pcp->pc_hash; 637 for (index = 0; index < pcp->pc_hashsize; index++) { 638 for (pdp = PFTOD(hashtbl[index]); pdp != NULL; pdp = pdpnext) { 639 pdpnext = pdp->pd_hashnext; 640 if (pid == pdp->pd_portev->portkev_pid) { 641 /* 642 * remove polldat + port_event_t from cache 643 * only when current process did the 644 * association. 645 */ 646 port_remove_portfd(pdp, pcp); 647 } 648 } 649 } 650 if (lastclose) { 651 /* 652 * Wait for all the portfd's to be freed. 653 * The remaining portfd_t's are the once we did not 654 * free in port_remove_portfd since some other thread 655 * is closing the fd. These threads will free the portfd_t's 656 * once we drop the pc_lock mutex. 657 */ 658 while (pcp->pc_fdcount) { 659 (void) cv_wait_sig(&pcp->pc_lclosecv, &pcp->pc_lock); 660 } 661 /* event port vnode will be destroyed -> remove everything */ 662 pp->port_queue.portq_pcp = NULL; 663 } 664 mutex_exit(&pcp->pc_lock); 665 /* 666 * last close: 667 * pollwakeup() can not further interact with this cache 668 * (all polldat structs are removed from pollhead entries). 669 */ 670 if (lastclose) 671 port_pcache_destroy(pcp); 672 } 673