1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2012 Marcel Telka <marcel@telka.sk> 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 25 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. 26 * Copyright 2021 Racktop Systems, Inc. 27 */ 28 29 /* 30 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 31 * Use is subject to license terms. 32 */ 33 34 /* 35 * Copyright 1993 OpenVision Technologies, Inc., All Rights Reserved. 36 */ 37 38 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 39 /* All Rights Reserved */ 40 41 /* 42 * Portions of this source code were derived from Berkeley 4.3 BSD 43 * under license from the Regents of the University of California. 44 */ 45 46 /* 47 * Server-side remote procedure call interface. 48 * 49 * Master transport handle (SVCMASTERXPRT). 50 * The master transport handle structure is shared among service 51 * threads processing events on the transport. Some fields in the 52 * master structure are protected by locks 53 * - xp_req_lock protects the request queue: 54 * xp_req_head, xp_req_tail, xp_reqs, xp_size, xp_full, xp_enable 55 * - xp_thread_lock protects the thread (clone) counts 56 * xp_threads, xp_detached_threads, xp_wq 57 * Each master transport is registered to exactly one thread pool. 58 * 59 * Clone transport handle (SVCXPRT) 60 * The clone transport handle structure is a per-service-thread handle 61 * to the transport. The structure carries all the fields/buffers used 62 * for request processing. A service thread or, in other words, a clone 63 * structure, can be linked to an arbitrary master structure to process 64 * requests on this transport. The master handle keeps track of reference 65 * counts of threads (clones) linked to it. A service thread can switch 66 * to another transport by unlinking its clone handle from the current 67 * transport and linking to a new one. Switching is relatively inexpensive 68 * but it involves locking (master's xprt->xp_thread_lock). 69 * 70 * Pools. 71 * A pool represents a kernel RPC service (NFS, Lock Manager, etc.). 72 * Transports related to the service are registered to the service pool. 73 * Service threads can switch between different transports in the pool. 74 * Thus, each service has its own pool of service threads. The maximum 75 * number of threads in a pool is pool->p_maxthreads. This limit allows 76 * to restrict resource usage by the service. Some fields are protected 77 * by locks: 78 * - p_req_lock protects several counts and flags: 79 * p_reqs, p_size, p_walkers, p_asleep, p_drowsy, p_req_cv 80 * - p_thread_lock governs other thread counts: 81 * p_threads, p_detached_threads, p_reserved_threads, p_closing 82 * 83 * In addition, each pool contains a doubly-linked list of transports, 84 * an `xprt-ready' queue and a creator thread (see below). Threads in 85 * the pool share some other parameters such as stack size and 86 * polling timeout. 87 * 88 * Pools are initialized through the svc_pool_create() function called from 89 * the nfssys() system call. However, thread creation must be done by 90 * the userland agent. This is done by using SVCPOOL_WAIT and 91 * SVCPOOL_RUN arguments to nfssys(), which call svc_wait() and 92 * svc_do_run(), respectively. Once the pool has been initialized, 93 * the userland process must set up a 'creator' thread. This thread 94 * should park itself in the kernel by calling svc_wait(). If 95 * svc_wait() returns successfully, it should fork off a new worker 96 * thread, which then calls svc_do_run() in order to get work. When 97 * that thread is complete, svc_do_run() will return, and the user 98 * program should call thr_exit(). 99 * 100 * When we try to register a new pool and there is an old pool with 101 * the same id in the doubly linked pool list (this happens when we kill 102 * and restart nfsd or lockd), then we unlink the old pool from the list 103 * and mark its state as `closing'. After that the transports can still 104 * process requests but new transports won't be registered. When all the 105 * transports and service threads associated with the pool are gone the 106 * creator thread (see below) will clean up the pool structure and exit. 107 * 108 * svc_queuereq() and svc_run(). 109 * The kernel RPC server is interrupt driven. The svc_queuereq() interrupt 110 * routine is called to deliver an RPC request. The service threads 111 * loop in svc_run(). The interrupt function queues a request on the 112 * transport's queue and it makes sure that the request is serviced. 113 * It may either wake up one of sleeping threads, or ask for a new thread 114 * to be created, or, if the previous request is just being picked up, do 115 * nothing. In the last case the service thread that is picking up the 116 * previous request will wake up or create the next thread. After a service 117 * thread processes a request and sends a reply it returns to svc_run() 118 * and svc_run() calls svc_poll() to find new input. 119 * 120 * svc_poll(). 121 * In order to avoid unnecessary locking, which causes performance 122 * problems, we always look for a pending request on the current transport. 123 * If there is none we take a hint from the pool's `xprt-ready' queue. 124 * If the queue had an overflow we switch to the `drain' mode checking 125 * each transport in the pool's transport list. Once we find a 126 * master transport handle with a pending request we latch the request 127 * lock on this transport and return to svc_run(). If the request 128 * belongs to a transport different than the one the service thread is 129 * linked to we need to unlink and link again. 130 * 131 * A service thread goes asleep when there are no pending 132 * requests on the transports registered on the pool's transports. 133 * All the pool's threads sleep on the same condition variable. 134 * If a thread has been sleeping for too long period of time 135 * (by default 5 seconds) it wakes up and exits. Also when a transport 136 * is closing sleeping threads wake up to unlink from this transport. 137 * 138 * The `xprt-ready' queue. 139 * If a service thread finds no request on a transport it is currently linked 140 * to it will find another transport with a pending request. To make 141 * this search more efficient each pool has an `xprt-ready' queue. 142 * The queue is a FIFO. When the interrupt routine queues a request it also 143 * inserts a pointer to the transport into the `xprt-ready' queue. A 144 * thread looking for a transport with a pending request can pop up a 145 * transport and check for a request. The request can be already gone 146 * since it could be taken by a thread linked to that transport. In such a 147 * case we try the next hint. The `xprt-ready' queue has fixed size (by 148 * default 256 nodes). If it overflows svc_poll() has to switch to the 149 * less efficient but safe `drain' mode and walk through the pool's 150 * transport list. 151 * 152 * Both the svc_poll() loop and the `xprt-ready' queue are optimized 153 * for the peak load case that is for the situation when the queue is not 154 * empty, there are all the time few pending requests, and a service 155 * thread which has just processed a request does not go asleep but picks 156 * up immediately the next request. 157 * 158 * Thread creator. 159 * Each pool has a thread creator associated with it. The creator thread 160 * sleeps on a condition variable and waits for a signal to create a 161 * service thread. The actual thread creation is done in userland by 162 * the method described in "Pools" above. 163 * 164 * Signaling threads should turn on the `creator signaled' flag, and 165 * can avoid sending signals when the flag is on. The flag is cleared 166 * when the thread is created. 167 * 168 * When the pool is in closing state (ie it has been already unregistered 169 * from the pool list) the last thread on the last transport in the pool 170 * should turn the p_creator_exit flag on. The creator thread will 171 * clean up the pool structure and exit. 172 * 173 * Thread reservation; Detaching service threads. 174 * A service thread can detach itself to block for an extended amount 175 * of time. However, to keep the service active we need to guarantee 176 * at least pool->p_redline non-detached threads that can process incoming 177 * requests. This, the maximum number of detached and reserved threads is 178 * p->p_maxthreads - p->p_redline. A service thread should first acquire 179 * a reservation, and if the reservation was granted it can detach itself. 180 * If a reservation was granted but the thread does not detach itself 181 * it should cancel the reservation before it returns to svc_run(). 182 */ 183 184 #include <sys/param.h> 185 #include <sys/types.h> 186 #include <rpc/types.h> 187 #include <sys/socket.h> 188 #include <sys/time.h> 189 #include <sys/tiuser.h> 190 #include <sys/t_kuser.h> 191 #include <netinet/in.h> 192 #include <rpc/xdr.h> 193 #include <rpc/auth.h> 194 #include <rpc/clnt.h> 195 #include <rpc/rpc_msg.h> 196 #include <rpc/svc.h> 197 #include <sys/proc.h> 198 #include <sys/user.h> 199 #include <sys/stream.h> 200 #include <sys/strsubr.h> 201 #include <sys/strsun.h> 202 #include <sys/tihdr.h> 203 #include <sys/debug.h> 204 #include <sys/cmn_err.h> 205 #include <sys/file.h> 206 #include <sys/systm.h> 207 #include <sys/callb.h> 208 #include <sys/vtrace.h> 209 #include <sys/zone.h> 210 #include <nfs/nfs.h> 211 #include <sys/tsol/label_macro.h> 212 213 /* 214 * Defines for svc_poll() 215 */ 216 #define SVC_EXPRTGONE ((SVCMASTERXPRT *)1) /* Transport is closing */ 217 #define SVC_ETIMEDOUT ((SVCMASTERXPRT *)2) /* Timeout */ 218 #define SVC_EINTR ((SVCMASTERXPRT *)3) /* Interrupted by signal */ 219 220 /* 221 * Default stack size for service threads. 222 */ 223 #define DEFAULT_SVC_RUN_STKSIZE (0) /* default kernel stack */ 224 225 int svc_default_stksize = DEFAULT_SVC_RUN_STKSIZE; 226 227 /* 228 * Default polling timeout for service threads. 229 * Multiplied by hz when used. 230 */ 231 #define DEFAULT_SVC_POLL_TIMEOUT (5) /* seconds */ 232 233 clock_t svc_default_timeout = DEFAULT_SVC_POLL_TIMEOUT; 234 235 /* 236 * Size of the `xprt-ready' queue. 237 */ 238 #define DEFAULT_SVC_QSIZE (256) /* qnodes */ 239 240 size_t svc_default_qsize = DEFAULT_SVC_QSIZE; 241 242 /* 243 * Default limit for the number of service threads. 244 */ 245 #define DEFAULT_SVC_MAXTHREADS (INT16_MAX) 246 247 int svc_default_maxthreads = DEFAULT_SVC_MAXTHREADS; 248 249 /* 250 * Maximum number of requests from the same transport (in `drain' mode). 251 */ 252 #define DEFAULT_SVC_MAX_SAME_XPRT (8) 253 254 int svc_default_max_same_xprt = DEFAULT_SVC_MAX_SAME_XPRT; 255 256 257 /* 258 * Default `Redline' of non-detached threads. 259 * Total number of detached and reserved threads in an RPC server 260 * thread pool is limited to pool->p_maxthreads - svc_redline. 261 */ 262 #define DEFAULT_SVC_REDLINE (1) 263 264 int svc_default_redline = DEFAULT_SVC_REDLINE; 265 266 /* 267 * A node for the `xprt-ready' queue. 268 * See below. 269 */ 270 struct __svcxprt_qnode { 271 __SVCXPRT_QNODE *q_next; 272 SVCMASTERXPRT *q_xprt; 273 }; 274 275 /* 276 * Global SVC variables (private). 277 */ 278 struct svc_globals { 279 SVCPOOL *svc_pools; 280 kmutex_t svc_plock; 281 }; 282 283 /* 284 * Debug variable to check for rdma based 285 * transport startup and cleanup. Contorlled 286 * through /etc/system. Off by default. 287 */ 288 int rdma_check = 0; 289 290 /* 291 * This allows disabling flow control in svc_queuereq(). 292 */ 293 volatile int svc_flowcontrol_disable = 0; 294 295 /* 296 * Authentication parameters list. 297 */ 298 static caddr_t rqcred_head; 299 static kmutex_t rqcred_lock; 300 301 /* 302 * If true, then keep quiet about version mismatch. 303 * This macro is for broadcast RPC only. We have no broadcast RPC in 304 * kernel now but one may define a flag in the transport structure 305 * and redefine this macro. 306 */ 307 #define version_keepquiet(xprt) (FALSE) 308 309 /* 310 * ZSD key used to retrieve zone-specific svc globals 311 */ 312 static zone_key_t svc_zone_key; 313 314 static void svc_callout_free(SVCMASTERXPRT *); 315 static void svc_xprt_qinit(SVCPOOL *, size_t); 316 static void svc_xprt_qdestroy(SVCPOOL *); 317 static void svc_thread_creator(SVCPOOL *); 318 static void svc_creator_signal(SVCPOOL *); 319 static void svc_creator_signalexit(SVCPOOL *); 320 static void svc_pool_unregister(struct svc_globals *, SVCPOOL *); 321 static int svc_run(SVCPOOL *); 322 323 /* ARGSUSED */ 324 static void * 325 svc_zoneinit(zoneid_t zoneid) 326 { 327 struct svc_globals *svc; 328 329 svc = kmem_alloc(sizeof (*svc), KM_SLEEP); 330 mutex_init(&svc->svc_plock, NULL, MUTEX_DEFAULT, NULL); 331 svc->svc_pools = NULL; 332 return (svc); 333 } 334 335 /* ARGSUSED */ 336 static void 337 svc_zoneshutdown(zoneid_t zoneid, void *arg) 338 { 339 struct svc_globals *svc = arg; 340 SVCPOOL *pool; 341 342 mutex_enter(&svc->svc_plock); 343 while ((pool = svc->svc_pools) != NULL) { 344 svc_pool_unregister(svc, pool); 345 } 346 mutex_exit(&svc->svc_plock); 347 } 348 349 /* ARGSUSED */ 350 static void 351 svc_zonefini(zoneid_t zoneid, void *arg) 352 { 353 struct svc_globals *svc = arg; 354 355 ASSERT(svc->svc_pools == NULL); 356 mutex_destroy(&svc->svc_plock); 357 kmem_free(svc, sizeof (*svc)); 358 } 359 360 /* 361 * Global SVC init routine. 362 * Initialize global generic and transport type specific structures 363 * used by the kernel RPC server side. This routine is called only 364 * once when the module is being loaded. 365 */ 366 void 367 svc_init() 368 { 369 zone_key_create(&svc_zone_key, svc_zoneinit, svc_zoneshutdown, 370 svc_zonefini); 371 svc_cots_init(); 372 svc_clts_init(); 373 } 374 375 /* 376 * Destroy the SVCPOOL structure. 377 */ 378 static void 379 svc_pool_cleanup(SVCPOOL *pool) 380 { 381 ASSERT(pool->p_threads + pool->p_detached_threads == 0); 382 ASSERT(pool->p_lcount == 0); 383 ASSERT(pool->p_closing); 384 385 /* 386 * Call the user supplied shutdown function. This is done 387 * here so the user of the pool will be able to cleanup 388 * service related resources. 389 */ 390 if (pool->p_shutdown != NULL) 391 (pool->p_shutdown)(); 392 393 /* Destroy `xprt-ready' queue */ 394 svc_xprt_qdestroy(pool); 395 396 /* Destroy transport list */ 397 rw_destroy(&pool->p_lrwlock); 398 399 /* Destroy locks and condition variables */ 400 mutex_destroy(&pool->p_thread_lock); 401 mutex_destroy(&pool->p_req_lock); 402 cv_destroy(&pool->p_req_cv); 403 404 /* Destroy creator's locks and condition variables */ 405 mutex_destroy(&pool->p_creator_lock); 406 cv_destroy(&pool->p_creator_cv); 407 mutex_destroy(&pool->p_user_lock); 408 cv_destroy(&pool->p_user_cv); 409 410 /* Free pool structure */ 411 kmem_free(pool, sizeof (SVCPOOL)); 412 } 413 414 /* 415 * If all the transports and service threads are already gone 416 * signal the creator thread to clean up and exit. 417 */ 418 static bool_t 419 svc_pool_tryexit(SVCPOOL *pool) 420 { 421 ASSERT(MUTEX_HELD(&pool->p_thread_lock)); 422 ASSERT(pool->p_closing); 423 424 if (pool->p_threads + pool->p_detached_threads == 0) { 425 rw_enter(&pool->p_lrwlock, RW_READER); 426 if (pool->p_lcount == 0) { 427 /* 428 * Release the locks before sending a signal. 429 */ 430 rw_exit(&pool->p_lrwlock); 431 mutex_exit(&pool->p_thread_lock); 432 433 /* 434 * Notify the creator thread to clean up and exit 435 * 436 * NOTICE: No references to the pool beyond this point! 437 * The pool is being destroyed. 438 */ 439 ASSERT(!MUTEX_HELD(&pool->p_thread_lock)); 440 svc_creator_signalexit(pool); 441 442 return (TRUE); 443 } 444 rw_exit(&pool->p_lrwlock); 445 } 446 447 ASSERT(MUTEX_HELD(&pool->p_thread_lock)); 448 return (FALSE); 449 } 450 451 /* 452 * Find a pool with a given id. 453 */ 454 static SVCPOOL * 455 svc_pool_find(struct svc_globals *svc, int id) 456 { 457 SVCPOOL *pool; 458 459 ASSERT(MUTEX_HELD(&svc->svc_plock)); 460 461 /* 462 * Search the list for a pool with a matching id 463 * and register the transport handle with that pool. 464 */ 465 for (pool = svc->svc_pools; pool; pool = pool->p_next) 466 if (pool->p_id == id) 467 return (pool); 468 469 return (NULL); 470 } 471 472 /* 473 * PSARC 2003/523 Contract Private Interface 474 * svc_do_run 475 * Changes must be reviewed by Solaris File Sharing 476 * Changes must be communicated to contract-2003-523@sun.com 477 */ 478 int 479 svc_do_run(int id) 480 { 481 SVCPOOL *pool; 482 int err = 0; 483 struct svc_globals *svc; 484 485 svc = zone_getspecific(svc_zone_key, curproc->p_zone); 486 mutex_enter(&svc->svc_plock); 487 488 pool = svc_pool_find(svc, id); 489 490 mutex_exit(&svc->svc_plock); 491 492 if (pool == NULL) 493 return (ENOENT); 494 495 /* 496 * Increment counter of pool threads now 497 * that a thread has been created. 498 */ 499 mutex_enter(&pool->p_thread_lock); 500 pool->p_threads++; 501 mutex_exit(&pool->p_thread_lock); 502 503 /* Give work to the new thread. */ 504 err = svc_run(pool); 505 506 return (err); 507 } 508 509 /* 510 * Unregister a pool from the pool list. 511 * Set the closing state. If all the transports and service threads 512 * are already gone signal the creator thread to clean up and exit. 513 */ 514 static void 515 svc_pool_unregister(struct svc_globals *svc, SVCPOOL *pool) 516 { 517 SVCPOOL *next = pool->p_next; 518 SVCPOOL *prev = pool->p_prev; 519 520 ASSERT(MUTEX_HELD(&svc->svc_plock)); 521 522 /* Remove from the list */ 523 if (pool == svc->svc_pools) 524 svc->svc_pools = next; 525 if (next) 526 next->p_prev = prev; 527 if (prev) 528 prev->p_next = next; 529 pool->p_next = pool->p_prev = NULL; 530 531 /* 532 * Offline the pool. Mark the pool as closing. 533 * If there are no transports in this pool notify 534 * the creator thread to clean it up and exit. 535 */ 536 mutex_enter(&pool->p_thread_lock); 537 if (pool->p_offline != NULL) 538 (pool->p_offline)(); 539 pool->p_closing = TRUE; 540 if (svc_pool_tryexit(pool)) 541 return; 542 mutex_exit(&pool->p_thread_lock); 543 } 544 545 /* 546 * Register a pool with a given id in the global doubly linked pool list. 547 * - if there is a pool with the same id in the list then unregister it 548 * - insert the new pool into the list. 549 */ 550 static void 551 svc_pool_register(struct svc_globals *svc, SVCPOOL *pool, int id) 552 { 553 SVCPOOL *old_pool; 554 555 /* 556 * If there is a pool with the same id then remove it from 557 * the list and mark the pool as closing. 558 */ 559 mutex_enter(&svc->svc_plock); 560 561 if (old_pool = svc_pool_find(svc, id)) 562 svc_pool_unregister(svc, old_pool); 563 564 /* Insert into the doubly linked list */ 565 pool->p_id = id; 566 pool->p_next = svc->svc_pools; 567 pool->p_prev = NULL; 568 if (svc->svc_pools) 569 svc->svc_pools->p_prev = pool; 570 svc->svc_pools = pool; 571 572 mutex_exit(&svc->svc_plock); 573 } 574 575 /* 576 * Initialize a newly created pool structure 577 */ 578 static int 579 svc_pool_init(SVCPOOL *pool, uint_t maxthreads, uint_t redline, 580 uint_t qsize, uint_t timeout, uint_t stksize, uint_t max_same_xprt) 581 { 582 klwp_t *lwp = ttolwp(curthread); 583 584 ASSERT(pool); 585 586 if (maxthreads == 0) 587 maxthreads = svc_default_maxthreads; 588 if (redline == 0) 589 redline = svc_default_redline; 590 if (qsize == 0) 591 qsize = svc_default_qsize; 592 if (timeout == 0) 593 timeout = svc_default_timeout; 594 if (stksize == 0) 595 stksize = svc_default_stksize; 596 if (max_same_xprt == 0) 597 max_same_xprt = svc_default_max_same_xprt; 598 599 if (maxthreads < redline) 600 return (EINVAL); 601 602 /* Allocate and initialize the `xprt-ready' queue */ 603 svc_xprt_qinit(pool, qsize); 604 605 /* Initialize doubly-linked xprt list */ 606 rw_init(&pool->p_lrwlock, NULL, RW_DEFAULT, NULL); 607 608 /* 609 * Setting lwp_childstksz on the current lwp so that 610 * descendants of this lwp get the modified stacksize, if 611 * it is defined. It is important that either this lwp or 612 * one of its descendants do the actual servicepool thread 613 * creation to maintain the stacksize inheritance. 614 */ 615 if (lwp != NULL) 616 lwp->lwp_childstksz = stksize; 617 618 /* Initialize thread limits, locks and condition variables */ 619 pool->p_maxthreads = maxthreads; 620 pool->p_redline = redline; 621 pool->p_timeout = timeout * hz; 622 pool->p_stksize = stksize; 623 pool->p_max_same_xprt = max_same_xprt; 624 mutex_init(&pool->p_thread_lock, NULL, MUTEX_DEFAULT, NULL); 625 mutex_init(&pool->p_req_lock, NULL, MUTEX_DEFAULT, NULL); 626 cv_init(&pool->p_req_cv, NULL, CV_DEFAULT, NULL); 627 628 /* Initialize userland creator */ 629 pool->p_user_exit = FALSE; 630 pool->p_signal_create_thread = FALSE; 631 pool->p_user_waiting = FALSE; 632 mutex_init(&pool->p_user_lock, NULL, MUTEX_DEFAULT, NULL); 633 cv_init(&pool->p_user_cv, NULL, CV_DEFAULT, NULL); 634 635 /* Initialize the creator and start the creator thread */ 636 pool->p_creator_exit = FALSE; 637 mutex_init(&pool->p_creator_lock, NULL, MUTEX_DEFAULT, NULL); 638 cv_init(&pool->p_creator_cv, NULL, CV_DEFAULT, NULL); 639 640 (void) zthread_create(NULL, pool->p_stksize, svc_thread_creator, 641 pool, 0, minclsyspri); 642 643 return (0); 644 } 645 646 /* 647 * PSARC 2003/523 Contract Private Interface 648 * svc_pool_create 649 * Changes must be reviewed by Solaris File Sharing 650 * Changes must be communicated to contract-2003-523@sun.com 651 * 652 * Create an kernel RPC server-side thread/transport pool. 653 * 654 * This is public interface for creation of a server RPC thread pool 655 * for a given service provider. Transports registered with the pool's id 656 * will be served by a pool's threads. This function is called from the 657 * nfssys() system call. 658 */ 659 int 660 svc_pool_create(struct svcpool_args *args) 661 { 662 SVCPOOL *pool; 663 int error; 664 struct svc_globals *svc; 665 666 /* 667 * Caller should check credentials in a way appropriate 668 * in the context of the call. 669 */ 670 671 svc = zone_getspecific(svc_zone_key, curproc->p_zone); 672 /* Allocate a new pool */ 673 pool = kmem_zalloc(sizeof (SVCPOOL), KM_SLEEP); 674 675 /* 676 * Initialize the pool structure and create a creator thread. 677 */ 678 error = svc_pool_init(pool, args->maxthreads, args->redline, 679 args->qsize, args->timeout, args->stksize, args->max_same_xprt); 680 681 if (error) { 682 kmem_free(pool, sizeof (SVCPOOL)); 683 return (error); 684 } 685 686 /* Register the pool with the global pool list */ 687 svc_pool_register(svc, pool, args->id); 688 689 return (0); 690 } 691 692 int 693 svc_pool_control(int id, int cmd, void *arg) 694 { 695 SVCPOOL *pool; 696 struct svc_globals *svc; 697 698 svc = zone_getspecific(svc_zone_key, curproc->p_zone); 699 700 switch (cmd) { 701 case SVCPSET_SHUTDOWN_PROC: 702 /* 703 * Search the list for a pool with a matching id 704 * and register the transport handle with that pool. 705 */ 706 mutex_enter(&svc->svc_plock); 707 708 if ((pool = svc_pool_find(svc, id)) == NULL) { 709 mutex_exit(&svc->svc_plock); 710 return (ENOENT); 711 } 712 /* 713 * Grab the transport list lock before releasing the 714 * pool list lock 715 */ 716 rw_enter(&pool->p_lrwlock, RW_WRITER); 717 mutex_exit(&svc->svc_plock); 718 719 pool->p_shutdown = *((void (*)())arg); 720 721 rw_exit(&pool->p_lrwlock); 722 723 return (0); 724 case SVCPSET_UNREGISTER_PROC: 725 /* 726 * Search the list for a pool with a matching id 727 * and register the unregister callback handle with that pool. 728 */ 729 mutex_enter(&svc->svc_plock); 730 731 if ((pool = svc_pool_find(svc, id)) == NULL) { 732 mutex_exit(&svc->svc_plock); 733 return (ENOENT); 734 } 735 /* 736 * Grab the transport list lock before releasing the 737 * pool list lock 738 */ 739 rw_enter(&pool->p_lrwlock, RW_WRITER); 740 mutex_exit(&svc->svc_plock); 741 742 pool->p_offline = *((void (*)())arg); 743 744 rw_exit(&pool->p_lrwlock); 745 746 return (0); 747 default: 748 return (EINVAL); 749 } 750 } 751 752 /* 753 * Pool's transport list manipulation routines. 754 * - svc_xprt_register() 755 * - svc_xprt_unregister() 756 * 757 * svc_xprt_register() is called from svc_tli_kcreate() to 758 * insert a new master transport handle into the doubly linked 759 * list of server transport handles (one list per pool). 760 * 761 * The list is used by svc_poll(), when it operates in `drain' 762 * mode, to search for a next transport with a pending request. 763 */ 764 765 int 766 svc_xprt_register(SVCMASTERXPRT *xprt, int id) 767 { 768 SVCMASTERXPRT *prev, *next; 769 SVCPOOL *pool; 770 struct svc_globals *svc; 771 772 svc = zone_getspecific(svc_zone_key, curproc->p_zone); 773 /* 774 * Search the list for a pool with a matching id 775 * and register the transport handle with that pool. 776 */ 777 mutex_enter(&svc->svc_plock); 778 779 if ((pool = svc_pool_find(svc, id)) == NULL) { 780 mutex_exit(&svc->svc_plock); 781 return (ENOENT); 782 } 783 784 /* Grab the transport list lock before releasing the pool list lock */ 785 rw_enter(&pool->p_lrwlock, RW_WRITER); 786 mutex_exit(&svc->svc_plock); 787 788 /* Don't register new transports when the pool is in closing state */ 789 if (pool->p_closing) { 790 rw_exit(&pool->p_lrwlock); 791 return (EBUSY); 792 } 793 794 /* 795 * Initialize xp_pool to point to the pool. 796 * We don't want to go through the pool list every time. 797 */ 798 xprt->xp_pool = pool; 799 800 /* 801 * Insert a transport handle into the list. 802 * The list head points to the most recently inserted transport. 803 */ 804 if (pool->p_lhead == NULL) 805 pool->p_lhead = xprt->xp_prev = xprt->xp_next = xprt; 806 else { 807 next = pool->p_lhead; 808 prev = pool->p_lhead->xp_prev; 809 810 xprt->xp_next = next; 811 xprt->xp_prev = prev; 812 813 pool->p_lhead = prev->xp_next = next->xp_prev = xprt; 814 } 815 816 /* Increment the transports count */ 817 pool->p_lcount++; 818 819 rw_exit(&pool->p_lrwlock); 820 return (0); 821 } 822 823 /* 824 * Called from svc_xprt_cleanup() to remove a master transport handle 825 * from the pool's list of server transports (when a transport is 826 * being destroyed). 827 */ 828 void 829 svc_xprt_unregister(SVCMASTERXPRT *xprt) 830 { 831 SVCPOOL *pool = xprt->xp_pool; 832 833 /* 834 * Unlink xprt from the list. 835 * If the list head points to this xprt then move it 836 * to the next xprt or reset to NULL if this is the last 837 * xprt in the list. 838 */ 839 rw_enter(&pool->p_lrwlock, RW_WRITER); 840 841 if (xprt == xprt->xp_next) 842 pool->p_lhead = NULL; 843 else { 844 SVCMASTERXPRT *next = xprt->xp_next; 845 SVCMASTERXPRT *prev = xprt->xp_prev; 846 847 next->xp_prev = prev; 848 prev->xp_next = next; 849 850 if (pool->p_lhead == xprt) 851 pool->p_lhead = next; 852 } 853 854 xprt->xp_next = xprt->xp_prev = NULL; 855 856 /* Decrement list count */ 857 pool->p_lcount--; 858 859 rw_exit(&pool->p_lrwlock); 860 } 861 862 static void 863 svc_xprt_qdestroy(SVCPOOL *pool) 864 { 865 mutex_destroy(&pool->p_qend_lock); 866 kmem_free(pool->p_qbody, pool->p_qsize * sizeof (__SVCXPRT_QNODE)); 867 } 868 869 /* 870 * Initialize an `xprt-ready' queue for a given pool. 871 */ 872 static void 873 svc_xprt_qinit(SVCPOOL *pool, size_t qsize) 874 { 875 int i; 876 877 pool->p_qsize = qsize; 878 pool->p_qbody = kmem_zalloc(pool->p_qsize * sizeof (__SVCXPRT_QNODE), 879 KM_SLEEP); 880 881 for (i = 0; i < pool->p_qsize - 1; i++) 882 pool->p_qbody[i].q_next = &(pool->p_qbody[i+1]); 883 884 pool->p_qbody[pool->p_qsize-1].q_next = &(pool->p_qbody[0]); 885 pool->p_qtop = &(pool->p_qbody[0]); 886 pool->p_qend = &(pool->p_qbody[0]); 887 888 mutex_init(&pool->p_qend_lock, NULL, MUTEX_DEFAULT, NULL); 889 } 890 891 /* 892 * Called from the svc_queuereq() interrupt routine to queue 893 * a hint for svc_poll() which transport has a pending request. 894 * - insert a pointer to xprt into the xprt-ready queue (FIFO) 895 * - if the xprt-ready queue is full turn the overflow flag on. 896 * 897 * NOTICE: pool->p_qtop is protected by the pool's request lock 898 * and the caller (svc_queuereq()) must hold the lock. 899 */ 900 static void 901 svc_xprt_qput(SVCPOOL *pool, SVCMASTERXPRT *xprt) 902 { 903 ASSERT(MUTEX_HELD(&pool->p_req_lock)); 904 905 /* If the overflow flag is on there is nothing we can do */ 906 if (pool->p_qoverflow) 907 return; 908 909 /* If the queue is full turn the overflow flag on and exit */ 910 if (pool->p_qtop->q_next == pool->p_qend) { 911 mutex_enter(&pool->p_qend_lock); 912 if (pool->p_qtop->q_next == pool->p_qend) { 913 pool->p_qoverflow = TRUE; 914 mutex_exit(&pool->p_qend_lock); 915 return; 916 } 917 mutex_exit(&pool->p_qend_lock); 918 } 919 920 /* Insert a hint and move pool->p_qtop */ 921 pool->p_qtop->q_xprt = xprt; 922 pool->p_qtop = pool->p_qtop->q_next; 923 } 924 925 /* 926 * Called from svc_poll() to get a hint which transport has a 927 * pending request. Returns a pointer to a transport or NULL if the 928 * `xprt-ready' queue is empty. 929 * 930 * Since we do not acquire the pool's request lock while checking if 931 * the queue is empty we may miss a request that is just being delivered. 932 * However this is ok since svc_poll() will retry again until the 933 * count indicates that there are pending requests for this pool. 934 */ 935 static SVCMASTERXPRT * 936 svc_xprt_qget(SVCPOOL *pool) 937 { 938 SVCMASTERXPRT *xprt; 939 940 mutex_enter(&pool->p_qend_lock); 941 do { 942 /* 943 * If the queue is empty return NULL. 944 * Since we do not acquire the pool's request lock which 945 * protects pool->p_qtop this is not exact check. However, 946 * this is safe - if we miss a request here svc_poll() 947 * will retry again. 948 */ 949 if (pool->p_qend == pool->p_qtop) { 950 mutex_exit(&pool->p_qend_lock); 951 return (NULL); 952 } 953 954 /* Get a hint and move pool->p_qend */ 955 xprt = pool->p_qend->q_xprt; 956 pool->p_qend = pool->p_qend->q_next; 957 958 /* Skip fields deleted by svc_xprt_qdelete() */ 959 } while (xprt == NULL); 960 mutex_exit(&pool->p_qend_lock); 961 962 return (xprt); 963 } 964 965 /* 966 * Delete all the references to a transport handle that 967 * is being destroyed from the xprt-ready queue. 968 * Deleted pointers are replaced with NULLs. 969 */ 970 static void 971 svc_xprt_qdelete(SVCPOOL *pool, SVCMASTERXPRT *xprt) 972 { 973 __SVCXPRT_QNODE *q; 974 975 mutex_enter(&pool->p_req_lock); 976 for (q = pool->p_qend; q != pool->p_qtop; q = q->q_next) { 977 if (q->q_xprt == xprt) 978 q->q_xprt = NULL; 979 } 980 mutex_exit(&pool->p_req_lock); 981 } 982 983 /* 984 * Destructor for a master server transport handle. 985 * - if there are no more non-detached threads linked to this transport 986 * then, if requested, call xp_closeproc (we don't wait for detached 987 * threads linked to this transport to complete). 988 * - if there are no more threads linked to this 989 * transport then 990 * a) remove references to this transport from the xprt-ready queue 991 * b) remove a reference to this transport from the pool's transport list 992 * c) call a transport specific `destroy' function 993 * d) cancel remaining thread reservations. 994 * 995 * NOTICE: Caller must hold the transport's thread lock. 996 */ 997 static void 998 svc_xprt_cleanup(SVCMASTERXPRT *xprt, bool_t detached) 999 { 1000 ASSERT(MUTEX_HELD(&xprt->xp_thread_lock)); 1001 ASSERT(xprt->xp_wq == NULL); 1002 1003 /* 1004 * If called from the last non-detached thread 1005 * it should call the closeproc on this transport. 1006 */ 1007 if (!detached && xprt->xp_threads == 0 && xprt->xp_closeproc) { 1008 (*(xprt->xp_closeproc)) (xprt); 1009 } 1010 1011 if (xprt->xp_threads + xprt->xp_detached_threads > 0) 1012 mutex_exit(&xprt->xp_thread_lock); 1013 else { 1014 /* Remove references to xprt from the `xprt-ready' queue */ 1015 svc_xprt_qdelete(xprt->xp_pool, xprt); 1016 1017 /* Unregister xprt from the pool's transport list */ 1018 svc_xprt_unregister(xprt); 1019 svc_callout_free(xprt); 1020 SVC_DESTROY(xprt); 1021 } 1022 } 1023 1024 /* 1025 * Find a dispatch routine for a given prog/vers pair. 1026 * This function is called from svc_getreq() to search the callout 1027 * table for an entry with a matching RPC program number `prog' 1028 * and a version range that covers `vers'. 1029 * - if it finds a matching entry it returns pointer to the dispatch routine 1030 * - otherwise it returns NULL and fills both vers_min and vers_max 1031 * with, respectively, lowest version and highest version 1032 * supported for the program `prog' 1033 */ 1034 static SVC_DISPATCH * 1035 svc_callout_find(SVCXPRT *xprt, rpcprog_t prog, rpcvers_t vers, 1036 rpcvers_t *vers_min, rpcvers_t *vers_max) 1037 { 1038 SVC_CALLOUT_TABLE *sct = xprt->xp_sct; 1039 int i; 1040 1041 *vers_min = ~(rpcvers_t)0; 1042 *vers_max = 0; 1043 1044 for (i = 0; i < sct->sct_size; i++) { 1045 SVC_CALLOUT *sc = &sct->sct_sc[i]; 1046 1047 if (prog == sc->sc_prog) { 1048 if (vers >= sc->sc_versmin && vers <= sc->sc_versmax) 1049 return (sc->sc_dispatch); 1050 1051 if (*vers_max < sc->sc_versmax) 1052 *vers_max = sc->sc_versmax; 1053 if (*vers_min > sc->sc_versmin) 1054 *vers_min = sc->sc_versmin; 1055 } 1056 } 1057 1058 return (NULL); 1059 } 1060 1061 /* 1062 * Optionally free callout table allocated for this transport by 1063 * the service provider. 1064 */ 1065 static void 1066 svc_callout_free(SVCMASTERXPRT *xprt) 1067 { 1068 SVC_CALLOUT_TABLE *sct = xprt->xp_sct; 1069 1070 if (sct->sct_free) { 1071 kmem_free(sct->sct_sc, sct->sct_size * sizeof (SVC_CALLOUT)); 1072 kmem_free(sct, sizeof (SVC_CALLOUT_TABLE)); 1073 } 1074 } 1075 1076 /* 1077 * Send a reply to an RPC request 1078 * 1079 * PSARC 2003/523 Contract Private Interface 1080 * svc_sendreply 1081 * Changes must be reviewed by Solaris File Sharing 1082 * Changes must be communicated to contract-2003-523@sun.com 1083 */ 1084 bool_t 1085 svc_sendreply(const SVCXPRT *clone_xprt, const xdrproc_t xdr_results, 1086 const caddr_t xdr_location) 1087 { 1088 struct rpc_msg rply; 1089 1090 rply.rm_direction = REPLY; 1091 rply.rm_reply.rp_stat = MSG_ACCEPTED; 1092 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 1093 rply.acpted_rply.ar_stat = SUCCESS; 1094 rply.acpted_rply.ar_results.where = xdr_location; 1095 rply.acpted_rply.ar_results.proc = xdr_results; 1096 1097 return (SVC_REPLY((SVCXPRT *)clone_xprt, &rply)); 1098 } 1099 1100 /* 1101 * No procedure error reply 1102 * 1103 * PSARC 2003/523 Contract Private Interface 1104 * svcerr_noproc 1105 * Changes must be reviewed by Solaris File Sharing 1106 * Changes must be communicated to contract-2003-523@sun.com 1107 */ 1108 void 1109 svcerr_noproc(const SVCXPRT *clone_xprt) 1110 { 1111 struct rpc_msg rply; 1112 1113 rply.rm_direction = REPLY; 1114 rply.rm_reply.rp_stat = MSG_ACCEPTED; 1115 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 1116 rply.acpted_rply.ar_stat = PROC_UNAVAIL; 1117 SVC_FREERES((SVCXPRT *)clone_xprt); 1118 SVC_REPLY((SVCXPRT *)clone_xprt, &rply); 1119 } 1120 1121 /* 1122 * Can't decode arguments error reply 1123 * 1124 * PSARC 2003/523 Contract Private Interface 1125 * svcerr_decode 1126 * Changes must be reviewed by Solaris File Sharing 1127 * Changes must be communicated to contract-2003-523@sun.com 1128 */ 1129 void 1130 svcerr_decode(const SVCXPRT *clone_xprt) 1131 { 1132 struct rpc_msg rply; 1133 1134 rply.rm_direction = REPLY; 1135 rply.rm_reply.rp_stat = MSG_ACCEPTED; 1136 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 1137 rply.acpted_rply.ar_stat = GARBAGE_ARGS; 1138 SVC_FREERES((SVCXPRT *)clone_xprt); 1139 SVC_REPLY((SVCXPRT *)clone_xprt, &rply); 1140 } 1141 1142 /* 1143 * Some system error 1144 */ 1145 void 1146 svcerr_systemerr(const SVCXPRT *clone_xprt) 1147 { 1148 struct rpc_msg rply; 1149 1150 rply.rm_direction = REPLY; 1151 rply.rm_reply.rp_stat = MSG_ACCEPTED; 1152 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 1153 rply.acpted_rply.ar_stat = SYSTEM_ERR; 1154 SVC_FREERES((SVCXPRT *)clone_xprt); 1155 SVC_REPLY((SVCXPRT *)clone_xprt, &rply); 1156 } 1157 1158 /* 1159 * Authentication error reply 1160 */ 1161 void 1162 svcerr_auth(const SVCXPRT *clone_xprt, const enum auth_stat why) 1163 { 1164 struct rpc_msg rply; 1165 1166 rply.rm_direction = REPLY; 1167 rply.rm_reply.rp_stat = MSG_DENIED; 1168 rply.rjcted_rply.rj_stat = AUTH_ERROR; 1169 rply.rjcted_rply.rj_why = why; 1170 SVC_FREERES((SVCXPRT *)clone_xprt); 1171 SVC_REPLY((SVCXPRT *)clone_xprt, &rply); 1172 } 1173 1174 /* 1175 * Authentication too weak error reply 1176 */ 1177 void 1178 svcerr_weakauth(const SVCXPRT *clone_xprt) 1179 { 1180 svcerr_auth((SVCXPRT *)clone_xprt, AUTH_TOOWEAK); 1181 } 1182 1183 /* 1184 * Authentication error; bad credentials 1185 */ 1186 void 1187 svcerr_badcred(const SVCXPRT *clone_xprt) 1188 { 1189 struct rpc_msg rply; 1190 1191 rply.rm_direction = REPLY; 1192 rply.rm_reply.rp_stat = MSG_DENIED; 1193 rply.rjcted_rply.rj_stat = AUTH_ERROR; 1194 rply.rjcted_rply.rj_why = AUTH_BADCRED; 1195 SVC_FREERES((SVCXPRT *)clone_xprt); 1196 SVC_REPLY((SVCXPRT *)clone_xprt, &rply); 1197 } 1198 1199 /* 1200 * Program unavailable error reply 1201 * 1202 * PSARC 2003/523 Contract Private Interface 1203 * svcerr_noprog 1204 * Changes must be reviewed by Solaris File Sharing 1205 * Changes must be communicated to contract-2003-523@sun.com 1206 */ 1207 void 1208 svcerr_noprog(const SVCXPRT *clone_xprt) 1209 { 1210 struct rpc_msg rply; 1211 1212 rply.rm_direction = REPLY; 1213 rply.rm_reply.rp_stat = MSG_ACCEPTED; 1214 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 1215 rply.acpted_rply.ar_stat = PROG_UNAVAIL; 1216 SVC_FREERES((SVCXPRT *)clone_xprt); 1217 SVC_REPLY((SVCXPRT *)clone_xprt, &rply); 1218 } 1219 1220 /* 1221 * Program version mismatch error reply 1222 * 1223 * PSARC 2003/523 Contract Private Interface 1224 * svcerr_progvers 1225 * Changes must be reviewed by Solaris File Sharing 1226 * Changes must be communicated to contract-2003-523@sun.com 1227 */ 1228 void 1229 svcerr_progvers(const SVCXPRT *clone_xprt, 1230 const rpcvers_t low_vers, const rpcvers_t high_vers) 1231 { 1232 struct rpc_msg rply; 1233 1234 rply.rm_direction = REPLY; 1235 rply.rm_reply.rp_stat = MSG_ACCEPTED; 1236 rply.acpted_rply.ar_verf = clone_xprt->xp_verf; 1237 rply.acpted_rply.ar_stat = PROG_MISMATCH; 1238 rply.acpted_rply.ar_vers.low = low_vers; 1239 rply.acpted_rply.ar_vers.high = high_vers; 1240 SVC_FREERES((SVCXPRT *)clone_xprt); 1241 SVC_REPLY((SVCXPRT *)clone_xprt, &rply); 1242 } 1243 1244 /* 1245 * Get server side input from some transport. 1246 * 1247 * Statement of authentication parameters management: 1248 * This function owns and manages all authentication parameters, specifically 1249 * the "raw" parameters (msg.rm_call.cb_cred and msg.rm_call.cb_verf) and 1250 * the "cooked" credentials (rqst->rq_clntcred). 1251 * However, this function does not know the structure of the cooked 1252 * credentials, so it make the following assumptions: 1253 * a) the structure is contiguous (no pointers), and 1254 * b) the cred structure size does not exceed RQCRED_SIZE bytes. 1255 * In all events, all three parameters are freed upon exit from this routine. 1256 * The storage is trivially managed on the call stack in user land, but 1257 * is malloced in kernel land. 1258 * 1259 * Note: the xprt's xp_svc_lock is not held while the service's dispatch 1260 * routine is running. If we decide to implement svc_unregister(), we'll 1261 * need to decide whether it's okay for a thread to unregister a service 1262 * while a request is being processed. If we decide that this is a 1263 * problem, we can probably use some sort of reference counting scheme to 1264 * keep the callout entry from going away until the request has completed. 1265 */ 1266 static void 1267 svc_getreq( 1268 SVCXPRT *clone_xprt, /* clone transport handle */ 1269 mblk_t *mp) 1270 { 1271 struct rpc_msg msg; 1272 struct svc_req r; 1273 char *cred_area; /* too big to allocate on call stack */ 1274 1275 TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_START, 1276 "svc_getreq_start:"); 1277 1278 ASSERT(clone_xprt->xp_master != NULL); 1279 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL || 1280 mp->b_datap->db_type != M_DATA); 1281 1282 /* 1283 * Firstly, allocate the authentication parameters' storage 1284 */ 1285 mutex_enter(&rqcred_lock); 1286 if (rqcred_head) { 1287 cred_area = rqcred_head; 1288 1289 /* LINTED pointer alignment */ 1290 rqcred_head = *(caddr_t *)rqcred_head; 1291 mutex_exit(&rqcred_lock); 1292 } else { 1293 mutex_exit(&rqcred_lock); 1294 cred_area = kmem_alloc(2 * MAX_AUTH_BYTES + RQCRED_SIZE, 1295 KM_SLEEP); 1296 } 1297 msg.rm_call.cb_cred.oa_base = cred_area; 1298 msg.rm_call.cb_verf.oa_base = &(cred_area[MAX_AUTH_BYTES]); 1299 r.rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]); 1300 1301 /* 1302 * underlying transport recv routine may modify mblk data 1303 * and make it difficult to extract label afterwards. So 1304 * get the label from the raw mblk data now. 1305 */ 1306 if (is_system_labeled()) { 1307 cred_t *cr; 1308 1309 r.rq_label = kmem_alloc(sizeof (bslabel_t), KM_SLEEP); 1310 cr = msg_getcred(mp, NULL); 1311 ASSERT(cr != NULL); 1312 1313 bcopy(label2bslabel(crgetlabel(cr)), r.rq_label, 1314 sizeof (bslabel_t)); 1315 } else { 1316 r.rq_label = NULL; 1317 } 1318 1319 /* 1320 * Now receive a message from the transport. 1321 */ 1322 if (SVC_RECV(clone_xprt, mp, &msg)) { 1323 void (*dispatchroutine) (struct svc_req *, SVCXPRT *); 1324 rpcvers_t vers_min; 1325 rpcvers_t vers_max; 1326 bool_t no_dispatch; 1327 enum auth_stat why; 1328 1329 /* 1330 * Find the registered program and call its 1331 * dispatch routine. 1332 */ 1333 r.rq_xprt = clone_xprt; 1334 r.rq_prog = msg.rm_call.cb_prog; 1335 r.rq_vers = msg.rm_call.cb_vers; 1336 r.rq_proc = msg.rm_call.cb_proc; 1337 r.rq_cred = msg.rm_call.cb_cred; 1338 1339 /* 1340 * First authenticate the message. 1341 */ 1342 TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_START, 1343 "svc_getreq_auth_start:"); 1344 if ((why = sec_svc_msg(&r, &msg, &no_dispatch)) != AUTH_OK) { 1345 TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END, 1346 "svc_getreq_auth_end:(%S)", "failed"); 1347 svcerr_auth(clone_xprt, why); 1348 /* 1349 * Free the arguments. 1350 */ 1351 (void) SVC_FREEARGS(clone_xprt, NULL, NULL); 1352 } else if (no_dispatch) { 1353 /* 1354 * XXX - when bug id 4053736 is done, remove 1355 * the SVC_FREEARGS() call. 1356 */ 1357 (void) SVC_FREEARGS(clone_xprt, NULL, NULL); 1358 } else { 1359 TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END, 1360 "svc_getreq_auth_end:(%S)", "good"); 1361 1362 dispatchroutine = svc_callout_find(clone_xprt, 1363 r.rq_prog, r.rq_vers, &vers_min, &vers_max); 1364 1365 if (dispatchroutine) { 1366 (*dispatchroutine) (&r, clone_xprt); 1367 } else { 1368 /* 1369 * If we got here, the program or version 1370 * is not served ... 1371 */ 1372 if (vers_max == 0 || 1373 version_keepquiet(clone_xprt)) 1374 svcerr_noprog(clone_xprt); 1375 else 1376 svcerr_progvers(clone_xprt, vers_min, 1377 vers_max); 1378 1379 /* 1380 * Free the arguments. For successful calls 1381 * this is done by the dispatch routine. 1382 */ 1383 (void) SVC_FREEARGS(clone_xprt, NULL, NULL); 1384 /* Fall through to ... */ 1385 } 1386 /* 1387 * Call cleanup procedure for RPCSEC_GSS. 1388 * This is a hack since there is currently no 1389 * op, such as SVC_CLEANAUTH. rpc_gss_cleanup 1390 * should only be called for a non null proc. 1391 * Null procs in RPC GSS are overloaded to 1392 * provide context setup and control. The main 1393 * purpose of rpc_gss_cleanup is to decrement the 1394 * reference count associated with the cached 1395 * GSS security context. We should never get here 1396 * for an RPCSEC_GSS null proc since *no_dispatch 1397 * would have been set to true from sec_svc_msg above. 1398 */ 1399 if (r.rq_cred.oa_flavor == RPCSEC_GSS) 1400 rpc_gss_cleanup(clone_xprt); 1401 } 1402 } 1403 1404 if (r.rq_label != NULL) 1405 kmem_free(r.rq_label, sizeof (bslabel_t)); 1406 1407 /* 1408 * Free authentication parameters' storage 1409 */ 1410 mutex_enter(&rqcred_lock); 1411 /* LINTED pointer alignment */ 1412 *(caddr_t *)cred_area = rqcred_head; 1413 rqcred_head = cred_area; 1414 mutex_exit(&rqcred_lock); 1415 } 1416 1417 /* 1418 * Allocate new clone transport handle. 1419 */ 1420 SVCXPRT * 1421 svc_clone_init(void) 1422 { 1423 SVCXPRT *clone_xprt; 1424 1425 clone_xprt = kmem_zalloc(sizeof (SVCXPRT), KM_SLEEP); 1426 clone_xprt->xp_cred = crget(); 1427 return (clone_xprt); 1428 } 1429 1430 /* 1431 * Free memory allocated by svc_clone_init. 1432 */ 1433 void 1434 svc_clone_free(SVCXPRT *clone_xprt) 1435 { 1436 /* Fre credentials from crget() */ 1437 if (clone_xprt->xp_cred) 1438 crfree(clone_xprt->xp_cred); 1439 kmem_free(clone_xprt, sizeof (SVCXPRT)); 1440 } 1441 1442 /* 1443 * Link a per-thread clone transport handle to a master 1444 * - increment a thread reference count on the master 1445 * - copy some of the master's fields to the clone 1446 * - call a transport specific clone routine. 1447 */ 1448 void 1449 svc_clone_link(SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt, SVCXPRT *clone_xprt2) 1450 { 1451 cred_t *cred = clone_xprt->xp_cred; 1452 1453 ASSERT(cred); 1454 1455 /* 1456 * Bump up master's thread count. 1457 * Linking a per-thread clone transport handle to a master 1458 * associates a service thread with the master. 1459 */ 1460 mutex_enter(&xprt->xp_thread_lock); 1461 xprt->xp_threads++; 1462 mutex_exit(&xprt->xp_thread_lock); 1463 1464 /* Clear everything */ 1465 bzero(clone_xprt, sizeof (SVCXPRT)); 1466 1467 /* Set pointer to the master transport stucture */ 1468 clone_xprt->xp_master = xprt; 1469 1470 /* Structure copy of all the common fields */ 1471 clone_xprt->xp_xpc = xprt->xp_xpc; 1472 1473 /* Restore per-thread fields (xp_cred) */ 1474 clone_xprt->xp_cred = cred; 1475 1476 if (clone_xprt2) 1477 SVC_CLONE_XPRT(clone_xprt2, clone_xprt); 1478 } 1479 1480 /* 1481 * Unlink a non-detached clone transport handle from a master 1482 * - decrement a thread reference count on the master 1483 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup(); 1484 * if this is the last non-detached/absolute thread on this transport 1485 * then it will close/destroy the transport 1486 * - call transport specific function to destroy the clone handle 1487 * - clear xp_master to avoid recursion. 1488 */ 1489 void 1490 svc_clone_unlink(SVCXPRT *clone_xprt) 1491 { 1492 SVCMASTERXPRT *xprt = clone_xprt->xp_master; 1493 1494 /* This cannot be a detached thread */ 1495 ASSERT(!clone_xprt->xp_detached); 1496 ASSERT(xprt->xp_threads > 0); 1497 1498 /* Decrement a reference count on the transport */ 1499 mutex_enter(&xprt->xp_thread_lock); 1500 xprt->xp_threads--; 1501 1502 /* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */ 1503 if (xprt->xp_wq) 1504 mutex_exit(&xprt->xp_thread_lock); 1505 else 1506 svc_xprt_cleanup(xprt, FALSE); 1507 1508 /* Call a transport specific clone `destroy' function */ 1509 SVC_CLONE_DESTROY(clone_xprt); 1510 1511 /* Clear xp_master */ 1512 clone_xprt->xp_master = NULL; 1513 } 1514 1515 /* 1516 * Unlink a detached clone transport handle from a master 1517 * - decrement the thread count on the master 1518 * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup(); 1519 * if this is the last thread on this transport then it will destroy 1520 * the transport. 1521 * - call a transport specific function to destroy the clone handle 1522 * - clear xp_master to avoid recursion. 1523 */ 1524 static void 1525 svc_clone_unlinkdetached(SVCXPRT *clone_xprt) 1526 { 1527 SVCMASTERXPRT *xprt = clone_xprt->xp_master; 1528 1529 /* This must be a detached thread */ 1530 ASSERT(clone_xprt->xp_detached); 1531 ASSERT(xprt->xp_detached_threads > 0); 1532 ASSERT(xprt->xp_threads + xprt->xp_detached_threads > 0); 1533 1534 /* Grab xprt->xp_thread_lock and decrement link counts */ 1535 mutex_enter(&xprt->xp_thread_lock); 1536 xprt->xp_detached_threads--; 1537 1538 /* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */ 1539 if (xprt->xp_wq) 1540 mutex_exit(&xprt->xp_thread_lock); 1541 else 1542 svc_xprt_cleanup(xprt, TRUE); 1543 1544 /* Call transport specific clone `destroy' function */ 1545 SVC_CLONE_DESTROY(clone_xprt); 1546 1547 /* Clear xp_master */ 1548 clone_xprt->xp_master = NULL; 1549 } 1550 1551 /* 1552 * Try to exit a non-detached service thread 1553 * - check if there are enough threads left 1554 * - if this thread (ie its clone transport handle) are linked 1555 * to a master transport then unlink it 1556 * - free the clone structure 1557 * - return to userland for thread exit 1558 * 1559 * If this is the last non-detached or the last thread on this 1560 * transport then the call to svc_clone_unlink() will, respectively, 1561 * close and/or destroy the transport. 1562 */ 1563 static void 1564 svc_thread_exit(SVCPOOL *pool, SVCXPRT *clone_xprt) 1565 { 1566 if (clone_xprt->xp_master) 1567 svc_clone_unlink(clone_xprt); 1568 svc_clone_free(clone_xprt); 1569 1570 mutex_enter(&pool->p_thread_lock); 1571 pool->p_threads--; 1572 if (pool->p_closing && svc_pool_tryexit(pool)) 1573 /* return - thread exit will be handled at user level */ 1574 return; 1575 mutex_exit(&pool->p_thread_lock); 1576 1577 /* return - thread exit will be handled at user level */ 1578 } 1579 1580 /* 1581 * Exit a detached service thread that returned to svc_run 1582 * - decrement the `detached thread' count for the pool 1583 * - unlink the detached clone transport handle from the master 1584 * - free the clone structure 1585 * - return to userland for thread exit 1586 * 1587 * If this is the last thread on this transport then the call 1588 * to svc_clone_unlinkdetached() will destroy the transport. 1589 */ 1590 static void 1591 svc_thread_exitdetached(SVCPOOL *pool, SVCXPRT *clone_xprt) 1592 { 1593 /* This must be a detached thread */ 1594 ASSERT(clone_xprt->xp_master); 1595 ASSERT(clone_xprt->xp_detached); 1596 ASSERT(!MUTEX_HELD(&pool->p_thread_lock)); 1597 1598 svc_clone_unlinkdetached(clone_xprt); 1599 svc_clone_free(clone_xprt); 1600 1601 mutex_enter(&pool->p_thread_lock); 1602 1603 ASSERT(pool->p_reserved_threads >= 0); 1604 ASSERT(pool->p_detached_threads > 0); 1605 1606 pool->p_detached_threads--; 1607 if (pool->p_closing && svc_pool_tryexit(pool)) 1608 /* return - thread exit will be handled at user level */ 1609 return; 1610 mutex_exit(&pool->p_thread_lock); 1611 1612 /* return - thread exit will be handled at user level */ 1613 } 1614 1615 /* 1616 * PSARC 2003/523 Contract Private Interface 1617 * svc_wait 1618 * Changes must be reviewed by Solaris File Sharing 1619 * Changes must be communicated to contract-2003-523@sun.com 1620 */ 1621 int 1622 svc_wait(int id) 1623 { 1624 SVCPOOL *pool; 1625 int err = 0; 1626 struct svc_globals *svc; 1627 1628 svc = zone_getspecific(svc_zone_key, curproc->p_zone); 1629 mutex_enter(&svc->svc_plock); 1630 pool = svc_pool_find(svc, id); 1631 mutex_exit(&svc->svc_plock); 1632 1633 if (pool == NULL) 1634 return (ENOENT); 1635 1636 mutex_enter(&pool->p_user_lock); 1637 1638 /* Check if there's already a user thread waiting on this pool */ 1639 if (pool->p_user_waiting) { 1640 mutex_exit(&pool->p_user_lock); 1641 return (EBUSY); 1642 } 1643 1644 pool->p_user_waiting = TRUE; 1645 1646 /* Go to sleep, waiting for the signaled flag. */ 1647 while (!pool->p_signal_create_thread && !pool->p_user_exit) { 1648 if (cv_wait_sig(&pool->p_user_cv, &pool->p_user_lock) == 0) { 1649 /* Interrupted, return to handle exit or signal */ 1650 pool->p_user_waiting = FALSE; 1651 pool->p_signal_create_thread = FALSE; 1652 mutex_exit(&pool->p_user_lock); 1653 1654 /* 1655 * Thread has been interrupted and therefore 1656 * the service daemon is leaving as well so 1657 * let's go ahead and remove the service 1658 * pool at this time. 1659 */ 1660 mutex_enter(&svc->svc_plock); 1661 svc_pool_unregister(svc, pool); 1662 mutex_exit(&svc->svc_plock); 1663 1664 return (EINTR); 1665 } 1666 } 1667 1668 pool->p_signal_create_thread = FALSE; 1669 pool->p_user_waiting = FALSE; 1670 1671 /* 1672 * About to exit the service pool. Set return value 1673 * to let the userland code know our intent. Signal 1674 * svc_thread_creator() so that it can clean up the 1675 * pool structure. 1676 */ 1677 if (pool->p_user_exit) { 1678 err = ECANCELED; 1679 cv_signal(&pool->p_user_cv); 1680 } 1681 1682 mutex_exit(&pool->p_user_lock); 1683 1684 /* Return to userland with error code, for possible thread creation. */ 1685 return (err); 1686 } 1687 1688 /* 1689 * `Service threads' creator thread. 1690 * The creator thread waits for a signal to create new thread. 1691 */ 1692 static void 1693 svc_thread_creator(SVCPOOL *pool) 1694 { 1695 callb_cpr_t cpr_info; /* CPR info for the creator thread */ 1696 1697 CALLB_CPR_INIT(&cpr_info, &pool->p_creator_lock, callb_generic_cpr, 1698 "svc_thread_creator"); 1699 1700 for (;;) { 1701 mutex_enter(&pool->p_creator_lock); 1702 1703 /* Check if someone set the exit flag */ 1704 if (pool->p_creator_exit) 1705 break; 1706 1707 /* Clear the `signaled' flag and go asleep */ 1708 pool->p_creator_signaled = FALSE; 1709 1710 CALLB_CPR_SAFE_BEGIN(&cpr_info); 1711 cv_wait(&pool->p_creator_cv, &pool->p_creator_lock); 1712 CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock); 1713 1714 /* Check if someone signaled to exit */ 1715 if (pool->p_creator_exit) 1716 break; 1717 1718 mutex_exit(&pool->p_creator_lock); 1719 1720 mutex_enter(&pool->p_thread_lock); 1721 1722 /* 1723 * When the pool is in closing state and all the transports 1724 * are gone the creator should not create any new threads. 1725 */ 1726 if (pool->p_closing) { 1727 rw_enter(&pool->p_lrwlock, RW_READER); 1728 if (pool->p_lcount == 0) { 1729 rw_exit(&pool->p_lrwlock); 1730 mutex_exit(&pool->p_thread_lock); 1731 continue; 1732 } 1733 rw_exit(&pool->p_lrwlock); 1734 } 1735 1736 /* 1737 * Create a new service thread now. 1738 */ 1739 ASSERT(pool->p_reserved_threads >= 0); 1740 ASSERT(pool->p_detached_threads >= 0); 1741 1742 if (pool->p_threads + pool->p_detached_threads < 1743 pool->p_maxthreads) { 1744 /* 1745 * Signal the service pool wait thread 1746 * only if it hasn't already been signaled. 1747 */ 1748 mutex_enter(&pool->p_user_lock); 1749 if (pool->p_signal_create_thread == FALSE) { 1750 pool->p_signal_create_thread = TRUE; 1751 cv_signal(&pool->p_user_cv); 1752 } 1753 mutex_exit(&pool->p_user_lock); 1754 1755 } 1756 1757 mutex_exit(&pool->p_thread_lock); 1758 } 1759 1760 /* 1761 * Pool is closed. Cleanup and exit. 1762 */ 1763 1764 /* Signal userland creator thread that it can stop now. */ 1765 mutex_enter(&pool->p_user_lock); 1766 pool->p_user_exit = TRUE; 1767 cv_broadcast(&pool->p_user_cv); 1768 mutex_exit(&pool->p_user_lock); 1769 1770 /* Wait for svc_wait() to be done with the pool */ 1771 mutex_enter(&pool->p_user_lock); 1772 while (pool->p_user_waiting) { 1773 CALLB_CPR_SAFE_BEGIN(&cpr_info); 1774 cv_wait(&pool->p_user_cv, &pool->p_user_lock); 1775 CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock); 1776 } 1777 mutex_exit(&pool->p_user_lock); 1778 1779 CALLB_CPR_EXIT(&cpr_info); 1780 svc_pool_cleanup(pool); 1781 zthread_exit(); 1782 } 1783 1784 /* 1785 * If the creator thread is idle signal it to create 1786 * a new service thread. 1787 */ 1788 static void 1789 svc_creator_signal(SVCPOOL *pool) 1790 { 1791 mutex_enter(&pool->p_creator_lock); 1792 if (pool->p_creator_signaled == FALSE) { 1793 pool->p_creator_signaled = TRUE; 1794 cv_signal(&pool->p_creator_cv); 1795 } 1796 mutex_exit(&pool->p_creator_lock); 1797 } 1798 1799 /* 1800 * Notify the creator thread to clean up and exit. 1801 */ 1802 static void 1803 svc_creator_signalexit(SVCPOOL *pool) 1804 { 1805 mutex_enter(&pool->p_creator_lock); 1806 pool->p_creator_exit = TRUE; 1807 cv_signal(&pool->p_creator_cv); 1808 mutex_exit(&pool->p_creator_lock); 1809 } 1810 1811 /* 1812 * Polling part of the svc_run(). 1813 * - search for a transport with a pending request 1814 * - when one is found then latch the request lock and return to svc_run() 1815 * - if there is no request go asleep and wait for a signal 1816 * - handle two exceptions: 1817 * a) current transport is closing 1818 * b) timeout waiting for a new request 1819 * in both cases return to svc_run() 1820 */ 1821 static SVCMASTERXPRT * 1822 svc_poll(SVCPOOL *pool, SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt) 1823 { 1824 /* 1825 * Main loop iterates until 1826 * a) we find a pending request, 1827 * b) detect that the current transport is closing 1828 * c) time out waiting for a new request. 1829 */ 1830 for (;;) { 1831 SVCMASTERXPRT *next; 1832 clock_t timeleft; 1833 1834 /* 1835 * Step 1. 1836 * Check if there is a pending request on the current 1837 * transport handle so that we can avoid cloning. 1838 * If so then decrement the `pending-request' count for 1839 * the pool and return to svc_run(). 1840 * 1841 * We need to prevent a potential starvation. When 1842 * a selected transport has all pending requests coming in 1843 * all the time then the service threads will never switch to 1844 * another transport. With a limited number of service 1845 * threads some transports may be never serviced. 1846 * To prevent such a scenario we pick up at most 1847 * pool->p_max_same_xprt requests from the same transport 1848 * and then take a hint from the xprt-ready queue or walk 1849 * the transport list. 1850 */ 1851 if (xprt && xprt->xp_req_head && (!pool->p_qoverflow || 1852 clone_xprt->xp_same_xprt++ < pool->p_max_same_xprt)) { 1853 mutex_enter(&xprt->xp_req_lock); 1854 if (xprt->xp_req_head) 1855 return (xprt); 1856 mutex_exit(&xprt->xp_req_lock); 1857 } 1858 clone_xprt->xp_same_xprt = 0; 1859 1860 /* 1861 * Step 2. 1862 * If there is no request on the current transport try to 1863 * find another transport with a pending request. 1864 */ 1865 mutex_enter(&pool->p_req_lock); 1866 pool->p_walkers++; 1867 mutex_exit(&pool->p_req_lock); 1868 1869 /* 1870 * Make sure that transports will not be destroyed just 1871 * while we are checking them. 1872 */ 1873 rw_enter(&pool->p_lrwlock, RW_READER); 1874 1875 for (;;) { 1876 SVCMASTERXPRT *hint; 1877 1878 /* 1879 * Get the next transport from the xprt-ready queue. 1880 * This is a hint. There is no guarantee that the 1881 * transport still has a pending request since it 1882 * could be picked up by another thread in step 1. 1883 * 1884 * If the transport has a pending request then keep 1885 * it locked. Decrement the `pending-requests' for 1886 * the pool and `walking-threads' counts, and return 1887 * to svc_run(). 1888 */ 1889 hint = svc_xprt_qget(pool); 1890 1891 if (hint && hint->xp_req_head) { 1892 mutex_enter(&hint->xp_req_lock); 1893 if (hint->xp_req_head) { 1894 rw_exit(&pool->p_lrwlock); 1895 1896 mutex_enter(&pool->p_req_lock); 1897 pool->p_walkers--; 1898 mutex_exit(&pool->p_req_lock); 1899 1900 return (hint); 1901 } 1902 mutex_exit(&hint->xp_req_lock); 1903 } 1904 1905 /* 1906 * If there was no hint in the xprt-ready queue then 1907 * - if there is less pending requests than polling 1908 * threads go asleep 1909 * - otherwise check if there was an overflow in the 1910 * xprt-ready queue; if so, then we need to break 1911 * the `drain' mode 1912 */ 1913 if (hint == NULL) { 1914 if (pool->p_reqs < pool->p_walkers) { 1915 mutex_enter(&pool->p_req_lock); 1916 if (pool->p_reqs < pool->p_walkers) 1917 goto sleep; 1918 mutex_exit(&pool->p_req_lock); 1919 } 1920 if (pool->p_qoverflow) { 1921 break; 1922 } 1923 } 1924 } 1925 1926 /* 1927 * If there was an overflow in the xprt-ready queue then we 1928 * need to switch to the `drain' mode, i.e. walk through the 1929 * pool's transport list and search for a transport with a 1930 * pending request. If we manage to drain all the pending 1931 * requests then we can clear the overflow flag. This will 1932 * switch svc_poll() back to taking hints from the xprt-ready 1933 * queue (which is generally more efficient). 1934 * 1935 * If there are no registered transports simply go asleep. 1936 */ 1937 if (xprt == NULL && pool->p_lhead == NULL) { 1938 mutex_enter(&pool->p_req_lock); 1939 goto sleep; 1940 } 1941 1942 /* 1943 * `Walk' through the pool's list of master server 1944 * transport handles. Continue to loop until there are less 1945 * looping threads then pending requests. 1946 */ 1947 next = xprt ? xprt->xp_next : pool->p_lhead; 1948 1949 for (;;) { 1950 /* 1951 * Check if there is a request on this transport. 1952 * 1953 * Since blocking on a locked mutex is very expensive 1954 * check for a request without a lock first. If we miss 1955 * a request that is just being delivered but this will 1956 * cost at most one full walk through the list. 1957 */ 1958 if (next->xp_req_head) { 1959 /* 1960 * Check again, now with a lock. 1961 */ 1962 mutex_enter(&next->xp_req_lock); 1963 if (next->xp_req_head) { 1964 rw_exit(&pool->p_lrwlock); 1965 1966 mutex_enter(&pool->p_req_lock); 1967 pool->p_walkers--; 1968 mutex_exit(&pool->p_req_lock); 1969 1970 return (next); 1971 } 1972 mutex_exit(&next->xp_req_lock); 1973 } 1974 1975 /* 1976 * Continue to `walk' through the pool's 1977 * transport list until there is less requests 1978 * than walkers. Check this condition without 1979 * a lock first to avoid contention on a mutex. 1980 */ 1981 if (pool->p_reqs < pool->p_walkers) { 1982 /* Check again, now with the lock. */ 1983 mutex_enter(&pool->p_req_lock); 1984 if (pool->p_reqs < pool->p_walkers) 1985 break; /* goto sleep */ 1986 mutex_exit(&pool->p_req_lock); 1987 } 1988 1989 next = next->xp_next; 1990 } 1991 1992 sleep: 1993 /* 1994 * No work to do. Stop the `walk' and go asleep. 1995 * Decrement the `walking-threads' count for the pool. 1996 */ 1997 pool->p_walkers--; 1998 rw_exit(&pool->p_lrwlock); 1999 2000 /* 2001 * Count us as asleep, mark this thread as safe 2002 * for suspend and wait for a request. 2003 */ 2004 pool->p_asleep++; 2005 timeleft = cv_reltimedwait_sig(&pool->p_req_cv, 2006 &pool->p_req_lock, pool->p_timeout, TR_CLOCK_TICK); 2007 2008 /* 2009 * If the drowsy flag is on this means that 2010 * someone has signaled a wakeup. In such a case 2011 * the `asleep-threads' count has already updated 2012 * so just clear the flag. 2013 * 2014 * If the drowsy flag is off then we need to update 2015 * the `asleep-threads' count. 2016 */ 2017 if (pool->p_drowsy) { 2018 pool->p_drowsy = FALSE; 2019 /* 2020 * If the thread is here because it timedout, 2021 * instead of returning SVC_ETIMEDOUT, it is 2022 * time to do some more work. 2023 */ 2024 if (timeleft == -1) 2025 timeleft = 1; 2026 } else { 2027 pool->p_asleep--; 2028 } 2029 mutex_exit(&pool->p_req_lock); 2030 2031 /* 2032 * If we received a signal while waiting for a 2033 * request, inform svc_run(), so that we can return 2034 * to user level and exit. 2035 */ 2036 if (timeleft == 0) 2037 return (SVC_EINTR); 2038 2039 /* 2040 * If the current transport is gone then notify 2041 * svc_run() to unlink from it. 2042 */ 2043 if (xprt && xprt->xp_wq == NULL) 2044 return (SVC_EXPRTGONE); 2045 2046 /* 2047 * If we have timed out waiting for a request inform 2048 * svc_run() that we probably don't need this thread. 2049 */ 2050 if (timeleft == -1) 2051 return (SVC_ETIMEDOUT); 2052 } 2053 } 2054 2055 /* 2056 * calculate memory space used by message 2057 */ 2058 static size_t 2059 svc_msgsize(mblk_t *mp) 2060 { 2061 size_t count = 0; 2062 2063 for (; mp; mp = mp->b_cont) 2064 count += MBLKSIZE(mp); 2065 2066 return (count); 2067 } 2068 2069 /* 2070 * svc_flowcontrol() attempts to turn the flow control on or off for the 2071 * transport. 2072 * 2073 * On input the xprt->xp_full determines whether the flow control is currently 2074 * off (FALSE) or on (TRUE). If it is off we do tests to see whether we should 2075 * turn it on, and vice versa. 2076 * 2077 * There are two conditions considered for the flow control. Both conditions 2078 * have the low and the high watermark. Once the high watermark is reached in 2079 * EITHER condition the flow control is turned on. For turning the flow 2080 * control off BOTH conditions must be below the low watermark. 2081 * 2082 * Condition #1 - Number of requests queued: 2083 * 2084 * The max number of threads working on the pool is roughly pool->p_maxthreads. 2085 * Every thread could handle up to pool->p_max_same_xprt requests from one 2086 * transport before it moves to another transport. See svc_poll() for details. 2087 * In case all threads in the pool are working on a transport they will handle 2088 * no more than enough_reqs (pool->p_maxthreads * pool->p_max_same_xprt) 2089 * requests in one shot from that transport. We are turning the flow control 2090 * on once the high watermark is reached for a transport so that the underlying 2091 * queue knows the rate of incoming requests is higher than we are able to 2092 * handle. 2093 * 2094 * The high watermark: 2 * enough_reqs 2095 * The low watermark: enough_reqs 2096 * 2097 * Condition #2 - Length of the data payload for the queued messages/requests: 2098 * 2099 * We want to prevent a particular pool exhausting the memory, so once the 2100 * total length of queued requests for the whole pool reaches the high 2101 * watermark we start to turn on the flow control for significant memory 2102 * consumers (individual transports). To keep the implementation simple 2103 * enough, this condition is not exact, because we count only the data part of 2104 * the queued requests and we ignore the overhead. For our purposes this 2105 * should be enough. We should also consider that up to pool->p_maxthreads 2106 * threads for the pool might work on large requests (this is not counted for 2107 * this condition). We need to leave some space for rest of the system and for 2108 * other big memory consumers (like ZFS). Also, after the flow control is 2109 * turned on (on cots transports) we can start to accumulate a few megabytes in 2110 * queues for each transport. 2111 * 2112 * Usually, the big memory consumers are NFS WRITE requests, so we do not 2113 * expect to see this condition met for other than NFS pools. 2114 * 2115 * The high watermark: 1/5 of available memory 2116 * The low watermark: 1/6 of available memory 2117 * 2118 * Once the high watermark is reached we turn the flow control on only for 2119 * transports exceeding a per-transport memory limit. The per-transport 2120 * fraction of memory is calculated as: 2121 * 2122 * the high watermark / number of transports 2123 * 2124 * For transports with less than the per-transport fraction of memory consumed, 2125 * the flow control is not turned on, so they are not blocked by a few "hungry" 2126 * transports. Because of this, the total memory consumption for the 2127 * particular pool might grow up to 2 * the high watermark. 2128 * 2129 * The individual transports are unblocked once their consumption is below: 2130 * 2131 * per-transport fraction of memory / 2 2132 * 2133 * or once the total memory consumption for the whole pool falls below the low 2134 * watermark. 2135 * 2136 */ 2137 static void 2138 svc_flowcontrol(SVCMASTERXPRT *xprt) 2139 { 2140 SVCPOOL *pool = xprt->xp_pool; 2141 size_t totalmem = ptob(physmem); 2142 int enough_reqs = pool->p_maxthreads * pool->p_max_same_xprt; 2143 2144 ASSERT(MUTEX_HELD(&xprt->xp_req_lock)); 2145 2146 /* Should we turn the flow control on? */ 2147 if (xprt->xp_full == FALSE) { 2148 /* Is flow control disabled? */ 2149 if (svc_flowcontrol_disable != 0) 2150 return; 2151 2152 /* Is there enough requests queued? */ 2153 if (xprt->xp_reqs >= enough_reqs * 2) { 2154 xprt->xp_full = TRUE; 2155 return; 2156 } 2157 2158 /* 2159 * If this pool uses over 20% of memory and this transport is 2160 * significant memory consumer then we are full 2161 */ 2162 if (pool->p_size >= totalmem / 5 && 2163 xprt->xp_size >= totalmem / 5 / pool->p_lcount) 2164 xprt->xp_full = TRUE; 2165 2166 return; 2167 } 2168 2169 /* We might want to turn the flow control off */ 2170 2171 /* Do we still have enough requests? */ 2172 if (xprt->xp_reqs > enough_reqs) 2173 return; 2174 2175 /* 2176 * If this pool still uses over 16% of memory and this transport is 2177 * still significant memory consumer then we are still full 2178 */ 2179 if (pool->p_size >= totalmem / 6 && 2180 xprt->xp_size >= totalmem / 5 / pool->p_lcount / 2) 2181 return; 2182 2183 /* Turn the flow control off and make sure rpcmod is notified */ 2184 xprt->xp_full = FALSE; 2185 xprt->xp_enable = TRUE; 2186 } 2187 2188 /* 2189 * Main loop of the kernel RPC server 2190 * - wait for input (find a transport with a pending request). 2191 * - dequeue the request 2192 * - call a registered server routine to process the requests 2193 * 2194 * There can many threads running concurrently in this loop 2195 * on the same or on different transports. 2196 */ 2197 static int 2198 svc_run(SVCPOOL *pool) 2199 { 2200 SVCMASTERXPRT *xprt = NULL; /* master transport handle */ 2201 SVCXPRT *clone_xprt; /* clone for this thread */ 2202 proc_t *p = ttoproc(curthread); 2203 2204 /* Allocate a clone transport handle for this thread */ 2205 clone_xprt = svc_clone_init(); 2206 2207 /* 2208 * The loop iterates until the thread becomes 2209 * idle too long or the transport is gone. 2210 */ 2211 for (;;) { 2212 SVCMASTERXPRT *next; 2213 mblk_t *mp; 2214 bool_t enable; 2215 size_t size; 2216 2217 TRACE_0(TR_FAC_KRPC, TR_SVC_RUN, "svc_run"); 2218 2219 /* 2220 * If the process is exiting/killed, return 2221 * immediately without processing any more 2222 * requests. 2223 */ 2224 if (p->p_flag & (SEXITING | SKILLED)) { 2225 svc_thread_exit(pool, clone_xprt); 2226 return (EINTR); 2227 } 2228 2229 /* Find a transport with a pending request */ 2230 next = svc_poll(pool, xprt, clone_xprt); 2231 2232 /* 2233 * If svc_poll() finds a transport with a request 2234 * it latches xp_req_lock on it. Therefore we need 2235 * to dequeue the request and release the lock as 2236 * soon as possible. 2237 */ 2238 ASSERT(next != NULL && 2239 (next == SVC_EXPRTGONE || 2240 next == SVC_ETIMEDOUT || 2241 next == SVC_EINTR || 2242 MUTEX_HELD(&next->xp_req_lock))); 2243 2244 /* Ooops! Current transport is closing. Unlink now */ 2245 if (next == SVC_EXPRTGONE) { 2246 svc_clone_unlink(clone_xprt); 2247 xprt = NULL; 2248 continue; 2249 } 2250 2251 /* Ooops! Timeout while waiting for a request. Exit */ 2252 if (next == SVC_ETIMEDOUT) { 2253 svc_thread_exit(pool, clone_xprt); 2254 return (0); 2255 } 2256 2257 /* 2258 * Interrupted by a signal while waiting for a 2259 * request. Return to userspace and exit. 2260 */ 2261 if (next == SVC_EINTR) { 2262 svc_thread_exit(pool, clone_xprt); 2263 return (EINTR); 2264 } 2265 2266 /* 2267 * De-queue the request and release the request lock 2268 * on this transport (latched by svc_poll()). 2269 */ 2270 mp = next->xp_req_head; 2271 next->xp_req_head = mp->b_next; 2272 mp->b_next = (mblk_t *)0; 2273 size = svc_msgsize(mp); 2274 2275 mutex_enter(&pool->p_req_lock); 2276 pool->p_reqs--; 2277 if (pool->p_reqs == 0) 2278 pool->p_qoverflow = FALSE; 2279 pool->p_size -= size; 2280 mutex_exit(&pool->p_req_lock); 2281 2282 next->xp_reqs--; 2283 next->xp_size -= size; 2284 2285 if (next->xp_full) 2286 svc_flowcontrol(next); 2287 2288 TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_DEQ, 2289 "rpc_que_req_deq:pool %p mp %p", pool, mp); 2290 mutex_exit(&next->xp_req_lock); 2291 2292 /* 2293 * If this is a new request on a current transport then 2294 * the clone structure is already properly initialized. 2295 * Otherwise, if the request is on a different transport, 2296 * unlink from the current master and link to 2297 * the one we got a request on. 2298 */ 2299 if (next != xprt) { 2300 if (xprt) 2301 svc_clone_unlink(clone_xprt); 2302 svc_clone_link(next, clone_xprt, NULL); 2303 xprt = next; 2304 } 2305 2306 /* 2307 * If there are more requests and req_cv hasn't 2308 * been signaled yet then wake up one more thread now. 2309 * 2310 * We avoid signaling req_cv until the most recently 2311 * signaled thread wakes up and gets CPU to clear 2312 * the `drowsy' flag. 2313 */ 2314 if (!(pool->p_drowsy || pool->p_reqs <= pool->p_walkers || 2315 pool->p_asleep == 0)) { 2316 mutex_enter(&pool->p_req_lock); 2317 2318 if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers || 2319 pool->p_asleep == 0) 2320 mutex_exit(&pool->p_req_lock); 2321 else { 2322 pool->p_asleep--; 2323 pool->p_drowsy = TRUE; 2324 2325 cv_signal(&pool->p_req_cv); 2326 mutex_exit(&pool->p_req_lock); 2327 } 2328 } 2329 2330 /* 2331 * If there are no asleep/signaled threads, we are 2332 * still below pool->p_maxthreads limit, and no thread is 2333 * currently being created then signal the creator 2334 * for one more service thread. 2335 * 2336 * The asleep and drowsy checks are not protected 2337 * by a lock since it hurts performance and a wrong 2338 * decision is not essential. 2339 */ 2340 if (pool->p_asleep == 0 && !pool->p_drowsy && 2341 pool->p_threads + pool->p_detached_threads < 2342 pool->p_maxthreads) 2343 svc_creator_signal(pool); 2344 2345 /* 2346 * Process the request. 2347 */ 2348 svc_getreq(clone_xprt, mp); 2349 2350 /* If thread had a reservation it should have been canceled */ 2351 ASSERT(!clone_xprt->xp_reserved); 2352 2353 /* 2354 * If the clone is marked detached then exit. 2355 * The rpcmod slot has already been released 2356 * when we detached this thread. 2357 */ 2358 if (clone_xprt->xp_detached) { 2359 svc_thread_exitdetached(pool, clone_xprt); 2360 return (0); 2361 } 2362 2363 /* 2364 * Release our reference on the rpcmod 2365 * slot attached to xp_wq->q_ptr. 2366 */ 2367 mutex_enter(&xprt->xp_req_lock); 2368 enable = xprt->xp_enable; 2369 if (enable) 2370 xprt->xp_enable = FALSE; 2371 mutex_exit(&xprt->xp_req_lock); 2372 SVC_RELE(clone_xprt, NULL, enable); 2373 } 2374 /* NOTREACHED */ 2375 } 2376 2377 /* 2378 * Flush any pending requests for the queue and 2379 * free the associated mblks. 2380 */ 2381 void 2382 svc_queueclean(queue_t *q) 2383 { 2384 SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0]; 2385 mblk_t *mp; 2386 SVCPOOL *pool; 2387 2388 /* 2389 * clean up the requests 2390 */ 2391 mutex_enter(&xprt->xp_req_lock); 2392 pool = xprt->xp_pool; 2393 while ((mp = xprt->xp_req_head) != NULL) { 2394 /* remove the request from the list */ 2395 xprt->xp_req_head = mp->b_next; 2396 mp->b_next = (mblk_t *)0; 2397 SVC_RELE(xprt, mp, FALSE); 2398 } 2399 2400 mutex_enter(&pool->p_req_lock); 2401 pool->p_reqs -= xprt->xp_reqs; 2402 pool->p_size -= xprt->xp_size; 2403 mutex_exit(&pool->p_req_lock); 2404 2405 xprt->xp_reqs = 0; 2406 xprt->xp_size = 0; 2407 xprt->xp_full = FALSE; 2408 xprt->xp_enable = FALSE; 2409 mutex_exit(&xprt->xp_req_lock); 2410 } 2411 2412 /* 2413 * This routine is called by rpcmod to inform kernel RPC that a 2414 * queue is closing. It is called after all the requests have been 2415 * picked up (that is after all the slots on the queue have 2416 * been released by kernel RPC). It is also guaranteed that no more 2417 * request will be delivered on this transport. 2418 * 2419 * - clear xp_wq to mark the master server transport handle as closing 2420 * - if there are no more threads on this transport close/destroy it 2421 * - otherwise, leave the linked threads to close/destroy the transport 2422 * later. 2423 */ 2424 void 2425 svc_queueclose(queue_t *q) 2426 { 2427 SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0]; 2428 2429 if (xprt == NULL) { 2430 /* 2431 * If there is no master xprt associated with this stream, 2432 * then there is nothing to do. This happens regularly 2433 * with connection-oriented listening streams created by 2434 * nfsd. 2435 */ 2436 return; 2437 } 2438 2439 mutex_enter(&xprt->xp_thread_lock); 2440 2441 ASSERT(xprt->xp_req_head == NULL); 2442 ASSERT(xprt->xp_wq != NULL); 2443 2444 xprt->xp_wq = NULL; 2445 2446 if (xprt->xp_threads == 0) { 2447 SVCPOOL *pool = xprt->xp_pool; 2448 2449 /* 2450 * svc_xprt_cleanup() destroys the transport 2451 * or releases the transport thread lock 2452 */ 2453 svc_xprt_cleanup(xprt, FALSE); 2454 2455 mutex_enter(&pool->p_thread_lock); 2456 2457 /* 2458 * If the pool is in closing state and this was 2459 * the last transport in the pool then signal the creator 2460 * thread to clean up and exit. 2461 */ 2462 if (pool->p_closing && svc_pool_tryexit(pool)) { 2463 return; 2464 } 2465 mutex_exit(&pool->p_thread_lock); 2466 } else { 2467 /* 2468 * There are still some threads linked to the transport. They 2469 * are very likely sleeping in svc_poll(). We could wake up 2470 * them by broadcasting on the p_req_cv condition variable, but 2471 * that might give us a performance penalty if there are too 2472 * many sleeping threads. 2473 * 2474 * Instead, we do nothing here. The linked threads will unlink 2475 * themselves and destroy the transport once they are woken up 2476 * on timeout, or by new request. There is no reason to hurry 2477 * up now with the thread wake up. 2478 */ 2479 2480 /* 2481 * NOTICE: No references to the master transport structure 2482 * beyond this point! 2483 */ 2484 mutex_exit(&xprt->xp_thread_lock); 2485 } 2486 } 2487 2488 /* 2489 * Interrupt `request delivery' routine called from rpcmod 2490 * - put a request at the tail of the transport request queue 2491 * - insert a hint for svc_poll() into the xprt-ready queue 2492 * - increment the `pending-requests' count for the pool 2493 * - handle flow control 2494 * - wake up a thread sleeping in svc_poll() if necessary 2495 * - if all the threads are running ask the creator for a new one. 2496 */ 2497 bool_t 2498 svc_queuereq(queue_t *q, mblk_t *mp, bool_t flowcontrol) 2499 { 2500 SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0]; 2501 SVCPOOL *pool = xprt->xp_pool; 2502 size_t size; 2503 2504 TRACE_0(TR_FAC_KRPC, TR_SVC_QUEUEREQ_START, "svc_queuereq_start"); 2505 2506 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL || 2507 mp->b_datap->db_type != M_DATA); 2508 2509 /* 2510 * Step 1. 2511 * Grab the transport's request lock and the 2512 * pool's request lock so that when we put 2513 * the request at the tail of the transport's 2514 * request queue, possibly put the request on 2515 * the xprt ready queue and increment the 2516 * pending request count it looks atomic. 2517 */ 2518 mutex_enter(&xprt->xp_req_lock); 2519 if (flowcontrol && xprt->xp_full) { 2520 mutex_exit(&xprt->xp_req_lock); 2521 2522 return (FALSE); 2523 } 2524 ASSERT(xprt->xp_full == FALSE); 2525 mutex_enter(&pool->p_req_lock); 2526 if (xprt->xp_req_head == NULL) 2527 xprt->xp_req_head = mp; 2528 else 2529 xprt->xp_req_tail->b_next = mp; 2530 xprt->xp_req_tail = mp; 2531 2532 /* 2533 * Step 2. 2534 * Insert a hint into the xprt-ready queue, increment 2535 * counters, handle flow control, and wake up 2536 * a thread sleeping in svc_poll() if necessary. 2537 */ 2538 2539 /* Insert pointer to this transport into the xprt-ready queue */ 2540 svc_xprt_qput(pool, xprt); 2541 2542 /* Increment counters */ 2543 pool->p_reqs++; 2544 xprt->xp_reqs++; 2545 2546 size = svc_msgsize(mp); 2547 xprt->xp_size += size; 2548 pool->p_size += size; 2549 2550 /* Handle flow control */ 2551 if (flowcontrol) 2552 svc_flowcontrol(xprt); 2553 2554 TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_ENQ, 2555 "rpc_que_req_enq:pool %p mp %p", pool, mp); 2556 2557 /* 2558 * If there are more requests and req_cv hasn't 2559 * been signaled yet then wake up one more thread now. 2560 * 2561 * We avoid signaling req_cv until the most recently 2562 * signaled thread wakes up and gets CPU to clear 2563 * the `drowsy' flag. 2564 */ 2565 if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers || 2566 pool->p_asleep == 0) { 2567 mutex_exit(&pool->p_req_lock); 2568 } else { 2569 pool->p_drowsy = TRUE; 2570 pool->p_asleep--; 2571 2572 /* 2573 * Signal wakeup and drop the request lock. 2574 */ 2575 cv_signal(&pool->p_req_cv); 2576 mutex_exit(&pool->p_req_lock); 2577 } 2578 mutex_exit(&xprt->xp_req_lock); 2579 2580 /* 2581 * Step 3. 2582 * If there are no asleep/signaled threads, we are 2583 * still below pool->p_maxthreads limit, and no thread is 2584 * currently being created then signal the creator 2585 * for one more service thread. 2586 * 2587 * The asleep and drowsy checks are not not protected 2588 * by a lock since it hurts performance and a wrong 2589 * decision is not essential. 2590 */ 2591 if (pool->p_asleep == 0 && !pool->p_drowsy && 2592 pool->p_threads + pool->p_detached_threads < pool->p_maxthreads) 2593 svc_creator_signal(pool); 2594 2595 TRACE_1(TR_FAC_KRPC, TR_SVC_QUEUEREQ_END, 2596 "svc_queuereq_end:(%S)", "end"); 2597 2598 return (TRUE); 2599 } 2600 2601 /* 2602 * Reserve a service thread so that it can be detached later. 2603 * This reservation is required to make sure that when it tries to 2604 * detach itself the total number of detached threads does not exceed 2605 * pool->p_maxthreads - pool->p_redline (i.e. that we can have 2606 * up to pool->p_redline non-detached threads). 2607 * 2608 * If the thread does not detach itself later, it should cancel the 2609 * reservation before returning to svc_run(). 2610 * 2611 * - check if there is room for more reserved/detached threads 2612 * - if so, then increment the `reserved threads' count for the pool 2613 * - mark the thread as reserved (setting the flag in the clone transport 2614 * handle for this thread 2615 * - returns 1 if the reservation succeeded, 0 if it failed. 2616 */ 2617 int 2618 svc_reserve_thread(SVCXPRT *clone_xprt) 2619 { 2620 SVCPOOL *pool = clone_xprt->xp_master->xp_pool; 2621 2622 /* Recursive reservations are not allowed */ 2623 ASSERT(!clone_xprt->xp_reserved); 2624 ASSERT(!clone_xprt->xp_detached); 2625 2626 /* Check pool counts if there is room for reservation */ 2627 mutex_enter(&pool->p_thread_lock); 2628 if (pool->p_reserved_threads + pool->p_detached_threads >= 2629 pool->p_maxthreads - pool->p_redline) { 2630 mutex_exit(&pool->p_thread_lock); 2631 return (0); 2632 } 2633 pool->p_reserved_threads++; 2634 mutex_exit(&pool->p_thread_lock); 2635 2636 /* Mark the thread (clone handle) as reserved */ 2637 clone_xprt->xp_reserved = TRUE; 2638 2639 return (1); 2640 } 2641 2642 /* 2643 * Cancel a reservation for a thread. 2644 * - decrement the `reserved threads' count for the pool 2645 * - clear the flag in the clone transport handle for this thread. 2646 */ 2647 void 2648 svc_unreserve_thread(SVCXPRT *clone_xprt) 2649 { 2650 SVCPOOL *pool = clone_xprt->xp_master->xp_pool; 2651 2652 /* Thread must have a reservation */ 2653 ASSERT(clone_xprt->xp_reserved); 2654 ASSERT(!clone_xprt->xp_detached); 2655 2656 /* Decrement global count */ 2657 mutex_enter(&pool->p_thread_lock); 2658 pool->p_reserved_threads--; 2659 mutex_exit(&pool->p_thread_lock); 2660 2661 /* Clear reservation flag */ 2662 clone_xprt->xp_reserved = FALSE; 2663 } 2664 2665 /* 2666 * Detach a thread from its transport, so that it can block for an 2667 * extended time. Because the transport can be closed after the thread is 2668 * detached, the thread should have already sent off a reply if it was 2669 * going to send one. 2670 * 2671 * - decrement `non-detached threads' count and increment `detached threads' 2672 * counts for the transport 2673 * - decrement the `non-detached threads' and `reserved threads' 2674 * counts and increment the `detached threads' count for the pool 2675 * - release the rpcmod slot 2676 * - mark the clone (thread) as detached. 2677 * 2678 * No need to return a pointer to the thread's CPR information, since 2679 * the thread has a userland identity. 2680 * 2681 * NOTICE: a thread must not detach itself without making a prior reservation 2682 * through svc_thread_reserve(). 2683 */ 2684 callb_cpr_t * 2685 svc_detach_thread(SVCXPRT *clone_xprt) 2686 { 2687 SVCMASTERXPRT *xprt = clone_xprt->xp_master; 2688 SVCPOOL *pool = xprt->xp_pool; 2689 bool_t enable; 2690 2691 /* Thread must have a reservation */ 2692 ASSERT(clone_xprt->xp_reserved); 2693 ASSERT(!clone_xprt->xp_detached); 2694 2695 /* Bookkeeping for this transport */ 2696 mutex_enter(&xprt->xp_thread_lock); 2697 xprt->xp_threads--; 2698 xprt->xp_detached_threads++; 2699 mutex_exit(&xprt->xp_thread_lock); 2700 2701 /* Bookkeeping for the pool */ 2702 mutex_enter(&pool->p_thread_lock); 2703 pool->p_threads--; 2704 pool->p_reserved_threads--; 2705 pool->p_detached_threads++; 2706 mutex_exit(&pool->p_thread_lock); 2707 2708 /* Release an rpcmod slot for this request */ 2709 mutex_enter(&xprt->xp_req_lock); 2710 enable = xprt->xp_enable; 2711 if (enable) 2712 xprt->xp_enable = FALSE; 2713 mutex_exit(&xprt->xp_req_lock); 2714 SVC_RELE(clone_xprt, NULL, enable); 2715 2716 /* Mark the clone (thread) as detached */ 2717 clone_xprt->xp_reserved = FALSE; 2718 clone_xprt->xp_detached = TRUE; 2719 2720 return (NULL); 2721 } 2722 2723 /* 2724 * This routine is responsible for extracting RDMA plugin master XPRT, 2725 * unregister from the SVCPOOL and initiate plugin specific cleanup. 2726 * It is passed a list/group of rdma transports as records which are 2727 * active in a given registered or unregistered kRPC thread pool. Its shuts 2728 * all active rdma transports in that pool. If the thread active on the trasport 2729 * happens to be last thread for that pool, it will signal the creater thread 2730 * to cleanup the pool and destroy the xprt in svc_queueclose() 2731 */ 2732 void 2733 rdma_stop(rdma_xprt_group_t *rdma_xprts) 2734 { 2735 SVCMASTERXPRT *xprt; 2736 rdma_xprt_record_t *curr_rec; 2737 queue_t *q; 2738 mblk_t *mp; 2739 int i, rtg_count; 2740 SVCPOOL *pool; 2741 2742 if (rdma_xprts->rtg_count == 0) 2743 return; 2744 2745 rtg_count = rdma_xprts->rtg_count; 2746 2747 for (i = 0; i < rtg_count; i++) { 2748 curr_rec = rdma_xprts->rtg_listhead; 2749 rdma_xprts->rtg_listhead = curr_rec->rtr_next; 2750 rdma_xprts->rtg_count--; 2751 curr_rec->rtr_next = NULL; 2752 xprt = curr_rec->rtr_xprt_ptr; 2753 q = xprt->xp_wq; 2754 svc_rdma_kstop(xprt); 2755 2756 mutex_enter(&xprt->xp_req_lock); 2757 pool = xprt->xp_pool; 2758 while ((mp = xprt->xp_req_head) != NULL) { 2759 rdma_recv_data_t *rdp = (rdma_recv_data_t *)mp->b_rptr; 2760 2761 /* remove the request from the list */ 2762 xprt->xp_req_head = mp->b_next; 2763 mp->b_next = (mblk_t *)0; 2764 2765 RDMA_BUF_FREE(rdp->conn, &rdp->rpcmsg); 2766 RDMA_REL_CONN(rdp->conn); 2767 freemsg(mp); 2768 } 2769 mutex_enter(&pool->p_req_lock); 2770 pool->p_reqs -= xprt->xp_reqs; 2771 pool->p_size -= xprt->xp_size; 2772 mutex_exit(&pool->p_req_lock); 2773 xprt->xp_reqs = 0; 2774 xprt->xp_size = 0; 2775 xprt->xp_full = FALSE; 2776 xprt->xp_enable = FALSE; 2777 mutex_exit(&xprt->xp_req_lock); 2778 svc_queueclose(q); 2779 #ifdef DEBUG 2780 if (rdma_check) 2781 cmn_err(CE_NOTE, "rdma_stop: Exited svc_queueclose\n"); 2782 #endif 2783 /* 2784 * Free the rdma transport record for the expunged rdma 2785 * based master transport handle. 2786 */ 2787 kmem_free(curr_rec, sizeof (rdma_xprt_record_t)); 2788 if (!rdma_xprts->rtg_listhead) 2789 break; 2790 } 2791 } 2792 2793 2794 /* 2795 * rpc_msg_dup/rpc_msg_free 2796 * Currently only used by svc_rpcsec_gss.c but put in this file as it 2797 * may be useful to others in the future. 2798 * But future consumers should be careful cuz so far 2799 * - only tested/used for call msgs (not reply) 2800 * - only tested/used with call verf oa_length==0 2801 */ 2802 struct rpc_msg * 2803 rpc_msg_dup(struct rpc_msg *src) 2804 { 2805 struct rpc_msg *dst; 2806 struct opaque_auth oa_src, oa_dst; 2807 2808 dst = kmem_alloc(sizeof (*dst), KM_SLEEP); 2809 2810 dst->rm_xid = src->rm_xid; 2811 dst->rm_direction = src->rm_direction; 2812 2813 dst->rm_call.cb_rpcvers = src->rm_call.cb_rpcvers; 2814 dst->rm_call.cb_prog = src->rm_call.cb_prog; 2815 dst->rm_call.cb_vers = src->rm_call.cb_vers; 2816 dst->rm_call.cb_proc = src->rm_call.cb_proc; 2817 2818 /* dup opaque auth call body cred */ 2819 oa_src = src->rm_call.cb_cred; 2820 2821 oa_dst.oa_flavor = oa_src.oa_flavor; 2822 oa_dst.oa_base = kmem_alloc(oa_src.oa_length, KM_SLEEP); 2823 2824 bcopy(oa_src.oa_base, oa_dst.oa_base, oa_src.oa_length); 2825 oa_dst.oa_length = oa_src.oa_length; 2826 2827 dst->rm_call.cb_cred = oa_dst; 2828 2829 /* dup or just alloc opaque auth call body verifier */ 2830 if (src->rm_call.cb_verf.oa_length > 0) { 2831 oa_src = src->rm_call.cb_verf; 2832 2833 oa_dst.oa_flavor = oa_src.oa_flavor; 2834 oa_dst.oa_base = kmem_alloc(oa_src.oa_length, KM_SLEEP); 2835 2836 bcopy(oa_src.oa_base, oa_dst.oa_base, oa_src.oa_length); 2837 oa_dst.oa_length = oa_src.oa_length; 2838 2839 dst->rm_call.cb_verf = oa_dst; 2840 } else { 2841 oa_dst.oa_flavor = -1; /* will be set later */ 2842 oa_dst.oa_base = kmem_alloc(MAX_AUTH_BYTES, KM_SLEEP); 2843 2844 oa_dst.oa_length = 0; /* will be set later */ 2845 2846 dst->rm_call.cb_verf = oa_dst; 2847 } 2848 return (dst); 2849 2850 error: 2851 kmem_free(dst->rm_call.cb_cred.oa_base, dst->rm_call.cb_cred.oa_length); 2852 kmem_free(dst, sizeof (*dst)); 2853 return (NULL); 2854 } 2855 2856 void 2857 rpc_msg_free(struct rpc_msg **msg, int cb_verf_oa_length) 2858 { 2859 struct rpc_msg *m = *msg; 2860 2861 kmem_free(m->rm_call.cb_cred.oa_base, m->rm_call.cb_cred.oa_length); 2862 m->rm_call.cb_cred.oa_base = NULL; 2863 m->rm_call.cb_cred.oa_length = 0; 2864 2865 kmem_free(m->rm_call.cb_verf.oa_base, cb_verf_oa_length); 2866 m->rm_call.cb_verf.oa_base = NULL; 2867 m->rm_call.cb_verf.oa_length = 0; 2868 2869 kmem_free(m, sizeof (*m)); 2870 m = NULL; 2871 } 2872 2873 /* 2874 * Generally 'cr_ref' should be 1, otherwise reference is kept 2875 * in underlying calls, so reset it. 2876 */ 2877 cred_t * 2878 svc_xprt_cred(SVCXPRT *xprt) 2879 { 2880 cred_t *cr = xprt->xp_cred; 2881 2882 ASSERT(cr != NULL); 2883 2884 if (crgetref(cr) != 1) { 2885 crfree(cr); 2886 cr = crget(); 2887 xprt->xp_cred = cr; 2888 } 2889 return (cr); 2890 } 2891