/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate
267c478bd9Sstevel@tonic-gate #include <thread.h>
277c478bd9Sstevel@tonic-gate #include <stdlib.h>
287c478bd9Sstevel@tonic-gate #include <errno.h>
297c478bd9Sstevel@tonic-gate #include <strings.h>
307c478bd9Sstevel@tonic-gate #include <tiuser.h>
317c478bd9Sstevel@tonic-gate #include <syslog.h>
327c478bd9Sstevel@tonic-gate #include <zone.h>
337c478bd9Sstevel@tonic-gate #include <sys/priocntl.h>
347c478bd9Sstevel@tonic-gate #include <sys/fxpriocntl.h>
357c478bd9Sstevel@tonic-gate #include <nfs/nfs.h>
367c478bd9Sstevel@tonic-gate #include <nfs/nfssys.h>
377c478bd9Sstevel@tonic-gate #include "thrpool.h"
387c478bd9Sstevel@tonic-gate
397c478bd9Sstevel@tonic-gate extern int _nfssys(int, void *);
407c478bd9Sstevel@tonic-gate
417c478bd9Sstevel@tonic-gate /*
427c478bd9Sstevel@tonic-gate * Thread to call into the kernel and do work on behalf of NFS.
437c478bd9Sstevel@tonic-gate */
447c478bd9Sstevel@tonic-gate static void *
svcstart(void * arg)457c478bd9Sstevel@tonic-gate svcstart(void *arg)
467c478bd9Sstevel@tonic-gate {
477c478bd9Sstevel@tonic-gate int id = (int)arg;
487c478bd9Sstevel@tonic-gate
497c478bd9Sstevel@tonic-gate /*
50*f7b93e0cSVallish Vaidyeshwara * Create a kernel worker thread to service
51*f7b93e0cSVallish Vaidyeshwara * new incoming requests on a pool.
527c478bd9Sstevel@tonic-gate */
53*f7b93e0cSVallish Vaidyeshwara _nfssys(SVCPOOL_RUN, &id);
547c478bd9Sstevel@tonic-gate
557c478bd9Sstevel@tonic-gate /*
56*f7b93e0cSVallish Vaidyeshwara * Returned from the kernel, this thread's work is done,
57*f7b93e0cSVallish Vaidyeshwara * and it should exit. For new incoming requests,
58*f7b93e0cSVallish Vaidyeshwara * svcblock() will spawn another worker thread by
59*f7b93e0cSVallish Vaidyeshwara * calling svcstart() again.
607c478bd9Sstevel@tonic-gate */
617c478bd9Sstevel@tonic-gate thr_exit(NULL);
627c478bd9Sstevel@tonic-gate return (NULL);
637c478bd9Sstevel@tonic-gate }
647c478bd9Sstevel@tonic-gate
657c478bd9Sstevel@tonic-gate static void *
svc_rdma_creator(void * arg)667c478bd9Sstevel@tonic-gate svc_rdma_creator(void *arg)
677c478bd9Sstevel@tonic-gate {
687c478bd9Sstevel@tonic-gate struct rdma_svc_args *rsap = (struct rdma_svc_args *)arg;
697c478bd9Sstevel@tonic-gate
70f9c1591dSVallish Vaidyeshwara if (_nfssys(RDMA_SVC_INIT, rsap) < 0) {
71f9c1591dSVallish Vaidyeshwara if (errno != ENODEV) {
727c478bd9Sstevel@tonic-gate (void) syslog(LOG_INFO, "RDMA transport startup "
737c478bd9Sstevel@tonic-gate "failed with %m");
747c478bd9Sstevel@tonic-gate }
757c478bd9Sstevel@tonic-gate }
767c478bd9Sstevel@tonic-gate free(rsap);
777c478bd9Sstevel@tonic-gate thr_exit(NULL);
787c478bd9Sstevel@tonic-gate return (NULL);
797c478bd9Sstevel@tonic-gate }
807c478bd9Sstevel@tonic-gate
817c478bd9Sstevel@tonic-gate /*
827c478bd9Sstevel@tonic-gate * User-space "creator" thread. This thread blocks in the kernel
837c478bd9Sstevel@tonic-gate * until new worker threads need to be created for the service
847c478bd9Sstevel@tonic-gate * pool. On return to userspace, if there is no error, create a
857c478bd9Sstevel@tonic-gate * new thread for the service pool.
867c478bd9Sstevel@tonic-gate */
877c478bd9Sstevel@tonic-gate static void *
svcblock(void * arg)887c478bd9Sstevel@tonic-gate svcblock(void *arg)
897c478bd9Sstevel@tonic-gate {
907c478bd9Sstevel@tonic-gate int id = (int)arg;
917c478bd9Sstevel@tonic-gate
927c478bd9Sstevel@tonic-gate /* CONSTCOND */
937c478bd9Sstevel@tonic-gate while (1) {
947c478bd9Sstevel@tonic-gate thread_t tid;
957c478bd9Sstevel@tonic-gate
967c478bd9Sstevel@tonic-gate /*
977c478bd9Sstevel@tonic-gate * Call into the kernel, and hang out there
987c478bd9Sstevel@tonic-gate * until a thread needs to be created.
997c478bd9Sstevel@tonic-gate */
100f9c1591dSVallish Vaidyeshwara if (_nfssys(SVCPOOL_WAIT, &id) < 0) {
101*f7b93e0cSVallish Vaidyeshwara if (errno == ECANCELED || errno == EINTR ||
102*f7b93e0cSVallish Vaidyeshwara errno == EBUSY)
1037c478bd9Sstevel@tonic-gate /*
104*f7b93e0cSVallish Vaidyeshwara * If we get back ECANCELED or EINTR,
105*f7b93e0cSVallish Vaidyeshwara * the service pool is exiting, and we
106*f7b93e0cSVallish Vaidyeshwara * may as well clean up this thread. If
107*f7b93e0cSVallish Vaidyeshwara * EBUSY is returned, there's already a
108*f7b93e0cSVallish Vaidyeshwara * thread looping on this pool, so we
109*f7b93e0cSVallish Vaidyeshwara * should give up.
1107c478bd9Sstevel@tonic-gate */
1117c478bd9Sstevel@tonic-gate break;
1127c478bd9Sstevel@tonic-gate else
1137c478bd9Sstevel@tonic-gate continue;
1147c478bd9Sstevel@tonic-gate }
1157c478bd9Sstevel@tonic-gate
1167c478bd9Sstevel@tonic-gate /*
1177c478bd9Sstevel@tonic-gate * User portion of the thread does no real work since
1187c478bd9Sstevel@tonic-gate * the svcpool threads actually spend their entire
1197c478bd9Sstevel@tonic-gate * lives in the kernel. So, user portion of the thread
1207c478bd9Sstevel@tonic-gate * should have the smallest stack possible.
1217c478bd9Sstevel@tonic-gate */
1227c478bd9Sstevel@tonic-gate (void) thr_create(NULL, THR_MIN_STACK, svcstart, (void *)id,
1237c478bd9Sstevel@tonic-gate THR_BOUND | THR_DETACHED, &tid);
1247c478bd9Sstevel@tonic-gate }
1257c478bd9Sstevel@tonic-gate
1267c478bd9Sstevel@tonic-gate thr_exit(NULL);
1277c478bd9Sstevel@tonic-gate return (NULL);
1287c478bd9Sstevel@tonic-gate }
1297c478bd9Sstevel@tonic-gate
1307c478bd9Sstevel@tonic-gate void
svcsetprio(void)1317c478bd9Sstevel@tonic-gate svcsetprio(void)
1327c478bd9Sstevel@tonic-gate {
1337c478bd9Sstevel@tonic-gate pcinfo_t pcinfo;
1347c478bd9Sstevel@tonic-gate pri_t maxupri;
1357c478bd9Sstevel@tonic-gate
1367c478bd9Sstevel@tonic-gate /*
1377c478bd9Sstevel@tonic-gate * By default, all threads should be part of the FX scheduler
1387c478bd9Sstevel@tonic-gate * class. As nfsd/lockd server threads used to be part of the
1397c478bd9Sstevel@tonic-gate * kernel, they're used to being scheduled in the SYS class.
1407c478bd9Sstevel@tonic-gate * Userland threads shouldn't be in SYS, but they can be given a
1417c478bd9Sstevel@tonic-gate * higher priority by default. This change still renders nfsd/lockd
1427c478bd9Sstevel@tonic-gate * managable by an admin by utilizing commands to change scheduling
1437c478bd9Sstevel@tonic-gate * manually, or by using resource management tools such as pools
1447c478bd9Sstevel@tonic-gate * to associate them with a different scheduling class and segregate
1457c478bd9Sstevel@tonic-gate * the workload.
1467c478bd9Sstevel@tonic-gate *
1477c478bd9Sstevel@tonic-gate * We set the threads' priority to the upper bound for priorities
1487c478bd9Sstevel@tonic-gate * in FX. This should be 60, but since the desired action is to
1497c478bd9Sstevel@tonic-gate * make nfsd/lockd more important than TS threads, we bow to the
1507c478bd9Sstevel@tonic-gate * system's knowledge rather than setting it manually. Furthermore,
1517c478bd9Sstevel@tonic-gate * since the SYS class doesn't timeslice, use an "infinite" quantum.
1527c478bd9Sstevel@tonic-gate * If anything fails, just log the failure and let the daemon
1537c478bd9Sstevel@tonic-gate * default to TS.
1547c478bd9Sstevel@tonic-gate *
1557c478bd9Sstevel@tonic-gate * The change of scheduling class is expected to fail in a non-global
1567c478bd9Sstevel@tonic-gate * zone, so we avoid worrying the zone administrator unnecessarily.
1577c478bd9Sstevel@tonic-gate */
1587c478bd9Sstevel@tonic-gate (void) strcpy(pcinfo.pc_clname, "FX");
1597c478bd9Sstevel@tonic-gate if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) != -1) {
1607c478bd9Sstevel@tonic-gate maxupri = ((fxinfo_t *)pcinfo.pc_clinfo)->fx_maxupri;
1617c478bd9Sstevel@tonic-gate if (priocntl(P_LWPID, P_MYID, PC_SETXPARMS, "FX",
1627c478bd9Sstevel@tonic-gate FX_KY_UPRILIM, maxupri, FX_KY_UPRI, maxupri,
1637c478bd9Sstevel@tonic-gate FX_KY_TQNSECS, FX_TQINF, NULL) != 0 &&
1647c478bd9Sstevel@tonic-gate getzoneid() == GLOBAL_ZONEID)
1657c478bd9Sstevel@tonic-gate (void) syslog(LOG_ERR, "Unable to use FX scheduler: "
1667c478bd9Sstevel@tonic-gate "%m. Using system default scheduler.");
1677c478bd9Sstevel@tonic-gate } else
1687c478bd9Sstevel@tonic-gate (void) syslog(LOG_ERR, "Unable to determine parameters "
1697c478bd9Sstevel@tonic-gate "for FX scheduler. Using system default scheduler.");
1707c478bd9Sstevel@tonic-gate }
1717c478bd9Sstevel@tonic-gate
1727c478bd9Sstevel@tonic-gate int
svcrdma(int id,int versmin,int versmax,int delegation)1737c478bd9Sstevel@tonic-gate svcrdma(int id, int versmin, int versmax, int delegation)
1747c478bd9Sstevel@tonic-gate {
1757c478bd9Sstevel@tonic-gate thread_t tid;
1767c478bd9Sstevel@tonic-gate struct rdma_svc_args *rsa;
1777c478bd9Sstevel@tonic-gate
1787c478bd9Sstevel@tonic-gate rsa = (struct rdma_svc_args *)malloc(sizeof (struct rdma_svc_args));
1797c478bd9Sstevel@tonic-gate rsa->poolid = (uint32_t)id;
1807c478bd9Sstevel@tonic-gate rsa->netid = NULL;
1817c478bd9Sstevel@tonic-gate rsa->nfs_versmin = versmin;
1827c478bd9Sstevel@tonic-gate rsa->nfs_versmax = versmax;
1837c478bd9Sstevel@tonic-gate rsa->delegation = delegation;
1847c478bd9Sstevel@tonic-gate
1857c478bd9Sstevel@tonic-gate /*
1867c478bd9Sstevel@tonic-gate * Create a thread to handle RDMA start and stop.
1877c478bd9Sstevel@tonic-gate */
1887c478bd9Sstevel@tonic-gate if (thr_create(NULL, THR_MIN_STACK * 2, svc_rdma_creator, (void *)rsa,
1897c478bd9Sstevel@tonic-gate THR_BOUND | THR_DETACHED, &tid))
1907c478bd9Sstevel@tonic-gate return (1);
1917c478bd9Sstevel@tonic-gate
1927c478bd9Sstevel@tonic-gate return (0);
1937c478bd9Sstevel@tonic-gate }
1947c478bd9Sstevel@tonic-gate
1957c478bd9Sstevel@tonic-gate int
svcwait(int id)1967c478bd9Sstevel@tonic-gate svcwait(int id)
1977c478bd9Sstevel@tonic-gate {
1987c478bd9Sstevel@tonic-gate thread_t tid;
1997c478bd9Sstevel@tonic-gate
2007c478bd9Sstevel@tonic-gate /*
2017c478bd9Sstevel@tonic-gate * Create a bound thread to wait for kernel LWPs that
2027c478bd9Sstevel@tonic-gate * need to be created. This thread also has little need
2037c478bd9Sstevel@tonic-gate * of stackspace, so should be created with that in mind.
2047c478bd9Sstevel@tonic-gate */
2057c478bd9Sstevel@tonic-gate if (thr_create(NULL, THR_MIN_STACK * 2, svcblock, (void *)id,
2067c478bd9Sstevel@tonic-gate THR_BOUND | THR_DETACHED, &tid))
2077c478bd9Sstevel@tonic-gate return (1);
2087c478bd9Sstevel@tonic-gate
2097c478bd9Sstevel@tonic-gate return (0);
2107c478bd9Sstevel@tonic-gate }
211