xref: /dragonfly/sys/kern/kern_usched.c (revision 03517d4e)
1 /*
2  * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sergey Glushchenko <deen@smz.com.ua>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 
36 #include <sys/cpumask.h>
37 #include <sys/errno.h>
38 #include <sys/globaldata.h>		/* curthread */
39 #include <sys/proc.h>
40 #include <sys/caps.h>
41 #include <sys/sysmsg.h>			/* struct usched_set_args */
42 #include <sys/systm.h>			/* strcmp() */
43 #include <sys/usched.h>
44 
45 #include <machine/smp.h>
46 
47 static TAILQ_HEAD(, usched) usched_list = TAILQ_HEAD_INITIALIZER(usched_list);
48 
49 cpumask_t usched_mastermask = CPUMASK_INITIALIZER_ALLONES;
50 
51 static int setaffinity_lp(struct lwp *lp, cpumask_t *mask);
52 
53 /*
54  * Called from very low level boot code, sys/kern/init_main.c:mi_proc0init().
55  * We cannot do anything fancy.  no malloc's, no nothing other then
56  * static initialization.
57  */
58 struct usched *
59 usched_init(void)
60 {
61 	const char *defsched;
62 
63 	defsched = kgetenv("kern.user_scheduler");
64 
65 	/*
66 	 * Add various userland schedulers to the system.
67 	 */
68 	usched_ctl(&usched_bsd4, USCH_ADD);
69 	usched_ctl(&usched_dfly, USCH_ADD);
70 	usched_ctl(&usched_dummy, USCH_ADD);
71 	if (defsched == NULL )
72 		return(&usched_dfly);
73 	if (strcmp(defsched, "bsd4") == 0)
74 		return(&usched_bsd4);
75 	if (strcmp(defsched, "dfly") == 0)
76 		return(&usched_dfly);
77 	kprintf("WARNING: Running dummy userland scheduler\n");
78 	return(&usched_dummy);
79 }
80 
81 /*
82  * USCHED_CTL
83  *
84  * SYNOPSIS:
85  * 	Add/remove usched to/from list.
86  *
87  * ARGUMENTS:
88  * 	usched - pointer to target scheduler
89  * 	action - addition or removal ?
90  *
91  * RETURN VALUES:
92  * 	0 - success
93  * 	EINVAL - error
94  */
95 int
96 usched_ctl(struct usched *usched, int action)
97 {
98 	struct usched *item;	/* temporaly for TAILQ processing */
99 	int error = 0;
100 
101 	switch(action) {
102 	case USCH_ADD:
103 		/*
104 		 * Make sure it isn't already on the list
105 		 */
106 #ifdef INVARIANTS
107 		TAILQ_FOREACH(item, &usched_list, entry) {
108 			KKASSERT(item != usched);
109 		}
110 #endif
111 		/*
112 		 * Optional callback to the scheduler before we officially
113 		 * add it to the list.
114 		 */
115 		if (usched->usched_register)
116 			usched->usched_register();
117 		TAILQ_INSERT_TAIL(&usched_list, usched, entry);
118 		break;
119 	case USCH_REM:
120 		/*
121 		 * Do not allow the default scheduler to be removed
122 		 */
123 		if (strcmp(usched->name, "bsd4") == 0) {
124 			error = EINVAL;
125 			break;
126 		}
127 		TAILQ_FOREACH(item, &usched_list, entry) {
128 			if (item == usched)
129 				break;
130 		}
131 		if (item) {
132 			if (item->usched_unregister)
133 				item->usched_unregister();
134 			TAILQ_REMOVE(&usched_list, item, entry);
135 		} else {
136 			error = EINVAL;
137 		}
138 		break;
139 	default:
140 		error = EINVAL;
141 		break;
142 	}
143 	return (error);
144 }
145 
146 /*
147  * Called from the scheduler clock on each cpu independently at the
148  * common scheduling rate.  If the scheduler clock interrupted a running
149  * lwp the lp will be non-NULL.
150  */
151 void
152 usched_schedulerclock(struct lwp *lp, sysclock_t periodic, sysclock_t time)
153 {
154 	struct usched *item;
155 
156 	TAILQ_FOREACH(item, &usched_list, entry) {
157 		if (lp && lp->lwp_proc->p_usched == item)
158 			item->schedulerclock(lp, periodic, time);
159 		else
160 			item->schedulerclock(NULL, periodic, time);
161 	}
162 }
163 
/*
 * USCHED_SET(syscall)
 *
 * SYNOPSIS:
 * 	Set or query scheduler/cpu-affinity state for the calling process.
 *
 * ARGUMENTS:
 *	pid	- 0 or the caller's own pid (other pids are rejected)
 *	cmd	- USCHED_SET_SCHEDULER / USCHED_{SET,GET,ADD,DEL}_CPU /
 *		  USCHED_GET_CPUMASK / USCHED_SET_CPUMASK
 * 	data	- userspace buffer; meaning depends on cmd
 *	bytes	- size of the buffer at 'data' (validated per cmd)
 * RETURN VALUES:
 * 	0 - success
 * 	EFBIG  - error (invalid cpu#)
 * 	EPERM  - error (failed to delete cpu#)
 * 	EINVAL - error (other reasons)
 *
 * MPALMOSTSAFE
 */
int
sys_usched_set(struct sysmsg *sysmsg, const struct usched_set_args *uap)
{
	struct proc *p = curthread->td_proc;
	struct usched *item;	/* temporary for TAILQ processing */
	int error;
	char buffer[NAME_LENGTH];
	cpumask_t mask;
	struct lwp *lp;
	int cpuid;

	/* Only the calling process may be targeted (pid 0 == self). */
	if (uap->pid != 0 && uap->pid != curthread->td_proc->p_pid)
		return (EINVAL);

	/* Hold our own lwp token across all sub-commands. */
	lp = curthread->td_lwp;
	lwkt_gettoken(&lp->lwp_token);

	switch (uap->cmd) {
	case USCHED_SET_SCHEDULER:
		/* Switch this (single-threaded) process to a named scheduler. */
		if ((error = caps_priv_check_self(SYSCAP_NOSCHED)) != 0)
			break;
		error = copyinstr(uap->data, buffer, sizeof(buffer), NULL);
		if (error)
			break;
		/* Look the scheduler up by name; item is NULL if not found. */
		TAILQ_FOREACH(item, &usched_list, entry) {
			if ((strcmp(item->name, buffer) == 0))
				break;
		}

		/*
		 * If the scheduler for a process is being changed, disassociate
		 * the old scheduler before switching to the new one.
		 *
		 * XXX we might have to add an additional ABI call to do a 'full
		 * disassociation' and another ABI call to do a 'full
		 * reassociation'
		 */
		/* XXX lwp have to deal with multiple lwps here */
		if (p->p_nthreads != 1) {
			error = EINVAL;
			break;
		}
		if (item && item != p->p_usched) {
			/* XXX lwp */
			p->p_usched->release_curproc(ONLY_LWP_IN_PROC(p));
			p->p_usched->heuristic_exiting(ONLY_LWP_IN_PROC(p), p);
			p->p_usched = item;
		} else if (item == NULL) {
			error = EINVAL;
		}
		break;
	case USCHED_SET_CPU:
		/* Pin the calling lwp to exactly one cpu. */
		if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0)
			break;
		if (uap->bytes != sizeof(int)) {
			error = EINVAL;
			break;
		}
		error = copyin(uap->data, &cpuid, sizeof(int));
		if (error)
			break;
		if (cpuid < 0 || cpuid >= ncpus) {
			error = EFBIG;
			break;
		}
		if (CPUMASK_TESTBIT(smp_active_mask, cpuid) == 0) {
			error = EINVAL;
			break;
		}
		/* Replace the whole mask with the single requested cpu. */
		CPUMASK_ASSBIT(lp->lwp_cpumask, cpuid);
		if (cpuid != mycpu->gd_cpuid) {
			/* Migrate immediately; we are operating on ourselves. */
			lwkt_migratecpu(cpuid);
			p->p_usched->changedcpu(lp);
		}
		break;
	case USCHED_GET_CPU:
		/* USCHED_GET_CPU doesn't require special privileges. */
		if (uap->bytes != sizeof(int)) {
			error = EINVAL;
			break;
		}
		error = copyout(&(mycpu->gd_cpuid), uap->data, sizeof(int));
		break;
	case USCHED_GET_CPUMASK:
		/* USCHED_GET_CPUMASK doesn't require special privileges. */
		if (uap->bytes != sizeof(cpumask_t)) {
			error = EINVAL;
			break;
		}
		/* Report only cpus that are actually active. */
		mask = lp->lwp_cpumask;
		CPUMASK_ANDMASK(mask, smp_active_mask);
		error = copyout(&mask, uap->data, sizeof(cpumask_t));
		break;
	case USCHED_ADD_CPU:
		/* Add one cpu to the calling lwp's allowed set. */
		if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0)
			break;
		if (uap->bytes != sizeof(int)) {
			error = EINVAL;
			break;
		}
		error = copyin(uap->data, &cpuid, sizeof(int));
		if (error)
			break;
		if (cpuid < 0 || cpuid >= ncpus) {
			error = EFBIG;
			break;
		}
		if (CPUMASK_TESTBIT(smp_active_mask, cpuid) == 0) {
			error = EINVAL;
			break;
		}
		CPUMASK_ORBIT(lp->lwp_cpumask, cpuid);
		break;
	case USCHED_DEL_CPU:
		/* USCHED_DEL_CPU doesn't require special privileges. */
		if (uap->bytes != sizeof(int)) {
			error = EINVAL;
			break;
		}
		error = copyin(uap->data, &cpuid, sizeof(int));
		if (error)
			break;
		if (cpuid < 0 || cpuid >= ncpus) {
			error = EFBIG;
			break;
		}
		lp = curthread->td_lwp;
		/*
		 * Refuse (EPERM) if removing this cpu would leave the lwp
		 * with no active cpu to run on.
		 */
		mask = lp->lwp_cpumask;
		CPUMASK_ANDMASK(mask, smp_active_mask);
		CPUMASK_NANDBIT(mask, cpuid);
		if (CPUMASK_TESTZERO(mask)) {
			error = EPERM;
		} else {
			CPUMASK_NANDBIT(lp->lwp_cpumask, cpuid);
			if (CPUMASK_TESTMASK(lp->lwp_cpumask,
					    mycpu->gd_cpumask) == 0) {
				/*
				 * We just removed the cpu we are running on;
				 * migrate to the lowest remaining active cpu.
				 */
				mask = lp->lwp_cpumask;
				CPUMASK_ANDMASK(mask, smp_active_mask);
				cpuid = BSFCPUMASK(mask);
				lwkt_migratecpu(cpuid);
				p->p_usched->changedcpu(lp);
			}
		}
		break;
	case USCHED_SET_CPUMASK:
		/* Replace the calling lwp's entire cpu mask. */
		if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0)
			break;
		if (uap->bytes != sizeof(mask)) {
			error = EINVAL;
			break;
		}
		error = copyin(uap->data, &mask, sizeof(mask));
		if (error)
			break;

		/* The new mask must intersect the active cpus. */
		CPUMASK_ANDMASK(mask, smp_active_mask);
		if (CPUMASK_TESTZERO(mask)) {
			error = EPERM;
			break;
		}
		/* Commit the new cpumask. */
		lp->lwp_cpumask = mask;

		/* Migrate if necessary. */
		if (CPUMASK_TESTMASK(lp->lwp_cpumask, mycpu->gd_cpumask) == 0) {
			cpuid = BSFCPUMASK(lp->lwp_cpumask);
			lwkt_migratecpu(cpuid);
			p->p_usched->changedcpu(lp);
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	lwkt_reltoken(&lp->lwp_token);

	return (error);
}
361 
/*
 * lwp_getaffinity(syscall)
 *
 * Copy out the cpu affinity mask (restricted to active cpus) of one lwp.
 * pid 0 means the calling process; tid < 0 selects the process's first
 * lwp, otherwise the lwp with that tid.
 *
 * Returns 0 on success, EINVAL on a negative pid, ESRCH if the process
 * or lwp cannot be found, or a copyout error.
 */
int
sys_lwp_getaffinity(struct sysmsg *sysmsg,
		    const struct lwp_getaffinity_args *uap)
{
	struct proc *p;
	cpumask_t mask;
	struct lwp *lp;
	int error = 0;

	if (uap->pid < 0)
		return (EINVAL);

	/* Resolve and hold the target process. */
	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);
	} else {
		p = pfind(uap->pid);	/* pfind() holds (p) */
		if (p == NULL)
			return (ESRCH);
	}
	lwkt_gettoken(&p->p_token);

	/* tid < 0 selects the first lwp in the process. */
	if (uap->tid < 0) {
		lp = RB_FIRST(lwp_rb_tree, &p->p_lwp_tree);
	} else {
		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
	}
	if (lp == NULL) {
		error = ESRCH;
	} else {
		/* Take a snapshot for copyout, which may block. */
		LWPHOLD(lp);
		lwkt_gettoken(&lp->lwp_token);
		mask = lp->lwp_cpumask;
		CPUMASK_ANDMASK(mask, smp_active_mask);
		lwkt_reltoken(&lp->lwp_token);
		LWPRELE(lp);
	}

	/* Drop all holds before the potentially-blocking copyout. */
	lwkt_reltoken(&p->p_token);
	PRELE(p);

	if (error == 0)
		error = copyout(&mask, uap->mask, sizeof(cpumask_t));

	return (error);
}
409 
/*
 * lwp_setaffinity(syscall)
 *
 * Set the cpu affinity mask of one lwp (tid >= 0) or of every lwp in a
 * process (tid < 0).  pid 0 means the calling process.  The requested
 * mask is intersected with smp_active_mask and must not end up empty.
 *
 * Returns 0 on success, EPERM if the effective mask is empty, EINVAL on
 * a negative pid, ESRCH if the process/lwp cannot be found, a copyin
 * error, or a privilege-check error when targeting another process.
 */
int
sys_lwp_setaffinity(struct sysmsg *sysmsg,
		    const struct lwp_setaffinity_args *uap)
{
	struct proc *p;
	cpumask_t mask;
	struct lwp *lp;
	int error;

	/*
	 * NOTE:
	 * Always allow change self CPU affinity.
	 */
	if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0 &&
	    uap->pid != 0)
	{
		return (error);
	}

	error = copyin(uap->mask, &mask, sizeof(mask));
	if (error)
		return (error);

	/* The effective mask must contain at least one active cpu. */
	CPUMASK_ANDMASK(mask, smp_active_mask);
	if (CPUMASK_TESTZERO(mask))
		return (EPERM);
	if (uap->pid < 0)
		return (EINVAL);

	/*
	 * Locate the process
	 */
	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);
	} else {
		p = pfind(uap->pid);	/* pfind() holds (p) */
		if (p == NULL)
			return (ESRCH);
	}
	lwkt_gettoken(&p->p_token);

	if (uap->tid < 0) {
		/* Apply to every lwp; error reflects the last one set. */
		FOREACH_LWP_IN_PROC(lp, p) {
			error = setaffinity_lp(lp, &mask);
		}
		/* not an error if no LPs left in process */
	} else {
		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
		error = setaffinity_lp(lp, &mask);
	}
	lwkt_reltoken(&p->p_token);
	PRELE(p);

	return (error);
}
466 
467 static int
468 setaffinity_lp(struct lwp *lp, cpumask_t *mask)
469 {
470 	if (lp == NULL)
471 		return ESRCH;
472 
473 	LWPHOLD(lp);
474 	lwkt_gettoken(&lp->lwp_token);
475 	lp->lwp_cpumask = *mask;
476 
477 	/*
478 	 * NOTE: When adjusting a thread that is not our own the migration
479 	 *	 will occur at the next reschedule.
480 	 */
481 	if (lp == curthread->td_lwp) {
482 		/*
483 		 * Self migration can be done immediately,
484 		 * if necessary.
485 		 */
486 		if (CPUMASK_TESTBIT(lp->lwp_cpumask,
487 		    mycpu->gd_cpuid) == 0) {
488 			lwkt_migratecpu(BSFCPUMASK(lp->lwp_cpumask));
489 			lp->lwp_proc->p_usched->changedcpu(lp);
490 		}
491 	}
492 	lwkt_reltoken(&lp->lwp_token);
493 	LWPRELE(lp);
494 
495 	return 0;
496 }
497