1 /*
2 * Copyright (c) 2005 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Sergey Glushchenko <deen@smz.com.ua>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35
36 #include <sys/cpumask.h>
37 #include <sys/errno.h>
38 #include <sys/globaldata.h> /* curthread */
39 #include <sys/proc.h>
40 #include <sys/caps.h>
41 #include <sys/sysmsg.h> /* struct usched_set_args */
42 #include <sys/systm.h> /* strcmp() */
43 #include <sys/usched.h>
44
45 #include <machine/smp.h>
46
/* List of all userland schedulers registered via usched_ctl(). */
static TAILQ_HEAD(, usched) usched_list = TAILQ_HEAD_INITIALIZER(usched_list);

/*
 * Starts as all-ones.  NOTE(review): presumably the set of cpus managed
 * by the default scheduler; not modified in this file -- confirm against
 * the scheduler implementations.
 */
cpumask_t usched_mastermask = CPUMASK_INITIALIZER_ALLONES;

/* Apply a new affinity mask to a single lwp (shared syscall helper). */
static int setaffinity_lp(struct lwp *lp, cpumask_t *mask);
52
53 /*
54 * Called from very low level boot code, sys/kern/init_main.c:mi_proc0init().
55 * We cannot do anything fancy. no malloc's, no nothing other then
56 * static initialization.
57 */
58 struct usched *
usched_init(void)59 usched_init(void)
60 {
61 const char *defsched;
62
63 defsched = kgetenv("kern.user_scheduler");
64
65 /*
66 * Add various userland schedulers to the system.
67 */
68 usched_ctl(&usched_bsd4, USCH_ADD);
69 usched_ctl(&usched_dfly, USCH_ADD);
70 usched_ctl(&usched_dummy, USCH_ADD);
71 if (defsched == NULL )
72 return(&usched_dfly);
73 if (strcmp(defsched, "bsd4") == 0)
74 return(&usched_bsd4);
75 if (strcmp(defsched, "dfly") == 0)
76 return(&usched_dfly);
77 kprintf("WARNING: Running dummy userland scheduler\n");
78 return(&usched_dummy);
79 }
80
81 /*
82 * USCHED_CTL
83 *
84 * SYNOPSIS:
85 * Add/remove usched to/from list.
86 *
87 * ARGUMENTS:
88 * usched - pointer to target scheduler
89 * action - addition or removal ?
90 *
91 * RETURN VALUES:
92 * 0 - success
93 * EINVAL - error
94 */
95 int
usched_ctl(struct usched * usched,int action)96 usched_ctl(struct usched *usched, int action)
97 {
98 struct usched *item; /* temporaly for TAILQ processing */
99 int error = 0;
100
101 switch(action) {
102 case USCH_ADD:
103 /*
104 * Make sure it isn't already on the list
105 */
106 #ifdef INVARIANTS
107 TAILQ_FOREACH(item, &usched_list, entry) {
108 KKASSERT(item != usched);
109 }
110 #endif
111 /*
112 * Optional callback to the scheduler before we officially
113 * add it to the list.
114 */
115 if (usched->usched_register)
116 usched->usched_register();
117 TAILQ_INSERT_TAIL(&usched_list, usched, entry);
118 break;
119 case USCH_REM:
120 /*
121 * Do not allow the default scheduler to be removed
122 */
123 if (strcmp(usched->name, "bsd4") == 0) {
124 error = EINVAL;
125 break;
126 }
127 TAILQ_FOREACH(item, &usched_list, entry) {
128 if (item == usched)
129 break;
130 }
131 if (item) {
132 if (item->usched_unregister)
133 item->usched_unregister();
134 TAILQ_REMOVE(&usched_list, item, entry);
135 } else {
136 error = EINVAL;
137 }
138 break;
139 default:
140 error = EINVAL;
141 break;
142 }
143 return (error);
144 }
145
146 /*
147 * Called from the scheduler clock on each cpu independently at the
148 * common scheduling rate. If the scheduler clock interrupted a running
149 * lwp the lp will be non-NULL.
150 */
151 void
usched_schedulerclock(struct lwp * lp,sysclock_t periodic,sysclock_t time)152 usched_schedulerclock(struct lwp *lp, sysclock_t periodic, sysclock_t time)
153 {
154 struct usched *item;
155
156 TAILQ_FOREACH(item, &usched_list, entry) {
157 if (lp && lp->lwp_proc->p_usched == item)
158 item->schedulerclock(lp, periodic, time);
159 else
160 item->schedulerclock(NULL, periodic, time);
161 }
162 }
163
164 /*
165 * USCHED_SET(syscall)
166 *
167 * SYNOPSIS:
168 * Setting up a proc's usched.
169 *
170 * ARGUMENTS:
171 * pid -
172 * cmd -
173 * data -
174 * bytes -
175 * RETURN VALUES:
176 * 0 - success
177 * EFBIG - error (invalid cpu#)
178 * EPERM - error (failed to delete cpu#)
179 * EINVAL - error (other reasons)
180 *
181 * MPALMOSTSAFE
182 */
183 int
sys_usched_set(struct sysmsg * sysmsg,const struct usched_set_args * uap)184 sys_usched_set(struct sysmsg *sysmsg, const struct usched_set_args *uap)
185 {
186 struct proc *p = curthread->td_proc;
187 struct usched *item; /* temporaly for TAILQ processing */
188 int error;
189 char buffer[NAME_LENGTH];
190 cpumask_t mask;
191 struct lwp *lp;
192 int cpuid;
193
194 if (uap->pid != 0 && uap->pid != curthread->td_proc->p_pid)
195 return (EINVAL);
196
197 lp = curthread->td_lwp;
198 lwkt_gettoken(&lp->lwp_token);
199
200 switch (uap->cmd) {
201 case USCHED_SET_SCHEDULER:
202 if ((error = caps_priv_check_self(SYSCAP_NOSCHED)) != 0)
203 break;
204 error = copyinstr(uap->data, buffer, sizeof(buffer), NULL);
205 if (error)
206 break;
207 TAILQ_FOREACH(item, &usched_list, entry) {
208 if ((strcmp(item->name, buffer) == 0))
209 break;
210 }
211
212 /*
213 * If the scheduler for a process is being changed, disassociate
214 * the old scheduler before switching to the new one.
215 *
216 * XXX we might have to add an additional ABI call to do a 'full
217 * disassociation' and another ABI call to do a 'full
218 * reassociation'
219 */
220 /* XXX lwp have to deal with multiple lwps here */
221 if (p->p_nthreads != 1) {
222 error = EINVAL;
223 break;
224 }
225 if (item && item != p->p_usched) {
226 /* XXX lwp */
227 p->p_usched->release_curproc(ONLY_LWP_IN_PROC(p));
228 p->p_usched->heuristic_exiting(ONLY_LWP_IN_PROC(p), p);
229 p->p_usched = item;
230 } else if (item == NULL) {
231 error = EINVAL;
232 }
233 break;
234 case USCHED_SET_CPU:
235 if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0)
236 break;
237 if (uap->bytes != sizeof(int)) {
238 error = EINVAL;
239 break;
240 }
241 error = copyin(uap->data, &cpuid, sizeof(int));
242 if (error)
243 break;
244 if (cpuid < 0 || cpuid >= ncpus) {
245 error = EFBIG;
246 break;
247 }
248 if (CPUMASK_TESTBIT(smp_active_mask, cpuid) == 0) {
249 error = EINVAL;
250 break;
251 }
252 CPUMASK_ASSBIT(lp->lwp_cpumask, cpuid);
253 if (cpuid != mycpu->gd_cpuid) {
254 lwkt_migratecpu(cpuid);
255 p->p_usched->changedcpu(lp);
256 }
257 break;
258 case USCHED_GET_CPU:
259 /* USCHED_GET_CPU doesn't require special privileges. */
260 if (uap->bytes != sizeof(int)) {
261 error = EINVAL;
262 break;
263 }
264 error = copyout(&(mycpu->gd_cpuid), uap->data, sizeof(int));
265 break;
266 case USCHED_GET_CPUMASK:
267 /* USCHED_GET_CPUMASK doesn't require special privileges. */
268 if (uap->bytes != sizeof(cpumask_t)) {
269 error = EINVAL;
270 break;
271 }
272 mask = lp->lwp_cpumask;
273 CPUMASK_ANDMASK(mask, smp_active_mask);
274 error = copyout(&mask, uap->data, sizeof(cpumask_t));
275 break;
276 case USCHED_ADD_CPU:
277 if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0)
278 break;
279 if (uap->bytes != sizeof(int)) {
280 error = EINVAL;
281 break;
282 }
283 error = copyin(uap->data, &cpuid, sizeof(int));
284 if (error)
285 break;
286 if (cpuid < 0 || cpuid >= ncpus) {
287 error = EFBIG;
288 break;
289 }
290 if (CPUMASK_TESTBIT(smp_active_mask, cpuid) == 0) {
291 error = EINVAL;
292 break;
293 }
294 CPUMASK_ORBIT(lp->lwp_cpumask, cpuid);
295 break;
296 case USCHED_DEL_CPU:
297 /* USCHED_DEL_CPU doesn't require special privileges. */
298 if (uap->bytes != sizeof(int)) {
299 error = EINVAL;
300 break;
301 }
302 error = copyin(uap->data, &cpuid, sizeof(int));
303 if (error)
304 break;
305 if (cpuid < 0 || cpuid >= ncpus) {
306 error = EFBIG;
307 break;
308 }
309 lp = curthread->td_lwp;
310 mask = lp->lwp_cpumask;
311 CPUMASK_ANDMASK(mask, smp_active_mask);
312 CPUMASK_NANDBIT(mask, cpuid);
313 if (CPUMASK_TESTZERO(mask)) {
314 error = EPERM;
315 } else {
316 CPUMASK_NANDBIT(lp->lwp_cpumask, cpuid);
317 if (CPUMASK_TESTMASK(lp->lwp_cpumask,
318 mycpu->gd_cpumask) == 0) {
319 mask = lp->lwp_cpumask;
320 CPUMASK_ANDMASK(mask, smp_active_mask);
321 cpuid = BSFCPUMASK(mask);
322 lwkt_migratecpu(cpuid);
323 p->p_usched->changedcpu(lp);
324 }
325 }
326 break;
327 case USCHED_SET_CPUMASK:
328 if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0)
329 break;
330 if (uap->bytes != sizeof(mask)) {
331 error = EINVAL;
332 break;
333 }
334 error = copyin(uap->data, &mask, sizeof(mask));
335 if (error)
336 break;
337
338 CPUMASK_ANDMASK(mask, smp_active_mask);
339 if (CPUMASK_TESTZERO(mask)) {
340 error = EPERM;
341 break;
342 }
343 /* Commit the new cpumask. */
344 lp->lwp_cpumask = mask;
345
346 /* Migrate if necessary. */
347 if (CPUMASK_TESTMASK(lp->lwp_cpumask, mycpu->gd_cpumask) == 0) {
348 cpuid = BSFCPUMASK(lp->lwp_cpumask);
349 lwkt_migratecpu(cpuid);
350 p->p_usched->changedcpu(lp);
351 }
352 break;
353 default:
354 error = EINVAL;
355 break;
356 }
357 lwkt_reltoken(&lp->lwp_token);
358
359 return (error);
360 }
361
362 int
sys_lwp_getaffinity(struct sysmsg * sysmsg,const struct lwp_getaffinity_args * uap)363 sys_lwp_getaffinity(struct sysmsg *sysmsg,
364 const struct lwp_getaffinity_args *uap)
365 {
366 struct proc *p;
367 cpumask_t mask;
368 struct lwp *lp;
369 int error = 0;
370
371 if (uap->pid < 0)
372 return (EINVAL);
373
374 if (uap->pid == 0) {
375 p = curproc;
376 PHOLD(p);
377 } else {
378 p = pfind(uap->pid); /* pfind() holds (p) */
379 if (p == NULL)
380 return (ESRCH);
381 }
382 lwkt_gettoken(&p->p_token);
383
384 if (uap->tid < 0) {
385 lp = RB_FIRST(lwp_rb_tree, &p->p_lwp_tree);
386 } else {
387 lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
388 }
389 if (lp == NULL) {
390 error = ESRCH;
391 } else {
392 /* Take a snapshot for copyout, which may block. */
393 LWPHOLD(lp);
394 lwkt_gettoken(&lp->lwp_token);
395 mask = lp->lwp_cpumask;
396 CPUMASK_ANDMASK(mask, smp_active_mask);
397 lwkt_reltoken(&lp->lwp_token);
398 LWPRELE(lp);
399 }
400
401 lwkt_reltoken(&p->p_token);
402 PRELE(p);
403
404 if (error == 0)
405 error = copyout(&mask, uap->mask, sizeof(cpumask_t));
406
407 return (error);
408 }
409
410 int
sys_lwp_setaffinity(struct sysmsg * sysmsg,const struct lwp_setaffinity_args * uap)411 sys_lwp_setaffinity(struct sysmsg *sysmsg,
412 const struct lwp_setaffinity_args *uap)
413 {
414 struct proc *p;
415 cpumask_t mask;
416 struct lwp *lp;
417 int error;
418
419 /*
420 * NOTE:
421 * Always allow change self CPU affinity.
422 */
423 if ((error = caps_priv_check_self(SYSCAP_NOSCHED_CPUSET)) != 0 &&
424 uap->pid != 0)
425 {
426 return (error);
427 }
428
429 error = copyin(uap->mask, &mask, sizeof(mask));
430 if (error)
431 return (error);
432
433 CPUMASK_ANDMASK(mask, smp_active_mask);
434 if (CPUMASK_TESTZERO(mask))
435 return (EPERM);
436 if (uap->pid < 0)
437 return (EINVAL);
438
439 /*
440 * Locate the process
441 */
442 if (uap->pid == 0) {
443 p = curproc;
444 PHOLD(p);
445 } else {
446 p = pfind(uap->pid); /* pfind() holds (p) */
447 if (p == NULL)
448 return (ESRCH);
449 }
450 lwkt_gettoken(&p->p_token);
451
452 if (uap->tid < 0) {
453 FOREACH_LWP_IN_PROC(lp, p) {
454 error = setaffinity_lp(lp, &mask);
455 }
456 /* not an error if no LPs left in process */
457 } else {
458 lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
459 error = setaffinity_lp(lp, &mask);
460 }
461 lwkt_reltoken(&p->p_token);
462 PRELE(p);
463
464 return (error);
465 }
466
467 static int
setaffinity_lp(struct lwp * lp,cpumask_t * mask)468 setaffinity_lp(struct lwp *lp, cpumask_t *mask)
469 {
470 if (lp == NULL)
471 return ESRCH;
472
473 LWPHOLD(lp);
474 lwkt_gettoken(&lp->lwp_token);
475 lp->lwp_cpumask = *mask;
476
477 /*
478 * NOTE: When adjusting a thread that is not our own the migration
479 * will occur at the next reschedule.
480 */
481 if (lp == curthread->td_lwp) {
482 /*
483 * Self migration can be done immediately,
484 * if necessary.
485 */
486 if (CPUMASK_TESTBIT(lp->lwp_cpumask,
487 mycpu->gd_cpuid) == 0) {
488 lwkt_migratecpu(BSFCPUMASK(lp->lwp_cpumask));
489 lp->lwp_proc->p_usched->changedcpu(lp);
490 }
491 }
492 lwkt_reltoken(&lp->lwp_token);
493 LWPRELE(lp);
494
495 return 0;
496 }
497