1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1994-1995 Søren Schmidt
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/param.h>
33 #include <sys/fcntl.h>
34 #include <sys/jail.h>
35 #include <sys/imgact.h>
36 #include <sys/limits.h>
37 #include <sys/lock.h>
38 #include <sys/msgbuf.h>
39 #include <sys/mutex.h>
40 #include <sys/poll.h>
41 #include <sys/priv.h>
42 #include <sys/proc.h>
43 #include <sys/procctl.h>
44 #include <sys/reboot.h>
45 #include <sys/random.h>
46 #include <sys/resourcevar.h>
47 #include <sys/rtprio.h>
48 #include <sys/sched.h>
49 #include <sys/smp.h>
50 #include <sys/stat.h>
51 #include <sys/syscallsubr.h>
52 #include <sys/sysctl.h>
53 #include <sys/sysent.h>
54 #include <sys/sysproto.h>
55 #include <sys/time.h>
56 #include <sys/vmmeter.h>
57 #include <sys/vnode.h>
58
59 #include <security/audit/audit.h>
60 #include <security/mac/mac_framework.h>
61
62 #include <vm/pmap.h>
63 #include <vm/vm_map.h>
64 #include <vm/swap_pager.h>
65
66 #ifdef COMPAT_LINUX32
67 #include <machine/../linux32/linux.h>
68 #include <machine/../linux32/linux32_proto.h>
69 #else
70 #include <machine/../linux/linux.h>
71 #include <machine/../linux/linux_proto.h>
72 #endif
73
74 #include <compat/linux/linux_common.h>
75 #include <compat/linux/linux_dtrace.h>
76 #include <compat/linux/linux_file.h>
77 #include <compat/linux/linux_mib.h>
78 #include <compat/linux/linux_mmap.h>
79 #include <compat/linux/linux_signal.h>
80 #include <compat/linux/linux_time.h>
81 #include <compat/linux/linux_util.h>
82 #include <compat/linux/linux_emul.h>
83 #include <compat/linux/linux_misc.h>
84
85 int stclohz; /* Statistics clock frequency */
86
/*
 * Map Linux RLIMIT_* resource indices to their FreeBSD counterparts;
 * indexed by the Linux resource number (0..LINUX_RLIM_NLIMITS-1).
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};
92
/* Userspace layout of the buffer filled in by Linux sysinfo(2). */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};
110
/* Userspace layout of the sigmask argument passed to Linux pselect6(2). */
struct l_pselect6arg {
	l_uintptr_t	ss;	/* pointer to the l_sigset_t, or NULL */
	l_size_t	ss_len;	/* size of the pointed-to sigset, in bytes */
};
115
/* Forward declarations for file-local helpers defined below. */
static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);
static int	linux_common_pselect6(struct thread *, l_int,
			l_fd_set *, l_fd_set *, l_fd_set *,
			struct timespec *, l_uintptr_t *);
static int	linux_common_ppoll(struct thread *, struct pollfd *,
			uint32_t, struct timespec *, l_sigset_t *,
			l_size_t);
static int	linux_pollin(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
static int	linux_pollout(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
134
/*
 * Linux sysinfo(2): gather system statistics (uptime, load averages,
 * memory and swap totals, process count) into a struct l_sysinfo and
 * copy it out to args->info.
 */
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	/* Round the uptime up to whole seconds. */
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects such
	 * as shared memory segments and tmpfs files. There is no cheap way to
	 * compute this, so just leave the field unpopulated. Linux itself only
	 * started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

	/* swap_pager_status() yields total (i) and used (j) swap pages. */
	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion of
	 * high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}
182
183 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux alarm(2): arm ITIMER_REAL to fire after args->secs seconds and
 * return the seconds remaining on any previously armed alarm, rounded
 * to the nearest second.  Always succeeds.
 */
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error __diagused;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit to avoid error from it.
	 *
	 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
	 * platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	/* Round the previous remaining time to the nearest second. */
	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
214 #endif
215
/*
 * Linux brk(2): try to move the data segment break to args->dsend.
 * Linux semantics report the resulting break address instead of an
 * error, so on failure the old break is returned unchanged.
 */
int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}
231
232 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux select(2).  Unlike the native call, Linux writes back the time
 * remaining into the user-supplied timeout, and normalizes invalid
 * timevals instead of rejecting them.
 */
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid. Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		/* Copy the remaining time back out, Linux-style. */
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
298 #endif
299
/*
 * Linux mremap(2).  Only in-place shrinking is supported: growing a
 * mapping fails with ENOMEM, and shrinking unmaps the trailing part of
 * the region.  On failure the return register is set to 0, matching the
 * Linux convention of returning MAP_FAILED.
 */
int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	/* Growing the mapping is not implemented. */
	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	/* Shrinking: unmap the tail beyond the new length. */
	if (args->new_len < args->old_len) {
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
	return (error);
}
338
/* Linux msync(2) flag values. */
#define LINUX_MS_ASYNC       0x0001
#define LINUX_MS_INVALIDATE  0x0002
#define LINUX_MS_SYNC        0x0004
342
/*
 * Linux msync(2): mask off LINUX_MS_SYNC before handing the remaining
 * flag bits to kern_msync().
 */
int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}
350
/* Linux mprotect(2): thin wrapper around the shared implementation. */
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{

	return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len,
	    uap->prot));
}
358
/* Linux madvise(2): thin wrapper around the shared implementation. */
int
linux_madvise(struct thread *td, struct linux_madvise_args *uap)
{

	return (linux_madvise_common(td, PTROUT(uap->addr), uap->len,
	    uap->behav));
}
366
/*
 * Linux mmap2(2): map a file or anonymous memory.  On architectures
 * that want it, the offset argument is in page-sized units rather than
 * bytes.
 */
int
linux_mmap2(struct thread *td, struct linux_mmap2_args *uap)
{
#if defined(LINUX_ARCHWANT_MMAP2PGOFF)
	/*
	 * For architectures with sizeof (off_t) < sizeof (loff_t) mmap is
	 * implemented with mmap2 syscall and the offset is represented in
	 * multiples of page size.
	 */
	return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot,
	    uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE));
#else
	return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pgoff));
#endif
}
383
384 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux time(2): return the current time in seconds since the Epoch,
 * additionally storing it at args->tm when that pointer is non-NULL.
 */
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
399 #endif
400
/* Userspace layout of the buffer filled in by Linux times(2). */
struct l_times_argv {
	l_clock_t	tms_utime;	/* caller's user CPU time */
	l_clock_t	tms_stime;	/* caller's system CPU time */
	l_clock_t	tms_cutime;	/* waited-for children's user time */
	l_clock_t	tms_cstime;	/* waited-for children's system time */
};
407
408 /*
409 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
410 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
411 * auxiliary vector entry.
412 */
#define CLK_TCK 100	/* Tick rate hard-coded by pre-2.2.1 glibc. */

/* Convert a struct timeval to ticks using the old fixed CLK_TCK rate. */
#define CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
/* Convert using the statistics clock frequency (stclohz). */
#define CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

/* Select the conversion based on the emulated Linux kernel version. */
#define CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER(2,4,0) ?	\
			    CONVNTCK(r) : CONVOTCK(r))
420
/*
 * Linux times(2): report the accumulated CPU times of the calling
 * process (and of its waited-for children) in clock ticks, and return
 * the system uptime in ticks as the syscall result.
 */
int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	/* The return value is the uptime expressed in ticks. */
	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}
452
/*
 * Linux uname(2): fill in a struct l_new_utsname with the emulated
 * OS name/release, host/domain names, kernel version string (truncated
 * at the first newline) and the machine architecture name.
 */
int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
	/* Cut the version string at the first newline. */
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
#if defined(__amd64__)
	/*
	 * On amd64, Linux uname(2) needs to return "x86_64"
	 * for both 64-bit and 32-bit applications. On 32-bit,
	 * the string returned by getauxval(AT_PLATFORM) needs
	 * to remain "i686", though.
	 */
#if defined(COMPAT_LINUX32)
	if (linux32_emulate_i386)
		strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
	else
#endif
	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
#elif defined(__aarch64__)
	strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME);
#elif defined(__i386__)
	strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
#endif

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}
496
/* Userspace layout of the Linux utime(2) times argument. */
struct l_utimbuf {
	l_time_t l_actime;	/* access time */
	l_time_t l_modtime;	/* modification time */
};
501
502 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux utime(2): set the access and modification times of a file,
 * or set both to the current time when args->times is NULL.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	int error;

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
			return (error);
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
524 #endif
525
526 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux utimes(2): like utime(2) but with microsecond-resolution
 * timevals; a NULL tptr sets both timestamps to the current time.
 */
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	int error;

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
547 #endif
548
549 static int
linux_utimensat_lts_to_ts(struct l_timespec * l_times,struct timespec * times)550 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
551 {
552
553 if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
554 l_times->tv_nsec != LINUX_UTIME_NOW &&
555 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
556 return (EINVAL);
557
558 times->tv_sec = l_times->tv_sec;
559 switch (l_times->tv_nsec)
560 {
561 case LINUX_UTIME_OMIT:
562 times->tv_nsec = UTIME_OMIT;
563 break;
564 case LINUX_UTIME_NOW:
565 times->tv_nsec = UTIME_NOW;
566 break;
567 default:
568 times->tv_nsec = l_times->tv_nsec;
569 }
570
571 return (0);
572 }
573
/*
 * Common backend for the utimensat(2) family: validate and translate
 * the Linux at-flags, short-circuit the both-UTIME_OMIT case, and
 * dispatch to kern_utimensat() (path form) or kern_futimens() (fd-only
 * form).
 */
static int
linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
    struct timespec *timesp, int lflags)
{
	int dfd, flags = 0;

	dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;

	if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
		return (EINVAL);

	if (timesp != NULL) {
		/* This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour. */
		if (timesp[0].tv_nsec == UTIME_OMIT &&
		    timesp[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;
	if (lflags & LINUX_AT_EMPTY_PATH)
		flags |= AT_EMPTY_PATH;

	if (pathname != NULL)
		return (kern_utimensat(td, dfd, pathname,
		    UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));

	/* The fd-only form accepts no flags. */
	if (lflags != 0)
		return (EINVAL);

	return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE));
}
608
/*
 * Linux utimensat(2): copy in and convert the optional timespec pair,
 * then hand off to the common backend.
 */
int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
634
635 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
636 static int
linux_utimensat_lts64_to_ts(struct l_timespec64 * l_times,struct timespec * times)637 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
638 {
639
640 /* Zero out the padding in compat mode. */
641 l_times->tv_nsec &= 0xFFFFFFFFUL;
642
643 if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
644 l_times->tv_nsec != LINUX_UTIME_NOW &&
645 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
646 return (EINVAL);
647
648 times->tv_sec = l_times->tv_sec;
649 switch (l_times->tv_nsec)
650 {
651 case LINUX_UTIME_OMIT:
652 times->tv_nsec = UTIME_OMIT;
653 break;
654 case LINUX_UTIME_NOW:
655 times->tv_nsec = UTIME_NOW;
656 break;
657 default:
658 times->tv_nsec = l_times->tv_nsec;
659 }
660
661 return (0);
662 }
663
/*
 * Linux utimensat_time64(2): 64-bit-timespec variant of utimensat for
 * 32-bit ABIs; converts the pair and hands off to the common backend.
 */
int
linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
{
	struct l_timespec64 l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times64 != NULL) {
		error = copyin(args->times64, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
689 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
690
691 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux futimesat(2): utimes(2) relative to a directory fd; a NULL
 * utimes pointer sets both timestamps to the current time.
 */
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
714 #endif
715
/*
 * Common backend for the wait-family syscalls: wait via kern_wait6(),
 * then translate the status word (signal numbers are remapped to their
 * Linux values), the rusage, and the siginfo into their Linux forms
 * before copying them out to the optional user pointers.
 */
static int
linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp,
    int options, void *rup, l_siginfo_t *infop)
{
	l_siginfo_t lsi;
	siginfo_t siginfo;
	struct __wrusage wru;
	int error, status, tmpstat, sig;

	error = kern_wait6(td, idtype, id, &status, options,
	    rup != NULL ? &wru : NULL, &siginfo);

	if (error == 0 && statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			/* Replace the termination signal number. */
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			/* Replace the stop signal number (bits 8-15). */
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32))
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}
	if (error == 0 && rup != NULL)
		error = linux_copyout_rusage(&wru.wru_self, rup);
	if (error == 0 && infop != NULL && td->td_retval[0] != 0) {
		sig = bsd_to_linux_signal(siginfo.si_signo);
		siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		error = copyout(&lsi, infop, sizeof(lsi));
	}

	return (error);
}
757
758 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
759 int
linux_waitpid(struct thread * td,struct linux_waitpid_args * args)760 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
761 {
762 struct linux_wait4_args wait4_args = {
763 .pid = args->pid,
764 .status = args->status,
765 .options = args->options,
766 .rusage = NULL,
767 };
768
769 return (linux_wait4(td, &wait4_args));
770 }
771 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
772
/*
 * Linux wait4(2): translate the pid argument into a kern_wait6()
 * idtype/id pair (any child, process group, or specific pid) and the
 * Linux wait options into native ones.
 */
int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	struct proc *p;
	int options, id, idtype;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	/* -INT_MIN is not defined. */
	if (args->pid == INT_MIN)
		return (ESRCH);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;

	if (args->pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (args->pid < 0) {
		/* Negative pid: wait for the process group |pid|. */
		idtype = P_PGID;
		id = (id_t)-args->pid;
	} else if (args->pid == 0) {
		/* Zero pid: wait for the caller's own process group. */
		idtype = P_PGID;
		p = td->td_proc;
		PROC_LOCK(p);
		id = p->p_pgid;
		PROC_UNLOCK(p);
	} else {
		idtype = P_PID;
		id = (id_t)args->pid;
	}

	return (linux_common_wait(td, idtype, id, args->status, options,
	    args->rusage, NULL));
}
816
/*
 * Linux waitid(2): validate options, map the Linux idtype to the
 * native one, and dispatch to the common wait backend.  P_PIDFD is not
 * supported.
 */
int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	idtype_t idtype;
	int error, options;
	struct proc *p;
	pid_t id;

	if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED |
	    LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	id = args->id;
	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		/* Since 5.4 Linux accepts id 0 as the caller's pgid. */
		if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			id = p->p_pgid;
			PROC_UNLOCK(p);
		} else if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	case LINUX_P_PIDFD:
		LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype");
		return (ENOSYS);
	default:
		return (EINVAL);
	}

	error = linux_common_wait(td, idtype, id, NULL, options,
	    args->rusage, args->info);
	/* waitid(2) itself returns 0; results go out via info/rusage. */
	td->td_retval[0] = 0;

	return (error);
}
865
866 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux mknod(2): FIFOs and sockets are created with kern_mkfifoat(),
 * device nodes with kern_mknodat(), regular files (mode 0 defaults to
 * S_IFREG) by an open/close with O_CREAT, and directories are rejected
 * with EPERM.
 */
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	int error;

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->mode, linux_decode_dev(args->dev));
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}
905 #endif
906
/*
 * Linux mknodat(2): same dispatch as linux_mknod() but relative to the
 * directory fd args->dfd (LINUX_AT_FDCWD maps to AT_FDCWD).
 */
int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE,
		    args->mode, linux_decode_dev(args->dev));
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, args->filename, UIO_USERSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}
947
/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 *
 * Linux personality(2): return the process's current persona value and,
 * unless the argument is the query value 0xffffffff, replace it with
 * args->per in the per-process Linux emulation data.
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}
968
/* Userspace layout of the Linux setitimer(2)/getitimer(2) value. */
struct l_itimerval {
	l_timeval it_interval;	/* reload value */
	l_timeval it_value;	/* time until next expiry */
};

/*
 * Copy an itimerval field-by-field; the field names match on both
 * sides, so the same macro is used for both conversion directions.
 */
#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;
979
/*
 * Linux setitimer(2): arm the requested interval timer, optionally
 * returning the previous value in uap->oitv.  A NULL new value turns
 * the call into a getitimer(2) on uap->oitv.
 */
int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		/* Query-only: behave like getitimer(2) into oitv. */
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}
1003
/*
 * Linux getitimer(2): fetch the requested interval timer and copy it
 * out in Linux layout.
 */
int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}
1017
1018 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* Linux nice(2): adjust the calling process's priority by args->inc. */
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
1025 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1026
/*
 * Linux setgroups(2): install a new supplementary group set on the
 * calling process's credentials.  cr_groups[0] (the effective GID) is
 * preserved; the Linux-supplied groups are stored after it.
 */
int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid. Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		/* Copy the Linux groups in after the preserved egid slot. */
		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}
1083
/*
 * Linux getgroups(2): copy out the supplementary group set, excluding
 * cr_groups[0] which holds the effective GID.  A gidsetsize of 0 only
 * reports the number of groups.
 */
int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid. Returning the whole set
	 * here will cause a duplicate. Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}
1126
1127 static bool
linux_get_dummy_limit(struct thread * td,l_uint resource,struct rlimit * rlim)1128 linux_get_dummy_limit(struct thread *td, l_uint resource, struct rlimit *rlim)
1129 {
1130 ssize_t size;
1131 int res, error;
1132
1133 if (linux_dummy_rlimits == 0)
1134 return (false);
1135
1136 switch (resource) {
1137 case LINUX_RLIMIT_LOCKS:
1138 case LINUX_RLIMIT_RTTIME:
1139 rlim->rlim_cur = LINUX_RLIM_INFINITY;
1140 rlim->rlim_max = LINUX_RLIM_INFINITY;
1141 return (true);
1142 case LINUX_RLIMIT_NICE:
1143 case LINUX_RLIMIT_RTPRIO:
1144 rlim->rlim_cur = 0;
1145 rlim->rlim_max = 0;
1146 return (true);
1147 case LINUX_RLIMIT_SIGPENDING:
1148 error = kernel_sysctlbyname(td,
1149 "kern.sigqueue.max_pending_per_proc",
1150 &res, &size, 0, 0, 0, 0);
1151 if (error != 0)
1152 return (false);
1153 rlim->rlim_cur = res;
1154 rlim->rlim_max = res;
1155 return (true);
1156 case LINUX_RLIMIT_MSGQUEUE:
1157 error = kernel_sysctlbyname(td,
1158 "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0);
1159 if (error != 0)
1160 return (false);
1161 rlim->rlim_cur = res;
1162 rlim->rlim_max = res;
1163 return (true);
1164 default:
1165 return (false);
1166 }
1167 }
1168
1169 int
linux_setrlimit(struct thread * td,struct linux_setrlimit_args * args)1170 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1171 {
1172 struct rlimit bsd_rlim;
1173 struct l_rlimit rlim;
1174 u_int which;
1175 int error;
1176
1177 if (args->resource >= LINUX_RLIM_NLIMITS)
1178 return (EINVAL);
1179
1180 which = linux_to_bsd_resource[args->resource];
1181 if (which == -1)
1182 return (EINVAL);
1183
1184 error = copyin(args->rlim, &rlim, sizeof(rlim));
1185 if (error)
1186 return (error);
1187
1188 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1189 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1190 return (kern_setrlimit(td, which, &bsd_rlim));
1191 }
1192
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Legacy Linux getrlimit(2) with old (narrow) rlimit encoding: values
 * that would read as "infinity" in the user ABI are clamped to the
 * signed maximum, matching historic Linux behaviour.
 */
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit lrlim;
	struct rlimit brlim;
	u_int which;

	if (linux_get_dummy_limit(td, args->resource, &brlim)) {
		lrlim.rlim_cur = brlim.rlim_cur;
		lrlim.rlim_max = brlim.rlim_max;
		return (copyout(&lrlim, args->rlim, sizeof(lrlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);
	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &brlim);

#ifdef COMPAT_LINUX32
	lrlim.rlim_cur = (unsigned int)brlim.rlim_cur;
	if (lrlim.rlim_cur == UINT_MAX)
		lrlim.rlim_cur = INT_MAX;
	lrlim.rlim_max = (unsigned int)brlim.rlim_max;
	if (lrlim.rlim_max == UINT_MAX)
		lrlim.rlim_max = INT_MAX;
#else
	lrlim.rlim_cur = (unsigned long)brlim.rlim_cur;
	if (lrlim.rlim_cur == ULONG_MAX)
		lrlim.rlim_cur = LONG_MAX;
	lrlim.rlim_max = (unsigned long)brlim.rlim_max;
	if (lrlim.rlim_max == ULONG_MAX)
		lrlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&lrlim, args->rlim, sizeof(lrlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1234
1235 int
linux_getrlimit(struct thread * td,struct linux_getrlimit_args * args)1236 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1237 {
1238 struct l_rlimit rlim;
1239 struct rlimit bsd_rlim;
1240 u_int which;
1241
1242 if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) {
1243 rlim.rlim_cur = bsd_rlim.rlim_cur;
1244 rlim.rlim_max = bsd_rlim.rlim_max;
1245 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1246 }
1247
1248 if (args->resource >= LINUX_RLIM_NLIMITS)
1249 return (EINVAL);
1250
1251 which = linux_to_bsd_resource[args->resource];
1252 if (which == -1)
1253 return (EINVAL);
1254
1255 lim_rlimit(td, which, &bsd_rlim);
1256
1257 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
1258 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
1259 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1260 }
1261
1262 int
linux_sched_setscheduler(struct thread * td,struct linux_sched_setscheduler_args * args)1263 linux_sched_setscheduler(struct thread *td,
1264 struct linux_sched_setscheduler_args *args)
1265 {
1266 struct sched_param sched_param;
1267 struct thread *tdt;
1268 int error, policy;
1269
1270 switch (args->policy) {
1271 case LINUX_SCHED_OTHER:
1272 policy = SCHED_OTHER;
1273 break;
1274 case LINUX_SCHED_FIFO:
1275 policy = SCHED_FIFO;
1276 break;
1277 case LINUX_SCHED_RR:
1278 policy = SCHED_RR;
1279 break;
1280 default:
1281 return (EINVAL);
1282 }
1283
1284 error = copyin(args->param, &sched_param, sizeof(sched_param));
1285 if (error)
1286 return (error);
1287
1288 if (linux_map_sched_prio) {
1289 switch (policy) {
1290 case SCHED_OTHER:
1291 if (sched_param.sched_priority != 0)
1292 return (EINVAL);
1293
1294 sched_param.sched_priority =
1295 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
1296 break;
1297 case SCHED_FIFO:
1298 case SCHED_RR:
1299 if (sched_param.sched_priority < 1 ||
1300 sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
1301 return (EINVAL);
1302
1303 /*
1304 * Map [1, LINUX_MAX_RT_PRIO - 1] to
1305 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
1306 */
1307 sched_param.sched_priority =
1308 (sched_param.sched_priority - 1) *
1309 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
1310 (LINUX_MAX_RT_PRIO - 1);
1311 break;
1312 }
1313 }
1314
1315 tdt = linux_tdfind(td, args->pid, -1);
1316 if (tdt == NULL)
1317 return (ESRCH);
1318
1319 error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
1320 PROC_UNLOCK(tdt->td_proc);
1321 return (error);
1322 }
1323
1324 int
linux_sched_getscheduler(struct thread * td,struct linux_sched_getscheduler_args * args)1325 linux_sched_getscheduler(struct thread *td,
1326 struct linux_sched_getscheduler_args *args)
1327 {
1328 struct thread *tdt;
1329 int error, policy;
1330
1331 tdt = linux_tdfind(td, args->pid, -1);
1332 if (tdt == NULL)
1333 return (ESRCH);
1334
1335 error = kern_sched_getscheduler(td, tdt, &policy);
1336 PROC_UNLOCK(tdt->td_proc);
1337
1338 switch (policy) {
1339 case SCHED_OTHER:
1340 td->td_retval[0] = LINUX_SCHED_OTHER;
1341 break;
1342 case SCHED_FIFO:
1343 td->td_retval[0] = LINUX_SCHED_FIFO;
1344 break;
1345 case SCHED_RR:
1346 td->td_retval[0] = LINUX_SCHED_RR;
1347 break;
1348 }
1349 return (error);
1350 }
1351
1352 int
linux_sched_get_priority_max(struct thread * td,struct linux_sched_get_priority_max_args * args)1353 linux_sched_get_priority_max(struct thread *td,
1354 struct linux_sched_get_priority_max_args *args)
1355 {
1356 struct sched_get_priority_max_args bsd;
1357
1358 if (linux_map_sched_prio) {
1359 switch (args->policy) {
1360 case LINUX_SCHED_OTHER:
1361 td->td_retval[0] = 0;
1362 return (0);
1363 case LINUX_SCHED_FIFO:
1364 case LINUX_SCHED_RR:
1365 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
1366 return (0);
1367 default:
1368 return (EINVAL);
1369 }
1370 }
1371
1372 switch (args->policy) {
1373 case LINUX_SCHED_OTHER:
1374 bsd.policy = SCHED_OTHER;
1375 break;
1376 case LINUX_SCHED_FIFO:
1377 bsd.policy = SCHED_FIFO;
1378 break;
1379 case LINUX_SCHED_RR:
1380 bsd.policy = SCHED_RR;
1381 break;
1382 default:
1383 return (EINVAL);
1384 }
1385 return (sys_sched_get_priority_max(td, &bsd));
1386 }
1387
1388 int
linux_sched_get_priority_min(struct thread * td,struct linux_sched_get_priority_min_args * args)1389 linux_sched_get_priority_min(struct thread *td,
1390 struct linux_sched_get_priority_min_args *args)
1391 {
1392 struct sched_get_priority_min_args bsd;
1393
1394 if (linux_map_sched_prio) {
1395 switch (args->policy) {
1396 case LINUX_SCHED_OTHER:
1397 td->td_retval[0] = 0;
1398 return (0);
1399 case LINUX_SCHED_FIFO:
1400 case LINUX_SCHED_RR:
1401 td->td_retval[0] = 1;
1402 return (0);
1403 default:
1404 return (EINVAL);
1405 }
1406 }
1407
1408 switch (args->policy) {
1409 case LINUX_SCHED_OTHER:
1410 bsd.policy = SCHED_OTHER;
1411 break;
1412 case LINUX_SCHED_FIFO:
1413 bsd.policy = SCHED_FIFO;
1414 break;
1415 case LINUX_SCHED_RR:
1416 bsd.policy = SCHED_RR;
1417 break;
1418 default:
1419 return (EINVAL);
1420 }
1421 return (sys_sched_get_priority_min(td, &bsd));
1422 }
1423
/* Magic values used by the Linux reboot(2) ABI. */
#define	REBOOT_CAD_ON	0x89abcdef
#define	REBOOT_CAD_OFF	0
#define	REBOOT_HALT	0xcdef0123
#define	REBOOT_RESTART	0x01234567
#define	REBOOT_RESTART2	0xA1B2C3D4
#define	REBOOT_POWEROFF	0x4321FEDC
#define	REBOOT_MAGIC1	0xfee1dead
#define	REBOOT_MAGIC2	0x28121969
#define	REBOOT_MAGIC2A	0x05121996
#define	REBOOT_MAGIC2B	0x16041998
1434
1435 int
linux_reboot(struct thread * td,struct linux_reboot_args * args)1436 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1437 {
1438 struct reboot_args bsd_args;
1439
1440 if (args->magic1 != REBOOT_MAGIC1)
1441 return (EINVAL);
1442
1443 switch (args->magic2) {
1444 case REBOOT_MAGIC2:
1445 case REBOOT_MAGIC2A:
1446 case REBOOT_MAGIC2B:
1447 break;
1448 default:
1449 return (EINVAL);
1450 }
1451
1452 switch (args->cmd) {
1453 case REBOOT_CAD_ON:
1454 case REBOOT_CAD_OFF:
1455 return (priv_check(td, PRIV_REBOOT));
1456 case REBOOT_HALT:
1457 bsd_args.opt = RB_HALT;
1458 break;
1459 case REBOOT_RESTART:
1460 case REBOOT_RESTART2:
1461 bsd_args.opt = 0;
1462 break;
1463 case REBOOT_POWEROFF:
1464 bsd_args.opt = RB_POWEROFF;
1465 break;
1466 default:
1467 return (EINVAL);
1468 }
1469 return (sys_reboot(td, &bsd_args));
1470 }
1471
1472 int
linux_getpid(struct thread * td,struct linux_getpid_args * args)1473 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1474 {
1475
1476 td->td_retval[0] = td->td_proc->p_pid;
1477
1478 return (0);
1479 }
1480
1481 int
linux_gettid(struct thread * td,struct linux_gettid_args * args)1482 linux_gettid(struct thread *td, struct linux_gettid_args *args)
1483 {
1484 struct linux_emuldata *em;
1485
1486 em = em_find(td);
1487 KASSERT(em != NULL, ("gettid: emuldata not found.\n"));
1488
1489 td->td_retval[0] = em->em_tid;
1490
1491 return (0);
1492 }
1493
1494 int
linux_getppid(struct thread * td,struct linux_getppid_args * args)1495 linux_getppid(struct thread *td, struct linux_getppid_args *args)
1496 {
1497
1498 td->td_retval[0] = kern_getppid(td);
1499 return (0);
1500 }
1501
1502 int
linux_getgid(struct thread * td,struct linux_getgid_args * args)1503 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1504 {
1505
1506 td->td_retval[0] = td->td_ucred->cr_rgid;
1507 return (0);
1508 }
1509
1510 int
linux_getuid(struct thread * td,struct linux_getuid_args * args)1511 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1512 {
1513
1514 td->td_retval[0] = td->td_ucred->cr_ruid;
1515 return (0);
1516 }
1517
1518 int
linux_getsid(struct thread * td,struct linux_getsid_args * args)1519 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1520 {
1521
1522 return (kern_getsid(td, args->pid));
1523 }
1524
1525 int
linux_getpriority(struct thread * td,struct linux_getpriority_args * args)1526 linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
1527 {
1528 int error;
1529
1530 error = kern_getpriority(td, args->which, args->who);
1531 td->td_retval[0] = 20 - td->td_retval[0];
1532 return (error);
1533 }
1534
1535 int
linux_sethostname(struct thread * td,struct linux_sethostname_args * args)1536 linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
1537 {
1538 int name[2];
1539
1540 name[0] = CTL_KERN;
1541 name[1] = KERN_HOSTNAME;
1542 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
1543 args->len, 0, 0));
1544 }
1545
1546 int
linux_setdomainname(struct thread * td,struct linux_setdomainname_args * args)1547 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
1548 {
1549 int name[2];
1550
1551 name[0] = CTL_KERN;
1552 name[1] = KERN_NISDOMAINNAME;
1553 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
1554 args->len, 0, 0));
1555 }
1556
1557 int
linux_exit_group(struct thread * td,struct linux_exit_group_args * args)1558 linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
1559 {
1560
1561 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
1562 args->error_code);
1563
1564 /*
1565 * XXX: we should send a signal to the parent if
1566 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
1567 * as it doesnt occur often.
1568 */
1569 exit1(td, args->error_code, 0);
1570 /* NOTREACHED */
1571 }
1572
1573 #define _LINUX_CAPABILITY_VERSION_1 0x19980330
1574 #define _LINUX_CAPABILITY_VERSION_2 0x20071026
1575 #define _LINUX_CAPABILITY_VERSION_3 0x20080522
1576
1577 struct l_user_cap_header {
1578 l_int version;
1579 l_int pid;
1580 };
1581
1582 struct l_user_cap_data {
1583 l_int effective;
1584 l_int permitted;
1585 l_int inheritable;
1586 };
1587
1588 int
linux_capget(struct thread * td,struct linux_capget_args * uap)1589 linux_capget(struct thread *td, struct linux_capget_args *uap)
1590 {
1591 struct l_user_cap_header luch;
1592 struct l_user_cap_data lucd[2];
1593 int error, u32s;
1594
1595 if (uap->hdrp == NULL)
1596 return (EFAULT);
1597
1598 error = copyin(uap->hdrp, &luch, sizeof(luch));
1599 if (error != 0)
1600 return (error);
1601
1602 switch (luch.version) {
1603 case _LINUX_CAPABILITY_VERSION_1:
1604 u32s = 1;
1605 break;
1606 case _LINUX_CAPABILITY_VERSION_2:
1607 case _LINUX_CAPABILITY_VERSION_3:
1608 u32s = 2;
1609 break;
1610 default:
1611 luch.version = _LINUX_CAPABILITY_VERSION_1;
1612 error = copyout(&luch, uap->hdrp, sizeof(luch));
1613 if (error)
1614 return (error);
1615 return (EINVAL);
1616 }
1617
1618 if (luch.pid)
1619 return (EPERM);
1620
1621 if (uap->datap) {
1622 /*
1623 * The current implementation doesn't support setting
1624 * a capability (it's essentially a stub) so indicate
1625 * that no capabilities are currently set or available
1626 * to request.
1627 */
1628 memset(&lucd, 0, u32s * sizeof(lucd[0]));
1629 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
1630 }
1631
1632 return (error);
1633 }
1634
1635 int
linux_capset(struct thread * td,struct linux_capset_args * uap)1636 linux_capset(struct thread *td, struct linux_capset_args *uap)
1637 {
1638 struct l_user_cap_header luch;
1639 struct l_user_cap_data lucd[2];
1640 int error, i, u32s;
1641
1642 if (uap->hdrp == NULL || uap->datap == NULL)
1643 return (EFAULT);
1644
1645 error = copyin(uap->hdrp, &luch, sizeof(luch));
1646 if (error != 0)
1647 return (error);
1648
1649 switch (luch.version) {
1650 case _LINUX_CAPABILITY_VERSION_1:
1651 u32s = 1;
1652 break;
1653 case _LINUX_CAPABILITY_VERSION_2:
1654 case _LINUX_CAPABILITY_VERSION_3:
1655 u32s = 2;
1656 break;
1657 default:
1658 luch.version = _LINUX_CAPABILITY_VERSION_1;
1659 error = copyout(&luch, uap->hdrp, sizeof(luch));
1660 if (error)
1661 return (error);
1662 return (EINVAL);
1663 }
1664
1665 if (luch.pid)
1666 return (EPERM);
1667
1668 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
1669 if (error != 0)
1670 return (error);
1671
1672 /* We currently don't support setting any capabilities. */
1673 for (i = 0; i < u32s; i++) {
1674 if (lucd[i].effective || lucd[i].permitted ||
1675 lucd[i].inheritable) {
1676 linux_msg(td,
1677 "capset[%d] effective=0x%x, permitted=0x%x, "
1678 "inheritable=0x%x is not implemented", i,
1679 (int)lucd[i].effective, (int)lucd[i].permitted,
1680 (int)lucd[i].inheritable);
1681 return (EPERM);
1682 }
1683 }
1684
1685 return (0);
1686 }
1687
1688 int
linux_prctl(struct thread * td,struct linux_prctl_args * args)1689 linux_prctl(struct thread *td, struct linux_prctl_args *args)
1690 {
1691 int error = 0, max_size, arg;
1692 struct proc *p = td->td_proc;
1693 char comm[LINUX_MAX_COMM_LEN];
1694 int pdeath_signal, trace_state;
1695
1696 switch (args->option) {
1697 case LINUX_PR_SET_PDEATHSIG:
1698 if (!LINUX_SIG_VALID(args->arg2))
1699 return (EINVAL);
1700 pdeath_signal = linux_to_bsd_signal(args->arg2);
1701 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
1702 &pdeath_signal));
1703 case LINUX_PR_GET_PDEATHSIG:
1704 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
1705 &pdeath_signal);
1706 if (error != 0)
1707 return (error);
1708 pdeath_signal = bsd_to_linux_signal(pdeath_signal);
1709 return (copyout(&pdeath_signal,
1710 (void *)(register_t)args->arg2,
1711 sizeof(pdeath_signal)));
1712 /*
1713 * In Linux, this flag controls if set[gu]id processes can coredump.
1714 * There are additional semantics imposed on processes that cannot
1715 * coredump:
1716 * - Such processes can not be ptraced.
1717 * - There are some semantics around ownership of process-related files
1718 * in the /proc namespace.
1719 *
1720 * In FreeBSD, we can (and by default, do) disable setuid coredump
1721 * system-wide with 'sugid_coredump.' We control tracability on a
1722 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
1723 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the
1724 * procctl is roughly analogous to Linux's DUMPABLE.
1725 *
1726 * So, proxy these knobs to the corresponding PROC_TRACE setting.
1727 */
1728 case LINUX_PR_GET_DUMPABLE:
1729 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
1730 &trace_state);
1731 if (error != 0)
1732 return (error);
1733 td->td_retval[0] = (trace_state != -1);
1734 return (0);
1735 case LINUX_PR_SET_DUMPABLE:
1736 /*
1737 * It is only valid for userspace to set one of these two
1738 * flags, and only one at a time.
1739 */
1740 switch (args->arg2) {
1741 case LINUX_SUID_DUMP_DISABLE:
1742 trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
1743 break;
1744 case LINUX_SUID_DUMP_USER:
1745 trace_state = PROC_TRACE_CTL_ENABLE;
1746 break;
1747 default:
1748 return (EINVAL);
1749 }
1750 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
1751 &trace_state));
1752 case LINUX_PR_GET_KEEPCAPS:
1753 /*
1754 * Indicate that we always clear the effective and
1755 * permitted capability sets when the user id becomes
1756 * non-zero (actually the capability sets are simply
1757 * always zero in the current implementation).
1758 */
1759 td->td_retval[0] = 0;
1760 break;
1761 case LINUX_PR_SET_KEEPCAPS:
1762 /*
1763 * Ignore requests to keep the effective and permitted
1764 * capability sets when the user id becomes non-zero.
1765 */
1766 break;
1767 case LINUX_PR_SET_NAME:
1768 /*
1769 * To be on the safe side we need to make sure to not
1770 * overflow the size a Linux program expects. We already
1771 * do this here in the copyin, so that we don't need to
1772 * check on copyout.
1773 */
1774 max_size = MIN(sizeof(comm), sizeof(p->p_comm));
1775 error = copyinstr((void *)(register_t)args->arg2, comm,
1776 max_size, NULL);
1777
1778 /* Linux silently truncates the name if it is too long. */
1779 if (error == ENAMETOOLONG) {
1780 /*
1781 * XXX: copyinstr() isn't documented to populate the
1782 * array completely, so do a copyin() to be on the
1783 * safe side. This should be changed in case
1784 * copyinstr() is changed to guarantee this.
1785 */
1786 error = copyin((void *)(register_t)args->arg2, comm,
1787 max_size - 1);
1788 comm[max_size - 1] = '\0';
1789 }
1790 if (error)
1791 return (error);
1792
1793 PROC_LOCK(p);
1794 strlcpy(p->p_comm, comm, sizeof(p->p_comm));
1795 PROC_UNLOCK(p);
1796 break;
1797 case LINUX_PR_GET_NAME:
1798 PROC_LOCK(p);
1799 strlcpy(comm, p->p_comm, sizeof(comm));
1800 PROC_UNLOCK(p);
1801 error = copyout(comm, (void *)(register_t)args->arg2,
1802 strlen(comm) + 1);
1803 break;
1804 case LINUX_PR_GET_SECCOMP:
1805 case LINUX_PR_SET_SECCOMP:
1806 /*
1807 * Same as returned by Linux without CONFIG_SECCOMP enabled.
1808 */
1809 error = EINVAL;
1810 break;
1811 case LINUX_PR_CAPBSET_READ:
1812 #if 0
1813 /*
1814 * This makes too much noise with Ubuntu Focal.
1815 */
1816 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
1817 (int)args->arg2);
1818 #endif
1819 error = EINVAL;
1820 break;
1821 case LINUX_PR_SET_NO_NEW_PRIVS:
1822 arg = args->arg2 == 1 ?
1823 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
1824 error = kern_procctl(td, P_PID, p->p_pid,
1825 PROC_NO_NEW_PRIVS_CTL, &arg);
1826 break;
1827 case LINUX_PR_SET_PTRACER:
1828 linux_msg(td, "unsupported prctl PR_SET_PTRACER");
1829 error = EINVAL;
1830 break;
1831 default:
1832 linux_msg(td, "unsupported prctl option %d", args->option);
1833 error = EINVAL;
1834 break;
1835 }
1836
1837 return (error);
1838 }
1839
1840 int
linux_sched_setparam(struct thread * td,struct linux_sched_setparam_args * uap)1841 linux_sched_setparam(struct thread *td,
1842 struct linux_sched_setparam_args *uap)
1843 {
1844 struct sched_param sched_param;
1845 struct thread *tdt;
1846 int error, policy;
1847
1848 error = copyin(uap->param, &sched_param, sizeof(sched_param));
1849 if (error)
1850 return (error);
1851
1852 tdt = linux_tdfind(td, uap->pid, -1);
1853 if (tdt == NULL)
1854 return (ESRCH);
1855
1856 if (linux_map_sched_prio) {
1857 error = kern_sched_getscheduler(td, tdt, &policy);
1858 if (error)
1859 goto out;
1860
1861 switch (policy) {
1862 case SCHED_OTHER:
1863 if (sched_param.sched_priority != 0) {
1864 error = EINVAL;
1865 goto out;
1866 }
1867 sched_param.sched_priority =
1868 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
1869 break;
1870 case SCHED_FIFO:
1871 case SCHED_RR:
1872 if (sched_param.sched_priority < 1 ||
1873 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
1874 error = EINVAL;
1875 goto out;
1876 }
1877 /*
1878 * Map [1, LINUX_MAX_RT_PRIO - 1] to
1879 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
1880 */
1881 sched_param.sched_priority =
1882 (sched_param.sched_priority - 1) *
1883 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
1884 (LINUX_MAX_RT_PRIO - 1);
1885 break;
1886 }
1887 }
1888
1889 error = kern_sched_setparam(td, tdt, &sched_param);
1890 out: PROC_UNLOCK(tdt->td_proc);
1891 return (error);
1892 }
1893
1894 int
linux_sched_getparam(struct thread * td,struct linux_sched_getparam_args * uap)1895 linux_sched_getparam(struct thread *td,
1896 struct linux_sched_getparam_args *uap)
1897 {
1898 struct sched_param sched_param;
1899 struct thread *tdt;
1900 int error, policy;
1901
1902 tdt = linux_tdfind(td, uap->pid, -1);
1903 if (tdt == NULL)
1904 return (ESRCH);
1905
1906 error = kern_sched_getparam(td, tdt, &sched_param);
1907 if (error) {
1908 PROC_UNLOCK(tdt->td_proc);
1909 return (error);
1910 }
1911
1912 if (linux_map_sched_prio) {
1913 error = kern_sched_getscheduler(td, tdt, &policy);
1914 PROC_UNLOCK(tdt->td_proc);
1915 if (error)
1916 return (error);
1917
1918 switch (policy) {
1919 case SCHED_OTHER:
1920 sched_param.sched_priority = 0;
1921 break;
1922 case SCHED_FIFO:
1923 case SCHED_RR:
1924 /*
1925 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
1926 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
1927 */
1928 sched_param.sched_priority =
1929 (sched_param.sched_priority *
1930 (LINUX_MAX_RT_PRIO - 1) +
1931 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
1932 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
1933 break;
1934 }
1935 } else
1936 PROC_UNLOCK(tdt->td_proc);
1937
1938 error = copyout(&sched_param, uap->param, sizeof(sched_param));
1939 return (error);
1940 }
1941
1942 /*
1943 * Get affinity of a process.
1944 */
1945 int
linux_sched_getaffinity(struct thread * td,struct linux_sched_getaffinity_args * args)1946 linux_sched_getaffinity(struct thread *td,
1947 struct linux_sched_getaffinity_args *args)
1948 {
1949 struct thread *tdt;
1950 cpuset_t *mask;
1951 size_t size;
1952 int error;
1953 id_t tid;
1954
1955 tdt = linux_tdfind(td, args->pid, -1);
1956 if (tdt == NULL)
1957 return (ESRCH);
1958 tid = tdt->td_tid;
1959 PROC_UNLOCK(tdt->td_proc);
1960
1961 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO);
1962 size = min(args->len, sizeof(cpuset_t));
1963 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
1964 tid, size, mask);
1965 if (error == ERANGE)
1966 error = EINVAL;
1967 if (error == 0)
1968 error = copyout(mask, args->user_mask_ptr, size);
1969 if (error == 0)
1970 td->td_retval[0] = size;
1971 free(mask, M_LINUX);
1972 return (error);
1973 }
1974
1975 /*
1976 * Set affinity of a process.
1977 */
1978 int
linux_sched_setaffinity(struct thread * td,struct linux_sched_setaffinity_args * args)1979 linux_sched_setaffinity(struct thread *td,
1980 struct linux_sched_setaffinity_args *args)
1981 {
1982 struct thread *tdt;
1983 cpuset_t *mask;
1984 int cpu, error;
1985 size_t len;
1986 id_t tid;
1987
1988 tdt = linux_tdfind(td, args->pid, -1);
1989 if (tdt == NULL)
1990 return (ESRCH);
1991 tid = tdt->td_tid;
1992 PROC_UNLOCK(tdt->td_proc);
1993
1994 len = min(args->len, sizeof(cpuset_t));
1995 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
1996 error = copyin(args->user_mask_ptr, mask, len);
1997 if (error != 0)
1998 goto out;
1999 /* Linux ignore high bits */
2000 CPU_FOREACH_ISSET(cpu, mask)
2001 if (cpu > mp_maxid)
2002 CPU_CLR(cpu, mask);
2003
2004 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
2005 tid, mask);
2006 if (error == EDEADLK)
2007 error = EINVAL;
2008 out:
2009 free(mask, M_TEMP);
2010 return (error);
2011 }
2012
2013 struct linux_rlimit64 {
2014 uint64_t rlim_cur;
2015 uint64_t rlim_max;
2016 };
2017
2018 int
linux_prlimit64(struct thread * td,struct linux_prlimit64_args * args)2019 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
2020 {
2021 struct rlimit rlim, nrlim;
2022 struct linux_rlimit64 lrlim;
2023 struct proc *p;
2024 u_int which;
2025 int flags;
2026 int error;
2027
2028 if (args->new == NULL && args->old != NULL) {
2029 if (linux_get_dummy_limit(td, args->resource, &rlim)) {
2030 lrlim.rlim_cur = rlim.rlim_cur;
2031 lrlim.rlim_max = rlim.rlim_max;
2032 return (copyout(&lrlim, args->old, sizeof(lrlim)));
2033 }
2034 }
2035
2036 if (args->resource >= LINUX_RLIM_NLIMITS)
2037 return (EINVAL);
2038
2039 which = linux_to_bsd_resource[args->resource];
2040 if (which == -1)
2041 return (EINVAL);
2042
2043 if (args->new != NULL) {
2044 /*
2045 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
2046 * rlim is unsigned 64-bit. FreeBSD treats negative limits
2047 * as INFINITY so we do not need a conversion even.
2048 */
2049 error = copyin(args->new, &nrlim, sizeof(nrlim));
2050 if (error != 0)
2051 return (error);
2052 }
2053
2054 flags = PGET_HOLD | PGET_NOTWEXIT;
2055 if (args->new != NULL)
2056 flags |= PGET_CANDEBUG;
2057 else
2058 flags |= PGET_CANSEE;
2059 if (args->pid == 0) {
2060 p = td->td_proc;
2061 PHOLD(p);
2062 } else {
2063 error = pget(args->pid, flags, &p);
2064 if (error != 0)
2065 return (error);
2066 }
2067 if (args->old != NULL) {
2068 PROC_LOCK(p);
2069 lim_rlimit_proc(p, which, &rlim);
2070 PROC_UNLOCK(p);
2071 if (rlim.rlim_cur == RLIM_INFINITY)
2072 lrlim.rlim_cur = LINUX_RLIM_INFINITY;
2073 else
2074 lrlim.rlim_cur = rlim.rlim_cur;
2075 if (rlim.rlim_max == RLIM_INFINITY)
2076 lrlim.rlim_max = LINUX_RLIM_INFINITY;
2077 else
2078 lrlim.rlim_max = rlim.rlim_max;
2079 error = copyout(&lrlim, args->old, sizeof(lrlim));
2080 if (error != 0)
2081 goto out;
2082 }
2083
2084 if (args->new != NULL)
2085 error = kern_proc_setrlimit(td, p, which, &nrlim);
2086
2087 out:
2088 PRELE(p);
2089 return (error);
2090 }
2091
2092 int
linux_pselect6(struct thread * td,struct linux_pselect6_args * args)2093 linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
2094 {
2095 struct timespec ts, *tsp;
2096 int error;
2097
2098 if (args->tsp != NULL) {
2099 error = linux_get_timespec(&ts, args->tsp);
2100 if (error != 0)
2101 return (error);
2102 tsp = &ts;
2103 } else
2104 tsp = NULL;
2105
2106 error = linux_common_pselect6(td, args->nfds, args->readfds,
2107 args->writefds, args->exceptfds, tsp, args->sig);
2108
2109 if (args->tsp != NULL)
2110 linux_put_timespec(&ts, args->tsp);
2111 return (error);
2112 }
2113
2114 static int
linux_common_pselect6(struct thread * td,l_int nfds,l_fd_set * readfds,l_fd_set * writefds,l_fd_set * exceptfds,struct timespec * tsp,l_uintptr_t * sig)2115 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
2116 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
2117 l_uintptr_t *sig)
2118 {
2119 struct timeval utv, tv0, tv1, *tvp;
2120 struct l_pselect6arg lpse6;
2121 sigset_t *ssp;
2122 sigset_t ss;
2123 int error;
2124
2125 ssp = NULL;
2126 if (sig != NULL) {
2127 error = copyin(sig, &lpse6, sizeof(lpse6));
2128 if (error != 0)
2129 return (error);
2130 error = linux_copyin_sigset(td, PTRIN(lpse6.ss),
2131 lpse6.ss_len, &ss, &ssp);
2132 if (error != 0)
2133 return (error);
2134 } else
2135 ssp = NULL;
2136
2137 /*
2138 * Currently glibc changes nanosecond number to microsecond.
2139 * This mean losing precision but for now it is hardly seen.
2140 */
2141 if (tsp != NULL) {
2142 TIMESPEC_TO_TIMEVAL(&utv, tsp);
2143 if (itimerfix(&utv))
2144 return (EINVAL);
2145
2146 microtime(&tv0);
2147 tvp = &utv;
2148 } else
2149 tvp = NULL;
2150
2151 error = kern_pselect(td, nfds, readfds, writefds,
2152 exceptfds, tvp, ssp, LINUX_NFDBITS);
2153
2154 if (tsp != NULL) {
2155 /*
2156 * Compute how much time was left of the timeout,
2157 * by subtracting the current time and the time
2158 * before we started the call, and subtracting
2159 * that result from the user-supplied value.
2160 */
2161 microtime(&tv1);
2162 timevalsub(&tv1, &tv0);
2163 timevalsub(&utv, &tv1);
2164 if (utv.tv_sec < 0)
2165 timevalclear(&utv);
2166 TIMEVAL_TO_TIMESPEC(&utv, tsp);
2167 }
2168 return (error);
2169 }
2170
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * pselect6_time64(2): same as linux_pselect6() but with 64-bit
 * timespec encoding on 32-bit ABIs.
 */
int
linux_pselect6_time64(struct thread *td,
    struct linux_pselect6_time64_args *args)
{
	struct timespec ts, *tsp;
	int error;

	tsp = NULL;
	if (args->tsp != NULL) {
		error = linux_get_timespec64(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	}

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec64(&ts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
2195
2196 int
linux_ppoll(struct thread * td,struct linux_ppoll_args * args)2197 linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
2198 {
2199 struct timespec uts, *tsp;
2200 int error;
2201
2202 if (args->tsp != NULL) {
2203 error = linux_get_timespec(&uts, args->tsp);
2204 if (error != 0)
2205 return (error);
2206 tsp = &uts;
2207 } else
2208 tsp = NULL;
2209
2210 error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
2211 args->sset, args->ssize);
2212 if (error == 0 && args->tsp != NULL)
2213 error = linux_put_timespec(&uts, args->tsp);
2214 return (error);
2215 }
2216
2217 static int
linux_common_ppoll(struct thread * td,struct pollfd * fds,uint32_t nfds,struct timespec * tsp,l_sigset_t * sset,l_size_t ssize)2218 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
2219 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
2220 {
2221 struct timespec ts0, ts1;
2222 struct pollfd stackfds[32];
2223 struct pollfd *kfds;
2224 sigset_t *ssp;
2225 sigset_t ss;
2226 int error;
2227
2228 if (kern_poll_maxfds(nfds))
2229 return (EINVAL);
2230 if (sset != NULL) {
2231 error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp);
2232 if (error != 0)
2233 return (error);
2234 } else
2235 ssp = NULL;
2236 if (tsp != NULL)
2237 nanotime(&ts0);
2238
2239 if (nfds > nitems(stackfds))
2240 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
2241 else
2242 kfds = stackfds;
2243 error = linux_pollin(td, kfds, fds, nfds);
2244 if (error != 0)
2245 goto out;
2246
2247 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
2248 if (error == 0)
2249 error = linux_pollout(td, kfds, fds, nfds);
2250
2251 if (error == 0 && tsp != NULL) {
2252 if (td->td_retval[0]) {
2253 nanotime(&ts1);
2254 timespecsub(&ts1, &ts0, &ts1);
2255 timespecsub(tsp, &ts1, tsp);
2256 if (tsp->tv_sec < 0)
2257 timespecclear(tsp);
2258 } else
2259 timespecclear(tsp);
2260 }
2261
2262 out:
2263 if (nfds > nitems(stackfds))
2264 free(kfds, M_TEMP);
2265 return (error);
2266 }
2267
2268 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
2269 int
linux_ppoll_time64(struct thread * td,struct linux_ppoll_time64_args * args)2270 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
2271 {
2272 struct timespec uts, *tsp;
2273 int error;
2274
2275 if (args->tsp != NULL) {
2276 error = linux_get_timespec64(&uts, args->tsp);
2277 if (error != 0)
2278 return (error);
2279 tsp = &uts;
2280 } else
2281 tsp = NULL;
2282 error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
2283 args->sset, args->ssize);
2284 if (error == 0 && args->tsp != NULL)
2285 error = linux_put_timespec64(&uts, args->tsp);
2286 return (error);
2287 }
2288 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
2289
2290 static int
linux_pollin(struct thread * td,struct pollfd * fds,struct pollfd * ufds,u_int nfd)2291 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
2292 {
2293 int error;
2294 u_int i;
2295
2296 error = copyin(ufds, fds, nfd * sizeof(*fds));
2297 if (error != 0)
2298 return (error);
2299
2300 for (i = 0; i < nfd; i++) {
2301 if (fds->events != 0)
2302 linux_to_bsd_poll_events(td, fds->fd,
2303 fds->events, &fds->events);
2304 fds++;
2305 }
2306 return (0);
2307 }
2308
2309 static int
linux_pollout(struct thread * td,struct pollfd * fds,struct pollfd * ufds,u_int nfd)2310 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
2311 {
2312 int error = 0;
2313 u_int i, n = 0;
2314
2315 for (i = 0; i < nfd; i++) {
2316 if (fds->revents != 0) {
2317 bsd_to_linux_poll_events(fds->revents,
2318 &fds->revents);
2319 n++;
2320 }
2321 error = copyout(&fds->revents, &ufds->revents,
2322 sizeof(ufds->revents));
2323 if (error)
2324 return (error);
2325 fds++;
2326 ufds++;
2327 }
2328 td->td_retval[0] = n;
2329 return (0);
2330 }
2331
2332 static int
linux_sched_rr_get_interval_common(struct thread * td,pid_t pid,struct timespec * ts)2333 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid,
2334 struct timespec *ts)
2335 {
2336 struct thread *tdt;
2337 int error;
2338
2339 /*
2340 * According to man in case the invalid pid specified
2341 * EINVAL should be returned.
2342 */
2343 if (pid < 0)
2344 return (EINVAL);
2345
2346 tdt = linux_tdfind(td, pid, -1);
2347 if (tdt == NULL)
2348 return (ESRCH);
2349
2350 error = kern_sched_rr_get_interval_td(td, tdt, ts);
2351 PROC_UNLOCK(tdt->td_proc);
2352 return (error);
2353 }
2354
2355 int
linux_sched_rr_get_interval(struct thread * td,struct linux_sched_rr_get_interval_args * uap)2356 linux_sched_rr_get_interval(struct thread *td,
2357 struct linux_sched_rr_get_interval_args *uap)
2358 {
2359 struct timespec ts;
2360 int error;
2361
2362 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
2363 if (error != 0)
2364 return (error);
2365 return (linux_put_timespec(&ts, uap->interval));
2366 }
2367
2368 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
2369 int
linux_sched_rr_get_interval_time64(struct thread * td,struct linux_sched_rr_get_interval_time64_args * uap)2370 linux_sched_rr_get_interval_time64(struct thread *td,
2371 struct linux_sched_rr_get_interval_time64_args *uap)
2372 {
2373 struct timespec ts;
2374 int error;
2375
2376 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
2377 if (error != 0)
2378 return (error);
2379 return (linux_put_timespec64(&ts, uap->interval));
2380 }
2381 #endif
2382
2383 /*
2384 * In case when the Linux thread is the initial thread in
2385 * the thread group thread id is equal to the process id.
2386 * Glibc depends on this magic (assert in pthread_getattr_np.c).
2387 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		/* The caller asks about itself; honor an explicit pid filter. */
		if (pid != -1 && td->td_proc->p_pid != pid)
			return (NULL);
		/* On success the process is returned locked. */
		PROC_LOCK(td->td_proc);
		return (td);
	} else if (tid > PID_MAX)
		/* Tids above PID_MAX cannot be initial threads. */
		return (tdfind(tid, pid));

	/*
	 * Initial thread where the tid equal to the pid.
	 */
	p = pfind(tid);
	if (p != NULL) {
		if (SV_PROC_ABI(p) != SV_ABI_LINUX ||
		    (pid != -1 && tid != pid)) {
			/*
			 * p is not a Linuxulator process.
			 */
			PROC_UNLOCK(p);
			return (NULL);
		}
		FOREACH_THREAD_IN_PROC(p, tdt) {
			/*
			 * NOTE(review): em_find() result is used without a
			 * NULL check — presumably every thread of a Linux
			 * process carries emuldata; confirm.
			 */
			em = em_find(tdt);
			if (tid == em->em_tid)
				/* Found: p stays locked for the caller. */
				return (tdt);
		}
		PROC_UNLOCK(p);
	}
	return (NULL);
}
2426
2427 void
linux_to_bsd_waitopts(int options,int * bsdopts)2428 linux_to_bsd_waitopts(int options, int *bsdopts)
2429 {
2430
2431 if (options & LINUX_WNOHANG)
2432 *bsdopts |= WNOHANG;
2433 if (options & LINUX_WUNTRACED)
2434 *bsdopts |= WUNTRACED;
2435 if (options & LINUX_WEXITED)
2436 *bsdopts |= WEXITED;
2437 if (options & LINUX_WCONTINUED)
2438 *bsdopts |= WCONTINUED;
2439 if (options & LINUX_WNOWAIT)
2440 *bsdopts |= WNOWAIT;
2441
2442 if (options & __WCLONE)
2443 *bsdopts |= WLINUXCLONE;
2444 }
2445
2446 int
linux_getrandom(struct thread * td,struct linux_getrandom_args * args)2447 linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
2448 {
2449 struct uio uio;
2450 struct iovec iov;
2451 int error;
2452
2453 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
2454 return (EINVAL);
2455 if (args->count > INT_MAX)
2456 args->count = INT_MAX;
2457
2458 iov.iov_base = args->buf;
2459 iov.iov_len = args->count;
2460
2461 uio.uio_iov = &iov;
2462 uio.uio_iovcnt = 1;
2463 uio.uio_resid = iov.iov_len;
2464 uio.uio_segflg = UIO_USERSPACE;
2465 uio.uio_rw = UIO_READ;
2466 uio.uio_td = td;
2467
2468 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
2469 if (error == 0)
2470 td->td_retval[0] = args->count - uio.uio_resid;
2471 return (error);
2472 }
2473
2474 int
linux_mincore(struct thread * td,struct linux_mincore_args * args)2475 linux_mincore(struct thread *td, struct linux_mincore_args *args)
2476 {
2477
2478 /* Needs to be page-aligned */
2479 if (args->start & PAGE_MASK)
2480 return (EINVAL);
2481 return (kern_mincore(td, args->start, args->len, args->vec));
2482 }
2483
2484 #define SYSLOG_TAG "<6>"
2485
int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128], *src, *dst;
	u_int seq;
	int buflen, error;

	/* Only the "read all" action is implemented. */
	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
		linux_msg(td, "syslog unsupported type 0x%x", args->type);
		return (EINVAL);
	}

	/* Buffer too small to hold a tag plus payload; report 0 bytes. */
	if (args->len < 6) {
		td->td_retval[0] = 0;
		return (0);
	}

	error = priv_check(td, PRIV_MSGBUF);
	if (error)
		return (error);

	/* Obtain a starting sequence number for peeking at the msgbuf. */
	mtx_lock(&msgbuf_lock);
	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
	mtx_unlock(&msgbuf_lock);

	/* Every output line is prefixed with a "<6>" (priority) tag. */
	dst = args->buf;
	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
	/* The -1 is to skip the trailing '\0'. */
	dst += sizeof(SYSLOG_TAG) - 1;

	while (error == 0) {
		/* Drain the message buffer in sizeof(buf)-sized chunks. */
		mtx_lock(&msgbuf_lock);
		buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
		mtx_unlock(&msgbuf_lock);

		if (buflen == 0)
			break;

		for (src = buf; src < buf + buflen && error == 0; src++) {
			/* Skip NUL padding inside the message buffer. */
			if (*src == '\0')
				continue;

			/* Stop once the user buffer is full. */
			if (dst >= args->buf + args->len)
				goto out;

			error = copyout(src, dst, 1);
			dst++;

			/*
			 * Tag the next line unless it already carries one.
			 * NOTE(review): *(src + 1) may read one byte past
			 * the data returned for this chunk when '\n' is
			 * the last peeked byte — confirm this is benign.
			 */
			if (*src == '\n' && *(src + 1) != '<' &&
			    dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
				error = copyout(&SYSLOG_TAG,
				    dst, sizeof(SYSLOG_TAG));
				dst += sizeof(SYSLOG_TAG) - 1;
			}
		}
	}
out:
	/* Return the number of bytes written to the user buffer. */
	td->td_retval[0] = dst - args->buf;
	return (error);
}
2546
2547 int
linux_getcpu(struct thread * td,struct linux_getcpu_args * args)2548 linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
2549 {
2550 int cpu, error, node;
2551
2552 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */
2553 error = 0;
2554 node = cpuid_to_pcpu[cpu]->pc_domain;
2555
2556 if (args->cpu != NULL)
2557 error = copyout(&cpu, args->cpu, sizeof(l_int));
2558 if (args->node != NULL)
2559 error = copyout(&node, args->node, sizeof(l_int));
2560 return (error);
2561 }
2562
2563 #if defined(__i386__) || defined(__amd64__)
2564 int
linux_poll(struct thread * td,struct linux_poll_args * args)2565 linux_poll(struct thread *td, struct linux_poll_args *args)
2566 {
2567 struct timespec ts, *tsp;
2568
2569 if (args->timeout != INFTIM) {
2570 if (args->timeout < 0)
2571 return (EINVAL);
2572 ts.tv_sec = args->timeout / 1000;
2573 ts.tv_nsec = (args->timeout % 1000) * 1000000;
2574 tsp = &ts;
2575 } else
2576 tsp = NULL;
2577
2578 return (linux_common_ppoll(td, args->fds, args->nfds,
2579 tsp, NULL, 0));
2580 }
2581 #endif /* __i386__ || __amd64__ */
2582
2583 int
linux_seccomp(struct thread * td,struct linux_seccomp_args * args)2584 linux_seccomp(struct thread *td, struct linux_seccomp_args *args)
2585 {
2586
2587 switch (args->op) {
2588 case LINUX_SECCOMP_GET_ACTION_AVAIL:
2589 return (EOPNOTSUPP);
2590 default:
2591 /*
2592 * Ignore unknown operations, just like Linux kernel built
2593 * without CONFIG_SECCOMP.
2594 */
2595 return (EINVAL);
2596 }
2597 }
2598
2599 /*
2600 * Custom version of exec_copyin_args(), to copy out argument and environment
2601 * strings from the old process address space into the temporary string buffer.
2602 * Based on freebsd32_exec_copyin_args.
2603 */
2604 static int
linux_exec_copyin_args(struct image_args * args,const char * fname,enum uio_seg segflg,l_uintptr_t * argv,l_uintptr_t * envv)2605 linux_exec_copyin_args(struct image_args *args, const char *fname,
2606 enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv)
2607 {
2608 char *argp, *envp;
2609 l_uintptr_t *ptr, arg;
2610 int error;
2611
2612 bzero(args, sizeof(*args));
2613 if (argv == NULL)
2614 return (EFAULT);
2615
2616 /*
2617 * Allocate demand-paged memory for the file name, argument, and
2618 * environment strings.
2619 */
2620 error = exec_alloc_args(args);
2621 if (error != 0)
2622 return (error);
2623
2624 /*
2625 * Copy the file name.
2626 */
2627 error = exec_args_add_fname(args, fname, segflg);
2628 if (error != 0)
2629 goto err_exit;
2630
2631 /*
2632 * extract arguments first
2633 */
2634 ptr = argv;
2635 for (;;) {
2636 error = copyin(ptr++, &arg, sizeof(arg));
2637 if (error)
2638 goto err_exit;
2639 if (arg == 0)
2640 break;
2641 argp = PTRIN(arg);
2642 error = exec_args_add_arg(args, argp, UIO_USERSPACE);
2643 if (error != 0)
2644 goto err_exit;
2645 }
2646
2647 /*
2648 * This comment is from Linux do_execveat_common:
2649 * When argv is empty, add an empty string ("") as argv[0] to
2650 * ensure confused userspace programs that start processing
2651 * from argv[1] won't end up walking envp.
2652 */
2653 if (args->argc == 0 &&
2654 (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0))
2655 goto err_exit;
2656
2657 /*
2658 * extract environment strings
2659 */
2660 if (envv) {
2661 ptr = envv;
2662 for (;;) {
2663 error = copyin(ptr++, &arg, sizeof(arg));
2664 if (error)
2665 goto err_exit;
2666 if (arg == 0)
2667 break;
2668 envp = PTRIN(arg);
2669 error = exec_args_add_env(args, envp, UIO_USERSPACE);
2670 if (error != 0)
2671 goto err_exit;
2672 }
2673 }
2674
2675 return (0);
2676
2677 err_exit:
2678 exec_free_args(args);
2679 return (error);
2680 }
2681
2682 int
linux_execve(struct thread * td,struct linux_execve_args * args)2683 linux_execve(struct thread *td, struct linux_execve_args *args)
2684 {
2685 struct image_args eargs;
2686 int error;
2687
2688 LINUX_CTR(execve);
2689
2690 error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE,
2691 args->argp, args->envp);
2692 if (error == 0)
2693 error = linux_common_execve(td, &eargs);
2694 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
2695 return (error);
2696 }
2697
2698 static void
linux_up_rtprio_if(struct thread * td1,struct rtprio * rtp)2699 linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp)
2700 {
2701 struct rtprio rtp2;
2702
2703 pri_to_rtp(td1, &rtp2);
2704 if (rtp2.type < rtp->type ||
2705 (rtp2.type == rtp->type &&
2706 rtp2.prio < rtp->prio)) {
2707 rtp->type = rtp2.type;
2708 rtp->prio = rtp2.prio;
2709 }
2710 }
2711
2712 #define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX
2713
2714 static int
linux_rtprio2ioprio(struct rtprio * rtp)2715 linux_rtprio2ioprio(struct rtprio *rtp)
2716 {
2717 int ioprio, prio;
2718
2719 switch (rtp->type) {
2720 case RTP_PRIO_IDLE:
2721 prio = RTP_PRIO_MIN;
2722 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio);
2723 break;
2724 case RTP_PRIO_NORMAL:
2725 prio = rtp->prio / LINUX_PRIO_DIVIDER;
2726 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio);
2727 break;
2728 case RTP_PRIO_REALTIME:
2729 prio = rtp->prio / LINUX_PRIO_DIVIDER;
2730 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio);
2731 break;
2732 default:
2733 prio = RTP_PRIO_MIN;
2734 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio);
2735 break;
2736 }
2737 return (ioprio);
2738 }
2739
2740 static int
linux_ioprio2rtprio(int ioprio,struct rtprio * rtp)2741 linux_ioprio2rtprio(int ioprio, struct rtprio *rtp)
2742 {
2743
2744 switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) {
2745 case LINUX_IOPRIO_CLASS_IDLE:
2746 rtp->prio = RTP_PRIO_MIN;
2747 rtp->type = RTP_PRIO_IDLE;
2748 break;
2749 case LINUX_IOPRIO_CLASS_BE:
2750 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER;
2751 rtp->type = RTP_PRIO_NORMAL;
2752 break;
2753 case LINUX_IOPRIO_CLASS_RT:
2754 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER;
2755 rtp->type = RTP_PRIO_REALTIME;
2756 break;
2757 default:
2758 return (EINVAL);
2759 }
2760 return (0);
2761 }
2762 #undef LINUX_PRIO_DIVIDER
2763
int
linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args)
{
	struct thread *td1;
	struct rtprio rtp;
	struct pgrp *pg;
	struct proc *p;
	int error, found;

	p = NULL;
	td1 = NULL;
	error = 0;
	found = 0;
	/*
	 * Start from the lowest possible priority; linux_up_rtprio_if()
	 * raises rtp to the highest priority among the examined threads.
	 */
	rtp.type = RTP_PRIO_IDLE;
	rtp.prio = RTP_PRIO_MAX;
	switch (args->which) {
	case LINUX_IOPRIO_WHO_PROCESS:
		if (args->who == 0) {
			/* who == 0 means the calling thread/process. */
			td1 = td;
			p = td1->td_proc;
			PROC_LOCK(p);
		} else if (args->who > PID_MAX) {
			/* Tids above PID_MAX denote non-initial threads. */
			td1 = linux_tdfind(td, args->who, -1);
			if (td1 != NULL)
				p = td1->td_proc;
		} else
			p = pfind(args->who);
		/* NOTE(review): both lookups return with the proc locked. */
		if (p == NULL)
			return (ESRCH);
		if ((error = p_cansee(td, p))) {
			PROC_UNLOCK(p);
			break;
		}
		if (td1 != NULL) {
			/* A specific thread: report its own priority. */
			pri_to_rtp(td1, &rtp);
		} else {
			/* A whole process: report its best thread priority. */
			FOREACH_THREAD_IN_PROC(p, td1) {
				linux_up_rtprio_if(td1, &rtp);
			}
		}
		found++;
		PROC_UNLOCK(p);
		break;
	case LINUX_IOPRIO_WHO_PGRP:
		/* proctree_lock covers the pgfind()/p_pgrp lookup. */
		sx_slock(&proctree_lock);
		if (args->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			/* pgfind() returns the pgrp locked. */
			pg = pgfind(args->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				error = ESRCH;
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		/* Scan every visible member of the process group. */
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					linux_up_rtprio_if(td1, &rtp);
					found++;
				}
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;
	case LINUX_IOPRIO_WHO_USER:
		/* who == 0 means the calling user. */
		if (args->who == 0)
			args->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		/* Scan every visible process owned by that user. */
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == args->who &&
			    p_cansee(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					linux_up_rtprio_if(td1, &rtp);
					found++;
				}
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	if (error == 0) {
		/* Translate the best priority found into ioprio form. */
		if (found != 0)
			td->td_retval[0] = linux_rtprio2ioprio(&rtp);
		else
			error = ESRCH;
	}
	return (error);
}
2864
int
linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args)
{
	struct thread *td1;
	struct rtprio rtp;
	struct pgrp *pg;
	struct proc *p;
	int error;

	/* Translate the requested Linux ioprio to a native rtprio. */
	if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0)
		return (error);
	/* Attempts to set high priorities (REALTIME) require su privileges. */
	if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME &&
	    (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0)
		return (error);

	p = NULL;
	td1 = NULL;
	switch (args->which) {
	case LINUX_IOPRIO_WHO_PROCESS:
		if (args->who == 0) {
			/* who == 0 means the calling thread/process. */
			td1 = td;
			p = td1->td_proc;
			PROC_LOCK(p);
		} else if (args->who > PID_MAX) {
			/* Tids above PID_MAX denote non-initial threads. */
			td1 = linux_tdfind(td, args->who, -1);
			if (td1 != NULL)
				p = td1->td_proc;
		} else
			p = pfind(args->who);
		/* NOTE(review): both lookups return with the proc locked. */
		if (p == NULL)
			return (ESRCH);
		if ((error = p_cansched(td, p))) {
			PROC_UNLOCK(p);
			break;
		}
		if (td1 != NULL) {
			/* A specific thread: set only its priority. */
			error = rtp_to_pri(&rtp, td1);
		} else {
			/* A whole process: set every thread's priority. */
			FOREACH_THREAD_IN_PROC(p, td1) {
				if ((error = rtp_to_pri(&rtp, td1)) != 0)
					break;
			}
		}
		PROC_UNLOCK(p);
		break;
	case LINUX_IOPRIO_WHO_PGRP:
		/* proctree_lock covers the pgfind()/p_pgrp lookup. */
		sx_slock(&proctree_lock);
		if (args->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			/* pgfind() returns the pgrp locked. */
			pg = pgfind(args->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				error = ESRCH;
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		/* Apply the priority to every schedulable member. */
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansched(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					if ((error = rtp_to_pri(&rtp, td1)) != 0)
						break;
				}
			}
			PROC_UNLOCK(p);
			if (error != 0)
				break;
		}
		PGRP_UNLOCK(pg);
		break;
	case LINUX_IOPRIO_WHO_USER:
		/* who == 0 means the calling user. */
		if (args->who == 0)
			args->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		/* Apply to every schedulable process owned by that user. */
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == args->who &&
			    p_cansched(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					if ((error = rtp_to_pri(&rtp, td1)) != 0)
						break;
				}
			}
			PROC_UNLOCK(p);
			if (error != 0)
				break;
		}
		sx_sunlock(&allproc_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
2966