xref: /freebsd/sys/compat/linux/linux_file.c (revision 6419bb52)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1994-1995 Søren Schmidt
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/capsicum.h>
37 #include <sys/conf.h>
38 #include <sys/dirent.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/filedesc.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mount.h>
45 #include <sys/mutex.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/stat.h>
49 #include <sys/sx.h>
50 #include <sys/syscallsubr.h>
51 #include <sys/sysproto.h>
52 #include <sys/tty.h>
53 #include <sys/unistd.h>
54 #include <sys/vnode.h>
55 
56 #ifdef COMPAT_LINUX32
57 #include <compat/freebsd32/freebsd32_misc.h>
58 #include <machine/../linux32/linux.h>
59 #include <machine/../linux32/linux32_proto.h>
60 #else
61 #include <machine/../linux/linux.h>
62 #include <machine/../linux/linux_proto.h>
63 #endif
64 #include <compat/linux/linux_misc.h>
65 #include <compat/linux/linux_util.h>
66 #include <compat/linux/linux_file.h>
67 
/* Forward declarations of the file-local helpers defined further below. */
static int	linux_common_open(struct thread *, int, char *, int, int);
static int	linux_getdents_error(struct thread *, int, int);
70 
71 #ifdef LINUX_LEGACY_SYSCALLS
72 int
73 linux_creat(struct thread *td, struct linux_creat_args *args)
74 {
75 	char *path;
76 	int error;
77 
78 	LCONVPATHEXIST(td, args->path, &path);
79 
80 	error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
81 	    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
82 	LFREEPATH(path);
83 	return (error);
84 }
85 #endif
86 
87 static int
88 linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode)
89 {
90 	struct proc *p = td->td_proc;
91 	struct file *fp;
92 	int fd;
93 	int bsd_flags, error;
94 
95 	bsd_flags = 0;
96 	switch (l_flags & LINUX_O_ACCMODE) {
97 	case LINUX_O_WRONLY:
98 		bsd_flags |= O_WRONLY;
99 		break;
100 	case LINUX_O_RDWR:
101 		bsd_flags |= O_RDWR;
102 		break;
103 	default:
104 		bsd_flags |= O_RDONLY;
105 	}
106 	if (l_flags & LINUX_O_NDELAY)
107 		bsd_flags |= O_NONBLOCK;
108 	if (l_flags & LINUX_O_APPEND)
109 		bsd_flags |= O_APPEND;
110 	if (l_flags & LINUX_O_SYNC)
111 		bsd_flags |= O_FSYNC;
112 	if (l_flags & LINUX_O_CLOEXEC)
113 		bsd_flags |= O_CLOEXEC;
114 	if (l_flags & LINUX_O_NONBLOCK)
115 		bsd_flags |= O_NONBLOCK;
116 	if (l_flags & LINUX_O_ASYNC)
117 		bsd_flags |= O_ASYNC;
118 	if (l_flags & LINUX_O_CREAT)
119 		bsd_flags |= O_CREAT;
120 	if (l_flags & LINUX_O_TRUNC)
121 		bsd_flags |= O_TRUNC;
122 	if (l_flags & LINUX_O_EXCL)
123 		bsd_flags |= O_EXCL;
124 	if (l_flags & LINUX_O_NOCTTY)
125 		bsd_flags |= O_NOCTTY;
126 	if (l_flags & LINUX_O_DIRECT)
127 		bsd_flags |= O_DIRECT;
128 	if (l_flags & LINUX_O_NOFOLLOW)
129 		bsd_flags |= O_NOFOLLOW;
130 	if (l_flags & LINUX_O_DIRECTORY)
131 		bsd_flags |= O_DIRECTORY;
132 	/* XXX LINUX_O_NOATIME: unable to be easily implemented. */
133 
134 	error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode);
135 	if (error != 0) {
136 		if (error == EMLINK)
137 			error = ELOOP;
138 		goto done;
139 	}
140 	if (p->p_flag & P_CONTROLT)
141 		goto done;
142 	if (bsd_flags & O_NOCTTY)
143 		goto done;
144 
145 	/*
146 	 * XXX In between kern_openat() and fget(), another process
147 	 * having the same filedesc could use that fd without
148 	 * checking below.
149 	*/
150 	fd = td->td_retval[0];
151 	if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) {
152 		if (fp->f_type != DTYPE_VNODE) {
153 			fdrop(fp, td);
154 			goto done;
155 		}
156 		sx_slock(&proctree_lock);
157 		PROC_LOCK(p);
158 		if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
159 			PROC_UNLOCK(p);
160 			sx_sunlock(&proctree_lock);
161 			/* XXXPJD: Verify if TIOCSCTTY is allowed. */
162 			(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
163 			    td->td_ucred, td);
164 		} else {
165 			PROC_UNLOCK(p);
166 			sx_sunlock(&proctree_lock);
167 		}
168 		fdrop(fp, td);
169 	}
170 
171 done:
172 	LFREEPATH(path);
173 	return (error);
174 }
175 
176 int
177 linux_openat(struct thread *td, struct linux_openat_args *args)
178 {
179 	char *path;
180 	int dfd;
181 
182 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
183 	if (args->flags & LINUX_O_CREAT)
184 		LCONVPATH_AT(td, args->filename, &path, 1, dfd);
185 	else
186 		LCONVPATH_AT(td, args->filename, &path, 0, dfd);
187 
188 	return (linux_common_open(td, dfd, path, args->flags, args->mode));
189 }
190 
191 #ifdef LINUX_LEGACY_SYSCALLS
192 int
193 linux_open(struct thread *td, struct linux_open_args *args)
194 {
195 	char *path;
196 
197 	if (args->flags & LINUX_O_CREAT)
198 		LCONVPATHCREAT(td, args->path, &path);
199 	else
200 		LCONVPATHEXIST(td, args->path, &path);
201 
202 	return (linux_common_open(td, AT_FDCWD, path, args->flags, args->mode));
203 }
204 #endif
205 
206 int
207 linux_lseek(struct thread *td, struct linux_lseek_args *args)
208 {
209 
210 	return (kern_lseek(td, args->fdes, args->off, args->whence));
211 }
212 
213 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
214 int
215 linux_llseek(struct thread *td, struct linux_llseek_args *args)
216 {
217 	int error;
218 	off_t off;
219 
220 	off = (args->olow) | (((off_t) args->ohigh) << 32);
221 
222 	error = kern_lseek(td, args->fd, off, args->whence);
223 	if (error != 0)
224 		return (error);
225 
226 	error = copyout(td->td_retval, args->res, sizeof(off_t));
227 	if (error != 0)
228 		return (error);
229 
230 	td->td_retval[0] = 0;
231 	return (0);
232 }
233 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
234 
/*
 * Note that linux_getdents(2) and linux_getdents64(2) have the same
 * arguments. They only differ in the definition of struct dirent they
 * operate on.
 * Note that linux_readdir(2) is a special case of linux_getdents(2)
 * where count is always equal to 1, meaning that the buffer is one
 * dirent-structure in size and that the code can't handle more anyway.
 * Note that linux_readdir(2) can't be implemented by means of
 * linux_getdents(2): when the *dent buffer size is equal to 1,
 * linux_getdents(2) would trash the user stack.
 */
246 
247 static int
248 linux_getdents_error(struct thread *td, int fd, int err)
249 {
250 	struct vnode *vp;
251 	struct file *fp;
252 	int error;
253 
254 	/* Linux return ENOTDIR in case when fd is not a directory. */
255 	error = getvnode(td, fd, &cap_read_rights, &fp);
256 	if (error != 0)
257 		return (error);
258 	vp = fp->f_vnode;
259 	if (vp->v_type != VDIR) {
260 		fdrop(fp, td);
261 		return (ENOTDIR);
262 	}
263 	fdrop(fp, td);
264 	return (err);
265 }
266 
/*
 * Directory entry layout written to userland by linux_getdents() and
 * linux_readdir().
 */
struct l_dirent {
	l_ulong		d_ino;		/* filled from the native d_fileno */
	l_off_t		d_off;
	l_ushort	d_reclen;
	char		d_name[LINUX_NAME_MAX + 1];
};
273 
/*
 * Directory entry layout written to userland by linux_getdents64().
 * Unlike struct l_dirent it carries d_type as a regular member.
 */
struct l_dirent64 {
	uint64_t	d_ino;		/* filled from the native d_fileno */
	int64_t		d_off;
	l_ushort	d_reclen;
	u_char		d_type;		/* filled from the native d_type */
	char		d_name[LINUX_NAME_MAX + 1];
};
281 
/*
 * Linux uses the last byte in the dirent buffer to store d_type,
 * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes.
 *
 * LINUX_RECLEN(namlen) is the size of a struct l_dirent record holding a
 * name of namlen characters: header, name, 2 extra bytes, rounded up to a
 * multiple of sizeof(l_ulong).
 */
#define LINUX_RECLEN(namlen)						\
    roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong))

/*
 * LINUX_RECLEN64(namlen) is the equivalent for struct l_dirent64: header,
 * name, 1 extra byte, rounded up to a multiple of sizeof(uint64_t).
 */
#define LINUX_RECLEN64(namlen)						\
    roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1,		\
    sizeof(uint64_t))
292 
293 #ifdef LINUX_LEGACY_SYSCALLS
294 int
295 linux_getdents(struct thread *td, struct linux_getdents_args *args)
296 {
297 	struct dirent *bdp;
298 	caddr_t inp, buf;		/* BSD-format */
299 	int len, reclen;		/* BSD-format */
300 	caddr_t outp;			/* Linux-format */
301 	int resid, linuxreclen;		/* Linux-format */
302 	caddr_t lbuf;			/* Linux-format */
303 	off_t base;
304 	struct l_dirent *linux_dirent;
305 	int buflen, error;
306 	size_t retval;
307 
308 	buflen = min(args->count, MAXBSIZE);
309 	buf = malloc(buflen, M_TEMP, M_WAITOK);
310 
311 	error = kern_getdirentries(td, args->fd, buf, buflen,
312 	    &base, NULL, UIO_SYSSPACE);
313 	if (error != 0) {
314 		error = linux_getdents_error(td, args->fd, error);
315 		goto out1;
316 	}
317 
318 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
319 
320 	len = td->td_retval[0];
321 	inp = buf;
322 	outp = (caddr_t)args->dent;
323 	resid = args->count;
324 	retval = 0;
325 
326 	while (len > 0) {
327 		bdp = (struct dirent *) inp;
328 		reclen = bdp->d_reclen;
329 		linuxreclen = LINUX_RECLEN(bdp->d_namlen);
330 		/*
331 		 * No more space in the user supplied dirent buffer.
332 		 * Return EINVAL.
333 		 */
334 		if (resid < linuxreclen) {
335 			error = EINVAL;
336 			goto out;
337 		}
338 
339 		linux_dirent = (struct l_dirent*)lbuf;
340 		linux_dirent->d_ino = bdp->d_fileno;
341 		linux_dirent->d_off = base + reclen;
342 		linux_dirent->d_reclen = linuxreclen;
343 		/*
344 		 * Copy d_type to last byte of l_dirent buffer
345 		 */
346 		lbuf[linuxreclen - 1] = bdp->d_type;
347 		strlcpy(linux_dirent->d_name, bdp->d_name,
348 		    linuxreclen - offsetof(struct l_dirent, d_name)-1);
349 		error = copyout(linux_dirent, outp, linuxreclen);
350 		if (error != 0)
351 			goto out;
352 
353 		inp += reclen;
354 		base += reclen;
355 		len -= reclen;
356 
357 		retval += linuxreclen;
358 		outp += linuxreclen;
359 		resid -= linuxreclen;
360 	}
361 	td->td_retval[0] = retval;
362 
363 out:
364 	free(lbuf, M_TEMP);
365 out1:
366 	free(buf, M_TEMP);
367 	return (error);
368 }
369 #endif
370 
371 int
372 linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
373 {
374 	struct dirent *bdp;
375 	caddr_t inp, buf;		/* BSD-format */
376 	int len, reclen;		/* BSD-format */
377 	caddr_t outp;			/* Linux-format */
378 	int resid, linuxreclen;		/* Linux-format */
379 	caddr_t lbuf;			/* Linux-format */
380 	off_t base;
381 	struct l_dirent64 *linux_dirent64;
382 	int buflen, error;
383 	size_t retval;
384 
385 	buflen = min(args->count, MAXBSIZE);
386 	buf = malloc(buflen, M_TEMP, M_WAITOK);
387 
388 	error = kern_getdirentries(td, args->fd, buf, buflen,
389 	    &base, NULL, UIO_SYSSPACE);
390 	if (error != 0) {
391 		error = linux_getdents_error(td, args->fd, error);
392 		goto out1;
393 	}
394 
395 	lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
396 
397 	len = td->td_retval[0];
398 	inp = buf;
399 	outp = (caddr_t)args->dirent;
400 	resid = args->count;
401 	retval = 0;
402 
403 	while (len > 0) {
404 		bdp = (struct dirent *) inp;
405 		reclen = bdp->d_reclen;
406 		linuxreclen = LINUX_RECLEN64(bdp->d_namlen);
407 		/*
408 		 * No more space in the user supplied dirent buffer.
409 		 * Return EINVAL.
410 		 */
411 		if (resid < linuxreclen) {
412 			error = EINVAL;
413 			goto out;
414 		}
415 
416 		linux_dirent64 = (struct l_dirent64*)lbuf;
417 		linux_dirent64->d_ino = bdp->d_fileno;
418 		linux_dirent64->d_off = base + reclen;
419 		linux_dirent64->d_reclen = linuxreclen;
420 		linux_dirent64->d_type = bdp->d_type;
421 		strlcpy(linux_dirent64->d_name, bdp->d_name,
422 		    linuxreclen - offsetof(struct l_dirent64, d_name));
423 		error = copyout(linux_dirent64, outp, linuxreclen);
424 		if (error != 0)
425 			goto out;
426 
427 		inp += reclen;
428 		base += reclen;
429 		len -= reclen;
430 
431 		retval += linuxreclen;
432 		outp += linuxreclen;
433 		resid -= linuxreclen;
434 	}
435 	td->td_retval[0] = retval;
436 
437 out:
438 	free(lbuf, M_TEMP);
439 out1:
440 	free(buf, M_TEMP);
441 	return (error);
442 }
443 
444 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
445 int
446 linux_readdir(struct thread *td, struct linux_readdir_args *args)
447 {
448 	struct dirent *bdp;
449 	caddr_t buf;			/* BSD-format */
450 	int linuxreclen;		/* Linux-format */
451 	caddr_t lbuf;			/* Linux-format */
452 	off_t base;
453 	struct l_dirent *linux_dirent;
454 	int buflen, error;
455 
456 	buflen = LINUX_RECLEN(LINUX_NAME_MAX);
457 	buf = malloc(buflen, M_TEMP, M_WAITOK);
458 
459 	error = kern_getdirentries(td, args->fd, buf, buflen,
460 	    &base, NULL, UIO_SYSSPACE);
461 	if (error != 0) {
462 		error = linux_getdents_error(td, args->fd, error);
463 		goto out;
464 	}
465 	if (td->td_retval[0] == 0)
466 		goto out;
467 
468 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
469 
470 	bdp = (struct dirent *) buf;
471 	linuxreclen = LINUX_RECLEN(bdp->d_namlen);
472 
473 	linux_dirent = (struct l_dirent*)lbuf;
474 	linux_dirent->d_ino = bdp->d_fileno;
475 	linux_dirent->d_off = linuxreclen;
476 	linux_dirent->d_reclen = bdp->d_namlen;
477 	strlcpy(linux_dirent->d_name, bdp->d_name,
478 	    linuxreclen - offsetof(struct l_dirent, d_name));
479 	error = copyout(linux_dirent, args->dent, linuxreclen);
480 	if (error == 0)
481 		td->td_retval[0] = linuxreclen;
482 
483 	free(lbuf, M_TEMP);
484 out:
485 	free(buf, M_TEMP);
486 	return (error);
487 }
488 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
489 
490 
491 /*
492  * These exist mainly for hooks for doing /compat/linux translation.
493  */
494 
495 #ifdef LINUX_LEGACY_SYSCALLS
496 int
497 linux_access(struct thread *td, struct linux_access_args *args)
498 {
499 	char *path;
500 	int error;
501 
502 	/* Linux convention. */
503 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
504 		return (EINVAL);
505 
506 	LCONVPATHEXIST(td, args->path, &path);
507 
508 	error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0,
509 	    args->amode);
510 	LFREEPATH(path);
511 
512 	return (error);
513 }
514 #endif
515 
516 int
517 linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
518 {
519 	char *path;
520 	int error, dfd;
521 
522 	/* Linux convention. */
523 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
524 		return (EINVAL);
525 
526 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
527 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
528 
529 	error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode);
530 	LFREEPATH(path);
531 
532 	return (error);
533 }
534 
535 #ifdef LINUX_LEGACY_SYSCALLS
536 int
537 linux_unlink(struct thread *td, struct linux_unlink_args *args)
538 {
539 	char *path;
540 	int error;
541 	struct stat st;
542 
543 	LCONVPATHEXIST(td, args->path, &path);
544 
545 	error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0);
546 	if (error == EPERM) {
547 		/* Introduce POSIX noncompliant behaviour of Linux */
548 		if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st,
549 		    NULL) == 0) {
550 			if (S_ISDIR(st.st_mode))
551 				error = EISDIR;
552 		}
553 	}
554 	LFREEPATH(path);
555 	return (error);
556 }
557 #endif
558 
559 int
560 linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args)
561 {
562 	char *path;
563 	int error, dfd;
564 	struct stat st;
565 
566 	if (args->flag & ~LINUX_AT_REMOVEDIR)
567 		return (EINVAL);
568 
569 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
570 	LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
571 
572 	if (args->flag & LINUX_AT_REMOVEDIR)
573 		error = kern_frmdirat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0);
574 	else
575 		error = kern_funlinkat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0,
576 		    0);
577 	if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) {
578 		/* Introduce POSIX noncompliant behaviour of Linux */
579 		if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path,
580 		    UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode))
581 			error = EISDIR;
582 	}
583 	LFREEPATH(path);
584 	return (error);
585 }
586 int
587 linux_chdir(struct thread *td, struct linux_chdir_args *args)
588 {
589 	char *path;
590 	int error;
591 
592 	LCONVPATHEXIST(td, args->path, &path);
593 
594 	error = kern_chdir(td, path, UIO_SYSSPACE);
595 	LFREEPATH(path);
596 	return (error);
597 }
598 
599 #ifdef LINUX_LEGACY_SYSCALLS
600 int
601 linux_chmod(struct thread *td, struct linux_chmod_args *args)
602 {
603 	char *path;
604 	int error;
605 
606 	LCONVPATHEXIST(td, args->path, &path);
607 
608 	error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE,
609 	    args->mode, 0);
610 	LFREEPATH(path);
611 	return (error);
612 }
613 #endif
614 
615 int
616 linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args)
617 {
618 	char *path;
619 	int error, dfd;
620 
621 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
622 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
623 
624 	error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0);
625 	LFREEPATH(path);
626 	return (error);
627 }
628 
629 #ifdef LINUX_LEGACY_SYSCALLS
630 int
631 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
632 {
633 	char *path;
634 	int error;
635 
636 	LCONVPATHCREAT(td, args->path, &path);
637 
638 	error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode);
639 	LFREEPATH(path);
640 	return (error);
641 }
642 #endif
643 
644 int
645 linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args)
646 {
647 	char *path;
648 	int error, dfd;
649 
650 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
651 	LCONVPATHCREAT_AT(td, args->pathname, &path, dfd);
652 
653 	error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode);
654 	LFREEPATH(path);
655 	return (error);
656 }
657 
658 #ifdef LINUX_LEGACY_SYSCALLS
659 int
660 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
661 {
662 	char *path;
663 	int error;
664 
665 	LCONVPATHEXIST(td, args->path, &path);
666 
667 	error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0);
668 	LFREEPATH(path);
669 	return (error);
670 }
671 
672 int
673 linux_rename(struct thread *td, struct linux_rename_args *args)
674 {
675 	char *from, *to;
676 	int error;
677 
678 	LCONVPATHEXIST(td, args->from, &from);
679 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
680 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
681 	if (to == NULL) {
682 		LFREEPATH(from);
683 		return (error);
684 	}
685 
686 	error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE);
687 	LFREEPATH(from);
688 	LFREEPATH(to);
689 	return (error);
690 }
691 #endif
692 
693 int
694 linux_renameat(struct thread *td, struct linux_renameat_args *args)
695 {
696 	struct linux_renameat2_args renameat2_args = {
697 	    .olddfd = args->olddfd,
698 	    .oldname = args->oldname,
699 	    .newdfd = args->newdfd,
700 	    .newname = args->newname,
701 	    .flags = 0
702 	};
703 
704 	return (linux_renameat2(td, &renameat2_args));
705 }
706 
707 int
708 linux_renameat2(struct thread *td, struct linux_renameat2_args *args)
709 {
710 	char *from, *to;
711 	int error, olddfd, newdfd;
712 
713 	if (args->flags != 0) {
714 		if (args->flags & ~(LINUX_RENAME_EXCHANGE |
715 		    LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT))
716 			return (EINVAL);
717 		if (args->flags & LINUX_RENAME_EXCHANGE &&
718 		    args->flags & (LINUX_RENAME_NOREPLACE |
719 		    LINUX_RENAME_WHITEOUT))
720 			return (EINVAL);
721 		linux_msg(td, "renameat2 unsupported flags 0x%x",
722 		    args->flags);
723 		return (EINVAL);
724 	}
725 
726 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
727 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
728 	LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd);
729 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
730 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
731 	if (to == NULL) {
732 		LFREEPATH(from);
733 		return (error);
734 	}
735 
736 	error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE);
737 	LFREEPATH(from);
738 	LFREEPATH(to);
739 	return (error);
740 }
741 
742 #ifdef LINUX_LEGACY_SYSCALLS
743 int
744 linux_symlink(struct thread *td, struct linux_symlink_args *args)
745 {
746 	char *path, *to;
747 	int error;
748 
749 	LCONVPATHEXIST(td, args->path, &path);
750 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
751 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
752 	if (to == NULL) {
753 		LFREEPATH(path);
754 		return (error);
755 	}
756 
757 	error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE);
758 	LFREEPATH(path);
759 	LFREEPATH(to);
760 	return (error);
761 }
762 #endif
763 
764 int
765 linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args)
766 {
767 	char *path, *to;
768 	int error, dfd;
769 
770 	dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
771 	LCONVPATHEXIST(td, args->oldname, &path);
772 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
773 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd);
774 	if (to == NULL) {
775 		LFREEPATH(path);
776 		return (error);
777 	}
778 
779 	error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE);
780 	LFREEPATH(path);
781 	LFREEPATH(to);
782 	return (error);
783 }
784 
785 #ifdef LINUX_LEGACY_SYSCALLS
786 int
787 linux_readlink(struct thread *td, struct linux_readlink_args *args)
788 {
789 	char *name;
790 	int error;
791 
792 	LCONVPATHEXIST(td, args->name, &name);
793 
794 	error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE,
795 	    args->buf, UIO_USERSPACE, args->count);
796 	LFREEPATH(name);
797 	return (error);
798 }
799 #endif
800 
801 int
802 linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args)
803 {
804 	char *name;
805 	int error, dfd;
806 
807 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
808 	LCONVPATHEXIST_AT(td, args->path, &name, dfd);
809 
810 	error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf,
811 	    UIO_USERSPACE, args->bufsiz);
812 	LFREEPATH(name);
813 	return (error);
814 }
815 
816 int
817 linux_truncate(struct thread *td, struct linux_truncate_args *args)
818 {
819 	char *path;
820 	int error;
821 
822 	LCONVPATHEXIST(td, args->path, &path);
823 	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
824 	LFREEPATH(path);
825 	return (error);
826 }
827 
828 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
829 int
830 linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
831 {
832 	char *path;
833 	off_t length;
834 	int error;
835 
836 #if defined(__amd64__) && defined(COMPAT_LINUX32)
837 	length = PAIR32TO64(off_t, args->length);
838 #else
839 	length = args->length;
840 #endif
841 
842 	LCONVPATHEXIST(td, args->path, &path);
843 	error = kern_truncate(td, path, UIO_SYSSPACE, length);
844 	LFREEPATH(path);
845 	return (error);
846 }
847 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
848 
849 int
850 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
851 {
852 
853 	return (kern_ftruncate(td, args->fd, args->length));
854 }
855 
856 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
857 int
858 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
859 {
860 	off_t length;
861 
862 #if defined(__amd64__) && defined(COMPAT_LINUX32)
863 	length = PAIR32TO64(off_t, args->length);
864 #else
865 	length = args->length;
866 #endif
867 
868 	return (kern_ftruncate(td, args->fd, length));
869 }
870 #endif
871 
872 #ifdef LINUX_LEGACY_SYSCALLS
873 int
874 linux_link(struct thread *td, struct linux_link_args *args)
875 {
876 	char *path, *to;
877 	int error;
878 
879 	LCONVPATHEXIST(td, args->path, &path);
880 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
881 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
882 	if (to == NULL) {
883 		LFREEPATH(path);
884 		return (error);
885 	}
886 
887 	error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE,
888 	    FOLLOW);
889 	LFREEPATH(path);
890 	LFREEPATH(to);
891 	return (error);
892 }
893 #endif
894 
895 int
896 linux_linkat(struct thread *td, struct linux_linkat_args *args)
897 {
898 	char *path, *to;
899 	int error, olddfd, newdfd, follow;
900 
901 	if (args->flag & ~LINUX_AT_SYMLINK_FOLLOW)
902 		return (EINVAL);
903 
904 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
905 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
906 	LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd);
907 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
908 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
909 	if (to == NULL) {
910 		LFREEPATH(path);
911 		return (error);
912 	}
913 
914 	follow = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? NOFOLLOW :
915 	    FOLLOW;
916 	error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, follow);
917 	LFREEPATH(path);
918 	LFREEPATH(to);
919 	return (error);
920 }
921 
922 int
923 linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap)
924 {
925 
926 	return (kern_fsync(td, uap->fd, false));
927 }
928 
929 int
930 linux_sync_file_range(struct thread *td, struct linux_sync_file_range_args *uap)
931 {
932 	off_t nbytes, offset;
933 
934 #if defined(__amd64__) && defined(COMPAT_LINUX32)
935 	nbytes = PAIR32TO64(off_t, uap->nbytes);
936 	offset = PAIR32TO64(off_t, uap->offset);
937 #else
938 	nbytes = uap->nbytes;
939 	offset = uap->offset;
940 #endif
941 
942 	if (offset < 0 || nbytes < 0 ||
943 	    (uap->flags & ~(LINUX_SYNC_FILE_RANGE_WAIT_BEFORE |
944 	    LINUX_SYNC_FILE_RANGE_WRITE |
945 	    LINUX_SYNC_FILE_RANGE_WAIT_AFTER)) != 0) {
946 		return (EINVAL);
947 	}
948 
949 	return (kern_fsync(td, uap->fd, false));
950 }
951 
952 int
953 linux_pread(struct thread *td, struct linux_pread_args *uap)
954 {
955 	struct vnode *vp;
956 	off_t offset;
957 	int error;
958 
959 #if defined(__amd64__) && defined(COMPAT_LINUX32)
960 	offset = PAIR32TO64(off_t, uap->offset);
961 #else
962 	offset = uap->offset;
963 #endif
964 
965 	error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, offset);
966 	if (error == 0) {
967 		/* This seems to violate POSIX but Linux does it. */
968 		error = fgetvp(td, uap->fd, &cap_pread_rights, &vp);
969 		if (error != 0)
970 			return (error);
971 		if (vp->v_type == VDIR)
972 			error = EISDIR;
973 		vrele(vp);
974 	}
975 	return (error);
976 }
977 
978 int
979 linux_pwrite(struct thread *td, struct linux_pwrite_args *uap)
980 {
981 	off_t offset;
982 
983 #if defined(__amd64__) && defined(COMPAT_LINUX32)
984 	offset = PAIR32TO64(off_t, uap->offset);
985 #else
986 	offset = uap->offset;
987 #endif
988 
989 	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, offset));
990 }
991 
992 int
993 linux_preadv(struct thread *td, struct linux_preadv_args *uap)
994 {
995 	struct uio *auio;
996 	int error;
997 	off_t offset;
998 
999 	/*
1000 	 * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES
1001 	 * pos_l and pos_h, respectively, contain the
1002 	 * low order and high order 32 bits of offset.
1003 	 */
1004 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1005 	    (sizeof(offset) * 4)) | uap->pos_l;
1006 	if (offset < 0)
1007 		return (EINVAL);
1008 #ifdef COMPAT_LINUX32
1009 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1010 #else
1011 	error = copyinuio(uap->vec, uap->vlen, &auio);
1012 #endif
1013 	if (error != 0)
1014 		return (error);
1015 	error = kern_preadv(td, uap->fd, auio, offset);
1016 	free(auio, M_IOV);
1017 	return (error);
1018 }
1019 
1020 int
1021 linux_pwritev(struct thread *td, struct linux_pwritev_args *uap)
1022 {
1023 	struct uio *auio;
1024 	int error;
1025 	off_t offset;
1026 
1027 	/*
1028 	 * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES
1029 	 * pos_l and pos_h, respectively, contain the
1030 	 * low order and high order 32 bits of offset.
1031 	 */
1032 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1033 	    (sizeof(offset) * 4)) | uap->pos_l;
1034 	if (offset < 0)
1035 		return (EINVAL);
1036 #ifdef COMPAT_LINUX32
1037 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1038 #else
1039 	error = copyinuio(uap->vec, uap->vlen, &auio);
1040 #endif
1041 	if (error != 0)
1042 		return (error);
1043 	error = kern_pwritev(td, uap->fd, auio, offset);
1044 	free(auio, M_IOV);
1045 	return (error);
1046 }
1047 
1048 int
1049 linux_mount(struct thread *td, struct linux_mount_args *args)
1050 {
1051 	char fstypename[MFSNAMELEN];
1052 	char *mntonname, *mntfromname;
1053 	int error, fsflags;
1054 
1055 	mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1056 	mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1057 	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
1058 	    NULL);
1059 	if (error != 0)
1060 		goto out;
1061 	if (args->specialfile != NULL) {
1062 		error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
1063 		if (error != 0)
1064 			goto out;
1065 	} else {
1066 		mntfromname[0] = '\0';
1067 	}
1068 	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
1069 	if (error != 0)
1070 		goto out;
1071 
1072 	if (strcmp(fstypename, "ext2") == 0) {
1073 		strcpy(fstypename, "ext2fs");
1074 	} else if (strcmp(fstypename, "proc") == 0) {
1075 		strcpy(fstypename, "linprocfs");
1076 	} else if (strcmp(fstypename, "vfat") == 0) {
1077 		strcpy(fstypename, "msdosfs");
1078 	}
1079 
1080 	fsflags = 0;
1081 
1082 	/*
1083 	 * Linux SYNC flag is not included; the closest equivalent
1084 	 * FreeBSD has is !ASYNC, which is our default.
1085 	 */
1086 	if (args->rwflag & LINUX_MS_RDONLY)
1087 		fsflags |= MNT_RDONLY;
1088 	if (args->rwflag & LINUX_MS_NOSUID)
1089 		fsflags |= MNT_NOSUID;
1090 	if (args->rwflag & LINUX_MS_NOEXEC)
1091 		fsflags |= MNT_NOEXEC;
1092 	if (args->rwflag & LINUX_MS_REMOUNT)
1093 		fsflags |= MNT_UPDATE;
1094 
1095 	error = kernel_vmount(fsflags,
1096 	    "fstype", fstypename,
1097 	    "fspath", mntonname,
1098 	    "from", mntfromname,
1099 	    NULL);
1100 out:
1101 	free(mntonname, M_TEMP);
1102 	free(mntfromname, M_TEMP);
1103 	return (error);
1104 }
1105 
1106 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1107 int
1108 linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
1109 {
1110 
1111 	return (kern_unmount(td, args->path, 0));
1112 }
1113 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1114 
1115 #ifdef LINUX_LEGACY_SYSCALLS
1116 int
1117 linux_umount(struct thread *td, struct linux_umount_args *args)
1118 {
1119 	int flags;
1120 
1121 	flags = 0;
1122 	if ((args->flags & LINUX_MNT_FORCE) != 0) {
1123 		args->flags &= ~LINUX_MNT_FORCE;
1124 		flags |= MNT_FORCE;
1125 	}
1126 	if (args->flags != 0) {
1127 		linux_msg(td, "unsupported umount2 flags %#x", args->flags);
1128 		return (EINVAL);
1129 	}
1130 
1131 	return (kern_unmount(td, args->path, flags));
1132 }
1133 #endif
1134 
1135 /*
1136  * fcntl family of syscalls
1137  */
1138 
1139 struct l_flock {
1140 	l_short		l_type;
1141 	l_short		l_whence;
1142 	l_off_t		l_start;
1143 	l_off_t		l_len;
1144 	l_pid_t		l_pid;
1145 }
1146 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1147 __packed
1148 #endif
1149 ;
1150 
1151 static void
1152 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1153 {
1154 	switch (linux_flock->l_type) {
1155 	case LINUX_F_RDLCK:
1156 		bsd_flock->l_type = F_RDLCK;
1157 		break;
1158 	case LINUX_F_WRLCK:
1159 		bsd_flock->l_type = F_WRLCK;
1160 		break;
1161 	case LINUX_F_UNLCK:
1162 		bsd_flock->l_type = F_UNLCK;
1163 		break;
1164 	default:
1165 		bsd_flock->l_type = -1;
1166 		break;
1167 	}
1168 	bsd_flock->l_whence = linux_flock->l_whence;
1169 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1170 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1171 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1172 	bsd_flock->l_sysid = 0;
1173 }
1174 
1175 static void
1176 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1177 {
1178 	switch (bsd_flock->l_type) {
1179 	case F_RDLCK:
1180 		linux_flock->l_type = LINUX_F_RDLCK;
1181 		break;
1182 	case F_WRLCK:
1183 		linux_flock->l_type = LINUX_F_WRLCK;
1184 		break;
1185 	case F_UNLCK:
1186 		linux_flock->l_type = LINUX_F_UNLCK;
1187 		break;
1188 	}
1189 	linux_flock->l_whence = bsd_flock->l_whence;
1190 	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1191 	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1192 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1193 }
1194 
1195 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1196 struct l_flock64 {
1197 	l_short		l_type;
1198 	l_short		l_whence;
1199 	l_loff_t	l_start;
1200 	l_loff_t	l_len;
1201 	l_pid_t		l_pid;
1202 }
1203 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1204 __packed
1205 #endif
1206 ;
1207 
1208 static void
1209 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1210 {
1211 	switch (linux_flock->l_type) {
1212 	case LINUX_F_RDLCK:
1213 		bsd_flock->l_type = F_RDLCK;
1214 		break;
1215 	case LINUX_F_WRLCK:
1216 		bsd_flock->l_type = F_WRLCK;
1217 		break;
1218 	case LINUX_F_UNLCK:
1219 		bsd_flock->l_type = F_UNLCK;
1220 		break;
1221 	default:
1222 		bsd_flock->l_type = -1;
1223 		break;
1224 	}
1225 	bsd_flock->l_whence = linux_flock->l_whence;
1226 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1227 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1228 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1229 	bsd_flock->l_sysid = 0;
1230 }
1231 
1232 static void
1233 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1234 {
1235 	switch (bsd_flock->l_type) {
1236 	case F_RDLCK:
1237 		linux_flock->l_type = LINUX_F_RDLCK;
1238 		break;
1239 	case F_WRLCK:
1240 		linux_flock->l_type = LINUX_F_WRLCK;
1241 		break;
1242 	case F_UNLCK:
1243 		linux_flock->l_type = LINUX_F_UNLCK;
1244 		break;
1245 	}
1246 	linux_flock->l_whence = bsd_flock->l_whence;
1247 	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1248 	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1249 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1250 }
1251 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1252 
1253 static int
1254 fcntl_common(struct thread *td, struct linux_fcntl_args *args)
1255 {
1256 	struct l_flock linux_flock;
1257 	struct flock bsd_flock;
1258 	struct file *fp;
1259 	long arg;
1260 	int error, result;
1261 
1262 	switch (args->cmd) {
1263 	case LINUX_F_DUPFD:
1264 		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1265 
1266 	case LINUX_F_GETFD:
1267 		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1268 
1269 	case LINUX_F_SETFD:
1270 		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1271 
1272 	case LINUX_F_GETFL:
1273 		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1274 		result = td->td_retval[0];
1275 		td->td_retval[0] = 0;
1276 		if (result & O_RDONLY)
1277 			td->td_retval[0] |= LINUX_O_RDONLY;
1278 		if (result & O_WRONLY)
1279 			td->td_retval[0] |= LINUX_O_WRONLY;
1280 		if (result & O_RDWR)
1281 			td->td_retval[0] |= LINUX_O_RDWR;
1282 		if (result & O_NDELAY)
1283 			td->td_retval[0] |= LINUX_O_NONBLOCK;
1284 		if (result & O_APPEND)
1285 			td->td_retval[0] |= LINUX_O_APPEND;
1286 		if (result & O_FSYNC)
1287 			td->td_retval[0] |= LINUX_O_SYNC;
1288 		if (result & O_ASYNC)
1289 			td->td_retval[0] |= LINUX_O_ASYNC;
1290 #ifdef LINUX_O_NOFOLLOW
1291 		if (result & O_NOFOLLOW)
1292 			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1293 #endif
1294 #ifdef LINUX_O_DIRECT
1295 		if (result & O_DIRECT)
1296 			td->td_retval[0] |= LINUX_O_DIRECT;
1297 #endif
1298 		return (error);
1299 
1300 	case LINUX_F_SETFL:
1301 		arg = 0;
1302 		if (args->arg & LINUX_O_NDELAY)
1303 			arg |= O_NONBLOCK;
1304 		if (args->arg & LINUX_O_APPEND)
1305 			arg |= O_APPEND;
1306 		if (args->arg & LINUX_O_SYNC)
1307 			arg |= O_FSYNC;
1308 		if (args->arg & LINUX_O_ASYNC)
1309 			arg |= O_ASYNC;
1310 #ifdef LINUX_O_NOFOLLOW
1311 		if (args->arg & LINUX_O_NOFOLLOW)
1312 			arg |= O_NOFOLLOW;
1313 #endif
1314 #ifdef LINUX_O_DIRECT
1315 		if (args->arg & LINUX_O_DIRECT)
1316 			arg |= O_DIRECT;
1317 #endif
1318 		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1319 
1320 	case LINUX_F_GETLK:
1321 		error = copyin((void *)args->arg, &linux_flock,
1322 		    sizeof(linux_flock));
1323 		if (error)
1324 			return (error);
1325 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1326 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1327 		if (error)
1328 			return (error);
1329 		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1330 		return (copyout(&linux_flock, (void *)args->arg,
1331 		    sizeof(linux_flock)));
1332 
1333 	case LINUX_F_SETLK:
1334 		error = copyin((void *)args->arg, &linux_flock,
1335 		    sizeof(linux_flock));
1336 		if (error)
1337 			return (error);
1338 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1339 		return (kern_fcntl(td, args->fd, F_SETLK,
1340 		    (intptr_t)&bsd_flock));
1341 
1342 	case LINUX_F_SETLKW:
1343 		error = copyin((void *)args->arg, &linux_flock,
1344 		    sizeof(linux_flock));
1345 		if (error)
1346 			return (error);
1347 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1348 		return (kern_fcntl(td, args->fd, F_SETLKW,
1349 		     (intptr_t)&bsd_flock));
1350 
1351 	case LINUX_F_GETOWN:
1352 		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1353 
1354 	case LINUX_F_SETOWN:
1355 		/*
1356 		 * XXX some Linux applications depend on F_SETOWN having no
1357 		 * significant effect for pipes (SIGIO is not delivered for
1358 		 * pipes under Linux-2.2.35 at least).
1359 		 */
1360 		error = fget(td, args->fd,
1361 		    &cap_fcntl_rights, &fp);
1362 		if (error)
1363 			return (error);
1364 		if (fp->f_type == DTYPE_PIPE) {
1365 			fdrop(fp, td);
1366 			return (EINVAL);
1367 		}
1368 		fdrop(fp, td);
1369 
1370 		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1371 
1372 	case LINUX_F_DUPFD_CLOEXEC:
1373 		return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg));
1374 	default:
1375 		linux_msg(td, "unsupported fcntl cmd %d\n", args->cmd);
1376 		return (EINVAL);
1377 	}
1378 }
1379 
/*
 * Linux fcntl entry point.  Every command is handled by fcntl_common(),
 * whose result is returned unchanged.
 */
int
linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
{
	int error;

	error = fcntl_common(td, args);
	return (error);
}
1386 
1387 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1388 int
1389 linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1390 {
1391 	struct l_flock64 linux_flock;
1392 	struct flock bsd_flock;
1393 	struct linux_fcntl_args fcntl_args;
1394 	int error;
1395 
1396 	switch (args->cmd) {
1397 	case LINUX_F_GETLK64:
1398 		error = copyin((void *)args->arg, &linux_flock,
1399 		    sizeof(linux_flock));
1400 		if (error)
1401 			return (error);
1402 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1403 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1404 		if (error)
1405 			return (error);
1406 		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1407 		return (copyout(&linux_flock, (void *)args->arg,
1408 			    sizeof(linux_flock)));
1409 
1410 	case LINUX_F_SETLK64:
1411 		error = copyin((void *)args->arg, &linux_flock,
1412 		    sizeof(linux_flock));
1413 		if (error)
1414 			return (error);
1415 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1416 		return (kern_fcntl(td, args->fd, F_SETLK,
1417 		    (intptr_t)&bsd_flock));
1418 
1419 	case LINUX_F_SETLKW64:
1420 		error = copyin((void *)args->arg, &linux_flock,
1421 		    sizeof(linux_flock));
1422 		if (error)
1423 			return (error);
1424 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1425 		return (kern_fcntl(td, args->fd, F_SETLKW,
1426 		    (intptr_t)&bsd_flock));
1427 	}
1428 
1429 	fcntl_args.fd = args->fd;
1430 	fcntl_args.cmd = args->cmd;
1431 	fcntl_args.arg = args->arg;
1432 	return (fcntl_common(td, &fcntl_args));
1433 }
1434 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1435 
1436 #ifdef LINUX_LEGACY_SYSCALLS
1437 int
1438 linux_chown(struct thread *td, struct linux_chown_args *args)
1439 {
1440 	char *path;
1441 	int error;
1442 
1443 	LCONVPATHEXIST(td, args->path, &path);
1444 
1445 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
1446 	    args->gid, 0);
1447 	LFREEPATH(path);
1448 	return (error);
1449 }
1450 #endif
1451 
1452 int
1453 linux_fchownat(struct thread *td, struct linux_fchownat_args *args)
1454 {
1455 	char *path;
1456 	int error, dfd, flag;
1457 
1458 	if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW)
1459 		return (EINVAL);
1460 
1461 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD :  args->dfd;
1462 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
1463 
1464 	flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 :
1465 	    AT_SYMLINK_NOFOLLOW;
1466 	error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid,
1467 	    flag);
1468 	LFREEPATH(path);
1469 	return (error);
1470 }
1471 
1472 #ifdef LINUX_LEGACY_SYSCALLS
1473 int
1474 linux_lchown(struct thread *td, struct linux_lchown_args *args)
1475 {
1476 	char *path;
1477 	int error;
1478 
1479 	LCONVPATHEXIST(td, args->path, &path);
1480 
1481 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
1482 	    args->gid, AT_SYMLINK_NOFOLLOW);
1483 	LFREEPATH(path);
1484 	return (error);
1485 }
1486 #endif
1487 
1488 static int
1489 convert_fadvice(int advice)
1490 {
1491 	switch (advice) {
1492 	case LINUX_POSIX_FADV_NORMAL:
1493 		return (POSIX_FADV_NORMAL);
1494 	case LINUX_POSIX_FADV_RANDOM:
1495 		return (POSIX_FADV_RANDOM);
1496 	case LINUX_POSIX_FADV_SEQUENTIAL:
1497 		return (POSIX_FADV_SEQUENTIAL);
1498 	case LINUX_POSIX_FADV_WILLNEED:
1499 		return (POSIX_FADV_WILLNEED);
1500 	case LINUX_POSIX_FADV_DONTNEED:
1501 		return (POSIX_FADV_DONTNEED);
1502 	case LINUX_POSIX_FADV_NOREUSE:
1503 		return (POSIX_FADV_NOREUSE);
1504 	default:
1505 		return (-1);
1506 	}
1507 }
1508 
1509 int
1510 linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args)
1511 {
1512 	off_t offset;
1513 	int advice;
1514 
1515 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1516 	offset = PAIR32TO64(off_t, args->offset);
1517 #else
1518 	offset = args->offset;
1519 #endif
1520 
1521 	advice = convert_fadvice(args->advice);
1522 	if (advice == -1)
1523 		return (EINVAL);
1524 	return (kern_posix_fadvise(td, args->fd, offset, args->len, advice));
1525 }
1526 
1527 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1528 int
1529 linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
1530 {
1531 	off_t len, offset;
1532 	int advice;
1533 
1534 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1535 	len = PAIR32TO64(off_t, args->len);
1536 	offset = PAIR32TO64(off_t, args->offset);
1537 #else
1538 	len = args->len;
1539 	offset = args->offset;
1540 #endif
1541 
1542 	advice = convert_fadvice(args->advice);
1543 	if (advice == -1)
1544 		return (EINVAL);
1545 	return (kern_posix_fadvise(td, args->fd, offset, len, advice));
1546 }
1547 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1548 
1549 #ifdef LINUX_LEGACY_SYSCALLS
1550 int
1551 linux_pipe(struct thread *td, struct linux_pipe_args *args)
1552 {
1553 	int fildes[2];
1554 	int error;
1555 
1556 	error = kern_pipe(td, fildes, 0, NULL, NULL);
1557 	if (error != 0)
1558 		return (error);
1559 
1560 	error = copyout(fildes, args->pipefds, sizeof(fildes));
1561 	if (error != 0) {
1562 		(void)kern_close(td, fildes[0]);
1563 		(void)kern_close(td, fildes[1]);
1564 	}
1565 
1566 	return (error);
1567 }
1568 #endif
1569 
1570 int
1571 linux_pipe2(struct thread *td, struct linux_pipe2_args *args)
1572 {
1573 	int fildes[2];
1574 	int error, flags;
1575 
1576 	if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0)
1577 		return (EINVAL);
1578 
1579 	flags = 0;
1580 	if ((args->flags & LINUX_O_NONBLOCK) != 0)
1581 		flags |= O_NONBLOCK;
1582 	if ((args->flags & LINUX_O_CLOEXEC) != 0)
1583 		flags |= O_CLOEXEC;
1584 	error = kern_pipe(td, fildes, flags, NULL, NULL);
1585 	if (error != 0)
1586 		return (error);
1587 
1588 	error = copyout(fildes, args->pipefds, sizeof(fildes));
1589 	if (error != 0) {
1590 		(void)kern_close(td, fildes[0]);
1591 		(void)kern_close(td, fildes[1]);
1592 	}
1593 
1594 	return (error);
1595 }
1596 
1597 int
1598 linux_dup3(struct thread *td, struct linux_dup3_args *args)
1599 {
1600 	int cmd;
1601 	intptr_t newfd;
1602 
1603 	if (args->oldfd == args->newfd)
1604 		return (EINVAL);
1605 	if ((args->flags & ~LINUX_O_CLOEXEC) != 0)
1606 		return (EINVAL);
1607 	if (args->flags & LINUX_O_CLOEXEC)
1608 		cmd = F_DUP2FD_CLOEXEC;
1609 	else
1610 		cmd = F_DUP2FD;
1611 
1612 	newfd = args->newfd;
1613 	return (kern_fcntl(td, args->oldfd, cmd, newfd));
1614 }
1615 
1616 int
1617 linux_fallocate(struct thread *td, struct linux_fallocate_args *args)
1618 {
1619 	off_t len, offset;
1620 
1621 	/*
1622 	 * We emulate only posix_fallocate system call for which
1623 	 * mode should be 0.
1624 	 */
1625 	if (args->mode != 0)
1626 		return (ENOSYS);
1627 
1628 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1629 	len = PAIR32TO64(off_t, args->len);
1630 	offset = PAIR32TO64(off_t, args->offset);
1631 #else
1632 	len = args->len;
1633 	offset = args->offset;
1634 #endif
1635 
1636 	return (kern_posix_fallocate(td, args->fd, offset, len));
1637 }
1638 
1639 int
1640 linux_copy_file_range(struct thread *td, struct linux_copy_file_range_args
1641     *args)
1642 {
1643 	l_loff_t inoff, outoff, *inoffp, *outoffp;
1644 	int error, flags;
1645 
1646 	/*
1647 	 * copy_file_range(2) on Linux doesn't define any flags (yet), so is
1648 	 * the native implementation.  Enforce it.
1649 	 */
1650 	if (args->flags != 0) {
1651 		linux_msg(td, "copy_file_range unsupported flags 0x%x",
1652 		    args->flags);
1653 		return (EINVAL);
1654 	}
1655 	flags = 0;
1656 	inoffp = outoffp = NULL;
1657 	if (args->off_in != NULL) {
1658 		error = copyin(args->off_in, &inoff, sizeof(l_loff_t));
1659 		if (error != 0)
1660 			return (error);
1661 		inoffp = &inoff;
1662 	}
1663 	if (args->off_out != NULL) {
1664 		error = copyin(args->off_out, &outoff, sizeof(l_loff_t));
1665 		if (error != 0)
1666 			return (error);
1667 		outoffp = &outoff;
1668 	}
1669 
1670 	error = kern_copy_file_range(td, args->fd_in, inoffp, args->fd_out,
1671 	    outoffp, args->len, flags);
1672 	if (error == 0 && args->off_in != NULL)
1673 		error = copyout(inoffp, args->off_in, sizeof(l_loff_t));
1674 	if (error == 0 && args->off_out != NULL)
1675 		error = copyout(outoffp, args->off_out, sizeof(l_loff_t));
1676 	return (error);
1677 }
1678 
1679