1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
23  *
24  * Extended attributes (xattr) on Solaris are implemented as files
25  * which exist in a hidden xattr directory.  These extended attributes
26  * can be accessed using the attropen() system call which opens
27  * the extended attribute.  It can then be manipulated just like
28  * a standard file descriptor.  This has a couple advantages such
29  * as practically no size limit on the file, and the extended
30  * attributes permissions may differ from those of the parent file.
31  * This interface is really quite clever, but it's also completely
32  * different than what is supported on Linux.  It also comes with a
33  * steep performance penalty when accessing small xattrs because they
34  * are not stored with the parent file.
35  *
36  * Under Linux extended attributes are manipulated by the system
37  * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
38  * extended attributes to be name/value pairs where the name is a
39  * NULL terminated string.  The name must also include one of the
40  * following namespace prefixes:
41  *
42  *   user     - No restrictions and is available to user applications.
43  *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
44  *   system   - Used for access control lists (system.nfs4_acl, etc).
 *   security - Used by SELinux to store a file's security context.
46  *
 * The value under Linux is limited to 65536 bytes of binary data.
48  * In practice, individual xattrs tend to be much smaller than this
49  * and are typically less than 100 bytes.  A good example of this
50  * are the security.selinux xattrs which are less than 100 bytes and
51  * exist for every file when xattr labeling is enabled.
52  *
53  * The Linux xattr implementation has been written to take advantage of
54  * this typical usage.  When the dataset property 'xattr=sa' is set,
55  * then xattrs will be preferentially stored as System Attributes (SA).
56  * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
57  * up to 64k of xattrs to be stored in the spill block.  If additional
58  * xattr space is required, which is unlikely under Linux, they will
59  * be stored using the traditional directory approach.
60  *
61  * This optimization results in roughly a 3x performance improvement
62  * when accessing xattrs because it avoids the need to perform a seek
63  * for every xattr value.  When multiple xattrs are stored per-file
64  * the performance improvements are even greater because all of the
65  * xattrs stored in the spill block will be cached.
66  *
67  * However, by default SA based xattrs are disabled in the Linux port
68  * to maximize compatibility with other implementations.  If you do
69  * enable SA based xattrs then they will not be visible on platforms
70  * which do not support this feature.
71  *
72  * NOTE: One additional consequence of the xattr directory implementation
73  * is that when an extended attribute is manipulated an inode is created.
74  * This inode will exist in the Linux inode cache but there will be no
75  * associated entry in the dentry cache which references it.  This is
76  * safe but it may result in some confusion.  Enabling SA based xattrs
77  * largely avoids the issue except in the overflow case.
78  */
79 
80 #include <sys/zfs_znode.h>
81 #include <sys/zfs_vfsops.h>
82 #include <sys/zfs_vnops.h>
83 #include <sys/zap.h>
84 #include <sys/vfs.h>
85 #include <sys/zpl.h>
86 #include <linux/vfs_compat.h>
87 
/*
 * Result of the visibility check applied to each xattr name while a
 * listxattr() result is being assembled (see zpl_xattr_filldir()).
 */
enum xattr_permission {
	XAPERM_DENY = 0,	/* name is skipped and not listed */
	XAPERM_ALLOW = 1,	/* name is listed exactly as stored */
	XAPERM_COMPAT = 2,	/* name is listed with "user." prepended */
};
93 
/*
 * Cursor used while assembling the NUL separated list of xattr names.
 * Keep the field order: zpl_xattr_list() initializes this structure
 * positionally.
 */
typedef struct xattr_filldir {
	size_t size;		/* capacity of buf in bytes */
	size_t offset;		/* bytes emitted (or required) so far */
	char *buf;		/* output buffer; NULL for a size-only pass */
	struct dentry *dentry;	/* dentry whose inode's xattrs are listed */
} xattr_filldir_t;
100 
/*
 * Forward declaration of the visibility check consulted by
 * zpl_xattr_filldir() for every candidate name (name, name length).
 */
static enum xattr_permission zpl_xattr_permission(xattr_filldir_t *,
    const char *, int);
103 
/*
 * Selects the name format __zpl_xattr_user_set() writes for user
 * namespace xattrs: zero stores the name with the "user." prefix,
 * non-zero stores it without the prefix.
 */
static int zfs_xattr_compat = 0;
105 
106 /*
107  * Determine is a given xattr name should be visible and if so copy it
108  * in to the provided buffer (xf->buf).
109  */
110 static int
111 zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
112 {
113 	enum xattr_permission perm;
114 
115 	/* Check permissions using the per-namespace list xattr handler. */
116 	perm = zpl_xattr_permission(xf, name, name_len);
117 	if (perm == XAPERM_DENY)
118 		return (0);
119 
120 	/* Prefix the name with "user." if it does not have a namespace. */
121 	if (perm == XAPERM_COMPAT) {
122 		if (xf->buf) {
123 			if (xf->offset + XATTR_USER_PREFIX_LEN + 1 > xf->size)
124 				return (-ERANGE);
125 
126 			memcpy(xf->buf + xf->offset, XATTR_USER_PREFIX,
127 			    XATTR_USER_PREFIX_LEN);
128 			xf->buf[xf->offset + XATTR_USER_PREFIX_LEN] = '\0';
129 		}
130 
131 		xf->offset += XATTR_USER_PREFIX_LEN;
132 	}
133 
134 	/* When xf->buf is NULL only calculate the required size. */
135 	if (xf->buf) {
136 		if (xf->offset + name_len + 1 > xf->size)
137 			return (-ERANGE);
138 
139 		memcpy(xf->buf + xf->offset, name, name_len);
140 		xf->buf[xf->offset + name_len] = '\0';
141 	}
142 
143 	xf->offset += (name_len + 1);
144 
145 	return (0);
146 }
147 
148 /*
149  * Read as many directory entry names as will fit in to the provided buffer,
150  * or when no buffer is provided calculate the required buffer size.
151  */
152 static int
153 zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
154 {
155 	zap_cursor_t zc;
156 	zap_attribute_t	zap;
157 	int error;
158 
159 	zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
160 
161 	while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) {
162 
163 		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
164 			error = -ENXIO;
165 			break;
166 		}
167 
168 		error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name));
169 		if (error)
170 			break;
171 
172 		zap_cursor_advance(&zc);
173 	}
174 
175 	zap_cursor_fini(&zc);
176 
177 	if (error == -ENOENT)
178 		error = 0;
179 
180 	return (error);
181 }
182 
183 static ssize_t
184 zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
185 {
186 	struct inode *ip = xf->dentry->d_inode;
187 	struct inode *dxip = NULL;
188 	znode_t *dxzp;
189 	int error;
190 
191 	/* Lookup the xattr directory */
192 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
193 	    cr, NULL, NULL);
194 	if (error) {
195 		if (error == -ENOENT)
196 			error = 0;
197 
198 		return (error);
199 	}
200 
201 	dxip = ZTOI(dxzp);
202 	error = zpl_xattr_readdir(dxip, xf);
203 	iput(dxip);
204 
205 	return (error);
206 }
207 
208 static ssize_t
209 zpl_xattr_list_sa(xattr_filldir_t *xf)
210 {
211 	znode_t *zp = ITOZ(xf->dentry->d_inode);
212 	nvpair_t *nvp = NULL;
213 	int error = 0;
214 
215 	mutex_enter(&zp->z_lock);
216 	if (zp->z_xattr_cached == NULL)
217 		error = -zfs_sa_get_xattr(zp);
218 	mutex_exit(&zp->z_lock);
219 
220 	if (error)
221 		return (error);
222 
223 	ASSERT(zp->z_xattr_cached);
224 
225 	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
226 		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
227 
228 		error = zpl_xattr_filldir(xf, nvpair_name(nvp),
229 		    strlen(nvpair_name(nvp)));
230 		if (error)
231 			return (error);
232 	}
233 
234 	return (0);
235 }
236 
237 ssize_t
238 zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
239 {
240 	znode_t *zp = ITOZ(dentry->d_inode);
241 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
242 	xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };
243 	cred_t *cr = CRED();
244 	fstrans_cookie_t cookie;
245 	int error = 0;
246 
247 	crhold(cr);
248 	cookie = spl_fstrans_mark();
249 	ZPL_ENTER(zfsvfs);
250 	ZPL_VERIFY_ZP(zp);
251 	rw_enter(&zp->z_xattr_lock, RW_READER);
252 
253 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
254 		error = zpl_xattr_list_sa(&xf);
255 		if (error)
256 			goto out;
257 	}
258 
259 	error = zpl_xattr_list_dir(&xf, cr);
260 	if (error)
261 		goto out;
262 
263 	error = xf.offset;
264 out:
265 
266 	rw_exit(&zp->z_xattr_lock);
267 	ZPL_EXIT(zfsvfs);
268 	spl_fstrans_unmark(cookie);
269 	crfree(cr);
270 
271 	return (error);
272 }
273 
274 static int
275 zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
276     size_t size, cred_t *cr)
277 {
278 	fstrans_cookie_t cookie;
279 	struct inode *xip = NULL;
280 	znode_t *dxzp = NULL;
281 	znode_t *xzp = NULL;
282 	int error;
283 
284 	/* Lookup the xattr directory */
285 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
286 	    cr, NULL, NULL);
287 	if (error)
288 		goto out;
289 
290 	/* Lookup a specific xattr name in the directory */
291 	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
292 	if (error)
293 		goto out;
294 
295 	xip = ZTOI(xzp);
296 	if (!size) {
297 		error = i_size_read(xip);
298 		goto out;
299 	}
300 
301 	if (size < i_size_read(xip)) {
302 		error = -ERANGE;
303 		goto out;
304 	}
305 
306 	struct iovec iov;
307 	iov.iov_base = (void *)value;
308 	iov.iov_len = size;
309 
310 	zfs_uio_t uio;
311 	zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);
312 
313 	cookie = spl_fstrans_mark();
314 	error = -zfs_read(ITOZ(xip), &uio, 0, cr);
315 	spl_fstrans_unmark(cookie);
316 
317 	if (error == 0)
318 		error = size - zfs_uio_resid(&uio);
319 out:
320 	if (xzp)
321 		zrele(xzp);
322 
323 	if (dxzp)
324 		zrele(dxzp);
325 
326 	return (error);
327 }
328 
329 static int
330 zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
331 {
332 	znode_t *zp = ITOZ(ip);
333 	uchar_t *nv_value;
334 	uint_t nv_size;
335 	int error = 0;
336 
337 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
338 
339 	mutex_enter(&zp->z_lock);
340 	if (zp->z_xattr_cached == NULL)
341 		error = -zfs_sa_get_xattr(zp);
342 	mutex_exit(&zp->z_lock);
343 
344 	if (error)
345 		return (error);
346 
347 	ASSERT(zp->z_xattr_cached);
348 	error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
349 	    &nv_value, &nv_size);
350 	if (error)
351 		return (error);
352 
353 	if (size == 0 || value == NULL)
354 		return (nv_size);
355 
356 	if (size < nv_size)
357 		return (-ERANGE);
358 
359 	memcpy(value, nv_value, nv_size);
360 
361 	return (nv_size);
362 }
363 
364 static int
365 __zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
366     cred_t *cr)
367 {
368 	znode_t *zp = ITOZ(ip);
369 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
370 	int error;
371 
372 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
373 
374 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
375 		error = zpl_xattr_get_sa(ip, name, value, size);
376 		if (error != -ENOENT)
377 			goto out;
378 	}
379 
380 	error = zpl_xattr_get_dir(ip, name, value, size, cr);
381 out:
382 	if (error == -ENOENT)
383 		error = -ENODATA;
384 
385 	return (error);
386 }
387 
388 #define	XATTR_NOENT	0x0
389 #define	XATTR_IN_SA	0x1
390 #define	XATTR_IN_DIR	0x2
391 /* check where the xattr resides */
392 static int
393 __zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)
394 {
395 	znode_t *zp = ITOZ(ip);
396 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
397 	int error;
398 
399 	ASSERT(where);
400 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
401 
402 	*where = XATTR_NOENT;
403 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
404 		error = zpl_xattr_get_sa(ip, name, NULL, 0);
405 		if (error >= 0)
406 			*where |= XATTR_IN_SA;
407 		else if (error != -ENOENT)
408 			return (error);
409 	}
410 
411 	error = zpl_xattr_get_dir(ip, name, NULL, 0, cr);
412 	if (error >= 0)
413 		*where |= XATTR_IN_DIR;
414 	else if (error != -ENOENT)
415 		return (error);
416 
417 	if (*where == (XATTR_IN_SA|XATTR_IN_DIR))
418 		cmn_err(CE_WARN, "ZFS: inode %p has xattr \"%s\""
419 		    " in both SA and dir", ip, name);
420 	if (*where == XATTR_NOENT)
421 		error = -ENODATA;
422 	else
423 		error = 0;
424 	return (error);
425 }
426 
/*
 * Locked entry point for reading an xattr by its full name.
 *
 * Holds the caller's credentials, marks the thread with
 * spl_fstrans_mark(), enters the filesystem and takes zp->z_xattr_lock
 * as a reader around __zpl_xattr_get(), then undoes each step in
 * reverse order.  The return value is whatever __zpl_xattr_get()
 * returned.
 */
static int
zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
{
	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	cred_t *cr = CRED();
	fstrans_cookie_t cookie;
	int error;

	crhold(cr);
	cookie = spl_fstrans_mark();
	/*
	 * NOTE(review): if ZPL_ENTER()/ZPL_VERIFY_ZP() can return from this
	 * function on failure, the cred hold and fstrans mark taken above
	 * would not be released - confirm against the macro definitions.
	 */
	ZPL_ENTER(zfsvfs);
	ZPL_VERIFY_ZP(zp);
	rw_enter(&zp->z_xattr_lock, RW_READER);
	error = __zpl_xattr_get(ip, name, value, size, cr);
	rw_exit(&zp->z_xattr_lock);
	ZPL_EXIT(zfsvfs);
	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (error);
}
449 
/*
 * Create, replace or remove the xattr 'name' in the hidden xattr
 * directory of 'ip'.
 *
 * A NULL 'value' requests removal; removing a name that has no entry in
 * an existing xattr directory succeeds without doing anything.
 * Otherwise the xattr file is created if it does not exist, truncated,
 * and 'size' bytes of 'value' are written at offset 0.
 *
 * On success the ctime of 'ip' is updated and the inode is marked
 * dirty.  Returns 0 or a negative errno; -ENOENT is reported to the
 * caller as -ENODATA.  The 'flags' argument is not referenced here.
 */
static int
zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
    size_t size, int flags, cred_t *cr)
{
	znode_t *dxzp = NULL;
	znode_t *xzp = NULL;
	vattr_t *vap = NULL;
	int lookup_flags, error;
	const int xattr_mode = S_IFREG | 0644;
	loff_t pos = 0;

	/*
	 * Lookup the xattr directory.  When we're adding an entry pass
	 * CREATE_XATTR_DIR to ensure the xattr directory is created.
	 * When removing an entry this flag is not passed to avoid
	 * unnecessarily creating a new xattr directory.
	 */
	lookup_flags = LOOKUP_XATTR;
	if (value != NULL)
		lookup_flags |= CREATE_XATTR_DIR;

	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, lookup_flags,
	    cr, NULL, NULL);
	if (error)
		goto out;

	/* Lookup a specific xattr name in the directory */
	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
	if (error && (error != -ENOENT))
		goto out;

	/* A missing entry is expected here; xzp stays NULL in that case. */
	error = 0;

	/* Remove a specific name xattr when value is set to NULL. */
	if (value == NULL) {
		if (xzp)
			error = -zfs_remove(dxzp, (char *)name, cr, 0);

		goto out;
	}

	/* Lookup failed create a new xattr. */
	if (xzp == NULL) {
		vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
		vap->va_mode = xattr_mode;
		vap->va_mask = ATTR_MODE;
		vap->va_uid = crgetuid(cr);
		vap->va_gid = crgetgid(cr);

		error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,
		    cr, 0, NULL);
		if (error)
			goto out;
	}

	ASSERT(xzp != NULL);

	/* Truncate any previous value before writing the new one. */
	error = -zfs_freesp(xzp, 0, 0, xattr_mode, TRUE);
	if (error)
		goto out;

	error = -zfs_write_simple(xzp, value, size, pos, NULL);
out:
	/* Any successful change (or no-op removal) touches the ctime. */
	if (error == 0) {
		ip->i_ctime = current_time(ip);
		zfs_mark_inode_dirty(ip);
	}

	if (vap)
		kmem_free(vap, sizeof (vattr_t));

	if (xzp)
		zrele(xzp);

	if (dxzp)
		zrele(dxzp);

	if (error == -ENOENT)
		error = -ENODATA;

	ASSERT3S(error, <=, 0);

	return (error);
}
534 
535 static int
536 zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
537     size_t size, int flags, cred_t *cr)
538 {
539 	znode_t *zp = ITOZ(ip);
540 	nvlist_t *nvl;
541 	size_t sa_size;
542 	int error = 0;
543 
544 	mutex_enter(&zp->z_lock);
545 	if (zp->z_xattr_cached == NULL)
546 		error = -zfs_sa_get_xattr(zp);
547 	mutex_exit(&zp->z_lock);
548 
549 	if (error)
550 		return (error);
551 
552 	ASSERT(zp->z_xattr_cached);
553 	nvl = zp->z_xattr_cached;
554 
555 	if (value == NULL) {
556 		error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
557 		if (error == -ENOENT)
558 			error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
559 	} else {
560 		/* Limited to 32k to keep nvpair memory allocations small */
561 		if (size > DXATTR_MAX_ENTRY_SIZE)
562 			return (-EFBIG);
563 
564 		/* Prevent the DXATTR SA from consuming the entire SA region */
565 		error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
566 		if (error)
567 			return (error);
568 
569 		if (sa_size > DXATTR_MAX_SA_SIZE)
570 			return (-EFBIG);
571 
572 		error = -nvlist_add_byte_array(nvl, name,
573 		    (uchar_t *)value, size);
574 	}
575 
576 	/*
577 	 * Update the SA for additions, modifications, and removals. On
578 	 * error drop the inconsistent cached version of the nvlist, it
579 	 * will be reconstructed from the ARC when next accessed.
580 	 */
581 	if (error == 0)
582 		error = -zfs_sa_set_xattr(zp, name, value, size);
583 
584 	if (error) {
585 		nvlist_free(nvl);
586 		zp->z_xattr_cached = NULL;
587 	}
588 
589 	ASSERT3S(error, <=, 0);
590 
591 	return (error);
592 }
593 
/*
 * Locked entry point for setting (value != NULL) or removing
 * (value == NULL) an xattr by its full name.
 *
 * Takes zp->z_xattr_lock as a writer, finds out where the xattr
 * currently lives, applies the XATTR_CREATE / XATTR_REPLACE semantics,
 * and then stores the value in the SA when that is enabled and
 * succeeds, otherwise in the xattr directory.  After a successful store
 * a stale copy in the other location is removed.
 *
 * Returns 0 on success or a negative errno (-EEXIST for XATTR_CREATE on
 * an existing xattr, -ENODATA for XATTR_REPLACE on a missing one).
 */
static int
zpl_xattr_set(struct inode *ip, const char *name, const void *value,
    size_t size, int flags)
{
	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	cred_t *cr = CRED();
	fstrans_cookie_t cookie;
	int where;
	int error;

	crhold(cr);
	cookie = spl_fstrans_mark();
	/*
	 * NOTE(review): if ZPL_ENTER()/ZPL_VERIFY_ZP() can return from this
	 * function on failure, the cred hold and fstrans mark taken above
	 * would not be released - confirm against the macro definitions.
	 */
	ZPL_ENTER(zfsvfs);
	ZPL_VERIFY_ZP(zp);
	rw_enter(&zp->z_xattr_lock, RW_WRITER);

	/*
	 * Before setting the xattr check to see if it already exists.
	 * This is done to ensure the following optional flags are honored.
	 *
	 *   XATTR_CREATE: fail if xattr already exists
	 *   XATTR_REPLACE: fail if xattr does not exist
	 *
	 * We also want to know if it resides in sa or dir, so we can make
	 * sure we don't end up with duplicate in both places.
	 */
	error = __zpl_xattr_where(ip, name, &where, cr);
	if (error < 0) {
		if (error != -ENODATA)
			goto out;
		if (flags & XATTR_REPLACE)
			goto out;

		/* The xattr to be removed already doesn't exist */
		error = 0;
		if (value == NULL)
			goto out;
	} else {
		error = -EEXIST;
		if (flags & XATTR_CREATE)
			goto out;
	}

	/* Preferentially store the xattr as a SA for better performance */
	if (zfsvfs->z_use_sa && zp->z_is_sa &&
	    (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
		if (error == 0) {
			/*
			 * Successfully put into SA, we need to clear the one
			 * in dir.
			 */
			if (where & XATTR_IN_DIR)
				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
			goto out;
		}
	}

	/* SA storage was not used or failed; use the xattr directory. */
	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
	/*
	 * Successfully put into dir, we need to clear the one in SA.
	 */
	if (error == 0 && (where & XATTR_IN_SA))
		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
out:
	rw_exit(&zp->z_xattr_lock);
	ZPL_EXIT(zfsvfs);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}
668 
669 /*
670  * Extended user attributes
671  *
672  * "Extended user attributes may be assigned to files and directories for
673  * storing arbitrary additional information such as the mime type,
674  * character set or encoding of a file.  The access permissions for user
675  * attributes are defined by the file permission bits: read permission
676  * is required to retrieve the attribute value, and writer permission is
677  * required to change it.
678  *
679  * The file permission bits of regular files and directories are
680  * interpreted differently from the file permission bits of special
681  * files and symbolic links.  For regular files and directories the file
682  * permission bits define access to the file's contents, while for
683  * device special files they define access to the device described by
684  * the special file.  The file permissions of symbolic links are not
685  * used in access checks.  These differences would allow users to
686  * consume filesystem resources in a way not controllable by disk quotas
687  * for group or world writable special files and directories.
688  *
689  * For this reason, extended user attributes are allowed only for
690  * regular files and directories, and access to extended user attributes
691  * is restricted to the owner and to users with appropriate capabilities
692  * for directories with the sticky bit set (see the chmod(1) manual page
693  * for an explanation of the sticky bit)." - xattr(7)
694  *
695  * ZFS allows extended user attributes to be disabled administratively
696  * by setting the 'xattr=off' property on the dataset.
697  */
698 static int
699 __zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
700     const char *name, size_t name_len)
701 {
702 	return (ITOZSB(ip)->z_flags & ZSB_XATTR);
703 }
704 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list);
705 
706 static int
707 __zpl_xattr_user_get(struct inode *ip, const char *name,
708     void *value, size_t size)
709 {
710 	int error;
711 	/* xattr_resolve_name will do this for us if this is defined */
712 #ifndef HAVE_XATTR_HANDLER_NAME
713 	if (strcmp(name, "") == 0)
714 		return (-EINVAL);
715 #endif
716 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
717 		return (-EINVAL);
718 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
719 		return (-EOPNOTSUPP);
720 
721 	/*
722 	 * Try to look up the name with the namespace prefix first for
723 	 * compatibility with xattrs from this platform.  If that fails,
724 	 * try again without the namespace prefix for compatibility with
725 	 * other platforms.
726 	 */
727 	char *xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
728 	error = zpl_xattr_get(ip, xattr_name, value, size);
729 	kmem_strfree(xattr_name);
730 	if (error == -ENODATA)
731 		error = zpl_xattr_get(ip, name, value, size);
732 
733 	return (error);
734 }
735 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
736 
737 static int
738 __zpl_xattr_user_set(struct inode *ip, const char *name,
739     const void *value, size_t size, int flags)
740 {
741 	int error = 0;
742 	/* xattr_resolve_name will do this for us if this is defined */
743 #ifndef HAVE_XATTR_HANDLER_NAME
744 	if (strcmp(name, "") == 0)
745 		return (-EINVAL);
746 #endif
747 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
748 		return (-EINVAL);
749 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
750 		return (-EOPNOTSUPP);
751 
752 	/*
753 	 * Remove alternate compat version of the xattr so we only set the
754 	 * version specified by the zfs_xattr_compat tunable.
755 	 *
756 	 * The following flags must be handled correctly:
757 	 *
758 	 *   XATTR_CREATE: fail if xattr already exists
759 	 *   XATTR_REPLACE: fail if xattr does not exist
760 	 */
761 	char *prefixed_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
762 	const char *clear_name, *set_name;
763 	if (zfs_xattr_compat) {
764 		clear_name = prefixed_name;
765 		set_name = name;
766 	} else {
767 		clear_name = name;
768 		set_name = prefixed_name;
769 	}
770 	/*
771 	 * Clear the old value with the alternative name format, if it exists.
772 	 */
773 	error = zpl_xattr_set(ip, clear_name, NULL, 0, flags);
774 	/*
775 	 * XATTR_CREATE was specified and we failed to clear the xattr
776 	 * because it already exists.  Stop here.
777 	 */
778 	if (error == -EEXIST)
779 		goto out;
780 	/*
781 	 * If XATTR_REPLACE was specified and we succeeded to clear
782 	 * an xattr, we don't need to replace anything when setting
783 	 * the new value.  If we failed with -ENODATA that's fine,
784 	 * there was nothing to be cleared and we can ignore the error.
785 	 */
786 	if (error == 0)
787 		flags &= ~XATTR_REPLACE;
788 	/*
789 	 * Set the new value with the configured name format.
790 	 */
791 	error = zpl_xattr_set(ip, set_name, value, size, flags);
792 out:
793 	kmem_strfree(prefixed_name);
794 	return (error);
795 }
796 ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
797 
798 static xattr_handler_t zpl_xattr_user_handler =
799 {
800 	.prefix	= XATTR_USER_PREFIX,
801 	.list	= zpl_xattr_user_list,
802 	.get	= zpl_xattr_user_get,
803 	.set	= zpl_xattr_user_set,
804 };
805 
806 /*
807  * Trusted extended attributes
808  *
809  * "Trusted extended attributes are visible and accessible only to
810  * processes that have the CAP_SYS_ADMIN capability.  Attributes in this
811  * class are used to implement mechanisms in user space (i.e., outside
812  * the kernel) which keep information in extended attributes to which
813  * ordinary processes should not have access." - xattr(7)
814  */
815 static int
816 __zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size,
817     const char *name, size_t name_len)
818 {
819 	return (capable(CAP_SYS_ADMIN));
820 }
821 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list);
822 
823 static int
824 __zpl_xattr_trusted_get(struct inode *ip, const char *name,
825     void *value, size_t size)
826 {
827 	char *xattr_name;
828 	int error;
829 
830 	if (!capable(CAP_SYS_ADMIN))
831 		return (-EACCES);
832 	/* xattr_resolve_name will do this for us if this is defined */
833 #ifndef HAVE_XATTR_HANDLER_NAME
834 	if (strcmp(name, "") == 0)
835 		return (-EINVAL);
836 #endif
837 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
838 	error = zpl_xattr_get(ip, xattr_name, value, size);
839 	kmem_strfree(xattr_name);
840 
841 	return (error);
842 }
843 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
844 
845 static int
846 __zpl_xattr_trusted_set(struct inode *ip, const char *name,
847     const void *value, size_t size, int flags)
848 {
849 	char *xattr_name;
850 	int error;
851 
852 	if (!capable(CAP_SYS_ADMIN))
853 		return (-EACCES);
854 	/* xattr_resolve_name will do this for us if this is defined */
855 #ifndef HAVE_XATTR_HANDLER_NAME
856 	if (strcmp(name, "") == 0)
857 		return (-EINVAL);
858 #endif
859 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
860 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
861 	kmem_strfree(xattr_name);
862 
863 	return (error);
864 }
865 ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
866 
867 static xattr_handler_t zpl_xattr_trusted_handler = {
868 	.prefix	= XATTR_TRUSTED_PREFIX,
869 	.list	= zpl_xattr_trusted_list,
870 	.get	= zpl_xattr_trusted_get,
871 	.set	= zpl_xattr_trusted_set,
872 };
873 
874 /*
875  * Extended security attributes
876  *
877  * "The security attribute namespace is used by kernel security modules,
878  * such as Security Enhanced Linux, and also to implement file
879  * capabilities (see capabilities(7)).  Read and write access
880  * permissions to security attributes depend on the policy implemented
881  * for each security attribute by the security module.  When no security
882  * module is loaded, all processes have read access to extended security
883  * attributes, and write access is limited to processes that have the
884  * CAP_SYS_ADMIN capability." - xattr(7)
885  */
886 static int
887 __zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,
888     const char *name, size_t name_len)
889 {
890 	return (1);
891 }
892 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);
893 
894 static int
895 __zpl_xattr_security_get(struct inode *ip, const char *name,
896     void *value, size_t size)
897 {
898 	char *xattr_name;
899 	int error;
900 	/* xattr_resolve_name will do this for us if this is defined */
901 #ifndef HAVE_XATTR_HANDLER_NAME
902 	if (strcmp(name, "") == 0)
903 		return (-EINVAL);
904 #endif
905 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
906 	error = zpl_xattr_get(ip, xattr_name, value, size);
907 	kmem_strfree(xattr_name);
908 
909 	return (error);
910 }
911 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
912 
913 static int
914 __zpl_xattr_security_set(struct inode *ip, const char *name,
915     const void *value, size_t size, int flags)
916 {
917 	char *xattr_name;
918 	int error;
919 	/* xattr_resolve_name will do this for us if this is defined */
920 #ifndef HAVE_XATTR_HANDLER_NAME
921 	if (strcmp(name, "") == 0)
922 		return (-EINVAL);
923 #endif
924 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
925 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
926 	kmem_strfree(xattr_name);
927 
928 	return (error);
929 }
930 ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
931 
932 static int
933 zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
934     void *fs_info)
935 {
936 	const struct xattr *xattr;
937 	int error = 0;
938 
939 	for (xattr = xattrs; xattr->name != NULL; xattr++) {
940 		error = __zpl_xattr_security_set(ip,
941 		    xattr->name, xattr->value, xattr->value_len, 0);
942 
943 		if (error < 0)
944 			break;
945 	}
946 
947 	return (error);
948 }
949 
950 int
951 zpl_xattr_security_init(struct inode *ip, struct inode *dip,
952     const struct qstr *qstr)
953 {
954 	return security_inode_init_security(ip, dip, qstr,
955 	    &zpl_xattr_security_init_impl, NULL);
956 }
957 
958 /*
959  * Security xattr namespace handlers.
960  */
961 static xattr_handler_t zpl_xattr_security_handler = {
962 	.prefix	= XATTR_SECURITY_PREFIX,
963 	.list	= zpl_xattr_security_list,
964 	.get	= zpl_xattr_security_get,
965 	.set	= zpl_xattr_security_set,
966 };
967 
968 /*
969  * Extended system attributes
970  *
971  * "Extended system attributes are used by the kernel to store system
972  * objects such as Access Control Lists.  Read and write access permissions
973  * to system attributes depend on the policy implemented for each system
974  * attribute implemented by filesystems in the kernel." - xattr(7)
975  */
976 #ifdef CONFIG_FS_POSIX_ACL
977 static int
978 zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
979 {
980 	char *name, *value = NULL;
981 	int error = 0;
982 	size_t size = 0;
983 
984 	if (S_ISLNK(ip->i_mode))
985 		return (-EOPNOTSUPP);
986 
987 	switch (type) {
988 	case ACL_TYPE_ACCESS:
989 		name = XATTR_NAME_POSIX_ACL_ACCESS;
990 		if (acl) {
991 			umode_t mode = ip->i_mode;
992 			error = posix_acl_equiv_mode(acl, &mode);
993 			if (error < 0) {
994 				return (error);
995 			} else {
996 				/*
997 				 * The mode bits will have been set by
998 				 * ->zfs_setattr()->zfs_acl_chmod_setattr()
999 				 * using the ZFS ACL conversion.  If they
1000 				 * differ from the Posix ACL conversion dirty
1001 				 * the inode to write the Posix mode bits.
1002 				 */
1003 				if (ip->i_mode != mode) {
1004 					ip->i_mode = ITOZ(ip)->z_mode = mode;
1005 					ip->i_ctime = current_time(ip);
1006 					zfs_mark_inode_dirty(ip);
1007 				}
1008 
1009 				if (error == 0)
1010 					acl = NULL;
1011 			}
1012 		}
1013 		break;
1014 
1015 	case ACL_TYPE_DEFAULT:
1016 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1017 		if (!S_ISDIR(ip->i_mode))
1018 			return (acl ? -EACCES : 0);
1019 		break;
1020 
1021 	default:
1022 		return (-EINVAL);
1023 	}
1024 
1025 	if (acl) {
1026 		size = posix_acl_xattr_size(acl->a_count);
1027 		value = kmem_alloc(size, KM_SLEEP);
1028 
1029 		error = zpl_acl_to_xattr(acl, value, size);
1030 		if (error < 0) {
1031 			kmem_free(value, size);
1032 			return (error);
1033 		}
1034 	}
1035 
1036 	error = zpl_xattr_set(ip, name, value, size, 0);
1037 	if (value)
1038 		kmem_free(value, size);
1039 
1040 	if (!error) {
1041 		if (acl)
1042 			zpl_set_cached_acl(ip, type, acl);
1043 		else
1044 			zpl_forget_cached_acl(ip, type);
1045 	}
1046 
1047 	return (error);
1048 }
1049 
#ifdef HAVE_SET_ACL
/*
 * Public set-ACL entry point; a thin wrapper around zpl_set_acl_impl().
 * The signature depends on HAVE_SET_ACL_USERNS; when the 'userns'
 * argument is present it is not used.
 * NOTE(review): presumably installed as the inode set_acl operation -
 * confirm against the inode_operations tables.
 */
int
#ifdef HAVE_SET_ACL_USERNS
zpl_set_acl(struct user_namespace *userns, struct inode *ip,
    struct posix_acl *acl, int type)
#else
zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
#endif /* HAVE_SET_ACL_USERNS */
{
	return (zpl_set_acl_impl(ip, acl, type));
}
#endif /* HAVE_SET_ACL */
1062 
1063 static struct posix_acl *
1064 zpl_get_acl_impl(struct inode *ip, int type)
1065 {
1066 	struct posix_acl *acl;
1067 	void *value = NULL;
1068 	char *name;
1069 
1070 	/*
1071 	 * As of Linux 3.14, the kernel get_acl will check this for us.
1072 	 * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong
1073 	 * as the kernel get_acl will set it to temporary sentinel value.
1074 	 */
1075 #ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1076 	acl = get_cached_acl(ip, type);
1077 	if (acl != ACL_NOT_CACHED)
1078 		return (acl);
1079 #endif
1080 
1081 	switch (type) {
1082 	case ACL_TYPE_ACCESS:
1083 		name = XATTR_NAME_POSIX_ACL_ACCESS;
1084 		break;
1085 	case ACL_TYPE_DEFAULT:
1086 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1087 		break;
1088 	default:
1089 		return (ERR_PTR(-EINVAL));
1090 	}
1091 
1092 	int size = zpl_xattr_get(ip, name, NULL, 0);
1093 	if (size > 0) {
1094 		value = kmem_alloc(size, KM_SLEEP);
1095 		size = zpl_xattr_get(ip, name, value, size);
1096 	}
1097 
1098 	if (size > 0) {
1099 		acl = zpl_acl_from_xattr(value, size);
1100 	} else if (size == -ENODATA || size == -ENOSYS) {
1101 		acl = NULL;
1102 	} else {
1103 		acl = ERR_PTR(-EIO);
1104 	}
1105 
1106 	if (size > 0)
1107 		kmem_free(value, size);
1108 
1109 	/* As of Linux 4.7, the kernel get_acl will set this for us */
1110 #ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1111 	if (!IS_ERR(acl))
1112 		zpl_set_cached_acl(ip, type, acl);
1113 #endif
1114 
1115 	return (acl);
1116 }
1117 
1118 #if defined(HAVE_GET_ACL_RCU)
1119 struct posix_acl *
1120 zpl_get_acl(struct inode *ip, int type, bool rcu)
1121 {
1122 	if (rcu)
1123 		return (ERR_PTR(-ECHILD));
1124 
1125 	return (zpl_get_acl_impl(ip, type));
1126 }
1127 #elif defined(HAVE_GET_ACL)
1128 struct posix_acl *
1129 zpl_get_acl(struct inode *ip, int type)
1130 {
1131 	return (zpl_get_acl_impl(ip, type));
1132 }
1133 #else
1134 #error "Unsupported iops->get_acl() implementation"
1135 #endif /* HAVE_GET_ACL_RCU */
1136 
1137 int
1138 zpl_init_acl(struct inode *ip, struct inode *dir)
1139 {
1140 	struct posix_acl *acl = NULL;
1141 	int error = 0;
1142 
1143 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1144 		return (0);
1145 
1146 	if (!S_ISLNK(ip->i_mode)) {
1147 		acl = zpl_get_acl_impl(dir, ACL_TYPE_DEFAULT);
1148 		if (IS_ERR(acl))
1149 			return (PTR_ERR(acl));
1150 		if (!acl) {
1151 			ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask());
1152 			ip->i_ctime = current_time(ip);
1153 			zfs_mark_inode_dirty(ip);
1154 			return (0);
1155 		}
1156 	}
1157 
1158 	if (acl) {
1159 		umode_t mode;
1160 
1161 		if (S_ISDIR(ip->i_mode)) {
1162 			error = zpl_set_acl_impl(ip, acl, ACL_TYPE_DEFAULT);
1163 			if (error)
1164 				goto out;
1165 		}
1166 
1167 		mode = ip->i_mode;
1168 		error = __posix_acl_create(&acl, GFP_KERNEL, &mode);
1169 		if (error >= 0) {
1170 			ip->i_mode = ITOZ(ip)->z_mode = mode;
1171 			zfs_mark_inode_dirty(ip);
1172 			if (error > 0) {
1173 				error = zpl_set_acl_impl(ip, acl,
1174 				    ACL_TYPE_ACCESS);
1175 			}
1176 		}
1177 	}
1178 out:
1179 	zpl_posix_acl_release(acl);
1180 
1181 	return (error);
1182 }
1183 
1184 int
1185 zpl_chmod_acl(struct inode *ip)
1186 {
1187 	struct posix_acl *acl;
1188 	int error;
1189 
1190 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1191 		return (0);
1192 
1193 	if (S_ISLNK(ip->i_mode))
1194 		return (-EOPNOTSUPP);
1195 
1196 	acl = zpl_get_acl_impl(ip, ACL_TYPE_ACCESS);
1197 	if (IS_ERR(acl) || !acl)
1198 		return (PTR_ERR(acl));
1199 
1200 	error = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);
1201 	if (!error)
1202 		error = zpl_set_acl_impl(ip, acl, ACL_TYPE_ACCESS);
1203 
1204 	zpl_posix_acl_release(acl);
1205 
1206 	return (error);
1207 }
1208 
1209 static int
1210 __zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
1211     const char *name, size_t name_len)
1212 {
1213 	char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
1214 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
1215 
1216 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1217 		return (0);
1218 
1219 	if (list && xattr_size <= list_size)
1220 		memcpy(list, xattr_name, xattr_size);
1221 
1222 	return (xattr_size);
1223 }
1224 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);
1225 
1226 static int
1227 __zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
1228     const char *name, size_t name_len)
1229 {
1230 	char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
1231 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
1232 
1233 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1234 		return (0);
1235 
1236 	if (list && xattr_size <= list_size)
1237 		memcpy(list, xattr_name, xattr_size);
1238 
1239 	return (xattr_size);
1240 }
1241 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);
1242 
1243 static int
1244 __zpl_xattr_acl_get_access(struct inode *ip, const char *name,
1245     void *buffer, size_t size)
1246 {
1247 	struct posix_acl *acl;
1248 	int type = ACL_TYPE_ACCESS;
1249 	int error;
1250 	/* xattr_resolve_name will do this for us if this is defined */
1251 #ifndef HAVE_XATTR_HANDLER_NAME
1252 	if (strcmp(name, "") != 0)
1253 		return (-EINVAL);
1254 #endif
1255 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1256 		return (-EOPNOTSUPP);
1257 
1258 	acl = zpl_get_acl_impl(ip, type);
1259 	if (IS_ERR(acl))
1260 		return (PTR_ERR(acl));
1261 	if (acl == NULL)
1262 		return (-ENODATA);
1263 
1264 	error = zpl_acl_to_xattr(acl, buffer, size);
1265 	zpl_posix_acl_release(acl);
1266 
1267 	return (error);
1268 }
1269 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);
1270 
1271 static int
1272 __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
1273     void *buffer, size_t size)
1274 {
1275 	struct posix_acl *acl;
1276 	int type = ACL_TYPE_DEFAULT;
1277 	int error;
1278 	/* xattr_resolve_name will do this for us if this is defined */
1279 #ifndef HAVE_XATTR_HANDLER_NAME
1280 	if (strcmp(name, "") != 0)
1281 		return (-EINVAL);
1282 #endif
1283 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1284 		return (-EOPNOTSUPP);
1285 
1286 	acl = zpl_get_acl_impl(ip, type);
1287 	if (IS_ERR(acl))
1288 		return (PTR_ERR(acl));
1289 	if (acl == NULL)
1290 		return (-ENODATA);
1291 
1292 	error = zpl_acl_to_xattr(acl, buffer, size);
1293 	zpl_posix_acl_release(acl);
1294 
1295 	return (error);
1296 }
1297 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
1298 
1299 static int
1300 __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
1301     const void *value, size_t size, int flags)
1302 {
1303 	struct posix_acl *acl;
1304 	int type = ACL_TYPE_ACCESS;
1305 	int error = 0;
1306 	/* xattr_resolve_name will do this for us if this is defined */
1307 #ifndef HAVE_XATTR_HANDLER_NAME
1308 	if (strcmp(name, "") != 0)
1309 		return (-EINVAL);
1310 #endif
1311 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1312 		return (-EOPNOTSUPP);
1313 
1314 	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
1315 		return (-EPERM);
1316 
1317 	if (value) {
1318 		acl = zpl_acl_from_xattr(value, size);
1319 		if (IS_ERR(acl))
1320 			return (PTR_ERR(acl));
1321 		else if (acl) {
1322 			error = zpl_posix_acl_valid(ip, acl);
1323 			if (error) {
1324 				zpl_posix_acl_release(acl);
1325 				return (error);
1326 			}
1327 		}
1328 	} else {
1329 		acl = NULL;
1330 	}
1331 	error = zpl_set_acl_impl(ip, acl, type);
1332 	zpl_posix_acl_release(acl);
1333 
1334 	return (error);
1335 }
1336 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
1337 
1338 static int
1339 __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
1340     const void *value, size_t size, int flags)
1341 {
1342 	struct posix_acl *acl;
1343 	int type = ACL_TYPE_DEFAULT;
1344 	int error = 0;
1345 	/* xattr_resolve_name will do this for us if this is defined */
1346 #ifndef HAVE_XATTR_HANDLER_NAME
1347 	if (strcmp(name, "") != 0)
1348 		return (-EINVAL);
1349 #endif
1350 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1351 		return (-EOPNOTSUPP);
1352 
1353 	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
1354 		return (-EPERM);
1355 
1356 	if (value) {
1357 		acl = zpl_acl_from_xattr(value, size);
1358 		if (IS_ERR(acl))
1359 			return (PTR_ERR(acl));
1360 		else if (acl) {
1361 			error = zpl_posix_acl_valid(ip, acl);
1362 			if (error) {
1363 				zpl_posix_acl_release(acl);
1364 				return (error);
1365 			}
1366 		}
1367 	} else {
1368 		acl = NULL;
1369 	}
1370 
1371 	error = zpl_set_acl_impl(ip, acl, type);
1372 	zpl_posix_acl_release(acl);
1373 
1374 	return (error);
1375 }
1376 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);
1377 
1378 /*
1379  * ACL access xattr namespace handlers.
1380  *
1381  * Use .name instead of .prefix when available. xattr_resolve_name will match
1382  * whole name and reject anything that has .name only as prefix.
1383  */
1384 static xattr_handler_t zpl_xattr_acl_access_handler = {
1385 #ifdef HAVE_XATTR_HANDLER_NAME
1386 	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
1387 #else
1388 	.prefix	= XATTR_NAME_POSIX_ACL_ACCESS,
1389 #endif
1390 	.list	= zpl_xattr_acl_list_access,
1391 	.get	= zpl_xattr_acl_get_access,
1392 	.set	= zpl_xattr_acl_set_access,
1393 #if defined(HAVE_XATTR_LIST_SIMPLE) || \
1394     defined(HAVE_XATTR_LIST_DENTRY) || \
1395     defined(HAVE_XATTR_LIST_HANDLER)
1396 	.flags	= ACL_TYPE_ACCESS,
1397 #endif
1398 };
1399 
1400 /*
1401  * ACL default xattr namespace handlers.
1402  *
1403  * Use .name instead of .prefix when available. xattr_resolve_name will match
1404  * whole name and reject anything that has .name only as prefix.
1405  */
1406 static xattr_handler_t zpl_xattr_acl_default_handler = {
1407 #ifdef HAVE_XATTR_HANDLER_NAME
1408 	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
1409 #else
1410 	.prefix	= XATTR_NAME_POSIX_ACL_DEFAULT,
1411 #endif
1412 	.list	= zpl_xattr_acl_list_default,
1413 	.get	= zpl_xattr_acl_get_default,
1414 	.set	= zpl_xattr_acl_set_default,
1415 #if defined(HAVE_XATTR_LIST_SIMPLE) || \
1416     defined(HAVE_XATTR_LIST_DENTRY) || \
1417     defined(HAVE_XATTR_LIST_HANDLER)
1418 	.flags	= ACL_TYPE_DEFAULT,
1419 #endif
1420 };
1421 
1422 #endif /* CONFIG_FS_POSIX_ACL */
1423 
/*
 * NULL-terminated table of xattr handlers: security, trusted, user and,
 * when CONFIG_FS_POSIX_ACL is set, the two POSIX ACL handlers.
 */
xattr_handler_t *zpl_xattr_handlers[] = {
	&zpl_xattr_security_handler,
	&zpl_xattr_trusted_handler,
	&zpl_xattr_user_handler,
#ifdef CONFIG_FS_POSIX_ACL
	&zpl_xattr_acl_access_handler,
	&zpl_xattr_acl_default_handler,
#endif /* CONFIG_FS_POSIX_ACL */
	NULL
};
1434 
1435 static const struct xattr_handler *
1436 zpl_xattr_handler(const char *name)
1437 {
1438 	if (strncmp(name, XATTR_USER_PREFIX,
1439 	    XATTR_USER_PREFIX_LEN) == 0)
1440 		return (&zpl_xattr_user_handler);
1441 
1442 	if (strncmp(name, XATTR_TRUSTED_PREFIX,
1443 	    XATTR_TRUSTED_PREFIX_LEN) == 0)
1444 		return (&zpl_xattr_trusted_handler);
1445 
1446 	if (strncmp(name, XATTR_SECURITY_PREFIX,
1447 	    XATTR_SECURITY_PREFIX_LEN) == 0)
1448 		return (&zpl_xattr_security_handler);
1449 
1450 #ifdef CONFIG_FS_POSIX_ACL
1451 	if (strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
1452 	    sizeof (XATTR_NAME_POSIX_ACL_ACCESS)) == 0)
1453 		return (&zpl_xattr_acl_access_handler);
1454 
1455 	if (strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
1456 	    sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)) == 0)
1457 		return (&zpl_xattr_acl_default_handler);
1458 #endif /* CONFIG_FS_POSIX_ACL */
1459 
1460 	return (NULL);
1461 }
1462 
1463 static enum xattr_permission
1464 zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
1465 {
1466 	const struct xattr_handler *handler;
1467 	struct dentry *d __maybe_unused = xf->dentry;
1468 	enum xattr_permission perm = XAPERM_ALLOW;
1469 
1470 	handler = zpl_xattr_handler(name);
1471 	if (handler == NULL) {
1472 		/* Do not expose FreeBSD system namespace xattrs. */
1473 		if (ZFS_XA_NS_PREFIX_MATCH(FREEBSD, name))
1474 			return (XAPERM_DENY);
1475 		/*
1476 		 * Anything that doesn't match a known namespace gets put in the
1477 		 * user namespace for compatibility with other platforms.
1478 		 */
1479 		perm = XAPERM_COMPAT;
1480 		handler = &zpl_xattr_user_handler;
1481 	}
1482 
1483 	if (handler->list) {
1484 #if defined(HAVE_XATTR_LIST_SIMPLE)
1485 		if (!handler->list(d))
1486 			return (XAPERM_DENY);
1487 #elif defined(HAVE_XATTR_LIST_DENTRY)
1488 		if (!handler->list(d, NULL, 0, name, name_len, 0))
1489 			return (XAPERM_DENY);
1490 #elif defined(HAVE_XATTR_LIST_HANDLER)
1491 		if (!handler->list(handler, d, NULL, 0, name, name_len))
1492 			return (XAPERM_DENY);
1493 #endif
1494 	}
1495 
1496 	return (perm);
1497 }
1498 
1499 #if defined(CONFIG_FS_POSIX_ACL) && \
1500 	(!defined(HAVE_POSIX_ACL_RELEASE) || \
1501 		defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY))
/*
 * Node of the deferred-release list: one entry per ACL that has been
 * handed to zpl_posix_acl_release_impl() and is waiting to be freed.
 */
struct acl_rel_struct {
	struct acl_rel_struct *next;	/* next node in the list, or NULL */
	struct posix_acl *acl;		/* ACL to kfree() once aged out */
	clock_t time;			/* ddi_get_lbolt() at release time */
};
1507 
/*
 * Timing of deferred ACL frees, expressed as multiples of HZ and compared
 * against ddi_get_lbolt() values.  ACL_REL_GRACE is the minimum age a
 * released ACL must reach before zpl_posix_acl_free() frees it.
 * ACL_REL_SCHED (the grace period plus ACL_REL_WINDOW) is the delay used
 * when scheduling that task.
 */
#define	ACL_REL_GRACE	(60*HZ)
#define	ACL_REL_WINDOW	(1*HZ)
#define	ACL_REL_SCHED	(ACL_REL_GRACE+ACL_REL_WINDOW)
1511 
1512 /*
1513  * Lockless multi-producer single-consumer fifo list.
1514  * Nodes are added to tail and removed from head. Tail pointer is our
1515  * synchronization point. It always points to the next pointer of the last
1516  * node, or head if list is empty.
1517  */
1518 static struct acl_rel_struct *acl_rel_head = NULL;
1519 static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;
1520 
/*
 * Task dispatched through taskq_dispatch_delay() that frees released ACLs
 * whose grace period has expired.
 *
 * Walks the release list from acl_rel_head, moving every node that is at
 * least ACL_REL_GRACE old onto a private freelist.  The walk stops at the
 * first node still inside its grace period; in that case the task
 * re-dispatches itself for that node's time + ACL_REL_SCHED.  Finally each
 * collected ACL is kfree()d and its list node is released.
 *
 * The 'arg' parameter is not used.
 */
static void
zpl_posix_acl_free(void *arg)
{
	struct acl_rel_struct *freelist = NULL;
	struct acl_rel_struct *a;
	clock_t new_time;
	boolean_t refire = B_FALSE;

	ASSERT3P(acl_rel_head, !=, NULL);
	while (acl_rel_head) {
		a = acl_rel_head;
		if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {
			/*
			 * If a is the last node we need to reset tail, but we
			 * need to use cmpxchg to make sure it is still the
			 * last node.
			 */
			if (acl_rel_tail == &a->next) {
				acl_rel_head = NULL;
				if (cmpxchg(&acl_rel_tail, &a->next,
				    &acl_rel_head) == &a->next) {
					ASSERT3P(a->next, ==, NULL);
					a->next = freelist;
					freelist = a;
					break;
				}
			}
			/*
			 * a is not last node, make sure next pointer is set
			 * by the adder and advance the head.
			 */
			while (READ_ONCE(a->next) == NULL)
				cpu_relax();
			acl_rel_head = a->next;
			a->next = freelist;
			freelist = a;
		} else {
			/*
			 * a is still in grace period. We are responsible to
			 * reschedule the free task, since adder will only do
			 * so if list is empty.
			 */
			new_time = a->time + ACL_REL_SCHED;
			refire = B_TRUE;
			break;
		}
	}

	if (refire)
		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
		    NULL, TQ_SLEEP, new_time);

	/* Free every node that was unlinked above, along with its ACL. */
	while (freelist) {
		a = freelist;
		freelist = a->next;
		kfree(a->acl);
		kmem_free(a, sizeof (struct acl_rel_struct));
	}
}
1580 
/*
 * Queue an ACL for deferred freeing.
 *
 * Allocates a list node recording the ACL and the current lbolt time and
 * atomically appends it to the tail of the release list.  If the list was
 * empty beforehand, zpl_posix_acl_free() is dispatched to run
 * ACL_REL_SCHED from now.
 */
void
zpl_posix_acl_release_impl(struct posix_acl *acl)
{
	struct acl_rel_struct *a, **prev;

	a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);
	a->next = NULL;
	a->acl = acl;
	a->time = ddi_get_lbolt();
	/* atomically points tail to us and get the previous tail */
	prev = xchg(&acl_rel_tail, &a->next);
	ASSERT3P(*prev, ==, NULL);
	/* link the new node in behind the previous tail (or at head) */
	*prev = a;
	/* if it was empty before, schedule the free task */
	if (prev == &acl_rel_head)
		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
		    NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
}
1599 #endif
1600 
/* Expose the xattr_compat tunable (INT, ZMOD_RW) as a module parameter. */
ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
	"Use legacy ZFS xattr naming for writing new user namespace xattrs");
1603