1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2023, Klara Inc.
23  */
24 
25 #ifdef CONFIG_COMPAT
26 #include <linux/compat.h>
27 #endif
28 #include <linux/fs.h>
29 #ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE
30 #include <linux/splice.h>
31 #endif
32 #include <sys/file.h>
33 #include <sys/zfs_znode.h>
34 #include <sys/zfs_vnops.h>
35 #include <sys/zfeature.h>
36 
37 /*
38  * Clone part of a file via block cloning.
39  *
40  * Note that we are not required to update file offsets; the kernel will take
41  * care of that depending on how it was called.
42  */
43 static ssize_t
zpl_clone_file_range_impl(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,size_t len)44 zpl_clone_file_range_impl(struct file *src_file, loff_t src_off,
45     struct file *dst_file, loff_t dst_off, size_t len)
46 {
47 	struct inode *src_i = file_inode(src_file);
48 	struct inode *dst_i = file_inode(dst_file);
49 	uint64_t src_off_o = (uint64_t)src_off;
50 	uint64_t dst_off_o = (uint64_t)dst_off;
51 	uint64_t len_o = (uint64_t)len;
52 	cred_t *cr = CRED();
53 	fstrans_cookie_t cookie;
54 	int err;
55 
56 	if (!zfs_bclone_enabled)
57 		return (-EOPNOTSUPP);
58 
59 	if (!spa_feature_is_enabled(
60 	    dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
61 		return (-EOPNOTSUPP);
62 
63 	if (src_i != dst_i)
64 		spl_inode_lock_shared(src_i);
65 	spl_inode_lock(dst_i);
66 
67 	crhold(cr);
68 	cookie = spl_fstrans_mark();
69 
70 	err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i),
71 	    &dst_off_o, &len_o, cr);
72 
73 	spl_fstrans_unmark(cookie);
74 	crfree(cr);
75 
76 	spl_inode_unlock(dst_i);
77 	if (src_i != dst_i)
78 		spl_inode_unlock_shared(src_i);
79 
80 	if (err < 0)
81 		return (err);
82 
83 	return ((ssize_t)len_o);
84 }
85 
86 #if defined(HAVE_VFS_COPY_FILE_RANGE) || \
87     defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
88 /*
89  * Entry point for copy_file_range(). Copy len bytes from src_off in src_file
90  * to dst_off in dst_file. We are permitted to do this however we like, so we
91  * try to just clone the blocks, and if we can't support it, fall back to the
92  * kernel's generic byte copy function.
93  */
94 ssize_t
zpl_copy_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,size_t len,unsigned int flags)95 zpl_copy_file_range(struct file *src_file, loff_t src_off,
96     struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
97 {
98 	ssize_t ret;
99 
100 	/* Flags is reserved for future extensions and must be zero. */
101 	if (flags != 0)
102 		return (-EINVAL);
103 
104 	/* Try to do it via zfs_clone_range() and allow shortening. */
105 	ret = zpl_clone_file_range_impl(src_file, src_off,
106 	    dst_file, dst_off, len);
107 
108 #if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE)
109 	/*
110 	 * Since Linux 5.3 the filesystem driver is responsible for executing
111 	 * an appropriate fallback, and a generic fallback function is provided.
112 	 */
113 	if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
114 	    ret == -EAGAIN)
115 		ret = generic_copy_file_range(src_file, src_off, dst_file,
116 		    dst_off, len, flags);
117 #elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE)
118 	/*
119 	 * Since 6.8 the fallback function is called splice_copy_file_range
120 	 * and has a slightly different signature.
121 	 */
122 	if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
123 	    ret == -EAGAIN)
124 		ret = splice_copy_file_range(src_file, src_off, dst_file,
125 		    dst_off, len);
126 #else
127 	/*
128 	 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
129 	 * to the kernel that it should fallback to a content copy.
130 	 */
131 	if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
132 		ret = -EOPNOTSUPP;
133 #endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE || HAVE_VFS_SPLICE_COPY_FILE_RANGE */
134 
135 	return (ret);
136 }
137 #endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
138 
139 #ifdef HAVE_VFS_REMAP_FILE_RANGE
140 /*
141  * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
142  *
143  * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except
144  * that they must clone - they cannot fall back to copying. FICLONE is exactly
145  * FICLONERANGE, for the entire file. We don't need to try to tell them apart;
146  * the kernel will sort that out for us.
147  *
148  * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
149  * range in both files and if they're the same, arrange for them to be backed
150  * by the same storage.
151  *
152  * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range
153  * if we want. It's designed for filesystems that may need to shorten the
154  * length for alignment, EOF, or any other requirement. ZFS may shorten the
155  * request when there is outstanding dirty data which hasn't been written.
156  */
157 loff_t
zpl_remap_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,loff_t len,unsigned int flags)158 zpl_remap_file_range(struct file *src_file, loff_t src_off,
159     struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
160 {
161 	if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
162 		return (-EINVAL);
163 
164 	/* No support for dedup yet */
165 	if (flags & REMAP_FILE_DEDUP)
166 		return (-EOPNOTSUPP);
167 
168 	/* Zero length means to clone everything to the end of the file */
169 	if (len == 0)
170 		len = i_size_read(file_inode(src_file)) - src_off;
171 
172 	ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
173 	    dst_file, dst_off, len);
174 
175 	if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len)
176 		ret = -EINVAL;
177 
178 	return (ret);
179 }
180 #endif /* HAVE_VFS_REMAP_FILE_RANGE */
181 
182 #if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
183     defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
184 /*
185  * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
186  */
187 int
zpl_clone_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,uint64_t len)188 zpl_clone_file_range(struct file *src_file, loff_t src_off,
189     struct file *dst_file, loff_t dst_off, uint64_t len)
190 {
191 	/* Zero length means to clone everything to the end of the file */
192 	if (len == 0)
193 		len = i_size_read(file_inode(src_file)) - src_off;
194 
195 	/* The entire length must be cloned or this is an error. */
196 	ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
197 	    dst_file, dst_off, len);
198 
199 	if (ret >= 0 && ret != len)
200 		ret = -EINVAL;
201 
202 	return (ret);
203 }
204 #endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
205 
206 #ifdef HAVE_VFS_DEDUPE_FILE_RANGE
207 /*
208  * Entry point for FIDEDUPERANGE, before Linux 4.20.
209  */
210 int
zpl_dedupe_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,uint64_t len)211 zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
212     struct file *dst_file, loff_t dst_off, uint64_t len)
213 {
214 	/* No support for dedup yet */
215 	return (-EOPNOTSUPP);
216 }
217 #endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
218 
219 /* Entry point for FICLONE, before Linux 4.5. */
220 long
zpl_ioctl_ficlone(struct file * dst_file,void * arg)221 zpl_ioctl_ficlone(struct file *dst_file, void *arg)
222 {
223 	unsigned long sfd = (unsigned long)arg;
224 
225 	struct file *src_file = fget(sfd);
226 	if (src_file == NULL)
227 		return (-EBADF);
228 
229 	if (dst_file->f_op != src_file->f_op) {
230 		fput(src_file);
231 		return (-EXDEV);
232 	}
233 
234 	size_t len = i_size_read(file_inode(src_file));
235 
236 	ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len);
237 
238 	fput(src_file);
239 
240 	if (ret < 0) {
241 		if (ret == -EOPNOTSUPP)
242 			return (-ENOTTY);
243 		return (ret);
244 	}
245 
246 	if (ret != len)
247 		return (-EINVAL);
248 
249 	return (0);
250 }
251 
252 /* Entry point for FICLONERANGE, before Linux 4.5. */
253 long
zpl_ioctl_ficlonerange(struct file * dst_file,void __user * arg)254 zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
255 {
256 	zfs_ioc_compat_file_clone_range_t fcr;
257 
258 	if (copy_from_user(&fcr, arg, sizeof (fcr)))
259 		return (-EFAULT);
260 
261 	struct file *src_file = fget(fcr.fcr_src_fd);
262 	if (src_file == NULL)
263 		return (-EBADF);
264 
265 	if (dst_file->f_op != src_file->f_op) {
266 		fput(src_file);
267 		return (-EXDEV);
268 	}
269 
270 	size_t len = fcr.fcr_src_length;
271 	if (len == 0)
272 		len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
273 
274 	ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset,
275 	    dst_file, fcr.fcr_dest_offset, len);
276 
277 	fput(src_file);
278 
279 	if (ret < 0) {
280 		if (ret == -EOPNOTSUPP)
281 			return (-ENOTTY);
282 		return (ret);
283 	}
284 
285 	if (ret != len)
286 		return (-EINVAL);
287 
288 	return (0);
289 }
290 
/*
 * Entry point for FIDEDUPERANGE, before Linux 4.5.
 *
 * Deduplication is not implemented; the request is always answered with
 * -ENOTTY and neither argument is examined.
 */
long
zpl_ioctl_fideduperange(struct file *filp, void *arg)
{
	(void) filp;
	(void) arg;

	/* No support for dedup yet */
	return (-ENOTTY);
}
300