1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2023, Klara Inc.
23  */
24 
25 #ifdef CONFIG_COMPAT
26 #include <linux/compat.h>
27 #endif
28 #include <linux/fs.h>
29 #include <sys/file.h>
30 #include <sys/zfs_znode.h>
31 #include <sys/zfs_vnops.h>
32 #include <sys/zfeature.h>
33 
34 /*
35  * Clone part of a file via block cloning.
36  *
37  * Note that we are not required to update file offsets; the kernel will take
38  * care of that depending on how it was called.
39  */
40 static ssize_t
41 __zpl_clone_file_range(struct file *src_file, loff_t src_off,
42     struct file *dst_file, loff_t dst_off, size_t len)
43 {
44 	struct inode *src_i = file_inode(src_file);
45 	struct inode *dst_i = file_inode(dst_file);
46 	uint64_t src_off_o = (uint64_t)src_off;
47 	uint64_t dst_off_o = (uint64_t)dst_off;
48 	uint64_t len_o = (uint64_t)len;
49 	cred_t *cr = CRED();
50 	fstrans_cookie_t cookie;
51 	int err;
52 
53 	if (!spa_feature_is_enabled(
54 	    dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
55 		return (-EOPNOTSUPP);
56 
57 	if (src_i != dst_i)
58 		spl_inode_lock_shared(src_i);
59 	spl_inode_lock(dst_i);
60 
61 	crhold(cr);
62 	cookie = spl_fstrans_mark();
63 
64 	err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i),
65 	    &dst_off_o, &len_o, cr);
66 
67 	spl_fstrans_unmark(cookie);
68 	crfree(cr);
69 
70 	spl_inode_unlock(dst_i);
71 	if (src_i != dst_i)
72 		spl_inode_unlock_shared(src_i);
73 
74 	if (err < 0)
75 		return (err);
76 
77 	return ((ssize_t)len_o);
78 }
79 
80 #if defined(HAVE_VFS_COPY_FILE_RANGE) || \
81     defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
82 /*
83  * Entry point for copy_file_range(). Copy len bytes from src_off in src_file
84  * to dst_off in dst_file. We are permitted to do this however we like, so we
85  * try to just clone the blocks, and if we can't support it, fall back to the
86  * kernel's generic byte copy function.
87  */
88 ssize_t
89 zpl_copy_file_range(struct file *src_file, loff_t src_off,
90     struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
91 {
92 	ssize_t ret;
93 
94 	if (flags != 0)
95 		return (-EINVAL);
96 
97 	/* Try to do it via zfs_clone_range() */
98 	ret = __zpl_clone_file_range(src_file, src_off,
99 	    dst_file, dst_off, len);
100 
101 #ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
102 	/*
103 	 * Since Linux 5.3 the filesystem driver is responsible for executing
104 	 * an appropriate fallback, and a generic fallback function is provided.
105 	 */
106 	if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
107 	    ret == -EAGAIN)
108 		ret = generic_copy_file_range(src_file, src_off, dst_file,
109 		    dst_off, len, flags);
110 #else
111 	/*
112 	 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
113 	 * to the kernel that it should fallback to a content copy.
114 	 */
115 	if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
116 		ret = -EOPNOTSUPP;
117 #endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */
118 
119 	return (ret);
120 }
121 #endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
122 
123 #ifdef HAVE_VFS_REMAP_FILE_RANGE
124 /*
125  * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
126  *
127  * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except
128  * that they must clone - they cannot fall back to copying. FICLONE is exactly
129  * FICLONERANGE, for the entire file. We don't need to try to tell them apart;
130  * the kernel will sort that out for us.
131  *
132  * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
133  * range in both files and if they're the same, arrange for them to be backed
134  * by the same storage.
135  */
136 loff_t
137 zpl_remap_file_range(struct file *src_file, loff_t src_off,
138     struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
139 {
140 	if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
141 		return (-EINVAL);
142 
143 	/*
144 	 * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given
145 	 * range if we want. Its designed for filesystems that make data past
146 	 * EOF available, and don't want it to be visible in both files. ZFS
147 	 * doesn't do that, so we just turn the flag off.
148 	 */
149 	flags &= ~REMAP_FILE_CAN_SHORTEN;
150 
151 	if (flags & REMAP_FILE_DEDUP)
152 		/* No support for dedup yet */
153 		return (-EOPNOTSUPP);
154 
155 	/* Zero length means to clone everything to the end of the file */
156 	if (len == 0)
157 		len = i_size_read(file_inode(src_file)) - src_off;
158 
159 	return (__zpl_clone_file_range(src_file, src_off,
160 	    dst_file, dst_off, len));
161 }
162 #endif /* HAVE_VFS_REMAP_FILE_RANGE */
163 
164 #if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
165     defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
166 /*
167  * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
168  */
169 int
170 zpl_clone_file_range(struct file *src_file, loff_t src_off,
171     struct file *dst_file, loff_t dst_off, uint64_t len)
172 {
173 	/* Zero length means to clone everything to the end of the file */
174 	if (len == 0)
175 		len = i_size_read(file_inode(src_file)) - src_off;
176 
177 	return (__zpl_clone_file_range(src_file, src_off,
178 	    dst_file, dst_off, len));
179 }
180 #endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
181 
182 #ifdef HAVE_VFS_DEDUPE_FILE_RANGE
183 /*
184  * Entry point for FIDEDUPERANGE, before Linux 4.20.
185  */
186 int
187 zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
188     struct file *dst_file, loff_t dst_off, uint64_t len)
189 {
190 	/* No support for dedup yet */
191 	return (-EOPNOTSUPP);
192 }
193 #endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
194 
195 /* Entry point for FICLONE, before Linux 4.5. */
196 long
197 zpl_ioctl_ficlone(struct file *dst_file, void *arg)
198 {
199 	unsigned long sfd = (unsigned long)arg;
200 
201 	struct file *src_file = fget(sfd);
202 	if (src_file == NULL)
203 		return (-EBADF);
204 
205 	if (dst_file->f_op != src_file->f_op)
206 		return (-EXDEV);
207 
208 	size_t len = i_size_read(file_inode(src_file));
209 
210 	ssize_t ret =
211 	    __zpl_clone_file_range(src_file, 0, dst_file, 0, len);
212 
213 	fput(src_file);
214 
215 	if (ret < 0) {
216 		if (ret == -EOPNOTSUPP)
217 			return (-ENOTTY);
218 		return (ret);
219 	}
220 
221 	if (ret != len)
222 		return (-EINVAL);
223 
224 	return (0);
225 }
226 
227 /* Entry point for FICLONERANGE, before Linux 4.5. */
228 long
229 zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
230 {
231 	zfs_ioc_compat_file_clone_range_t fcr;
232 
233 	if (copy_from_user(&fcr, arg, sizeof (fcr)))
234 		return (-EFAULT);
235 
236 	struct file *src_file = fget(fcr.fcr_src_fd);
237 	if (src_file == NULL)
238 		return (-EBADF);
239 
240 	if (dst_file->f_op != src_file->f_op)
241 		return (-EXDEV);
242 
243 	size_t len = fcr.fcr_src_length;
244 	if (len == 0)
245 		len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
246 
247 	ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset,
248 	    dst_file, fcr.fcr_dest_offset, len);
249 
250 	fput(src_file);
251 
252 	if (ret < 0) {
253 		if (ret == -EOPNOTSUPP)
254 			return (-ENOTTY);
255 		return (ret);
256 	}
257 
258 	if (ret != len)
259 		return (-EINVAL);
260 
261 	return (0);
262 }
263 
/*
 * Entry point for FIDEDUPERANGE, before Linux 4.5.
 *
 * Dedup is not implemented, so the request is always answered with -ENOTTY
 * and neither argument is examined.
 */
long
zpl_ioctl_fideduperange(struct file *filp, void *arg)
{
	(void) filp;
	(void) arg;

	/* No support for dedup yet */
	return (-ENOTTY);
}
273