1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2023, Klara Inc. 23 */ 24 25 #ifdef CONFIG_COMPAT 26 #include <linux/compat.h> 27 #endif 28 #include <linux/fs.h> 29 #include <sys/file.h> 30 #include <sys/zfs_znode.h> 31 #include <sys/zfs_vnops.h> 32 #include <sys/zfeature.h> 33 34 int zfs_bclone_enabled = 1; 35 36 /* 37 * Clone part of a file via block cloning. 38 * 39 * Note that we are not required to update file offsets; the kernel will take 40 * care of that depending on how it was called. 41 */ 42 static ssize_t 43 __zpl_clone_file_range(struct file *src_file, loff_t src_off, 44 struct file *dst_file, loff_t dst_off, size_t len) 45 { 46 struct inode *src_i = file_inode(src_file); 47 struct inode *dst_i = file_inode(dst_file); 48 uint64_t src_off_o = (uint64_t)src_off; 49 uint64_t dst_off_o = (uint64_t)dst_off; 50 uint64_t len_o = (uint64_t)len; 51 cred_t *cr = CRED(); 52 fstrans_cookie_t cookie; 53 int err; 54 55 if (!zfs_bclone_enabled) 56 return (-EOPNOTSUPP); 57 58 if (!spa_feature_is_enabled( 59 dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING)) 60 return (-EOPNOTSUPP); 61 62 if (src_i != dst_i) 63 spl_inode_lock_shared(src_i); 64 spl_inode_lock(dst_i); 65 66 crhold(cr); 67 cookie = spl_fstrans_mark(); 68 69 err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i), 70 &dst_off_o, &len_o, cr); 71 72 spl_fstrans_unmark(cookie); 73 crfree(cr); 74 75 spl_inode_unlock(dst_i); 76 if (src_i != dst_i) 77 spl_inode_unlock_shared(src_i); 78 79 if (err < 0) 80 return (err); 81 82 return ((ssize_t)len_o); 83 } 84 85 #if defined(HAVE_VFS_COPY_FILE_RANGE) || \ 86 defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) 87 /* 88 * Entry point for copy_file_range(). Copy len bytes from src_off in src_file 89 * to dst_off in dst_file. We are permitted to do this however we like, so we 90 * try to just clone the blocks, and if we can't support it, fall back to the 91 * kernel's generic byte copy function. 92 */ 93 ssize_t 94 zpl_copy_file_range(struct file *src_file, loff_t src_off, 95 struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags) 96 { 97 ssize_t ret; 98 99 if (flags != 0) 100 return (-EINVAL); 101 102 /* Try to do it via zfs_clone_range() */ 103 ret = __zpl_clone_file_range(src_file, src_off, 104 dst_file, dst_off, len); 105 106 #ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE 107 /* 108 * Since Linux 5.3 the filesystem driver is responsible for executing 109 * an appropriate fallback, and a generic fallback function is provided. 110 */ 111 if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV || 112 ret == -EAGAIN) 113 ret = generic_copy_file_range(src_file, src_off, dst_file, 114 dst_off, len, flags); 115 #else 116 /* 117 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal 118 * to the kernel that it should fallback to a content copy. 119 */ 120 if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN) 121 ret = -EOPNOTSUPP; 122 #endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */ 123 124 return (ret); 125 } 126 #endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ 127 128 #ifdef HAVE_VFS_REMAP_FILE_RANGE 129 /* 130 * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE. 131 * 132 * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except 133 * that they must clone - they cannot fall back to copying. FICLONE is exactly 134 * FICLONERANGE, for the entire file. We don't need to try to tell them apart; 135 * the kernel will sort that out for us. 136 * 137 * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the 138 * range in both files and if they're the same, arrange for them to be backed 139 * by the same storage. 140 */ 141 loff_t 142 zpl_remap_file_range(struct file *src_file, loff_t src_off, 143 struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags) 144 { 145 if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN)) 146 return (-EINVAL); 147 148 /* 149 * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given 150 * range if we want. Its designed for filesystems that make data past 151 * EOF available, and don't want it to be visible in both files. ZFS 152 * doesn't do that, so we just turn the flag off. 153 */ 154 flags &= ~REMAP_FILE_CAN_SHORTEN; 155 156 if (flags & REMAP_FILE_DEDUP) 157 /* No support for dedup yet */ 158 return (-EOPNOTSUPP); 159 160 /* Zero length means to clone everything to the end of the file */ 161 if (len == 0) 162 len = i_size_read(file_inode(src_file)) - src_off; 163 164 return (__zpl_clone_file_range(src_file, src_off, 165 dst_file, dst_off, len)); 166 } 167 #endif /* HAVE_VFS_REMAP_FILE_RANGE */ 168 169 #if defined(HAVE_VFS_CLONE_FILE_RANGE) || \ 170 defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) 171 /* 172 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20. 173 */ 174 int 175 zpl_clone_file_range(struct file *src_file, loff_t src_off, 176 struct file *dst_file, loff_t dst_off, uint64_t len) 177 { 178 /* Zero length means to clone everything to the end of the file */ 179 if (len == 0) 180 len = i_size_read(file_inode(src_file)) - src_off; 181 182 return (__zpl_clone_file_range(src_file, src_off, 183 dst_file, dst_off, len)); 184 } 185 #endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ 186 187 #ifdef HAVE_VFS_DEDUPE_FILE_RANGE 188 /* 189 * Entry point for FIDEDUPERANGE, before Linux 4.20. 190 */ 191 int 192 zpl_dedupe_file_range(struct file *src_file, loff_t src_off, 193 struct file *dst_file, loff_t dst_off, uint64_t len) 194 { 195 /* No support for dedup yet */ 196 return (-EOPNOTSUPP); 197 } 198 #endif /* HAVE_VFS_DEDUPE_FILE_RANGE */ 199 200 /* Entry point for FICLONE, before Linux 4.5. */ 201 long 202 zpl_ioctl_ficlone(struct file *dst_file, void *arg) 203 { 204 unsigned long sfd = (unsigned long)arg; 205 206 struct file *src_file = fget(sfd); 207 if (src_file == NULL) 208 return (-EBADF); 209 210 if (dst_file->f_op != src_file->f_op) { 211 fput(src_file); 212 return (-EXDEV); 213 } 214 215 size_t len = i_size_read(file_inode(src_file)); 216 217 ssize_t ret = 218 __zpl_clone_file_range(src_file, 0, dst_file, 0, len); 219 220 fput(src_file); 221 222 if (ret < 0) { 223 if (ret == -EOPNOTSUPP) 224 return (-ENOTTY); 225 return (ret); 226 } 227 228 if (ret != len) 229 return (-EINVAL); 230 231 return (0); 232 } 233 234 /* Entry point for FICLONERANGE, before Linux 4.5. */ 235 long 236 zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg) 237 { 238 zfs_ioc_compat_file_clone_range_t fcr; 239 240 if (copy_from_user(&fcr, arg, sizeof (fcr))) 241 return (-EFAULT); 242 243 struct file *src_file = fget(fcr.fcr_src_fd); 244 if (src_file == NULL) 245 return (-EBADF); 246 247 if (dst_file->f_op != src_file->f_op) { 248 fput(src_file); 249 return (-EXDEV); 250 } 251 252 size_t len = fcr.fcr_src_length; 253 if (len == 0) 254 len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset; 255 256 ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset, 257 dst_file, fcr.fcr_dest_offset, len); 258 259 fput(src_file); 260 261 if (ret < 0) { 262 if (ret == -EOPNOTSUPP) 263 return (-ENOTTY); 264 return (ret); 265 } 266 267 if (ret != len) 268 return (-EINVAL); 269 270 return (0); 271 } 272 273 /* Entry point for FIDEDUPERANGE, before Linux 4.5. */ 274 long 275 zpl_ioctl_fideduperange(struct file *filp, void *arg) 276 { 277 (void) arg; 278 279 /* No support for dedup yet */ 280 return (-ENOTTY); 281 } 282