1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2023, Klara Inc. 23 */ 24 25 #ifdef CONFIG_COMPAT 26 #include <linux/compat.h> 27 #endif 28 #include <linux/fs.h> 29 #include <sys/file.h> 30 #include <sys/zfs_znode.h> 31 #include <sys/zfs_vnops.h> 32 #include <sys/zfeature.h> 33 34 /* 35 * Clone part of a file via block cloning. 36 * 37 * Note that we are not required to update file offsets; the kernel will take 38 * care of that depending on how it was called. 39 */ 40 static ssize_t 41 __zpl_clone_file_range(struct file *src_file, loff_t src_off, 42 struct file *dst_file, loff_t dst_off, size_t len) 43 { 44 struct inode *src_i = file_inode(src_file); 45 struct inode *dst_i = file_inode(dst_file); 46 uint64_t src_off_o = (uint64_t)src_off; 47 uint64_t dst_off_o = (uint64_t)dst_off; 48 uint64_t len_o = (uint64_t)len; 49 cred_t *cr = CRED(); 50 fstrans_cookie_t cookie; 51 int err; 52 53 if (!spa_feature_is_enabled( 54 dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING)) 55 return (-EOPNOTSUPP); 56 57 if (src_i != dst_i) 58 spl_inode_lock_shared(src_i); 59 spl_inode_lock(dst_i); 60 61 crhold(cr); 62 cookie = spl_fstrans_mark(); 63 64 err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i), 65 &dst_off_o, &len_o, cr); 66 67 spl_fstrans_unmark(cookie); 68 crfree(cr); 69 70 spl_inode_unlock(dst_i); 71 if (src_i != dst_i) 72 spl_inode_unlock_shared(src_i); 73 74 if (err < 0) 75 return (err); 76 77 return ((ssize_t)len_o); 78 } 79 80 #if defined(HAVE_VFS_COPY_FILE_RANGE) || \ 81 defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) 82 /* 83 * Entry point for copy_file_range(). Copy len bytes from src_off in src_file 84 * to dst_off in dst_file. We are permitted to do this however we like, so we 85 * try to just clone the blocks, and if we can't support it, fall back to the 86 * kernel's generic byte copy function. 87 */ 88 ssize_t 89 zpl_copy_file_range(struct file *src_file, loff_t src_off, 90 struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags) 91 { 92 ssize_t ret; 93 94 if (flags != 0) 95 return (-EINVAL); 96 97 /* Try to do it via zfs_clone_range() */ 98 ret = __zpl_clone_file_range(src_file, src_off, 99 dst_file, dst_off, len); 100 101 #ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE 102 /* 103 * Since Linux 5.3 the filesystem driver is responsible for executing 104 * an appropriate fallback, and a generic fallback function is provided. 105 */ 106 if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV || 107 ret == -EAGAIN) 108 ret = generic_copy_file_range(src_file, src_off, dst_file, 109 dst_off, len, flags); 110 #else 111 /* 112 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal 113 * to the kernel that it should fallback to a content copy. 114 */ 115 if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN) 116 ret = -EOPNOTSUPP; 117 #endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */ 118 119 return (ret); 120 } 121 #endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ 122 123 #ifdef HAVE_VFS_REMAP_FILE_RANGE 124 /* 125 * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE. 126 * 127 * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except 128 * that they must clone - they cannot fall back to copying. FICLONE is exactly 129 * FICLONERANGE, for the entire file. We don't need to try to tell them apart; 130 * the kernel will sort that out for us. 131 * 132 * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the 133 * range in both files and if they're the same, arrange for them to be backed 134 * by the same storage. 135 */ 136 loff_t 137 zpl_remap_file_range(struct file *src_file, loff_t src_off, 138 struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags) 139 { 140 if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN)) 141 return (-EINVAL); 142 143 /* 144 * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given 145 * range if we want. Its designed for filesystems that make data past 146 * EOF available, and don't want it to be visible in both files. ZFS 147 * doesn't do that, so we just turn the flag off. 148 */ 149 flags &= ~REMAP_FILE_CAN_SHORTEN; 150 151 if (flags & REMAP_FILE_DEDUP) 152 /* No support for dedup yet */ 153 return (-EOPNOTSUPP); 154 155 /* Zero length means to clone everything to the end of the file */ 156 if (len == 0) 157 len = i_size_read(file_inode(src_file)) - src_off; 158 159 return (__zpl_clone_file_range(src_file, src_off, 160 dst_file, dst_off, len)); 161 } 162 #endif /* HAVE_VFS_REMAP_FILE_RANGE */ 163 164 #if defined(HAVE_VFS_CLONE_FILE_RANGE) || \ 165 defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) 166 /* 167 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20. 168 */ 169 int 170 zpl_clone_file_range(struct file *src_file, loff_t src_off, 171 struct file *dst_file, loff_t dst_off, uint64_t len) 172 { 173 /* Zero length means to clone everything to the end of the file */ 174 if (len == 0) 175 len = i_size_read(file_inode(src_file)) - src_off; 176 177 return (__zpl_clone_file_range(src_file, src_off, 178 dst_file, dst_off, len)); 179 } 180 #endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ 181 182 #ifdef HAVE_VFS_DEDUPE_FILE_RANGE 183 /* 184 * Entry point for FIDEDUPERANGE, before Linux 4.20. 185 */ 186 int 187 zpl_dedupe_file_range(struct file *src_file, loff_t src_off, 188 struct file *dst_file, loff_t dst_off, uint64_t len) 189 { 190 /* No support for dedup yet */ 191 return (-EOPNOTSUPP); 192 } 193 #endif /* HAVE_VFS_DEDUPE_FILE_RANGE */ 194 195 /* Entry point for FICLONE, before Linux 4.5. */ 196 long 197 zpl_ioctl_ficlone(struct file *dst_file, void *arg) 198 { 199 unsigned long sfd = (unsigned long)arg; 200 201 struct file *src_file = fget(sfd); 202 if (src_file == NULL) 203 return (-EBADF); 204 205 if (dst_file->f_op != src_file->f_op) 206 return (-EXDEV); 207 208 size_t len = i_size_read(file_inode(src_file)); 209 210 ssize_t ret = 211 __zpl_clone_file_range(src_file, 0, dst_file, 0, len); 212 213 fput(src_file); 214 215 if (ret < 0) { 216 if (ret == -EOPNOTSUPP) 217 return (-ENOTTY); 218 return (ret); 219 } 220 221 if (ret != len) 222 return (-EINVAL); 223 224 return (0); 225 } 226 227 /* Entry point for FICLONERANGE, before Linux 4.5. */ 228 long 229 zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg) 230 { 231 zfs_ioc_compat_file_clone_range_t fcr; 232 233 if (copy_from_user(&fcr, arg, sizeof (fcr))) 234 return (-EFAULT); 235 236 struct file *src_file = fget(fcr.fcr_src_fd); 237 if (src_file == NULL) 238 return (-EBADF); 239 240 if (dst_file->f_op != src_file->f_op) 241 return (-EXDEV); 242 243 size_t len = fcr.fcr_src_length; 244 if (len == 0) 245 len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset; 246 247 ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset, 248 dst_file, fcr.fcr_dest_offset, len); 249 250 fput(src_file); 251 252 if (ret < 0) { 253 if (ret == -EOPNOTSUPP) 254 return (-ENOTTY); 255 return (ret); 256 } 257 258 if (ret != len) 259 return (-EINVAL); 260 261 return (0); 262 } 263 264 /* Entry point for FIDEDUPERANGE, before Linux 4.5. */ 265 long 266 zpl_ioctl_fideduperange(struct file *filp, void *arg) 267 { 268 (void) arg; 269 270 /* No support for dedup yet */ 271 return (-ENOTTY); 272 } 273