1 /* 2 * Copyright (c) 2020 iXsystems, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/dmu.h> 34 #include <sys/dmu_impl.h> 35 #include <sys/dmu_tx.h> 36 #include <sys/dbuf.h> 37 #include <sys/dnode.h> 38 #include <sys/zfs_context.h> 39 #include <sys/dmu_objset.h> 40 #include <sys/dmu_traverse.h> 41 #include <sys/dsl_dataset.h> 42 #include <sys/dsl_dir.h> 43 #include <sys/dsl_pool.h> 44 #include <sys/dsl_synctask.h> 45 #include <sys/dsl_prop.h> 46 #include <sys/dmu_zfetch.h> 47 #include <sys/zfs_ioctl.h> 48 #include <sys/zap.h> 49 #include <sys/zio_checksum.h> 50 #include <sys/zio_compress.h> 51 #include <sys/sa.h> 52 #include <sys/zfeature.h> 53 #include <sys/abd.h> 54 #include <sys/zfs_rlock.h> 55 #include <sys/racct.h> 56 #include <sys/vm.h> 57 #include <sys/zfs_znode.h> 58 #include <sys/zfs_vnops.h> 59 60 #include <sys/ccompat.h> 61 62 #ifndef IDX_TO_OFF 63 #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) 64 #endif 65 66 #if __FreeBSD_version < 1300051 67 #define VM_ALLOC_BUSY_FLAGS VM_ALLOC_NOBUSY 68 #else 69 #define VM_ALLOC_BUSY_FLAGS VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY 70 #endif 71 72 73 #if __FreeBSD_version < 1300072 74 #define dmu_page_lock(m) vm_page_lock(m) 75 #define dmu_page_unlock(m) vm_page_unlock(m) 76 #else 77 #define dmu_page_lock(m) 78 #define dmu_page_unlock(m) 79 #endif 80 81 static int 82 dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, 83 uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 84 { 85 dnode_t *dn; 86 int err; 87 88 err = dnode_hold(os, object, FTAG, &dn); 89 if (err) 90 return (err); 91 92 err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 93 numbufsp, dbpp, DMU_READ_PREFETCH); 94 95 dnode_rele(dn, FTAG); 96 97 return (err); 98 } 99 100 int 101 dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 102 vm_page_t *ma, dmu_tx_t *tx) 103 { 104 dmu_buf_t **dbp; 105 struct sf_buf *sf; 106 int numbufs, i; 107 int err; 108 109 if (size == 0) 110 return (0); 111 112 err = dmu_buf_hold_array(os, object, offset, size, 113 FALSE, FTAG, &numbufs, &dbp); 114 if (err) 115 return (err); 116 117 for (i = 0; i < numbufs; i++) { 118 int tocpy, copied, thiscpy; 119 int bufoff; 120 dmu_buf_t *db = dbp[i]; 121 caddr_t va; 122 123 ASSERT(size > 0); 124 ASSERT3U(db->db_size, >=, PAGESIZE); 125 126 bufoff = offset - db->db_offset; 127 tocpy = (int)MIN(db->db_size - bufoff, size); 128 129 ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 130 131 if (tocpy == db->db_size) 132 dmu_buf_will_fill(db, tx); 133 else 134 dmu_buf_will_dirty(db, tx); 135 136 for (copied = 0; copied < tocpy; copied += PAGESIZE) { 137 ASSERT3U(ptoa((*ma)->pindex), ==, 138 db->db_offset + bufoff); 139 thiscpy = MIN(PAGESIZE, tocpy - copied); 140 va = zfs_map_page(*ma, &sf); 141 bcopy(va, (char *)db->db_data + bufoff, thiscpy); 142 zfs_unmap_page(sf); 143 ma += 1; 144 bufoff += PAGESIZE; 145 } 146 147 if (tocpy == db->db_size) 148 dmu_buf_fill_done(db, tx); 149 150 offset += tocpy; 151 size -= tocpy; 152 } 153 dmu_buf_rele_array(dbp, numbufs, FTAG); 154 return (err); 155 } 156 157 int 158 dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, 159 int *rbehind, int *rahead, int last_size) 160 { 161 struct sf_buf *sf; 162 vm_object_t vmobj; 163 vm_page_t m; 164 dmu_buf_t **dbp; 165 dmu_buf_t *db; 166 caddr_t va; 167 int numbufs, i; 168 int bufoff, pgoff, tocpy; 169 int mi, di; 170 int err; 171 172 ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); 173 ASSERT(last_size <= PAGE_SIZE); 174 175 err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), 176 IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); 177 if (err != 0) 178 return (err); 179 180 #ifdef ZFS_DEBUG 181 IMPLY(last_size < PAGE_SIZE, *rahead == 0); 182 if (dbp[0]->db_offset != 0 || numbufs > 1) { 183 for (i = 0; i < numbufs; i++) { 184 ASSERT(ISP2(dbp[i]->db_size)); 185 ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0); 186 ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); 187 } 188 } 189 #endif 190 191 vmobj = ma[0]->object; 192 zfs_vmobject_wlock_12(vmobj); 193 194 db = dbp[0]; 195 for (i = 0; i < *rbehind; i++) { 196 m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i, 197 VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); 198 if (m == NULL) 199 break; 200 if (!vm_page_none_valid(m)) { 201 ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); 202 vm_page_do_sunbusy(m); 203 break; 204 } 205 ASSERT(m->dirty == 0); 206 ASSERT(!pmap_page_is_write_mapped(m)); 207 208 ASSERT(db->db_size > PAGE_SIZE); 209 bufoff = IDX_TO_OFF(m->pindex) % db->db_size; 210 va = zfs_map_page(m, &sf); 211 bcopy((char *)db->db_data + bufoff, va, PAGESIZE); 212 zfs_unmap_page(sf); 213 vm_page_valid(m); 214 dmu_page_lock(m); 215 if ((m->busy_lock & VPB_BIT_WAITERS) != 0) 216 vm_page_activate(m); 217 else 218 vm_page_deactivate(m); 219 dmu_page_unlock(m); 220 vm_page_do_sunbusy(m); 221 } 222 *rbehind = i; 223 224 bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size; 225 pgoff = 0; 226 for (mi = 0, di = 0; mi < count && di < numbufs; ) { 227 if (pgoff == 0) { 228 m = ma[mi]; 229 if (m != bogus_page) { 230 vm_page_assert_xbusied(m); 231 ASSERT(vm_page_none_valid(m)); 232 ASSERT(m->dirty == 0); 233 ASSERT(!pmap_page_is_write_mapped(m)); 234 va = zfs_map_page(m, &sf); 235 } 236 } 237 if (bufoff == 0) 238 db = dbp[di]; 239 240 if (m != bogus_page) { 241 ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==, 242 db->db_offset + bufoff); 243 } 244 245 /* 246 * We do not need to clamp the copy size by the file 247 * size as the last block is zero-filled beyond the 248 * end of file anyway. 249 */ 250 tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); 251 if (m != bogus_page) 252 bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy); 253 254 pgoff += tocpy; 255 ASSERT(pgoff <= PAGESIZE); 256 if (pgoff == PAGESIZE) { 257 if (m != bogus_page) { 258 zfs_unmap_page(sf); 259 vm_page_valid(m); 260 } 261 ASSERT(mi < count); 262 mi++; 263 pgoff = 0; 264 } 265 266 bufoff += tocpy; 267 ASSERT(bufoff <= db->db_size); 268 if (bufoff == db->db_size) { 269 ASSERT(di < numbufs); 270 di++; 271 bufoff = 0; 272 } 273 } 274 275 #ifdef ZFS_DEBUG 276 /* 277 * Three possibilities: 278 * - last requested page ends at a buffer boundary and , thus, 279 * all pages and buffers have been iterated; 280 * - all requested pages are filled, but the last buffer 281 * has not been exhausted; 282 * the read-ahead is possible only in this case; 283 * - all buffers have been read, but the last page has not been 284 * fully filled; 285 * this is only possible if the file has only a single buffer 286 * with a size that is not a multiple of the page size. 287 */ 288 if (mi == count) { 289 ASSERT(di >= numbufs - 1); 290 IMPLY(*rahead != 0, di == numbufs - 1); 291 IMPLY(*rahead != 0, bufoff != 0); 292 ASSERT(pgoff == 0); 293 } 294 if (di == numbufs) { 295 ASSERT(mi >= count - 1); 296 ASSERT(*rahead == 0); 297 IMPLY(pgoff == 0, mi == count); 298 if (pgoff != 0) { 299 ASSERT(mi == count - 1); 300 ASSERT((dbp[0]->db_size & PAGE_MASK) != 0); 301 } 302 } 303 #endif 304 if (pgoff != 0) { 305 ASSERT(m != bogus_page); 306 bzero(va + pgoff, PAGESIZE - pgoff); 307 zfs_unmap_page(sf); 308 vm_page_valid(m); 309 } 310 311 for (i = 0; i < *rahead; i++) { 312 m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i, 313 VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); 314 if (m == NULL) 315 break; 316 if (!vm_page_none_valid(m)) { 317 ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); 318 vm_page_do_sunbusy(m); 319 break; 320 } 321 ASSERT(m->dirty == 0); 322 ASSERT(!pmap_page_is_mapped(m)); 323 324 ASSERT(db->db_size > PAGE_SIZE); 325 bufoff = IDX_TO_OFF(m->pindex) % db->db_size; 326 tocpy = MIN(db->db_size - bufoff, PAGESIZE); 327 va = zfs_map_page(m, &sf); 328 bcopy((char *)db->db_data + bufoff, va, tocpy); 329 if (tocpy < PAGESIZE) { 330 ASSERT(i == *rahead - 1); 331 ASSERT((db->db_size & PAGE_MASK) != 0); 332 bzero(va + tocpy, PAGESIZE - tocpy); 333 } 334 zfs_unmap_page(sf); 335 vm_page_valid(m); 336 dmu_page_lock(m); 337 if ((m->busy_lock & VPB_BIT_WAITERS) != 0) 338 vm_page_activate(m); 339 else 340 vm_page_deactivate(m); 341 dmu_page_unlock(m); 342 vm_page_do_sunbusy(m); 343 } 344 *rahead = i; 345 zfs_vmobject_wunlock_12(vmobj); 346 347 dmu_buf_rele_array(dbp, numbufs, FTAG); 348 return (0); 349 } 350