1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * Copyright (c) 2020 iXsystems, Inc. 3eda14cbcSMatt Macy * All rights reserved. 4eda14cbcSMatt Macy * 5eda14cbcSMatt Macy * Redistribution and use in source and binary forms, with or without 6eda14cbcSMatt Macy * modification, are permitted provided that the following conditions 7eda14cbcSMatt Macy * are met: 8eda14cbcSMatt Macy * 1. Redistributions of source code must retain the above copyright 9eda14cbcSMatt Macy * notice, this list of conditions and the following disclaimer. 10eda14cbcSMatt Macy * 2. Redistributions in binary form must reproduce the above copyright 11eda14cbcSMatt Macy * notice, this list of conditions and the following disclaimer in the 12eda14cbcSMatt Macy * documentation and/or other materials provided with the distribution. 13eda14cbcSMatt Macy * 14eda14cbcSMatt Macy * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15eda14cbcSMatt Macy * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16eda14cbcSMatt Macy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17eda14cbcSMatt Macy * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18eda14cbcSMatt Macy * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19eda14cbcSMatt Macy * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20eda14cbcSMatt Macy * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21eda14cbcSMatt Macy * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22eda14cbcSMatt Macy * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23eda14cbcSMatt Macy * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24eda14cbcSMatt Macy * SUCH DAMAGE. 25eda14cbcSMatt Macy * 26eda14cbcSMatt Macy */ 27eda14cbcSMatt Macy 28eda14cbcSMatt Macy #include <sys/cdefs.h> 29eda14cbcSMatt Macy __FBSDID("$FreeBSD$"); 30eda14cbcSMatt Macy 31eda14cbcSMatt Macy #include <sys/types.h> 32eda14cbcSMatt Macy #include <sys/param.h> 33eda14cbcSMatt Macy #include <sys/dmu.h> 34eda14cbcSMatt Macy #include <sys/dmu_impl.h> 35eda14cbcSMatt Macy #include <sys/dmu_tx.h> 36eda14cbcSMatt Macy #include <sys/dbuf.h> 37eda14cbcSMatt Macy #include <sys/dnode.h> 38eda14cbcSMatt Macy #include <sys/zfs_context.h> 39eda14cbcSMatt Macy #include <sys/dmu_objset.h> 40eda14cbcSMatt Macy #include <sys/dmu_traverse.h> 41eda14cbcSMatt Macy #include <sys/dsl_dataset.h> 42eda14cbcSMatt Macy #include <sys/dsl_dir.h> 43eda14cbcSMatt Macy #include <sys/dsl_pool.h> 44eda14cbcSMatt Macy #include <sys/dsl_synctask.h> 45eda14cbcSMatt Macy #include <sys/dsl_prop.h> 46eda14cbcSMatt Macy #include <sys/dmu_zfetch.h> 47eda14cbcSMatt Macy #include <sys/zfs_ioctl.h> 48eda14cbcSMatt Macy #include <sys/zap.h> 49eda14cbcSMatt Macy #include <sys/zio_checksum.h> 50eda14cbcSMatt Macy #include <sys/zio_compress.h> 51eda14cbcSMatt Macy #include <sys/sa.h> 52eda14cbcSMatt Macy #include <sys/zfeature.h> 53eda14cbcSMatt Macy #include <sys/abd.h> 54eda14cbcSMatt Macy #include <sys/zfs_rlock.h> 55eda14cbcSMatt Macy #include <sys/racct.h> 56eda14cbcSMatt Macy #include <sys/vm.h> 57eda14cbcSMatt Macy #include <sys/zfs_znode.h> 58eda14cbcSMatt Macy #include <sys/zfs_vnops.h> 59eda14cbcSMatt Macy 60eda14cbcSMatt Macy #include <sys/ccompat.h> 61eda14cbcSMatt Macy 62eda14cbcSMatt Macy #ifndef IDX_TO_OFF 63eda14cbcSMatt Macy #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) 64eda14cbcSMatt Macy #endif 65eda14cbcSMatt Macy 66eda14cbcSMatt Macy #if __FreeBSD_version < 1300051 67eda14cbcSMatt Macy #define VM_ALLOC_BUSY_FLAGS VM_ALLOC_NOBUSY 68eda14cbcSMatt Macy #else 69eda14cbcSMatt Macy #define VM_ALLOC_BUSY_FLAGS VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY 70eda14cbcSMatt Macy #endif 71eda14cbcSMatt Macy 72eda14cbcSMatt Macy 73eda14cbcSMatt Macy #if __FreeBSD_version < 1300072 74eda14cbcSMatt Macy #define dmu_page_lock(m) vm_page_lock(m) 75eda14cbcSMatt Macy #define dmu_page_unlock(m) vm_page_unlock(m) 76eda14cbcSMatt Macy #else 77eda14cbcSMatt Macy #define dmu_page_lock(m) 78eda14cbcSMatt Macy #define dmu_page_unlock(m) 79eda14cbcSMatt Macy #endif 80eda14cbcSMatt Macy 81eda14cbcSMatt Macy static int 82eda14cbcSMatt Macy dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, 83eda14cbcSMatt Macy uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 84eda14cbcSMatt Macy { 85eda14cbcSMatt Macy dnode_t *dn; 86eda14cbcSMatt Macy int err; 87eda14cbcSMatt Macy 88eda14cbcSMatt Macy err = dnode_hold(os, object, FTAG, &dn); 89eda14cbcSMatt Macy if (err) 90eda14cbcSMatt Macy return (err); 91eda14cbcSMatt Macy 92eda14cbcSMatt Macy err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 93eda14cbcSMatt Macy numbufsp, dbpp, DMU_READ_PREFETCH); 94eda14cbcSMatt Macy 95eda14cbcSMatt Macy dnode_rele(dn, FTAG); 96eda14cbcSMatt Macy 97eda14cbcSMatt Macy return (err); 98eda14cbcSMatt Macy } 99eda14cbcSMatt Macy 100eda14cbcSMatt Macy int 101eda14cbcSMatt Macy dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 102eda14cbcSMatt Macy vm_page_t *ma, dmu_tx_t *tx) 103eda14cbcSMatt Macy { 104eda14cbcSMatt Macy dmu_buf_t **dbp; 105eda14cbcSMatt Macy struct sf_buf *sf; 106eda14cbcSMatt Macy int numbufs, i; 107eda14cbcSMatt Macy int err; 108eda14cbcSMatt Macy 109eda14cbcSMatt Macy if (size == 0) 110eda14cbcSMatt Macy return (0); 111eda14cbcSMatt Macy 112eda14cbcSMatt Macy err = dmu_buf_hold_array(os, object, offset, size, 113eda14cbcSMatt Macy FALSE, FTAG, &numbufs, &dbp); 114eda14cbcSMatt Macy if (err) 115eda14cbcSMatt Macy return (err); 116eda14cbcSMatt Macy 117eda14cbcSMatt Macy for (i = 0; i < numbufs; i++) { 118eda14cbcSMatt Macy int tocpy, copied, thiscpy; 119eda14cbcSMatt Macy int bufoff; 120eda14cbcSMatt Macy dmu_buf_t *db = dbp[i]; 121eda14cbcSMatt Macy caddr_t va; 122eda14cbcSMatt Macy 123eda14cbcSMatt Macy ASSERT(size > 0); 124eda14cbcSMatt Macy ASSERT3U(db->db_size, >=, PAGESIZE); 125eda14cbcSMatt Macy 126eda14cbcSMatt Macy bufoff = offset - db->db_offset; 127eda14cbcSMatt Macy tocpy = (int)MIN(db->db_size - bufoff, size); 128eda14cbcSMatt Macy 129eda14cbcSMatt Macy ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 130eda14cbcSMatt Macy 131eda14cbcSMatt Macy if (tocpy == db->db_size) 132eda14cbcSMatt Macy dmu_buf_will_fill(db, tx); 133eda14cbcSMatt Macy else 134eda14cbcSMatt Macy dmu_buf_will_dirty(db, tx); 135eda14cbcSMatt Macy 136eda14cbcSMatt Macy for (copied = 0; copied < tocpy; copied += PAGESIZE) { 137eda14cbcSMatt Macy ASSERT3U(ptoa((*ma)->pindex), ==, 138eda14cbcSMatt Macy db->db_offset + bufoff); 139eda14cbcSMatt Macy thiscpy = MIN(PAGESIZE, tocpy - copied); 140eda14cbcSMatt Macy va = zfs_map_page(*ma, &sf); 141eda14cbcSMatt Macy bcopy(va, (char *)db->db_data + bufoff, thiscpy); 142eda14cbcSMatt Macy zfs_unmap_page(sf); 143eda14cbcSMatt Macy ma += 1; 144eda14cbcSMatt Macy bufoff += PAGESIZE; 145eda14cbcSMatt Macy } 146eda14cbcSMatt Macy 147eda14cbcSMatt Macy if (tocpy == db->db_size) 148eda14cbcSMatt Macy dmu_buf_fill_done(db, tx); 149eda14cbcSMatt Macy 150eda14cbcSMatt Macy offset += tocpy; 151eda14cbcSMatt Macy size -= tocpy; 152eda14cbcSMatt Macy } 153eda14cbcSMatt Macy dmu_buf_rele_array(dbp, numbufs, FTAG); 154eda14cbcSMatt Macy return (err); 155eda14cbcSMatt Macy } 156eda14cbcSMatt Macy 157eda14cbcSMatt Macy int 158eda14cbcSMatt Macy dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, 159eda14cbcSMatt Macy int *rbehind, int *rahead, int last_size) 160eda14cbcSMatt Macy { 161eda14cbcSMatt Macy struct sf_buf *sf; 162eda14cbcSMatt Macy vm_object_t vmobj; 163eda14cbcSMatt Macy vm_page_t m; 164eda14cbcSMatt Macy dmu_buf_t **dbp; 165eda14cbcSMatt Macy dmu_buf_t *db; 166eda14cbcSMatt Macy caddr_t va; 167eda14cbcSMatt Macy int numbufs, i; 168eda14cbcSMatt Macy int bufoff, pgoff, tocpy; 169eda14cbcSMatt Macy int mi, di; 170eda14cbcSMatt Macy int err; 171eda14cbcSMatt Macy 172eda14cbcSMatt Macy ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); 173eda14cbcSMatt Macy ASSERT(last_size <= PAGE_SIZE); 174eda14cbcSMatt Macy 175eda14cbcSMatt Macy err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), 176eda14cbcSMatt Macy IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); 177eda14cbcSMatt Macy if (err != 0) 178eda14cbcSMatt Macy return (err); 179eda14cbcSMatt Macy 180eda14cbcSMatt Macy #ifdef ZFS_DEBUG 181eda14cbcSMatt Macy IMPLY(last_size < PAGE_SIZE, *rahead == 0); 182eda14cbcSMatt Macy if (dbp[0]->db_offset != 0 || numbufs > 1) { 183eda14cbcSMatt Macy for (i = 0; i < numbufs; i++) { 184eda14cbcSMatt Macy ASSERT(ISP2(dbp[i]->db_size)); 185eda14cbcSMatt Macy ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0); 186eda14cbcSMatt Macy ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); 187eda14cbcSMatt Macy } 188eda14cbcSMatt Macy } 189eda14cbcSMatt Macy #endif 190eda14cbcSMatt Macy 191eda14cbcSMatt Macy vmobj = ma[0]->object; 192eda14cbcSMatt Macy zfs_vmobject_wlock_12(vmobj); 193eda14cbcSMatt Macy 194eda14cbcSMatt Macy db = dbp[0]; 195eda14cbcSMatt Macy for (i = 0; i < *rbehind; i++) { 196eda14cbcSMatt Macy m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i, 197eda14cbcSMatt Macy VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); 198eda14cbcSMatt Macy if (m == NULL) 199eda14cbcSMatt Macy break; 200eda14cbcSMatt Macy if (!vm_page_none_valid(m)) { 201eda14cbcSMatt Macy ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); 202eda14cbcSMatt Macy vm_page_do_sunbusy(m); 203eda14cbcSMatt Macy break; 204eda14cbcSMatt Macy } 205eda14cbcSMatt Macy ASSERT(m->dirty == 0); 206eda14cbcSMatt Macy ASSERT(!pmap_page_is_write_mapped(m)); 207eda14cbcSMatt Macy 208eda14cbcSMatt Macy ASSERT(db->db_size > PAGE_SIZE); 209eda14cbcSMatt Macy bufoff = IDX_TO_OFF(m->pindex) % db->db_size; 210eda14cbcSMatt Macy va = zfs_map_page(m, &sf); 211eda14cbcSMatt Macy bcopy((char *)db->db_data + bufoff, va, PAGESIZE); 212eda14cbcSMatt Macy zfs_unmap_page(sf); 213eda14cbcSMatt Macy vm_page_valid(m); 214eda14cbcSMatt Macy dmu_page_lock(m); 215eda14cbcSMatt Macy if ((m->busy_lock & VPB_BIT_WAITERS) != 0) 216eda14cbcSMatt Macy vm_page_activate(m); 217eda14cbcSMatt Macy else 218eda14cbcSMatt Macy vm_page_deactivate(m); 219eda14cbcSMatt Macy dmu_page_unlock(m); 220eda14cbcSMatt Macy vm_page_do_sunbusy(m); 221eda14cbcSMatt Macy } 222eda14cbcSMatt Macy *rbehind = i; 223eda14cbcSMatt Macy 224eda14cbcSMatt Macy bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size; 225eda14cbcSMatt Macy pgoff = 0; 226eda14cbcSMatt Macy for (mi = 0, di = 0; mi < count && di < numbufs; ) { 227eda14cbcSMatt Macy if (pgoff == 0) { 228eda14cbcSMatt Macy m = ma[mi]; 229eda14cbcSMatt Macy if (m != bogus_page) { 230eda14cbcSMatt Macy vm_page_assert_xbusied(m); 231eda14cbcSMatt Macy ASSERT(vm_page_none_valid(m)); 232eda14cbcSMatt Macy ASSERT(m->dirty == 0); 233eda14cbcSMatt Macy ASSERT(!pmap_page_is_write_mapped(m)); 234eda14cbcSMatt Macy va = zfs_map_page(m, &sf); 235eda14cbcSMatt Macy } 236eda14cbcSMatt Macy } 237eda14cbcSMatt Macy if (bufoff == 0) 238eda14cbcSMatt Macy db = dbp[di]; 239eda14cbcSMatt Macy 240eda14cbcSMatt Macy if (m != bogus_page) { 241eda14cbcSMatt Macy ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==, 242eda14cbcSMatt Macy db->db_offset + bufoff); 243eda14cbcSMatt Macy } 244eda14cbcSMatt Macy 245eda14cbcSMatt Macy /* 246eda14cbcSMatt Macy * We do not need to clamp the copy size by the file 247eda14cbcSMatt Macy * size as the last block is zero-filled beyond the 248eda14cbcSMatt Macy * end of file anyway. 249eda14cbcSMatt Macy */ 250eda14cbcSMatt Macy tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); 251eda14cbcSMatt Macy if (m != bogus_page) 252eda14cbcSMatt Macy bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy); 253eda14cbcSMatt Macy 254eda14cbcSMatt Macy pgoff += tocpy; 255eda14cbcSMatt Macy ASSERT(pgoff <= PAGESIZE); 256eda14cbcSMatt Macy if (pgoff == PAGESIZE) { 257eda14cbcSMatt Macy if (m != bogus_page) { 258eda14cbcSMatt Macy zfs_unmap_page(sf); 259eda14cbcSMatt Macy vm_page_valid(m); 260eda14cbcSMatt Macy } 261eda14cbcSMatt Macy ASSERT(mi < count); 262eda14cbcSMatt Macy mi++; 263eda14cbcSMatt Macy pgoff = 0; 264eda14cbcSMatt Macy } 265eda14cbcSMatt Macy 266eda14cbcSMatt Macy bufoff += tocpy; 267eda14cbcSMatt Macy ASSERT(bufoff <= db->db_size); 268eda14cbcSMatt Macy if (bufoff == db->db_size) { 269eda14cbcSMatt Macy ASSERT(di < numbufs); 270eda14cbcSMatt Macy di++; 271eda14cbcSMatt Macy bufoff = 0; 272eda14cbcSMatt Macy } 273eda14cbcSMatt Macy } 274eda14cbcSMatt Macy 275eda14cbcSMatt Macy #ifdef ZFS_DEBUG 276eda14cbcSMatt Macy /* 277eda14cbcSMatt Macy * Three possibilities: 278eda14cbcSMatt Macy * - last requested page ends at a buffer boundary and , thus, 279eda14cbcSMatt Macy * all pages and buffers have been iterated; 280eda14cbcSMatt Macy * - all requested pages are filled, but the last buffer 281eda14cbcSMatt Macy * has not been exhausted; 282eda14cbcSMatt Macy * the read-ahead is possible only in this case; 283eda14cbcSMatt Macy * - all buffers have been read, but the last page has not been 284eda14cbcSMatt Macy * fully filled; 285eda14cbcSMatt Macy * this is only possible if the file has only a single buffer 286eda14cbcSMatt Macy * with a size that is not a multiple of the page size. 287eda14cbcSMatt Macy */ 288eda14cbcSMatt Macy if (mi == count) { 289eda14cbcSMatt Macy ASSERT(di >= numbufs - 1); 290eda14cbcSMatt Macy IMPLY(*rahead != 0, di == numbufs - 1); 291eda14cbcSMatt Macy IMPLY(*rahead != 0, bufoff != 0); 292eda14cbcSMatt Macy ASSERT(pgoff == 0); 293eda14cbcSMatt Macy } 294eda14cbcSMatt Macy if (di == numbufs) { 295eda14cbcSMatt Macy ASSERT(mi >= count - 1); 296eda14cbcSMatt Macy ASSERT(*rahead == 0); 297eda14cbcSMatt Macy IMPLY(pgoff == 0, mi == count); 298eda14cbcSMatt Macy if (pgoff != 0) { 299eda14cbcSMatt Macy ASSERT(mi == count - 1); 300eda14cbcSMatt Macy ASSERT((dbp[0]->db_size & PAGE_MASK) != 0); 301eda14cbcSMatt Macy } 302eda14cbcSMatt Macy } 303eda14cbcSMatt Macy #endif 304eda14cbcSMatt Macy if (pgoff != 0) { 305eda14cbcSMatt Macy ASSERT(m != bogus_page); 306eda14cbcSMatt Macy bzero(va + pgoff, PAGESIZE - pgoff); 307eda14cbcSMatt Macy zfs_unmap_page(sf); 308eda14cbcSMatt Macy vm_page_valid(m); 309eda14cbcSMatt Macy } 310eda14cbcSMatt Macy 311eda14cbcSMatt Macy for (i = 0; i < *rahead; i++) { 312eda14cbcSMatt Macy m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i, 313eda14cbcSMatt Macy VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); 314eda14cbcSMatt Macy if (m == NULL) 315eda14cbcSMatt Macy break; 316eda14cbcSMatt Macy if (!vm_page_none_valid(m)) { 317eda14cbcSMatt Macy ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); 318eda14cbcSMatt Macy vm_page_do_sunbusy(m); 319eda14cbcSMatt Macy break; 320eda14cbcSMatt Macy } 321eda14cbcSMatt Macy ASSERT(m->dirty == 0); 322*caed7b1cSMartin Matuska ASSERT(!pmap_page_is_write_mapped(m)); 323eda14cbcSMatt Macy 324eda14cbcSMatt Macy ASSERT(db->db_size > PAGE_SIZE); 325eda14cbcSMatt Macy bufoff = IDX_TO_OFF(m->pindex) % db->db_size; 326eda14cbcSMatt Macy tocpy = MIN(db->db_size - bufoff, PAGESIZE); 327eda14cbcSMatt Macy va = zfs_map_page(m, &sf); 328eda14cbcSMatt Macy bcopy((char *)db->db_data + bufoff, va, tocpy); 329eda14cbcSMatt Macy if (tocpy < PAGESIZE) { 330eda14cbcSMatt Macy ASSERT(i == *rahead - 1); 331eda14cbcSMatt Macy ASSERT((db->db_size & PAGE_MASK) != 0); 332eda14cbcSMatt Macy bzero(va + tocpy, PAGESIZE - tocpy); 333eda14cbcSMatt Macy } 334eda14cbcSMatt Macy zfs_unmap_page(sf); 335eda14cbcSMatt Macy vm_page_valid(m); 336eda14cbcSMatt Macy dmu_page_lock(m); 337eda14cbcSMatt Macy if ((m->busy_lock & VPB_BIT_WAITERS) != 0) 338eda14cbcSMatt Macy vm_page_activate(m); 339eda14cbcSMatt Macy else 340eda14cbcSMatt Macy vm_page_deactivate(m); 341eda14cbcSMatt Macy dmu_page_unlock(m); 342eda14cbcSMatt Macy vm_page_do_sunbusy(m); 343eda14cbcSMatt Macy } 344eda14cbcSMatt Macy *rahead = i; 345eda14cbcSMatt Macy zfs_vmobject_wunlock_12(vmobj); 346eda14cbcSMatt Macy 347eda14cbcSMatt Macy dmu_buf_rele_array(dbp, numbufs, FTAG); 348eda14cbcSMatt Macy return (0); 349eda14cbcSMatt Macy } 350