1 /* 2 * Copyright (c) 2020 iXsystems, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/dmu.h> 34 #include <sys/dmu_impl.h> 35 #include <sys/dmu_tx.h> 36 #include <sys/dbuf.h> 37 #include <sys/dnode.h> 38 #include <sys/zfs_context.h> 39 #include <sys/dmu_objset.h> 40 #include <sys/dmu_traverse.h> 41 #include <sys/dsl_dataset.h> 42 #include <sys/dsl_dir.h> 43 #include <sys/dsl_pool.h> 44 #include <sys/dsl_synctask.h> 45 #include <sys/dsl_prop.h> 46 #include <sys/dmu_zfetch.h> 47 #include <sys/zfs_ioctl.h> 48 #include <sys/zap.h> 49 #include <sys/zio_checksum.h> 50 #include <sys/zio_compress.h> 51 #include <sys/sa.h> 52 #include <sys/zfeature.h> 53 #include <sys/abd.h> 54 #include <sys/zfs_rlock.h> 55 #include <sys/racct.h> 56 #include <sys/vm.h> 57 #include <sys/zfs_znode.h> 58 #include <sys/zfs_vnops.h> 59 60 #include <sys/ccompat.h> 61 62 #ifndef IDX_TO_OFF 63 #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) 64 #endif 65 66 #if __FreeBSD_version < 1300051 67 #define VM_ALLOC_BUSY_FLAGS VM_ALLOC_NOBUSY 68 #else 69 #define VM_ALLOC_BUSY_FLAGS VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY 70 #endif 71 72 73 #if __FreeBSD_version < 1300072 74 #define dmu_page_lock(m) vm_page_lock(m) 75 #define dmu_page_unlock(m) vm_page_unlock(m) 76 #else 77 #define dmu_page_lock(m) 78 #define dmu_page_unlock(m) 79 #endif 80 81 int 82 dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 83 vm_page_t *ma, dmu_tx_t *tx) 84 { 85 dmu_buf_t **dbp; 86 struct sf_buf *sf; 87 int numbufs, i; 88 int err; 89 90 if (size == 0) 91 return (0); 92 93 err = dmu_buf_hold_array(os, object, offset, size, 94 FALSE, FTAG, &numbufs, &dbp); 95 if (err) 96 return (err); 97 98 for (i = 0; i < numbufs; i++) { 99 int tocpy, copied, thiscpy; 100 int bufoff; 101 dmu_buf_t *db = dbp[i]; 102 caddr_t va; 103 104 ASSERT3U(size, >, 0); 105 ASSERT3U(db->db_size, >=, PAGESIZE); 106 107 bufoff = offset - db->db_offset; 108 tocpy = (int)MIN(db->db_size - bufoff, size); 109 110 ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 111 112 if (tocpy == db->db_size) 113 dmu_buf_will_fill(db, tx); 114 else 115 dmu_buf_will_dirty(db, tx); 116 117 for (copied = 0; copied < tocpy; copied += PAGESIZE) { 118 ASSERT3U(ptoa((*ma)->pindex), ==, 119 db->db_offset + bufoff); 120 thiscpy = MIN(PAGESIZE, tocpy - copied); 121 va = zfs_map_page(*ma, &sf); 122 memcpy((char *)db->db_data + bufoff, va, thiscpy); 123 zfs_unmap_page(sf); 124 ma += 1; 125 bufoff += PAGESIZE; 126 } 127 128 if (tocpy == db->db_size) 129 dmu_buf_fill_done(db, tx); 130 131 offset += tocpy; 132 size -= tocpy; 133 } 134 dmu_buf_rele_array(dbp, numbufs, FTAG); 135 return (err); 136 } 137 138 int 139 dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, 140 int *rbehind, int *rahead, int last_size) 141 { 142 struct sf_buf *sf; 143 vm_object_t vmobj; 144 vm_page_t m; 145 dmu_buf_t **dbp; 146 dmu_buf_t *db; 147 caddr_t va; 148 int numbufs, i; 149 int bufoff, pgoff, tocpy; 150 int mi, di; 151 int err; 152 153 ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); 154 ASSERT3S(last_size, <=, PAGE_SIZE); 155 156 err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), 157 IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); 158 if (err != 0) 159 return (err); 160 161 #ifdef ZFS_DEBUG 162 IMPLY(last_size < PAGE_SIZE, *rahead == 0); 163 if (dbp[0]->db_offset != 0 || numbufs > 1) { 164 for (i = 0; i < numbufs; i++) { 165 ASSERT(ISP2(dbp[i]->db_size)); 166 ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0); 167 ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); 168 } 169 } 170 #endif 171 172 vmobj = ma[0]->object; 173 zfs_vmobject_wlock_12(vmobj); 174 175 db = dbp[0]; 176 for (i = 0; i < *rbehind; i++) { 177 m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i, 178 VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); 179 if (m == NULL) 180 break; 181 if (!vm_page_none_valid(m)) { 182 ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); 183 vm_page_do_sunbusy(m); 184 break; 185 } 186 ASSERT3U(m->dirty, ==, 0); 187 ASSERT(!pmap_page_is_write_mapped(m)); 188 189 ASSERT3U(db->db_size, >, PAGE_SIZE); 190 bufoff = IDX_TO_OFF(m->pindex) % db->db_size; 191 va = zfs_map_page(m, &sf); 192 memcpy(va, (char *)db->db_data + bufoff, PAGESIZE); 193 zfs_unmap_page(sf); 194 vm_page_valid(m); 195 dmu_page_lock(m); 196 if ((m->busy_lock & VPB_BIT_WAITERS) != 0) 197 vm_page_activate(m); 198 else 199 vm_page_deactivate(m); 200 dmu_page_unlock(m); 201 vm_page_do_sunbusy(m); 202 } 203 *rbehind = i; 204 205 bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size; 206 pgoff = 0; 207 for (mi = 0, di = 0; mi < count && di < numbufs; ) { 208 if (pgoff == 0) { 209 m = ma[mi]; 210 if (m != bogus_page) { 211 vm_page_assert_xbusied(m); 212 ASSERT(vm_page_none_valid(m)); 213 ASSERT3U(m->dirty, ==, 0); 214 ASSERT(!pmap_page_is_write_mapped(m)); 215 va = zfs_map_page(m, &sf); 216 } 217 } 218 if (bufoff == 0) 219 db = dbp[di]; 220 221 if (m != bogus_page) { 222 ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==, 223 db->db_offset + bufoff); 224 } 225 226 /* 227 * We do not need to clamp the copy size by the file 228 * size as the last block is zero-filled beyond the 229 * end of file anyway. 230 */ 231 tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); 232 ASSERT3S(tocpy, >=, 0); 233 if (m != bogus_page) 234 memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy); 235 236 pgoff += tocpy; 237 ASSERT3S(pgoff, >=, 0); 238 ASSERT3S(pgoff, <=, PAGESIZE); 239 if (pgoff == PAGESIZE) { 240 if (m != bogus_page) { 241 zfs_unmap_page(sf); 242 vm_page_valid(m); 243 } 244 ASSERT3S(mi, <, count); 245 mi++; 246 pgoff = 0; 247 } 248 249 bufoff += tocpy; 250 ASSERT3S(bufoff, >=, 0); 251 ASSERT3S(bufoff, <=, db->db_size); 252 if (bufoff == db->db_size) { 253 ASSERT3S(di, <, numbufs); 254 di++; 255 bufoff = 0; 256 } 257 } 258 259 #ifdef ZFS_DEBUG 260 /* 261 * Three possibilities: 262 * - last requested page ends at a buffer boundary and , thus, 263 * all pages and buffers have been iterated; 264 * - all requested pages are filled, but the last buffer 265 * has not been exhausted; 266 * the read-ahead is possible only in this case; 267 * - all buffers have been read, but the last page has not been 268 * fully filled; 269 * this is only possible if the file has only a single buffer 270 * with a size that is not a multiple of the page size. 271 */ 272 if (mi == count) { 273 ASSERT3S(di, >=, numbufs - 1); 274 IMPLY(*rahead != 0, di == numbufs - 1); 275 IMPLY(*rahead != 0, bufoff != 0); 276 ASSERT0(pgoff); 277 } 278 if (di == numbufs) { 279 ASSERT3S(mi, >=, count - 1); 280 ASSERT0(*rahead); 281 IMPLY(pgoff == 0, mi == count); 282 if (pgoff != 0) { 283 ASSERT3S(mi, ==, count - 1); 284 ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0); 285 } 286 } 287 #endif 288 if (pgoff != 0) { 289 ASSERT3P(m, !=, bogus_page); 290 memset(va + pgoff, 0, PAGESIZE - pgoff); 291 zfs_unmap_page(sf); 292 vm_page_valid(m); 293 } 294 295 for (i = 0; i < *rahead; i++) { 296 m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i, 297 VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); 298 if (m == NULL) 299 break; 300 if (!vm_page_none_valid(m)) { 301 ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); 302 vm_page_do_sunbusy(m); 303 break; 304 } 305 ASSERT3U(m->dirty, ==, 0); 306 ASSERT(!pmap_page_is_write_mapped(m)); 307 308 ASSERT3U(db->db_size, >, PAGE_SIZE); 309 bufoff = IDX_TO_OFF(m->pindex) % db->db_size; 310 tocpy = MIN(db->db_size - bufoff, PAGESIZE); 311 va = zfs_map_page(m, &sf); 312 memcpy(va, (char *)db->db_data + bufoff, tocpy); 313 if (tocpy < PAGESIZE) { 314 ASSERT3S(i, ==, *rahead - 1); 315 ASSERT3U((db->db_size & PAGE_MASK), !=, 0); 316 memset(va + tocpy, 0, PAGESIZE - tocpy); 317 } 318 zfs_unmap_page(sf); 319 vm_page_valid(m); 320 dmu_page_lock(m); 321 if ((m->busy_lock & VPB_BIT_WAITERS) != 0) 322 vm_page_activate(m); 323 else 324 vm_page_deactivate(m); 325 dmu_page_unlock(m); 326 vm_page_do_sunbusy(m); 327 } 328 *rahead = i; 329 zfs_vmobject_wunlock_12(vmobj); 330 331 dmu_buf_rele_array(dbp, numbufs, FTAG); 332 return (0); 333 } 334