1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * Copyright (c) 2020 iXsystems, Inc.
3eda14cbcSMatt Macy  * All rights reserved.
4eda14cbcSMatt Macy  *
5eda14cbcSMatt Macy  * Redistribution and use in source and binary forms, with or without
6eda14cbcSMatt Macy  * modification, are permitted provided that the following conditions
7eda14cbcSMatt Macy  * are met:
8eda14cbcSMatt Macy  * 1. Redistributions of source code must retain the above copyright
9eda14cbcSMatt Macy  *    notice, this list of conditions and the following disclaimer.
10eda14cbcSMatt Macy  * 2. Redistributions in binary form must reproduce the above copyright
11eda14cbcSMatt Macy  *    notice, this list of conditions and the following disclaimer in the
12eda14cbcSMatt Macy  *    documentation and/or other materials provided with the distribution.
13eda14cbcSMatt Macy  *
14eda14cbcSMatt Macy  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15eda14cbcSMatt Macy  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16eda14cbcSMatt Macy  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17eda14cbcSMatt Macy  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18eda14cbcSMatt Macy  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19eda14cbcSMatt Macy  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20eda14cbcSMatt Macy  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21eda14cbcSMatt Macy  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22eda14cbcSMatt Macy  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23eda14cbcSMatt Macy  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24eda14cbcSMatt Macy  * SUCH DAMAGE.
25eda14cbcSMatt Macy  *
26eda14cbcSMatt Macy  */
27eda14cbcSMatt Macy 
28eda14cbcSMatt Macy #include <sys/types.h>
29eda14cbcSMatt Macy #include <sys/param.h>
30eda14cbcSMatt Macy #include <sys/dmu.h>
31eda14cbcSMatt Macy #include <sys/dmu_impl.h>
32eda14cbcSMatt Macy #include <sys/dmu_tx.h>
33eda14cbcSMatt Macy #include <sys/dbuf.h>
34eda14cbcSMatt Macy #include <sys/dnode.h>
35eda14cbcSMatt Macy #include <sys/zfs_context.h>
36eda14cbcSMatt Macy #include <sys/dmu_objset.h>
37eda14cbcSMatt Macy #include <sys/dmu_traverse.h>
38eda14cbcSMatt Macy #include <sys/dsl_dataset.h>
39eda14cbcSMatt Macy #include <sys/dsl_dir.h>
40eda14cbcSMatt Macy #include <sys/dsl_pool.h>
41eda14cbcSMatt Macy #include <sys/dsl_synctask.h>
42eda14cbcSMatt Macy #include <sys/dsl_prop.h>
43eda14cbcSMatt Macy #include <sys/dmu_zfetch.h>
44eda14cbcSMatt Macy #include <sys/zfs_ioctl.h>
45eda14cbcSMatt Macy #include <sys/zap.h>
46eda14cbcSMatt Macy #include <sys/zio_checksum.h>
47eda14cbcSMatt Macy #include <sys/zio_compress.h>
48eda14cbcSMatt Macy #include <sys/sa.h>
49eda14cbcSMatt Macy #include <sys/zfeature.h>
50eda14cbcSMatt Macy #include <sys/abd.h>
51eda14cbcSMatt Macy #include <sys/zfs_rlock.h>
52eda14cbcSMatt Macy #include <sys/racct.h>
53eda14cbcSMatt Macy #include <sys/vm.h>
54eda14cbcSMatt Macy #include <sys/zfs_znode.h>
55eda14cbcSMatt Macy #include <sys/zfs_vnops.h>
56eda14cbcSMatt Macy 
57eda14cbcSMatt Macy #include <sys/ccompat.h>
58eda14cbcSMatt Macy 
/*
 * Convert a VM page index into a byte offset.  Only defined here when the
 * system headers have not already provided it.
 */
#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Busy-state allocation flags used when grabbing read-behind/read-ahead
 * pages.  The set of flags depends on the FreeBSD version.
 *
 * The multi-flag expansion is parenthesized so that the macro always acts
 * as a single operand, e.g. in "flags & VM_ALLOC_BUSY_FLAGS".
 */
#if  __FreeBSD_version < 1300051
#define	VM_ALLOC_BUSY_FLAGS	VM_ALLOC_NOBUSY
#else
#define	VM_ALLOC_BUSY_FLAGS	(VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
#endif


/*
 * Page lock wrappers: older FreeBSD versions take the per-page lock around
 * the page queue operations below; on newer versions these expand to
 * nothing.
 */
#if __FreeBSD_version < 1300072
#define	dmu_page_lock(m)	vm_page_lock(m)
#define	dmu_page_unlock(m)	vm_page_unlock(m)
#else
#define	dmu_page_lock(m)
#define	dmu_page_unlock(m)
#endif
77eda14cbcSMatt Macy 
78eda14cbcSMatt Macy int
dmu_write_pages(objset_t * os,uint64_t object,uint64_t offset,uint64_t size,vm_page_t * ma,dmu_tx_t * tx)79eda14cbcSMatt Macy dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
80eda14cbcSMatt Macy     vm_page_t *ma, dmu_tx_t *tx)
81eda14cbcSMatt Macy {
82eda14cbcSMatt Macy 	dmu_buf_t **dbp;
83eda14cbcSMatt Macy 	struct sf_buf *sf;
84eda14cbcSMatt Macy 	int numbufs, i;
85eda14cbcSMatt Macy 	int err;
86eda14cbcSMatt Macy 
87eda14cbcSMatt Macy 	if (size == 0)
88eda14cbcSMatt Macy 		return (0);
89eda14cbcSMatt Macy 
90eda14cbcSMatt Macy 	err = dmu_buf_hold_array(os, object, offset, size,
91eda14cbcSMatt Macy 	    FALSE, FTAG, &numbufs, &dbp);
92eda14cbcSMatt Macy 	if (err)
93eda14cbcSMatt Macy 		return (err);
94eda14cbcSMatt Macy 
95eda14cbcSMatt Macy 	for (i = 0; i < numbufs; i++) {
96eda14cbcSMatt Macy 		int tocpy, copied, thiscpy;
97eda14cbcSMatt Macy 		int bufoff;
98eda14cbcSMatt Macy 		dmu_buf_t *db = dbp[i];
99eda14cbcSMatt Macy 		caddr_t va;
100eda14cbcSMatt Macy 
10116038816SMartin Matuska 		ASSERT3U(size, >, 0);
102eda14cbcSMatt Macy 		ASSERT3U(db->db_size, >=, PAGESIZE);
103eda14cbcSMatt Macy 
104eda14cbcSMatt Macy 		bufoff = offset - db->db_offset;
105eda14cbcSMatt Macy 		tocpy = (int)MIN(db->db_size - bufoff, size);
106eda14cbcSMatt Macy 
107eda14cbcSMatt Macy 		ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
108eda14cbcSMatt Macy 
109eda14cbcSMatt Macy 		if (tocpy == db->db_size)
110*188408daSMartin Matuska 			dmu_buf_will_fill(db, tx, B_FALSE);
111eda14cbcSMatt Macy 		else
112eda14cbcSMatt Macy 			dmu_buf_will_dirty(db, tx);
113eda14cbcSMatt Macy 
114eda14cbcSMatt Macy 		for (copied = 0; copied < tocpy; copied += PAGESIZE) {
115eda14cbcSMatt Macy 			ASSERT3U(ptoa((*ma)->pindex), ==,
116eda14cbcSMatt Macy 			    db->db_offset + bufoff);
117eda14cbcSMatt Macy 			thiscpy = MIN(PAGESIZE, tocpy - copied);
118eda14cbcSMatt Macy 			va = zfs_map_page(*ma, &sf);
119da5137abSMartin Matuska 			memcpy((char *)db->db_data + bufoff, va, thiscpy);
120eda14cbcSMatt Macy 			zfs_unmap_page(sf);
121eda14cbcSMatt Macy 			ma += 1;
122eda14cbcSMatt Macy 			bufoff += PAGESIZE;
123eda14cbcSMatt Macy 		}
124eda14cbcSMatt Macy 
125eda14cbcSMatt Macy 		if (tocpy == db->db_size)
126*188408daSMartin Matuska 			dmu_buf_fill_done(db, tx, B_FALSE);
127eda14cbcSMatt Macy 
128eda14cbcSMatt Macy 		offset += tocpy;
129eda14cbcSMatt Macy 		size -= tocpy;
130eda14cbcSMatt Macy 	}
131eda14cbcSMatt Macy 	dmu_buf_rele_array(dbp, numbufs, FTAG);
132eda14cbcSMatt Macy 	return (err);
133eda14cbcSMatt Macy }
134eda14cbcSMatt Macy 
135eda14cbcSMatt Macy int
dmu_read_pages(objset_t * os,uint64_t object,vm_page_t * ma,int count,int * rbehind,int * rahead,int last_size)136eda14cbcSMatt Macy dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
137eda14cbcSMatt Macy     int *rbehind, int *rahead, int last_size)
138eda14cbcSMatt Macy {
139eda14cbcSMatt Macy 	struct sf_buf *sf;
140eda14cbcSMatt Macy 	vm_object_t vmobj;
141eda14cbcSMatt Macy 	vm_page_t m;
142eda14cbcSMatt Macy 	dmu_buf_t **dbp;
143eda14cbcSMatt Macy 	dmu_buf_t *db;
144eda14cbcSMatt Macy 	caddr_t va;
145eda14cbcSMatt Macy 	int numbufs, i;
146eda14cbcSMatt Macy 	int bufoff, pgoff, tocpy;
147eda14cbcSMatt Macy 	int mi, di;
148eda14cbcSMatt Macy 	int err;
149eda14cbcSMatt Macy 
150eda14cbcSMatt Macy 	ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
15116038816SMartin Matuska 	ASSERT3S(last_size, <=, PAGE_SIZE);
152eda14cbcSMatt Macy 
153eda14cbcSMatt Macy 	err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
154eda14cbcSMatt Macy 	    IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
155eda14cbcSMatt Macy 	if (err != 0)
156eda14cbcSMatt Macy 		return (err);
157eda14cbcSMatt Macy 
158eda14cbcSMatt Macy #ifdef ZFS_DEBUG
159eda14cbcSMatt Macy 	IMPLY(last_size < PAGE_SIZE, *rahead == 0);
160eda14cbcSMatt Macy 	if (dbp[0]->db_offset != 0 || numbufs > 1) {
161eda14cbcSMatt Macy 		for (i = 0; i < numbufs; i++) {
162eda14cbcSMatt Macy 			ASSERT(ISP2(dbp[i]->db_size));
16316038816SMartin Matuska 			ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0);
164eda14cbcSMatt Macy 			ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
165eda14cbcSMatt Macy 		}
166eda14cbcSMatt Macy 	}
167eda14cbcSMatt Macy #endif
168eda14cbcSMatt Macy 
169eda14cbcSMatt Macy 	vmobj = ma[0]->object;
170eda14cbcSMatt Macy 	zfs_vmobject_wlock_12(vmobj);
171eda14cbcSMatt Macy 
172eda14cbcSMatt Macy 	db = dbp[0];
173eda14cbcSMatt Macy 	for (i = 0; i < *rbehind; i++) {
174eda14cbcSMatt Macy 		m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
175eda14cbcSMatt Macy 		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
176eda14cbcSMatt Macy 		if (m == NULL)
177eda14cbcSMatt Macy 			break;
178eda14cbcSMatt Macy 		if (!vm_page_none_valid(m)) {
179eda14cbcSMatt Macy 			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
180eda14cbcSMatt Macy 			vm_page_do_sunbusy(m);
181eda14cbcSMatt Macy 			break;
182eda14cbcSMatt Macy 		}
18316038816SMartin Matuska 		ASSERT3U(m->dirty, ==, 0);
184eda14cbcSMatt Macy 		ASSERT(!pmap_page_is_write_mapped(m));
185eda14cbcSMatt Macy 
18616038816SMartin Matuska 		ASSERT3U(db->db_size, >, PAGE_SIZE);
187eda14cbcSMatt Macy 		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
188eda14cbcSMatt Macy 		va = zfs_map_page(m, &sf);
189da5137abSMartin Matuska 		memcpy(va, (char *)db->db_data + bufoff, PAGESIZE);
190eda14cbcSMatt Macy 		zfs_unmap_page(sf);
191eda14cbcSMatt Macy 		vm_page_valid(m);
192eda14cbcSMatt Macy 		dmu_page_lock(m);
193eda14cbcSMatt Macy 		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
194eda14cbcSMatt Macy 			vm_page_activate(m);
195eda14cbcSMatt Macy 		else
196eda14cbcSMatt Macy 			vm_page_deactivate(m);
197eda14cbcSMatt Macy 		dmu_page_unlock(m);
198eda14cbcSMatt Macy 		vm_page_do_sunbusy(m);
199eda14cbcSMatt Macy 	}
200eda14cbcSMatt Macy 	*rbehind = i;
201eda14cbcSMatt Macy 
202eda14cbcSMatt Macy 	bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
203eda14cbcSMatt Macy 	pgoff = 0;
204eda14cbcSMatt Macy 	for (mi = 0, di = 0; mi < count && di < numbufs; ) {
205eda14cbcSMatt Macy 		if (pgoff == 0) {
206eda14cbcSMatt Macy 			m = ma[mi];
207eda14cbcSMatt Macy 			if (m != bogus_page) {
208eda14cbcSMatt Macy 				vm_page_assert_xbusied(m);
209eda14cbcSMatt Macy 				ASSERT(vm_page_none_valid(m));
21016038816SMartin Matuska 				ASSERT3U(m->dirty, ==, 0);
211eda14cbcSMatt Macy 				ASSERT(!pmap_page_is_write_mapped(m));
212eda14cbcSMatt Macy 				va = zfs_map_page(m, &sf);
213eda14cbcSMatt Macy 			}
214eda14cbcSMatt Macy 		}
215eda14cbcSMatt Macy 		if (bufoff == 0)
216eda14cbcSMatt Macy 			db = dbp[di];
217eda14cbcSMatt Macy 
218eda14cbcSMatt Macy 		if (m != bogus_page) {
219eda14cbcSMatt Macy 			ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
220eda14cbcSMatt Macy 			    db->db_offset + bufoff);
221eda14cbcSMatt Macy 		}
222eda14cbcSMatt Macy 
223eda14cbcSMatt Macy 		/*
224eda14cbcSMatt Macy 		 * We do not need to clamp the copy size by the file
225eda14cbcSMatt Macy 		 * size as the last block is zero-filled beyond the
226eda14cbcSMatt Macy 		 * end of file anyway.
227eda14cbcSMatt Macy 		 */
228eda14cbcSMatt Macy 		tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
22916038816SMartin Matuska 		ASSERT3S(tocpy, >=, 0);
230eda14cbcSMatt Macy 		if (m != bogus_page)
231da5137abSMartin Matuska 			memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy);
232eda14cbcSMatt Macy 
233eda14cbcSMatt Macy 		pgoff += tocpy;
23416038816SMartin Matuska 		ASSERT3S(pgoff, >=, 0);
23516038816SMartin Matuska 		ASSERT3S(pgoff, <=, PAGESIZE);
236eda14cbcSMatt Macy 		if (pgoff == PAGESIZE) {
237eda14cbcSMatt Macy 			if (m != bogus_page) {
238eda14cbcSMatt Macy 				zfs_unmap_page(sf);
239eda14cbcSMatt Macy 				vm_page_valid(m);
240eda14cbcSMatt Macy 			}
24116038816SMartin Matuska 			ASSERT3S(mi, <, count);
242eda14cbcSMatt Macy 			mi++;
243eda14cbcSMatt Macy 			pgoff = 0;
244eda14cbcSMatt Macy 		}
245eda14cbcSMatt Macy 
246eda14cbcSMatt Macy 		bufoff += tocpy;
24716038816SMartin Matuska 		ASSERT3S(bufoff, >=, 0);
24816038816SMartin Matuska 		ASSERT3S(bufoff, <=, db->db_size);
249eda14cbcSMatt Macy 		if (bufoff == db->db_size) {
25016038816SMartin Matuska 			ASSERT3S(di, <, numbufs);
251eda14cbcSMatt Macy 			di++;
252eda14cbcSMatt Macy 			bufoff = 0;
253eda14cbcSMatt Macy 		}
254eda14cbcSMatt Macy 	}
255eda14cbcSMatt Macy 
256eda14cbcSMatt Macy #ifdef ZFS_DEBUG
257eda14cbcSMatt Macy 	/*
258eda14cbcSMatt Macy 	 * Three possibilities:
259eda14cbcSMatt Macy 	 * - last requested page ends at a buffer boundary and , thus,
260eda14cbcSMatt Macy 	 *   all pages and buffers have been iterated;
261eda14cbcSMatt Macy 	 * - all requested pages are filled, but the last buffer
262eda14cbcSMatt Macy 	 *   has not been exhausted;
263eda14cbcSMatt Macy 	 *   the read-ahead is possible only in this case;
264eda14cbcSMatt Macy 	 * - all buffers have been read, but the last page has not been
265eda14cbcSMatt Macy 	 *   fully filled;
266eda14cbcSMatt Macy 	 *   this is only possible if the file has only a single buffer
267eda14cbcSMatt Macy 	 *   with a size that is not a multiple of the page size.
268eda14cbcSMatt Macy 	 */
269eda14cbcSMatt Macy 	if (mi == count) {
27016038816SMartin Matuska 		ASSERT3S(di, >=, numbufs - 1);
271eda14cbcSMatt Macy 		IMPLY(*rahead != 0, di == numbufs - 1);
272eda14cbcSMatt Macy 		IMPLY(*rahead != 0, bufoff != 0);
27316038816SMartin Matuska 		ASSERT0(pgoff);
274eda14cbcSMatt Macy 	}
275eda14cbcSMatt Macy 	if (di == numbufs) {
27616038816SMartin Matuska 		ASSERT3S(mi, >=, count - 1);
27716038816SMartin Matuska 		ASSERT0(*rahead);
278eda14cbcSMatt Macy 		IMPLY(pgoff == 0, mi == count);
279eda14cbcSMatt Macy 		if (pgoff != 0) {
28016038816SMartin Matuska 			ASSERT3S(mi, ==, count - 1);
28116038816SMartin Matuska 			ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0);
282eda14cbcSMatt Macy 		}
283eda14cbcSMatt Macy 	}
284eda14cbcSMatt Macy #endif
285eda14cbcSMatt Macy 	if (pgoff != 0) {
28616038816SMartin Matuska 		ASSERT3P(m, !=, bogus_page);
287da5137abSMartin Matuska 		memset(va + pgoff, 0, PAGESIZE - pgoff);
288eda14cbcSMatt Macy 		zfs_unmap_page(sf);
289eda14cbcSMatt Macy 		vm_page_valid(m);
290eda14cbcSMatt Macy 	}
291eda14cbcSMatt Macy 
292eda14cbcSMatt Macy 	for (i = 0; i < *rahead; i++) {
293eda14cbcSMatt Macy 		m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
294eda14cbcSMatt Macy 		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
295eda14cbcSMatt Macy 		if (m == NULL)
296eda14cbcSMatt Macy 			break;
297eda14cbcSMatt Macy 		if (!vm_page_none_valid(m)) {
298eda14cbcSMatt Macy 			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
299eda14cbcSMatt Macy 			vm_page_do_sunbusy(m);
300eda14cbcSMatt Macy 			break;
301eda14cbcSMatt Macy 		}
30216038816SMartin Matuska 		ASSERT3U(m->dirty, ==, 0);
303caed7b1cSMartin Matuska 		ASSERT(!pmap_page_is_write_mapped(m));
304eda14cbcSMatt Macy 
30516038816SMartin Matuska 		ASSERT3U(db->db_size, >, PAGE_SIZE);
306eda14cbcSMatt Macy 		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
307eda14cbcSMatt Macy 		tocpy = MIN(db->db_size - bufoff, PAGESIZE);
308eda14cbcSMatt Macy 		va = zfs_map_page(m, &sf);
309da5137abSMartin Matuska 		memcpy(va, (char *)db->db_data + bufoff, tocpy);
310eda14cbcSMatt Macy 		if (tocpy < PAGESIZE) {
31116038816SMartin Matuska 			ASSERT3S(i, ==, *rahead - 1);
31216038816SMartin Matuska 			ASSERT3U((db->db_size & PAGE_MASK), !=, 0);
313da5137abSMartin Matuska 			memset(va + tocpy, 0, PAGESIZE - tocpy);
314eda14cbcSMatt Macy 		}
315eda14cbcSMatt Macy 		zfs_unmap_page(sf);
316eda14cbcSMatt Macy 		vm_page_valid(m);
317eda14cbcSMatt Macy 		dmu_page_lock(m);
318eda14cbcSMatt Macy 		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
319eda14cbcSMatt Macy 			vm_page_activate(m);
320eda14cbcSMatt Macy 		else
321eda14cbcSMatt Macy 			vm_page_deactivate(m);
322eda14cbcSMatt Macy 		dmu_page_unlock(m);
323eda14cbcSMatt Macy 		vm_page_do_sunbusy(m);
324eda14cbcSMatt Macy 	}
325eda14cbcSMatt Macy 	*rahead = i;
326eda14cbcSMatt Macy 	zfs_vmobject_wunlock_12(vmobj);
327eda14cbcSMatt Macy 
328eda14cbcSMatt Macy 	dmu_buf_rele_array(dbp, numbufs, FTAG);
329eda14cbcSMatt Macy 	return (0);
330eda14cbcSMatt Macy }
331