1 /*
2  * Copyright (c) 2020 iXsystems, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/dmu.h>
31 #include <sys/dmu_impl.h>
32 #include <sys/dmu_tx.h>
33 #include <sys/dbuf.h>
34 #include <sys/dnode.h>
35 #include <sys/zfs_context.h>
36 #include <sys/dmu_objset.h>
37 #include <sys/dmu_traverse.h>
38 #include <sys/dsl_dataset.h>
39 #include <sys/dsl_dir.h>
40 #include <sys/dsl_pool.h>
41 #include <sys/dsl_synctask.h>
42 #include <sys/dsl_prop.h>
43 #include <sys/dmu_zfetch.h>
44 #include <sys/zfs_ioctl.h>
45 #include <sys/zap.h>
46 #include <sys/zio_checksum.h>
47 #include <sys/zio_compress.h>
48 #include <sys/sa.h>
49 #include <sys/zfeature.h>
50 #include <sys/abd.h>
51 #include <sys/zfs_rlock.h>
52 #include <sys/racct.h>
53 #include <sys/vm.h>
54 #include <sys/zfs_znode.h>
55 #include <sys/zfs_vnops.h>
56 
57 #include <sys/ccompat.h>
58 
/*
 * Convert a VM page index into a byte offset by shifting it left by
 * PAGE_SHIFT.  Only defined here if nothing else has provided it.
 */
#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Busy-state allocation flags OR-ed into the vm_page_grab_unlocked() calls
 * that pick up read-behind and read-ahead pages.  Which flags are used
 * depends on __FreeBSD_version.
 *
 * The multi-flag replacement list is parenthesized so that the macro always
 * expands as a single operand, whatever operator it is placed next to
 * (e.g. "x & VM_ALLOC_BUSY_FLAGS" or "~VM_ALLOC_BUSY_FLAGS").
 */
#if  __FreeBSD_version < 1300051
#define	VM_ALLOC_BUSY_FLAGS	VM_ALLOC_NOBUSY
#else
#define	VM_ALLOC_BUSY_FLAGS	(VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
#endif


/*
 * Page lock wrappers.  Before __FreeBSD_version 1300072 they map to
 * vm_page_lock()/vm_page_unlock(); from that version on they expand to
 * nothing, so "dmu_page_lock(m);" becomes an empty statement.
 */
#if __FreeBSD_version < 1300072
#define	dmu_page_lock(m)	vm_page_lock(m)
#define	dmu_page_unlock(m)	vm_page_unlock(m)
#else
#define	dmu_page_lock(m)
#define	dmu_page_unlock(m)
#endif
77 
78 int
dmu_write_pages(objset_t * os,uint64_t object,uint64_t offset,uint64_t size,vm_page_t * ma,dmu_tx_t * tx)79 dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
80     vm_page_t *ma, dmu_tx_t *tx)
81 {
82 	dmu_buf_t **dbp;
83 	struct sf_buf *sf;
84 	int numbufs, i;
85 	int err;
86 
87 	if (size == 0)
88 		return (0);
89 
90 	err = dmu_buf_hold_array(os, object, offset, size,
91 	    FALSE, FTAG, &numbufs, &dbp);
92 	if (err)
93 		return (err);
94 
95 	for (i = 0; i < numbufs; i++) {
96 		int tocpy, copied, thiscpy;
97 		int bufoff;
98 		dmu_buf_t *db = dbp[i];
99 		caddr_t va;
100 
101 		ASSERT3U(size, >, 0);
102 		ASSERT3U(db->db_size, >=, PAGESIZE);
103 
104 		bufoff = offset - db->db_offset;
105 		tocpy = (int)MIN(db->db_size - bufoff, size);
106 
107 		ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
108 
109 		if (tocpy == db->db_size)
110 			dmu_buf_will_fill(db, tx, B_FALSE);
111 		else
112 			dmu_buf_will_dirty(db, tx);
113 
114 		for (copied = 0; copied < tocpy; copied += PAGESIZE) {
115 			ASSERT3U(ptoa((*ma)->pindex), ==,
116 			    db->db_offset + bufoff);
117 			thiscpy = MIN(PAGESIZE, tocpy - copied);
118 			va = zfs_map_page(*ma, &sf);
119 			memcpy((char *)db->db_data + bufoff, va, thiscpy);
120 			zfs_unmap_page(sf);
121 			ma += 1;
122 			bufoff += PAGESIZE;
123 		}
124 
125 		if (tocpy == db->db_size)
126 			dmu_buf_fill_done(db, tx, B_FALSE);
127 
128 		offset += tocpy;
129 		size -= tocpy;
130 	}
131 	dmu_buf_rele_array(dbp, numbufs, FTAG);
132 	return (err);
133 }
134 
135 int
dmu_read_pages(objset_t * os,uint64_t object,vm_page_t * ma,int count,int * rbehind,int * rahead,int last_size)136 dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
137     int *rbehind, int *rahead, int last_size)
138 {
139 	struct sf_buf *sf;
140 	vm_object_t vmobj;
141 	vm_page_t m;
142 	dmu_buf_t **dbp;
143 	dmu_buf_t *db;
144 	caddr_t va;
145 	int numbufs, i;
146 	int bufoff, pgoff, tocpy;
147 	int mi, di;
148 	int err;
149 
150 	ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
151 	ASSERT3S(last_size, <=, PAGE_SIZE);
152 
153 	err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
154 	    IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
155 	if (err != 0)
156 		return (err);
157 
158 #ifdef ZFS_DEBUG
159 	IMPLY(last_size < PAGE_SIZE, *rahead == 0);
160 	if (dbp[0]->db_offset != 0 || numbufs > 1) {
161 		for (i = 0; i < numbufs; i++) {
162 			ASSERT(ISP2(dbp[i]->db_size));
163 			ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0);
164 			ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
165 		}
166 	}
167 #endif
168 
169 	vmobj = ma[0]->object;
170 	zfs_vmobject_wlock_12(vmobj);
171 
172 	db = dbp[0];
173 	for (i = 0; i < *rbehind; i++) {
174 		m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
175 		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
176 		if (m == NULL)
177 			break;
178 		if (!vm_page_none_valid(m)) {
179 			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
180 			vm_page_do_sunbusy(m);
181 			break;
182 		}
183 		ASSERT3U(m->dirty, ==, 0);
184 		ASSERT(!pmap_page_is_write_mapped(m));
185 
186 		ASSERT3U(db->db_size, >, PAGE_SIZE);
187 		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
188 		va = zfs_map_page(m, &sf);
189 		memcpy(va, (char *)db->db_data + bufoff, PAGESIZE);
190 		zfs_unmap_page(sf);
191 		vm_page_valid(m);
192 		dmu_page_lock(m);
193 		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
194 			vm_page_activate(m);
195 		else
196 			vm_page_deactivate(m);
197 		dmu_page_unlock(m);
198 		vm_page_do_sunbusy(m);
199 	}
200 	*rbehind = i;
201 
202 	bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
203 	pgoff = 0;
204 	for (mi = 0, di = 0; mi < count && di < numbufs; ) {
205 		if (pgoff == 0) {
206 			m = ma[mi];
207 			if (m != bogus_page) {
208 				vm_page_assert_xbusied(m);
209 				ASSERT(vm_page_none_valid(m));
210 				ASSERT3U(m->dirty, ==, 0);
211 				ASSERT(!pmap_page_is_write_mapped(m));
212 				va = zfs_map_page(m, &sf);
213 			}
214 		}
215 		if (bufoff == 0)
216 			db = dbp[di];
217 
218 		if (m != bogus_page) {
219 			ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
220 			    db->db_offset + bufoff);
221 		}
222 
223 		/*
224 		 * We do not need to clamp the copy size by the file
225 		 * size as the last block is zero-filled beyond the
226 		 * end of file anyway.
227 		 */
228 		tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
229 		ASSERT3S(tocpy, >=, 0);
230 		if (m != bogus_page)
231 			memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy);
232 
233 		pgoff += tocpy;
234 		ASSERT3S(pgoff, >=, 0);
235 		ASSERT3S(pgoff, <=, PAGESIZE);
236 		if (pgoff == PAGESIZE) {
237 			if (m != bogus_page) {
238 				zfs_unmap_page(sf);
239 				vm_page_valid(m);
240 			}
241 			ASSERT3S(mi, <, count);
242 			mi++;
243 			pgoff = 0;
244 		}
245 
246 		bufoff += tocpy;
247 		ASSERT3S(bufoff, >=, 0);
248 		ASSERT3S(bufoff, <=, db->db_size);
249 		if (bufoff == db->db_size) {
250 			ASSERT3S(di, <, numbufs);
251 			di++;
252 			bufoff = 0;
253 		}
254 	}
255 
256 #ifdef ZFS_DEBUG
257 	/*
258 	 * Three possibilities:
259 	 * - last requested page ends at a buffer boundary and , thus,
260 	 *   all pages and buffers have been iterated;
261 	 * - all requested pages are filled, but the last buffer
262 	 *   has not been exhausted;
263 	 *   the read-ahead is possible only in this case;
264 	 * - all buffers have been read, but the last page has not been
265 	 *   fully filled;
266 	 *   this is only possible if the file has only a single buffer
267 	 *   with a size that is not a multiple of the page size.
268 	 */
269 	if (mi == count) {
270 		ASSERT3S(di, >=, numbufs - 1);
271 		IMPLY(*rahead != 0, di == numbufs - 1);
272 		IMPLY(*rahead != 0, bufoff != 0);
273 		ASSERT0(pgoff);
274 	}
275 	if (di == numbufs) {
276 		ASSERT3S(mi, >=, count - 1);
277 		ASSERT0(*rahead);
278 		IMPLY(pgoff == 0, mi == count);
279 		if (pgoff != 0) {
280 			ASSERT3S(mi, ==, count - 1);
281 			ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0);
282 		}
283 	}
284 #endif
285 	if (pgoff != 0) {
286 		ASSERT3P(m, !=, bogus_page);
287 		memset(va + pgoff, 0, PAGESIZE - pgoff);
288 		zfs_unmap_page(sf);
289 		vm_page_valid(m);
290 	}
291 
292 	for (i = 0; i < *rahead; i++) {
293 		m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
294 		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
295 		if (m == NULL)
296 			break;
297 		if (!vm_page_none_valid(m)) {
298 			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
299 			vm_page_do_sunbusy(m);
300 			break;
301 		}
302 		ASSERT3U(m->dirty, ==, 0);
303 		ASSERT(!pmap_page_is_write_mapped(m));
304 
305 		ASSERT3U(db->db_size, >, PAGE_SIZE);
306 		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
307 		tocpy = MIN(db->db_size - bufoff, PAGESIZE);
308 		va = zfs_map_page(m, &sf);
309 		memcpy(va, (char *)db->db_data + bufoff, tocpy);
310 		if (tocpy < PAGESIZE) {
311 			ASSERT3S(i, ==, *rahead - 1);
312 			ASSERT3U((db->db_size & PAGE_MASK), !=, 0);
313 			memset(va + tocpy, 0, PAGESIZE - tocpy);
314 		}
315 		zfs_unmap_page(sf);
316 		vm_page_valid(m);
317 		dmu_page_lock(m);
318 		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
319 			vm_page_activate(m);
320 		else
321 			vm_page_deactivate(m);
322 		dmu_page_unlock(m);
323 		vm_page_do_sunbusy(m);
324 	}
325 	*rahead = i;
326 	zfs_vmobject_wunlock_12(vmobj);
327 
328 	dmu_buf_rele_array(dbp, numbufs, FTAG);
329 	return (0);
330 }
331