1 /*
2  * Copyright (c) 2020 iXsystems, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/conf.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/mutex.h>
39 #include <sys/proc.h>
40 #include <sys/errno.h>
41 #include <sys/uio.h>
42 #include <sys/buf.h>
43 #include <sys/file.h>
44 #include <sys/kmem.h>
46 #include <sys/cmn_err.h>
47 #include <sys/stat.h>
48 #include <sys/zfs_ioctl.h>
49 #include <sys/zfs_vfsops.h>
50 #include <sys/zfs_znode.h>
51 #include <sys/zap.h>
52 #include <sys/spa.h>
53 #include <sys/spa_impl.h>
54 #include <sys/vdev.h>
55 #include <sys/vdev_impl.h>
56 #include <sys/dmu.h>
57 #include <sys/dsl_dir.h>
58 #include <sys/dsl_dataset.h>
59 #include <sys/dsl_prop.h>
60 #include <sys/dsl_deleg.h>
61 #include <sys/dmu_objset.h>
62 #include <sys/dmu_impl.h>
63 #include <sys/dmu_tx.h>
64 #include <sys/sunddi.h>
65 #include <sys/policy.h>
66 #include <sys/zone.h>
67 #include <sys/nvpair.h>
68 #include <sys/mount.h>
69 #include <sys/taskqueue.h>
70 #include <sys/sdt.h>
71 #include <sys/fs/zfs.h>
72 #include <sys/zfs_ctldir.h>
73 #include <sys/zfs_dir.h>
74 #include <sys/zfs_onexit.h>
75 #include <sys/zvol.h>
76 #include <sys/dsl_scan.h>
78 #include <sys/dmu_send.h>
79 #include <sys/dsl_destroy.h>
80 #include <sys/dsl_bookmark.h>
81 #include <sys/dsl_userhold.h>
82 #include <sys/zfeature.h>
83 #include <sys/zcp.h>
84 #include <sys/zio_checksum.h>
85 #include <sys/vdev_removal.h>
86 #include <sys/dsl_crypt.h>
87 
88 #include <sys/zfs_ioctl_compat.h>
89 #include <sys/zfs_context.h>
90 
91 #include <sys/arc_impl.h>
92 #include <sys/dsl_pool.h>
93 
94 
95 /* BEGIN CSTYLED */
96 SYSCTL_DECL(_vfs_zfs);
97 SYSCTL_NODE(_vfs_zfs, OID_AUTO, arc, CTLFLAG_RW, 0, "ZFS adaptive replacement cache");
98 SYSCTL_NODE(_vfs_zfs, OID_AUTO, condense, CTLFLAG_RW, 0, "ZFS condense");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf, CTLFLAG_RW, 0, "ZFS dbuf");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf_cache, CTLFLAG_RW, 0, "ZFS dbuf cache");
101 SYSCTL_NODE(_vfs_zfs, OID_AUTO, deadman, CTLFLAG_RW, 0, "ZFS deadman");
102 SYSCTL_NODE(_vfs_zfs, OID_AUTO, dedup, CTLFLAG_RW, 0, "ZFS dedup");
103 SYSCTL_NODE(_vfs_zfs, OID_AUTO, l2arc, CTLFLAG_RW, 0, "ZFS l2arc");
104 SYSCTL_NODE(_vfs_zfs, OID_AUTO, livelist, CTLFLAG_RW, 0, "ZFS livelist");
105 SYSCTL_NODE(_vfs_zfs, OID_AUTO, lua, CTLFLAG_RW, 0, "ZFS lua");
106 SYSCTL_NODE(_vfs_zfs, OID_AUTO, metaslab, CTLFLAG_RW, 0, "ZFS metaslab");
107 SYSCTL_NODE(_vfs_zfs, OID_AUTO, mg, CTLFLAG_RW, 0, "ZFS metaslab group");
108 SYSCTL_NODE(_vfs_zfs, OID_AUTO, multihost, CTLFLAG_RW, 0, "ZFS multihost protection");
109 SYSCTL_NODE(_vfs_zfs, OID_AUTO, prefetch, CTLFLAG_RW, 0, "ZFS prefetch");
110 SYSCTL_NODE(_vfs_zfs, OID_AUTO, reconstruct, CTLFLAG_RW, 0, "ZFS reconstruct");
111 SYSCTL_NODE(_vfs_zfs, OID_AUTO, recv, CTLFLAG_RW, 0, "ZFS receive");
112 SYSCTL_NODE(_vfs_zfs, OID_AUTO, send, CTLFLAG_RW, 0, "ZFS send");
113 SYSCTL_NODE(_vfs_zfs, OID_AUTO, spa, CTLFLAG_RW, 0, "ZFS space allocation");
114 SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RW, 0, "ZFS TRIM");
115 SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0, "ZFS transaction group");
116 SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
117 SYSCTL_NODE(_vfs_zfs, OID_AUTO, vnops, CTLFLAG_RW, 0, "ZFS VNOPS");
118 SYSCTL_NODE(_vfs_zfs, OID_AUTO, zevent, CTLFLAG_RW, 0, "ZFS event");
119 SYSCTL_NODE(_vfs_zfs, OID_AUTO, zil, CTLFLAG_RW, 0, "ZFS ZIL");
120 SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
121 
122 SYSCTL_NODE(_vfs_zfs_livelist, OID_AUTO, condense, CTLFLAG_RW, 0,
123     "ZFS livelist condense");
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV cache");
125 SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, file, CTLFLAG_RW, 0, "ZFS VDEV file");
126 SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0,
127     "ZFS VDEV mirror");
128 
129 SYSCTL_DECL(_vfs_zfs_version);
130 SYSCTL_CONST_STRING(_vfs_zfs_version, OID_AUTO, module, CTLFLAG_RD,
131     (ZFS_META_VERSION "-" ZFS_META_RELEASE), "OpenZFS module version");
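
/*
 * Illustrative usage: the version string can be read from userland,
 * e.g. "sysctl vfs.zfs.version.module".  The value reported depends on
 * the ZFS_META_* build metadata above.
 */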
132 
133 extern arc_state_t ARC_anon;
134 extern arc_state_t ARC_mru;
135 extern arc_state_t ARC_mru_ghost;
136 extern arc_state_t ARC_mfu;
137 extern arc_state_t ARC_mfu_ghost;
138 extern arc_state_t ARC_l2c_only;
139 
140 /*
141  * minimum lifespan of a prefetch block in clock ticks
142  * (initialized in arc_init())
143  */
144 
145 /* arc.c */
146 
147 int
148 param_set_arc_max(SYSCTL_HANDLER_ARGS)
149 {
	unsigned long val;
151 	int err;
152 
153 	val = zfs_arc_max;
154 	err = sysctl_handle_long(oidp, &val, 0, req);
155 	if (err != 0 || req->newptr == NULL)
156 		return (SET_ERROR(err));
157 
158 	if (val != 0 && (val < MIN_ARC_MAX || val <= arc_c_min ||
159 	    val >= arc_all_memory()))
160 		return (SET_ERROR(EINVAL));
161 
162 	zfs_arc_max = val;
163 	arc_tuning_update(B_TRUE);
164 
165 	/* Update the sysctl to the tuned value */
166 	if (val != 0)
167 		zfs_arc_max = arc_c_max;
168 
169 	return (0);
170 }
171 
172 int
173 param_set_arc_min(SYSCTL_HANDLER_ARGS)
174 {
	unsigned long val;
176 	int err;
177 
178 	val = zfs_arc_min;
	err = sysctl_handle_long(oidp, &val, 0, req);
180 	if (err != 0 || req->newptr == NULL)
181 		return (SET_ERROR(err));
182 
183 	if (val != 0 && (val < 2ULL << SPA_MAXBLOCKSHIFT || val > arc_c_max))
184 		return (SET_ERROR(EINVAL));
185 
186 	zfs_arc_min = val;
187 	arc_tuning_update(B_TRUE);
188 
189 	/* Update the sysctl to the tuned value */
190 	if (val != 0)
191 		zfs_arc_min = arc_c_min;
192 
193 	return (0);
194 }
195 
196 /* legacy compat */
197 extern uint64_t l2arc_write_max;	/* def max write size */
198 extern uint64_t l2arc_write_boost;	/* extra warmup write */
199 extern uint64_t l2arc_headroom;		/* # of dev writes */
200 extern uint64_t l2arc_headroom_boost;
201 extern uint64_t l2arc_feed_secs;	/* interval seconds */
202 extern uint64_t l2arc_feed_min_ms;	/* min interval msecs */
203 extern int l2arc_noprefetch;			/* don't cache prefetch bufs */
204 extern int l2arc_feed_again;			/* turbo warmup */
205 extern int l2arc_norw;			/* no reads during writes */
206 
207 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RW,
208     &l2arc_write_max, 0, "max write size (LEGACY)");
209 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RW,
210     &l2arc_write_boost, 0, "extra write during warmup (LEGACY)");
211 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RW,
212     &l2arc_headroom, 0, "number of dev writes (LEGACY)");
213 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RW,
214     &l2arc_feed_secs, 0, "interval seconds (LEGACY)");
215 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RW,
216     &l2arc_feed_min_ms, 0, "min interval milliseconds (LEGACY)");
217 
218 SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RW,
219     &l2arc_noprefetch, 0, "don't cache prefetch bufs (LEGACY)");
220 SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RW,
221     &l2arc_feed_again, 0, "turbo warmup (LEGACY)");
222 SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW,
223     &l2arc_norw, 0, "no reads during writes (LEGACY)");
224 #if 0
225 extern int zfs_compressed_arc_enabled;
226 SYSCTL_INT(_vfs_zfs, OID_AUTO, compressed_arc_enabled, CTLFLAG_RW,
227     &zfs_compressed_arc_enabled, 1, "compressed arc buffers (LEGACY)");
228 #endif
229 
230 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
231     &ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
232 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
233     &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
234     "size of anonymous state");
235 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
236     &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
237     "size of anonymous state");
238 
239 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,
240     &ARC_mru.arcs_size.rc_count, 0, "size of mru state");
241 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
242     &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
243     "size of metadata in mru state");
244 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
245     &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
246     "size of data in mru state");
247 
248 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,
249     &ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state");
250 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
251     &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
252     "size of metadata in mru ghost state");
253 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
254     &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
255     "size of data in mru ghost state");
256 
257 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,
258     &ARC_mfu.arcs_size.rc_count, 0, "size of mfu state");
259 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
260     &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
261     "size of metadata in mfu state");
262 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
263     &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
264     "size of data in mfu state");
265 
266 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,
267     &ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state");
268 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
269     &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
270     "size of metadata in mfu ghost state");
271 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
272     &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
273     "size of data in mfu ghost state");
274 
275 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
    &ARC_l2c_only.arcs_size.rc_count, 0, "size of l2c_only state");
277 
278 static int
279 sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
280 {
281 	int err, val;
282 
283 	val = arc_no_grow_shift;
284 	err = sysctl_handle_int(oidp, &val, 0, req);
285 	if (err != 0 || req->newptr == NULL)
286 		return (err);
287 
	if (val < 0 || val >= arc_shrink_shift)
289 		return (EINVAL);
290 
291 	arc_no_grow_shift = val;
292 	return (0);
293 }
294 
295 SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
296     CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, sizeof (int),
297     sysctl_vfs_zfs_arc_no_grow_shift, "I",
298     "log2(fraction of ARC which must be free to allow growing)");
299 
300 int
301 param_set_arc_long(SYSCTL_HANDLER_ARGS)
302 {
303 	int err;
304 
305 	err = sysctl_handle_long(oidp, arg1, 0, req);
306 	if (err != 0 || req->newptr == NULL)
307 		return (err);
308 
309 	arc_tuning_update(B_TRUE);
310 
311 	return (0);
312 }
313 
314 int
315 param_set_arc_int(SYSCTL_HANDLER_ARGS)
316 {
317 	int err;
318 
319 	err = sysctl_handle_int(oidp, arg1, 0, req);
320 	if (err != 0 || req->newptr == NULL)
321 		return (err);
322 
323 	arc_tuning_update(B_TRUE);
324 
325 	return (0);
326 }
327 
328 SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
329     CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
330     &zfs_arc_min, sizeof (zfs_arc_min), param_set_arc_min, "LU",
331     "min arc size (LEGACY)");
332 SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
333     CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
334     &zfs_arc_max, sizeof (zfs_arc_max), param_set_arc_max, "LU",
335     "max arc size (LEGACY)");
336 
337 /* dbuf.c */
338 
339 
340 /* dmu.c */
341 
342 /* dmu_zfetch.c */
343 SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)");
344 
345 /* max bytes to prefetch per stream (default 8MB) */
346 extern uint32_t	zfetch_max_distance;
347 SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, CTLFLAG_RWTUN,
348     &zfetch_max_distance, 0, "Max bytes to prefetch per stream (LEGACY)");
349 
350 /* max bytes to prefetch indirects for per stream (default 64MB) */
351 extern uint32_t	zfetch_max_idistance;
352 SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, CTLFLAG_RWTUN,
353     &zfetch_max_idistance, 0,
354     "Max bytes to prefetch indirects for per stream (LEGACY)");
355 
356 /* dsl_pool.c */
357 
358 /* dnode.c */
359 extern int zfs_default_bs;
360 SYSCTL_INT(_vfs_zfs, OID_AUTO, default_bs, CTLFLAG_RWTUN,
361     &zfs_default_bs, 0, "Default dnode block shift");
362 
363 extern int zfs_default_ibs;
364 SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN,
365     &zfs_default_ibs, 0, "Default dnode indirect block shift");
366 
367 
368 /* dsl_scan.c */
369 
370 /* metaslab.c */
371 
372 /*
373  * In pools where the log space map feature is not enabled we touch
374  * multiple metaslabs (and their respective space maps) with each
375  * transaction group. Thus, we benefit from having a small space map
376  * block size since it allows us to issue more I/O operations scattered
377  * around the disk. So a sane default for the space map block size
378  * is 8~16K.
379  */
380 extern int zfs_metaslab_sm_blksz_no_log;
381 SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, CTLFLAG_RDTUN,
382     &zfs_metaslab_sm_blksz_no_log, 0,
383     "Block size for space map in pools with log space map disabled.  "
384     "Power of 2 and greater than 4096.");
385 
386 /*
387  * When the log space map feature is enabled, we accumulate a lot of
388  * changes per metaslab that are flushed once in a while so we benefit
389  * from a bigger block size like 128K for the metaslab space maps.
390  */
391 extern int zfs_metaslab_sm_blksz_with_log;
392 SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, CTLFLAG_RDTUN,
393     &zfs_metaslab_sm_blksz_with_log, 0,
394     "Block size for space map in pools with log space map enabled.  "
395     "Power of 2 and greater than 4096.");
396 
397 /*
398  * The in-core space map representation is more compact than its on-disk form.
399  * The zfs_condense_pct determines how much more compact the in-core
400  * space map representation must be before we compact it on-disk.
401  * Values should be greater than or equal to 100.
402  */
403 extern int zfs_condense_pct;
404 SYSCTL_INT(_vfs_zfs, OID_AUTO, condense_pct, CTLFLAG_RWTUN,
405     &zfs_condense_pct, 0,
406     "Condense on-disk spacemap when it is more than this many percents"
407     " of in-memory counterpart");
408 
409 extern int zfs_remove_max_segment;
410 SYSCTL_INT(_vfs_zfs, OID_AUTO, remove_max_segment, CTLFLAG_RWTUN,
411     &zfs_remove_max_segment, 0, "Largest contiguous segment ZFS will attempt to"
412     " allocate when removing a device");
413 
414 extern int zfs_removal_suspend_progress;
415 SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, CTLFLAG_RWTUN,
416     &zfs_removal_suspend_progress, 0, "Ensures certain actions can happen while"
417     " in the middle of a removal");
418 
419 
420 /*
421  * Minimum size which forces the dynamic allocator to change
422  * it's allocation strategy.  Once the space map cannot satisfy
423  * an allocation of this size then it switches to using more
424  * aggressive strategy (i.e search by size rather than offset).
425  */
426 extern uint64_t metaslab_df_alloc_threshold;
427 SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold, CTLFLAG_RWTUN,
428     &metaslab_df_alloc_threshold, 0,
429     "Minimum size which forces the dynamic allocator to change it's allocation strategy");
430 
431 /*
432  * The minimum free space, in percent, which must be available
433  * in a space map to continue allocations in a first-fit fashion.
434  * Once the space map's free space drops below this level we dynamically
435  * switch to using best-fit allocations.
436  */
437 extern int metaslab_df_free_pct;
438 SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct, CTLFLAG_RWTUN,
439     &metaslab_df_free_pct, 0,
440     "The minimum free space, in percent, which must be available in a "
441     "space map to continue allocations in a first-fit fashion");
442 
443 /*
 * Percentage of all CPUs that can be used by the metaslab taskq.
445  */
446 extern int metaslab_load_pct;
447 SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct, CTLFLAG_RWTUN,
448     &metaslab_load_pct, 0,
449     "Percentage of cpus that can be used by the metaslab taskq");
450 
451 /*
452  * Max number of metaslabs per group to preload.
453  */
454 extern int metaslab_preload_limit;
455 SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, preload_limit, CTLFLAG_RWTUN,
456     &metaslab_preload_limit, 0,
457     "Max number of metaslabs per group to preload");
458 
459 /* spa.c */
460 extern int zfs_ccw_retry_interval;
461 SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RWTUN,
462     &zfs_ccw_retry_interval, 0,
463     "Configuration cache file write, retry after failure, interval (seconds)");
464 
465 extern uint64_t zfs_max_missing_tvds_cachefile;
466 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile, CTLFLAG_RWTUN,
467     &zfs_max_missing_tvds_cachefile, 0,
468     "allow importing pools with missing top-level vdevs in cache file");
469 
470 extern uint64_t zfs_max_missing_tvds_scan;
471 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan, CTLFLAG_RWTUN,
472     &zfs_max_missing_tvds_scan, 0,
473     "allow importing pools with missing top-level vdevs during scan");
474 
475 /* spa_misc.c */
476 extern int zfs_flags;
477 static int
478 sysctl_vfs_zfs_debug_flags(SYSCTL_HANDLER_ARGS)
479 {
480 	int err, val;
481 
482 	val = zfs_flags;
483 	err = sysctl_handle_int(oidp, &val, 0, req);
484 	if (err != 0 || req->newptr == NULL)
485 		return (err);
486 
487 	/*
488 	 * ZFS_DEBUG_MODIFY must be enabled prior to boot so all
489 	 * arc buffers in the system have the necessary additional
490 	 * checksum data.  However, it is safe to disable at any
491 	 * time.
492 	 */
493 	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
494 		val &= ~ZFS_DEBUG_MODIFY;
495 	zfs_flags = val;
496 
497 	return (0);
498 }
499 
500 SYSCTL_PROC(_vfs_zfs, OID_AUTO, debugflags,
501     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, NULL, 0,
502     sysctl_vfs_zfs_debug_flags, "IU", "Debug flags for ZFS testing.");
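
/*
 * Illustrative usage: vfs.zfs.debugflags is a bit mask of ZFS_DEBUG_*
 * values (see zfs_debug.h), so e.g. ZFS_DEBUG_MODIFY enables extra ARC
 * buffer checksumming and, per the comment in the handler above, may
 * only be enabled from a boot-time tunable.
 */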
503 
504 int
505 param_set_deadman_synctime(SYSCTL_HANDLER_ARGS)
506 {
507 	unsigned long val;
508 	int err;
509 
510 	val = zfs_deadman_synctime_ms;
511 	err = sysctl_handle_long(oidp, &val, 0, req);
512 	if (err != 0 || req->newptr == NULL)
513 		return (err);
514 	zfs_deadman_synctime_ms = val;
515 
516 	spa_set_deadman_synctime(MSEC2NSEC(zfs_deadman_synctime_ms));
517 
518 	return (0);
519 }
520 
521 int
522 param_set_deadman_ziotime(SYSCTL_HANDLER_ARGS)
523 {
524 	unsigned long val;
525 	int err;
526 
527 	val = zfs_deadman_ziotime_ms;
528 	err = sysctl_handle_long(oidp, &val, 0, req);
529 	if (err != 0 || req->newptr == NULL)
530 		return (err);
531 	zfs_deadman_ziotime_ms = val;
532 
	spa_set_deadman_ziotime(MSEC2NSEC(zfs_deadman_ziotime_ms));
534 
535 	return (0);
536 }
537 
538 int
539 param_set_deadman_failmode(SYSCTL_HANDLER_ARGS)
540 {
541 	char buf[16];
542 	int rc;
543 
544 	if (req->newptr == NULL)
545 		strlcpy(buf, zfs_deadman_failmode, sizeof (buf));
546 
547 	rc = sysctl_handle_string(oidp, buf, sizeof (buf), req);
548 	if (rc || req->newptr == NULL)
549 		return (rc);
550 	if (strcmp(buf, zfs_deadman_failmode) == 0)
551 		return (0);
	if (strcmp(buf, "wait") == 0)
		zfs_deadman_failmode = "wait";
	if (strcmp(buf, "continue") == 0)
		zfs_deadman_failmode = "continue";
	if (strcmp(buf, "panic") == 0)
		zfs_deadman_failmode = "panic";
558 
559 	return (-param_set_deadman_failmode_common(buf));
560 }
561 
562 
563 /* spacemap.c */
564 extern int space_map_ibs;
565 SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_ibs, CTLFLAG_RWTUN,
566     &space_map_ibs, 0, "Space map indirect block shift");
567 
568 
569 /* vdev.c */
570 int
571 param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
572 {
573 	uint64_t val;
574 	int err;
575 
576 	val = zfs_vdev_min_auto_ashift;
577 	err = sysctl_handle_64(oidp, &val, 0, req);
578 	if (err != 0 || req->newptr == NULL)
579 		return (SET_ERROR(err));
580 
581 	if (val < ASHIFT_MIN || val > zfs_vdev_max_auto_ashift)
582 		return (SET_ERROR(EINVAL));
583 
584 	zfs_vdev_min_auto_ashift = val;
585 
586 	return (0);
587 }
588 
589 int
590 param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
591 {
592 	uint64_t val;
593 	int err;
594 
595 	val = zfs_vdev_max_auto_ashift;
596 	err = sysctl_handle_64(oidp, &val, 0, req);
597 	if (err != 0 || req->newptr == NULL)
598 		return (SET_ERROR(err));
599 
600 	if (val > ASHIFT_MAX || val < zfs_vdev_min_auto_ashift)
601 		return (SET_ERROR(EINVAL));
602 
603 	zfs_vdev_max_auto_ashift = val;
604 
605 	return (0);
606 }
607 
608 SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
609     CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
610     &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
611     param_set_min_auto_ashift, "QU",
612     "Min ashift used when creating new top-level vdev. (LEGACY)");
613 SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
614     CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
615     &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
616     param_set_max_auto_ashift, "QU",
617     "Max ashift used when optimizing for logical -> physical sector size on "
618     "new top-level vdevs. (LEGACY)");
619 
620 /*
621  * Since the DTL space map of a vdev is not expected to have a lot of
622  * entries, we default its block size to 4K.
623  */
624 extern int zfs_vdev_dtl_sm_blksz;
625 SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz, CTLFLAG_RDTUN,
626     &zfs_vdev_dtl_sm_blksz, 0,
627     "Block size for DTL space map.  Power of 2 and greater than 4096.");
628 
629 /*
630  * vdev-wide space maps that have lots of entries written to them at
631  * the end of each transaction can benefit from a higher I/O bandwidth
632  * (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
633  */
634 extern int zfs_vdev_standard_sm_blksz;
635 SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN,
636     &zfs_vdev_standard_sm_blksz, 0,
637     "Block size for standard space map.  Power of 2 and greater than 4096.");
638 
639 extern int vdev_validate_skip;
640 SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, CTLFLAG_RDTUN,
641     &vdev_validate_skip, 0,
642     "Enable to bypass vdev_validate().");
643 
644 
645 /* vdev_cache.c */
646 
647 /* vdev_mirror.c */
648 /*
649  * The load configuration settings below are tuned by default for
650  * the case where all devices are of the same rotational type.
651  *
652  * If there is a mixture of rotating and non-rotating media, setting
653  * non_rotating_seek_inc to 0 may well provide better results as it
654  * will direct more reads to the non-rotating vdevs which are more
655  * likely to have a higher performance.
656  */
657 
658 
659 /* vdev_queue.c */
660 #define	ZFS_VDEV_QUEUE_KNOB_MIN(name)					\
661 extern uint32_t zfs_vdev_ ## name ## _min_active;				\
662 SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _min_active, CTLFLAG_RWTUN,\
663     &zfs_vdev_ ## name ## _min_active, 0,				\
664     "Initial number of I/O requests of type " #name			\
665     " active for each device");
666 
667 #define	ZFS_VDEV_QUEUE_KNOB_MAX(name)					\
668 extern uint32_t zfs_vdev_ ## name ## _max_active;				\
669 SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _max_active, CTLFLAG_RWTUN, \
670     &zfs_vdev_ ## name ## _max_active, 0,				\
671     "Maximum number of I/O requests of type " #name			\
672     " active for each device");
673 
674 
#undef ZFS_VDEV_QUEUE_KNOB_MIN
#undef ZFS_VDEV_QUEUE_KNOB_MAX
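
/*
 * Illustrative expansion (the macros are currently unused here):
 * ZFS_VDEV_QUEUE_KNOB_MAX(sync_read) would declare
 * zfs_vdev_sync_read_max_active and expose it as the
 * vfs.zfs.vdev.sync_read_max_active sysctl.
 */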
676 
677 extern uint32_t zfs_vdev_max_active;
678 SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RWTUN,
679     &zfs_vdev_max_active, 0,
680     "The maximum number of I/Os of all types active for each device. (LEGACY)");
681 
682 extern int zfs_vdev_def_queue_depth;
683 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, def_queue_depth, CTLFLAG_RWTUN,
684     &zfs_vdev_def_queue_depth, 0,
685     "Default queue depth for each allocator");
686 
#if 0
extern uint64_t zfs_multihost_history;
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, multihost_history, CTLFLAG_RWTUN,
    &zfs_multihost_history, 0,
    "Historical statistics for the last N multihost updates");
#endif
691 
692 #ifdef notyet
693 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, trim_on_init, CTLFLAG_RW,
694     &vdev_trim_on_init, 0, "Enable/disable full vdev trim on initialisation");
695 #endif
696 
697 
698 /* zio.c */
699 #if defined(__LP64__)
700 int zio_use_uma = 1;
701 #else
702 int zio_use_uma = 0;
703 #endif
704 
705 SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, use_uma, CTLFLAG_RDTUN, &zio_use_uma, 0,
706     "Use uma(9) for ZIO allocations");
extern int zio_exclude_metadata;
SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, CTLFLAG_RDTUN,
    &zio_exclude_metadata, 0,
    "Exclude metadata buffers from dumps as well");
709 
710 int
711 param_set_slop_shift(SYSCTL_HANDLER_ARGS)
712 {
713 	int val;
714 	int err;
715 
716 	val = *(int *)arg1;
717 
718 	err = sysctl_handle_int(oidp, &val, 0, req);
719 	if (err != 0 || req->newptr == NULL)
720 		return (err);
721 
722 	if (val < 1 || val > 31)
723 		return (EINVAL);
724 
725 	*(int *)arg1 = val;
726 
727 	return (0);
728 }
729 
730 int
731 param_set_multihost_interval(SYSCTL_HANDLER_ARGS)
732 {
733 	int err;
734 
735 	err = sysctl_handle_long(oidp, arg1, 0, req);
736 	if (err != 0 || req->newptr == NULL)
737 		return (err);
738 
739 	if (spa_mode_global != SPA_MODE_UNINIT)
740 		mmp_signal_all_threads();
741 
742 	return (0);
743 }
744