1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * CDDL HEADER START
3eda14cbcSMatt Macy  *
4eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7eda14cbcSMatt Macy  *
8eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11eda14cbcSMatt Macy  * and limitations under the License.
12eda14cbcSMatt Macy  *
13eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18eda14cbcSMatt Macy  *
19eda14cbcSMatt Macy  * CDDL HEADER END
20eda14cbcSMatt Macy  */
21eda14cbcSMatt Macy /*
22eda14cbcSMatt Macy  * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
23eda14cbcSMatt Macy  */
24eda14cbcSMatt Macy 
25eda14cbcSMatt Macy #ifndef _SYS_VDEV_RAIDZ_H
26eda14cbcSMatt Macy #define	_SYS_VDEV_RAIDZ_H
27eda14cbcSMatt Macy 
28eda14cbcSMatt Macy #include <sys/types.h>
29e716630dSMartin Matuska #include <sys/zfs_rlock.h>
30eda14cbcSMatt Macy 
31eda14cbcSMatt Macy #ifdef	__cplusplus
32eda14cbcSMatt Macy extern "C" {
33eda14cbcSMatt Macy #endif
34eda14cbcSMatt Macy 
/*
 * Forward declarations.  The prototypes in this header refer to these
 * structures only through pointers, so incomplete types are sufficient.
 * (struct uberblock is declared here but is not referenced by anything
 * else visible in this header.)
 */
35eda14cbcSMatt Macy struct zio;
36e92ffd9bSMartin Matuska struct raidz_col;
377877fdebSMatt Macy struct raidz_row;
38eda14cbcSMatt Macy struct raidz_map;
39e716630dSMartin Matuska struct vdev_raidz;
40e716630dSMartin Matuska struct uberblock;
/*
 * When _KERNEL is not defined, provide an empty placeholder definition of
 * struct kernel_param.
 * NOTE(review): presumably this lets signatures that mention
 * struct kernel_param compile in userspace builds -- no user of the type
 * is visible in this header; confirm against the implementation files.
 */
41eda14cbcSMatt Macy #if !defined(_KERNEL)
42eda14cbcSMatt Macy struct kernel_param {};
43eda14cbcSMatt Macy #endif
44eda14cbcSMatt Macy 
45eda14cbcSMatt Macy /*
46eda14cbcSMatt Macy  * vdev_raidz interface
 *
 * The uint64_t, boolean_t and int parameters are unnamed in these
 * prototypes, so their meaning has to be looked up at the definitions.
 *
 * NOTE(review): several prototypes use the typedef names zio_t, abd_t,
 * spa_t, vdev_t and zio_vsd_ops_t, none of which is declared in this
 * header itself; presumably they are provided by the included headers or
 * by headers the includer pulls in first -- confirm.
47eda14cbcSMatt Macy  */
48eda14cbcSMatt Macy struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
49eda14cbcSMatt Macy     uint64_t);
50e716630dSMartin Matuska struct raidz_map *vdev_raidz_map_alloc_expanded(struct zio *,
51e716630dSMartin Matuska     uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, boolean_t);
52eda14cbcSMatt Macy void vdev_raidz_map_free(struct raidz_map *);
53e716630dSMartin Matuska void vdev_raidz_free(struct vdev_raidz *);
547877fdebSMatt Macy void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *);
55eda14cbcSMatt Macy void vdev_raidz_generate_parity(struct raidz_map *);
567877fdebSMatt Macy void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
577877fdebSMatt Macy void vdev_raidz_child_done(zio_t *);
587877fdebSMatt Macy void vdev_raidz_io_done(zio_t *);
59e92ffd9bSMartin Matuska void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
60e716630dSMartin Matuska struct raidz_row *vdev_raidz_row_alloc(int);
61e716630dSMartin Matuska void vdev_raidz_reflow_copy_scratch(spa_t *);
62e716630dSMartin Matuska void raidz_dtl_reassessed(vdev_t *);
63eda14cbcSMatt Macy 
/* Declared here only; the object itself is defined elsewhere. */
64f9693befSMartin Matuska extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
65f9693befSMartin Matuska 
66eda14cbcSMatt Macy /*
67eda14cbcSMatt Macy  * vdev_raidz_math interface
 *
 * struct raidz_impl_ops has no separate forward declaration in this
 * header; its first mention is the return type of
 * vdev_raidz_math_get_ops(), and only a pointer to it is used here.
 *
 * The trailing "const int" parameter of vdev_raidz_math_reconstruct() is
 * passed by value, so the qualifier has no effect for callers.
 *
 * NOTE(review): the return-value conventions of the int-returning
 * functions are not visible in this header -- confirm at the definitions.
68eda14cbcSMatt Macy  */
69eda14cbcSMatt Macy void vdev_raidz_math_init(void);
70eda14cbcSMatt Macy void vdev_raidz_math_fini(void);
71eda14cbcSMatt Macy const struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
727877fdebSMatt Macy int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *);
737877fdebSMatt Macy int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *,
747877fdebSMatt Macy     const int *, const int *, const int);
75eda14cbcSMatt Macy int vdev_raidz_impl_set(const char *);
76eda14cbcSMatt Macy 
/*
 * Bookkeeping for a raidz expansion (reflow).
 *
 * The final group of members (vre_state through vre_bytes_copied) is marked
 * below as being stored on-disk in the vdev_top_zap; the members before
 * that marker carry no such note.
 */
77e716630dSMartin Matuska typedef struct vdev_raidz_expand {
	/*
	 * NOTE(review): presumably identifies the raidz vdev being expanded;
	 * this header does not say which id space is used -- confirm.
	 */
78e716630dSMartin Matuska 	uint64_t vre_vdev_id;
79e716630dSMartin Matuska 
	/*
	 * NOTE(review): which members vre_lock protects and what vre_cv is
	 * signalled for are not stated in this header -- confirm in the
	 * implementation.
	 */
80e716630dSMartin Matuska 	kmutex_t vre_lock;
81e716630dSMartin Matuska 	kcondvar_t vre_cv;
82e716630dSMartin Matuska 
83e716630dSMartin Matuska 	/*
84e716630dSMartin Matuska 	 * How much i/o is outstanding (issued and not completed).
85e716630dSMartin Matuska 	 */
86e716630dSMartin Matuska 	uint64_t vre_outstanding_bytes;
87e716630dSMartin Matuska 
88e716630dSMartin Matuska 	/*
89e716630dSMartin Matuska 	 * Next offset to issue i/o for.
90e716630dSMartin Matuska 	 */
91e716630dSMartin Matuska 	uint64_t vre_offset;
92e716630dSMartin Matuska 
93e716630dSMartin Matuska 	/*
94e716630dSMartin Matuska 	 * Lowest offset of a failed expansion i/o.  The expansion will retry
95e716630dSMartin Matuska 	 * from here.  Once the expansion thread notices the failure and exits,
96e716630dSMartin Matuska 	 * vre_failed_offset is reset back to UINT64_MAX, and
97e716630dSMartin Matuska 	 * vre_waiting_for_resilver will be set.
98e716630dSMartin Matuska 	 */
99e716630dSMartin Matuska 	uint64_t vre_failed_offset;
100e716630dSMartin Matuska 	boolean_t vre_waiting_for_resilver;
101e716630dSMartin Matuska 
102e716630dSMartin Matuska 	/*
103e716630dSMartin Matuska 	 * Offset that is completing each txg
104e716630dSMartin Matuska 	 */
	/* One slot per TXG_SIZE entry, as for vre_bytes_copied_pertxg. */
105e716630dSMartin Matuska 	uint64_t vre_offset_pertxg[TXG_SIZE];
106e716630dSMartin Matuska 
107e716630dSMartin Matuska 	/*
108e716630dSMartin Matuska 	 * Bytes copied in each txg.
109e716630dSMartin Matuska 	 */
110e716630dSMartin Matuska 	uint64_t vre_bytes_copied_pertxg[TXG_SIZE];
111e716630dSMartin Matuska 
112e716630dSMartin Matuska 	/*
113e716630dSMartin Matuska 	 * The rangelock prevents normal read/write zio's from happening while
114e716630dSMartin Matuska 	 * there are expansion (reflow) i/os in progress to the same offsets.
115e716630dSMartin Matuska 	 */
116e716630dSMartin Matuska 	zfs_rangelock_t vre_rangelock;
117e716630dSMartin Matuska 
118e716630dSMartin Matuska 	/*
119e716630dSMartin Matuska 	 * These fields are stored on-disk in the vdev_top_zap:
120e716630dSMartin Matuska 	 */
	/*
	 * NOTE(review): the units of the start/end times are not stated in
	 * this header -- confirm where they are written.
	 */
121e716630dSMartin Matuska 	dsl_scan_state_t vre_state;
122e716630dSMartin Matuska 	uint64_t vre_start_time;
123e716630dSMartin Matuska 	uint64_t vre_end_time;
124e716630dSMartin Matuska 	uint64_t vre_bytes_copied;
125e716630dSMartin Matuska } vdev_raidz_expand_t;
126e716630dSMartin Matuska 
/*
 * Per-raidz-vdev state: the original and current child counts, the parity
 * count, the tree of completed expansions, and the embedded expansion
 * bookkeeping (vdev_raidz_expand_t).
 */
1277877fdebSMatt Macy typedef struct vdev_raidz {
128e716630dSMartin Matuska 	/*
129e716630dSMartin Matuska 	 * Number of child vdevs when this raidz vdev was created (i.e. before
130e716630dSMartin Matuska 	 * any raidz expansions).
131e716630dSMartin Matuska 	 */
132e716630dSMartin Matuska 	int vd_original_width;
133e716630dSMartin Matuska 
134e716630dSMartin Matuska 	/*
135e716630dSMartin Matuska 	 * The current number of child vdevs, which may be more than the
136e716630dSMartin Matuska 	 * original width if an expansion is in progress or has completed.
137e716630dSMartin Matuska 	 */
138e716630dSMartin Matuska 	int vd_physical_width;
139e716630dSMartin Matuska 
	/*
	 * NOTE(review): presumably the number of parity devices of this
	 * raidz vdev; no description or valid range is given here -- confirm.
	 */
1407877fdebSMatt Macy 	int vd_nparity;
141e716630dSMartin Matuska 
142e716630dSMartin Matuska 	/*
143e716630dSMartin Matuska 	 * Tree of reflow_node_t's.  The lock protects the avl tree only.
144e716630dSMartin Matuska 	 * The reflow_node_t's describe completed expansions, and are used
145e716630dSMartin Matuska 	 * to determine the logical width given a block's birth time.
146e716630dSMartin Matuska 	 */
147e716630dSMartin Matuska 	avl_tree_t vd_expand_txgs;
148e716630dSMartin Matuska 	kmutex_t vd_expand_lock;
149e716630dSMartin Matuska 
150e716630dSMartin Matuska 	/*
151e716630dSMartin Matuska 	 * If this vdev is being expanded, spa_raidz_expand is set to this
152e716630dSMartin Matuska 	 */
	/*
	 * NOTE(review): this member is embedded by value, and it uses a
	 * "vn_" prefix whereas every other member of this structure uses
	 * "vd_".  Renaming would touch all users, so it is only flagged here.
	 */
153e716630dSMartin Matuska 	vdev_raidz_expand_t vn_vre;
1547877fdebSMatt Macy } vdev_raidz_t;
1557877fdebSMatt Macy 
/*
 * Further entry points operating on vdev_t / spa_t / dmu_tx_t arguments.
 * NOTE(review): judging by their names these relate to raidz expansion
 * (attach check and sync, starting the expansion thread, expansion
 * statistics) and to loading raidz state; the implementations and the
 * meaning of the int return values are not visible in this header --
 * confirm at the definitions.
 */
156e716630dSMartin Matuska extern int vdev_raidz_attach_check(vdev_t *);
157e716630dSMartin Matuska extern void vdev_raidz_attach_sync(void *, dmu_tx_t *);
158e716630dSMartin Matuska extern void spa_start_raidz_expansion_thread(spa_t *);
159e716630dSMartin Matuska extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *);
160e716630dSMartin Matuska extern int vdev_raidz_load(vdev_t *);
161e716630dSMartin Matuska 
162e716630dSMartin Matuska /* RAIDZ scratch area pause points (for testing) */
/*
 * The pause points are numbered consecutively from 0
 * (RAIDZ_EXPAND_PAUSE_NONE) to 7
 * (RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2).
 * NOTE(review): how a pause point is selected and where each one takes
 * effect is not visible in this header -- confirm at the users of these
 * constants.
 */
163e716630dSMartin Matuska #define	RAIDZ_EXPAND_PAUSE_NONE	0
164e716630dSMartin Matuska #define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1
165e716630dSMartin Matuska #define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2
166e716630dSMartin Matuska #define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3
167e716630dSMartin Matuska #define	RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4
168e716630dSMartin Matuska #define	RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5
169e716630dSMartin Matuska #define	RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6
170e716630dSMartin Matuska #define	RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7
171e716630dSMartin Matuska 
172eda14cbcSMatt Macy #ifdef	__cplusplus
173eda14cbcSMatt Macy }
174eda14cbcSMatt Macy #endif
175eda14cbcSMatt Macy 
176eda14cbcSMatt Macy #endif /* _SYS_VDEV_RAIDZ_H */
177