1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy /* 22eda14cbcSMatt Macy * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>. 23eda14cbcSMatt Macy */ 24eda14cbcSMatt Macy 25eda14cbcSMatt Macy #ifndef _SYS_VDEV_RAIDZ_H 26eda14cbcSMatt Macy #define _SYS_VDEV_RAIDZ_H 27eda14cbcSMatt Macy 28eda14cbcSMatt Macy #include <sys/types.h> 29e716630dSMartin Matuska #include <sys/zfs_rlock.h> 30eda14cbcSMatt Macy 31eda14cbcSMatt Macy #ifdef __cplusplus 32eda14cbcSMatt Macy extern "C" { 33eda14cbcSMatt Macy #endif 34eda14cbcSMatt Macy 35eda14cbcSMatt Macy struct zio; 36e92ffd9bSMartin Matuska struct raidz_col; 377877fdebSMatt Macy struct raidz_row; 38eda14cbcSMatt Macy struct raidz_map; 39e716630dSMartin Matuska struct vdev_raidz; 40e716630dSMartin Matuska struct uberblock; 41eda14cbcSMatt Macy #if !defined(_KERNEL) 42eda14cbcSMatt Macy struct kernel_param {}; 43eda14cbcSMatt Macy #endif 44eda14cbcSMatt Macy 45eda14cbcSMatt Macy /* 46eda14cbcSMatt Macy * vdev_raidz interface 47eda14cbcSMatt Macy */ 48eda14cbcSMatt Macy struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t, 49eda14cbcSMatt Macy uint64_t); 50e716630dSMartin Matuska struct raidz_map *vdev_raidz_map_alloc_expanded(struct zio *, 51e716630dSMartin Matuska uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, boolean_t); 52eda14cbcSMatt Macy void vdev_raidz_map_free(struct raidz_map *); 53e716630dSMartin Matuska void vdev_raidz_free(struct vdev_raidz *); 547877fdebSMatt Macy void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *); 55eda14cbcSMatt Macy void vdev_raidz_generate_parity(struct raidz_map *); 567877fdebSMatt Macy void vdev_raidz_reconstruct(struct raidz_map *, const int *, int); 577877fdebSMatt Macy void vdev_raidz_child_done(zio_t *); 587877fdebSMatt Macy void vdev_raidz_io_done(zio_t *); 59e92ffd9bSMartin Matuska void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *); 60e716630dSMartin Matuska struct raidz_row *vdev_raidz_row_alloc(int); 61e716630dSMartin Matuska void vdev_raidz_reflow_copy_scratch(spa_t *); 62e716630dSMartin Matuska void raidz_dtl_reassessed(vdev_t *); 63eda14cbcSMatt Macy 64f9693befSMartin Matuska extern const zio_vsd_ops_t vdev_raidz_vsd_ops; 65f9693befSMartin Matuska 66eda14cbcSMatt Macy /* 67eda14cbcSMatt Macy * vdev_raidz_math interface 68eda14cbcSMatt Macy */ 69eda14cbcSMatt Macy void vdev_raidz_math_init(void); 70eda14cbcSMatt Macy void vdev_raidz_math_fini(void); 71eda14cbcSMatt Macy const struct raidz_impl_ops *vdev_raidz_math_get_ops(void); 727877fdebSMatt Macy int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *); 737877fdebSMatt Macy int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *, 747877fdebSMatt Macy const int *, const int *, const int); 75eda14cbcSMatt Macy int vdev_raidz_impl_set(const char *); 76eda14cbcSMatt Macy 77e716630dSMartin Matuska typedef struct vdev_raidz_expand { 78e716630dSMartin Matuska uint64_t vre_vdev_id; 79e716630dSMartin Matuska 80e716630dSMartin Matuska kmutex_t vre_lock; 81e716630dSMartin Matuska kcondvar_t vre_cv; 82e716630dSMartin Matuska 83e716630dSMartin Matuska /* 84e716630dSMartin Matuska * How much i/o is outstanding (issued and not completed). 85e716630dSMartin Matuska */ 86e716630dSMartin Matuska uint64_t vre_outstanding_bytes; 87e716630dSMartin Matuska 88e716630dSMartin Matuska /* 89e716630dSMartin Matuska * Next offset to issue i/o for. 90e716630dSMartin Matuska */ 91e716630dSMartin Matuska uint64_t vre_offset; 92e716630dSMartin Matuska 93e716630dSMartin Matuska /* 94e716630dSMartin Matuska * Lowest offset of a failed expansion i/o. The expansion will retry 95e716630dSMartin Matuska * from here. Once the expansion thread notices the failure and exits, 96e716630dSMartin Matuska * vre_failed_offset is reset back to UINT64_MAX, and 97e716630dSMartin Matuska * vre_waiting_for_resilver will be set. 98e716630dSMartin Matuska */ 99e716630dSMartin Matuska uint64_t vre_failed_offset; 100e716630dSMartin Matuska boolean_t vre_waiting_for_resilver; 101e716630dSMartin Matuska 102e716630dSMartin Matuska /* 103e716630dSMartin Matuska * Offset that is completing each txg 104e716630dSMartin Matuska */ 105e716630dSMartin Matuska uint64_t vre_offset_pertxg[TXG_SIZE]; 106e716630dSMartin Matuska 107e716630dSMartin Matuska /* 108e716630dSMartin Matuska * Bytes copied in each txg. 109e716630dSMartin Matuska */ 110e716630dSMartin Matuska uint64_t vre_bytes_copied_pertxg[TXG_SIZE]; 111e716630dSMartin Matuska 112e716630dSMartin Matuska /* 113e716630dSMartin Matuska * The rangelock prevents normal read/write zio's from happening while 114e716630dSMartin Matuska * there are expansion (reflow) i/os in progress to the same offsets. 115e716630dSMartin Matuska */ 116e716630dSMartin Matuska zfs_rangelock_t vre_rangelock; 117e716630dSMartin Matuska 118e716630dSMartin Matuska /* 119e716630dSMartin Matuska * These fields are stored on-disk in the vdev_top_zap: 120e716630dSMartin Matuska */ 121e716630dSMartin Matuska dsl_scan_state_t vre_state; 122e716630dSMartin Matuska uint64_t vre_start_time; 123e716630dSMartin Matuska uint64_t vre_end_time; 124e716630dSMartin Matuska uint64_t vre_bytes_copied; 125e716630dSMartin Matuska } vdev_raidz_expand_t; 126e716630dSMartin Matuska 1277877fdebSMatt Macy typedef struct vdev_raidz { 128e716630dSMartin Matuska /* 129e716630dSMartin Matuska * Number of child vdevs when this raidz vdev was created (i.e. before 130e716630dSMartin Matuska * any raidz expansions). 131e716630dSMartin Matuska */ 132e716630dSMartin Matuska int vd_original_width; 133e716630dSMartin Matuska 134e716630dSMartin Matuska /* 135e716630dSMartin Matuska * The current number of child vdevs, which may be more than the 136e716630dSMartin Matuska * original width if an expansion is in progress or has completed. 137e716630dSMartin Matuska */ 138e716630dSMartin Matuska int vd_physical_width; 139e716630dSMartin Matuska 1407877fdebSMatt Macy int vd_nparity; 141e716630dSMartin Matuska 142e716630dSMartin Matuska /* 143e716630dSMartin Matuska * Tree of reflow_node_t's. The lock protects the avl tree only. 144e716630dSMartin Matuska * The reflow_node_t's describe completed expansions, and are used 145e716630dSMartin Matuska * to determine the logical width given a block's birth time. 146e716630dSMartin Matuska */ 147e716630dSMartin Matuska avl_tree_t vd_expand_txgs; 148e716630dSMartin Matuska kmutex_t vd_expand_lock; 149e716630dSMartin Matuska 150e716630dSMartin Matuska /* 151e716630dSMartin Matuska * If this vdev is being expanded, spa_raidz_expand is set to this 152e716630dSMartin Matuska */ 153e716630dSMartin Matuska vdev_raidz_expand_t vn_vre; 1547877fdebSMatt Macy } vdev_raidz_t; 1557877fdebSMatt Macy 156e716630dSMartin Matuska extern int vdev_raidz_attach_check(vdev_t *); 157e716630dSMartin Matuska extern void vdev_raidz_attach_sync(void *, dmu_tx_t *); 158e716630dSMartin Matuska extern void spa_start_raidz_expansion_thread(spa_t *); 159e716630dSMartin Matuska extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *); 160e716630dSMartin Matuska extern int vdev_raidz_load(vdev_t *); 161e716630dSMartin Matuska 162e716630dSMartin Matuska /* RAIDZ scratch area pause points (for testing) */ 163e716630dSMartin Matuska #define RAIDZ_EXPAND_PAUSE_NONE 0 164e716630dSMartin Matuska #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1 165e716630dSMartin Matuska #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2 166e716630dSMartin Matuska #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3 167e716630dSMartin Matuska #define RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4 168e716630dSMartin Matuska #define RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5 169e716630dSMartin Matuska #define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6 170e716630dSMartin Matuska #define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7 171e716630dSMartin Matuska 172eda14cbcSMatt Macy #ifdef __cplusplus 173eda14cbcSMatt Macy } 174eda14cbcSMatt Macy #endif 175eda14cbcSMatt Macy 176eda14cbcSMatt Macy #endif /* _SYS_VDEV_RAIDZ_H */ 177