1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (C) 2016 Gvozden Nešković. All rights reserved. 23 */ 24 25 #ifndef _VDEV_RAIDZ_H 26 #define _VDEV_RAIDZ_H 27 28 #include <sys/types.h> 29 #include <sys/debug.h> 30 #include <sys/kstat.h> 31 #include <sys/abd.h> 32 #include <sys/vdev_impl.h> 33 #include <sys/abd_impl.h> 34 #include <sys/zfs_rlock.h> 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 #define CODE_P (0U) 41 #define CODE_Q (1U) 42 #define CODE_R (2U) 43 44 #define PARITY_P (1U) 45 #define PARITY_PQ (2U) 46 #define PARITY_PQR (3U) 47 48 #define TARGET_X (0U) 49 #define TARGET_Y (1U) 50 #define TARGET_Z (2U) 51 52 /* 53 * Parity generation methods indexes 54 */ 55 enum raidz_math_gen_op { 56 RAIDZ_GEN_P = 0, 57 RAIDZ_GEN_PQ, 58 RAIDZ_GEN_PQR, 59 RAIDZ_GEN_NUM = 3 60 }; 61 /* 62 * Data reconstruction methods indexes 63 */ 64 enum raidz_rec_op { 65 RAIDZ_REC_P = 0, 66 RAIDZ_REC_Q, 67 RAIDZ_REC_R, 68 RAIDZ_REC_PQ, 69 RAIDZ_REC_PR, 70 RAIDZ_REC_QR, 71 RAIDZ_REC_PQR, 72 RAIDZ_REC_NUM = 7 73 }; 74 75 extern const char *const raidz_gen_name[RAIDZ_GEN_NUM]; 76 extern const char *const raidz_rec_name[RAIDZ_REC_NUM]; 77 78 /* 79 * Methods used to define raidz implementation 80 * 81 * @raidz_gen_f Parity generation function 82 * @par1 pointer to raidz_map 83 * @raidz_rec_f Data reconstruction function 84 * @par1 pointer to raidz_map 85 * @par2 array of reconstruction targets 86 * @will_work_f Function returns TRUE if impl. is supported on the system 87 * @init_impl_f Function is called once on init 88 * @fini_impl_f Function is called once on fini 89 */ 90 typedef void (*raidz_gen_f)(void *); 91 typedef int (*raidz_rec_f)(void *, const int *); 92 typedef boolean_t (*will_work_f)(void); 93 typedef void (*init_impl_f)(void); 94 typedef void (*fini_impl_f)(void); 95 96 #define RAIDZ_IMPL_NAME_MAX (20) 97 98 typedef struct raidz_impl_ops { 99 init_impl_f init; 100 fini_impl_f fini; 101 raidz_gen_f gen[RAIDZ_GEN_NUM]; /* Parity generate functions */ 102 raidz_rec_f rec[RAIDZ_REC_NUM]; /* Data reconstruction functions */ 103 will_work_f is_supported; /* Support check function */ 104 char name[RAIDZ_IMPL_NAME_MAX]; /* Name of the implementation */ 105 } raidz_impl_ops_t; 106 107 108 typedef struct raidz_col { 109 int rc_devidx; /* child device index for I/O */ 110 uint32_t rc_size; /* I/O size */ 111 uint64_t rc_offset; /* device offset */ 112 abd_t rc_abdstruct; /* rc_abd probably points here */ 113 abd_t *rc_abd; /* I/O data */ 114 abd_t *rc_orig_data; /* pre-reconstruction */ 115 int rc_error; /* I/O error for this device */ 116 uint8_t rc_tried:1; /* Did we attempt this I/O column? */ 117 uint8_t rc_skipped:1; /* Did we skip this I/O column? */ 118 uint8_t rc_need_orig_restore:1; /* need to restore from orig_data? */ 119 uint8_t rc_force_repair:1; /* Write good data to this column */ 120 uint8_t rc_allow_repair:1; /* Allow repair I/O to this column */ 121 int rc_shadow_devidx; /* for double write during expansion */ 122 int rc_shadow_error; /* for double write during expansion */ 123 uint64_t rc_shadow_offset; /* for double write during expansion */ 124 } raidz_col_t; 125 126 typedef struct raidz_row { 127 int rr_cols; /* Regular column count */ 128 int rr_scols; /* Count including skipped columns */ 129 int rr_bigcols; /* Remainder data column count */ 130 int rr_missingdata; /* Count of missing data devices */ 131 int rr_missingparity; /* Count of missing parity devices */ 132 int rr_firstdatacol; /* First data column/parity count */ 133 abd_t *rr_abd_empty; /* dRAID empty sector buffer */ 134 int rr_nempty; /* empty sectors included in parity */ 135 #ifdef ZFS_DEBUG 136 uint64_t rr_offset; /* Logical offset for *_io_verify() */ 137 uint64_t rr_size; /* Physical size for *_io_verify() */ 138 #endif 139 raidz_col_t rr_col[]; /* Flexible array of I/O columns */ 140 } raidz_row_t; 141 142 typedef struct raidz_map { 143 boolean_t rm_ecksuminjected; /* checksum error was injected */ 144 int rm_nrows; /* Regular row count */ 145 int rm_nskip; /* RAIDZ sectors skipped for padding */ 146 int rm_skipstart; /* Column index of padding start */ 147 int rm_original_width; /* pre-expansion width of raidz vdev */ 148 int rm_nphys_cols; /* num entries in rm_phys_col[] */ 149 zfs_locked_range_t *rm_lr; 150 const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ 151 raidz_col_t *rm_phys_col; /* if non-NULL, read i/o aggregation */ 152 raidz_row_t *rm_row[]; /* flexible array of rows */ 153 } raidz_map_t; 154 155 /* 156 * Nodes in vdev_raidz_t:vd_expand_txgs. 157 * Blocks with physical birth time of re_txg or later have the specified 158 * logical width (until the next node). 159 */ 160 typedef struct reflow_node { 161 uint64_t re_txg; 162 uint64_t re_logical_width; 163 avl_node_t re_link; 164 } reflow_node_t; 165 166 167 #define RAIDZ_ORIGINAL_IMPL (INT_MAX) 168 169 extern const raidz_impl_ops_t vdev_raidz_scalar_impl; 170 extern boolean_t raidz_will_scalar_work(void); 171 172 #if defined(__x86_64) && defined(HAVE_SSE2) /* only x86_64 for now */ 173 extern const raidz_impl_ops_t vdev_raidz_sse2_impl; 174 #endif 175 #if defined(__x86_64) && defined(HAVE_SSSE3) /* only x86_64 for now */ 176 extern const raidz_impl_ops_t vdev_raidz_ssse3_impl; 177 #endif 178 #if defined(__x86_64) && defined(HAVE_AVX2) /* only x86_64 for now */ 179 extern const raidz_impl_ops_t vdev_raidz_avx2_impl; 180 #endif 181 #if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */ 182 extern const raidz_impl_ops_t vdev_raidz_avx512f_impl; 183 #endif 184 #if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */ 185 extern const raidz_impl_ops_t vdev_raidz_avx512bw_impl; 186 #endif 187 #if defined(__aarch64__) 188 extern const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl; 189 extern const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl; 190 #endif 191 #if defined(__powerpc__) 192 extern const raidz_impl_ops_t vdev_raidz_powerpc_altivec_impl; 193 #endif 194 195 /* 196 * Commonly used raidz_map helpers 197 * 198 * raidz_parity Returns parity of the RAIDZ block 199 * raidz_ncols Returns number of columns the block spans 200 * Note, all rows have the same number of columns. 201 * raidz_nbigcols Returns number of big columns 202 * raidz_col_p Returns pointer to a column 203 * raidz_col_size Returns size of a column 204 * raidz_big_size Returns size of big columns 205 * raidz_short_size Returns size of short columns 206 */ 207 #define raidz_parity(rm) ((rm)->rm_row[0]->rr_firstdatacol) 208 #define raidz_ncols(rm) ((rm)->rm_row[0]->rr_cols) 209 #define raidz_nbigcols(rm) ((rm)->rm_bigcols) 210 #define raidz_col_p(rm, c) ((rm)->rm_col + (c)) 211 #define raidz_col_size(rm, c) ((rm)->rm_col[c].rc_size) 212 #define raidz_big_size(rm) (raidz_col_size(rm, CODE_P)) 213 #define raidz_short_size(rm) (raidz_col_size(rm, raidz_ncols(rm)-1)) 214 215 /* 216 * Macro defines an RAIDZ parity generation method 217 * 218 * @code parity the function produce 219 * @impl name of the implementation 220 */ 221 #define _RAIDZ_GEN_WRAP(code, impl) \ 222 static void \ 223 impl ## _gen_ ## code(void *rrp) \ 224 { \ 225 raidz_row_t *rr = (raidz_row_t *)rrp; \ 226 raidz_generate_## code ## _impl(rr); \ 227 } 228 229 /* 230 * Macro defines an RAIDZ data reconstruction method 231 * 232 * @code parity the function produce 233 * @impl name of the implementation 234 */ 235 #define _RAIDZ_REC_WRAP(code, impl) \ 236 static int \ 237 impl ## _rec_ ## code(void *rrp, const int *tgtidx) \ 238 { \ 239 raidz_row_t *rr = (raidz_row_t *)rrp; \ 240 return (raidz_reconstruct_## code ## _impl(rr, tgtidx)); \ 241 } 242 243 /* 244 * Define all gen methods for an implementation 245 * 246 * @impl name of the implementation 247 */ 248 #define DEFINE_GEN_METHODS(impl) \ 249 _RAIDZ_GEN_WRAP(p, impl); \ 250 _RAIDZ_GEN_WRAP(pq, impl); \ 251 _RAIDZ_GEN_WRAP(pqr, impl) 252 253 /* 254 * Define all rec functions for an implementation 255 * 256 * @impl name of the implementation 257 */ 258 #define DEFINE_REC_METHODS(impl) \ 259 _RAIDZ_REC_WRAP(p, impl); \ 260 _RAIDZ_REC_WRAP(q, impl); \ 261 _RAIDZ_REC_WRAP(r, impl); \ 262 _RAIDZ_REC_WRAP(pq, impl); \ 263 _RAIDZ_REC_WRAP(pr, impl); \ 264 _RAIDZ_REC_WRAP(qr, impl); \ 265 _RAIDZ_REC_WRAP(pqr, impl) 266 267 #define RAIDZ_GEN_METHODS(impl) \ 268 { \ 269 [RAIDZ_GEN_P] = & impl ## _gen_p, \ 270 [RAIDZ_GEN_PQ] = & impl ## _gen_pq, \ 271 [RAIDZ_GEN_PQR] = & impl ## _gen_pqr \ 272 } 273 274 #define RAIDZ_REC_METHODS(impl) \ 275 { \ 276 [RAIDZ_REC_P] = & impl ## _rec_p, \ 277 [RAIDZ_REC_Q] = & impl ## _rec_q, \ 278 [RAIDZ_REC_R] = & impl ## _rec_r, \ 279 [RAIDZ_REC_PQ] = & impl ## _rec_pq, \ 280 [RAIDZ_REC_PR] = & impl ## _rec_pr, \ 281 [RAIDZ_REC_QR] = & impl ## _rec_qr, \ 282 [RAIDZ_REC_PQR] = & impl ## _rec_pqr \ 283 } 284 285 286 typedef struct raidz_impl_kstat { 287 uint64_t gen[RAIDZ_GEN_NUM]; /* gen method speed B/s */ 288 uint64_t rec[RAIDZ_REC_NUM]; /* rec method speed B/s */ 289 } raidz_impl_kstat_t; 290 291 /* 292 * Enumerate various multiplication constants 293 * used in reconstruction methods 294 */ 295 typedef enum raidz_mul_info { 296 /* Reconstruct Q */ 297 MUL_Q_X = 0, 298 /* Reconstruct R */ 299 MUL_R_X = 0, 300 /* Reconstruct PQ */ 301 MUL_PQ_X = 0, 302 MUL_PQ_Y = 1, 303 /* Reconstruct PR */ 304 MUL_PR_X = 0, 305 MUL_PR_Y = 1, 306 /* Reconstruct QR */ 307 MUL_QR_XQ = 0, 308 MUL_QR_X = 1, 309 MUL_QR_YQ = 2, 310 MUL_QR_Y = 3, 311 /* Reconstruct PQR */ 312 MUL_PQR_XP = 0, 313 MUL_PQR_XQ = 1, 314 MUL_PQR_XR = 2, 315 MUL_PQR_YU = 3, 316 MUL_PQR_YP = 4, 317 MUL_PQR_YQ = 5, 318 319 MUL_CNT = 6 320 } raidz_mul_info_t; 321 322 /* 323 * Powers of 2 in the Galois field. 324 */ 325 extern const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))); 326 /* Logs of 2 in the Galois field defined above. */ 327 extern const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))); 328 329 /* 330 * Multiply a given number by 2 raised to the given power. 331 */ 332 static inline uint8_t 333 vdev_raidz_exp2(const uint8_t a, const unsigned exp) 334 { 335 if (a == 0) 336 return (0); 337 338 return (vdev_raidz_pow2[(exp + (unsigned)vdev_raidz_log2[a]) % 255]); 339 } 340 341 /* 342 * Galois Field operations. 343 * 344 * gf_exp2 - computes 2 raised to the given power 345 * gf_exp4 - computes 4 raised to the given power 346 * gf_mul - multiplication 347 * gf_div - division 348 * gf_inv - multiplicative inverse 349 */ 350 typedef unsigned gf_t; 351 typedef unsigned gf_log_t; 352 353 static inline gf_t 354 gf_mul(const gf_t a, const gf_t b) 355 { 356 gf_log_t logsum; 357 358 if (a == 0 || b == 0) 359 return (0); 360 361 logsum = (gf_log_t)vdev_raidz_log2[a] + (gf_log_t)vdev_raidz_log2[b]; 362 363 return ((gf_t)vdev_raidz_pow2[logsum % 255]); 364 } 365 366 static inline gf_t 367 gf_div(const gf_t a, const gf_t b) 368 { 369 gf_log_t logsum; 370 371 ASSERT3U(b, >, 0); 372 if (a == 0) 373 return (0); 374 375 logsum = (gf_log_t)255 + (gf_log_t)vdev_raidz_log2[a] - 376 (gf_log_t)vdev_raidz_log2[b]; 377 378 return ((gf_t)vdev_raidz_pow2[logsum % 255]); 379 } 380 381 static inline gf_t 382 gf_inv(const gf_t a) 383 { 384 gf_log_t logsum; 385 386 ASSERT3U(a, >, 0); 387 388 logsum = (gf_log_t)255 - (gf_log_t)vdev_raidz_log2[a]; 389 390 return ((gf_t)vdev_raidz_pow2[logsum]); 391 } 392 393 static inline gf_t 394 gf_exp2(gf_log_t exp) 395 { 396 return (vdev_raidz_pow2[exp % 255]); 397 } 398 399 static inline gf_t 400 gf_exp4(gf_log_t exp) 401 { 402 ASSERT3U(exp, <=, 255); 403 return ((gf_t)vdev_raidz_pow2[(2 * exp) % 255]); 404 } 405 406 #ifdef __cplusplus 407 } 408 #endif 409 410 #endif /* _VDEV_RAIDZ_H */ 411