1 /* 2 * Copyright (C) 2016 Andrea Mazzoleni 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation, either version 3 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #ifndef __IO_H 19 #define __IO_H 20 21 #include "state.h" 22 #include "support.h" 23 #include "handle.h" 24 #include "parity.h" 25 26 /** 27 * Number of read-ahead buffers. 28 * 29 * More buffers always result in better performance. 30 * 31 * This is the scrub performance on my machine with different buffers: 32 * 33 * 1 - 380 MB/s, CPU 26%, speed 100% [SnapRAID 9.2] 34 * 2 - 426 MB/s, CPU 46%, speed 112% 35 * 4 - 452 MB/s, CPU 54%, speed 118% 36 * 8 - 487 MB/s, CPU 60%, speed 128% 37 * 16 - 505 MB/s, CPU 63%, speed 132% 38 * 32 - 520 MB/s, CPU 64%, speed 136% [SnapRAID <= 12.0] 39 * 64 - 524 MB/s, CPU 65%, speed 137% [SnapRAID > 12.0] 40 * 128 - 525 MB/s, CPU 66%, speed 138% 41 */ 42 #define IO_MIN 3 /* required by writers, readers can work also with 2 */ 43 #define IO_MAX 128 44 45 /** 46 * State of the task. 47 */ 48 #define TASK_STATE_IOERROR_CONTINUE -4 /**< IO error. Continuation requested. */ 49 #define TASK_STATE_ERROR_CONTINUE -3 /**< Generic error. Continuation requested. */ 50 #define TASK_STATE_IOERROR -2 /**< IO error. Failure requested. */ 51 #define TASK_STATE_ERROR -1 /**< Generic error. Failure requested. */ 52 #define TASK_STATE_EMPTY 0 /**< Nothing to do. */ 53 #define TASK_STATE_READY 1 /**< Ready to start. */ 54 #define TASK_STATE_DONE 2 /**< Task completed. */ 55 56 /** 57 * Task of work. 58 * 59 * This represents the minimal element of work that worker threads are 60 * going to be asked to do. 61 * 62 * It consists in reading a block of data from a disk. 63 * 64 * Note that the disk to use is defined implicitly in the worker thread. 65 */ 66 struct snapraid_task { 67 int state; /**< State of the task. One of the TASK_STATE_*. */ 68 char path[PATH_MAX]; /**< Path of the file. */ 69 struct snapraid_disk* disk; /**< Disk of the file. */ 70 unsigned char* buffer; /**< Where to read the data. */ 71 block_off_t position; /**< Parity position to read. */ 72 73 /** 74 * Result of the task. 75 */ 76 struct snapraid_block* block; 77 struct snapraid_file* file; 78 block_off_t file_pos; 79 int read_size; /**< Size of the data read. */ 80 int is_timestamp_different; /**< Report if file has a changed timestamp. */ 81 }; 82 83 /** 84 * Worker for tasks. 85 * 86 * This represents a worker thread designated to read data 87 * from a specific disk. 88 */ 89 struct snapraid_worker { 90 #if HAVE_THREAD 91 thread_id_t thread; /**< Thread context for the worker. */ 92 #endif 93 94 struct snapraid_io* io; /**< Parent pointer. */ 95 96 void (*func)(struct snapraid_worker*, struct snapraid_task*); 97 98 /** 99 * Handle to data or parity. 100 * 101 * Only one of the two is valid, the other is 0. 102 */ 103 struct snapraid_handle* handle; /**< Handle at the file on the disk. */ 104 struct snapraid_parity_handle* parity_handle; /**< Handle at the parity on the disk. */ 105 106 /** 107 * Vector of tasks. 108 * 109 * It's a ring of tasks reused cycle after cycle. 110 */ 111 struct snapraid_task task_map[IO_MAX]; 112 113 /** 114 * The task in progress by the worker thread. 115 * 116 * It's an index inside in the ::task_map vector. 117 */ 118 unsigned index; 119 120 /** 121 * Which buffer base index should be used for destination. 122 */ 123 unsigned buffer_skew; 124 }; 125 126 /** 127 * Number of error kind for writers. 128 */ 129 #define IO_WRITER_ERROR_BASE TASK_STATE_IOERROR_CONTINUE 130 #define IO_WRITER_ERROR_MAX (-IO_WRITER_ERROR_BASE) 131 132 /** 133 * Reader. 134 * 135 * This represents the pool of worker threads dedicated to read 136 * data from the disks. 137 */ 138 struct snapraid_io { 139 struct snapraid_state* state; 140 141 /** 142 * Number of read-ahead buffers to use. 143 * 144 * Between IO_MIN and IO_MAX for thread use. 145 * 146 * If equal to 1, it means to work without any thread. 147 */ 148 unsigned io_max; 149 150 #if HAVE_THREAD 151 /** 152 * Mutex used to protect the synchronization 153 * between the io and the workers. 154 */ 155 thread_mutex_t io_mutex; 156 157 /** 158 * Condition for a new read is completed. 159 * 160 * The workers signal this condition when a new read is completed. 161 * The IO waits on this condition when it's waiting for 162 * a new read to be completed. 163 */ 164 thread_cond_t read_done; 165 166 /** 167 * Condition for a new read scheduled. 168 * 169 * The workers wait on this condition when they are waiting for a new 170 * read to process. 171 * The IO signals this condition when new reads are scheduled. 172 */ 173 thread_cond_t read_sched; 174 175 /** 176 * Condition for a new write is completed. 177 * 178 * The workers signal this condition when a new write is completed. 179 * The IO waits on this condition when it's waiting for 180 * a new write to be completed. 181 */ 182 thread_cond_t write_done; 183 184 /** 185 * Condition for a new write scheduled. 186 * 187 * The workers wait on this condition when they are waiting for a new 188 * write to process. 189 * The IO signals this condition when new writes are scheduled. 190 */ 191 thread_cond_t write_sched; 192 #endif 193 194 /** 195 * Base position for workers. 196 * 197 * It's the index in the ::worker_map[]. 198 */ 199 unsigned data_base; 200 unsigned data_count; 201 unsigned parity_base; 202 unsigned parity_count; 203 204 /** 205 * Callbacks for workers. 206 */ 207 void (*data_reader)(struct snapraid_worker*, struct snapraid_task*); 208 void (*parity_reader)(struct snapraid_worker*, struct snapraid_task*); 209 void (*parity_writer)(struct snapraid_worker*, struct snapraid_task*); 210 211 /** 212 * Blocks mapping. 213 * 214 * This info is used to obtain the sequence of block 215 * positions to process. 216 */ 217 block_off_t block_start; 218 block_off_t block_max; 219 block_off_t block_next; 220 bit_vect_t* block_enabled; 221 222 /** 223 * Buffers for data. 224 * 225 * A pool of buffers used to store the data read. 226 */ 227 unsigned buffer_max; /**< Number of buffers. */ 228 void* buffer_alloc_map[IO_MAX]; /**< Allocation map for buffers. */ 229 void** buffer_map[IO_MAX]; /**< Buffers for data. */ 230 231 /** 232 * Workers. 233 * 234 * A vector of readers, each one representing a different thread. 235 */ 236 unsigned reader_max; /**< Number of workers. */ 237 struct snapraid_worker* reader_map; /**< Vector of workers. */ 238 unsigned writer_max; /**< Number of workers. */ 239 struct snapraid_worker* writer_map; /**< Vector of workers. */ 240 241 /** 242 * List of not yet processed workers. 243 * 244 * The list has ::reader_max + 1 elements. Each element contains 245 * the number of the reader to process. 246 * 247 * At initialization the list is filled with [0..reader_max]. 248 * To get the next element to process we use i = list[i + 1]. 249 * The end is when i == reader_max. 250 */ 251 unsigned char* reader_list; 252 unsigned char* writer_list; 253 254 /** 255 * Exit condition for all threads. 256 */ 257 int done; 258 259 /** 260 * The task currently used by the caller. 261 * 262 * It's a rolling counter, when reaching ::io_max 263 * it goes again to 0. 264 * 265 * When the caller finish with the current index, 266 * it's incremented, and a read_sched() signal is sent. 267 * 268 * In monothread mode it isn't the task index, 269 * but the worker index. 270 */ 271 unsigned reader_index; 272 273 /** 274 * The task currently used by the caller. 275 * 276 * It's a rolling counter, when reaching ::io_max 277 * it goes again to 0. 278 * 279 * When the caller finish with the current index, 280 * it's incremented, and a write_sched() signal is sent. 281 * 282 * In monothread mode it isn't the task index, 283 * but the worker index. 284 */ 285 unsigned writer_index; 286 287 /** 288 * Counts the error happening in the writers. 289 */ 290 int writer_error[IO_WRITER_ERROR_MAX]; 291 }; 292 293 /** 294 * Initialize the InputOutput workers. 295 * 296 * \param io_cache The number of IO buffers for read-ahead and write-behind. 0 for default. 297 * \param buffer_max The number of data/parity buffers to allocate. 298 */ 299 void io_init(struct snapraid_io* io, struct snapraid_state* state, 300 unsigned io_cache, unsigned buffer_max, 301 void (*data_reader)(struct snapraid_worker*, struct snapraid_task*), 302 struct snapraid_handle* handle_map, unsigned handle_max, 303 void (*parity_reader)(struct snapraid_worker*, struct snapraid_task*), 304 void (*parity_writer)(struct snapraid_worker*, struct snapraid_task*), 305 struct snapraid_parity_handle* parity_handle_map, unsigned parity_handle_max); 306 307 /** 308 * Deinitialize the InputOutput workers. 309 */ 310 void io_done(struct snapraid_io* io); 311 312 /** 313 * Start all the worker threads. 314 */ 315 extern void (*io_start)(struct snapraid_io* io, 316 block_off_t blockstart, block_off_t blockmax, 317 bit_vect_t* block_enabled); 318 319 /** 320 * Stop all the worker threads. 321 */ 322 extern void (*io_stop)(struct snapraid_io* io); 323 324 /** 325 * Next read position. 326 * 327 * This call starts the reading process. 328 * It must be called before io_data_read() and io_parity_read(). 329 * 330 * \param io InputOutput context. 331 * \param buffer The data buffers to use for this position. 332 * \return The parity position. 333 */ 334 extern block_off_t (*io_read_next)(struct snapraid_io* io, void*** buffer); 335 336 /** 337 * Read a data block. 338 * 339 * It must be called exactly ::handle_max times. 340 * 341 * \param io InputOutput context. 342 * \param diskcur The position of the data block in the ::handle_map vector. 343 * \return The completed task. 344 */ 345 extern struct snapraid_task* (*io_data_read)(struct snapraid_io* io, unsigned* diskcur, unsigned* waiting_map, unsigned* waiting_mac); 346 347 /** 348 * Read a parity block. 349 * 350 * It must be called exactly ::parity_handle_max times. 351 * 352 * \param io InputOutput context. 353 * \param levcur The position of the parity block in the ::parity_handle_map vector. 354 * \return The completed task. 355 */ 356 extern struct snapraid_task* (*io_parity_read)(struct snapraid_io* io, unsigned* levcur, unsigned* waiting_map, unsigned* waiting_mac); 357 358 /** 359 * Write of a parity block. 360 * 361 * It must be called exactly ::parity_handle_max times. 362 * 363 * \param io InputOutput context. 364 * \param levcur The position of the parity block in the ::parity_handle_map vector. 365 */ 366 extern void (*io_parity_write)(struct snapraid_io* io, unsigned* levcur, unsigned* waiting_map, unsigned* waiting_mac); 367 368 /** 369 * Preset the write position. 370 * 371 * This call starts the write process. 372 * It must be called before io_parity_write(). 373 * 374 * \param io InputOutput context. 375 * \param blockcur The parity position to write. 376 * \param skip Skip the writes, in case parity doesn't need to be updated. 377 */ 378 extern void (*io_write_preset)(struct snapraid_io* io, block_off_t blockcur, int skip); 379 380 /** 381 * Next write position. 382 * 383 * This call ends the write process. 384 * It must be called after io_parity_write(). 385 * 386 * \param io InputOutput context. 387 * \param blockcur The parity position to write. 388 * \param skip Skip the writes, in case parity doesn't need to be updated. 389 * \param writer_error Return the number of errors. Vector of IO_WRITER_ERROR_MAX elements. 390 */ 391 extern void (*io_write_next)(struct snapraid_io* io, block_off_t blockcur, int skip, int* writer_error); 392 393 /** 394 * Refresh the number of cached blocks for all data and parity disks. 395 */ 396 extern void (*io_refresh)(struct snapraid_io* io); 397 398 #endif 399 400