1 /*
2  * Copyright (C) 2016 Andrea Mazzoleni
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __IO_H
19 #define __IO_H
20 
21 #include "state.h"
22 #include "support.h"
23 #include "handle.h"
24 #include "parity.h"
25 
26 /**
27  * Number of read-ahead buffers.
28  *
29  * More buffers always result in better performance.
30  *
31  * This is the scrub performance on my machine with different buffers:
32  *
33  *  1  - 380 MB/s, CPU 26%, speed 100% [SnapRAID 9.2]
34  *  2  - 426 MB/s, CPU 46%, speed 112%
35  *  4  - 452 MB/s, CPU 54%, speed 118%
36  *  8  - 487 MB/s, CPU 60%, speed 128%
37  * 16  - 505 MB/s, CPU 63%, speed 132%
38  * 32  - 520 MB/s, CPU 64%, speed 136% [SnapRAID <= 12.0]
39  * 64  - 524 MB/s, CPU 65%, speed 137% [SnapRAID > 12.0]
40  * 128 - 525 MB/s, CPU 66%, speed 138%
41  */
42 #define IO_MIN 3 /* required by writers, readers can work also with 2 */
43 #define IO_MAX 128
44 
45 /**
46  * State of the task.
47  */
48 #define TASK_STATE_IOERROR_CONTINUE -4 /**< IO error. Continuation requested. */
49 #define TASK_STATE_ERROR_CONTINUE -3 /**< Generic error. Continuation requested. */
50 #define TASK_STATE_IOERROR -2 /**< IO error. Failure requested. */
51 #define TASK_STATE_ERROR -1 /**< Generic error. Failure requested. */
52 #define TASK_STATE_EMPTY 0 /**< Nothing to do. */
53 #define TASK_STATE_READY 1 /**< Ready to start. */
54 #define TASK_STATE_DONE 2 /**< Task completed. */
55 
56 /**
57  * Task of work.
58  *
59  * This represents the minimal element of work that worker threads are
60  * going to be asked to do.
61  *
62  * It consists in reading a block of data from a disk.
63  *
64  * Note that the disk to use is defined implicitly in the worker thread.
65  */
66 struct snapraid_task {
67 	int state; /**< State of the task. One of the TASK_STATE_*. */
68 	char path[PATH_MAX]; /**< Path of the file. */
69 	struct snapraid_disk* disk; /**< Disk of the file. */
70 	unsigned char* buffer; /**< Where to read the data. */
71 	block_off_t position; /**< Parity position to read. */
72 
73 	/**
74 	 * Result of the task.
75 	 */
76 	struct snapraid_block* block;
77 	struct snapraid_file* file;
78 	block_off_t file_pos;
79 	int read_size; /**< Size of the data read. */
80 	int is_timestamp_different; /**< Report if file has a changed timestamp. */
81 };
82 
83 /**
84  * Worker for tasks.
85  *
86  * This represents a worker thread designated to read data
87  * from a specific disk.
88  */
89 struct snapraid_worker {
90 #if HAVE_THREAD
91 	thread_id_t thread; /**< Thread context for the worker. */
92 #endif
93 
94 	struct snapraid_io* io; /**< Parent pointer. */
95 
96 	void (*func)(struct snapraid_worker*, struct snapraid_task*);
97 
98 	/**
99 	 * Handle to data or parity.
100 	 *
101 	 * Only one of the two is valid, the other is 0.
102 	 */
103 	struct snapraid_handle* handle; /**< Handle at the file on the disk. */
104 	struct snapraid_parity_handle* parity_handle; /**< Handle at the parity on the disk. */
105 
106 	/**
107 	 * Vector of tasks.
108 	 *
109 	 * It's a ring of tasks reused cycle after cycle.
110 	 */
111 	struct snapraid_task task_map[IO_MAX];
112 
113 	/**
114 	 * The task in progress by the worker thread.
115 	 *
116 	 * It's an index inside in the ::task_map vector.
117 	 */
118 	unsigned index;
119 
120 	/**
121 	 * Which buffer base index should be used for destination.
122 	 */
123 	unsigned buffer_skew;
124 };
125 
126 /**
127  * Number of error kind for writers.
128  */
129 #define IO_WRITER_ERROR_BASE TASK_STATE_IOERROR_CONTINUE
130 #define IO_WRITER_ERROR_MAX (-IO_WRITER_ERROR_BASE)
131 
132 /**
133  * Reader.
134  *
135  * This represents the pool of worker threads dedicated to read
136  * data from the disks.
137  */
138 struct snapraid_io {
139 	struct snapraid_state* state;
140 
141 	/**
142 	 * Number of read-ahead buffers to use.
143 	 *
144 	 * Between IO_MIN and IO_MAX for thread use.
145 	 *
146 	 * If equal to 1, it means to work without any thread.
147 	 */
148 	unsigned io_max;
149 
150 #if HAVE_THREAD
151 	/**
152 	 * Mutex used to protect the synchronization
153 	 * between the io and the workers.
154 	 */
155 	thread_mutex_t io_mutex;
156 
157 	/**
158 	 * Condition for a new read is completed.
159 	 *
160 	 * The workers signal this condition when a new read is completed.
161 	 * The IO waits on this condition when it's waiting for
162 	 * a new read to be completed.
163 	 */
164 	thread_cond_t read_done;
165 
166 	/**
167 	 * Condition for a new read scheduled.
168 	 *
169 	 * The workers wait on this condition when they are waiting for a new
170 	 * read to process.
171 	 * The IO signals this condition when new reads are scheduled.
172 	 */
173 	thread_cond_t read_sched;
174 
175 	/**
176 	 * Condition for a new write is completed.
177 	 *
178 	 * The workers signal this condition when a new write is completed.
179 	 * The IO waits on this condition when it's waiting for
180 	 * a new write to be completed.
181 	 */
182 	thread_cond_t write_done;
183 
184 	/**
185 	 * Condition for a new write scheduled.
186 	 *
187 	 * The workers wait on this condition when they are waiting for a new
188 	 * write to process.
189 	 * The IO signals this condition when new writes are scheduled.
190 	 */
191 	thread_cond_t write_sched;
192 #endif
193 
194 	/**
195 	 * Base position for workers.
196 	 *
197 	 * It's the index in the ::worker_map[].
198 	 */
199 	unsigned data_base;
200 	unsigned data_count;
201 	unsigned parity_base;
202 	unsigned parity_count;
203 
204 	/**
205 	 * Callbacks for workers.
206 	 */
207 	void (*data_reader)(struct snapraid_worker*, struct snapraid_task*);
208 	void (*parity_reader)(struct snapraid_worker*, struct snapraid_task*);
209 	void (*parity_writer)(struct snapraid_worker*, struct snapraid_task*);
210 
211 	/**
212 	 * Blocks mapping.
213 	 *
214 	 * This info is used to obtain the sequence of block
215 	 * positions to process.
216 	 */
217 	block_off_t block_start;
218 	block_off_t block_max;
219 	block_off_t block_next;
220 	bit_vect_t* block_enabled;
221 
222 	/**
223 	 * Buffers for data.
224 	 *
225 	 * A pool of buffers used to store the data read.
226 	 */
227 	unsigned buffer_max; /**< Number of buffers. */
228 	void* buffer_alloc_map[IO_MAX]; /**< Allocation map for buffers. */
229 	void** buffer_map[IO_MAX]; /**< Buffers for data. */
230 
231 	/**
232 	 * Workers.
233 	 *
234 	 * A vector of readers, each one representing a different thread.
235 	 */
236 	unsigned reader_max; /**< Number of workers. */
237 	struct snapraid_worker* reader_map; /**< Vector of workers. */
238 	unsigned writer_max; /**< Number of workers. */
239 	struct snapraid_worker* writer_map; /**< Vector of workers. */
240 
241 	/**
242 	 * List of not yet processed workers.
243 	 *
244 	 * The list has ::reader_max + 1 elements. Each element contains
245 	 * the number of the reader to process.
246 	 *
247 	 * At initialization the list is filled with [0..reader_max].
248 	 * To get the next element to process we use i = list[i + 1].
249 	 * The end is when i == reader_max.
250 	 */
251 	unsigned char* reader_list;
252 	unsigned char* writer_list;
253 
254 	/**
255 	 * Exit condition for all threads.
256 	 */
257 	int done;
258 
259 	/**
260 	 * The task currently used by the caller.
261 	 *
262 	 * It's a rolling counter, when reaching ::io_max
263 	 * it goes again to 0.
264 	 *
265 	 * When the caller finish with the current index,
266 	 * it's incremented, and a read_sched() signal is sent.
267 	 *
268 	 * In monothread mode it isn't the task index,
269 	 * but the worker index.
270 	 */
271 	unsigned reader_index;
272 
273 	/**
274 	 * The task currently used by the caller.
275 	 *
276 	 * It's a rolling counter, when reaching ::io_max
277 	 * it goes again to 0.
278 	 *
279 	 * When the caller finish with the current index,
280 	 * it's incremented, and a write_sched() signal is sent.
281 	 *
282 	 * In monothread mode it isn't the task index,
283 	 * but the worker index.
284 	 */
285 	unsigned writer_index;
286 
287 	/**
288 	 * Counts the error happening in the writers.
289 	 */
290 	int writer_error[IO_WRITER_ERROR_MAX];
291 };
292 
293 /**
294  * Initialize the InputOutput workers.
295  *
296  * \param io_cache The number of IO buffers for read-ahead and write-behind. 0 for default.
297  * \param buffer_max The number of data/parity buffers to allocate.
298  */
299 void io_init(struct snapraid_io* io, struct snapraid_state* state,
300 	unsigned io_cache, unsigned buffer_max,
301 	void (*data_reader)(struct snapraid_worker*, struct snapraid_task*),
302 	struct snapraid_handle* handle_map, unsigned handle_max,
303 	void (*parity_reader)(struct snapraid_worker*, struct snapraid_task*),
304 	void (*parity_writer)(struct snapraid_worker*, struct snapraid_task*),
305 	struct snapraid_parity_handle* parity_handle_map, unsigned parity_handle_max);
306 
307 /**
308  * Deinitialize the InputOutput workers.
309  */
310 void io_done(struct snapraid_io* io);
311 
312 /**
313  * Start all the worker threads.
314  */
315 extern void (*io_start)(struct snapraid_io* io,
316 	block_off_t blockstart, block_off_t blockmax,
317 	bit_vect_t* block_enabled);
318 
319 /**
320  * Stop all the worker threads.
321  */
322 extern void (*io_stop)(struct snapraid_io* io);
323 
324 /**
325  * Next read position.
326  *
327  * This call starts the reading process.
328  * It must be called before io_data_read() and io_parity_read().
329  *
330  * \param io InputOutput context.
331  * \param buffer The data buffers to use for this position.
332  * \return The parity position.
333  */
334 extern block_off_t (*io_read_next)(struct snapraid_io* io, void*** buffer);
335 
336 /**
337  * Read a data block.
338  *
339  * It must be called exactly ::handle_max times.
340  *
341  * \param io InputOutput context.
342  * \param diskcur The position of the data block in the ::handle_map vector.
343  * \return The completed task.
344  */
345 extern struct snapraid_task* (*io_data_read)(struct snapraid_io* io, unsigned* diskcur, unsigned* waiting_map, unsigned* waiting_mac);
346 
347 /**
348  * Read a parity block.
349  *
350  * It must be called exactly ::parity_handle_max times.
351  *
352  * \param io InputOutput context.
353  * \param levcur The position of the parity block in the ::parity_handle_map vector.
354  * \return The completed task.
355  */
356 extern struct snapraid_task* (*io_parity_read)(struct snapraid_io* io, unsigned* levcur, unsigned* waiting_map, unsigned* waiting_mac);
357 
358 /**
359  * Write of a parity block.
360  *
361  * It must be called exactly ::parity_handle_max times.
362  *
363  * \param io InputOutput context.
364  * \param levcur The position of the parity block in the ::parity_handle_map vector.
365  */
366 extern void (*io_parity_write)(struct snapraid_io* io, unsigned* levcur, unsigned* waiting_map, unsigned* waiting_mac);
367 
368 /**
369  * Preset the write position.
370  *
371  * This call starts the write process.
372  * It must be called before io_parity_write().
373  *
374  * \param io InputOutput context.
375  * \param blockcur The parity position to write.
376  * \param skip Skip the writes, in case parity doesn't need to be updated.
377  */
378 extern void (*io_write_preset)(struct snapraid_io* io, block_off_t blockcur, int skip);
379 
380 /**
381  * Next write position.
382  *
383  * This call ends the write process.
384  * It must be called after io_parity_write().
385  *
386  * \param io InputOutput context.
387  * \param blockcur The parity position to write.
388  * \param skip Skip the writes, in case parity doesn't need to be updated.
389  * \param writer_error Return the number of errors. Vector of IO_WRITER_ERROR_MAX elements.
390  */
391 extern void (*io_write_next)(struct snapraid_io* io, block_off_t blockcur, int skip, int* writer_error);
392 
393 /**
394  * Refresh the number of cached blocks for all data and parity disks.
395  */
396 extern void (*io_refresh)(struct snapraid_io* io);
397 
398 #endif
399 
400