xref: /netbsd/sys/dev/raidframe/rf_paritylog.c (revision bf9ec67e)
1 /*	$NetBSD: rf_paritylog.c,v 1.8 2002/05/22 15:40:51 wiz Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /* Code for manipulating in-core parity logs
30  *
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.8 2002/05/22 15:40:51 wiz Exp $");
35 
36 #include "rf_archs.h"
37 
38 #if RF_INCLUDE_PARITYLOGGING > 0
39 
40 /*
41  * Append-only log for recording parity "update" and "overwrite" records
42  */
43 
44 #include <dev/raidframe/raidframevar.h>
45 
46 #include "rf_threadstuff.h"
47 #include "rf_mcpair.h"
48 #include "rf_raid.h"
49 #include "rf_dag.h"
50 #include "rf_dagfuncs.h"
51 #include "rf_desc.h"
52 #include "rf_layout.h"
53 #include "rf_diskqueue.h"
54 #include "rf_etimer.h"
55 #include "rf_paritylog.h"
56 #include "rf_general.h"
57 #include "rf_map.h"
58 #include "rf_paritylogging.h"
59 #include "rf_paritylogDiskMgr.h"
60 
61 static RF_CommonLogData_t *
62 AllocParityLogCommonData(RF_Raid_t * raidPtr)
63 {
64 	RF_CommonLogData_t *common = NULL;
65 	int     rc;
66 
67 	/* Return a struct for holding common parity log information from the
68 	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
69 	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
70 
71 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
72 	if (raidPtr->parityLogDiskQueue.freeCommonList) {
73 		common = raidPtr->parityLogDiskQueue.freeCommonList;
74 		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
75 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
76 	} else {
77 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
78 		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
79 		rc = rf_mutex_init(&common->mutex);
80 		if (rc) {
81 			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
82 			    __LINE__, rc);
83 			RF_Free(common, sizeof(RF_CommonLogData_t));
84 			common = NULL;
85 		}
86 	}
87 	common->next = NULL;
88 	return (common);
89 }
90 
91 static void
92 FreeParityLogCommonData(RF_CommonLogData_t * common)
93 {
94 	RF_Raid_t *raidPtr;
95 
96 	/* Insert a single struct for holding parity log information (data)
97 	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
98 	 * NON-BLOCKING */
99 
100 	raidPtr = common->raidPtr;
101 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
102 	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
103 	raidPtr->parityLogDiskQueue.freeCommonList = common;
104 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
105 }
106 
107 static RF_ParityLogData_t *
108 AllocParityLogData(RF_Raid_t * raidPtr)
109 {
110 	RF_ParityLogData_t *data = NULL;
111 
112 	/* Return a struct for holding parity log information from the free
113 	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
114 	 * call RF_Malloc to create a new structure. NON-BLOCKING */
115 
116 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
117 	if (raidPtr->parityLogDiskQueue.freeDataList) {
118 		data = raidPtr->parityLogDiskQueue.freeDataList;
119 		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
120 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
121 	} else {
122 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
123 		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
124 	}
125 	data->next = NULL;
126 	data->prev = NULL;
127 	return (data);
128 }
129 
130 
131 static void
132 FreeParityLogData(RF_ParityLogData_t * data)
133 {
134 	RF_ParityLogData_t *nextItem;
135 	RF_Raid_t *raidPtr;
136 
137 	/* Insert a linked list of structs for holding parity log information
138 	 * (data) into the free list (parityLogDiskQueue.freeList).
139 	 * NON-BLOCKING */
140 
141 	raidPtr = data->common->raidPtr;
142 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
143 	while (data) {
144 		nextItem = data->next;
145 		data->next = raidPtr->parityLogDiskQueue.freeDataList;
146 		raidPtr->parityLogDiskQueue.freeDataList = data;
147 		data = nextItem;
148 	}
149 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
150 }
151 
152 
153 static void
154 EnqueueParityLogData(
155     RF_ParityLogData_t * data,
156     RF_ParityLogData_t ** head,
157     RF_ParityLogData_t ** tail)
158 {
159 	RF_Raid_t *raidPtr;
160 
161 	/* Insert an in-core parity log (*data) into the head of a disk queue
162 	 * (*head, *tail). NON-BLOCKING */
163 
164 	raidPtr = data->common->raidPtr;
165 	if (rf_parityLogDebug)
166 		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
167 	RF_ASSERT(data->prev == NULL);
168 	RF_ASSERT(data->next == NULL);
169 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
170 	if (*head) {
171 		/* insert into head of queue */
172 		RF_ASSERT((*head)->prev == NULL);
173 		RF_ASSERT((*tail)->next == NULL);
174 		data->next = *head;
175 		(*head)->prev = data;
176 		*head = data;
177 	} else {
178 		/* insert into empty list */
179 		RF_ASSERT(*head == NULL);
180 		RF_ASSERT(*tail == NULL);
181 		*head = data;
182 		*tail = data;
183 	}
184 	RF_ASSERT((*head)->prev == NULL);
185 	RF_ASSERT((*tail)->next == NULL);
186 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
187 }
188 
189 static RF_ParityLogData_t *
190 DequeueParityLogData(
191     RF_Raid_t * raidPtr,
192     RF_ParityLogData_t ** head,
193     RF_ParityLogData_t ** tail,
194     int ignoreLocks)
195 {
196 	RF_ParityLogData_t *data;
197 
198 	/* Remove and return an in-core parity log from the tail of a disk
199 	 * queue (*head, *tail). NON-BLOCKING */
200 
201 	/* remove from tail, preserving FIFO order */
202 	if (!ignoreLocks)
203 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
204 	data = *tail;
205 	if (data) {
206 		if (*head == *tail) {
207 			/* removing last item from queue */
208 			*head = NULL;
209 			*tail = NULL;
210 		} else {
211 			*tail = (*tail)->prev;
212 			(*tail)->next = NULL;
213 			RF_ASSERT((*head)->prev == NULL);
214 			RF_ASSERT((*tail)->next == NULL);
215 		}
216 		data->next = NULL;
217 		data->prev = NULL;
218 		if (rf_parityLogDebug)
219 			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
220 	}
221 	if (*head) {
222 		RF_ASSERT((*head)->prev == NULL);
223 		RF_ASSERT((*tail)->next == NULL);
224 	}
225 	if (!ignoreLocks)
226 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
227 	return (data);
228 }
229 
230 
231 static void
232 RequeueParityLogData(
233     RF_ParityLogData_t * data,
234     RF_ParityLogData_t ** head,
235     RF_ParityLogData_t ** tail)
236 {
237 	RF_Raid_t *raidPtr;
238 
239 	/* Insert an in-core parity log (*data) into the tail of a disk queue
240 	 * (*head, *tail). NON-BLOCKING */
241 
242 	raidPtr = data->common->raidPtr;
243 	RF_ASSERT(data);
244 	if (rf_parityLogDebug)
245 		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
246 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
247 	if (*tail) {
248 		/* append to tail of list */
249 		data->prev = *tail;
250 		data->next = NULL;
251 		(*tail)->next = data;
252 		*tail = data;
253 	} else {
254 		/* inserting into an empty list */
255 		*head = data;
256 		*tail = data;
257 		(*head)->prev = NULL;
258 		(*tail)->next = NULL;
259 	}
260 	RF_ASSERT((*head)->prev == NULL);
261 	RF_ASSERT((*tail)->next == NULL);
262 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
263 }
264 
265 RF_ParityLogData_t *
266 rf_CreateParityLogData(
267     RF_ParityRecordType_t operation,
268     RF_PhysDiskAddr_t * pda,
269     caddr_t bufPtr,
270     RF_Raid_t * raidPtr,
271     int (*wakeFunc) (RF_DagNode_t * node, int status),
272     void *wakeArg,
273     RF_AccTraceEntry_t * tracerec,
274     RF_Etimer_t startTime)
275 {
276 	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
277 	RF_CommonLogData_t *common;
278 	RF_PhysDiskAddr_t *diskAddress;
279 	int     boundary, offset = 0;
280 
281 	/* Return an initialized struct of info to be logged. Build one item
282 	 * per physical disk address, one item per region.
283 	 *
284 	 * NON-BLOCKING */
285 
286 	diskAddress = pda;
287 	common = AllocParityLogCommonData(raidPtr);
288 	RF_ASSERT(common);
289 
290 	common->operation = operation;
291 	common->bufPtr = bufPtr;
292 	common->raidPtr = raidPtr;
293 	common->wakeFunc = wakeFunc;
294 	common->wakeArg = wakeArg;
295 	common->tracerec = tracerec;
296 	common->startTime = startTime;
297 	common->cnt = 0;
298 
299 	if (rf_parityLogDebug)
300 		printf("[entering CreateParityLogData]\n");
301 	while (diskAddress) {
302 		common->cnt++;
303 		data = AllocParityLogData(raidPtr);
304 		RF_ASSERT(data);
305 		data->common = common;
306 		data->next = NULL;
307 		data->prev = NULL;
308 		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
309 		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
310 			/* disk address does not cross a region boundary */
311 			data->diskAddress = *diskAddress;
312 			data->bufOffset = offset;
313 			offset = offset + diskAddress->numSector;
314 			EnqueueParityLogData(data, &resultHead, &resultTail);
315 			/* adjust disk address */
316 			diskAddress = diskAddress->next;
317 		} else {
318 			/* disk address crosses a region boundary */
319 			/* find address where region is crossed */
320 			boundary = 0;
321 			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
322 				boundary++;
323 
324 			/* enter data before the boundary */
325 			data->diskAddress = *diskAddress;
326 			data->diskAddress.numSector = boundary;
327 			data->bufOffset = offset;
328 			offset += boundary;
329 			EnqueueParityLogData(data, &resultHead, &resultTail);
330 			/* adjust disk address */
331 			diskAddress->startSector += boundary;
332 			diskAddress->numSector -= boundary;
333 		}
334 	}
335 	if (rf_parityLogDebug)
336 		printf("[leaving CreateParityLogData]\n");
337 	return (resultHead);
338 }
339 
340 
341 RF_ParityLogData_t *
342 rf_SearchAndDequeueParityLogData(
343     RF_Raid_t * raidPtr,
344     int regionID,
345     RF_ParityLogData_t ** head,
346     RF_ParityLogData_t ** tail,
347     int ignoreLocks)
348 {
349 	RF_ParityLogData_t *w;
350 
351 	/* Remove and return an in-core parity log from a specified region
352 	 * (regionID). If a matching log is not found, return NULL.
353 	 *
354 	 * NON-BLOCKING. */
355 
356 	/* walk backward through a list, looking for an entry with a matching
357 	 * region ID */
358 	if (!ignoreLocks)
359 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
360 	w = (*tail);
361 	while (w) {
362 		if (w->regionID == regionID) {
363 			/* remove an element from the list */
364 			if (w == *tail) {
365 				if (*head == *tail) {
366 					/* removing only element in the list */
367 					*head = NULL;
368 					*tail = NULL;
369 				} else {
370 					/* removing last item in the list */
371 					*tail = (*tail)->prev;
372 					(*tail)->next = NULL;
373 					RF_ASSERT((*head)->prev == NULL);
374 					RF_ASSERT((*tail)->next == NULL);
375 				}
376 			} else {
377 				if (w == *head) {
378 					/* removing first item in the list */
379 					*head = (*head)->next;
380 					(*head)->prev = NULL;
381 					RF_ASSERT((*head)->prev == NULL);
382 					RF_ASSERT((*tail)->next == NULL);
383 				} else {
384 					/* removing an item from the middle of
385 					 * the list */
386 					w->prev->next = w->next;
387 					w->next->prev = w->prev;
388 					RF_ASSERT((*head)->prev == NULL);
389 					RF_ASSERT((*tail)->next == NULL);
390 				}
391 			}
392 			w->prev = NULL;
393 			w->next = NULL;
394 			if (rf_parityLogDebug)
395 				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
396 			return (w);
397 		} else
398 			w = w->prev;
399 	}
400 	if (!ignoreLocks)
401 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
402 	return (NULL);
403 }
404 
405 static RF_ParityLogData_t *
406 DequeueMatchingLogData(
407     RF_Raid_t * raidPtr,
408     RF_ParityLogData_t ** head,
409     RF_ParityLogData_t ** tail)
410 {
411 	RF_ParityLogData_t *logDataList, *logData;
412 	int     regionID;
413 
414 	/* Remove and return an in-core parity log from the tail of a disk
415 	 * queue (*head, *tail).  Then remove all matching (identical
416 	 * regionIDs) logData and return as a linked list.
417 	 *
418 	 * NON-BLOCKING */
419 
420 	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
421 	if (logDataList) {
422 		regionID = logDataList->regionID;
423 		logData = logDataList;
424 		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
425 		while (logData->next) {
426 			logData = logData->next;
427 			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
428 		}
429 	}
430 	return (logDataList);
431 }
432 
433 
434 static RF_ParityLog_t *
435 AcquireParityLog(
436     RF_ParityLogData_t * logData,
437     int finish)
438 {
439 	RF_ParityLog_t *log = NULL;
440 	RF_Raid_t *raidPtr;
441 
442 	/* Grab a log buffer from the pool and return it. If no buffers are
443 	 * available, return NULL. NON-BLOCKING */
444 	raidPtr = logData->common->raidPtr;
445 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
446 	if (raidPtr->parityLogPool.parityLogs) {
447 		log = raidPtr->parityLogPool.parityLogs;
448 		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
449 		log->regionID = logData->regionID;
450 		log->numRecords = 0;
451 		log->next = NULL;
452 		raidPtr->logsInUse++;
453 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
454 	} else {
455 		/* no logs available, so place ourselves on the queue of work
456 		 * waiting on log buffers this is done while
457 		 * parityLogPool.mutex is held, to ensure synchronization with
458 		 * ReleaseParityLogs. */
459 		if (rf_parityLogDebug)
460 			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
461 		if (finish)
462 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
463 		else
464 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
465 	}
466 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
467 	return (log);
468 }
469 
470 void
471 rf_ReleaseParityLogs(
472     RF_Raid_t * raidPtr,
473     RF_ParityLog_t * firstLog)
474 {
475 	RF_ParityLogData_t *logDataList;
476 	RF_ParityLog_t *log, *lastLog;
477 	int     cnt;
478 
479 	/* Insert a linked list of parity logs (firstLog) to the free list
480 	 * (parityLogPool.parityLogPool)
481 	 *
482 	 * NON-BLOCKING. */
483 
484 	RF_ASSERT(firstLog);
485 
486 	/* Before returning logs to global free list, service all requests
487 	 * which are blocked on logs.  Holding mutexes for parityLogPool and
488 	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
489 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
490 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
491 	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
492 	log = firstLog;
493 	if (firstLog)
494 		firstLog = firstLog->next;
495 	log->numRecords = 0;
496 	log->next = NULL;
497 	while (logDataList && log) {
498 		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
499 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
500 		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
501 		if (rf_parityLogDebug)
502 			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
503 		if (log == NULL) {
504 			log = firstLog;
505 			if (firstLog) {
506 				firstLog = firstLog->next;
507 				log->numRecords = 0;
508 				log->next = NULL;
509 			}
510 		}
511 		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
512 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
513 		if (log)
514 			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
515 	}
516 	/* return remaining logs to pool */
517 	if (log) {
518 		log->next = firstLog;
519 		firstLog = log;
520 	}
521 	if (firstLog) {
522 		lastLog = firstLog;
523 		raidPtr->logsInUse--;
524 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
525 		while (lastLog->next) {
526 			lastLog = lastLog->next;
527 			raidPtr->logsInUse--;
528 			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
529 		}
530 		lastLog->next = raidPtr->parityLogPool.parityLogs;
531 		raidPtr->parityLogPool.parityLogs = firstLog;
532 		cnt = 0;
533 		log = raidPtr->parityLogPool.parityLogs;
534 		while (log) {
535 			cnt++;
536 			log = log->next;
537 		}
538 		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
539 	}
540 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
541 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
542 }
543 
544 static void
545 ReintLog(
546     RF_Raid_t * raidPtr,
547     int regionID,
548     RF_ParityLog_t * log)
549 {
550 	RF_ASSERT(log);
551 
552 	/* Insert an in-core parity log (log) into the disk queue of
553 	 * reintegration work.  Set the flag (reintInProgress) for the
554 	 * specified region (regionID) to indicate that reintegration is in
555 	 * progress for this region. NON-BLOCKING */
556 
557 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
558 	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
559 									 * complete */
560 
561 	if (rf_parityLogDebug)
562 		printf("[requesting reintegration of region %d]\n", log->regionID);
563 	/* move record to reintegration queue */
564 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
565 	log->next = raidPtr->parityLogDiskQueue.reintQueue;
566 	raidPtr->parityLogDiskQueue.reintQueue = log;
567 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
568 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
569 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
570 }
571 
572 static void
573 FlushLog(
574     RF_Raid_t * raidPtr,
575     RF_ParityLog_t * log)
576 {
577 	/* insert a core log (log) into a list of logs
578 	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
579 	 * NON-BLOCKING */
580 
581 	RF_ASSERT(log);
582 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
583 	RF_ASSERT(log->next == NULL);
584 	/* move log to flush queue */
585 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
586 	log->next = raidPtr->parityLogDiskQueue.flushQueue;
587 	raidPtr->parityLogDiskQueue.flushQueue = log;
588 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
589 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
590 }
591 
592 static int
593 DumpParityLogToDisk(
594     int finish,
595     RF_ParityLogData_t * logData)
596 {
597 	int     i, diskCount, regionID = logData->regionID;
598 	RF_ParityLog_t *log;
599 	RF_Raid_t *raidPtr;
600 
601 	raidPtr = logData->common->raidPtr;
602 
603 	/* Move a core log to disk.  If the log disk is full, initiate
604 	 * reintegration.
605 	 *
606 	 * Return (0) if we can enqueue the dump immediately, otherwise return
607 	 * (1) to indicate we are blocked on reintegration and control of the
608 	 * thread should be relinquished.
609 	 *
610 	 * Caller must hold regionInfo[regionID].mutex
611 	 *
612 	 * NON-BLOCKING */
613 
614 	if (rf_parityLogDebug)
615 		printf("[dumping parity log to disk, region %d]\n", regionID);
616 	log = raidPtr->regionInfo[regionID].coreLog;
617 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
618 	RF_ASSERT(log->next == NULL);
619 
620 	/* if reintegration is in progress, must queue work */
621 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
622 	if (raidPtr->regionInfo[regionID].reintInProgress) {
623 		/* Can not proceed since this region is currently being
624 		 * reintegrated. We can not block, so queue remaining work and
625 		 * return */
626 		if (rf_parityLogDebug)
627 			printf("[region %d waiting on reintegration]\n", regionID);
628 		/* XXX not sure about the use of finish - shouldn't this
629 		 * always be "Enqueue"? */
630 		if (finish)
631 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
632 		else
633 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
634 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
635 		return (1);	/* relenquish control of this thread */
636 	}
637 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
638 	raidPtr->regionInfo[regionID].coreLog = NULL;
639 	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
640 		/* IMPORTANT!! this loop bound assumes region disk holds an
641 		 * integral number of core logs */
642 	{
643 		/* update disk map for this region */
644 		diskCount = raidPtr->regionInfo[regionID].diskCount;
645 		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
646 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
647 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
648 		}
649 		log->diskOffset = diskCount;
650 		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
651 		FlushLog(raidPtr, log);
652 	} else {
653 		/* no room for log on disk, send it to disk manager and
654 		 * request reintegration */
655 		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
656 		ReintLog(raidPtr, regionID, log);
657 	}
658 	if (rf_parityLogDebug)
659 		printf("[finished dumping parity log to disk, region %d]\n", regionID);
660 	return (0);
661 }
662 
663 int
664 rf_ParityLogAppend(
665     RF_ParityLogData_t * logData,
666     int finish,
667     RF_ParityLog_t ** incomingLog,
668     int clearReintFlag)
669 {
670 	int     regionID, logItem, itemDone;
671 	RF_ParityLogData_t *item;
672 	int     punt, done = RF_FALSE;
673 	RF_ParityLog_t *log;
674 	RF_Raid_t *raidPtr;
675 	RF_Etimer_t timer;
676 	int     (*wakeFunc) (RF_DagNode_t * node, int status);
677 	void   *wakeArg;
678 
679 	/* Add parity to the appropriate log, one sector at a time. This
680 	 * routine is called is called by dag functions ParityLogUpdateFunc
681 	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
682 	 *
683 	 * Parity to be logged is contained in a linked-list (logData).  When
684 	 * this routine returns, every sector in the list will be in one of
685 	 * three places: 1) entered into the parity log 2) queued, waiting on
686 	 * reintegration 3) queued, waiting on a core log
687 	 *
688 	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
689 	 * Later, as conditions which required the block are removed, the work
690 	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
691 	 *
692 	 * NON-BLOCKING */
693 
694 	raidPtr = logData->common->raidPtr;
695 	/* lock the region for the first item in logData */
696 	RF_ASSERT(logData != NULL);
697 	regionID = logData->regionID;
698 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
699 	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
700 
701 	if (clearReintFlag) {
702 		/* Enable flushing for this region.  Holding both locks
703 		 * provides a synchronization barrier with DumpParityLogToDisk */
704 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
705 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
706 		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
707 		raidPtr->regionInfo[regionID].diskCount = 0;
708 		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
709 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
710 										 * enabled */
711 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
712 	}
713 	/* process each item in logData */
714 	while (logData) {
715 		/* remove an item from logData */
716 		item = logData;
717 		logData = logData->next;
718 		item->next = NULL;
719 		item->prev = NULL;
720 
721 		if (rf_parityLogDebug)
722 			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
723 
724 		/* see if we moved to a new region */
725 		if (regionID != item->regionID) {
726 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
727 			regionID = item->regionID;
728 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
729 			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
730 		}
731 		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
732 				 * can happen in one of two ways: 1) no core
733 				 * log (AcquireParityLog) 2) waiting on
734 				 * reintegration (DumpParityLogToDisk) If punt
735 				 * is RF_TRUE, the dataItem was queued, so
736 				 * skip to next item. */
737 
738 		/* process item, one sector at a time, until all sectors
739 		 * processed or we punt */
740 		if (item->diskAddress.numSector > 0)
741 			done = RF_FALSE;
742 		else
743 			RF_ASSERT(0);
744 		while (!punt && !done) {
745 			/* verify that a core log exists for this region */
746 			if (!raidPtr->regionInfo[regionID].coreLog) {
747 				/* Attempt to acquire a parity log. If
748 				 * acquisition fails, queue remaining work in
749 				 * data item and move to nextItem. */
750 				if (incomingLog)
751 					if (*incomingLog) {
752 						RF_ASSERT((*incomingLog)->next == NULL);
753 						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
754 						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
755 						*incomingLog = NULL;
756 					} else
757 						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
758 				else
759 					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
760 				/* Note: AcquireParityLog either returns a log
761 				 * or enqueues currentItem */
762 			}
763 			if (!raidPtr->regionInfo[regionID].coreLog)
764 				punt = RF_TRUE;	/* failed to find a core log */
765 			else {
766 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
767 				/* verify that the log has room for new
768 				 * entries */
769 				/* if log is full, dump it to disk and grab a
770 				 * new log */
771 				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
772 					/* log is full, dump it to disk */
773 					if (DumpParityLogToDisk(finish, item))
774 						punt = RF_TRUE;	/* dump unsuccessful,
775 								 * blocked on
776 								 * reintegration */
777 					else {
778 						/* dump was successful */
779 						if (incomingLog)
780 							if (*incomingLog) {
781 								RF_ASSERT((*incomingLog)->next == NULL);
782 								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
783 								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
784 								*incomingLog = NULL;
785 							} else
786 								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
787 						else
788 							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
789 						/* if a core log is not
790 						 * available, must queue work
791 						 * and return */
792 						if (!raidPtr->regionInfo[regionID].coreLog)
793 							punt = RF_TRUE;	/* blocked on log
794 									 * availability */
795 					}
796 				}
797 			}
798 			/* if we didn't punt on this item, attempt to add a
799 			 * sector to the core log */
800 			if (!punt) {
801 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
802 				/* at this point, we have a core log with
803 				 * enough room for a sector */
804 				/* copy a sector into the log */
805 				log = raidPtr->regionInfo[regionID].coreLog;
806 				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
807 				logItem = log->numRecords++;
808 				log->records[logItem].parityAddr = item->diskAddress;
809 				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
810 				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
811 				log->records[logItem].parityAddr.numSector = 1;
812 				log->records[logItem].operation = item->common->operation;
813 				memcpy(log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
814 				item->diskAddress.numSector--;
815 				item->diskAddress.startSector++;
816 				if (item->diskAddress.numSector == 0)
817 					done = RF_TRUE;
818 			}
819 		}
820 
821 		if (!punt) {
822 			/* Processed this item completely, decrement count of
823 			 * items to be processed. */
824 			RF_ASSERT(item->diskAddress.numSector == 0);
825 			RF_LOCK_MUTEX(item->common->mutex);
826 			item->common->cnt--;
827 			if (item->common->cnt == 0)
828 				itemDone = RF_TRUE;
829 			else
830 				itemDone = RF_FALSE;
831 			RF_UNLOCK_MUTEX(item->common->mutex);
832 			if (itemDone) {
833 				/* Finished processing all log data for this
834 				 * IO Return structs to free list and invoke
835 				 * wakeup function. */
836 				timer = item->common->startTime;	/* grab initial value of
837 									 * timer */
838 				RF_ETIMER_STOP(timer);
839 				RF_ETIMER_EVAL(timer);
840 				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
841 				if (rf_parityLogDebug)
842 					printf("[waking process for region %d]\n", item->regionID);
843 				wakeFunc = item->common->wakeFunc;
844 				wakeArg = item->common->wakeArg;
845 				FreeParityLogCommonData(item->common);
846 				FreeParityLogData(item);
847 				(wakeFunc) (wakeArg, 0);
848 			} else
849 				FreeParityLogData(item);
850 		}
851 	}
852 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
853 	if (rf_parityLogDebug)
854 		printf("[exiting ParityLogAppend]\n");
855 	return (0);
856 }
857 
858 
859 void
860 rf_EnableParityLogging(RF_Raid_t * raidPtr)
861 {
862 	int     regionID;
863 
864 	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
865 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
866 		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
867 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
868 	}
869 	if (rf_parityLogDebug)
870 		printf("[parity logging enabled]\n");
871 }
872 #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
873