1 /* $NetBSD: rf_paritylog.c,v 1.13 2007/03/04 06:02:38 christos Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* Code for manipulating in-core parity logs 30 * 31 */ 32 33 #include <sys/cdefs.h> 34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.13 2007/03/04 06:02:38 christos Exp $"); 35 36 #include "rf_archs.h" 37 38 #if RF_INCLUDE_PARITYLOGGING > 0 39 40 /* 41 * Append-only log for recording parity "update" and "overwrite" records 42 */ 43 44 #include <dev/raidframe/raidframevar.h> 45 46 #include "rf_threadstuff.h" 47 #include "rf_mcpair.h" 48 #include "rf_raid.h" 49 #include "rf_dag.h" 50 #include "rf_dagfuncs.h" 51 #include "rf_desc.h" 52 #include "rf_layout.h" 53 #include "rf_diskqueue.h" 54 #include "rf_etimer.h" 55 #include "rf_paritylog.h" 56 #include "rf_general.h" 57 #include "rf_map.h" 58 #include "rf_paritylogging.h" 59 #include "rf_paritylogDiskMgr.h" 60 61 static RF_CommonLogData_t * 62 AllocParityLogCommonData(RF_Raid_t * raidPtr) 63 { 64 RF_CommonLogData_t *common = NULL; 65 66 /* Return a struct for holding common parity log information from the 67 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list 68 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ 69 70 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 71 if (raidPtr->parityLogDiskQueue.freeCommonList) { 72 common = raidPtr->parityLogDiskQueue.freeCommonList; 73 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 74 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 75 } else { 76 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 77 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); 78 rf_mutex_init(&common->mutex); 79 } 80 common->next = NULL; 81 return (common); 82 } 83 84 static void 85 FreeParityLogCommonData(RF_CommonLogData_t * common) 86 { 87 RF_Raid_t *raidPtr; 88 89 /* Insert a single struct for holding parity log information (data) 90 * into the free list (rf_parityLogDiskQueue.freeCommonList). 91 * NON-BLOCKING */ 92 93 raidPtr = common->raidPtr; 94 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 95 common->next = raidPtr->parityLogDiskQueue.freeCommonList; 96 raidPtr->parityLogDiskQueue.freeCommonList = common; 97 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 98 } 99 100 static RF_ParityLogData_t * 101 AllocParityLogData(RF_Raid_t * raidPtr) 102 { 103 RF_ParityLogData_t *data = NULL; 104 105 /* Return a struct for holding parity log information from the free 106 * list (rf_parityLogDiskQueue.freeList). If the free list is empty, 107 * call RF_Malloc to create a new structure. NON-BLOCKING */ 108 109 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 110 if (raidPtr->parityLogDiskQueue.freeDataList) { 111 data = raidPtr->parityLogDiskQueue.freeDataList; 112 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 113 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 114 } else { 115 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 116 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); 117 } 118 data->next = NULL; 119 data->prev = NULL; 120 return (data); 121 } 122 123 124 static void 125 FreeParityLogData(RF_ParityLogData_t * data) 126 { 127 RF_ParityLogData_t *nextItem; 128 RF_Raid_t *raidPtr; 129 130 /* Insert a linked list of structs for holding parity log information 131 * (data) into the free list (parityLogDiskQueue.freeList). 132 * NON-BLOCKING */ 133 134 raidPtr = data->common->raidPtr; 135 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 136 while (data) { 137 nextItem = data->next; 138 data->next = raidPtr->parityLogDiskQueue.freeDataList; 139 raidPtr->parityLogDiskQueue.freeDataList = data; 140 data = nextItem; 141 } 142 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 143 } 144 145 146 static void 147 EnqueueParityLogData( 148 RF_ParityLogData_t * data, 149 RF_ParityLogData_t ** head, 150 RF_ParityLogData_t ** tail) 151 { 152 RF_Raid_t *raidPtr; 153 154 /* Insert an in-core parity log (*data) into the head of a disk queue 155 * (*head, *tail). NON-BLOCKING */ 156 157 raidPtr = data->common->raidPtr; 158 if (rf_parityLogDebug) 159 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 160 RF_ASSERT(data->prev == NULL); 161 RF_ASSERT(data->next == NULL); 162 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 163 if (*head) { 164 /* insert into head of queue */ 165 RF_ASSERT((*head)->prev == NULL); 166 RF_ASSERT((*tail)->next == NULL); 167 data->next = *head; 168 (*head)->prev = data; 169 *head = data; 170 } else { 171 /* insert into empty list */ 172 RF_ASSERT(*head == NULL); 173 RF_ASSERT(*tail == NULL); 174 *head = data; 175 *tail = data; 176 } 177 RF_ASSERT((*head)->prev == NULL); 178 RF_ASSERT((*tail)->next == NULL); 179 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 180 } 181 182 static RF_ParityLogData_t * 183 DequeueParityLogData( 184 RF_Raid_t * raidPtr, 185 RF_ParityLogData_t ** head, 186 RF_ParityLogData_t ** tail, 187 int ignoreLocks) 188 { 189 RF_ParityLogData_t *data; 190 191 /* Remove and return an in-core parity log from the tail of a disk 192 * queue (*head, *tail). NON-BLOCKING */ 193 194 /* remove from tail, preserving FIFO order */ 195 if (!ignoreLocks) 196 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 197 data = *tail; 198 if (data) { 199 if (*head == *tail) { 200 /* removing last item from queue */ 201 *head = NULL; 202 *tail = NULL; 203 } else { 204 *tail = (*tail)->prev; 205 (*tail)->next = NULL; 206 RF_ASSERT((*head)->prev == NULL); 207 RF_ASSERT((*tail)->next == NULL); 208 } 209 data->next = NULL; 210 data->prev = NULL; 211 if (rf_parityLogDebug) 212 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 213 } 214 if (*head) { 215 RF_ASSERT((*head)->prev == NULL); 216 RF_ASSERT((*tail)->next == NULL); 217 } 218 if (!ignoreLocks) 219 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 220 return (data); 221 } 222 223 224 static void 225 RequeueParityLogData( 226 RF_ParityLogData_t * data, 227 RF_ParityLogData_t ** head, 228 RF_ParityLogData_t ** tail) 229 { 230 RF_Raid_t *raidPtr; 231 232 /* Insert an in-core parity log (*data) into the tail of a disk queue 233 * (*head, *tail). NON-BLOCKING */ 234 235 raidPtr = data->common->raidPtr; 236 RF_ASSERT(data); 237 if (rf_parityLogDebug) 238 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 239 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 240 if (*tail) { 241 /* append to tail of list */ 242 data->prev = *tail; 243 data->next = NULL; 244 (*tail)->next = data; 245 *tail = data; 246 } else { 247 /* inserting into an empty list */ 248 *head = data; 249 *tail = data; 250 (*head)->prev = NULL; 251 (*tail)->next = NULL; 252 } 253 RF_ASSERT((*head)->prev == NULL); 254 RF_ASSERT((*tail)->next == NULL); 255 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 256 } 257 258 RF_ParityLogData_t * 259 rf_CreateParityLogData( 260 RF_ParityRecordType_t operation, 261 RF_PhysDiskAddr_t * pda, 262 void *bufPtr, 263 RF_Raid_t * raidPtr, 264 int (*wakeFunc) (RF_DagNode_t * node, int status), 265 void *wakeArg, 266 RF_AccTraceEntry_t * tracerec, 267 RF_Etimer_t startTime) 268 { 269 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 270 RF_CommonLogData_t *common; 271 RF_PhysDiskAddr_t *diskAddress; 272 int boundary, offset = 0; 273 274 /* Return an initialized struct of info to be logged. Build one item 275 * per physical disk address, one item per region. 276 * 277 * NON-BLOCKING */ 278 279 diskAddress = pda; 280 common = AllocParityLogCommonData(raidPtr); 281 RF_ASSERT(common); 282 283 common->operation = operation; 284 common->bufPtr = bufPtr; 285 common->raidPtr = raidPtr; 286 common->wakeFunc = wakeFunc; 287 common->wakeArg = wakeArg; 288 common->tracerec = tracerec; 289 common->startTime = startTime; 290 common->cnt = 0; 291 292 if (rf_parityLogDebug) 293 printf("[entering CreateParityLogData]\n"); 294 while (diskAddress) { 295 common->cnt++; 296 data = AllocParityLogData(raidPtr); 297 RF_ASSERT(data); 298 data->common = common; 299 data->next = NULL; 300 data->prev = NULL; 301 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 302 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { 303 /* disk address does not cross a region boundary */ 304 data->diskAddress = *diskAddress; 305 data->bufOffset = offset; 306 offset = offset + diskAddress->numSector; 307 EnqueueParityLogData(data, &resultHead, &resultTail); 308 /* adjust disk address */ 309 diskAddress = diskAddress->next; 310 } else { 311 /* disk address crosses a region boundary */ 312 /* find address where region is crossed */ 313 boundary = 0; 314 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 315 boundary++; 316 317 /* enter data before the boundary */ 318 data->diskAddress = *diskAddress; 319 data->diskAddress.numSector = boundary; 320 data->bufOffset = offset; 321 offset += boundary; 322 EnqueueParityLogData(data, &resultHead, &resultTail); 323 /* adjust disk address */ 324 diskAddress->startSector += boundary; 325 diskAddress->numSector -= boundary; 326 } 327 } 328 if (rf_parityLogDebug) 329 printf("[leaving CreateParityLogData]\n"); 330 return (resultHead); 331 } 332 333 334 RF_ParityLogData_t * 335 rf_SearchAndDequeueParityLogData( 336 RF_Raid_t * raidPtr, 337 int regionID, 338 RF_ParityLogData_t ** head, 339 RF_ParityLogData_t ** tail, 340 int ignoreLocks) 341 { 342 RF_ParityLogData_t *w; 343 344 /* Remove and return an in-core parity log from a specified region 345 * (regionID). If a matching log is not found, return NULL. 346 * 347 * NON-BLOCKING. */ 348 349 /* walk backward through a list, looking for an entry with a matching 350 * region ID */ 351 if (!ignoreLocks) 352 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 353 w = (*tail); 354 while (w) { 355 if (w->regionID == regionID) { 356 /* remove an element from the list */ 357 if (w == *tail) { 358 if (*head == *tail) { 359 /* removing only element in the list */ 360 *head = NULL; 361 *tail = NULL; 362 } else { 363 /* removing last item in the list */ 364 *tail = (*tail)->prev; 365 (*tail)->next = NULL; 366 RF_ASSERT((*head)->prev == NULL); 367 RF_ASSERT((*tail)->next == NULL); 368 } 369 } else { 370 if (w == *head) { 371 /* removing first item in the list */ 372 *head = (*head)->next; 373 (*head)->prev = NULL; 374 RF_ASSERT((*head)->prev == NULL); 375 RF_ASSERT((*tail)->next == NULL); 376 } else { 377 /* removing an item from the middle of 378 * the list */ 379 w->prev->next = w->next; 380 w->next->prev = w->prev; 381 RF_ASSERT((*head)->prev == NULL); 382 RF_ASSERT((*tail)->next == NULL); 383 } 384 } 385 w->prev = NULL; 386 w->next = NULL; 387 if (rf_parityLogDebug) 388 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); 389 return (w); 390 } else 391 w = w->prev; 392 } 393 if (!ignoreLocks) 394 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 395 return (NULL); 396 } 397 398 static RF_ParityLogData_t * 399 DequeueMatchingLogData( 400 RF_Raid_t * raidPtr, 401 RF_ParityLogData_t ** head, 402 RF_ParityLogData_t ** tail) 403 { 404 RF_ParityLogData_t *logDataList, *logData; 405 int regionID; 406 407 /* Remove and return an in-core parity log from the tail of a disk 408 * queue (*head, *tail). Then remove all matching (identical 409 * regionIDs) logData and return as a linked list. 410 * 411 * NON-BLOCKING */ 412 413 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 414 if (logDataList) { 415 regionID = logDataList->regionID; 416 logData = logDataList; 417 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 418 while (logData->next) { 419 logData = logData->next; 420 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 421 } 422 } 423 return (logDataList); 424 } 425 426 427 static RF_ParityLog_t * 428 AcquireParityLog( 429 RF_ParityLogData_t * logData, 430 int finish) 431 { 432 RF_ParityLog_t *log = NULL; 433 RF_Raid_t *raidPtr; 434 435 /* Grab a log buffer from the pool and return it. If no buffers are 436 * available, return NULL. NON-BLOCKING */ 437 raidPtr = logData->common->raidPtr; 438 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 439 if (raidPtr->parityLogPool.parityLogs) { 440 log = raidPtr->parityLogPool.parityLogs; 441 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 442 log->regionID = logData->regionID; 443 log->numRecords = 0; 444 log->next = NULL; 445 raidPtr->logsInUse++; 446 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 447 } else { 448 /* no logs available, so place ourselves on the queue of work 449 * waiting on log buffers this is done while 450 * parityLogPool.mutex is held, to ensure synchronization with 451 * ReleaseParityLogs. */ 452 if (rf_parityLogDebug) 453 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 454 if (finish) 455 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 456 else 457 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 458 } 459 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 460 return (log); 461 } 462 463 void 464 rf_ReleaseParityLogs( 465 RF_Raid_t * raidPtr, 466 RF_ParityLog_t * firstLog) 467 { 468 RF_ParityLogData_t *logDataList; 469 RF_ParityLog_t *log, *lastLog; 470 int cnt; 471 472 /* Insert a linked list of parity logs (firstLog) to the free list 473 * (parityLogPool.parityLogPool) 474 * 475 * NON-BLOCKING. */ 476 477 RF_ASSERT(firstLog); 478 479 /* Before returning logs to global free list, service all requests 480 * which are blocked on logs. Holding mutexes for parityLogPool and 481 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ 482 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 483 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 484 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 485 log = firstLog; 486 if (firstLog) 487 firstLog = firstLog->next; 488 log->numRecords = 0; 489 log->next = NULL; 490 while (logDataList && log) { 491 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 492 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 493 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 494 if (rf_parityLogDebug) 495 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 496 if (log == NULL) { 497 log = firstLog; 498 if (firstLog) { 499 firstLog = firstLog->next; 500 log->numRecords = 0; 501 log->next = NULL; 502 } 503 } 504 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 505 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 506 if (log) 507 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 508 } 509 /* return remaining logs to pool */ 510 if (log) { 511 log->next = firstLog; 512 firstLog = log; 513 } 514 if (firstLog) { 515 lastLog = firstLog; 516 raidPtr->logsInUse--; 517 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 518 while (lastLog->next) { 519 lastLog = lastLog->next; 520 raidPtr->logsInUse--; 521 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 522 } 523 lastLog->next = raidPtr->parityLogPool.parityLogs; 524 raidPtr->parityLogPool.parityLogs = firstLog; 525 cnt = 0; 526 log = raidPtr->parityLogPool.parityLogs; 527 while (log) { 528 cnt++; 529 log = log->next; 530 } 531 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 532 } 533 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 534 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 535 } 536 537 static void 538 ReintLog( 539 RF_Raid_t * raidPtr, 540 int regionID, 541 RF_ParityLog_t * log) 542 { 543 RF_ASSERT(log); 544 545 /* Insert an in-core parity log (log) into the disk queue of 546 * reintegration work. Set the flag (reintInProgress) for the 547 * specified region (regionID) to indicate that reintegration is in 548 * progress for this region. NON-BLOCKING */ 549 550 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 551 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint 552 * complete */ 553 554 if (rf_parityLogDebug) 555 printf("[requesting reintegration of region %d]\n", log->regionID); 556 /* move record to reintegration queue */ 557 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 558 log->next = raidPtr->parityLogDiskQueue.reintQueue; 559 raidPtr->parityLogDiskQueue.reintQueue = log; 560 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 561 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 562 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 563 } 564 565 static void 566 FlushLog( 567 RF_Raid_t * raidPtr, 568 RF_ParityLog_t * log) 569 { 570 /* insert a core log (log) into a list of logs 571 * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 572 * NON-BLOCKING */ 573 574 RF_ASSERT(log); 575 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 576 RF_ASSERT(log->next == NULL); 577 /* move log to flush queue */ 578 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 579 log->next = raidPtr->parityLogDiskQueue.flushQueue; 580 raidPtr->parityLogDiskQueue.flushQueue = log; 581 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 582 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 583 } 584 585 static int 586 DumpParityLogToDisk( 587 int finish, 588 RF_ParityLogData_t * logData) 589 { 590 int i, diskCount, regionID = logData->regionID; 591 RF_ParityLog_t *log; 592 RF_Raid_t *raidPtr; 593 594 raidPtr = logData->common->raidPtr; 595 596 /* Move a core log to disk. If the log disk is full, initiate 597 * reintegration. 598 * 599 * Return (0) if we can enqueue the dump immediately, otherwise return 600 * (1) to indicate we are blocked on reintegration and control of the 601 * thread should be relinquished. 602 * 603 * Caller must hold regionInfo[regionID].mutex 604 * 605 * NON-BLOCKING */ 606 607 if (rf_parityLogDebug) 608 printf("[dumping parity log to disk, region %d]\n", regionID); 609 log = raidPtr->regionInfo[regionID].coreLog; 610 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 611 RF_ASSERT(log->next == NULL); 612 613 /* if reintegration is in progress, must queue work */ 614 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 615 if (raidPtr->regionInfo[regionID].reintInProgress) { 616 /* Can not proceed since this region is currently being 617 * reintegrated. We can not block, so queue remaining work and 618 * return */ 619 if (rf_parityLogDebug) 620 printf("[region %d waiting on reintegration]\n", regionID); 621 /* XXX not sure about the use of finish - shouldn't this 622 * always be "Enqueue"? */ 623 if (finish) 624 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 625 else 626 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 627 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 628 return (1); /* relenquish control of this thread */ 629 } 630 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 631 raidPtr->regionInfo[regionID].coreLog = NULL; 632 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 633 /* IMPORTANT!! this loop bound assumes region disk holds an 634 * integral number of core logs */ 635 { 636 /* update disk map for this region */ 637 diskCount = raidPtr->regionInfo[regionID].diskCount; 638 for (i = 0; i < raidPtr->numSectorsPerLog; i++) { 639 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 640 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 641 } 642 log->diskOffset = diskCount; 643 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 644 FlushLog(raidPtr, log); 645 } else { 646 /* no room for log on disk, send it to disk manager and 647 * request reintegration */ 648 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 649 ReintLog(raidPtr, regionID, log); 650 } 651 if (rf_parityLogDebug) 652 printf("[finished dumping parity log to disk, region %d]\n", regionID); 653 return (0); 654 } 655 656 int 657 rf_ParityLogAppend( 658 RF_ParityLogData_t * logData, 659 int finish, 660 RF_ParityLog_t ** incomingLog, 661 int clearReintFlag) 662 { 663 int regionID, logItem, itemDone; 664 RF_ParityLogData_t *item; 665 int punt, done = RF_FALSE; 666 RF_ParityLog_t *log; 667 RF_Raid_t *raidPtr; 668 RF_Etimer_t timer; 669 int (*wakeFunc) (RF_DagNode_t * node, int status); 670 void *wakeArg; 671 672 /* Add parity to the appropriate log, one sector at a time. This 673 * routine is called is called by dag functions ParityLogUpdateFunc 674 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 675 * 676 * Parity to be logged is contained in a linked-list (logData). When 677 * this routine returns, every sector in the list will be in one of 678 * three places: 1) entered into the parity log 2) queued, waiting on 679 * reintegration 3) queued, waiting on a core log 680 * 681 * Blocked work is passed to the ParityLoggingDiskManager for completion. 682 * Later, as conditions which required the block are removed, the work 683 * reenters this routine with the "finish" parameter set to "RF_TRUE." 684 * 685 * NON-BLOCKING */ 686 687 raidPtr = logData->common->raidPtr; 688 /* lock the region for the first item in logData */ 689 RF_ASSERT(logData != NULL); 690 regionID = logData->regionID; 691 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 692 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 693 694 if (clearReintFlag) { 695 /* Enable flushing for this region. Holding both locks 696 * provides a synchronization barrier with DumpParityLogToDisk */ 697 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 698 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 699 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 700 raidPtr->regionInfo[regionID].diskCount = 0; 701 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 702 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 703 * enabled */ 704 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 705 } 706 /* process each item in logData */ 707 while (logData) { 708 /* remove an item from logData */ 709 item = logData; 710 logData = logData->next; 711 item->next = NULL; 712 item->prev = NULL; 713 714 if (rf_parityLogDebug) 715 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); 716 717 /* see if we moved to a new region */ 718 if (regionID != item->regionID) { 719 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 720 regionID = item->regionID; 721 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 722 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 723 } 724 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This 725 * can happen in one of two ways: 1) no core 726 * log (AcquireParityLog) 2) waiting on 727 * reintegration (DumpParityLogToDisk) If punt 728 * is RF_TRUE, the dataItem was queued, so 729 * skip to next item. */ 730 731 /* process item, one sector at a time, until all sectors 732 * processed or we punt */ 733 if (item->diskAddress.numSector > 0) 734 done = RF_FALSE; 735 else 736 RF_ASSERT(0); 737 while (!punt && !done) { 738 /* verify that a core log exists for this region */ 739 if (!raidPtr->regionInfo[regionID].coreLog) { 740 /* Attempt to acquire a parity log. If 741 * acquisition fails, queue remaining work in 742 * data item and move to nextItem. */ 743 if (incomingLog) 744 if (*incomingLog) { 745 RF_ASSERT((*incomingLog)->next == NULL); 746 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 747 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 748 *incomingLog = NULL; 749 } else 750 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 751 else 752 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 753 /* Note: AcquireParityLog either returns a log 754 * or enqueues currentItem */ 755 } 756 if (!raidPtr->regionInfo[regionID].coreLog) 757 punt = RF_TRUE; /* failed to find a core log */ 758 else { 759 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 760 /* verify that the log has room for new 761 * entries */ 762 /* if log is full, dump it to disk and grab a 763 * new log */ 764 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { 765 /* log is full, dump it to disk */ 766 if (DumpParityLogToDisk(finish, item)) 767 punt = RF_TRUE; /* dump unsuccessful, 768 * blocked on 769 * reintegration */ 770 else { 771 /* dump was successful */ 772 if (incomingLog) 773 if (*incomingLog) { 774 RF_ASSERT((*incomingLog)->next == NULL); 775 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 776 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 777 *incomingLog = NULL; 778 } else 779 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 780 else 781 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 782 /* if a core log is not 783 * available, must queue work 784 * and return */ 785 if (!raidPtr->regionInfo[regionID].coreLog) 786 punt = RF_TRUE; /* blocked on log 787 * availability */ 788 } 789 } 790 } 791 /* if we didn't punt on this item, attempt to add a 792 * sector to the core log */ 793 if (!punt) { 794 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 795 /* at this point, we have a core log with 796 * enough room for a sector */ 797 /* copy a sector into the log */ 798 log = raidPtr->regionInfo[regionID].coreLog; 799 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 800 logItem = log->numRecords++; 801 log->records[logItem].parityAddr = item->diskAddress; 802 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 803 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 804 log->records[logItem].parityAddr.numSector = 1; 805 log->records[logItem].operation = item->common->operation; 806 memcpy((char *)log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), ((char *)item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector)); 807 item->diskAddress.numSector--; 808 item->diskAddress.startSector++; 809 if (item->diskAddress.numSector == 0) 810 done = RF_TRUE; 811 } 812 } 813 814 if (!punt) { 815 /* Processed this item completely, decrement count of 816 * items to be processed. */ 817 RF_ASSERT(item->diskAddress.numSector == 0); 818 RF_LOCK_MUTEX(item->common->mutex); 819 item->common->cnt--; 820 if (item->common->cnt == 0) 821 itemDone = RF_TRUE; 822 else 823 itemDone = RF_FALSE; 824 RF_UNLOCK_MUTEX(item->common->mutex); 825 if (itemDone) { 826 /* Finished processing all log data for this 827 * IO Return structs to free list and invoke 828 * wakeup function. */ 829 timer = item->common->startTime; /* grab initial value of 830 * timer */ 831 RF_ETIMER_STOP(timer); 832 RF_ETIMER_EVAL(timer); 833 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 834 if (rf_parityLogDebug) 835 printf("[waking process for region %d]\n", item->regionID); 836 wakeFunc = item->common->wakeFunc; 837 wakeArg = item->common->wakeArg; 838 FreeParityLogCommonData(item->common); 839 FreeParityLogData(item); 840 (wakeFunc) (wakeArg, 0); 841 } else 842 FreeParityLogData(item); 843 } 844 } 845 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 846 if (rf_parityLogDebug) 847 printf("[exiting ParityLogAppend]\n"); 848 return (0); 849 } 850 851 852 void 853 rf_EnableParityLogging(RF_Raid_t * raidPtr) 854 { 855 int regionID; 856 857 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 858 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 859 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 860 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 861 } 862 if (rf_parityLogDebug) 863 printf("[parity logging enabled]\n"); 864 } 865 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 866