1 /* $NetBSD: rf_paritylog.c,v 1.9 2002/09/14 17:53:58 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* Code for manipulating in-core parity logs 30 * 31 */ 32 33 #include <sys/cdefs.h> 34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.9 2002/09/14 17:53:58 oster Exp $"); 35 36 #include "rf_archs.h" 37 38 #if RF_INCLUDE_PARITYLOGGING > 0 39 40 /* 41 * Append-only log for recording parity "update" and "overwrite" records 42 */ 43 44 #include <dev/raidframe/raidframevar.h> 45 46 #include "rf_threadstuff.h" 47 #include "rf_mcpair.h" 48 #include "rf_raid.h" 49 #include "rf_dag.h" 50 #include "rf_dagfuncs.h" 51 #include "rf_desc.h" 52 #include "rf_layout.h" 53 #include "rf_diskqueue.h" 54 #include "rf_etimer.h" 55 #include "rf_paritylog.h" 56 #include "rf_general.h" 57 #include "rf_map.h" 58 #include "rf_paritylogging.h" 59 #include "rf_paritylogDiskMgr.h" 60 61 static RF_CommonLogData_t * 62 AllocParityLogCommonData(RF_Raid_t * raidPtr) 63 { 64 RF_CommonLogData_t *common = NULL; 65 int rc; 66 67 /* Return a struct for holding common parity log information from the 68 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list 69 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ 70 71 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 72 if (raidPtr->parityLogDiskQueue.freeCommonList) { 73 common = raidPtr->parityLogDiskQueue.freeCommonList; 74 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 75 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 76 } else { 77 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 78 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); 79 rc = rf_mutex_init(&common->mutex); 80 if (rc) { 81 rf_print_unable_to_init_mutex(__FILE__, __LINE__, rc); 82 RF_Free(common, sizeof(RF_CommonLogData_t)); 83 common = NULL; 84 } 85 } 86 common->next = NULL; 87 return (common); 88 } 89 90 static void 91 FreeParityLogCommonData(RF_CommonLogData_t * common) 92 { 93 RF_Raid_t *raidPtr; 94 95 /* Insert a single struct for holding parity log information (data) 96 * into the free list (rf_parityLogDiskQueue.freeCommonList). 97 * NON-BLOCKING */ 98 99 raidPtr = common->raidPtr; 100 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 101 common->next = raidPtr->parityLogDiskQueue.freeCommonList; 102 raidPtr->parityLogDiskQueue.freeCommonList = common; 103 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 104 } 105 106 static RF_ParityLogData_t * 107 AllocParityLogData(RF_Raid_t * raidPtr) 108 { 109 RF_ParityLogData_t *data = NULL; 110 111 /* Return a struct for holding parity log information from the free 112 * list (rf_parityLogDiskQueue.freeList). If the free list is empty, 113 * call RF_Malloc to create a new structure. NON-BLOCKING */ 114 115 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 116 if (raidPtr->parityLogDiskQueue.freeDataList) { 117 data = raidPtr->parityLogDiskQueue.freeDataList; 118 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 119 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 120 } else { 121 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 122 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); 123 } 124 data->next = NULL; 125 data->prev = NULL; 126 return (data); 127 } 128 129 130 static void 131 FreeParityLogData(RF_ParityLogData_t * data) 132 { 133 RF_ParityLogData_t *nextItem; 134 RF_Raid_t *raidPtr; 135 136 /* Insert a linked list of structs for holding parity log information 137 * (data) into the free list (parityLogDiskQueue.freeList). 138 * NON-BLOCKING */ 139 140 raidPtr = data->common->raidPtr; 141 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 142 while (data) { 143 nextItem = data->next; 144 data->next = raidPtr->parityLogDiskQueue.freeDataList; 145 raidPtr->parityLogDiskQueue.freeDataList = data; 146 data = nextItem; 147 } 148 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 149 } 150 151 152 static void 153 EnqueueParityLogData( 154 RF_ParityLogData_t * data, 155 RF_ParityLogData_t ** head, 156 RF_ParityLogData_t ** tail) 157 { 158 RF_Raid_t *raidPtr; 159 160 /* Insert an in-core parity log (*data) into the head of a disk queue 161 * (*head, *tail). NON-BLOCKING */ 162 163 raidPtr = data->common->raidPtr; 164 if (rf_parityLogDebug) 165 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 166 RF_ASSERT(data->prev == NULL); 167 RF_ASSERT(data->next == NULL); 168 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 169 if (*head) { 170 /* insert into head of queue */ 171 RF_ASSERT((*head)->prev == NULL); 172 RF_ASSERT((*tail)->next == NULL); 173 data->next = *head; 174 (*head)->prev = data; 175 *head = data; 176 } else { 177 /* insert into empty list */ 178 RF_ASSERT(*head == NULL); 179 RF_ASSERT(*tail == NULL); 180 *head = data; 181 *tail = data; 182 } 183 RF_ASSERT((*head)->prev == NULL); 184 RF_ASSERT((*tail)->next == NULL); 185 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 186 } 187 188 static RF_ParityLogData_t * 189 DequeueParityLogData( 190 RF_Raid_t * raidPtr, 191 RF_ParityLogData_t ** head, 192 RF_ParityLogData_t ** tail, 193 int ignoreLocks) 194 { 195 RF_ParityLogData_t *data; 196 197 /* Remove and return an in-core parity log from the tail of a disk 198 * queue (*head, *tail). NON-BLOCKING */ 199 200 /* remove from tail, preserving FIFO order */ 201 if (!ignoreLocks) 202 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 203 data = *tail; 204 if (data) { 205 if (*head == *tail) { 206 /* removing last item from queue */ 207 *head = NULL; 208 *tail = NULL; 209 } else { 210 *tail = (*tail)->prev; 211 (*tail)->next = NULL; 212 RF_ASSERT((*head)->prev == NULL); 213 RF_ASSERT((*tail)->next == NULL); 214 } 215 data->next = NULL; 216 data->prev = NULL; 217 if (rf_parityLogDebug) 218 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 219 } 220 if (*head) { 221 RF_ASSERT((*head)->prev == NULL); 222 RF_ASSERT((*tail)->next == NULL); 223 } 224 if (!ignoreLocks) 225 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 226 return (data); 227 } 228 229 230 static void 231 RequeueParityLogData( 232 RF_ParityLogData_t * data, 233 RF_ParityLogData_t ** head, 234 RF_ParityLogData_t ** tail) 235 { 236 RF_Raid_t *raidPtr; 237 238 /* Insert an in-core parity log (*data) into the tail of a disk queue 239 * (*head, *tail). NON-BLOCKING */ 240 241 raidPtr = data->common->raidPtr; 242 RF_ASSERT(data); 243 if (rf_parityLogDebug) 244 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 245 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 246 if (*tail) { 247 /* append to tail of list */ 248 data->prev = *tail; 249 data->next = NULL; 250 (*tail)->next = data; 251 *tail = data; 252 } else { 253 /* inserting into an empty list */ 254 *head = data; 255 *tail = data; 256 (*head)->prev = NULL; 257 (*tail)->next = NULL; 258 } 259 RF_ASSERT((*head)->prev == NULL); 260 RF_ASSERT((*tail)->next == NULL); 261 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 262 } 263 264 RF_ParityLogData_t * 265 rf_CreateParityLogData( 266 RF_ParityRecordType_t operation, 267 RF_PhysDiskAddr_t * pda, 268 caddr_t bufPtr, 269 RF_Raid_t * raidPtr, 270 int (*wakeFunc) (RF_DagNode_t * node, int status), 271 void *wakeArg, 272 RF_AccTraceEntry_t * tracerec, 273 RF_Etimer_t startTime) 274 { 275 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 276 RF_CommonLogData_t *common; 277 RF_PhysDiskAddr_t *diskAddress; 278 int boundary, offset = 0; 279 280 /* Return an initialized struct of info to be logged. Build one item 281 * per physical disk address, one item per region. 282 * 283 * NON-BLOCKING */ 284 285 diskAddress = pda; 286 common = AllocParityLogCommonData(raidPtr); 287 RF_ASSERT(common); 288 289 common->operation = operation; 290 common->bufPtr = bufPtr; 291 common->raidPtr = raidPtr; 292 common->wakeFunc = wakeFunc; 293 common->wakeArg = wakeArg; 294 common->tracerec = tracerec; 295 common->startTime = startTime; 296 common->cnt = 0; 297 298 if (rf_parityLogDebug) 299 printf("[entering CreateParityLogData]\n"); 300 while (diskAddress) { 301 common->cnt++; 302 data = AllocParityLogData(raidPtr); 303 RF_ASSERT(data); 304 data->common = common; 305 data->next = NULL; 306 data->prev = NULL; 307 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 308 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { 309 /* disk address does not cross a region boundary */ 310 data->diskAddress = *diskAddress; 311 data->bufOffset = offset; 312 offset = offset + diskAddress->numSector; 313 EnqueueParityLogData(data, &resultHead, &resultTail); 314 /* adjust disk address */ 315 diskAddress = diskAddress->next; 316 } else { 317 /* disk address crosses a region boundary */ 318 /* find address where region is crossed */ 319 boundary = 0; 320 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 321 boundary++; 322 323 /* enter data before the boundary */ 324 data->diskAddress = *diskAddress; 325 data->diskAddress.numSector = boundary; 326 data->bufOffset = offset; 327 offset += boundary; 328 EnqueueParityLogData(data, &resultHead, &resultTail); 329 /* adjust disk address */ 330 diskAddress->startSector += boundary; 331 diskAddress->numSector -= boundary; 332 } 333 } 334 if (rf_parityLogDebug) 335 printf("[leaving CreateParityLogData]\n"); 336 return (resultHead); 337 } 338 339 340 RF_ParityLogData_t * 341 rf_SearchAndDequeueParityLogData( 342 RF_Raid_t * raidPtr, 343 int regionID, 344 RF_ParityLogData_t ** head, 345 RF_ParityLogData_t ** tail, 346 int ignoreLocks) 347 { 348 RF_ParityLogData_t *w; 349 350 /* Remove and return an in-core parity log from a specified region 351 * (regionID). If a matching log is not found, return NULL. 352 * 353 * NON-BLOCKING. */ 354 355 /* walk backward through a list, looking for an entry with a matching 356 * region ID */ 357 if (!ignoreLocks) 358 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 359 w = (*tail); 360 while (w) { 361 if (w->regionID == regionID) { 362 /* remove an element from the list */ 363 if (w == *tail) { 364 if (*head == *tail) { 365 /* removing only element in the list */ 366 *head = NULL; 367 *tail = NULL; 368 } else { 369 /* removing last item in the list */ 370 *tail = (*tail)->prev; 371 (*tail)->next = NULL; 372 RF_ASSERT((*head)->prev == NULL); 373 RF_ASSERT((*tail)->next == NULL); 374 } 375 } else { 376 if (w == *head) { 377 /* removing first item in the list */ 378 *head = (*head)->next; 379 (*head)->prev = NULL; 380 RF_ASSERT((*head)->prev == NULL); 381 RF_ASSERT((*tail)->next == NULL); 382 } else { 383 /* removing an item from the middle of 384 * the list */ 385 w->prev->next = w->next; 386 w->next->prev = w->prev; 387 RF_ASSERT((*head)->prev == NULL); 388 RF_ASSERT((*tail)->next == NULL); 389 } 390 } 391 w->prev = NULL; 392 w->next = NULL; 393 if (rf_parityLogDebug) 394 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); 395 return (w); 396 } else 397 w = w->prev; 398 } 399 if (!ignoreLocks) 400 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 401 return (NULL); 402 } 403 404 static RF_ParityLogData_t * 405 DequeueMatchingLogData( 406 RF_Raid_t * raidPtr, 407 RF_ParityLogData_t ** head, 408 RF_ParityLogData_t ** tail) 409 { 410 RF_ParityLogData_t *logDataList, *logData; 411 int regionID; 412 413 /* Remove and return an in-core parity log from the tail of a disk 414 * queue (*head, *tail). Then remove all matching (identical 415 * regionIDs) logData and return as a linked list. 416 * 417 * NON-BLOCKING */ 418 419 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 420 if (logDataList) { 421 regionID = logDataList->regionID; 422 logData = logDataList; 423 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 424 while (logData->next) { 425 logData = logData->next; 426 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 427 } 428 } 429 return (logDataList); 430 } 431 432 433 static RF_ParityLog_t * 434 AcquireParityLog( 435 RF_ParityLogData_t * logData, 436 int finish) 437 { 438 RF_ParityLog_t *log = NULL; 439 RF_Raid_t *raidPtr; 440 441 /* Grab a log buffer from the pool and return it. If no buffers are 442 * available, return NULL. NON-BLOCKING */ 443 raidPtr = logData->common->raidPtr; 444 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 445 if (raidPtr->parityLogPool.parityLogs) { 446 log = raidPtr->parityLogPool.parityLogs; 447 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 448 log->regionID = logData->regionID; 449 log->numRecords = 0; 450 log->next = NULL; 451 raidPtr->logsInUse++; 452 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 453 } else { 454 /* no logs available, so place ourselves on the queue of work 455 * waiting on log buffers this is done while 456 * parityLogPool.mutex is held, to ensure synchronization with 457 * ReleaseParityLogs. */ 458 if (rf_parityLogDebug) 459 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 460 if (finish) 461 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 462 else 463 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 464 } 465 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 466 return (log); 467 } 468 469 void 470 rf_ReleaseParityLogs( 471 RF_Raid_t * raidPtr, 472 RF_ParityLog_t * firstLog) 473 { 474 RF_ParityLogData_t *logDataList; 475 RF_ParityLog_t *log, *lastLog; 476 int cnt; 477 478 /* Insert a linked list of parity logs (firstLog) to the free list 479 * (parityLogPool.parityLogPool) 480 * 481 * NON-BLOCKING. */ 482 483 RF_ASSERT(firstLog); 484 485 /* Before returning logs to global free list, service all requests 486 * which are blocked on logs. Holding mutexes for parityLogPool and 487 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ 488 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 489 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 490 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 491 log = firstLog; 492 if (firstLog) 493 firstLog = firstLog->next; 494 log->numRecords = 0; 495 log->next = NULL; 496 while (logDataList && log) { 497 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 498 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 499 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 500 if (rf_parityLogDebug) 501 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 502 if (log == NULL) { 503 log = firstLog; 504 if (firstLog) { 505 firstLog = firstLog->next; 506 log->numRecords = 0; 507 log->next = NULL; 508 } 509 } 510 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 511 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 512 if (log) 513 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 514 } 515 /* return remaining logs to pool */ 516 if (log) { 517 log->next = firstLog; 518 firstLog = log; 519 } 520 if (firstLog) { 521 lastLog = firstLog; 522 raidPtr->logsInUse--; 523 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 524 while (lastLog->next) { 525 lastLog = lastLog->next; 526 raidPtr->logsInUse--; 527 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 528 } 529 lastLog->next = raidPtr->parityLogPool.parityLogs; 530 raidPtr->parityLogPool.parityLogs = firstLog; 531 cnt = 0; 532 log = raidPtr->parityLogPool.parityLogs; 533 while (log) { 534 cnt++; 535 log = log->next; 536 } 537 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 538 } 539 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 540 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 541 } 542 543 static void 544 ReintLog( 545 RF_Raid_t * raidPtr, 546 int regionID, 547 RF_ParityLog_t * log) 548 { 549 RF_ASSERT(log); 550 551 /* Insert an in-core parity log (log) into the disk queue of 552 * reintegration work. Set the flag (reintInProgress) for the 553 * specified region (regionID) to indicate that reintegration is in 554 * progress for this region. NON-BLOCKING */ 555 556 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 557 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint 558 * complete */ 559 560 if (rf_parityLogDebug) 561 printf("[requesting reintegration of region %d]\n", log->regionID); 562 /* move record to reintegration queue */ 563 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 564 log->next = raidPtr->parityLogDiskQueue.reintQueue; 565 raidPtr->parityLogDiskQueue.reintQueue = log; 566 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 567 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 568 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 569 } 570 571 static void 572 FlushLog( 573 RF_Raid_t * raidPtr, 574 RF_ParityLog_t * log) 575 { 576 /* insert a core log (log) into a list of logs 577 * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 578 * NON-BLOCKING */ 579 580 RF_ASSERT(log); 581 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 582 RF_ASSERT(log->next == NULL); 583 /* move log to flush queue */ 584 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 585 log->next = raidPtr->parityLogDiskQueue.flushQueue; 586 raidPtr->parityLogDiskQueue.flushQueue = log; 587 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 588 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 589 } 590 591 static int 592 DumpParityLogToDisk( 593 int finish, 594 RF_ParityLogData_t * logData) 595 { 596 int i, diskCount, regionID = logData->regionID; 597 RF_ParityLog_t *log; 598 RF_Raid_t *raidPtr; 599 600 raidPtr = logData->common->raidPtr; 601 602 /* Move a core log to disk. If the log disk is full, initiate 603 * reintegration. 604 * 605 * Return (0) if we can enqueue the dump immediately, otherwise return 606 * (1) to indicate we are blocked on reintegration and control of the 607 * thread should be relinquished. 608 * 609 * Caller must hold regionInfo[regionID].mutex 610 * 611 * NON-BLOCKING */ 612 613 if (rf_parityLogDebug) 614 printf("[dumping parity log to disk, region %d]\n", regionID); 615 log = raidPtr->regionInfo[regionID].coreLog; 616 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 617 RF_ASSERT(log->next == NULL); 618 619 /* if reintegration is in progress, must queue work */ 620 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 621 if (raidPtr->regionInfo[regionID].reintInProgress) { 622 /* Can not proceed since this region is currently being 623 * reintegrated. We can not block, so queue remaining work and 624 * return */ 625 if (rf_parityLogDebug) 626 printf("[region %d waiting on reintegration]\n", regionID); 627 /* XXX not sure about the use of finish - shouldn't this 628 * always be "Enqueue"? */ 629 if (finish) 630 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 631 else 632 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 633 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 634 return (1); /* relenquish control of this thread */ 635 } 636 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 637 raidPtr->regionInfo[regionID].coreLog = NULL; 638 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 639 /* IMPORTANT!! this loop bound assumes region disk holds an 640 * integral number of core logs */ 641 { 642 /* update disk map for this region */ 643 diskCount = raidPtr->regionInfo[regionID].diskCount; 644 for (i = 0; i < raidPtr->numSectorsPerLog; i++) { 645 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 646 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 647 } 648 log->diskOffset = diskCount; 649 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 650 FlushLog(raidPtr, log); 651 } else { 652 /* no room for log on disk, send it to disk manager and 653 * request reintegration */ 654 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 655 ReintLog(raidPtr, regionID, log); 656 } 657 if (rf_parityLogDebug) 658 printf("[finished dumping parity log to disk, region %d]\n", regionID); 659 return (0); 660 } 661 662 int 663 rf_ParityLogAppend( 664 RF_ParityLogData_t * logData, 665 int finish, 666 RF_ParityLog_t ** incomingLog, 667 int clearReintFlag) 668 { 669 int regionID, logItem, itemDone; 670 RF_ParityLogData_t *item; 671 int punt, done = RF_FALSE; 672 RF_ParityLog_t *log; 673 RF_Raid_t *raidPtr; 674 RF_Etimer_t timer; 675 int (*wakeFunc) (RF_DagNode_t * node, int status); 676 void *wakeArg; 677 678 /* Add parity to the appropriate log, one sector at a time. This 679 * routine is called is called by dag functions ParityLogUpdateFunc 680 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 681 * 682 * Parity to be logged is contained in a linked-list (logData). When 683 * this routine returns, every sector in the list will be in one of 684 * three places: 1) entered into the parity log 2) queued, waiting on 685 * reintegration 3) queued, waiting on a core log 686 * 687 * Blocked work is passed to the ParityLoggingDiskManager for completion. 688 * Later, as conditions which required the block are removed, the work 689 * reenters this routine with the "finish" parameter set to "RF_TRUE." 690 * 691 * NON-BLOCKING */ 692 693 raidPtr = logData->common->raidPtr; 694 /* lock the region for the first item in logData */ 695 RF_ASSERT(logData != NULL); 696 regionID = logData->regionID; 697 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 698 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 699 700 if (clearReintFlag) { 701 /* Enable flushing for this region. Holding both locks 702 * provides a synchronization barrier with DumpParityLogToDisk */ 703 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 704 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 705 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 706 raidPtr->regionInfo[regionID].diskCount = 0; 707 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 708 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 709 * enabled */ 710 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 711 } 712 /* process each item in logData */ 713 while (logData) { 714 /* remove an item from logData */ 715 item = logData; 716 logData = logData->next; 717 item->next = NULL; 718 item->prev = NULL; 719 720 if (rf_parityLogDebug) 721 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); 722 723 /* see if we moved to a new region */ 724 if (regionID != item->regionID) { 725 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 726 regionID = item->regionID; 727 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 728 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 729 } 730 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This 731 * can happen in one of two ways: 1) no core 732 * log (AcquireParityLog) 2) waiting on 733 * reintegration (DumpParityLogToDisk) If punt 734 * is RF_TRUE, the dataItem was queued, so 735 * skip to next item. */ 736 737 /* process item, one sector at a time, until all sectors 738 * processed or we punt */ 739 if (item->diskAddress.numSector > 0) 740 done = RF_FALSE; 741 else 742 RF_ASSERT(0); 743 while (!punt && !done) { 744 /* verify that a core log exists for this region */ 745 if (!raidPtr->regionInfo[regionID].coreLog) { 746 /* Attempt to acquire a parity log. If 747 * acquisition fails, queue remaining work in 748 * data item and move to nextItem. */ 749 if (incomingLog) 750 if (*incomingLog) { 751 RF_ASSERT((*incomingLog)->next == NULL); 752 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 753 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 754 *incomingLog = NULL; 755 } else 756 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 757 else 758 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 759 /* Note: AcquireParityLog either returns a log 760 * or enqueues currentItem */ 761 } 762 if (!raidPtr->regionInfo[regionID].coreLog) 763 punt = RF_TRUE; /* failed to find a core log */ 764 else { 765 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 766 /* verify that the log has room for new 767 * entries */ 768 /* if log is full, dump it to disk and grab a 769 * new log */ 770 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { 771 /* log is full, dump it to disk */ 772 if (DumpParityLogToDisk(finish, item)) 773 punt = RF_TRUE; /* dump unsuccessful, 774 * blocked on 775 * reintegration */ 776 else { 777 /* dump was successful */ 778 if (incomingLog) 779 if (*incomingLog) { 780 RF_ASSERT((*incomingLog)->next == NULL); 781 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 782 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 783 *incomingLog = NULL; 784 } else 785 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 786 else 787 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 788 /* if a core log is not 789 * available, must queue work 790 * and return */ 791 if (!raidPtr->regionInfo[regionID].coreLog) 792 punt = RF_TRUE; /* blocked on log 793 * availability */ 794 } 795 } 796 } 797 /* if we didn't punt on this item, attempt to add a 798 * sector to the core log */ 799 if (!punt) { 800 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 801 /* at this point, we have a core log with 802 * enough room for a sector */ 803 /* copy a sector into the log */ 804 log = raidPtr->regionInfo[regionID].coreLog; 805 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 806 logItem = log->numRecords++; 807 log->records[logItem].parityAddr = item->diskAddress; 808 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 809 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 810 log->records[logItem].parityAddr.numSector = 1; 811 log->records[logItem].operation = item->common->operation; 812 memcpy(log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector)); 813 item->diskAddress.numSector--; 814 item->diskAddress.startSector++; 815 if (item->diskAddress.numSector == 0) 816 done = RF_TRUE; 817 } 818 } 819 820 if (!punt) { 821 /* Processed this item completely, decrement count of 822 * items to be processed. */ 823 RF_ASSERT(item->diskAddress.numSector == 0); 824 RF_LOCK_MUTEX(item->common->mutex); 825 item->common->cnt--; 826 if (item->common->cnt == 0) 827 itemDone = RF_TRUE; 828 else 829 itemDone = RF_FALSE; 830 RF_UNLOCK_MUTEX(item->common->mutex); 831 if (itemDone) { 832 /* Finished processing all log data for this 833 * IO Return structs to free list and invoke 834 * wakeup function. */ 835 timer = item->common->startTime; /* grab initial value of 836 * timer */ 837 RF_ETIMER_STOP(timer); 838 RF_ETIMER_EVAL(timer); 839 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 840 if (rf_parityLogDebug) 841 printf("[waking process for region %d]\n", item->regionID); 842 wakeFunc = item->common->wakeFunc; 843 wakeArg = item->common->wakeArg; 844 FreeParityLogCommonData(item->common); 845 FreeParityLogData(item); 846 (wakeFunc) (wakeArg, 0); 847 } else 848 FreeParityLogData(item); 849 } 850 } 851 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 852 if (rf_parityLogDebug) 853 printf("[exiting ParityLogAppend]\n"); 854 return (0); 855 } 856 857 858 void 859 rf_EnableParityLogging(RF_Raid_t * raidPtr) 860 { 861 int regionID; 862 863 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 864 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 865 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 866 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 867 } 868 if (rf_parityLogDebug) 869 printf("[parity logging enabled]\n"); 870 } 871 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 872