1 /* $NetBSD: rf_paritylog.c,v 1.8 2002/05/22 15:40:51 wiz Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* Code for manipulating in-core parity logs 30 * 31 */ 32 33 #include <sys/cdefs.h> 34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.8 2002/05/22 15:40:51 wiz Exp $"); 35 36 #include "rf_archs.h" 37 38 #if RF_INCLUDE_PARITYLOGGING > 0 39 40 /* 41 * Append-only log for recording parity "update" and "overwrite" records 42 */ 43 44 #include <dev/raidframe/raidframevar.h> 45 46 #include "rf_threadstuff.h" 47 #include "rf_mcpair.h" 48 #include "rf_raid.h" 49 #include "rf_dag.h" 50 #include "rf_dagfuncs.h" 51 #include "rf_desc.h" 52 #include "rf_layout.h" 53 #include "rf_diskqueue.h" 54 #include "rf_etimer.h" 55 #include "rf_paritylog.h" 56 #include "rf_general.h" 57 #include "rf_map.h" 58 #include "rf_paritylogging.h" 59 #include "rf_paritylogDiskMgr.h" 60 61 static RF_CommonLogData_t * 62 AllocParityLogCommonData(RF_Raid_t * raidPtr) 63 { 64 RF_CommonLogData_t *common = NULL; 65 int rc; 66 67 /* Return a struct for holding common parity log information from the 68 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list 69 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ 70 71 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 72 if (raidPtr->parityLogDiskQueue.freeCommonList) { 73 common = raidPtr->parityLogDiskQueue.freeCommonList; 74 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 75 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 76 } else { 77 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 78 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); 79 rc = rf_mutex_init(&common->mutex); 80 if (rc) { 81 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, 82 __LINE__, rc); 83 RF_Free(common, sizeof(RF_CommonLogData_t)); 84 common = NULL; 85 } 86 } 87 common->next = NULL; 88 return (common); 89 } 90 91 static void 92 FreeParityLogCommonData(RF_CommonLogData_t * common) 93 { 94 RF_Raid_t *raidPtr; 95 96 /* Insert a single struct for holding parity log information (data) 97 * into the free list (rf_parityLogDiskQueue.freeCommonList). 98 * NON-BLOCKING */ 99 100 raidPtr = common->raidPtr; 101 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 102 common->next = raidPtr->parityLogDiskQueue.freeCommonList; 103 raidPtr->parityLogDiskQueue.freeCommonList = common; 104 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 105 } 106 107 static RF_ParityLogData_t * 108 AllocParityLogData(RF_Raid_t * raidPtr) 109 { 110 RF_ParityLogData_t *data = NULL; 111 112 /* Return a struct for holding parity log information from the free 113 * list (rf_parityLogDiskQueue.freeList). If the free list is empty, 114 * call RF_Malloc to create a new structure. NON-BLOCKING */ 115 116 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 117 if (raidPtr->parityLogDiskQueue.freeDataList) { 118 data = raidPtr->parityLogDiskQueue.freeDataList; 119 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 120 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 121 } else { 122 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 123 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); 124 } 125 data->next = NULL; 126 data->prev = NULL; 127 return (data); 128 } 129 130 131 static void 132 FreeParityLogData(RF_ParityLogData_t * data) 133 { 134 RF_ParityLogData_t *nextItem; 135 RF_Raid_t *raidPtr; 136 137 /* Insert a linked list of structs for holding parity log information 138 * (data) into the free list (parityLogDiskQueue.freeList). 139 * NON-BLOCKING */ 140 141 raidPtr = data->common->raidPtr; 142 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 143 while (data) { 144 nextItem = data->next; 145 data->next = raidPtr->parityLogDiskQueue.freeDataList; 146 raidPtr->parityLogDiskQueue.freeDataList = data; 147 data = nextItem; 148 } 149 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 150 } 151 152 153 static void 154 EnqueueParityLogData( 155 RF_ParityLogData_t * data, 156 RF_ParityLogData_t ** head, 157 RF_ParityLogData_t ** tail) 158 { 159 RF_Raid_t *raidPtr; 160 161 /* Insert an in-core parity log (*data) into the head of a disk queue 162 * (*head, *tail). NON-BLOCKING */ 163 164 raidPtr = data->common->raidPtr; 165 if (rf_parityLogDebug) 166 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 167 RF_ASSERT(data->prev == NULL); 168 RF_ASSERT(data->next == NULL); 169 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 170 if (*head) { 171 /* insert into head of queue */ 172 RF_ASSERT((*head)->prev == NULL); 173 RF_ASSERT((*tail)->next == NULL); 174 data->next = *head; 175 (*head)->prev = data; 176 *head = data; 177 } else { 178 /* insert into empty list */ 179 RF_ASSERT(*head == NULL); 180 RF_ASSERT(*tail == NULL); 181 *head = data; 182 *tail = data; 183 } 184 RF_ASSERT((*head)->prev == NULL); 185 RF_ASSERT((*tail)->next == NULL); 186 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 187 } 188 189 static RF_ParityLogData_t * 190 DequeueParityLogData( 191 RF_Raid_t * raidPtr, 192 RF_ParityLogData_t ** head, 193 RF_ParityLogData_t ** tail, 194 int ignoreLocks) 195 { 196 RF_ParityLogData_t *data; 197 198 /* Remove and return an in-core parity log from the tail of a disk 199 * queue (*head, *tail). NON-BLOCKING */ 200 201 /* remove from tail, preserving FIFO order */ 202 if (!ignoreLocks) 203 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 204 data = *tail; 205 if (data) { 206 if (*head == *tail) { 207 /* removing last item from queue */ 208 *head = NULL; 209 *tail = NULL; 210 } else { 211 *tail = (*tail)->prev; 212 (*tail)->next = NULL; 213 RF_ASSERT((*head)->prev == NULL); 214 RF_ASSERT((*tail)->next == NULL); 215 } 216 data->next = NULL; 217 data->prev = NULL; 218 if (rf_parityLogDebug) 219 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 220 } 221 if (*head) { 222 RF_ASSERT((*head)->prev == NULL); 223 RF_ASSERT((*tail)->next == NULL); 224 } 225 if (!ignoreLocks) 226 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 227 return (data); 228 } 229 230 231 static void 232 RequeueParityLogData( 233 RF_ParityLogData_t * data, 234 RF_ParityLogData_t ** head, 235 RF_ParityLogData_t ** tail) 236 { 237 RF_Raid_t *raidPtr; 238 239 /* Insert an in-core parity log (*data) into the tail of a disk queue 240 * (*head, *tail). NON-BLOCKING */ 241 242 raidPtr = data->common->raidPtr; 243 RF_ASSERT(data); 244 if (rf_parityLogDebug) 245 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 246 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 247 if (*tail) { 248 /* append to tail of list */ 249 data->prev = *tail; 250 data->next = NULL; 251 (*tail)->next = data; 252 *tail = data; 253 } else { 254 /* inserting into an empty list */ 255 *head = data; 256 *tail = data; 257 (*head)->prev = NULL; 258 (*tail)->next = NULL; 259 } 260 RF_ASSERT((*head)->prev == NULL); 261 RF_ASSERT((*tail)->next == NULL); 262 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 263 } 264 265 RF_ParityLogData_t * 266 rf_CreateParityLogData( 267 RF_ParityRecordType_t operation, 268 RF_PhysDiskAddr_t * pda, 269 caddr_t bufPtr, 270 RF_Raid_t * raidPtr, 271 int (*wakeFunc) (RF_DagNode_t * node, int status), 272 void *wakeArg, 273 RF_AccTraceEntry_t * tracerec, 274 RF_Etimer_t startTime) 275 { 276 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 277 RF_CommonLogData_t *common; 278 RF_PhysDiskAddr_t *diskAddress; 279 int boundary, offset = 0; 280 281 /* Return an initialized struct of info to be logged. Build one item 282 * per physical disk address, one item per region. 283 * 284 * NON-BLOCKING */ 285 286 diskAddress = pda; 287 common = AllocParityLogCommonData(raidPtr); 288 RF_ASSERT(common); 289 290 common->operation = operation; 291 common->bufPtr = bufPtr; 292 common->raidPtr = raidPtr; 293 common->wakeFunc = wakeFunc; 294 common->wakeArg = wakeArg; 295 common->tracerec = tracerec; 296 common->startTime = startTime; 297 common->cnt = 0; 298 299 if (rf_parityLogDebug) 300 printf("[entering CreateParityLogData]\n"); 301 while (diskAddress) { 302 common->cnt++; 303 data = AllocParityLogData(raidPtr); 304 RF_ASSERT(data); 305 data->common = common; 306 data->next = NULL; 307 data->prev = NULL; 308 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 309 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { 310 /* disk address does not cross a region boundary */ 311 data->diskAddress = *diskAddress; 312 data->bufOffset = offset; 313 offset = offset + diskAddress->numSector; 314 EnqueueParityLogData(data, &resultHead, &resultTail); 315 /* adjust disk address */ 316 diskAddress = diskAddress->next; 317 } else { 318 /* disk address crosses a region boundary */ 319 /* find address where region is crossed */ 320 boundary = 0; 321 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 322 boundary++; 323 324 /* enter data before the boundary */ 325 data->diskAddress = *diskAddress; 326 data->diskAddress.numSector = boundary; 327 data->bufOffset = offset; 328 offset += boundary; 329 EnqueueParityLogData(data, &resultHead, &resultTail); 330 /* adjust disk address */ 331 diskAddress->startSector += boundary; 332 diskAddress->numSector -= boundary; 333 } 334 } 335 if (rf_parityLogDebug) 336 printf("[leaving CreateParityLogData]\n"); 337 return (resultHead); 338 } 339 340 341 RF_ParityLogData_t * 342 rf_SearchAndDequeueParityLogData( 343 RF_Raid_t * raidPtr, 344 int regionID, 345 RF_ParityLogData_t ** head, 346 RF_ParityLogData_t ** tail, 347 int ignoreLocks) 348 { 349 RF_ParityLogData_t *w; 350 351 /* Remove and return an in-core parity log from a specified region 352 * (regionID). If a matching log is not found, return NULL. 353 * 354 * NON-BLOCKING. */ 355 356 /* walk backward through a list, looking for an entry with a matching 357 * region ID */ 358 if (!ignoreLocks) 359 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 360 w = (*tail); 361 while (w) { 362 if (w->regionID == regionID) { 363 /* remove an element from the list */ 364 if (w == *tail) { 365 if (*head == *tail) { 366 /* removing only element in the list */ 367 *head = NULL; 368 *tail = NULL; 369 } else { 370 /* removing last item in the list */ 371 *tail = (*tail)->prev; 372 (*tail)->next = NULL; 373 RF_ASSERT((*head)->prev == NULL); 374 RF_ASSERT((*tail)->next == NULL); 375 } 376 } else { 377 if (w == *head) { 378 /* removing first item in the list */ 379 *head = (*head)->next; 380 (*head)->prev = NULL; 381 RF_ASSERT((*head)->prev == NULL); 382 RF_ASSERT((*tail)->next == NULL); 383 } else { 384 /* removing an item from the middle of 385 * the list */ 386 w->prev->next = w->next; 387 w->next->prev = w->prev; 388 RF_ASSERT((*head)->prev == NULL); 389 RF_ASSERT((*tail)->next == NULL); 390 } 391 } 392 w->prev = NULL; 393 w->next = NULL; 394 if (rf_parityLogDebug) 395 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); 396 return (w); 397 } else 398 w = w->prev; 399 } 400 if (!ignoreLocks) 401 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 402 return (NULL); 403 } 404 405 static RF_ParityLogData_t * 406 DequeueMatchingLogData( 407 RF_Raid_t * raidPtr, 408 RF_ParityLogData_t ** head, 409 RF_ParityLogData_t ** tail) 410 { 411 RF_ParityLogData_t *logDataList, *logData; 412 int regionID; 413 414 /* Remove and return an in-core parity log from the tail of a disk 415 * queue (*head, *tail). Then remove all matching (identical 416 * regionIDs) logData and return as a linked list. 417 * 418 * NON-BLOCKING */ 419 420 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 421 if (logDataList) { 422 regionID = logDataList->regionID; 423 logData = logDataList; 424 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 425 while (logData->next) { 426 logData = logData->next; 427 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 428 } 429 } 430 return (logDataList); 431 } 432 433 434 static RF_ParityLog_t * 435 AcquireParityLog( 436 RF_ParityLogData_t * logData, 437 int finish) 438 { 439 RF_ParityLog_t *log = NULL; 440 RF_Raid_t *raidPtr; 441 442 /* Grab a log buffer from the pool and return it. If no buffers are 443 * available, return NULL. NON-BLOCKING */ 444 raidPtr = logData->common->raidPtr; 445 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 446 if (raidPtr->parityLogPool.parityLogs) { 447 log = raidPtr->parityLogPool.parityLogs; 448 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 449 log->regionID = logData->regionID; 450 log->numRecords = 0; 451 log->next = NULL; 452 raidPtr->logsInUse++; 453 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 454 } else { 455 /* no logs available, so place ourselves on the queue of work 456 * waiting on log buffers this is done while 457 * parityLogPool.mutex is held, to ensure synchronization with 458 * ReleaseParityLogs. */ 459 if (rf_parityLogDebug) 460 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 461 if (finish) 462 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 463 else 464 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 465 } 466 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 467 return (log); 468 } 469 470 void 471 rf_ReleaseParityLogs( 472 RF_Raid_t * raidPtr, 473 RF_ParityLog_t * firstLog) 474 { 475 RF_ParityLogData_t *logDataList; 476 RF_ParityLog_t *log, *lastLog; 477 int cnt; 478 479 /* Insert a linked list of parity logs (firstLog) to the free list 480 * (parityLogPool.parityLogPool) 481 * 482 * NON-BLOCKING. */ 483 484 RF_ASSERT(firstLog); 485 486 /* Before returning logs to global free list, service all requests 487 * which are blocked on logs. Holding mutexes for parityLogPool and 488 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ 489 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 490 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 491 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 492 log = firstLog; 493 if (firstLog) 494 firstLog = firstLog->next; 495 log->numRecords = 0; 496 log->next = NULL; 497 while (logDataList && log) { 498 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 499 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 500 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 501 if (rf_parityLogDebug) 502 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 503 if (log == NULL) { 504 log = firstLog; 505 if (firstLog) { 506 firstLog = firstLog->next; 507 log->numRecords = 0; 508 log->next = NULL; 509 } 510 } 511 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 512 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 513 if (log) 514 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 515 } 516 /* return remaining logs to pool */ 517 if (log) { 518 log->next = firstLog; 519 firstLog = log; 520 } 521 if (firstLog) { 522 lastLog = firstLog; 523 raidPtr->logsInUse--; 524 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 525 while (lastLog->next) { 526 lastLog = lastLog->next; 527 raidPtr->logsInUse--; 528 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 529 } 530 lastLog->next = raidPtr->parityLogPool.parityLogs; 531 raidPtr->parityLogPool.parityLogs = firstLog; 532 cnt = 0; 533 log = raidPtr->parityLogPool.parityLogs; 534 while (log) { 535 cnt++; 536 log = log->next; 537 } 538 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 539 } 540 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 541 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 542 } 543 544 static void 545 ReintLog( 546 RF_Raid_t * raidPtr, 547 int regionID, 548 RF_ParityLog_t * log) 549 { 550 RF_ASSERT(log); 551 552 /* Insert an in-core parity log (log) into the disk queue of 553 * reintegration work. Set the flag (reintInProgress) for the 554 * specified region (regionID) to indicate that reintegration is in 555 * progress for this region. NON-BLOCKING */ 556 557 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 558 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint 559 * complete */ 560 561 if (rf_parityLogDebug) 562 printf("[requesting reintegration of region %d]\n", log->regionID); 563 /* move record to reintegration queue */ 564 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 565 log->next = raidPtr->parityLogDiskQueue.reintQueue; 566 raidPtr->parityLogDiskQueue.reintQueue = log; 567 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 568 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 569 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 570 } 571 572 static void 573 FlushLog( 574 RF_Raid_t * raidPtr, 575 RF_ParityLog_t * log) 576 { 577 /* insert a core log (log) into a list of logs 578 * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 579 * NON-BLOCKING */ 580 581 RF_ASSERT(log); 582 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 583 RF_ASSERT(log->next == NULL); 584 /* move log to flush queue */ 585 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 586 log->next = raidPtr->parityLogDiskQueue.flushQueue; 587 raidPtr->parityLogDiskQueue.flushQueue = log; 588 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 589 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 590 } 591 592 static int 593 DumpParityLogToDisk( 594 int finish, 595 RF_ParityLogData_t * logData) 596 { 597 int i, diskCount, regionID = logData->regionID; 598 RF_ParityLog_t *log; 599 RF_Raid_t *raidPtr; 600 601 raidPtr = logData->common->raidPtr; 602 603 /* Move a core log to disk. If the log disk is full, initiate 604 * reintegration. 605 * 606 * Return (0) if we can enqueue the dump immediately, otherwise return 607 * (1) to indicate we are blocked on reintegration and control of the 608 * thread should be relinquished. 609 * 610 * Caller must hold regionInfo[regionID].mutex 611 * 612 * NON-BLOCKING */ 613 614 if (rf_parityLogDebug) 615 printf("[dumping parity log to disk, region %d]\n", regionID); 616 log = raidPtr->regionInfo[regionID].coreLog; 617 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 618 RF_ASSERT(log->next == NULL); 619 620 /* if reintegration is in progress, must queue work */ 621 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 622 if (raidPtr->regionInfo[regionID].reintInProgress) { 623 /* Can not proceed since this region is currently being 624 * reintegrated. We can not block, so queue remaining work and 625 * return */ 626 if (rf_parityLogDebug) 627 printf("[region %d waiting on reintegration]\n", regionID); 628 /* XXX not sure about the use of finish - shouldn't this 629 * always be "Enqueue"? */ 630 if (finish) 631 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 632 else 633 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 634 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 635 return (1); /* relenquish control of this thread */ 636 } 637 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 638 raidPtr->regionInfo[regionID].coreLog = NULL; 639 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 640 /* IMPORTANT!! this loop bound assumes region disk holds an 641 * integral number of core logs */ 642 { 643 /* update disk map for this region */ 644 diskCount = raidPtr->regionInfo[regionID].diskCount; 645 for (i = 0; i < raidPtr->numSectorsPerLog; i++) { 646 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 647 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 648 } 649 log->diskOffset = diskCount; 650 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 651 FlushLog(raidPtr, log); 652 } else { 653 /* no room for log on disk, send it to disk manager and 654 * request reintegration */ 655 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 656 ReintLog(raidPtr, regionID, log); 657 } 658 if (rf_parityLogDebug) 659 printf("[finished dumping parity log to disk, region %d]\n", regionID); 660 return (0); 661 } 662 663 int 664 rf_ParityLogAppend( 665 RF_ParityLogData_t * logData, 666 int finish, 667 RF_ParityLog_t ** incomingLog, 668 int clearReintFlag) 669 { 670 int regionID, logItem, itemDone; 671 RF_ParityLogData_t *item; 672 int punt, done = RF_FALSE; 673 RF_ParityLog_t *log; 674 RF_Raid_t *raidPtr; 675 RF_Etimer_t timer; 676 int (*wakeFunc) (RF_DagNode_t * node, int status); 677 void *wakeArg; 678 679 /* Add parity to the appropriate log, one sector at a time. This 680 * routine is called is called by dag functions ParityLogUpdateFunc 681 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 682 * 683 * Parity to be logged is contained in a linked-list (logData). When 684 * this routine returns, every sector in the list will be in one of 685 * three places: 1) entered into the parity log 2) queued, waiting on 686 * reintegration 3) queued, waiting on a core log 687 * 688 * Blocked work is passed to the ParityLoggingDiskManager for completion. 689 * Later, as conditions which required the block are removed, the work 690 * reenters this routine with the "finish" parameter set to "RF_TRUE." 691 * 692 * NON-BLOCKING */ 693 694 raidPtr = logData->common->raidPtr; 695 /* lock the region for the first item in logData */ 696 RF_ASSERT(logData != NULL); 697 regionID = logData->regionID; 698 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 699 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 700 701 if (clearReintFlag) { 702 /* Enable flushing for this region. Holding both locks 703 * provides a synchronization barrier with DumpParityLogToDisk */ 704 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 705 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 706 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 707 raidPtr->regionInfo[regionID].diskCount = 0; 708 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 709 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 710 * enabled */ 711 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 712 } 713 /* process each item in logData */ 714 while (logData) { 715 /* remove an item from logData */ 716 item = logData; 717 logData = logData->next; 718 item->next = NULL; 719 item->prev = NULL; 720 721 if (rf_parityLogDebug) 722 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); 723 724 /* see if we moved to a new region */ 725 if (regionID != item->regionID) { 726 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 727 regionID = item->regionID; 728 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 729 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 730 } 731 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This 732 * can happen in one of two ways: 1) no core 733 * log (AcquireParityLog) 2) waiting on 734 * reintegration (DumpParityLogToDisk) If punt 735 * is RF_TRUE, the dataItem was queued, so 736 * skip to next item. */ 737 738 /* process item, one sector at a time, until all sectors 739 * processed or we punt */ 740 if (item->diskAddress.numSector > 0) 741 done = RF_FALSE; 742 else 743 RF_ASSERT(0); 744 while (!punt && !done) { 745 /* verify that a core log exists for this region */ 746 if (!raidPtr->regionInfo[regionID].coreLog) { 747 /* Attempt to acquire a parity log. If 748 * acquisition fails, queue remaining work in 749 * data item and move to nextItem. */ 750 if (incomingLog) 751 if (*incomingLog) { 752 RF_ASSERT((*incomingLog)->next == NULL); 753 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 754 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 755 *incomingLog = NULL; 756 } else 757 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 758 else 759 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 760 /* Note: AcquireParityLog either returns a log 761 * or enqueues currentItem */ 762 } 763 if (!raidPtr->regionInfo[regionID].coreLog) 764 punt = RF_TRUE; /* failed to find a core log */ 765 else { 766 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 767 /* verify that the log has room for new 768 * entries */ 769 /* if log is full, dump it to disk and grab a 770 * new log */ 771 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { 772 /* log is full, dump it to disk */ 773 if (DumpParityLogToDisk(finish, item)) 774 punt = RF_TRUE; /* dump unsuccessful, 775 * blocked on 776 * reintegration */ 777 else { 778 /* dump was successful */ 779 if (incomingLog) 780 if (*incomingLog) { 781 RF_ASSERT((*incomingLog)->next == NULL); 782 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 783 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 784 *incomingLog = NULL; 785 } else 786 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 787 else 788 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 789 /* if a core log is not 790 * available, must queue work 791 * and return */ 792 if (!raidPtr->regionInfo[regionID].coreLog) 793 punt = RF_TRUE; /* blocked on log 794 * availability */ 795 } 796 } 797 } 798 /* if we didn't punt on this item, attempt to add a 799 * sector to the core log */ 800 if (!punt) { 801 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 802 /* at this point, we have a core log with 803 * enough room for a sector */ 804 /* copy a sector into the log */ 805 log = raidPtr->regionInfo[regionID].coreLog; 806 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 807 logItem = log->numRecords++; 808 log->records[logItem].parityAddr = item->diskAddress; 809 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 810 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 811 log->records[logItem].parityAddr.numSector = 1; 812 log->records[logItem].operation = item->common->operation; 813 memcpy(log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector)); 814 item->diskAddress.numSector--; 815 item->diskAddress.startSector++; 816 if (item->diskAddress.numSector == 0) 817 done = RF_TRUE; 818 } 819 } 820 821 if (!punt) { 822 /* Processed this item completely, decrement count of 823 * items to be processed. */ 824 RF_ASSERT(item->diskAddress.numSector == 0); 825 RF_LOCK_MUTEX(item->common->mutex); 826 item->common->cnt--; 827 if (item->common->cnt == 0) 828 itemDone = RF_TRUE; 829 else 830 itemDone = RF_FALSE; 831 RF_UNLOCK_MUTEX(item->common->mutex); 832 if (itemDone) { 833 /* Finished processing all log data for this 834 * IO Return structs to free list and invoke 835 * wakeup function. */ 836 timer = item->common->startTime; /* grab initial value of 837 * timer */ 838 RF_ETIMER_STOP(timer); 839 RF_ETIMER_EVAL(timer); 840 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 841 if (rf_parityLogDebug) 842 printf("[waking process for region %d]\n", item->regionID); 843 wakeFunc = item->common->wakeFunc; 844 wakeArg = item->common->wakeArg; 845 FreeParityLogCommonData(item->common); 846 FreeParityLogData(item); 847 (wakeFunc) (wakeArg, 0); 848 } else 849 FreeParityLogData(item); 850 } 851 } 852 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 853 if (rf_parityLogDebug) 854 printf("[exiting ParityLogAppend]\n"); 855 return (0); 856 } 857 858 859 void 860 rf_EnableParityLogging(RF_Raid_t * raidPtr) 861 { 862 int regionID; 863 864 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 865 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 866 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 867 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 868 } 869 if (rf_parityLogDebug) 870 printf("[parity logging enabled]\n"); 871 } 872 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 873