1 /* $NetBSD: rf_parityscan.c,v 1.33 2009/11/17 18:54:26 jld Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /***************************************************************************** 30 * 31 * rf_parityscan.c -- misc utilities related to parity verification 32 * 33 ****************************************************************************/ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: rf_parityscan.c,v 1.33 2009/11/17 18:54:26 jld Exp $"); 37 38 #include <dev/raidframe/raidframevar.h> 39 40 #include "rf_raid.h" 41 #include "rf_dag.h" 42 #include "rf_dagfuncs.h" 43 #include "rf_dagutils.h" 44 #include "rf_mcpair.h" 45 #include "rf_general.h" 46 #include "rf_engine.h" 47 #include "rf_parityscan.h" 48 #include "rf_map.h" 49 #include "rf_paritymap.h" 50 51 /***************************************************************************** 52 * 53 * walk through the entire arry and write new parity. This works by 54 * creating two DAGs, one to read a stripe of data and one to write 55 * new parity. The first is executed, the data is xored together, and 56 * then the second is executed. To avoid constantly building and 57 * tearing down the DAGs, we create them a priori and fill them in 58 * with the mapping information as we go along. 59 * 60 * there should never be more than one thread running this. 61 * 62 ****************************************************************************/ 63 64 int 65 rf_RewriteParity(RF_Raid_t *raidPtr) 66 { 67 if (raidPtr->parity_map != NULL) 68 return rf_paritymap_rewrite(raidPtr->parity_map); 69 else 70 return rf_RewriteParityRange(raidPtr, 0, raidPtr->totalSectors); 71 } 72 73 int 74 rf_RewriteParityRange(RF_Raid_t *raidPtr, RF_SectorNum_t sec_begin, 75 RF_SectorNum_t sec_len) 76 { 77 /* 78 * Note: It is the caller's responsibility to ensure that 79 * sec_begin and sec_len are stripe-aligned. 80 */ 81 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 82 RF_AccessStripeMapHeader_t *asm_h; 83 int ret_val; 84 int rc; 85 RF_SectorNum_t i; 86 87 if (raidPtr->Layout.map->faultsTolerated == 0) { 88 /* There isn't any parity. Call it "okay." */ 89 return (RF_PARITY_OKAY); 90 } 91 if (raidPtr->status != rf_rs_optimal) { 92 /* 93 * We're in degraded mode. Don't try to verify parity now! 94 * XXX: this should be a "we don't want to", not a 95 * "we can't" error. 96 */ 97 return (RF_PARITY_COULD_NOT_VERIFY); 98 } 99 100 ret_val = 0; 101 102 rc = RF_PARITY_OKAY; 103 104 for (i = sec_begin; i < sec_begin + sec_len && 105 rc <= RF_PARITY_CORRECTED; 106 i += layoutPtr->dataSectorsPerStripe) { 107 if (raidPtr->waitShutdown) { 108 /* Someone is pulling the plug on this set... 109 abort the re-write */ 110 return (1); 111 } 112 asm_h = rf_MapAccess(raidPtr, i, 113 layoutPtr->dataSectorsPerStripe, 114 NULL, RF_DONT_REMAP); 115 raidPtr->parity_rewrite_stripes_done = 116 i / layoutPtr->dataSectorsPerStripe ; 117 rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); 118 119 switch (rc) { 120 case RF_PARITY_OKAY: 121 case RF_PARITY_CORRECTED: 122 break; 123 case RF_PARITY_BAD: 124 printf("Parity bad during correction\n"); 125 ret_val = 1; 126 break; 127 case RF_PARITY_COULD_NOT_CORRECT: 128 printf("Could not correct bad parity\n"); 129 ret_val = 1; 130 break; 131 case RF_PARITY_COULD_NOT_VERIFY: 132 printf("Could not verify parity\n"); 133 ret_val = 1; 134 break; 135 default: 136 printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); 137 ret_val = 1; 138 } 139 rf_FreeAccessStripeMap(asm_h); 140 } 141 return (ret_val); 142 } 143 /***************************************************************************** 144 * 145 * verify that the parity in a particular stripe is correct. we 146 * validate only the range of parity defined by parityPDA, since this 147 * is all we have locked. The way we do this is to create an asm that 148 * maps the whole stripe and then range-restrict it to the parity 149 * region defined by the parityPDA. 150 * 151 ****************************************************************************/ 152 int 153 rf_VerifyParity(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *aasm, 154 int correct_it, RF_RaidAccessFlags_t flags) 155 { 156 RF_PhysDiskAddr_t *parityPDA; 157 RF_AccessStripeMap_t *doasm; 158 const RF_LayoutSW_t *lp; 159 int lrc, rc; 160 161 lp = raidPtr->Layout.map; 162 if (lp->faultsTolerated == 0) { 163 /* 164 * There isn't any parity. Call it "okay." 165 */ 166 return (RF_PARITY_OKAY); 167 } 168 rc = RF_PARITY_OKAY; 169 if (lp->VerifyParity) { 170 for (doasm = aasm; doasm; doasm = doasm->next) { 171 for (parityPDA = doasm->parityInfo; parityPDA; 172 parityPDA = parityPDA->next) { 173 lrc = lp->VerifyParity(raidPtr, 174 doasm->raidAddress, 175 parityPDA, 176 correct_it, flags); 177 if (lrc > rc) { 178 /* see rf_parityscan.h for why this 179 * works */ 180 rc = lrc; 181 } 182 } 183 } 184 } else { 185 rc = RF_PARITY_COULD_NOT_VERIFY; 186 } 187 return (rc); 188 } 189 190 int 191 rf_VerifyParityBasic(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, 192 RF_PhysDiskAddr_t *parityPDA, int correct_it, 193 RF_RaidAccessFlags_t flags) 194 { 195 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 196 RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, 197 raidAddr); 198 RF_SectorCount_t numsector = parityPDA->numSector; 199 int numbytes = rf_RaidAddressToByte(raidPtr, numsector); 200 int bytesPerStripe = numbytes * layoutPtr->numDataCol; 201 RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ 202 RF_DagNode_t *blockNode, *wrBlock; 203 RF_AccessStripeMapHeader_t *asm_h; 204 RF_AccessStripeMap_t *asmap; 205 RF_AllocListElem_t *alloclist; 206 RF_PhysDiskAddr_t *pda; 207 char *pbuf, *bf, *end_p, *p; 208 int i, retcode; 209 RF_ReconUnitNum_t which_ru; 210 RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, 211 raidAddr, 212 &which_ru); 213 int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; 214 #if RF_ACC_TRACE > 0 215 RF_AccTraceEntry_t tracerec; 216 #endif 217 RF_MCPair_t *mcpair; 218 219 retcode = RF_PARITY_OKAY; 220 221 mcpair = rf_AllocMCPair(); 222 rf_MakeAllocList(alloclist); 223 RF_MallocAndAdd(bf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); 224 RF_MallocAndAdd(pbuf, numbytes, (char *), alloclist); 225 end_p = bf + bytesPerStripe; 226 227 rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, bf, rf_DiskReadFunc, rf_DiskReadUndoFunc, 228 "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); 229 blockNode = rd_dag_h->succedents[0]; 230 231 /* map the stripe and fill in the PDAs in the dag */ 232 asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, bf, RF_DONT_REMAP); 233 asmap = asm_h->stripeMap; 234 235 for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { 236 RF_ASSERT(pda); 237 rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); 238 RF_ASSERT(pda->numSector != 0); 239 if (rf_TryToRedirectPDA(raidPtr, pda, 0)) 240 goto out; /* no way to verify parity if disk is 241 * dead. return w/ good status */ 242 blockNode->succedents[i]->params[0].p = pda; 243 blockNode->succedents[i]->params[2].v = psID; 244 blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 245 } 246 247 RF_ASSERT(!asmap->parityInfo->next); 248 rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); 249 RF_ASSERT(asmap->parityInfo->numSector != 0); 250 if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) 251 goto out; 252 blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; 253 254 /* fire off the DAG */ 255 #if RF_ACC_TRACE > 0 256 memset((char *) &tracerec, 0, sizeof(tracerec)); 257 rd_dag_h->tracerec = &tracerec; 258 #endif 259 #if 0 260 if (rf_verifyParityDebug) { 261 printf("Parity verify read dag:\n"); 262 rf_PrintDAGList(rd_dag_h); 263 } 264 #endif 265 RF_LOCK_MUTEX(mcpair->mutex); 266 mcpair->flag = 0; 267 RF_UNLOCK_MUTEX(mcpair->mutex); 268 269 rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 270 (void *) mcpair); 271 272 RF_LOCK_MUTEX(mcpair->mutex); 273 while (!mcpair->flag) 274 RF_WAIT_COND(mcpair->cond, mcpair->mutex); 275 RF_UNLOCK_MUTEX(mcpair->mutex); 276 if (rd_dag_h->status != rf_enable) { 277 RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); 278 retcode = RF_PARITY_COULD_NOT_VERIFY; 279 goto out; 280 } 281 for (p = bf; p < end_p; p += numbytes) { 282 rf_bxor(p, pbuf, numbytes); 283 } 284 for (i = 0; i < numbytes; i++) { 285 if (pbuf[i] != bf[bytesPerStripe + i]) { 286 if (!correct_it) 287 RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", 288 i, (u_char) bf[bytesPerStripe + i], (u_char) pbuf[i]); 289 retcode = RF_PARITY_BAD; 290 break; 291 } 292 } 293 294 if (retcode && correct_it) { 295 wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 296 "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); 297 wrBlock = wr_dag_h->succedents[0]; 298 wrBlock->succedents[0]->params[0].p = asmap->parityInfo; 299 wrBlock->succedents[0]->params[2].v = psID; 300 wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 301 #if RF_ACC_TRACE > 0 302 memset((char *) &tracerec, 0, sizeof(tracerec)); 303 wr_dag_h->tracerec = &tracerec; 304 #endif 305 #if 0 306 if (rf_verifyParityDebug) { 307 printf("Parity verify write dag:\n"); 308 rf_PrintDAGList(wr_dag_h); 309 } 310 #endif 311 RF_LOCK_MUTEX(mcpair->mutex); 312 mcpair->flag = 0; 313 RF_UNLOCK_MUTEX(mcpair->mutex); 314 315 rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 316 (void *) mcpair); 317 318 RF_LOCK_MUTEX(mcpair->mutex); 319 while (!mcpair->flag) 320 RF_WAIT_COND(mcpair->cond, mcpair->mutex); 321 RF_UNLOCK_MUTEX(mcpair->mutex); 322 if (wr_dag_h->status != rf_enable) { 323 RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); 324 retcode = RF_PARITY_COULD_NOT_CORRECT; 325 } 326 rf_FreeDAG(wr_dag_h); 327 if (retcode == RF_PARITY_BAD) 328 retcode = RF_PARITY_CORRECTED; 329 } 330 out: 331 rf_FreeAccessStripeMap(asm_h); 332 rf_FreeAllocList(alloclist); 333 rf_FreeDAG(rd_dag_h); 334 rf_FreeMCPair(mcpair); 335 return (retcode); 336 } 337 338 int 339 rf_TryToRedirectPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, 340 int parity) 341 { 342 if (raidPtr->Disks[pda->col].status == rf_ds_reconstructing) { 343 if (rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, pda->startSector)) { 344 #if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 345 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { 346 #if RF_DEBUG_VERIFYPARITY 347 RF_RowCol_t oc = pda->col; 348 RF_SectorNum_t os = pda->startSector; 349 #endif 350 if (parity) { 351 (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->col, &pda->startSector, RF_REMAP); 352 #if RF_DEBUG_VERIFYPARITY 353 if (rf_verifyParityDebug) 354 printf("VerifyParity: Redir P c %d sect %ld -> c %d sect %ld\n", 355 oc, (long) os, pda->col, (long) pda->startSector); 356 #endif 357 } else { 358 (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->col, &pda->startSector, RF_REMAP); 359 #if RF_DEBUG_VERIFYPARITY 360 if (rf_verifyParityDebug) 361 printf("VerifyParity: Redir D c %d sect %ld -> c %d sect %ld\n", 362 oc, (long) os, pda->col, (long) pda->startSector); 363 #endif 364 } 365 } else { 366 #endif 367 RF_RowCol_t spCol = raidPtr->Disks[pda->col].spareCol; 368 pda->col = spCol; 369 #if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 370 } 371 #endif 372 } 373 } 374 if (RF_DEAD_DISK(raidPtr->Disks[pda->col].status)) 375 return (1); 376 return (0); 377 } 378 /***************************************************************************** 379 * 380 * currently a stub. 381 * 382 * takes as input an ASM describing a write operation and containing 383 * one failure, and verifies that the parity was correctly updated to 384 * reflect the write. 385 * 386 * if it's a data unit that's failed, we read the other data units in 387 * the stripe and the parity unit, XOR them together, and verify that 388 * we get the data intended for the failed disk. Since it's easy, we 389 * also validate that the right data got written to the surviving data 390 * disks. 391 * 392 * If it's the parity that failed, there's really no validation we can 393 * do except the above verification that the right data got written to 394 * all disks. This is because the new data intended for the failed 395 * disk is supplied in the ASM, but this is of course not the case for 396 * the new parity. 397 * 398 ****************************************************************************/ 399 #if 0 400 int 401 rf_VerifyDegrModeWrite(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asmh) 402 { 403 return (0); 404 } 405 #endif 406 /* creates a simple DAG with a header, a block-recon node at level 1, 407 * nNodes nodes at level 2, an unblock-recon node at level 3, and a 408 * terminator node at level 4. The stripe address field in the block 409 * and unblock nodes are not touched, nor are the pda fields in the 410 * second-level nodes, so they must be filled in later. 411 * 412 * commit point is established at unblock node - this means that any 413 * failure during dag execution causes the dag to fail 414 * 415 * name - node names at the second level 416 */ 417 RF_DagHeader_t * 418 rf_MakeSimpleDAG(RF_Raid_t *raidPtr, int nNodes, int bytesPerSU, char *databuf, 419 int (*doFunc) (RF_DagNode_t * node), 420 int (*undoFunc) (RF_DagNode_t * node), 421 const char *name, RF_AllocListElem_t *alloclist, 422 RF_RaidAccessFlags_t flags, int priority) 423 { 424 RF_DagHeader_t *dag_h; 425 RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode, *tmpNode; 426 int i; 427 428 /* grab a DAG header... */ 429 430 dag_h = rf_AllocDAGHeader(); 431 dag_h->raidPtr = (void *) raidPtr; 432 dag_h->allocList = NULL;/* we won't use this alloc list */ 433 dag_h->status = rf_enable; 434 dag_h->numSuccedents = 1; 435 dag_h->creator = "SimpleDAG"; 436 437 /* this dag can not commit until the unblock node is reached errors 438 * prior to the commit point imply the dag has failed */ 439 dag_h->numCommitNodes = 1; 440 dag_h->numCommits = 0; 441 442 /* create the nodes, the block & unblock nodes, and the terminator 443 * node */ 444 445 for (i = 0; i < nNodes; i++) { 446 tmpNode = rf_AllocDAGNode(); 447 tmpNode->list_next = dag_h->nodes; 448 dag_h->nodes = tmpNode; 449 } 450 nodes = dag_h->nodes; 451 452 blockNode = rf_AllocDAGNode(); 453 blockNode->list_next = dag_h->nodes; 454 dag_h->nodes = blockNode; 455 456 unblockNode = rf_AllocDAGNode(); 457 unblockNode->list_next = dag_h->nodes; 458 dag_h->nodes = unblockNode; 459 460 termNode = rf_AllocDAGNode(); 461 termNode->list_next = dag_h->nodes; 462 dag_h->nodes = termNode; 463 464 dag_h->succedents[0] = blockNode; 465 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist); 466 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); 467 unblockNode->succedents[0] = termNode; 468 tmpNode = nodes; 469 for (i = 0; i < nNodes; i++) { 470 blockNode->succedents[i] = unblockNode->antecedents[i] = tmpNode; 471 unblockNode->antType[i] = rf_control; 472 rf_InitNode(tmpNode, rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); 473 tmpNode->succedents[0] = unblockNode; 474 tmpNode->antecedents[0] = blockNode; 475 tmpNode->antType[0] = rf_control; 476 tmpNode->params[1].p = (databuf + (i * bytesPerSU)); 477 tmpNode = tmpNode->list_next; 478 } 479 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); 480 termNode->antecedents[0] = unblockNode; 481 termNode->antType[0] = rf_control; 482 return (dag_h); 483 } 484