/*	$NetBSD: rf_parityscan.c,v 1.12 2001/11/13 07:11:16 lukem Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*****************************************************************************
 *
 * rf_parityscan.c -- misc utilities related to parity verification
 *
 *****************************************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_parityscan.c,v 1.12 2001/11/13 07:11:16 lukem Exp $");

#include <dev/raidframe/raidframevar.h>

#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagfuncs.h"
#include "rf_dagutils.h"
#include "rf_mcpair.h"
#include "rf_general.h"
#include "rf_engine.h"
#include "rf_parityscan.h"
#include "rf_map.h"

/*****************************************************************************************
 *
 * walk through the entire array and write new parity.
53 * This works by creating two DAGs, one to read a stripe of data and one to 54 * write new parity. The first is executed, the data is xored together, and 55 * then the second is executed. To avoid constantly building and tearing down 56 * the DAGs, we create them a priori and fill them in with the mapping 57 * information as we go along. 58 * 59 * there should never be more than one thread running this. 60 * 61 ****************************************************************************************/ 62 63 int 64 rf_RewriteParity(raidPtr) 65 RF_Raid_t *raidPtr; 66 { 67 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 68 RF_AccessStripeMapHeader_t *asm_h; 69 int ret_val; 70 int rc; 71 RF_PhysDiskAddr_t pda; 72 RF_SectorNum_t i; 73 74 if (raidPtr->Layout.map->faultsTolerated == 0) { 75 /* There isn't any parity. Call it "okay." */ 76 return (RF_PARITY_OKAY); 77 } 78 if (raidPtr->status[0] != rf_rs_optimal) { 79 /* 80 * We're in degraded mode. Don't try to verify parity now! 81 * XXX: this should be a "we don't want to", not a 82 * "we can't" error. 83 */ 84 return (RF_PARITY_COULD_NOT_VERIFY); 85 } 86 87 ret_val = 0; 88 89 pda.startSector = 0; 90 pda.numSector = raidPtr->Layout.sectorsPerStripeUnit; 91 rc = RF_PARITY_OKAY; 92 93 for (i = 0; i < raidPtr->totalSectors && 94 rc <= RF_PARITY_CORRECTED; 95 i += layoutPtr->dataSectorsPerStripe) { 96 if (raidPtr->waitShutdown) { 97 /* Someone is pulling the plug on this set... 
98 abort the re-write */ 99 return (1); 100 } 101 asm_h = rf_MapAccess(raidPtr, i, 102 layoutPtr->dataSectorsPerStripe, 103 NULL, RF_DONT_REMAP); 104 raidPtr->parity_rewrite_stripes_done = 105 i / layoutPtr->dataSectorsPerStripe ; 106 rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); 107 108 switch (rc) { 109 case RF_PARITY_OKAY: 110 case RF_PARITY_CORRECTED: 111 break; 112 case RF_PARITY_BAD: 113 printf("Parity bad during correction\n"); 114 ret_val = 1; 115 break; 116 case RF_PARITY_COULD_NOT_CORRECT: 117 printf("Could not correct bad parity\n"); 118 ret_val = 1; 119 break; 120 case RF_PARITY_COULD_NOT_VERIFY: 121 printf("Could not verify parity\n"); 122 ret_val = 1; 123 break; 124 default: 125 printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); 126 ret_val = 1; 127 } 128 rf_FreeAccessStripeMap(asm_h); 129 } 130 return (ret_val); 131 } 132 /***************************************************************************************** 133 * 134 * verify that the parity in a particular stripe is correct. 135 * we validate only the range of parity defined by parityPDA, since 136 * this is all we have locked. The way we do this is to create an asm 137 * that maps the whole stripe and then range-restrict it to the parity 138 * region defined by the parityPDA. 139 * 140 ****************************************************************************************/ 141 int 142 rf_VerifyParity(raidPtr, aasm, correct_it, flags) 143 RF_Raid_t *raidPtr; 144 RF_AccessStripeMap_t *aasm; 145 int correct_it; 146 RF_RaidAccessFlags_t flags; 147 { 148 RF_PhysDiskAddr_t *parityPDA; 149 RF_AccessStripeMap_t *doasm; 150 RF_LayoutSW_t *lp; 151 int lrc, rc; 152 153 lp = raidPtr->Layout.map; 154 if (lp->faultsTolerated == 0) { 155 /* 156 * There isn't any parity. Call it "okay." 
157 */ 158 return (RF_PARITY_OKAY); 159 } 160 rc = RF_PARITY_OKAY; 161 if (lp->VerifyParity) { 162 for (doasm = aasm; doasm; doasm = doasm->next) { 163 for (parityPDA = doasm->parityInfo; parityPDA; 164 parityPDA = parityPDA->next) { 165 lrc = lp->VerifyParity(raidPtr, 166 doasm->raidAddress, 167 parityPDA, 168 correct_it, flags); 169 if (lrc > rc) { 170 /* see rf_parityscan.h for why this 171 * works */ 172 rc = lrc; 173 } 174 } 175 } 176 } else { 177 rc = RF_PARITY_COULD_NOT_VERIFY; 178 } 179 return (rc); 180 } 181 182 int 183 rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) 184 RF_Raid_t *raidPtr; 185 RF_RaidAddr_t raidAddr; 186 RF_PhysDiskAddr_t *parityPDA; 187 int correct_it; 188 RF_RaidAccessFlags_t flags; 189 { 190 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 191 RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, 192 raidAddr); 193 RF_SectorCount_t numsector = parityPDA->numSector; 194 int numbytes = rf_RaidAddressToByte(raidPtr, numsector); 195 int bytesPerStripe = numbytes * layoutPtr->numDataCol; 196 RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ 197 RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; 198 RF_AccessStripeMapHeader_t *asm_h; 199 RF_AccessStripeMap_t *asmap; 200 RF_AllocListElem_t *alloclist; 201 RF_PhysDiskAddr_t *pda; 202 char *pbuf, *buf, *end_p, *p; 203 int i, retcode; 204 RF_ReconUnitNum_t which_ru; 205 RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, 206 raidAddr, 207 &which_ru); 208 int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; 209 RF_AccTraceEntry_t tracerec; 210 RF_MCPair_t *mcpair; 211 212 retcode = RF_PARITY_OKAY; 213 214 mcpair = rf_AllocMCPair(); 215 rf_MakeAllocList(alloclist); 216 RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); 217 RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make 218 * sure buffer is zeroed */ 219 end_p = buf + 
bytesPerStripe; 220 221 rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, 222 "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); 223 blockNode = rd_dag_h->succedents[0]; 224 unblockNode = blockNode->succedents[0]->succedents[0]; 225 226 /* map the stripe and fill in the PDAs in the dag */ 227 asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); 228 asmap = asm_h->stripeMap; 229 230 for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { 231 RF_ASSERT(pda); 232 rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); 233 RF_ASSERT(pda->numSector != 0); 234 if (rf_TryToRedirectPDA(raidPtr, pda, 0)) 235 goto out; /* no way to verify parity if disk is 236 * dead. return w/ good status */ 237 blockNode->succedents[i]->params[0].p = pda; 238 blockNode->succedents[i]->params[2].v = psID; 239 blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 240 } 241 242 RF_ASSERT(!asmap->parityInfo->next); 243 rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); 244 RF_ASSERT(asmap->parityInfo->numSector != 0); 245 if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) 246 goto out; 247 blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; 248 249 /* fire off the DAG */ 250 memset((char *) &tracerec, 0, sizeof(tracerec)); 251 rd_dag_h->tracerec = &tracerec; 252 253 if (rf_verifyParityDebug) { 254 printf("Parity verify read dag:\n"); 255 rf_PrintDAGList(rd_dag_h); 256 } 257 RF_LOCK_MUTEX(mcpair->mutex); 258 mcpair->flag = 0; 259 rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 260 (void *) mcpair); 261 while (!mcpair->flag) 262 RF_WAIT_COND(mcpair->cond, mcpair->mutex); 263 RF_UNLOCK_MUTEX(mcpair->mutex); 264 if (rd_dag_h->status != rf_enable) { 265 RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); 266 retcode = RF_PARITY_COULD_NOT_VERIFY; 267 goto out; 268 
} 269 for (p = buf; p < end_p; p += numbytes) { 270 rf_bxor(p, pbuf, numbytes, NULL); 271 } 272 for (i = 0; i < numbytes; i++) { 273 #if 0 274 if (pbuf[i] != 0 || buf[bytesPerStripe + i] != 0) { 275 printf("Bytes: %d %d %d\n", i, pbuf[i], buf[bytesPerStripe + i]); 276 } 277 #endif 278 if (pbuf[i] != buf[bytesPerStripe + i]) { 279 if (!correct_it) 280 RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", 281 i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); 282 retcode = RF_PARITY_BAD; 283 break; 284 } 285 } 286 287 if (retcode && correct_it) { 288 wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 289 "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); 290 wrBlock = wr_dag_h->succedents[0]; 291 wrUnblock = wrBlock->succedents[0]->succedents[0]; 292 wrBlock->succedents[0]->params[0].p = asmap->parityInfo; 293 wrBlock->succedents[0]->params[2].v = psID; 294 wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 295 memset((char *) &tracerec, 0, sizeof(tracerec)); 296 wr_dag_h->tracerec = &tracerec; 297 if (rf_verifyParityDebug) { 298 printf("Parity verify write dag:\n"); 299 rf_PrintDAGList(wr_dag_h); 300 } 301 RF_LOCK_MUTEX(mcpair->mutex); 302 mcpair->flag = 0; 303 rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 304 (void *) mcpair); 305 while (!mcpair->flag) 306 RF_WAIT_COND(mcpair->cond, mcpair->mutex); 307 RF_UNLOCK_MUTEX(mcpair->mutex); 308 if (wr_dag_h->status != rf_enable) { 309 RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); 310 retcode = RF_PARITY_COULD_NOT_CORRECT; 311 } 312 rf_FreeDAG(wr_dag_h); 313 if (retcode == RF_PARITY_BAD) 314 retcode = RF_PARITY_CORRECTED; 315 } 316 out: 317 rf_FreeAccessStripeMap(asm_h); 318 rf_FreeAllocList(alloclist); 319 rf_FreeDAG(rd_dag_h); 320 rf_FreeMCPair(mcpair); 321 return (retcode); 322 } 323 324 int 325 rf_TryToRedirectPDA(raidPtr, pda, parity) 326 
RF_Raid_t *raidPtr; 327 RF_PhysDiskAddr_t *pda; 328 int parity; 329 { 330 if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing) { 331 if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) { 332 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { 333 RF_RowCol_t or = pda->row, oc = pda->col; 334 RF_SectorNum_t os = pda->startSector; 335 if (parity) { 336 (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); 337 if (rf_verifyParityDebug) 338 printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n", 339 or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); 340 } else { 341 (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); 342 if (rf_verifyParityDebug) 343 printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n", 344 or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); 345 } 346 } else { 347 RF_RowCol_t spRow = raidPtr->Disks[pda->row][pda->col].spareRow; 348 RF_RowCol_t spCol = raidPtr->Disks[pda->row][pda->col].spareCol; 349 pda->row = spRow; 350 pda->col = spCol; 351 } 352 } 353 } 354 if (RF_DEAD_DISK(raidPtr->Disks[pda->row][pda->col].status)) 355 return (1); 356 return (0); 357 } 358 /***************************************************************************************** 359 * 360 * currently a stub. 361 * 362 * takes as input an ASM describing a write operation and containing one failure, and 363 * verifies that the parity was correctly updated to reflect the write. 364 * 365 * if it's a data unit that's failed, we read the other data units in the stripe and 366 * the parity unit, XOR them together, and verify that we get the data intended for 367 * the failed disk. Since it's easy, we also validate that the right data got written 368 * to the surviving data disks. 
369 * 370 * If it's the parity that failed, there's really no validation we can do except the 371 * above verification that the right data got written to all disks. This is because 372 * the new data intended for the failed disk is supplied in the ASM, but this is of 373 * course not the case for the new parity. 374 * 375 ****************************************************************************************/ 376 int 377 rf_VerifyDegrModeWrite(raidPtr, asmh) 378 RF_Raid_t *raidPtr; 379 RF_AccessStripeMapHeader_t *asmh; 380 { 381 return (0); 382 } 383 /* creates a simple DAG with a header, a block-recon node at level 1, 384 * nNodes nodes at level 2, an unblock-recon node at level 3, and 385 * a terminator node at level 4. The stripe address field in 386 * the block and unblock nodes are not touched, nor are the pda 387 * fields in the second-level nodes, so they must be filled in later. 388 * 389 * commit point is established at unblock node - this means that any 390 * failure during dag execution causes the dag to fail 391 */ 392 RF_DagHeader_t * 393 rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority) 394 RF_Raid_t *raidPtr; 395 int nNodes; 396 int bytesPerSU; 397 char *databuf; 398 int (*doFunc) (RF_DagNode_t * node); 399 int (*undoFunc) (RF_DagNode_t * node); 400 char *name; /* node names at the second level */ 401 RF_AllocListElem_t *alloclist; 402 RF_RaidAccessFlags_t flags; 403 int priority; 404 { 405 RF_DagHeader_t *dag_h; 406 RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode; 407 int i; 408 409 /* create the nodes, the block & unblock nodes, and the terminator 410 * node */ 411 RF_CallocAndAdd(nodes, nNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist); 412 blockNode = &nodes[nNodes]; 413 unblockNode = blockNode + 1; 414 termNode = unblockNode + 1; 415 416 dag_h = rf_AllocDAGHeader(); 417 dag_h->raidPtr = (void *) raidPtr; 418 dag_h->allocList = NULL;/* we won't use this alloc list */ 
419 dag_h->status = rf_enable; 420 dag_h->numSuccedents = 1; 421 dag_h->creator = "SimpleDAG"; 422 423 /* this dag can not commit until the unblock node is reached errors 424 * prior to the commit point imply the dag has failed */ 425 dag_h->numCommitNodes = 1; 426 dag_h->numCommits = 0; 427 428 dag_h->succedents[0] = blockNode; 429 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist); 430 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); 431 unblockNode->succedents[0] = termNode; 432 for (i = 0; i < nNodes; i++) { 433 blockNode->succedents[i] = unblockNode->antecedents[i] = &nodes[i]; 434 unblockNode->antType[i] = rf_control; 435 rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); 436 nodes[i].succedents[0] = unblockNode; 437 nodes[i].antecedents[0] = blockNode; 438 nodes[i].antType[0] = rf_control; 439 nodes[i].params[1].p = (databuf + (i * bytesPerSU)); 440 } 441 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); 442 termNode->antecedents[0] = unblockNode; 443 termNode->antType[0] = rf_control; 444 return (dag_h); 445 } 446