1 /* $NetBSD: rf_copyback.c,v 1.22 2002/11/16 16:49:46 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /***************************************************************************************** 30 * 31 * copyback.c -- code to copy reconstructed data back from spare space to 32 * the replaced disk. 33 * 34 * the code operates using callbacks on the I/Os to continue with the next 35 * unit to be copied back. We do this because a simple loop containing blocking I/Os 36 * will not work in the simulator. 37 * 38 ****************************************************************************************/ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: rf_copyback.c,v 1.22 2002/11/16 16:49:46 oster Exp $"); 42 43 #include <dev/raidframe/raidframevar.h> 44 45 #include <sys/time.h> 46 #include <sys/buf.h> 47 #include "rf_raid.h" 48 #include "rf_mcpair.h" 49 #include "rf_acctrace.h" 50 #include "rf_etimer.h" 51 #include "rf_general.h" 52 #include "rf_utils.h" 53 #include "rf_copyback.h" 54 #include "rf_decluster.h" 55 #include "rf_driver.h" 56 #include "rf_shutdown.h" 57 #include "rf_kintf.h" 58 59 #define RF_COPYBACK_DATA 0 60 #define RF_COPYBACK_PARITY 1 61 62 int rf_copyback_in_progress; 63 64 static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status); 65 static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status); 66 static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ, 67 RF_RaidAddr_t addr, RF_RowCol_t testRow, 68 RF_RowCol_t testCol, 69 RF_SectorNum_t testOffs); 70 static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status); 71 72 int 73 rf_ConfigureCopyback(listp) 74 RF_ShutdownList_t **listp; 75 { 76 rf_copyback_in_progress = 0; 77 return (0); 78 } 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/proc.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/vnode.h> 86 87 /* do a complete copyback */ 88 void 89 rf_CopybackReconstructedData(raidPtr) 90 RF_Raid_t *raidPtr; 91 { 92 RF_ComponentLabel_t c_label; 93 int done, retcode; 94 RF_CopybackDesc_t *desc; 95 RF_RowCol_t frow, fcol; 96 RF_RaidDisk_t *badDisk; 97 char *databuf; 98 99 struct partinfo dpart; 100 struct vnode *vp; 101 struct vattr va; 102 struct proc *proc; 103 104 int ac; 105 106 done = 0; 107 fcol = 0; 108 for (frow = 0; frow < raidPtr->numRow; frow++) { 109 for (fcol = 0; fcol < raidPtr->numCol; fcol++) { 110 if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared 111 || raidPtr->Disks[frow][fcol].status == rf_ds_spared) { 112 done = 1; 113 break; 114 } 115 } 116 if (done) 117 break; 118 } 119 120 if (frow == raidPtr->numRow) { 121 printf("raid%d: no disks need copyback\n", raidPtr->raidid); 122 return; 123 } 124 badDisk = &raidPtr->Disks[frow][fcol]; 125 126 proc = raidPtr->engine_thread; 127 128 /* This device may have been opened successfully the first time. Close 129 * it before trying to open it again.. */ 130 131 if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) { 132 printf("Closed the open device: %s\n", 133 raidPtr->Disks[frow][fcol].devname); 134 vp = raidPtr->raid_cinfo[frow][fcol].ci_vp; 135 ac = raidPtr->Disks[frow][fcol].auto_configured; 136 rf_close_component(raidPtr, vp, ac); 137 raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL; 138 139 } 140 /* note that this disk was *not* auto_configured (any longer) */ 141 raidPtr->Disks[frow][fcol].auto_configured = 0; 142 143 printf("About to (re-)open the device: %s\n", 144 raidPtr->Disks[frow][fcol].devname); 145 146 retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp); 147 148 if (retcode) { 149 printf("raid%d: copyback: raidlookup on device: %s failed: %d!\n", 150 raidPtr->raidid, raidPtr->Disks[frow][fcol].devname, 151 retcode); 152 153 /* XXX the component isn't responding properly... must be 154 * still dead :-( */ 155 return; 156 157 } else { 158 159 /* Ok, so we can at least do a lookup... How about actually 160 * getting a vp for it? */ 161 162 if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { 163 return; 164 } 165 retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, 166 FREAD, proc->p_ucred, proc); 167 if (retcode) { 168 return; 169 } 170 raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize; 171 172 raidPtr->Disks[frow][fcol].numBlocks = dpart.part->p_size - 173 rf_protectedSectors; 174 175 raidPtr->raid_cinfo[frow][fcol].ci_vp = vp; 176 raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev; 177 178 raidPtr->Disks[frow][fcol].dev = va.va_rdev; /* XXX or the above? */ 179 180 /* we allow the user to specify that only a fraction of the 181 * disks should be used this is just for debug: it speeds up 182 * the parity scan */ 183 raidPtr->Disks[frow][fcol].numBlocks = 184 raidPtr->Disks[frow][fcol].numBlocks * 185 rf_sizePercentage / 100; 186 } 187 188 if (retcode) { 189 printf("raid%d: copyback: target disk failed TUR\n", 190 raidPtr->raidid); 191 return; 192 } 193 /* get a buffer to hold one SU */ 194 RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *)); 195 196 /* create a descriptor */ 197 RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *)); 198 desc->raidPtr = raidPtr; 199 desc->status = 0; 200 desc->frow = frow; 201 desc->fcol = fcol; 202 desc->spRow = badDisk->spareRow; 203 desc->spCol = badDisk->spareCol; 204 desc->stripeAddr = 0; 205 desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 206 desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol; 207 desc->databuf = databuf; 208 desc->mcpair = rf_AllocMCPair(); 209 210 /* quiesce the array, since we don't want to code support for user 211 * accs here */ 212 rf_SuspendNewRequestsAndWait(raidPtr); 213 214 /* adjust state of the array and of the disks */ 215 RF_LOCK_MUTEX(raidPtr->mutex); 216 raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal; 217 raidPtr->status[desc->frow] = rf_rs_optimal; 218 rf_copyback_in_progress = 1; /* debug only */ 219 RF_UNLOCK_MUTEX(raidPtr->mutex); 220 221 RF_GETTIME(desc->starttime); 222 rf_ContinueCopyback(desc); 223 224 /* Data has been restored. Fix up the component label. */ 225 /* Don't actually need the read here.. */ 226 raidread_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, 227 raidPtr->raid_cinfo[frow][fcol].ci_vp, 228 &c_label); 229 230 raid_init_component_label( raidPtr, &c_label ); 231 232 c_label.row = frow; 233 c_label.column = fcol; 234 c_label.partitionSize = raidPtr->Disks[frow][fcol].partitionSize; 235 236 raidwrite_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, 237 raidPtr->raid_cinfo[frow][fcol].ci_vp, 238 &c_label); 239 rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); 240 } 241 242 243 /* 244 * invoked via callback after a copyback I/O has completed to 245 * continue on with the next one 246 */ 247 void 248 rf_ContinueCopyback(desc) 249 RF_CopybackDesc_t *desc; 250 { 251 RF_SectorNum_t testOffs, stripeAddr; 252 RF_Raid_t *raidPtr = desc->raidPtr; 253 RF_RaidAddr_t addr; 254 RF_RowCol_t testRow, testCol; 255 #if RF_DEBUG_RECON 256 int old_pctg, new_pctg; 257 struct timeval t, diff; 258 #endif 259 int done; 260 261 #if RF_DEBUG_RECON 262 old_pctg = (-1); 263 #endif 264 while (1) { 265 stripeAddr = desc->stripeAddr; 266 desc->raidPtr->copyback_stripes_done = stripeAddr 267 / desc->sectPerStripe; 268 #if RF_DEBUG_RECON 269 if (rf_prReconSched) { 270 old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; 271 } 272 #endif 273 desc->stripeAddr += desc->sectPerStripe; 274 #if RF_DEBUG_RECON 275 if (rf_prReconSched) { 276 new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; 277 if (new_pctg != old_pctg) { 278 RF_GETTIME(t); 279 RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); 280 printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); 281 } 282 } 283 #endif 284 if (stripeAddr >= raidPtr->totalSectors) { 285 rf_CopybackComplete(desc, 0); 286 return; 287 } 288 /* walk through the current stripe, su-by-su */ 289 for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) { 290 291 /* map the SU, disallowing remap to spare space */ 292 (raidPtr->Layout.map->MapSector) (raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); 293 294 if (testRow == desc->frow && testCol == desc->fcol) { 295 rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs); 296 done = 1; 297 break; 298 } 299 } 300 301 if (!done) { 302 /* we didn't find the failed disk in the data part. 303 * check parity. */ 304 305 /* map the parity for this stripe, disallowing remap 306 * to spare space */ 307 (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); 308 309 if (testRow == desc->frow && testCol == desc->fcol) { 310 rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs); 311 } 312 } 313 /* check to see if the last read/write pair failed */ 314 if (desc->status) { 315 rf_CopybackComplete(desc, 1); 316 return; 317 } 318 /* we didn't find any units to copy back in this stripe. 319 * Continue with the next one */ 320 } 321 } 322 323 324 /* copyback one unit */ 325 static void 326 rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs) 327 RF_CopybackDesc_t *desc; 328 int typ; 329 RF_RaidAddr_t addr; 330 RF_RowCol_t testRow; 331 RF_RowCol_t testCol; 332 RF_SectorNum_t testOffs; 333 { 334 RF_SectorCount_t sectPerSU = desc->sectPerSU; 335 RF_Raid_t *raidPtr = desc->raidPtr; 336 RF_RowCol_t spRow = desc->spRow; 337 RF_RowCol_t spCol = desc->spCol; 338 RF_SectorNum_t spOffs; 339 340 /* find the spare spare location for this SU */ 341 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { 342 if (typ == RF_COPYBACK_DATA) 343 raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); 344 else 345 raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); 346 } else { 347 spOffs = testOffs; 348 } 349 350 /* create reqs to read the old location & write the new */ 351 desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs, 352 sectPerSU, desc->databuf, 0L, 0, 353 (int (*) (void *, int)) rf_CopybackReadDoneProc, desc, 354 NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); 355 desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs, 356 sectPerSU, desc->databuf, 0L, 0, 357 (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc, 358 NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); 359 desc->frow = testRow; 360 desc->fcol = testCol; 361 362 /* enqueue the read. the write will go out as part of the callback on 363 * the read. at user-level & in the kernel, wait for the read-write 364 * pair to complete. in the simulator, just return, since everything 365 * will happen as callbacks */ 366 367 RF_LOCK_MUTEX(desc->mcpair->mutex); 368 desc->mcpair->flag = 0; 369 370 rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY); 371 372 while (!desc->mcpair->flag) { 373 RF_WAIT_MCPAIR(desc->mcpair); 374 } 375 RF_UNLOCK_MUTEX(desc->mcpair->mutex); 376 rf_FreeDiskQueueData(desc->readreq); 377 rf_FreeDiskQueueData(desc->writereq); 378 379 } 380 381 382 /* called at interrupt context when the read has completed. just send out the write */ 383 static int 384 rf_CopybackReadDoneProc(desc, status) 385 RF_CopybackDesc_t *desc; 386 int status; 387 { 388 if (status) { /* invoke the callback with bad status */ 389 printf("raid%d: copyback read failed. Aborting.\n", 390 desc->raidPtr->raidid); 391 (desc->writereq->CompleteFunc) (desc, -100); 392 } else { 393 rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY); 394 } 395 return (0); 396 } 397 /* called at interrupt context when the write has completed. 398 * at user level & in the kernel, wake up the copyback thread. 399 * in the simulator, invoke the next copyback directly. 400 * can't free diskqueuedata structs in the kernel b/c we're at interrupt context. 401 */ 402 static int 403 rf_CopybackWriteDoneProc(desc, status) 404 RF_CopybackDesc_t *desc; 405 int status; 406 { 407 if (status && status != -100) { 408 printf("raid%d: copyback write failed. Aborting.\n", 409 desc->raidPtr->raidid); 410 } 411 desc->status = status; 412 rf_MCPairWakeupFunc(desc->mcpair); 413 return (0); 414 } 415 /* invoked when the copyback has completed */ 416 static void 417 rf_CopybackComplete(desc, status) 418 RF_CopybackDesc_t *desc; 419 int status; 420 { 421 RF_Raid_t *raidPtr = desc->raidPtr; 422 struct timeval t, diff; 423 424 if (!status) { 425 RF_LOCK_MUTEX(raidPtr->mutex); 426 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { 427 RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D'); 428 rf_FreeSpareTable(raidPtr); 429 } else { 430 raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare; 431 } 432 RF_UNLOCK_MUTEX(raidPtr->mutex); 433 434 RF_GETTIME(t); 435 RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); 436 #if 0 437 printf("Copyback time was %d.%06d seconds\n", 438 (int) diff.tv_sec, (int) diff.tv_usec); 439 #endif 440 } else 441 printf("raid%d: Copyback failure. Status: %d\n", 442 raidPtr->raidid, status); 443 444 RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU)); 445 rf_FreeMCPair(desc->mcpair); 446 RF_Free(desc, sizeof(*desc)); 447 448 rf_copyback_in_progress = 0; 449 rf_ResumeNewRequests(raidPtr); 450 } 451