1 /* $NetBSD: rf_copyback.c,v 1.19 2001/11/15 09:48:13 lukem Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /***************************************************************************************** 30 * 31 * copyback.c -- code to copy reconstructed data back from spare space to 32 * the replaced disk. 33 * 34 * the code operates using callbacks on the I/Os to continue with the next 35 * unit to be copied back. We do this because a simple loop containing blocking I/Os 36 * will not work in the simulator. 37 * 38 ****************************************************************************************/ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: rf_copyback.c,v 1.19 2001/11/15 09:48:13 lukem Exp $"); 42 43 #include <dev/raidframe/raidframevar.h> 44 45 #include <sys/time.h> 46 #include <sys/buf.h> 47 #include "rf_raid.h" 48 #include "rf_mcpair.h" 49 #include "rf_acctrace.h" 50 #include "rf_etimer.h" 51 #include "rf_general.h" 52 #include "rf_utils.h" 53 #include "rf_copyback.h" 54 #include "rf_decluster.h" 55 #include "rf_driver.h" 56 #include "rf_shutdown.h" 57 #include "rf_kintf.h" 58 59 #define RF_COPYBACK_DATA 0 60 #define RF_COPYBACK_PARITY 1 61 62 int rf_copyback_in_progress; 63 64 static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status); 65 static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status); 66 static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ, 67 RF_RaidAddr_t addr, RF_RowCol_t testRow, 68 RF_RowCol_t testCol, 69 RF_SectorNum_t testOffs); 70 static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status); 71 72 int 73 rf_ConfigureCopyback(listp) 74 RF_ShutdownList_t **listp; 75 { 76 rf_copyback_in_progress = 0; 77 return (0); 78 } 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/proc.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/vnode.h> 86 87 /* do a complete copyback */ 88 void 89 rf_CopybackReconstructedData(raidPtr) 90 RF_Raid_t *raidPtr; 91 { 92 RF_ComponentLabel_t c_label; 93 int done, retcode; 94 RF_CopybackDesc_t *desc; 95 RF_RowCol_t frow, fcol; 96 RF_RaidDisk_t *badDisk; 97 char *databuf; 98 99 struct partinfo dpart; 100 struct vnode *vp; 101 struct vattr va; 102 struct proc *proc; 103 104 int ac; 105 106 done = 0; 107 fcol = 0; 108 for (frow = 0; frow < raidPtr->numRow; frow++) { 109 for (fcol = 0; fcol < raidPtr->numCol; fcol++) { 110 if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared 111 || raidPtr->Disks[frow][fcol].status == rf_ds_spared) { 112 done = 1; 113 break; 114 } 115 } 116 if (done) 117 break; 118 } 119 120 if (frow == raidPtr->numRow) { 121 printf("COPYBACK: no disks need copyback\n"); 122 return; 123 } 124 badDisk = &raidPtr->Disks[frow][fcol]; 125 126 proc = raidPtr->engine_thread; 127 128 /* This device may have been opened successfully the first time. Close 129 * it before trying to open it again.. */ 130 131 if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) { 132 printf("Closed the open device: %s\n", 133 raidPtr->Disks[frow][fcol].devname); 134 vp = raidPtr->raid_cinfo[frow][fcol].ci_vp; 135 ac = raidPtr->Disks[frow][fcol].auto_configured; 136 rf_close_component(raidPtr, vp, ac); 137 raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL; 138 139 } 140 /* note that this disk was *not* auto_configured (any longer) */ 141 raidPtr->Disks[frow][fcol].auto_configured = 0; 142 143 printf("About to (re-)open the device: %s\n", 144 raidPtr->Disks[frow][fcol].devname); 145 146 retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp); 147 148 if (retcode) { 149 printf("COPYBACK: raidlookup on device: %s failed: %d!\n", 150 raidPtr->Disks[frow][fcol].devname, retcode); 151 152 /* XXX the component isn't responding properly... must be 153 * still dead :-( */ 154 return; 155 156 } else { 157 158 /* Ok, so we can at least do a lookup... How about actually 159 * getting a vp for it? */ 160 161 if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { 162 return; 163 } 164 retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, 165 FREAD, proc->p_ucred, proc); 166 if (retcode) { 167 return; 168 } 169 raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize; 170 171 raidPtr->Disks[frow][fcol].numBlocks = dpart.part->p_size - 172 rf_protectedSectors; 173 174 raidPtr->raid_cinfo[frow][fcol].ci_vp = vp; 175 raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev; 176 177 raidPtr->Disks[frow][fcol].dev = va.va_rdev; /* XXX or the above? */ 178 179 /* we allow the user to specify that only a fraction of the 180 * disks should be used this is just for debug: it speeds up 181 * the parity scan */ 182 raidPtr->Disks[frow][fcol].numBlocks = 183 raidPtr->Disks[frow][fcol].numBlocks * 184 rf_sizePercentage / 100; 185 } 186 187 if (retcode) { 188 printf("COPYBACK: target disk failed TUR\n"); 189 return; 190 } 191 /* get a buffer to hold one SU */ 192 RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *)); 193 194 /* create a descriptor */ 195 RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *)); 196 desc->raidPtr = raidPtr; 197 desc->status = 0; 198 desc->frow = frow; 199 desc->fcol = fcol; 200 desc->spRow = badDisk->spareRow; 201 desc->spCol = badDisk->spareCol; 202 desc->stripeAddr = 0; 203 desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 204 desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol; 205 desc->databuf = databuf; 206 desc->mcpair = rf_AllocMCPair(); 207 208 printf("COPYBACK: Quiescing the array\n"); 209 /* quiesce the array, since we don't want to code support for user 210 * accs here */ 211 rf_SuspendNewRequestsAndWait(raidPtr); 212 213 /* adjust state of the array and of the disks */ 214 RF_LOCK_MUTEX(raidPtr->mutex); 215 raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal; 216 raidPtr->status[desc->frow] = rf_rs_optimal; 217 rf_copyback_in_progress = 1; /* debug only */ 218 RF_UNLOCK_MUTEX(raidPtr->mutex); 219 220 printf("COPYBACK: Beginning\n"); 221 RF_GETTIME(desc->starttime); 222 rf_ContinueCopyback(desc); 223 224 /* Data has been restored. Fix up the component label. */ 225 /* Don't actually need the read here.. */ 226 raidread_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, 227 raidPtr->raid_cinfo[frow][fcol].ci_vp, 228 &c_label); 229 230 raid_init_component_label( raidPtr, &c_label ); 231 232 c_label.row = frow; 233 c_label.column = fcol; 234 c_label.partitionSize = raidPtr->Disks[frow][fcol].partitionSize; 235 236 raidwrite_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, 237 raidPtr->raid_cinfo[frow][fcol].ci_vp, 238 &c_label); 239 } 240 241 242 /* 243 * invoked via callback after a copyback I/O has completed to 244 * continue on with the next one 245 */ 246 void 247 rf_ContinueCopyback(desc) 248 RF_CopybackDesc_t *desc; 249 { 250 RF_SectorNum_t testOffs, stripeAddr; 251 RF_Raid_t *raidPtr = desc->raidPtr; 252 RF_RaidAddr_t addr; 253 RF_RowCol_t testRow, testCol; 254 int old_pctg, new_pctg, done; 255 struct timeval t, diff; 256 257 old_pctg = (-1); 258 while (1) { 259 stripeAddr = desc->stripeAddr; 260 desc->raidPtr->copyback_stripes_done = stripeAddr 261 / desc->sectPerStripe; 262 if (rf_prReconSched) { 263 old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; 264 } 265 desc->stripeAddr += desc->sectPerStripe; 266 if (rf_prReconSched) { 267 new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; 268 if (new_pctg != old_pctg) { 269 RF_GETTIME(t); 270 RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); 271 printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); 272 } 273 } 274 if (stripeAddr >= raidPtr->totalSectors) { 275 rf_CopybackComplete(desc, 0); 276 return; 277 } 278 /* walk through the current stripe, su-by-su */ 279 for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) { 280 281 /* map the SU, disallowing remap to spare space */ 282 (raidPtr->Layout.map->MapSector) (raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); 283 284 if (testRow == desc->frow && testCol == desc->fcol) { 285 rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs); 286 done = 1; 287 break; 288 } 289 } 290 291 if (!done) { 292 /* we didn't find the failed disk in the data part. 293 * check parity. */ 294 295 /* map the parity for this stripe, disallowing remap 296 * to spare space */ 297 (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); 298 299 if (testRow == desc->frow && testCol == desc->fcol) { 300 rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs); 301 } 302 } 303 /* check to see if the last read/write pair failed */ 304 if (desc->status) { 305 rf_CopybackComplete(desc, 1); 306 return; 307 } 308 /* we didn't find any units to copy back in this stripe. 309 * Continue with the next one */ 310 } 311 } 312 313 314 /* copyback one unit */ 315 static void 316 rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs) 317 RF_CopybackDesc_t *desc; 318 int typ; 319 RF_RaidAddr_t addr; 320 RF_RowCol_t testRow; 321 RF_RowCol_t testCol; 322 RF_SectorNum_t testOffs; 323 { 324 RF_SectorCount_t sectPerSU = desc->sectPerSU; 325 RF_Raid_t *raidPtr = desc->raidPtr; 326 RF_RowCol_t spRow = desc->spRow; 327 RF_RowCol_t spCol = desc->spCol; 328 RF_SectorNum_t spOffs; 329 330 /* find the spare spare location for this SU */ 331 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { 332 if (typ == RF_COPYBACK_DATA) 333 raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); 334 else 335 raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); 336 } else { 337 spOffs = testOffs; 338 } 339 340 /* create reqs to read the old location & write the new */ 341 desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs, 342 sectPerSU, desc->databuf, 0L, 0, 343 (int (*) (void *, int)) rf_CopybackReadDoneProc, desc, 344 NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); 345 desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs, 346 sectPerSU, desc->databuf, 0L, 0, 347 (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc, 348 NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); 349 desc->frow = testRow; 350 desc->fcol = testCol; 351 352 /* enqueue the read. the write will go out as part of the callback on 353 * the read. at user-level & in the kernel, wait for the read-write 354 * pair to complete. in the simulator, just return, since everything 355 * will happen as callbacks */ 356 357 RF_LOCK_MUTEX(desc->mcpair->mutex); 358 desc->mcpair->flag = 0; 359 360 rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY); 361 362 while (!desc->mcpair->flag) { 363 RF_WAIT_MCPAIR(desc->mcpair); 364 } 365 RF_UNLOCK_MUTEX(desc->mcpair->mutex); 366 rf_FreeDiskQueueData(desc->readreq); 367 rf_FreeDiskQueueData(desc->writereq); 368 369 } 370 371 372 /* called at interrupt context when the read has completed. just send out the write */ 373 static int 374 rf_CopybackReadDoneProc(desc, status) 375 RF_CopybackDesc_t *desc; 376 int status; 377 { 378 if (status) { /* invoke the callback with bad status */ 379 printf("COPYBACK: copyback read failed. Aborting.\n"); 380 (desc->writereq->CompleteFunc) (desc, -100); 381 } else { 382 rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY); 383 } 384 return (0); 385 } 386 /* called at interrupt context when the write has completed. 387 * at user level & in the kernel, wake up the copyback thread. 388 * in the simulator, invoke the next copyback directly. 389 * can't free diskqueuedata structs in the kernel b/c we're at interrupt context. 390 */ 391 static int 392 rf_CopybackWriteDoneProc(desc, status) 393 RF_CopybackDesc_t *desc; 394 int status; 395 { 396 if (status && status != -100) { 397 printf("COPYBACK: copyback write failed. Aborting.\n"); 398 } 399 desc->status = status; 400 rf_MCPairWakeupFunc(desc->mcpair); 401 return (0); 402 } 403 /* invoked when the copyback has completed */ 404 static void 405 rf_CopybackComplete(desc, status) 406 RF_CopybackDesc_t *desc; 407 int status; 408 { 409 RF_Raid_t *raidPtr = desc->raidPtr; 410 struct timeval t, diff; 411 412 if (!status) { 413 RF_LOCK_MUTEX(raidPtr->mutex); 414 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { 415 RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D'); 416 rf_FreeSpareTable(raidPtr); 417 } else { 418 raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare; 419 } 420 RF_UNLOCK_MUTEX(raidPtr->mutex); 421 422 RF_GETTIME(t); 423 RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); 424 printf("Copyback time was %d.%06d seconds\n", 425 (int) diff.tv_sec, (int) diff.tv_usec); 426 } else 427 printf("COPYBACK: Failure.\n"); 428 429 RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU)); 430 rf_FreeMCPair(desc->mcpair); 431 RF_Free(desc, sizeof(*desc)); 432 433 rf_copyback_in_progress = 0; 434 rf_ResumeNewRequests(raidPtr); 435 } 436