xref: /netbsd/sys/dev/raidframe/rf_copyback.c (revision c4a72b64)
1 /*	$NetBSD: rf_copyback.c,v 1.22 2002/11/16 16:49:46 oster Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /*****************************************************************************************
30  *
31  * copyback.c -- code to copy reconstructed data back from spare space to
32  *               the replaced disk.
33  *
34  * the code operates using callbacks on the I/Os to continue with the next
35  * unit to be copied back.  We do this because a simple loop containing blocking I/Os
36  * will not work in the simulator.
37  *
38  ****************************************************************************************/
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: rf_copyback.c,v 1.22 2002/11/16 16:49:46 oster Exp $");
42 
43 #include <dev/raidframe/raidframevar.h>
44 
45 #include <sys/time.h>
46 #include <sys/buf.h>
47 #include "rf_raid.h"
48 #include "rf_mcpair.h"
49 #include "rf_acctrace.h"
50 #include "rf_etimer.h"
51 #include "rf_general.h"
52 #include "rf_utils.h"
53 #include "rf_copyback.h"
54 #include "rf_decluster.h"
55 #include "rf_driver.h"
56 #include "rf_shutdown.h"
57 #include "rf_kintf.h"
58 
59 #define RF_COPYBACK_DATA   0
60 #define RF_COPYBACK_PARITY 1
61 
62 int     rf_copyback_in_progress;
63 
64 static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status);
65 static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status);
66 static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ,
67 			   RF_RaidAddr_t addr, RF_RowCol_t testRow,
68 			   RF_RowCol_t testCol,
69 			   RF_SectorNum_t testOffs);
70 static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status);
71 
72 int
73 rf_ConfigureCopyback(listp)
74 	RF_ShutdownList_t **listp;
75 {
76 	rf_copyback_in_progress = 0;
77 	return (0);
78 }
79 
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86 
87 /* do a complete copyback */
88 void
89 rf_CopybackReconstructedData(raidPtr)
90 	RF_Raid_t *raidPtr;
91 {
92 	RF_ComponentLabel_t c_label;
93 	int     done, retcode;
94 	RF_CopybackDesc_t *desc;
95 	RF_RowCol_t frow, fcol;
96 	RF_RaidDisk_t *badDisk;
97 	char   *databuf;
98 
99 	struct partinfo dpart;
100 	struct vnode *vp;
101 	struct vattr va;
102 	struct proc *proc;
103 
104 	int ac;
105 
106 	done = 0;
107 	fcol = 0;
108 	for (frow = 0; frow < raidPtr->numRow; frow++) {
109 		for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
110 			if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared
111 			    || raidPtr->Disks[frow][fcol].status == rf_ds_spared) {
112 				done = 1;
113 				break;
114 			}
115 		}
116 		if (done)
117 			break;
118 	}
119 
120 	if (frow == raidPtr->numRow) {
121 		printf("raid%d: no disks need copyback\n", raidPtr->raidid);
122 		return;
123 	}
124 	badDisk = &raidPtr->Disks[frow][fcol];
125 
126 	proc = raidPtr->engine_thread;
127 
128 	/* This device may have been opened successfully the first time. Close
129 	 * it before trying to open it again.. */
130 
131 	if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) {
132 		printf("Closed the open device: %s\n",
133 		    raidPtr->Disks[frow][fcol].devname);
134 		vp = raidPtr->raid_cinfo[frow][fcol].ci_vp;
135 		ac = raidPtr->Disks[frow][fcol].auto_configured;
136 		rf_close_component(raidPtr, vp, ac);
137 		raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL;
138 
139 	}
140 	/* note that this disk was *not* auto_configured (any longer) */
141 	raidPtr->Disks[frow][fcol].auto_configured = 0;
142 
143 	printf("About to (re-)open the device: %s\n",
144 	    raidPtr->Disks[frow][fcol].devname);
145 
146 	retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp);
147 
148 	if (retcode) {
149 		printf("raid%d: copyback: raidlookup on device: %s failed: %d!\n",
150 		       raidPtr->raidid, raidPtr->Disks[frow][fcol].devname,
151 		       retcode);
152 
153 		/* XXX the component isn't responding properly... must be
154 		 * still dead :-( */
155 		return;
156 
157 	} else {
158 
159 		/* Ok, so we can at least do a lookup... How about actually
160 		 * getting a vp for it? */
161 
162 		if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
163 			return;
164 		}
165 		retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
166 		    FREAD, proc->p_ucred, proc);
167 		if (retcode) {
168 			return;
169 		}
170 		raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize;
171 
172 		raidPtr->Disks[frow][fcol].numBlocks = dpart.part->p_size -
173 		    rf_protectedSectors;
174 
175 		raidPtr->raid_cinfo[frow][fcol].ci_vp = vp;
176 		raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev;
177 
178 		raidPtr->Disks[frow][fcol].dev = va.va_rdev;	/* XXX or the above? */
179 
180 		/* we allow the user to specify that only a fraction of the
181 		 * disks should be used this is just for debug:  it speeds up
182 		 * the parity scan */
183 		raidPtr->Disks[frow][fcol].numBlocks =
184 		    raidPtr->Disks[frow][fcol].numBlocks *
185 		    rf_sizePercentage / 100;
186 	}
187 
188 	if (retcode) {
189 		printf("raid%d: copyback: target disk failed TUR\n",
190 		       raidPtr->raidid);
191 		return;
192 	}
193 	/* get a buffer to hold one SU  */
194 	RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *));
195 
196 	/* create a descriptor */
197 	RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
198 	desc->raidPtr = raidPtr;
199 	desc->status = 0;
200 	desc->frow = frow;
201 	desc->fcol = fcol;
202 	desc->spRow = badDisk->spareRow;
203 	desc->spCol = badDisk->spareCol;
204 	desc->stripeAddr = 0;
205 	desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
206 	desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol;
207 	desc->databuf = databuf;
208 	desc->mcpair = rf_AllocMCPair();
209 
210 	/* quiesce the array, since we don't want to code support for user
211 	 * accs here */
212 	rf_SuspendNewRequestsAndWait(raidPtr);
213 
214 	/* adjust state of the array and of the disks */
215 	RF_LOCK_MUTEX(raidPtr->mutex);
216 	raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal;
217 	raidPtr->status[desc->frow] = rf_rs_optimal;
218 	rf_copyback_in_progress = 1;	/* debug only */
219 	RF_UNLOCK_MUTEX(raidPtr->mutex);
220 
221 	RF_GETTIME(desc->starttime);
222 	rf_ContinueCopyback(desc);
223 
224 	/* Data has been restored.  Fix up the component label. */
225 	/* Don't actually need the read here.. */
226 	raidread_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev,
227 				  raidPtr->raid_cinfo[frow][fcol].ci_vp,
228 				  &c_label);
229 
230 	raid_init_component_label( raidPtr, &c_label );
231 
232 	c_label.row = frow;
233 	c_label.column = fcol;
234 	c_label.partitionSize = raidPtr->Disks[frow][fcol].partitionSize;
235 
236 	raidwrite_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev,
237 				   raidPtr->raid_cinfo[frow][fcol].ci_vp,
238 				   &c_label);
239 	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
240 }
241 
242 
243 /*
244  * invoked via callback after a copyback I/O has completed to
245  * continue on with the next one
246  */
247 void
248 rf_ContinueCopyback(desc)
249 	RF_CopybackDesc_t *desc;
250 {
251 	RF_SectorNum_t testOffs, stripeAddr;
252 	RF_Raid_t *raidPtr = desc->raidPtr;
253 	RF_RaidAddr_t addr;
254 	RF_RowCol_t testRow, testCol;
255 #if RF_DEBUG_RECON
256 	int     old_pctg, new_pctg;
257 	struct timeval t, diff;
258 #endif
259 	int done;
260 
261 #if RF_DEBUG_RECON
262 	old_pctg = (-1);
263 #endif
264 	while (1) {
265 		stripeAddr = desc->stripeAddr;
266 		desc->raidPtr->copyback_stripes_done = stripeAddr
267 			/ desc->sectPerStripe;
268 #if RF_DEBUG_RECON
269 		if (rf_prReconSched) {
270 			old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
271 		}
272 #endif
273 		desc->stripeAddr += desc->sectPerStripe;
274 #if RF_DEBUG_RECON
275 		if (rf_prReconSched) {
276 			new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
277 			if (new_pctg != old_pctg) {
278 				RF_GETTIME(t);
279 				RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
280 				printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec);
281 			}
282 		}
283 #endif
284 		if (stripeAddr >= raidPtr->totalSectors) {
285 			rf_CopybackComplete(desc, 0);
286 			return;
287 		}
288 		/* walk through the current stripe, su-by-su */
289 		for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) {
290 
291 			/* map the SU, disallowing remap to spare space */
292 			(raidPtr->Layout.map->MapSector) (raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP);
293 
294 			if (testRow == desc->frow && testCol == desc->fcol) {
295 				rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs);
296 				done = 1;
297 				break;
298 			}
299 		}
300 
301 		if (!done) {
302 			/* we didn't find the failed disk in the data part.
303 			 * check parity. */
304 
305 			/* map the parity for this stripe, disallowing remap
306 			 * to spare space */
307 			(raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP);
308 
309 			if (testRow == desc->frow && testCol == desc->fcol) {
310 				rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs);
311 			}
312 		}
313 		/* check to see if the last read/write pair failed */
314 		if (desc->status) {
315 			rf_CopybackComplete(desc, 1);
316 			return;
317 		}
318 		/* we didn't find any units to copy back in this stripe.
319 		 * Continue with the next one */
320 	}
321 }
322 
323 
324 /* copyback one unit */
325 static void
326 rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs)
327 	RF_CopybackDesc_t *desc;
328 	int     typ;
329 	RF_RaidAddr_t addr;
330 	RF_RowCol_t testRow;
331 	RF_RowCol_t testCol;
332 	RF_SectorNum_t testOffs;
333 {
334 	RF_SectorCount_t sectPerSU = desc->sectPerSU;
335 	RF_Raid_t *raidPtr = desc->raidPtr;
336 	RF_RowCol_t spRow = desc->spRow;
337 	RF_RowCol_t spCol = desc->spCol;
338 	RF_SectorNum_t spOffs;
339 
340 	/* find the spare spare location for this SU */
341 	if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
342 		if (typ == RF_COPYBACK_DATA)
343 			raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP);
344 		else
345 			raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP);
346 	} else {
347 		spOffs = testOffs;
348 	}
349 
350 	/* create reqs to read the old location & write the new */
351 	desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
352 	    sectPerSU, desc->databuf, 0L, 0,
353 	    (int (*) (void *, int)) rf_CopybackReadDoneProc, desc,
354 	    NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
355 	desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
356 	    sectPerSU, desc->databuf, 0L, 0,
357 	    (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc,
358 	    NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
359 	desc->frow = testRow;
360 	desc->fcol = testCol;
361 
362 	/* enqueue the read.  the write will go out as part of the callback on
363 	 * the read. at user-level & in the kernel, wait for the read-write
364 	 * pair to complete. in the simulator, just return, since everything
365 	 * will happen as callbacks */
366 
367 	RF_LOCK_MUTEX(desc->mcpair->mutex);
368 	desc->mcpair->flag = 0;
369 
370 	rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY);
371 
372 	while (!desc->mcpair->flag) {
373 		RF_WAIT_MCPAIR(desc->mcpair);
374 	}
375 	RF_UNLOCK_MUTEX(desc->mcpair->mutex);
376 	rf_FreeDiskQueueData(desc->readreq);
377 	rf_FreeDiskQueueData(desc->writereq);
378 
379 }
380 
381 
382 /* called at interrupt context when the read has completed.  just send out the write */
383 static int
384 rf_CopybackReadDoneProc(desc, status)
385 	RF_CopybackDesc_t *desc;
386 	int     status;
387 {
388 	if (status) {		/* invoke the callback with bad status */
389 		printf("raid%d: copyback read failed.  Aborting.\n",
390 		       desc->raidPtr->raidid);
391 		(desc->writereq->CompleteFunc) (desc, -100);
392 	} else {
393 		rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY);
394 	}
395 	return (0);
396 }
397 /* called at interrupt context when the write has completed.
398  * at user level & in the kernel, wake up the copyback thread.
399  * in the simulator, invoke the next copyback directly.
400  * can't free diskqueuedata structs in the kernel b/c we're at interrupt context.
401  */
402 static int
403 rf_CopybackWriteDoneProc(desc, status)
404 	RF_CopybackDesc_t *desc;
405 	int     status;
406 {
407 	if (status && status != -100) {
408 		printf("raid%d: copyback write failed.  Aborting.\n",
409 		       desc->raidPtr->raidid);
410 	}
411 	desc->status = status;
412 	rf_MCPairWakeupFunc(desc->mcpair);
413 	return (0);
414 }
415 /* invoked when the copyback has completed */
416 static void
417 rf_CopybackComplete(desc, status)
418 	RF_CopybackDesc_t *desc;
419 	int     status;
420 {
421 	RF_Raid_t *raidPtr = desc->raidPtr;
422 	struct timeval t, diff;
423 
424 	if (!status) {
425 		RF_LOCK_MUTEX(raidPtr->mutex);
426 		if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
427 			RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
428 			rf_FreeSpareTable(raidPtr);
429 		} else {
430 			raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare;
431 		}
432 		RF_UNLOCK_MUTEX(raidPtr->mutex);
433 
434 		RF_GETTIME(t);
435 		RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
436 #if 0
437 		printf("Copyback time was %d.%06d seconds\n",
438 		    (int) diff.tv_sec, (int) diff.tv_usec);
439 #endif
440 	} else
441 		printf("raid%d: Copyback failure.  Status: %d\n",
442 		       raidPtr->raidid, status);
443 
444 	RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
445 	rf_FreeMCPair(desc->mcpair);
446 	RF_Free(desc, sizeof(*desc));
447 
448 	rf_copyback_in_progress = 0;
449 	rf_ResumeNewRequests(raidPtr);
450 }
451