xref: /netbsd/sys/dev/raidframe/rf_copyback.c (revision 6550d01e)
1 /*	$NetBSD: rf_copyback.c,v 1.44 2010/11/19 06:44:40 dholland Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /*****************************************************************************
30  *
31  * copyback.c -- code to copy reconstructed data back from spare space to
32  *               the replaced disk.
33  *
34  * the code operates using callbacks on the I/Os to continue with the
35  * next unit to be copied back.  We do this because a simple loop
36  * containing blocking I/Os will not work in the simulator.
37  *
38  ****************************************************************************/
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: rf_copyback.c,v 1.44 2010/11/19 06:44:40 dholland Exp $");
42 
43 #include <dev/raidframe/raidframevar.h>
44 
45 #include <sys/time.h>
46 #include <sys/buf.h>
47 #include "rf_raid.h"
48 #include "rf_mcpair.h"
49 #include "rf_acctrace.h"
50 #include "rf_etimer.h"
51 #include "rf_general.h"
52 #include "rf_utils.h"
53 #include "rf_copyback.h"
54 #include "rf_decluster.h"
55 #include "rf_driver.h"
56 #include "rf_shutdown.h"
57 #include "rf_kintf.h"
58 
/*
 * Kind of stripe unit handed to rf_CopybackOne() as its 'typ' argument:
 * it selects whether the sector map or the parity map is used to locate
 * the unit in distributed spare space.
 */
#define RF_COPYBACK_DATA   0
#define RF_COPYBACK_PARITY 1

/*
 * Set to 1 when a copyback starts and back to 0 when it completes
 * (the code that sets it describes it as "debug only").
 */
int     rf_copyback_in_progress;

/* Forward declarations of the file-local helpers defined further down. */
static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status);
static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status);
static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ,
			   RF_RaidAddr_t addr, RF_RowCol_t testCol,
			   RF_SectorNum_t testOffs);
static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status);
70 
71 int
72 rf_ConfigureCopyback(RF_ShutdownList_t **listp)
73 {
74 	rf_copyback_in_progress = 0;
75 	return (0);
76 }
77 
78 #include <sys/param.h>
79 #include <sys/systm.h>
80 #include <sys/proc.h>
81 #include <sys/ioctl.h>
82 #include <sys/fcntl.h>
83 #include <sys/vnode.h>
84 #include <sys/namei.h> /* for pathbuf */
85 
86 /* do a complete copyback */
87 void
88 rf_CopybackReconstructedData(RF_Raid_t *raidPtr)
89 {
90 	RF_ComponentLabel_t *c_label;
91 	int     found, retcode;
92 	RF_CopybackDesc_t *desc;
93 	RF_RowCol_t fcol;
94 	RF_RaidDisk_t *badDisk;
95 	char   *databuf;
96 
97 	struct pathbuf *dev_pb;
98 	struct vnode *vp;
99 	struct vattr va;
100 
101 	int ac;
102 
103 	fcol = 0;
104 	found = 0;
105 	for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
106 		if (raidPtr->Disks[fcol].status == rf_ds_dist_spared
107 		    || raidPtr->Disks[fcol].status == rf_ds_spared) {
108 			found = 1;
109 			break;
110 		}
111 	}
112 
113 	if (!found) {
114 		printf("raid%d: no disks need copyback\n", raidPtr->raidid);
115 		return;
116 	}
117 
118 	badDisk = &raidPtr->Disks[fcol];
119 
120 	/* This device may have been opened successfully the first time. Close
121 	 * it before trying to open it again.. */
122 
123 	if (raidPtr->raid_cinfo[fcol].ci_vp != NULL) {
124 		printf("Closed the open device: %s\n",
125 		    raidPtr->Disks[fcol].devname);
126 		vp = raidPtr->raid_cinfo[fcol].ci_vp;
127 		ac = raidPtr->Disks[fcol].auto_configured;
128 		rf_close_component(raidPtr, vp, ac);
129 		raidPtr->raid_cinfo[fcol].ci_vp = NULL;
130 
131 	}
132 	/* note that this disk was *not* auto_configured (any longer) */
133 	raidPtr->Disks[fcol].auto_configured = 0;
134 
135 	printf("About to (re-)open the device: %s\n",
136 	    raidPtr->Disks[fcol].devname);
137 
138 	dev_pb = pathbuf_create(raidPtr->Disks[fcol].devname);
139 	if (dev_pb == NULL) {
140 		/* shouldn't happen unless maybe the system is OOMing */
141 		printf("raid%d: copyback: pathbuf_create on device: %s failed: %d!\n",
142 		       raidPtr->raidid, raidPtr->Disks[fcol].devname,
143 		       ENOMEM);
144 		return;
145 	}
146 	retcode = dk_lookup(dev_pb, curlwp, &vp);
147 	pathbuf_destroy(dev_pb);
148 
149 	if (retcode) {
150 		printf("raid%d: copyback: dk_lookup on device: %s failed: %d!\n",
151 		       raidPtr->raidid, raidPtr->Disks[fcol].devname,
152 		       retcode);
153 
154 		/* XXX the component isn't responding properly... must be
155 		 * still dead :-( */
156 		return;
157 
158 	} else {
159 
160 		/* Ok, so we can at least do a lookup... How about actually
161 		 * getting a vp for it? */
162 
163 		if ((retcode = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
164 			return;
165 		retcode = rf_getdisksize(vp, curlwp, &raidPtr->Disks[fcol]);
166 		if (retcode) {
167 			return;
168 		}
169 
170 		raidPtr->raid_cinfo[fcol].ci_vp = vp;
171 		raidPtr->raid_cinfo[fcol].ci_dev = va.va_rdev;
172 
173 		raidPtr->Disks[fcol].dev = va.va_rdev;	/* XXX or the above? */
174 
175 		/* we allow the user to specify that only a fraction of the
176 		 * disks should be used this is just for debug:  it speeds up
177 		 * the parity scan */
178 		raidPtr->Disks[fcol].numBlocks =
179 		    raidPtr->Disks[fcol].numBlocks *
180 		    rf_sizePercentage / 100;
181 	}
182 
183 	if (retcode) {
184 		printf("raid%d: copyback: target disk failed TUR\n",
185 		       raidPtr->raidid);
186 		return;
187 	}
188 	/* get a buffer to hold one SU  */
189 	RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *));
190 
191 	/* create a descriptor */
192 	RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
193 	desc->raidPtr = raidPtr;
194 	desc->status = 0;
195 	desc->fcol = fcol;
196 	desc->spCol = badDisk->spareCol;
197 	desc->stripeAddr = 0;
198 	desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
199 	desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol;
200 	desc->databuf = databuf;
201 	desc->mcpair = rf_AllocMCPair();
202 
203 	/* quiesce the array, since we don't want to code support for user
204 	 * accs here */
205 	rf_SuspendNewRequestsAndWait(raidPtr);
206 
207 	/* adjust state of the array and of the disks */
208 	RF_LOCK_MUTEX(raidPtr->mutex);
209 	raidPtr->Disks[desc->fcol].status = rf_ds_optimal;
210 	raidPtr->status = rf_rs_optimal;
211 	rf_copyback_in_progress = 1;	/* debug only */
212 	RF_UNLOCK_MUTEX(raidPtr->mutex);
213 
214 	RF_GETTIME(desc->starttime);
215 	rf_ContinueCopyback(desc);
216 
217 	/* Data has been restored.  Fix up the component label. */
218 	/* Don't actually need the read here.. */
219 
220 	c_label = raidget_component_label(raidPtr, fcol);
221 	raid_init_component_label(raidPtr, c_label);
222 
223 	c_label->row = 0;
224 	c_label->column = fcol;
225 	c_label->partitionSize = raidPtr->Disks[fcol].partitionSize;
226 	c_label->partitionSizeHi = raidPtr->Disks[fcol].partitionSize >> 32;
227 
228 	raidflush_component_label(raidPtr, fcol);
229 
230 	/* XXXjld why is this here? */
231 	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
232 }
233 
234 
235 /*
236  * invoked via callback after a copyback I/O has completed to
237  * continue on with the next one
238  */
239 void
240 rf_ContinueCopyback(RF_CopybackDesc_t *desc)
241 {
242 	RF_SectorNum_t testOffs, stripeAddr;
243 	RF_Raid_t *raidPtr = desc->raidPtr;
244 	RF_RaidAddr_t addr;
245 	RF_RowCol_t testCol;
246 #if RF_DEBUG_RECON
247 	int     old_pctg, new_pctg;
248 	struct timeval t, diff;
249 #endif
250 	int done;
251 
252 #if RF_DEBUG_RECON
253 	old_pctg = (-1);
254 #endif
255 	while (1) {
256 		stripeAddr = desc->stripeAddr;
257 		desc->raidPtr->copyback_stripes_done = stripeAddr
258 			/ desc->sectPerStripe;
259 #if RF_DEBUG_RECON
260 		if (rf_prReconSched) {
261 			old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
262 		}
263 #endif
264 		desc->stripeAddr += desc->sectPerStripe;
265 #if RF_DEBUG_RECON
266 		if (rf_prReconSched) {
267 			new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
268 			if (new_pctg != old_pctg) {
269 				RF_GETTIME(t);
270 				RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
271 				printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec);
272 			}
273 		}
274 #endif
275 		if (stripeAddr >= raidPtr->totalSectors) {
276 			rf_CopybackComplete(desc, 0);
277 			return;
278 		}
279 		/* walk through the current stripe, su-by-su */
280 		for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) {
281 
282 			/* map the SU, disallowing remap to spare space */
283 			(raidPtr->Layout.map->MapSector) (raidPtr, addr, &testCol, &testOffs, RF_DONT_REMAP);
284 
285 			if (testCol == desc->fcol) {
286 				rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testCol, testOffs);
287 				done = 1;
288 				break;
289 			}
290 		}
291 
292 		if (!done) {
293 			/* we didn't find the failed disk in the data part.
294 			 * check parity. */
295 
296 			/* map the parity for this stripe, disallowing remap
297 			 * to spare space */
298 			(raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testCol, &testOffs, RF_DONT_REMAP);
299 
300 			if (testCol == desc->fcol) {
301 				rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testCol, testOffs);
302 			}
303 		}
304 		/* check to see if the last read/write pair failed */
305 		if (desc->status) {
306 			rf_CopybackComplete(desc, 1);
307 			return;
308 		}
309 		/* we didn't find any units to copy back in this stripe.
310 		 * Continue with the next one */
311 	}
312 }
313 
314 
315 /* copyback one unit */
316 static void
317 rf_CopybackOne(RF_CopybackDesc_t *desc, int typ, RF_RaidAddr_t addr,
318 	       RF_RowCol_t testCol, RF_SectorNum_t testOffs)
319 {
320 	RF_SectorCount_t sectPerSU = desc->sectPerSU;
321 	RF_Raid_t *raidPtr = desc->raidPtr;
322 	RF_RowCol_t spCol = desc->spCol;
323 	RF_SectorNum_t spOffs;
324 
325 	/* find the spare spare location for this SU */
326 	if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
327 		if (typ == RF_COPYBACK_DATA)
328 			raidPtr->Layout.map->MapSector(raidPtr, addr, &spCol, &spOffs, RF_REMAP);
329 		else
330 			raidPtr->Layout.map->MapParity(raidPtr, addr, &spCol, &spOffs, RF_REMAP);
331 	} else {
332 		spOffs = testOffs;
333 	}
334 
335 	/* create reqs to read the old location & write the new */
336 	desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
337 	    sectPerSU, desc->databuf, 0L, 0,
338 	    (int (*) (void *, int)) rf_CopybackReadDoneProc, desc,
339 	    NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL,
340 	    PR_WAITOK);
341 	desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
342 	    sectPerSU, desc->databuf, 0L, 0,
343 	    (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc,
344 	    NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL,
345 	    PR_WAITOK);
346 	desc->fcol = testCol;
347 
348 	/* enqueue the read.  the write will go out as part of the callback on
349 	 * the read. at user-level & in the kernel, wait for the read-write
350 	 * pair to complete. in the simulator, just return, since everything
351 	 * will happen as callbacks */
352 
353 	RF_LOCK_MUTEX(desc->mcpair->mutex);
354 	desc->mcpair->flag = 0;
355 	RF_UNLOCK_MUTEX(desc->mcpair->mutex);
356 
357 	rf_DiskIOEnqueue(&raidPtr->Queues[spCol], desc->readreq, RF_IO_NORMAL_PRIORITY);
358 
359 	RF_LOCK_MUTEX(desc->mcpair->mutex);
360 	while (!desc->mcpair->flag) {
361 		RF_WAIT_MCPAIR(desc->mcpair);
362 	}
363 	RF_UNLOCK_MUTEX(desc->mcpair->mutex);
364 	rf_FreeDiskQueueData(desc->readreq);
365 	rf_FreeDiskQueueData(desc->writereq);
366 
367 }
368 
369 
370 /* called at interrupt context when the read has completed.  just send out the write */
371 static int
372 rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status)
373 {
374 	if (status) {		/* invoke the callback with bad status */
375 		printf("raid%d: copyback read failed.  Aborting.\n",
376 		       desc->raidPtr->raidid);
377 		(desc->writereq->CompleteFunc) (desc, -100);
378 	} else {
379 		rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY);
380 	}
381 	return (0);
382 }
383 /* called at interrupt context when the write has completed.
384  * at user level & in the kernel, wake up the copyback thread.
385  * in the simulator, invoke the next copyback directly.
386  * can't free diskqueuedata structs in the kernel b/c we're at interrupt context.
387  */
388 static int
389 rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status)
390 {
391 	if (status && status != -100) {
392 		printf("raid%d: copyback write failed.  Aborting.\n",
393 		       desc->raidPtr->raidid);
394 	}
395 	desc->status = status;
396 	rf_MCPairWakeupFunc(desc->mcpair);
397 	return (0);
398 }
399 /* invoked when the copyback has completed */
400 static void
401 rf_CopybackComplete(RF_CopybackDesc_t *desc, int status)
402 {
403 	RF_Raid_t *raidPtr = desc->raidPtr;
404 	struct timeval t, diff;
405 
406 	if (!status) {
407 		RF_LOCK_MUTEX(raidPtr->mutex);
408 		if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
409 			RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
410 			rf_FreeSpareTable(raidPtr);
411 		} else {
412 			raidPtr->Disks[desc->spCol].status = rf_ds_spare;
413 		}
414 		RF_UNLOCK_MUTEX(raidPtr->mutex);
415 
416 		RF_GETTIME(t);
417 		RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
418 #if 0
419 		printf("Copyback time was %d.%06d seconds\n",
420 		    (int) diff.tv_sec, (int) diff.tv_usec);
421 #endif
422 	} else
423 		printf("raid%d: Copyback failure.  Status: %d\n",
424 		       raidPtr->raidid, status);
425 
426 	RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
427 	rf_FreeMCPair(desc->mcpair);
428 	RF_Free(desc, sizeof(*desc));
429 
430 	rf_copyback_in_progress = 0;
431 	rf_ResumeNewRequests(raidPtr);
432 }
433