xref: /netbsd/sys/dev/raidframe/rf_disks.c (revision c4a72b64)
1 /*	$NetBSD: rf_disks.c,v 1.40 2002/10/22 03:15:28 oster Exp $	*/
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *        This product includes software developed by the NetBSD
20  *        Foundation, Inc. and its contributors.
21  * 4. Neither the name of The NetBSD Foundation nor the names of its
22  *    contributors may be used to endorse or promote products derived
23  *    from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1995 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Author: Mark Holland
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  */
64 
65 /***************************************************************
66  * rf_disks.c -- code to perform operations on the actual disks
67  ***************************************************************/
68 
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.40 2002/10/22 03:15:28 oster Exp $");
71 
72 #include <dev/raidframe/raidframevar.h>
73 
74 #include "rf_raid.h"
75 #include "rf_alloclist.h"
76 #include "rf_utils.h"
77 #include "rf_general.h"
78 #include "rf_options.h"
79 #include "rf_kintf.h"
80 #include "rf_netbsd.h"
81 
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/proc.h>
85 #include <sys/ioctl.h>
86 #include <sys/fcntl.h>
87 #include <sys/vnode.h>
88 
89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
90 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
91 				  RF_ComponentLabel_t *);
92 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
93 				  RF_ComponentLabel_t *, int, int );
94 
/*
 * Debug printf helpers: they print only when rf_diskDebug is non-zero.
 * The do { } while (0) wrapper makes each use a single statement, so an
 * invocation placed in an if/else cannot capture the caller's "else".
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
97 
98 /**************************************************************************
99  *
100  * initialize the disks comprising the array
101  *
102  * We want the spare disks to have regular row,col numbers so that we can
103  * easily substitue a spare for a failed disk.  But, the driver code assumes
104  * throughout that the array contains numRow by numCol _non-spare_ disks, so
105  * it's not clear how to fit in the spares.  This is an unfortunate holdover
106  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
107  * rest, and put all the spares in it.  This probably needs to get changed
108  * eventually.
109  *
110  **************************************************************************/
111 
112 int
113 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
114 	RF_ShutdownList_t **listp;
115 	RF_Raid_t *raidPtr;
116 	RF_Config_t *cfgPtr;
117 {
118 	RF_RaidDisk_t **disks;
119 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
120 	RF_RowCol_t r, c;
121 	int bs, ret;
122 	unsigned i, count, foundone = 0, numFailuresThisRow;
123 	int force;
124 
125 	force = cfgPtr->force;
126 
127 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
128 	if (ret)
129 		goto fail;
130 
131 	disks = raidPtr->Disks;
132 
133 	for (r = 0; r < raidPtr->numRow; r++) {
134 		numFailuresThisRow = 0;
135 		for (c = 0; c < raidPtr->numCol; c++) {
136 			ret = rf_ConfigureDisk(raidPtr,
137 					       &cfgPtr->devnames[r][c][0],
138 					       &disks[r][c], r, c);
139 
140 			if (ret)
141 				goto fail;
142 
143 			if (disks[r][c].status == rf_ds_optimal) {
144 				raidread_component_label(
145 					 raidPtr->raid_cinfo[r][c].ci_dev,
146 					 raidPtr->raid_cinfo[r][c].ci_vp,
147 					 &raidPtr->raid_cinfo[r][c].ci_label);
148 			}
149 
150 			if (disks[r][c].status != rf_ds_optimal) {
151 				numFailuresThisRow++;
152 			} else {
153 				if (disks[r][c].numBlocks < min_numblks)
154 					min_numblks = disks[r][c].numBlocks;
155 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
156 				    r, c, disks[r][c].devname,
157 				    (long int) disks[r][c].numBlocks,
158 				    disks[r][c].blockSize,
159 				    (long int) disks[r][c].numBlocks *
160 					 disks[r][c].blockSize / 1024 / 1024);
161 			}
162 		}
163 		/* XXX fix for n-fault tolerant */
164 		/* XXX this should probably check to see how many failures
165 		   we can handle for this configuration! */
166 		if (numFailuresThisRow > 0)
167 			raidPtr->status[r] = rf_rs_degraded;
168 	}
169 
170 	/* all disks must be the same size & have the same block size, bs must
171 	 * be a power of 2 */
172 	bs = 0;
173 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
174 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
175 			if (disks[r][c].status == rf_ds_optimal) {
176 				bs = disks[r][c].blockSize;
177 				foundone = 1;
178 			}
179 		}
180 	}
181 	if (!foundone) {
182 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
183 		ret = EINVAL;
184 		goto fail;
185 	}
186 	for (count = 0, i = 1; i; i <<= 1)
187 		if (bs & i)
188 			count++;
189 	if (count != 1) {
190 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
191 		ret = EINVAL;
192 		goto fail;
193 	}
194 
195 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
196 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
197 		if (force != 0) {
198 			printf("raid%d: Fatal errors being ignored.\n",
199 			       raidPtr->raidid);
200 		} else {
201 			ret = EINVAL;
202 			goto fail;
203 		}
204 	}
205 
206 	for (r = 0; r < raidPtr->numRow; r++) {
207 		for (c = 0; c < raidPtr->numCol; c++) {
208 			if (disks[r][c].status == rf_ds_optimal) {
209 				if (disks[r][c].blockSize != bs) {
210 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
211 					ret = EINVAL;
212 					goto fail;
213 				}
214 				if (disks[r][c].numBlocks != min_numblks) {
215 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
216 					    r, c, (int) min_numblks);
217 					disks[r][c].numBlocks = min_numblks;
218 				}
219 			}
220 		}
221 	}
222 
223 	raidPtr->sectorsPerDisk = min_numblks;
224 	raidPtr->logBytesPerSector = ffs(bs) - 1;
225 	raidPtr->bytesPerSector = bs;
226 	raidPtr->sectorMask = bs - 1;
227 	return (0);
228 
229 fail:
230 
231 	rf_UnconfigureVnodes( raidPtr );
232 
233 	return (ret);
234 }
235 
236 
237 /****************************************************************************
238  * set up the data structures describing the spare disks in the array
239  * recall from the above comment that the spare disk descriptors are stored
240  * in row zero, which is specially expanded to hold them.
241  ****************************************************************************/
242 int
243 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
244 	RF_ShutdownList_t ** listp;
245 	RF_Raid_t * raidPtr;
246 	RF_Config_t * cfgPtr;
247 {
248 	int     i, ret;
249 	unsigned int bs;
250 	RF_RaidDisk_t *disks;
251 	int     num_spares_done;
252 
253 	num_spares_done = 0;
254 
255 	/* The space for the spares should have already been allocated by
256 	 * ConfigureDisks() */
257 
258 	disks = &raidPtr->Disks[0][raidPtr->numCol];
259 	for (i = 0; i < raidPtr->numSpare; i++) {
260 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
261 				       &disks[i], 0, raidPtr->numCol + i);
262 		if (ret)
263 			goto fail;
264 		if (disks[i].status != rf_ds_optimal) {
265 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
266 				     &cfgPtr->spare_names[i][0]);
267 		} else {
268 			disks[i].status = rf_ds_spare;	/* change status to
269 							 * spare */
270 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
271 			    disks[i].devname,
272 			    (long int) disks[i].numBlocks, disks[i].blockSize,
273 			    (long int) disks[i].numBlocks *
274 				 disks[i].blockSize / 1024 / 1024);
275 		}
276 		num_spares_done++;
277 	}
278 
279 	/* check sizes and block sizes on spare disks */
280 	bs = 1 << raidPtr->logBytesPerSector;
281 	for (i = 0; i < raidPtr->numSpare; i++) {
282 		if (disks[i].blockSize != bs) {
283 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
284 			ret = EINVAL;
285 			goto fail;
286 		}
287 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
288 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
289 				     disks[i].devname, disks[i].blockSize,
290 				     (long int) raidPtr->sectorsPerDisk);
291 			ret = EINVAL;
292 			goto fail;
293 		} else
294 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
295 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
296 
297 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
298 			}
299 	}
300 
301 	return (0);
302 
303 fail:
304 
305 	/* Release the hold on the main components.  We've failed to allocate
306 	 * a spare, and since we're failing, we need to free things..
307 
308 	 XXX failing to allocate a spare is *not* that big of a deal...
309 	 We *can* survive without it, if need be, esp. if we get hot
310 	 adding working.
311 
312 	 If we don't fail out here, then we need a way to remove this spare...
313 	 that should be easier to do here than if we are "live"...
314 
315 	 */
316 
317 	rf_UnconfigureVnodes( raidPtr );
318 
319 	return (ret);
320 }
321 
322 static int
323 rf_AllocDiskStructures(raidPtr, cfgPtr)
324 	RF_Raid_t *raidPtr;
325  	RF_Config_t *cfgPtr;
326 {
327 	RF_RaidDisk_t **disks;
328 	int ret;
329 	int r;
330 
331 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
332 			(RF_RaidDisk_t **), raidPtr->cleanupList);
333 	if (disks == NULL) {
334 		ret = ENOMEM;
335 		goto fail;
336 	}
337 	raidPtr->Disks = disks;
338 	/* get space for the device-specific stuff... */
339 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
340 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
341 	    raidPtr->cleanupList);
342 	if (raidPtr->raid_cinfo == NULL) {
343 		ret = ENOMEM;
344 		goto fail;
345 	}
346 
347 	for (r = 0; r < raidPtr->numRow; r++) {
348 		/* We allocate RF_MAXSPARE on the first row so that we
349 		   have room to do hot-swapping of spares */
350 		RF_CallocAndAdd(disks[r], raidPtr->numCol
351 				+ ((r == 0) ? RF_MAXSPARE : 0),
352 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
353 				raidPtr->cleanupList);
354 		if (disks[r] == NULL) {
355 			ret = ENOMEM;
356 			goto fail;
357 		}
358 		/* get more space for device specific stuff.. */
359 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
360 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
361 		    sizeof(struct raidcinfo), (struct raidcinfo *),
362 		    raidPtr->cleanupList);
363 		if (raidPtr->raid_cinfo[r] == NULL) {
364 			ret = ENOMEM;
365 			goto fail;
366 		}
367 	}
368 	return(0);
369 fail:
370 	rf_UnconfigureVnodes( raidPtr );
371 
372 	return(ret);
373 }
374 
375 
376 /* configure a single disk during auto-configuration at boot */
377 int
378 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
379 	RF_Raid_t *raidPtr;
380 	RF_Config_t *cfgPtr;
381 	RF_AutoConfig_t *auto_config;
382 {
383 	RF_RaidDisk_t **disks;
384 	RF_RaidDisk_t *diskPtr;
385 	RF_RowCol_t r, c;
386 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
387 	int bs, ret;
388 	int numFailuresThisRow;
389 	RF_AutoConfig_t *ac;
390 	int parity_good;
391 	int mod_counter;
392 	int mod_counter_found;
393 
394 #if DEBUG
395 	printf("Starting autoconfiguration of RAID set...\n");
396 #endif
397 
398 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
399 	if (ret)
400 		goto fail;
401 
402 	disks = raidPtr->Disks;
403 
404 	/* assume the parity will be fine.. */
405 	parity_good = RF_RAID_CLEAN;
406 
407 	/* Check for mod_counters that are too low */
408 	mod_counter_found = 0;
409 	mod_counter = 0;
410 	ac = auto_config;
411 	while(ac!=NULL) {
412 		if (mod_counter_found==0) {
413 			mod_counter = ac->clabel->mod_counter;
414 			mod_counter_found = 1;
415 		} else {
416 			if (ac->clabel->mod_counter > mod_counter) {
417 				mod_counter = ac->clabel->mod_counter;
418 			}
419 		}
420 		ac->flag = 0; /* clear the general purpose flag */
421 		ac = ac->next;
422 	}
423 
424 	bs = 0;
425 	for (r = 0; r < raidPtr->numRow; r++) {
426 		numFailuresThisRow = 0;
427 		for (c = 0; c < raidPtr->numCol; c++) {
428 			diskPtr = &disks[r][c];
429 
430 			/* find this row/col in the autoconfig */
431 #if DEBUG
432 			printf("Looking for %d,%d in autoconfig\n",r,c);
433 #endif
434 			ac = auto_config;
435 			while(ac!=NULL) {
436 				if (ac->clabel==NULL) {
437 					/* big-time bad news. */
438 					goto fail;
439 				}
440 				if ((ac->clabel->row == r) &&
441 				    (ac->clabel->column == c) &&
442 				    (ac->clabel->mod_counter == mod_counter)) {
443 					/* it's this one... */
444 					/* flag it as 'used', so we don't
445 					   free it later. */
446 					ac->flag = 1;
447 #if DEBUG
448 					printf("Found: %s at %d,%d\n",
449 					       ac->devname,r,c);
450 #endif
451 
452 					break;
453 				}
454 				ac=ac->next;
455 			}
456 
457 			if (ac==NULL) {
458 				/* we didn't find an exact match with a
459 				   correct mod_counter above... can we
460 				   find one with an incorrect mod_counter
461 				   to use instead?  (this one, if we find
462 				   it, will be marked as failed once the
463 				   set configures)
464 				*/
465 
466 				ac = auto_config;
467 				while(ac!=NULL) {
468 					if (ac->clabel==NULL) {
469 						/* big-time bad news. */
470 						goto fail;
471 					}
472 					if ((ac->clabel->row == r) &&
473 					    (ac->clabel->column == c)) {
474 						/* it's this one...
475 						   flag it as 'used', so we
476 						   don't free it later. */
477 						ac->flag = 1;
478 #if DEBUG
479 						printf("Found(low mod_counter): %s at %d,%d\n",
480 						       ac->devname,r,c);
481 #endif
482 
483 						break;
484 					}
485 					ac=ac->next;
486 				}
487 			}
488 
489 
490 
491 			if (ac!=NULL) {
492 				/* Found it.  Configure it.. */
493 				diskPtr->blockSize = ac->clabel->blockSize;
494 				diskPtr->numBlocks = ac->clabel->numBlocks;
495 				/* Note: rf_protectedSectors is already
496 				   factored into numBlocks here */
497 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
498 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
499 
500 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
501 				       ac->clabel, sizeof(*ac->clabel));
502 				sprintf(diskPtr->devname, "/dev/%s",
503 					ac->devname);
504 
505 				/* note the fact that this component was
506 				   autoconfigured.  You'll need this info
507 				   later.  Trust me :) */
508 				diskPtr->auto_configured = 1;
509 				diskPtr->dev = ac->dev;
510 
511 				/*
512 				 * we allow the user to specify that
513 				 * only a fraction of the disks should
514 				 * be used this is just for debug: it
515 				 * speeds up the parity scan
516 				 */
517 
518 				diskPtr->numBlocks = diskPtr->numBlocks *
519 					rf_sizePercentage / 100;
520 
521 				/* XXX these will get set multiple times,
522 				   but since we're autoconfiguring, they'd
523 				   better be always the same each time!
524 				   If not, this is the least of your worries */
525 
526 				bs = diskPtr->blockSize;
527 				min_numblks = diskPtr->numBlocks;
528 
529 				/* this gets done multiple times, but that's
530 				   fine -- the serial number will be the same
531 				   for all components, guaranteed */
532 				raidPtr->serial_number =
533 					ac->clabel->serial_number;
534 				/* check the last time the label
535 				   was modified */
536 				if (ac->clabel->mod_counter !=
537 				    mod_counter) {
538 					/* Even though we've filled in all
539 					   of the above, we don't trust
540 					   this component since it's
541 					   modification counter is not
542 					   in sync with the rest, and we really
543 					   consider it to be failed.  */
544 					disks[r][c].status = rf_ds_failed;
545 					numFailuresThisRow++;
546 				} else {
547 					if (ac->clabel->clean !=
548 					    RF_RAID_CLEAN) {
549 						parity_good = RF_RAID_DIRTY;
550 					}
551 				}
552 			} else {
553 				/* Didn't find it at all!!
554 				   Component must really be dead */
555 				disks[r][c].status = rf_ds_failed;
556 				sprintf(disks[r][c].devname,"component%d",
557 					r * raidPtr->numCol + c);
558 				numFailuresThisRow++;
559 			}
560 		}
561 		/* XXX fix for n-fault tolerant */
562 		/* XXX this should probably check to see how many failures
563 		   we can handle for this configuration! */
564 		if (numFailuresThisRow > 0)
565 			raidPtr->status[r] = rf_rs_degraded;
566 	}
567 
568 	/* close the device for the ones that didn't get used */
569 
570 	ac = auto_config;
571 	while(ac!=NULL) {
572 		if (ac->flag == 0) {
573 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
574 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
575 			vput(ac->vp);
576 			ac->vp = NULL;
577 #if DEBUG
578 			printf("Released %s from auto-config set.\n",
579 			       ac->devname);
580 #endif
581 		}
582 		ac = ac->next;
583 	}
584 
585 	raidPtr->mod_counter = mod_counter;
586 
587 	/* note the state of the parity, if any */
588 	raidPtr->parity_good = parity_good;
589 	raidPtr->sectorsPerDisk = min_numblks;
590 	raidPtr->logBytesPerSector = ffs(bs) - 1;
591 	raidPtr->bytesPerSector = bs;
592 	raidPtr->sectorMask = bs - 1;
593 	return (0);
594 
595 fail:
596 
597 	rf_UnconfigureVnodes( raidPtr );
598 
599 	return (ret);
600 
601 }
602 
603 /* configure a single disk in the array */
604 int
605 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
606 	RF_Raid_t *raidPtr;
607 	char   *buf;
608 	RF_RaidDisk_t *diskPtr;
609 	RF_RowCol_t row;
610 	RF_RowCol_t col;
611 {
612 	char   *p;
613 	struct partinfo dpart;
614 	struct vnode *vp;
615 	struct vattr va;
616 	struct proc *proc;
617 	int     error;
618 
619 	p = rf_find_non_white(buf);
620 	if (p[strlen(p) - 1] == '\n') {
621 		/* strip off the newline */
622 		p[strlen(p) - 1] = '\0';
623 	}
624 	(void) strcpy(diskPtr->devname, p);
625 
626 	proc = raidPtr->engine_thread;
627 
628 	/* Let's start by claiming the component is fine and well... */
629 	diskPtr->status = rf_ds_optimal;
630 
631 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
632 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
633 
634 	error = raidlookup(diskPtr->devname, proc, &vp);
635 	if (error) {
636 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
637 		if (error == ENXIO) {
638 			/* the component isn't there... must be dead :-( */
639 			diskPtr->status = rf_ds_failed;
640 		} else {
641 			return (error);
642 		}
643 	}
644 	if (diskPtr->status == rf_ds_optimal) {
645 
646 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
647 			return (error);
648 		}
649 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
650 				  FREAD, proc->p_ucred, proc);
651 		if (error) {
652 			return (error);
653 		}
654 
655 		diskPtr->blockSize = dpart.disklab->d_secsize;
656 
657 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
658 		diskPtr->partitionSize = dpart.part->p_size;
659 
660 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
661 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
662 
663 		/* This component was not automatically configured */
664 		diskPtr->auto_configured = 0;
665 		diskPtr->dev = va.va_rdev;
666 
667 		/* we allow the user to specify that only a fraction of the
668 		 * disks should be used this is just for debug:  it speeds up
669 		 * the parity scan */
670 		diskPtr->numBlocks = diskPtr->numBlocks *
671 			rf_sizePercentage / 100;
672 	}
673 	return (0);
674 }
675 
676 static void
677 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
678 	RF_Raid_t *raidPtr;
679 	int row;
680 	int column;
681 	char *dev_name;
682 	RF_ComponentLabel_t *ci_label;
683 {
684 
685 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
686 	       raidPtr->raidid, dev_name, row, column );
687 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
688 	       ci_label->row, ci_label->column,
689 	       ci_label->num_rows, ci_label->num_columns);
690 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
691 	       ci_label->version, ci_label->serial_number,
692 	       ci_label->mod_counter);
693 	printf("         Clean: %s Status: %d\n",
694 	       ci_label->clean ? "Yes" : "No", ci_label->status );
695 }
696 
697 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
698 				  serial_number, mod_counter )
699 	RF_Raid_t *raidPtr;
700 	int row;
701 	int column;
702 	char *dev_name;
703 	RF_ComponentLabel_t *ci_label;
704 	int serial_number;
705 	int mod_counter;
706 {
707 	int fatal_error = 0;
708 
709 	if (serial_number != ci_label->serial_number) {
710 		printf("%s has a different serial number: %d %d\n",
711 		       dev_name, serial_number, ci_label->serial_number);
712 		fatal_error = 1;
713 	}
714 	if (mod_counter != ci_label->mod_counter) {
715 		printf("%s has a different modfication count: %d %d\n",
716 		       dev_name, mod_counter, ci_label->mod_counter);
717 	}
718 
719 	if (row != ci_label->row) {
720 		printf("Row out of alignment for: %s\n", dev_name);
721 		fatal_error = 1;
722 	}
723 	if (column != ci_label->column) {
724 		printf("Column out of alignment for: %s\n", dev_name);
725 		fatal_error = 1;
726 	}
727 	if (raidPtr->numRow != ci_label->num_rows) {
728 		printf("Number of rows do not match for: %s\n", dev_name);
729 		fatal_error = 1;
730 	}
731 	if (raidPtr->numCol != ci_label->num_columns) {
732 		printf("Number of columns do not match for: %s\n", dev_name);
733 		fatal_error = 1;
734 	}
735 	if (ci_label->clean == 0) {
736 		/* it's not clean, but that's not fatal */
737 		printf("%s is not clean!\n", dev_name);
738 	}
739 	return(fatal_error);
740 }
741 
742 
743 /*
744 
745    rf_CheckLabels() - check all the component labels for consistency.
746    Return an error if there is anything major amiss.
747 
748  */
749 
750 int
751 rf_CheckLabels( raidPtr, cfgPtr )
752 	RF_Raid_t *raidPtr;
753 	RF_Config_t *cfgPtr;
754 {
755 	int r,c;
756 	char *dev_name;
757 	RF_ComponentLabel_t *ci_label;
758 	int serial_number = 0;
759 	int mod_number = 0;
760 	int fatal_error = 0;
761 	int mod_values[4];
762 	int mod_count[4];
763 	int ser_values[4];
764 	int ser_count[4];
765 	int num_ser;
766 	int num_mod;
767 	int i;
768 	int found;
769 	int hosed_row;
770 	int hosed_column;
771 	int too_fatal;
772 	int parity_good;
773 	int force;
774 
775 	hosed_row = -1;
776 	hosed_column = -1;
777 	too_fatal = 0;
778 	force = cfgPtr->force;
779 
780 	/*
781 	   We're going to try to be a little intelligent here.  If one
782 	   component's label is bogus, and we can identify that it's the
783 	   *only* one that's gone, we'll mark it as "failed" and allow
784 	   the configuration to proceed.  This will be the *only* case
785 	   that we'll proceed if there would be (otherwise) fatal errors.
786 
787 	   Basically we simply keep a count of how many components had
788 	   what serial number.  If all but one agree, we simply mark
789 	   the disagreeing component as being failed, and allow
790 	   things to come up "normally".
791 
792 	   We do this first for serial numbers, and then for "mod_counter".
793 
794 	 */
795 
796 	num_ser = 0;
797 	num_mod = 0;
798 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
799 		for (c = 0; c < raidPtr->numCol; c++) {
800 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
801 			found=0;
802 			for(i=0;i<num_ser;i++) {
803 				if (ser_values[i] == ci_label->serial_number) {
804 					ser_count[i]++;
805 					found=1;
806 					break;
807 				}
808 			}
809 			if (!found) {
810 				ser_values[num_ser] = ci_label->serial_number;
811 				ser_count[num_ser] = 1;
812 				num_ser++;
813 				if (num_ser>2) {
814 					fatal_error = 1;
815 					break;
816 				}
817 			}
818 			found=0;
819 			for(i=0;i<num_mod;i++) {
820 				if (mod_values[i] == ci_label->mod_counter) {
821 					mod_count[i]++;
822 					found=1;
823 					break;
824 				}
825 			}
826 			if (!found) {
827 			        mod_values[num_mod] = ci_label->mod_counter;
828 				mod_count[num_mod] = 1;
829 				num_mod++;
830 				if (num_mod>2) {
831 					fatal_error = 1;
832 					break;
833 				}
834 			}
835 		}
836 	}
837 #if DEBUG
838 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
839 	for(i=0;i<num_ser;i++) {
840 		printf("%d %d\n", ser_values[i], ser_count[i]);
841 	}
842 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
843 	for(i=0;i<num_mod;i++) {
844 		printf("%d %d\n", mod_values[i], mod_count[i]);
845 	}
846 #endif
847 	serial_number = ser_values[0];
848 	if (num_ser == 2) {
849 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
850 			/* Locate the maverick component */
851 			if (ser_count[1] > ser_count[0]) {
852 				serial_number = ser_values[1];
853 			}
854 			for (r = 0; r < raidPtr->numRow; r++) {
855 				for (c = 0; c < raidPtr->numCol; c++) {
856 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
857 					if (serial_number !=
858 					    ci_label->serial_number) {
859 						hosed_row = r;
860 						hosed_column = c;
861 						break;
862 					}
863 				}
864 			}
865 			printf("Hosed component: %s\n",
866 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
867 			if (!force) {
868 				/* we'll fail this component, as if there are
869 				   other major errors, we arn't forcing things
870 				   and we'll abort the config anyways */
871 				raidPtr->Disks[hosed_row][hosed_column].status
872 					= rf_ds_failed;
873 				raidPtr->numFailures++;
874 				raidPtr->status[hosed_row] = rf_rs_degraded;
875 			}
876 		} else {
877 			too_fatal = 1;
878 		}
879 		if (cfgPtr->parityConfig == '0') {
880 			/* We've identified two different serial numbers.
881 			   RAID 0 can't cope with that, so we'll punt */
882 			too_fatal = 1;
883 		}
884 
885 	}
886 
887 	/* record the serial number for later.  If we bail later, setting
888 	   this doesn't matter, otherwise we've got the best guess at the
889 	   correct serial number */
890 	raidPtr->serial_number = serial_number;
891 
892 	mod_number = mod_values[0];
893 	if (num_mod == 2) {
894 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
895 			/* Locate the maverick component */
896 			if (mod_count[1] > mod_count[0]) {
897 				mod_number = mod_values[1];
898 			} else if (mod_count[1] < mod_count[0]) {
899 				mod_number = mod_values[0];
900 			} else {
901 				/* counts of different modification values
902 				   are the same.   Assume greater value is
903 				   the correct one, all other things
904 				   considered */
905 				if (mod_values[0] > mod_values[1]) {
906 					mod_number = mod_values[0];
907 				} else {
908 					mod_number = mod_values[1];
909 				}
910 
911 			}
912 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
913 				for (c = 0; c < raidPtr->numCol; c++) {
914 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
915 					if (mod_number !=
916 					    ci_label->mod_counter) {
917 						if ( ( hosed_row == r ) &&
918 						     ( hosed_column == c )) {
919 							/* same one.  Can
920 							   deal with it.  */
921 						} else {
922 							hosed_row = r;
923 							hosed_column = c;
924 							if (num_ser != 1) {
925 								too_fatal = 1;
926 								break;
927 							}
928 						}
929 					}
930 				}
931 			}
932 			printf("Hosed component: %s\n",
933 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
934 			if (!force) {
935 				/* we'll fail this component, as if there are
936 				   other major errors, we arn't forcing things
937 				   and we'll abort the config anyways */
938 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
939 					raidPtr->Disks[hosed_row][hosed_column].status
940 						= rf_ds_failed;
941 					raidPtr->numFailures++;
942 					raidPtr->status[hosed_row] = rf_rs_degraded;
943 				}
944 			}
945 		} else {
946 			too_fatal = 1;
947 		}
948 		if (cfgPtr->parityConfig == '0') {
949 			/* We've identified two different mod counters.
950 			   RAID 0 can't cope with that, so we'll punt */
951 			too_fatal = 1;
952 		}
953 	}
954 
955 	raidPtr->mod_counter = mod_number;
956 
957 	if (too_fatal) {
958 		/* we've had both a serial number mismatch, and a mod_counter
959 		   mismatch -- and they involved two different components!!
960 		   Bail -- make things fail so that the user must force
961 		   the issue... */
962 		hosed_row = -1;
963 		hosed_column = -1;
964 	}
965 
966 	if (num_ser > 2) {
967 		printf("raid%d: Too many different serial numbers!\n",
968 		       raidPtr->raidid);
969 	}
970 
971 	if (num_mod > 2) {
972 		printf("raid%d: Too many different mod counters!\n",
973 		       raidPtr->raidid);
974 	}
975 
976 	/* we start by assuming the parity will be good, and flee from
977 	   that notion at the slightest sign of trouble */
978 
979 	parity_good = RF_RAID_CLEAN;
980 	for (r = 0; r < raidPtr->numRow; r++) {
981 		for (c = 0; c < raidPtr->numCol; c++) {
982 			dev_name = &cfgPtr->devnames[r][c][0];
983 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
984 
985 			if ((r == hosed_row) && (c == hosed_column)) {
986 				printf("raid%d: Ignoring %s\n",
987 				       raidPtr->raidid, dev_name);
988 			} else {
989 				rf_print_label_status( raidPtr, r, c,
990 						       dev_name, ci_label );
991 				if (rf_check_label_vitals( raidPtr, r, c,
992 							   dev_name, ci_label,
993 							   serial_number,
994 							   mod_number )) {
995 					fatal_error = 1;
996 				}
997 				if (ci_label->clean != RF_RAID_CLEAN) {
998 					parity_good = RF_RAID_DIRTY;
999 				}
1000 			}
1001 		}
1002 	}
1003 	if (fatal_error) {
1004 		parity_good = RF_RAID_DIRTY;
1005 	}
1006 
1007 	/* we note the state of the parity */
1008 	raidPtr->parity_good = parity_good;
1009 
1010 	return(fatal_error);
1011 }
1012 
1013 int
1014 rf_add_hot_spare(raidPtr, sparePtr)
1015 	RF_Raid_t *raidPtr;
1016 	RF_SingleComponent_t *sparePtr;
1017 {
1018 	RF_RaidDisk_t *disks;
1019 	RF_DiskQueue_t *spareQueues;
1020 	int ret;
1021 	unsigned int bs;
1022 	int spare_number;
1023 
1024 	ret=0;
1025 
1026 	if (raidPtr->numSpare >= RF_MAXSPARE) {
1027 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1028 		return(EINVAL);
1029 	}
1030 
1031 	RF_LOCK_MUTEX(raidPtr->mutex);
1032 	while (raidPtr->adding_hot_spare==1) {
1033 		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
1034 			&(raidPtr->mutex));
1035 	}
1036 	raidPtr->adding_hot_spare=1;
1037 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1038 
1039 	/* the beginning of the spares... */
1040 	disks = &raidPtr->Disks[0][raidPtr->numCol];
1041 
1042 	spare_number = raidPtr->numSpare;
1043 
1044 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1045 			       &disks[spare_number], 0,
1046 			       raidPtr->numCol + spare_number);
1047 
1048 	if (ret)
1049 		goto fail;
1050 	if (disks[spare_number].status != rf_ds_optimal) {
1051 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1052 			     sparePtr->component_name);
1053 		ret=EINVAL;
1054 		goto fail;
1055 	} else {
1056 		disks[spare_number].status = rf_ds_spare;
1057 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1058 			 disks[spare_number].devname,
1059 			 (long int) disks[spare_number].numBlocks,
1060 			 disks[spare_number].blockSize,
1061 			 (long int) disks[spare_number].numBlocks *
1062 			 disks[spare_number].blockSize / 1024 / 1024);
1063 	}
1064 
1065 
1066 	/* check sizes and block sizes on the spare disk */
1067 	bs = 1 << raidPtr->logBytesPerSector;
1068 	if (disks[spare_number].blockSize != bs) {
1069 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1070 		ret = EINVAL;
1071 		goto fail;
1072 	}
1073 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1074 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1075 			     disks[spare_number].devname,
1076 			     disks[spare_number].blockSize,
1077 			     (long int) raidPtr->sectorsPerDisk);
1078 		ret = EINVAL;
1079 		goto fail;
1080 	} else {
1081 		if (disks[spare_number].numBlocks >
1082 		    raidPtr->sectorsPerDisk) {
1083 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1084 				     (long int) raidPtr->sectorsPerDisk);
1085 
1086 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1087 		}
1088 	}
1089 
1090 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1091 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1092 				 0, raidPtr->numCol + spare_number,
1093 				 raidPtr->qType,
1094 				 raidPtr->sectorsPerDisk,
1095 				 raidPtr->Disks[0][raidPtr->numCol +
1096 						  spare_number].dev,
1097 				 raidPtr->maxOutstanding,
1098 				 &raidPtr->shutdownList,
1099 				 raidPtr->cleanupList);
1100 
1101 	RF_LOCK_MUTEX(raidPtr->mutex);
1102 	raidPtr->numSpare++;
1103 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1104 
1105 fail:
1106 	RF_LOCK_MUTEX(raidPtr->mutex);
1107 	raidPtr->adding_hot_spare=0;
1108 	wakeup(&(raidPtr->adding_hot_spare));
1109 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1110 
1111 	return(ret);
1112 }
1113 
1114 int
1115 rf_remove_hot_spare(raidPtr,sparePtr)
1116 	RF_Raid_t *raidPtr;
1117 	RF_SingleComponent_t *sparePtr;
1118 {
1119 	int spare_number;
1120 
1121 
1122 	if (raidPtr->numSpare==0) {
1123 		printf("No spares to remove!\n");
1124 		return(EINVAL);
1125 	}
1126 
1127 	spare_number = sparePtr->column;
1128 
1129 	return(EINVAL); /* XXX not implemented yet */
1130 #if 0
1131 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1132 		return(EINVAL);
1133 	}
1134 
1135 	/* verify that this spare isn't in use... */
1136 
1137 
1138 
1139 
1140 	/* it's gone.. */
1141 
1142 	raidPtr->numSpare--;
1143 
1144 	return(0);
1145 #endif
1146 }
1147 
1148 
1149 int
1150 rf_delete_component(raidPtr,component)
1151 	RF_Raid_t *raidPtr;
1152 	RF_SingleComponent_t *component;
1153 {
1154 	RF_RaidDisk_t *disks;
1155 
1156 	if ((component->row < 0) ||
1157 	    (component->row >= raidPtr->numRow) ||
1158 	    (component->column < 0) ||
1159 	    (component->column >= raidPtr->numCol)) {
1160 		return(EINVAL);
1161 	}
1162 
1163 	disks = &raidPtr->Disks[component->row][component->column];
1164 
1165 	/* 1. This component must be marked as 'failed' */
1166 
1167 	return(EINVAL); /* Not implemented yet. */
1168 }
1169 
1170 int
1171 rf_incorporate_hot_spare(raidPtr,component)
1172 	RF_Raid_t *raidPtr;
1173 	RF_SingleComponent_t *component;
1174 {
1175 
1176 	/* Issues here include how to 'move' this in if there is IO
1177 	   taking place (e.g. component queues and such) */
1178 
1179 	return(EINVAL); /* Not implemented yet. */
1180 }
1181