xref: /dragonfly/sys/dev/raid/vinum/vinumconfig.c (revision 73e0051e)
1 /*
2  * To do:
3  *
4  * Don't store drive configuration on the config DB: read each drive's header
5  * to decide where it is.
6  *
7  * Accept any old crap in the config_<foo> functions, and complain when
8  * we try to bring it up.
9  *
10  * When trying to bring volumes up, check that the complete address range
11  * is covered.
12  */
13 /*-
14  * Copyright (c) 1997, 1998
15  *	Nan Yang Computer Services Limited.  All rights reserved.
16  *
17  *  This software is distributed under the so-called ``Berkeley
18  *  License'':
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  * 3. All advertising materials mentioning features or use of this software
29  *    must display the following acknowledgement:
30  *	This product includes software developed by Nan Yang Computer
31  *      Services Limited.
32  * 4. Neither the name of the Company nor the names of its contributors
33  *    may be used to endorse or promote products derived from this software
34  *    without specific prior written permission.
35  *
36  * This software is provided ``as is'', and any express or implied
37  * warranties, including, but not limited to, the implied warranties of
38  * merchantability and fitness for a particular purpose are disclaimed.
39  * In no event shall the company or contributors be liable for any
40  * direct, indirect, incidental, special, exemplary, or consequential
41  * damages (including, but not limited to, procurement of substitute
42  * goods or services; loss of use, data, or profits; or business
43  * interruption) however caused and on any theory of liability, whether
44  * in contract, strict liability, or tort (including negligence or
45  * otherwise) arising in any way out of the use of this software, even if
46  * advised of the possibility of such damage.
47  *
48  * $Id: vinumconfig.c,v 1.30 2000/05/01 09:45:50 grog Exp grog $
49  * $FreeBSD: src/sys/dev/vinum/vinumconfig.c,v 1.32.2.6 2002/02/03 00:43:35 grog Exp $
50  * $DragonFly: src/sys/dev/raid/vinum/vinumconfig.c,v 1.12 2007/06/07 22:58:00 corecode Exp $
51  */
52 
53 #define STATIC static
54 
55 #include "vinumhdr.h"
56 #include "request.h"
57 
#define MAXTOKEN 64					    /* maximum number of tokens in a line */

/*
 * We can afford the luxury of global variables here,
 * since start_config ensures that these functions
 * are single-threaded.
 */

/*
 * These are indices in vinum_conf of the last-mentioned object of each
 * kind.  They supply defaults when a later configuration line does not
 * name the object it refers to.
 */
static int current_drive;				    /* note the last drive we mention, for
							    * some defaults */
static int current_plex;				    /* and the same for the last plex */
static int current_volume;				    /* and the last volume */
static struct _ioctl_reply *ioctl_reply;		    /* struct to return via ioctl */

/*
 * Forward declarations: each of these creates or destroys the device
 * node of an object, depending on the object's state.
 */
static void made_sd(struct sd *sd);
static void made_vol(struct volume *vol);
static void made_plex(struct plex *plex);

/* These values are used by most of these routines, so set them as globals */
static char *token[MAXTOKEN];				    /* pointers to individual tokens */
static int tokens;					    /* number of tokens */

/*
 * Flag bits.  NOTE(review): presumably the output destinations stored in
 * putchar_arg.flags below; neither is referenced in this part of the
 * file -- confirm against the users.
 */
#define TOCONS	0x01
#define TOTTY	0x02
#define TOLOG	0x04

struct putchar_arg {
    int flags;
    struct tty *tty;
};

#define MSG_MAX 1024					    /* maximum length of a formatted message */
91 /*
92  * Format an error message and return to the user in the reply.
93  * CARE: This routine is designed to be called only from the
94  * configuration routines, so it assumes it's the owner of
95  * the configuration lock, and unlocks it on exit
96  */
97 void
98 throw_rude_remark(int error, char *msg,...)
99 {
100     __va_list ap;
101     char *text;
102     static int finishing;				    /* don't recurse */
103     int was_finishing;
104 
105     if ((vinum_conf.flags & VF_LOCKED) == 0)		    /* bug catcher */
106     	panic ("throw_rude_remark: called without config lock");
107     __va_start(ap, msg);
108     if ((ioctl_reply != NULL)				    /* we're called from the user */
109     &&(!(vinum_conf.flags & VF_READING_CONFIG))) {	    /* and not reading from disk: return msg */
110 	/*
111 	 * We can't just format to ioctl_reply, since it
112 	 * may contain our input parameters
113 	 */
114 	    kvasnrprintf(&text, MSG_MAX, 10, msg, ap);
115 	    strcpy(ioctl_reply->msg, text);
116 	    ioctl_reply->error = error;			    /* first byte is the error number */
117 	    kvasfree(&text);
118     } else {
119 	kprintf("vinum: ");
120 	kvprintf(msg, ap);				    /* print to the console */
121 	kprintf("\n");
122     }
123     __va_end(ap);
124 
125     if (vinum_conf.flags & VF_READING_CONFIG) {		    /* go through to the bitter end, */
126 	if ((vinum_conf.flags & VF_READING_CONFIG)	    /* we're reading from disk, */
127 	&&((daemon_options & daemon_noupdate) == 0)) {
128 	    log(LOG_NOTICE, "Disabling configuration updates\n");
129 	    daemon_options |= daemon_noupdate;
130 	}
131 	return;
132     }
133     /*
134      * We have a problem here: we want to unlock the
135      * configuration, which implies tidying up, but
136      * if we find an error while tidying up, we could
137      * recurse for ever.  Use this kludge to only try
138      * once
139      */
140     was_finishing = finishing;
141     finishing = 1;
142     finish_config(was_finishing);			    /* unlock anything we may be holding */
143     finishing = was_finishing;
144     longjmp(command_fail, error);
145 }
146 
147 /*
148  * Check a volume to see if the plex is already assigned to it.
149  * Return index in volume->plex, or -1 if not assigned
150  */
151 int
152 my_plex(int volno, int plexno)
153 {
154     int i;
155     struct volume *vol;
156 
157     vol = &VOL[volno];					    /* point to volno */
158     for (i = 0; i < vol->plexes; i++)
159 	if (vol->plex[i] == plexno)
160 	    return i;
161     return -1;						    /* not found */
162 }
163 
164 /*
165  * Check a plex to see if the subdisk is already assigned to it.
166  * Return index in plex->sd, or -1 if not assigned
167  */
168 int
169 my_sd(int plexno, int sdno)
170 {
171     int i;
172     struct plex *plex;
173 
174     plex = &PLEX[plexno];
175     for (i = 0; i < plex->subdisks; i++)
176 	if (plex->sdnos[i] == sdno)
177 	    return i;
178     return -1;						    /* not found */
179 }
180 
181 /* Add plex to the volume if possible */
182 int
183 give_plex_to_volume(int volno, int plexno)
184 {
185     struct volume *vol;
186     int i;
187 
188     /*
189      * It's not an error for the plex to already
190      * belong to the volume, but we need to check a
191      * number of things to make sure it's done right.
192      * Some day.
193      */
194     if (my_plex(volno, plexno) >= 0)
195 	return plexno;					    /* that's it */
196 
197     vol = &VOL[volno];					    /* point to volume */
198     if (vol->plexes == MAXPLEX)				    /* all plexes allocated */
199 	throw_rude_remark(ENOSPC,
200 	    "Too many plexes for volume %s",
201 	    vol->name);
202     else if ((vol->plexes > 0)				    /* we have other plexes */
203     &&((vol->flags & VF_CONFIG_SETUPSTATE) == 0))	    /* and we're not setting up state */
204 	invalidate_subdisks(&PLEX[plexno], sd_stale);	    /* make the subdisks invalid */
205     vol->plex[vol->plexes] = plexno;			    /* this one */
206     vol->plexes++;					    /* add another plex */
207     PLEX[plexno].volno = volno;				    /* note the number of our volume */
208 
209     /* Find out how big our volume is */
210     for (i = 0; i < vol->plexes; i++)
211 	vol->size = u64max(vol->size, PLEX[vol->plex[i]].length);
212     return vol->plexes - 1;				    /* and return its index */
213 }
214 
215 /*
216  * Add subdisk to a plex if possible
217  */
218 int
219 give_sd_to_plex(int plexno, int sdno)
220 {
221     int i;
222     struct plex *plex;
223     struct sd *sd;
224 
225     /*
226      * It's not an error for the sd to already
227      * belong to the plex, but we need to check a
228      * number of things to make sure it's done right.
229      * Some day.
230      */
231     i = my_sd(plexno, sdno);
232     if (i >= 0)						    /* does it already belong to us? */
233 	return i;					    /* that's it */
234 
235     plex = &PLEX[plexno];				    /* point to the plex */
236     sd = &SD[sdno];					    /* and the subdisk */
237 
238     /* Do we have an offset?  Otherwise put it after the last one */
239     if (sd->plexoffset < 0) {				    /* no offset specified */
240 	if (plex->subdisks > 0) {
241 	    struct sd *lastsd = &SD[plex->sdnos[plex->subdisks - 1]]; /* last subdisk */
242 
243 	    if (plex->organization == plex_concat)	    /* concat, */
244 		sd->plexoffset = lastsd->sectors + lastsd->plexoffset; /* starts here */
245 	    else					    /* striped, RAID-4 or RAID-5 */
246 		sd->plexoffset = plex->stripesize * plex->subdisks; /* starts here */
247 	} else						    /* first subdisk */
248 	    sd->plexoffset = 0;				    /* start at the beginning */
249     }
250     if (plex->subdisks == MAXSD)			    /* we already have our maximum */
251 	throw_rude_remark(ENOSPC,			    /* crap out */
252 	    "Can't add %s to %s: plex full",
253 	    sd->name,
254 	    plex->name);
255 
256     plex->subdisks++;					    /* another entry */
257     if (plex->subdisks >= plex->subdisks_allocated)	    /* need more space */
258 	EXPAND(plex->sdnos, int, plex->subdisks_allocated, INITIAL_SUBDISKS_IN_PLEX);
259 
260     /* Adjust size of plex and volume. */
261     if (isparity(plex))					    /* RAID-4 or RAID-5 */
262 	plex->length = (plex->subdisks - 1) * sd->sectors;  /* size is one disk short */
263     else
264 	plex->length += sd->sectors;			    /* plex gets this much bigger */
265     if (plex->volno >= 0)				    /* we have a volume */
266 	VOL[plex->volno].size = u64max(VOL[plex->volno].size, plex->length); /* adjust its size */
267 
268     /*
269      * We need to check that the subdisks don't overlap,
270      * but we can't do that until a point where we *must*
271      * know the size of all the subdisks.  That's not
272      * here.  But we need to sort them by offset
273      */
274     for (i = 0; i < plex->subdisks - 1; i++) {
275 	if (sd->plexoffset < SD[plex->sdnos[i]].plexoffset) { /* it fits before this one */
276 	    /* First move any remaining subdisks by one */
277 	    int j;
278 
279 	    for (j = plex->subdisks - 1; j > i; j--)	    /* move up one at a time */
280 		plex->sdnos[j] = plex->sdnos[j - 1];
281 	    plex->sdnos[i] = sdno;
282 	    sd->plexsdno = i;				    /* note where we are in the subdisk */
283 	    return i;
284 	}
285     }
286 
287     /*
288      * The plex doesn't have any subdisk with a
289      * larger offset.  Insert it here.
290      */
291     plex->sdnos[i] = sdno;
292     sd->plexsdno = i;					    /* note where we are in the subdisk */
293     sd->plexno = plex->plexno;				    /* and who we belong to */
294     return i;
295 }
296 
297 /*
298  * Add a subdisk to drive if possible.  The
299  * pointer to the drive must already be stored in
300  * the sd structure, but the drive doesn't know
301  * about the subdisk yet.
302  */
303 void
304 give_sd_to_drive(int sdno)
305 {
306     struct sd *sd;					    /* pointer to subdisk */
307     struct drive *drive;				    /* and drive */
308     int fe;						    /* index in free list */
309     int sfe;						    /* and index of subdisk when assigning max */
310 
311     sd = &SD[sdno];					    /* point to sd */
312     drive = &DRIVE[sd->driveno];			    /* and drive */
313 
314     if (drive->state != drive_up) {
315 	update_sd_state(sdno);				    /* that crashes the subdisk */
316 	return;
317     }
318     if (drive->flags & VF_HOTSPARE)			    /* the drive is a hot spare, */
319 	throw_rude_remark(ENOSPC,
320 	    "Can't place %s on hot spare drive %s",
321 	    sd->name,
322 	    drive->label.name);
323     if ((drive->sectors_available == 0)			    /* no space left */
324     ||(sd->sectors > drive->sectors_available)) {	    /* or too big, */
325 	sd->driveoffset = -1;				    /* don't be confusing */
326 	free_sd(sd->sdno);
327 	throw_rude_remark(ENOSPC, "No space for %s on %s", sd->name, drive->label.name);
328 	return;						    /* in case we come back here */
329     }
330     drive->subdisks_used++;				    /* one more subdisk */
331 
332     if (sd->sectors == 0) {				    /* take the largest chunk */
333 	sfe = 0;					    /* to keep the compiler happy */
334 	for (fe = 0; fe < drive->freelist_entries; fe++) {
335 	    if (drive->freelist[fe].sectors >= sd->sectors) { /* more space here */
336 		sd->sectors = drive->freelist[fe].sectors;  /* take it */
337 		sd->driveoffset = drive->freelist[fe].offset;
338 		sfe = fe;				    /* and note the index for later */
339 	    }
340 	}
341 	if (sd->sectors == 0) {				    /* no luck, */
342 	    sd->driveoffset = -1;			    /* don't be confusing */
343 	    free_sd(sd->sdno);
344 	    throw_rude_remark(ENOSPC,			    /* give up */
345 		"No space for %s on %s",
346 		sd->name,
347 		drive->label.name);
348 	}
349 	if (sfe < (drive->freelist_entries - 1))	    /* not the last one, */
350 	    bcopy(&drive->freelist[sfe + 1],
351 		&drive->freelist[sfe],
352 		(drive->freelist_entries - sfe) * sizeof(struct drive_freelist));
353 	drive->freelist_entries--;			    /* one less entry */
354 	drive->sectors_available -= sd->sectors;	    /* and note how much less space we have */
355     } else if (sd->driveoffset < 0) {			    /* no offset specified, find one */
356 	for (fe = 0; fe < drive->freelist_entries; fe++) {
357 	    if (drive->freelist[fe].sectors >= sd->sectors) { /* it'll fit here */
358 		sd->driveoffset = drive->freelist[fe].offset;
359 		if (sd->sectors == drive->freelist[fe].sectors) { /* used up the entire entry */
360 		    if (fe < (drive->freelist_entries - 1)) /* not the last one, */
361 			bcopy(&drive->freelist[fe + 1],
362 			    &drive->freelist[fe],
363 			    (drive->freelist_entries - fe) * sizeof(struct drive_freelist));
364 		    drive->freelist_entries--;		    /* one less entry */
365 		} else {
366 		    drive->freelist[fe].sectors -= sd->sectors;	/* this much less space */
367 		    drive->freelist[fe].offset += sd->sectors; /* this much further on */
368 		}
369 		drive->sectors_available -= sd->sectors;    /* and note how much less space we have */
370 		break;
371 	    }
372 	}
373 	if (sd->driveoffset < 0)
374 	    /*
375 	     * Didn't find anything.  Although the drive has
376 	     * enough space, it's too fragmented
377 	     */
378 	{
379 	    free_sd(sd->sdno);
380 	    throw_rude_remark(ENOSPC, "No space for %s on %s", sd->name, drive->label.name);
381 	}
382     } else {						    /* specific offset */
383 	/*
384 	 * For a specific offset to work, the space must be
385 	 * entirely in a single freelist entry.  Look for it.
386 	 */
387 	u_int64_t sdend = sd->driveoffset + sd->sectors;    /* end of our subdisk */
388 	for (fe = 0; fe < drive->freelist_entries; fe++) {
389 	    u_int64_t dend = drive->freelist[fe].offset + drive->freelist[fe].sectors; /* end of entry */
390 	    if (dend >= sdend) {			    /* fits before here */
391 		if (drive->freelist[fe].offset > sd->driveoffset) { /* starts after the beginning of sd area */
392 		    sd->driveoffset = -1;		    /* don't be confusing */
393 		    set_sd_state(sd->sdno, sd_down, setstate_force);
394 		    throw_rude_remark(ENOSPC,
395 			"No space for %s on drive %s at offset %lld",
396 			sd->name,
397 			drive->label.name,
398 			sd->driveoffset);
399 		    return;
400 		}
401 		/*
402 		 * We've found the space, and we can allocate it.
403 		 * We don't need to say that to the subdisk, which
404 		 * already knows about it.  We need to tell it to
405 		 * the free list, though.  We have four possibilities:
406 		 *
407 		 * 1.  The subdisk exactly eats up the entry.  That's the
408 		 *     same as above.
409 		 * 2.  The subdisk starts at the beginning and leaves space
410 		 *     at the end.
411 		 * 3.  The subdisk starts after the beginning and leaves
412 		 *     space at the end as well: we end up with another
413 		 *     fragment.
414 		 * 4.  The subdisk leaves space at the beginning and finishes
415 		 *     at the end.
416 		 */
417 		drive->sectors_available -= sd->sectors;    /* note how much less space we have */
418 		if (sd->driveoffset == drive->freelist[fe].offset) { /* 1 or 2 */
419 		    if (sd->sectors == drive->freelist[fe].sectors) { /* 1: used up the entire entry */
420 			if (fe < (drive->freelist_entries - 1))	/* not the last one, */
421 			    bcopy(&drive->freelist[fe + 1],
422 				&drive->freelist[fe],
423 				(drive->freelist_entries - fe) * sizeof(struct drive_freelist));
424 			drive->freelist_entries--;	    /* one less entry */
425 		    } else {				    /* 2: space at the end */
426 			drive->freelist[fe].sectors -= sd->sectors; /* this much less space */
427 			drive->freelist[fe].offset += sd->sectors; /* this much further on */
428 		    }
429 		} else {				    /* 3 or 4 */
430 		    drive->freelist[fe].sectors = sd->driveoffset - drive->freelist[fe].offset;
431 		    if (dend > sdend) {			    /* 3: space at the end as well */
432 			if (fe < (drive->freelist_entries - 1))	/* not the last one */
433 			    bcopy(&drive->freelist[fe],	    /* move the rest down */
434 				&drive->freelist[fe + 1],
435 				(drive->freelist_entries - fe) * sizeof(struct drive_freelist));
436 			drive->freelist_entries++;	    /* one less entry */
437 			drive->freelist[fe + 1].offset = sdend;	/* second entry starts after sd */
438 			drive->freelist[fe + 1].sectors = dend - sdend;	/* and is this long */
439 		    }
440 		}
441 		break;
442 	    }
443 	}
444     }
445     drive->opencount++;					    /* one more subdisk attached */
446 }
447 
448 /* Get an empty drive entry from the drive table */
449 int
450 get_empty_drive(void)
451 {
452     int driveno;
453     struct drive *drive;
454 
455     /* first see if we have one which has been deallocated */
456     for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
457 	if (DRIVE[driveno].state == drive_unallocated)	    /* bingo */
458 	    break;
459     }
460 
461     if (driveno >= vinum_conf.drives_allocated)		    /* we've used all our allocation */
462 	EXPAND(DRIVE, struct drive, vinum_conf.drives_allocated, INITIAL_DRIVES);
463 
464     /* got a drive entry.  Make it pretty */
465     drive = &DRIVE[driveno];
466     bzero(drive, sizeof(struct drive));
467     drive->driveno = driveno;				    /* put number in structure */
468     drive->flags |= VF_NEWBORN;				    /* newly born drive */
469     strcpy(drive->devicename, "unknown");		    /* and make the name ``unknown'' */
470     return driveno;					    /* return the index */
471 }
472 
473 /*
474  * Find the named drive in vinum_conf.drive, return a pointer
475  * return the index in vinum_conf.drive.
476  * Don't mark the drive as allocated (XXX SMP)
477  * If create != 0, create an entry if it doesn't exist
478  */
479 /* XXX check if we have it open from attach */
480 int
481 find_drive(const char *name, int create)
482 {
483     int driveno;
484     struct drive *drive;
485 
486     if (name != NULL) {
487 	for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
488 	    drive = &DRIVE[driveno];			    /* point to drive */
489 	    if ((drive->label.name[0] != '\0')		    /* it has a name */
490 	    &&(strcmp(drive->label.name, name) == 0)	    /* and it's this one */
491 	    &&(drive->state > drive_unallocated))	    /* and it's a real one: found */
492 		return driveno;
493 	}
494     }
495     /* the drive isn't in the list.  Add it if he wants */
496     if (create == 0)					    /* don't want to create */
497 	return -1;					    /* give up */
498 
499     driveno = get_empty_drive();
500     drive = &DRIVE[driveno];
501     if (name != NULL)
502 	ksnprintf(drive->label.name, sizeof(drive->label.name), "%s", name);
503     drive->state = drive_referenced;			    /* in use, nothing worthwhile there */
504     return driveno;					    /* return the index */
505 }
506 
507 /*
508  * Find a drive given its device name.
509  * devname must be valid.
510  * Otherwise the same as find_drive above
511  */
512 int
513 find_drive_by_dev(const char *devname, int create)
514 {
515     int driveno;
516     struct drive *drive;
517 
518     for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
519 	drive = &DRIVE[driveno];
520 	if (strcmp(drive->devicename, devname) == 0 &&
521 	    drive->state > drive_unallocated
522 	) {
523 	    return driveno;
524 	}
525     }
526 
527     if (create == 0)
528 	return -1;
529 
530     driveno = get_empty_drive();
531     drive = &DRIVE[driveno];
532     ksnprintf(drive->devicename, sizeof(drive->devicename), "%s", devname);
533     /* in use, nothing worthwhile there */
534     drive->state = drive_referenced;
535     return driveno;
536 }
537 
538 /* Find an empty subdisk in the subdisk table */
539 int
540 get_empty_sd(void)
541 {
542     int sdno;
543     struct sd *sd;
544 
545     /* first see if we have one which has been deallocated */
546     for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
547 	if (SD[sdno].state == sd_unallocated)		    /* bingo */
548 	    break;
549     }
550     if (sdno >= vinum_conf.subdisks_allocated)
551 	/*
552 	 * We've run out of space.  sdno is pointing
553 	 * where we want it, but at the moment we
554 	 * don't have the space.  Get it.
555 	 */
556 	EXPAND(SD, struct sd, vinum_conf.subdisks_allocated, INITIAL_SUBDISKS);
557 
558     /* initialize some things */
559     sd = &SD[sdno];					    /* point to it */
560     bzero(sd, sizeof(struct sd));			    /* initialize */
561     sd->flags |= VF_NEWBORN;				    /* newly born subdisk */
562     sd->plexno = -1;					    /* no plex */
563     sd->sectors = -1;					    /* no space */
564     sd->driveno = -1;					    /* no drive */
565     sd->plexoffset = -1;				    /* and no offsets */
566     sd->driveoffset = -1;
567     return sdno;					    /* return the index */
568 }
569 
570 /* return a drive to the free pool */
571 void
572 free_drive(struct drive *drive)
573 {
574     if ((drive->state > drive_referenced)		    /* real drive */
575     ||(drive->flags & VF_OPEN)) {			    /* how can it be open without a state? */
576 	LOCKDRIVE(drive);
577 	if (drive->flags & VF_OPEN) {			    /* it's open, */
578 	    close_locked_drive(drive);			    /* close it */
579 	    drive->state = drive_down;			    /* and note the fact */
580 	}
581 	if (drive->freelist)
582 	    Free(drive->freelist);
583 	bzero(drive, sizeof(struct drive));		    /* this also sets drive_unallocated */
584 	unlockdrive(drive);
585     }
586 }
587 
588 /*
589  * Find the named subdisk in vinum_conf.sd.
590  *
591  * If create != 0, create an entry if it doesn't exist
592  *
593  * Return index in vinum_conf.sd
594  */
595 int
596 find_subdisk(const char *name, int create)
597 {
598     int sdno;
599     struct sd *sd;
600 
601     for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
602 	if (strcmp(SD[sdno].name, name) == 0)		    /* found it */
603 	    return sdno;
604     }
605 
606     /* the subdisk isn't in the list.  Add it if he wants */
607     if (create == 0)					    /* don't want to create */
608 	return -1;					    /* give up */
609 
610     /* Allocate one and insert the name */
611     sdno = get_empty_sd();
612     sd = &SD[sdno];
613     ksnprintf(sd->name, sizeof(sd->name), "%s", name);
614     return sdno;					    /* return the pointer */
615 }
616 
617 /* Return space to a drive */
618 void
619 return_drive_space(int driveno, int64_t offset, int length)
620 {
621     struct drive *drive;
622     int fe;						    /* free list entry */
623     u_int64_t sdend;					    /* end of our subdisk */
624     u_int64_t dend;					    /* end of our freelist entry */
625 
626     drive = &DRIVE[driveno];
627     if (drive->state == drive_up) {
628 	sdend = offset + length;			    /* end of our subdisk */
629 
630 	/* Look for where to return the sd address space */
631 	for (fe = 0;
632 	    (fe < drive->freelist_entries) && (drive->freelist[fe].offset < offset);
633 	    fe++);
634 	/*
635 	 * Now we are pointing to the last entry, the first
636 	 * with a higher offset than the subdisk, or both.
637 	 */
638 	if ((fe > 1)					    /* not the first entry */
639 	&&((fe == drive->freelist_entries)		    /* gone past the end */
640 	||(drive->freelist[fe].offset > offset)))	    /* or past the block were looking for */
641 	    fe--;					    /* point to the block before */
642 	dend = drive->freelist[fe].offset + drive->freelist[fe].sectors; /* end of the entry */
643 
644 	/*
645 	 * At this point, we are pointing to the correct
646 	 * place in the free list.  A number of possibilities
647 	 * exist:
648 	 *
649 	 * 1.  The block to be freed starts at the end of the
650 	 *     block to which we are pointing.  This has two
651 	 *     subcases:
652 	 *
653 	 * a.  The block to be freed ends at the beginning
654 	 *     of the following block.  Merge the three
655 	 *     areas into a single block.
656 	 *
657 	 * b.  The block is shorter than the space between
658 	 *     the current block and the next one.  Enlarge
659 	 *     the current block.
660 	 *
661 	 * 2.  The block to be freed starts after the end
662 	 *     of the block.  Again, we have two cases:
663 	 *
664 	 * a.  It ends before the start of the following block.
665 	 *     Create a new free block.
666 	 *
667 	 * b.  It ends at the start of the following block.
668 	 *     Enlarge the following block downwards.
669 	 *
670 	 * When there is only one free space block, and the
671 	 * space to be returned is before it, the pointer is
672 	 * to a non-existent zeroth block. XXX check this
673 	 */
674 	if (offset == dend) {				    /* Case 1: it starts at the end of this block */
675 	    if ((fe < drive->freelist_entries - 1)	    /* we're not the last block in the free list */
676 	    /* and the subdisk ends at the start of the next block */
677 	    &&(sdend == drive->freelist[fe + 1].offset)) {
678 		drive->freelist[fe].sectors		    /* 1a: merge all three blocks */
679 		    = drive->freelist[fe + 1].sectors;
680 		if (fe < drive->freelist_entries - 2)	    /* still more blocks after next */
681 		    bcopy(&drive->freelist[fe + 2],	    /* move down one */
682 			&drive->freelist[fe + 1],
683 			(drive->freelist_entries - 2 - fe)
684 			* sizeof(struct drive_freelist));
685 		drive->freelist_entries--;		    /* one less entry in the free list */
686 	    } else					    /* 1b: just enlarge this block */
687 		drive->freelist[fe].sectors += length;
688 	} else {					    /* Case 2 */
689 	    if (offset > dend)				    /* it starts after this block */
690 		fe++;					    /* so look at the next block */
691 	    if ((fe < drive->freelist_entries)		    /* we're not the last block in the free list */
692 	    /* and the subdisk ends at the start of this block: case 4 */
693 	    &&(sdend == drive->freelist[fe].offset)) {
694 		drive->freelist[fe].offset = offset;	    /* it starts where the sd was */
695 		drive->freelist[fe].sectors += length;	    /* and it's this much bigger */
696 	    } else {					    /* case 3: non-contiguous */
697 		if (fe < drive->freelist_entries)	    /* not after the last block, */
698 		    bcopy(&drive->freelist[fe],		    /* move the rest up one entry */
699 			&drive->freelist[fe + 1],
700 			(drive->freelist_entries - fe)
701 			* sizeof(struct drive_freelist));
702 		drive->freelist_entries++;		    /* one less entry */
703 		drive->freelist[fe].offset = offset;	    /* this entry represents the sd */
704 		drive->freelist[fe].sectors = length;
705 	    }
706 	}
707 	drive->sectors_available += length;		    /* the sectors are now available */
708     }
709 }
710 
711 /*
712  * Free an allocated sd entry.
713  * This performs memory management only.  remove()
714  * is responsible for checking relationships.
715  */
716 void
717 free_sd(int sdno)
718 {
719     struct sd *sd;
720 
721     sd = &SD[sdno];
722     if ((sd->driveno >= 0)				    /* we have a drive, */
723     &&(sd->sectors > 0))				    /* and some space on it */
724 	return_drive_space(sd->driveno,			    /* return the space */
725 	    sd->driveoffset,
726 	    sd->sectors);
727     if (sd->plexno >= 0)
728 	PLEX[sd->plexno].subdisks--;			    /* one less subdisk */
729     sd->state = sd_unallocated;
730     made_sd(sd);
731     bzero(sd, sizeof(struct sd));			    /* and clear it out */
732     sd->state = sd_unallocated;
733     vinum_conf.subdisks_used--;				    /* one less sd */
734 }
735 
736 static void
737 made_sd(struct sd *sd)
738 {
739     if (sd->sd_dev == NULL && sd->state != sd_unallocated) {
740 	sd->sd_dev = make_dev(&vinum_ops, VINUM_SD(sd->sdno),
741 			      UID_ROOT, GID_OPERATOR, 0640,
742 			      VINUM_BASE "sd/%s", sd->name);
743 #if 0
744 	if (sd->plexno >= 0 && PLEX[sd->plexno].volno >= 0) {
745 		make_dev_alias(sd->sd_dev, "vol/%s.plex/%s",
746 				VOL[PLEX[sd->plexno].volno].name,
747 				plex->name, VOL[plex->volno].name);
748 	}
749 #endif
750     }
751     if (sd->sd_dev && sd->state == sd_unallocated) {
752 	destroy_dev(sd->sd_dev);
753 	sd->sd_dev = NULL;
754     }
755 }
756 
757 static void
758 made_vol(struct volume *vol)
759 {
760     if (vol->vol_dev == NULL && vol->state != volume_unallocated) {
761 	vol->vol_dev = make_dev(&vinum_ops,
762 				VINUMDEV(vol->volno, 0, 0, VINUM_VOLUME_TYPE),
763 				UID_ROOT, GID_OPERATOR, 0640,
764 				VINUM_BASE "vol/%s", vol->name);
765     }
766     if (vol->vol_dev && vol->state == volume_unallocated) {
767 	destroy_dev(vol->vol_dev);
768 	vol->vol_dev = NULL;
769     }
770 }
771 
772 static void
773 made_plex(struct plex *plex)
774 {
775     if (plex->plex_dev == NULL && plex->state != plex_unallocated) {
776 	plex->plex_dev = make_dev(&vinum_ops, VINUM_PLEX(plex->plexno),
777 				UID_ROOT, GID_OPERATOR, 0640,
778 				VINUM_BASE "plex/%s", plex->name);
779 	if (plex->volno >= 0) {
780 		make_dev_alias(plex->plex_dev, "vol/%s.plex/%s",
781 				plex->name, VOL[plex->volno].name);
782 	}
783     }
784     if (plex->plex_dev && plex->state == plex_unallocated) {
785 	destroy_dev(plex->plex_dev);
786 	plex->plex_dev = NULL;
787     }
788 }
789 
790 /* Find an empty plex in the plex table */
791 int
792 get_empty_plex(void)
793 {
794     int plexno;
795     struct plex *plex;					    /* if we allocate one */
796 
797     /* first see if we have one which has been deallocated */
798     for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++) {
799 	if (PLEX[plexno].state == plex_unallocated)	    /* bingo */
800 	    break;					    /* and get out of here */
801     }
802 
803     if (plexno >= vinum_conf.plexes_allocated)
804 	EXPAND(PLEX, struct plex, vinum_conf.plexes_allocated, INITIAL_PLEXES);
805 
806     /* Found a plex.  Give it an sd structure */
807     plex = &PLEX[plexno];				    /* this one is ours */
808     bzero(plex, sizeof(struct plex));			    /* polish it up */
809     plex->sdnos = (int *) Malloc(sizeof(int) * INITIAL_SUBDISKS_IN_PLEX); /* allocate sd table */
810     CHECKALLOC(plex->sdnos, "vinum: Can't allocate plex subdisk table");
811     bzero(plex->sdnos, (sizeof(int) * INITIAL_SUBDISKS_IN_PLEX)); /* do we need this? */
812     plex->flags |= VF_NEWBORN;				    /* newly born plex */
813     plex->subdisks = 0;					    /* no subdisks in use */
814     plex->subdisks_allocated = INITIAL_SUBDISKS_IN_PLEX;    /* and we have space for this many */
815     plex->organization = plex_disorg;			    /* and it's not organized */
816     plex->volno = -1;					    /* no volume yet */
817     return plexno;					    /* return the index */
818 }
819 
820 /*
821  * Find the named plex in vinum_conf.plex
822  *
823  * If create != 0, create an entry if it doesn't exist
824  * return index in vinum_conf.plex
825  */
826 int
827 find_plex(const char *name, int create)
828 {
829     int plexno;
830     struct plex *plex;
831 
832     for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++) {
833 	if (strcmp(PLEX[plexno].name, name) == 0)	    /* found it */
834 	    return plexno;
835     }
836 
837     /* the plex isn't in the list.  Add it if he wants */
838     if (create == 0)					    /* don't want to create */
839 	return -1;					    /* give up */
840 
841     /* Allocate one and insert the name */
842     plexno = get_empty_plex();
843     plex = &PLEX[plexno];				    /* point to it */
844     ksnprintf(plex->name, sizeof(plex->name), "%s", name);
845     return plexno;					    /* return the pointer */
846 }
847 
848 /*
849  * Free an allocated plex entry
850  * and its associated memory areas
851  */
852 void
853 free_plex(int plexno)
854 {
855     struct plex *plex;
856 
857     plex = &PLEX[plexno];
858     if (plex->sdnos)
859 	Free(plex->sdnos);
860     if (plex->lock)
861 	Free(plex->lock);
862     plex->state = plex_unallocated;
863     made_plex(plex);
864     bzero(plex, sizeof(struct plex));			    /* and clear it out */
865     plex->state = plex_unallocated;
866 }
867 
868 /* Find an empty volume in the volume table */
869 int
870 get_empty_volume(void)
871 {
872     int volno;
873     struct volume *vol;
874     int i;
875 
876     /* first see if we have one which has been deallocated */
877     for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) {
878 	if (VOL[volno].state == volume_unallocated)	    /* bingo */
879 	    break;
880     }
881 
882     if (volno >= vinum_conf.volumes_allocated)
883 	EXPAND(VOL, struct volume, vinum_conf.volumes_allocated, INITIAL_VOLUMES);
884 
885     /* Now initialize fields */
886     vol = &VOL[volno];
887     bzero(vol, sizeof(struct volume));
888     vol->flags |= VF_NEWBORN | VF_CREATED;		    /* newly born volume */
889     vol->preferred_plex = ROUND_ROBIN_READPOL;		    /* round robin */
890     for (i = 0; i < MAXPLEX; i++)			    /* mark the plexes missing */
891 	vol->plex[i] = -1;
892     return volno;					    /* return the index */
893 }
894 
895 /*
896  * Find the named volume in vinum_conf.volume.
897  *
898  * If create != 0, create an entry if it doesn't exist
899  * return the index in vinum_conf
900  */
901 int
902 find_volume(const char *name, int create)
903 {
904     int volno;
905     struct volume *vol;
906 
907     for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) {
908 	if (strcmp(VOL[volno].name, name) == 0)		    /* found it */
909 	    return volno;
910     }
911 
912     /* the volume isn't in the list.  Add it if he wants */
913     if (create == 0)					    /* don't want to create */
914 	return -1;					    /* give up */
915 
916     /* Allocate one and insert the name */
917     volno = get_empty_volume();
918     vol = &VOL[volno];
919     ksnprintf(vol->name, sizeof(vol->name), "%s", name);
920     vol->blocksize = DEV_BSIZE;				    /* block size of this volume */
921     return volno;					    /* return the pointer */
922 }
923 
924 /*
925  * Free an allocated volume entry
926  * and its associated memory areas
927  */
928 void
929 free_volume(int volno)
930 {
931     struct volume *vol;
932 
933     vol = &VOL[volno];
934     vol->state = volume_unallocated;
935     made_vol(vol);
936     bzero(vol, sizeof(struct volume));			    /* and clear it out */
937     vol->state = volume_unallocated;
938 }
939 
940 /*
941  * Handle a drive definition.  We store the information in the global variable
942  * drive, so we don't need to allocate.
943  *
944  * If we find an error, print a message and return
945  */
946 void
947 config_drive(int update)
948 {
949     enum drive_label_info partition_status;		    /* info about the partition */
950     int parameter;
951     int driveno;					    /* index of drive in vinum_conf */
952     struct drive *drive;				    /* and pointer to it */
953     int otherdriveno;					    /* index of possible second drive */
954     int sdno;
955 
956     if (tokens < 2)					    /* not enough tokens */
957 	throw_rude_remark(EINVAL, "Drive has no name\n");
958     driveno = find_drive(token[1], 1);			    /* allocate a drive to initialize */
959     drive = &DRIVE[driveno];				    /* and get a pointer */
960     if (update && ((drive->flags & VF_NEWBORN) == 0))	    /* this drive exists already */
961 	return;						    /* don't do anything */
962     drive->flags &= ~VF_NEWBORN;			    /* no longer newly born */
963 
964     if (drive->state != drive_referenced) {		    /* we already know this drive */
965 	/*
966 	 * XXX Check which definition is more up-to-date.  Give
967 	 * preference for the definition on its own drive.
968 	 */
969 	return;						    /* XXX */
970     }
971     for (parameter = 2; parameter < tokens; parameter++) {  /* look at the other tokens */
972 	switch (get_keyword(token[parameter], &keyword_set)) {
973 	case kw_device:
974 	    parameter++;
975 	    otherdriveno = find_drive_by_dev(token[parameter], 0); /* see if it exists already */
976 	    if (otherdriveno >= 0) {			    /* yup, */
977 		drive->state = drive_unallocated;	    /* deallocate the drive */
978 		throw_rude_remark(EEXIST,		    /* and complain */
979 		    "Drive %s would have same device as drive %s",
980 		    token[1],
981 		    DRIVE[otherdriveno].label.name);
982 	    }
983 	    if (drive->devicename[0] == '/') {		    /* we know this drive... */
984 		if (strcmp(drive->devicename, token[parameter])) /* different name */
985 		    close_drive(drive);			    /* close it if it's open */
986 		else					    /* no change */
987 		    break;
988 	    }
989 
990 	    /*
991 	     * open the device and get the configuration
992 	     */
993 	    ksnprintf(drive->devicename, sizeof(drive->devicename),
994 		      "%s", token[parameter]);
995 	    partition_status = read_drive_label(drive, 1);
996 
997 	    switch (partition_status) {
998 	    case DL_CANT_OPEN:				    /* not our kind */
999 		close_drive(drive);
1000 		if (drive->lasterror == EFTYPE)		    /* wrong kind of partition */
1001 		    throw_rude_remark(drive->lasterror,
1002 			"Drive %s has invalid partition type",
1003 			drive->label.name);
1004 		else					    /* I/O error of some kind */
1005 		    throw_rude_remark(drive->lasterror,
1006 			"Can't initialize drive %s",
1007 			drive->label.name);
1008 		break;
1009 
1010 	    case DL_WRONG_DRIVE:			    /* valid drive, not the name we expected */
1011 		if (vinum_conf.flags & VF_FORCECONFIG) {    /* but we'll accept that */
1012 		    bcopy(token[1], drive->label.name, sizeof(drive->label.name));
1013 		    break;
1014 		}
1015 		close_drive(drive);
1016 		/*
1017 		 * There's a potential race condition here:
1018 		 * the rude remark refers to a field in an
1019 		 * unallocated drive, which potentially could
1020 		 * be reused.  This works because we're the only
1021 		 * thread accessing the config at the moment.
1022 		 */
1023 		drive->state = drive_unallocated;	    /* throw it away completely */
1024 		throw_rude_remark(drive->lasterror,
1025 		    "Incorrect drive name %s specified for drive %s",
1026 		    token[1],
1027 		    drive->label.name);
1028 		break;
1029 
1030 	    case DL_DELETED_LABEL:			    /* it was a drive, but we deleted it */
1031 	    case DL_NOT_OURS:				    /* nothing to do with the rest */
1032 	    case DL_OURS:
1033 		break;
1034 	    }
1035 	    /*
1036 	     * read_drive_label overwrites the device name.
1037 	     * If we get here, we can have the drive,
1038 	     * so put it back again
1039 	     */
1040 	    ksnprintf(drive->devicename, sizeof(drive->devicename),
1041 		      "%s", token[parameter]);
1042 	    break;
1043 
1044 	case kw_state:
1045 	    parameter++;				    /* skip the keyword */
1046 	    if (vinum_conf.flags & VF_READING_CONFIG)
1047 		drive->state = DriveState(token[parameter]); /* set the state */
1048 	    break;
1049 
1050 	case kw_hotspare:				    /* this drive is a hot spare */
1051 	    drive->flags |= VF_HOTSPARE;
1052 	    break;
1053 
1054 	default:
1055 	    close_drive(drive);
1056 	    throw_rude_remark(EINVAL,
1057 		"Drive %s, invalid keyword: %s",
1058 		token[1],
1059 		token[parameter]);
1060 	}
1061     }
1062 
1063     if (drive->devicename[0] != '/') {
1064 	drive->state = drive_unallocated;		    /* deallocate the drive */
1065 	throw_rude_remark(EINVAL, "No device name for %s", drive->label.name);
1066     }
1067     vinum_conf.drives_used++;				    /* passed all hurdles: one more in use */
1068     /*
1069      * If we're replacing a drive, it could be that
1070      * we already have subdisks referencing this
1071      * drive.  Note where they should be and change
1072      * their state to obsolete.
1073      */
1074     for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
1075 	if ((SD[sdno].state > sd_referenced)
1076 	    && (SD[sdno].driveno == driveno)) {
1077 	    give_sd_to_drive(sdno);
1078 	    if (SD[sdno].state > sd_stale)
1079 		SD[sdno].state = sd_stale;
1080 	}
1081     }
1082 }
1083 
1084 /*
1085  * Handle a subdisk definition.  We store the information in the global variable
1086  * sd, so we don't need to allocate.
1087  *
1088  * If we find an error, print a message and return
1089  */
1090 void
1091 config_subdisk(int update)
1092 {
1093     int parameter;
1094     int sdno;						    /* index of sd in vinum_conf */
1095     struct sd *sd;					    /* and pointer to it */
1096     u_int64_t size;
1097     int detached = 0;					    /* set to 1 if this is a detached subdisk */
1098     int sdindex = -1;					    /* index in plexes subdisk table */
1099     enum sdstate state = sd_unallocated;		    /* state to set, if specified */
1100     int autosize = 0;					    /* set if we autosize in give_sd_to_drive */
1101     int namedsdno;					    /* index of another with this name */
1102     char partition = 0;					    /* partition of external subdisk */
1103 
1104     sdno = get_empty_sd();				    /* allocate an SD to initialize */
1105     sd = &SD[sdno];					    /* and get a pointer */
1106 
1107     for (parameter = 1; parameter < tokens; parameter++) {  /* look at the other tokens */
1108 	switch (get_keyword(token[parameter], &keyword_set)) {
1109 	    /*
1110 	     * If we have a 'name' parameter, it must
1111 	     * come first, because we're too lazy to tidy
1112 	     * up dangling refs if it comes later.
1113 	     */
1114 	case kw_name:
1115 	    namedsdno = find_subdisk(token[++parameter], 0); /* find an existing sd with this name */
1116 	    if (namedsdno >= 0) {			    /* got one */
1117 		if (SD[namedsdno].state == sd_referenced) { /* we've been told about this one */
1118 		    if (parameter > 2)
1119 			throw_rude_remark(EINVAL,
1120 			    "sd %s: name parameter must come first\n", /* no go */
1121 			    token[parameter]);
1122 		    else {
1123 			int i;
1124 			struct plex *plex;		    /* for tidying up dangling references */
1125 
1126 			*sd = SD[namedsdno];		    /* copy from the referenced one */
1127 			sd->sd_dev = NULL;
1128 			made_sd(sd);
1129 			SD[namedsdno].state = sd_unallocated; /* and deallocate the referenced one */
1130 			made_sd(&SD[namedsdno]);
1131 			plex = &PLEX[sd->plexno];	    /* now take a look at our plex */
1132 			for (i = 0; i < plex->subdisks; i++) { /* look for the pointer */
1133 			    if (plex->sdnos[i] == namedsdno) /* pointing to the old subdisk */
1134 				plex->sdnos[i] = sdno;	    /* bend it to point here */
1135 			}
1136 		    }
1137 		}
1138 		if (update)				    /* are we updating? */
1139 		    return;				    /* that's OK, nothing more to do */
1140 		else
1141 		    throw_rude_remark(EINVAL, "Duplicate subdisk %s", token[parameter]);
1142 	    } else {
1143 		    ksnprintf(sd->name, sizeof(sd->name),
1144 			      "%s", token[parameter]);
1145 	    }
1146 	    break;
1147 
1148 	case kw_detached:
1149 	    detached = 1;
1150 	    break;
1151 
1152 	case kw_plexoffset:
1153 	    size = sizespec(token[++parameter]);
1154 	    if ((size == -1)				    /* unallocated */
1155 	    &&(vinum_conf.flags & VF_READING_CONFIG))	    /* reading from disk */
1156 		break;					    /* invalid sd; just ignore it */
1157 	    if ((size % DEV_BSIZE) != 0)
1158 		throw_rude_remark(EINVAL,
1159 		    "sd %s, bad plex offset alignment: %lld",
1160 		    sd->name,
1161 		    (long long) size);
1162 	    else
1163 		sd->plexoffset = size / DEV_BSIZE;
1164 	    break;
1165 
1166 	case kw_driveoffset:
1167 	    size = sizespec(token[++parameter]);
1168 	    if ((size == -1)				    /* unallocated */
1169 	    &&(vinum_conf.flags & VF_READING_CONFIG))	    /* reading from disk */
1170 		break;					    /* invalid sd; just ignore it */
1171 	    if ((size % DEV_BSIZE) != 0)
1172 		throw_rude_remark(EINVAL,
1173 		    "sd %s, bad drive offset alignment: %lld",
1174 		    sd->name,
1175 		    (long long) size);
1176 	    else
1177 		sd->driveoffset = size / DEV_BSIZE;
1178 	    break;
1179 
1180 	case kw_len:
1181 	    if (get_keyword(token[++parameter], &keyword_set) == kw_max) /* select maximum size from drive */
1182 		size = 0;				    /* this is how we say it :-) */
1183 	    else
1184 		size = sizespec(token[parameter]);
1185 	    if ((size % DEV_BSIZE) != 0)
1186 		throw_rude_remark(EINVAL, "sd %s, length %d not multiple of sector size", sd->name, size);
1187 	    else
1188 		sd->sectors = size / DEV_BSIZE;
1189 	    /*
1190 	     * We have a problem with autosizing: we need to
1191 	     * give the drive to the plex before we give it
1192 	     * to the drive, in order to be clean if we give
1193 	     * up in the middle, but at this time the size hasn't
1194 	     * been set.  Note that we have to fix up after
1195 	     * giving the subdisk to the drive.
1196 	     */
1197 	    if (size == 0)
1198 		autosize = 1;				    /* note that we're autosizing */
1199 	    break;
1200 
1201 	case kw_drive:
1202 	    sd->driveno = find_drive(token[++parameter], 1); /* insert drive information */
1203 	    break;
1204 
1205 	case kw_plex:
1206 	    sd->plexno = find_plex(token[++parameter], 1);  /* insert plex information */
1207 	    break;
1208 
1209 	    /*
1210 	     * Set the state.  We can't do this directly,
1211 	     * because give_sd_to_plex may change it
1212 	     */
1213 	case kw_state:
1214 	    parameter++;				    /* skip the keyword */
1215 	    if (vinum_conf.flags & VF_READING_CONFIG)
1216 		state = SdState(token[parameter]);	    /* set the state */
1217 	    break;
1218 
1219 	case kw_partition:
1220 	    parameter++;				    /* skip the keyword */
1221 	    if ((strlen(token[parameter]) != 1)
1222 		|| (token[parameter][0] < 'a')
1223 		|| (token[parameter][0] > 'p'))
1224 		throw_rude_remark(EINVAL,
1225 		    "%s: invalid partition %c",
1226 		    sd->name,
1227 		    token[parameter][0]);
1228 	    else
1229 		partition = token[parameter][0];
1230 	    break;
1231 
1232 	case kw_retryerrors:
1233 	    sd->flags |= VF_RETRYERRORS;
1234 	    break;
1235 
1236 	default:
1237 	    throw_rude_remark(EINVAL, "%s: invalid keyword: %s", sd->name, token[parameter]);
1238 	}
1239     }
1240 
1241     /* Check we have a drive name */
1242     if (sd->driveno < 0) {				    /* didn't specify a drive */
1243 	sd->driveno = current_drive;			    /* set to the current drive */
1244 	if (sd->driveno < 0)				    /* no current drive? */
1245 	    throw_rude_remark(EINVAL, "Subdisk %s is not associated with a drive", sd->name);
1246     }
1247     /*
1248      * This is tacky.  If something goes wrong
1249      * with the checks, we may end up losing drive
1250      * space.  FIXME.
1251      */
1252     if (autosize != 0)					    /* need to find a size, */
1253 	give_sd_to_drive(sdno);				    /* do it before the plex */
1254 
1255     /*  Check for a plex name */
1256     if ((sd->plexno < 0)				    /* didn't specify a plex */
1257     &&(!detached))					    /* and didn't say not to, */
1258 	sd->plexno = current_plex;			    /* set to the current plex */
1259 
1260     if (sd->plexno >= 0)
1261 	sdindex = give_sd_to_plex(sd->plexno, sdno);	    /* now tell the plex that it has this sd */
1262 
1263     sd->sdno = sdno;					    /* point to our entry in the table */
1264 
1265     /* Does the subdisk have a name?  If not, give it one */
1266     if (sd->name[0] == '\0') {				    /* no name */
1267 	char sdsuffix[8];				    /* form sd name suffix here */
1268 
1269 	/* Do we have a plex name? */
1270 	if (sdindex >= 0)				    /* we have a plex */
1271 	    strcpy(sd->name, PLEX[sd->plexno].name);	    /* take it from there */
1272 	else						    /* no way */
1273 	    throw_rude_remark(EINVAL, "Unnamed sd is not associated with a plex");
1274 	ksprintf(sdsuffix, ".s%d", sdindex);		    /* form the suffix */
1275 	strcat(sd->name, sdsuffix);			    /* and add it to the name */
1276     }
1277     /* do we have complete info for this subdisk? */
1278     if (sd->sectors < 0)
1279 	throw_rude_remark(EINVAL, "sd %s has no length spec", sd->name);
1280 
1281     if (state != sd_unallocated) {			    /* we had a specific state to set */
1282 	sd->state = state;				    /* do it now */
1283 	made_sd(sd);
1284     } else if (sd->state == sd_unallocated) {		    /* no, nothing set yet, */
1285 	sd->state = sd_empty;				    /* must be empty */
1286 	made_sd(sd);
1287     }
1288     if (autosize == 0)					    /* no autoconfig, do the drive now */
1289 	give_sd_to_drive(sdno);
1290     vinum_conf.subdisks_used++;				    /* one more in use */
1291 }
1292 
1293 /*
1294  * Handle a plex definition.
1295  */
1296 void
1297 config_plex(int update)
1298 {
1299     int parameter;
1300     int plexno;						    /* index of plex in vinum_conf */
1301     struct plex *plex;					    /* and pointer to it */
1302     int pindex = MAXPLEX;				    /* index in volume's plex list */
1303     int detached = 0;					    /* don't give it to a volume */
1304     int namedplexno;
1305     enum plexstate state = plex_init;			    /* state to set at end */
1306 
1307     current_plex = -1;					    /* forget the previous plex */
1308     plexno = get_empty_plex();				    /* allocate a plex */
1309     plex = &PLEX[plexno];				    /* and point to it */
1310     plex->plexno = plexno;				    /* and back to the config */
1311 
1312     for (parameter = 1; parameter < tokens; parameter++) {  /* look at the other tokens */
1313 	switch (get_keyword(token[parameter], &keyword_set)) {
1314 	    /*
1315 	     * If we have a 'name' parameter, it must
1316 	     * come first, because we're too lazy to tidy
1317 	     * up dangling refs if it comes later.
1318 	     */
1319 	case kw_name:
1320 	    namedplexno = find_plex(token[++parameter], 0); /* find an existing plex with this name */
1321 	    if (namedplexno >= 0) {			    /* plex exists already, */
1322 		if (PLEX[namedplexno].state == plex_referenced) { /* we've been told about this one */
1323 		    if (parameter > 2)			    /* we've done other things first, */
1324 			throw_rude_remark(EINVAL,
1325 			    "plex %s: name parameter must come first\n", /* no go */
1326 			    token[parameter]);
1327 		    else {
1328 			int i;
1329 			struct volume *vol;		    /* for tidying up dangling references */
1330 
1331 			*plex = PLEX[namedplexno];	    /* get the info */
1332 			plex->plex_dev = NULL;
1333 			made_plex(plex);
1334 			PLEX[namedplexno].state = plex_unallocated; /* and deallocate the other one */
1335 			made_plex(&PLEX[namedplexno]);
1336 			vol = &VOL[plex->volno];	    /* point to the volume */
1337 			for (i = 0; i < MAXPLEX; i++) {	    /* for each plex */
1338 			    if (vol->plex[i] == namedplexno)
1339 				vol->plex[i] = plexno;	    /* bend the pointer */
1340 			}
1341 		    }
1342 		    break;				    /* use this one */
1343 		}
1344 		if (update)				    /* are we updating? */
1345 		    return;				    /* yes: that's OK, just return */
1346 		else
1347 		    throw_rude_remark(EINVAL, "Duplicate plex %s", token[parameter]);
1348 	    } else {
1349 		    ksnprintf(plex->name, sizeof(plex->name),
1350 			      "%s", token[parameter]);
1351 	    }
1352 	    break;
1353 
1354 	case kw_detached:
1355 	    detached = 1;
1356 	    break;
1357 
1358 	case kw_org:					    /* plex organization */
1359 	    switch (get_keyword(token[++parameter], &keyword_set)) {
1360 	    case kw_concat:
1361 		plex->organization = plex_concat;
1362 		break;
1363 
1364 	    case kw_striped:
1365 		{
1366 		    int stripesize = sizespec(token[++parameter]);
1367 
1368 		    plex->organization = plex_striped;
1369 		    if (stripesize % DEV_BSIZE != 0)	    /* not a multiple of block size, */
1370 			throw_rude_remark(EINVAL, "plex %s: stripe size %d not a multiple of sector size",
1371 			    plex->name,
1372 			    stripesize);
1373 		    else
1374 			plex->stripesize = stripesize / DEV_BSIZE;
1375 		    break;
1376 		}
1377 
1378 	    case kw_raid4:
1379 		{
1380 		    int stripesize = sizespec(token[++parameter]);
1381 
1382 		    plex->organization = plex_raid4;
1383 		    if (stripesize % DEV_BSIZE != 0)	    /* not a multiple of block size, */
1384 			throw_rude_remark(EINVAL, "plex %s: stripe size %d not a multiple of sector size",
1385 			    plex->name,
1386 			    stripesize);
1387 		    else
1388 			plex->stripesize = stripesize / DEV_BSIZE;
1389 		    break;
1390 		}
1391 
1392 	    case kw_raid5:
1393 		{
1394 		    int stripesize = sizespec(token[++parameter]);
1395 
1396 		    plex->organization = plex_raid5;
1397 		    if (stripesize % DEV_BSIZE != 0)	    /* not a multiple of block size, */
1398 			throw_rude_remark(EINVAL, "plex %s: stripe size %d not a multiple of sector size",
1399 			    plex->name,
1400 			    stripesize);
1401 		    else
1402 			plex->stripesize = stripesize / DEV_BSIZE;
1403 		    break;
1404 		}
1405 
1406 	    default:
1407 		throw_rude_remark(EINVAL, "Invalid plex organization");
1408 	    }
1409 	    if (isstriped(plex)
1410 		&& (plex->stripesize == 0))		    /* didn't specify a valid stripe size */
1411 		throw_rude_remark(EINVAL, "Need a stripe size parameter");
1412 	    break;
1413 
1414 	case kw_volume:
1415 	    plex->volno = find_volume(token[++parameter], 1); /* insert a pointer to the volume */
1416 	    break;
1417 
1418 	case kw_sd:					    /* add a subdisk */
1419 	    {
1420 		int sdno;
1421 
1422 		sdno = find_subdisk(token[++parameter], 1); /* find a subdisk */
1423 		SD[sdno].plexoffset = sizespec(token[++parameter]); /* get the offset */
1424 		give_sd_to_plex(plexno, sdno);		    /* and insert it there */
1425 		break;
1426 	    }
1427 
1428 	case kw_state:
1429 	    parameter++;				    /* skip the keyword */
1430 	    if (vinum_conf.flags & VF_READING_CONFIG)
1431 		state = PlexState(token[parameter]);	    /* set the state */
1432 	    break;
1433 
1434 	default:
1435 	    throw_rude_remark(EINVAL, "plex %s, invalid keyword: %s",
1436 		plex->name,
1437 		token[parameter]);
1438 	}
1439     }
1440 
1441     if (plex->organization == plex_disorg)
1442 	throw_rude_remark(EINVAL, "No plex organization specified");
1443 
1444     if ((plex->volno < 0)				    /* we don't have a volume */
1445     &&(!detached))					    /* and we wouldn't object */
1446 	plex->volno = current_volume;
1447 
1448     if (plex->volno >= 0)
1449 	pindex = give_plex_to_volume(plex->volno, plexno);  /* Now tell the volume that it has this plex */
1450 
1451     /* Does the plex have a name?  If not, give it one */
1452     if (plex->name[0] == '\0') {			    /* no name */
1453 	char plexsuffix[8];				    /* form plex name suffix here */
1454 	/* Do we have a volume name? */
1455 	if (plex->volno >= 0)				    /* we have a volume */
1456 	    strcpy(plex->name,				    /* take it from there */
1457 		VOL[plex->volno].name);
1458 	else						    /* no way */
1459 	    throw_rude_remark(EINVAL, "Unnamed plex is not associated with a volume");
1460 	ksprintf(plexsuffix, ".p%d", pindex);		    /* form the suffix */
1461 	strcat(plex->name, plexsuffix);			    /* and add it to the name */
1462     }
1463     if (isstriped(plex)) {
1464 	plex->lock = (struct rangelock *)
1465 	    Malloc(PLEX_LOCKS * sizeof(struct rangelock));
1466 	CHECKALLOC(plex->lock, "vinum: Can't allocate lock table\n");
1467 	bzero((char *) plex->lock, PLEX_LOCKS * sizeof(struct rangelock));
1468     }
1469     /* Note the last plex we configured */
1470     current_plex = plexno;
1471     plex->state = state;				    /* set whatever state we chose */
1472     made_plex(plex);
1473     vinum_conf.plexes_used++;				    /* one more in use */
1474 }
1475 
1476 /*
1477  * Handle a volume definition.
1478  * If we find an error, print a message, deallocate the nascent volume, and return
1479  */
1480 void
1481 config_volume(int update)
1482 {
1483     int parameter;
1484     int volno;
1485     struct volume *vol;					    /* collect volume info here */
1486     int i;
1487 
1488     if (tokens < 2)					    /* not enough tokens */
1489 	throw_rude_remark(EINVAL, "Volume has no name");
1490     current_volume = -1;				    /* forget the previous volume */
1491     volno = find_volume(token[1], 1);			    /* allocate a volume to initialize */
1492     vol = &VOL[volno];					    /* and get a pointer */
1493     if (update && ((vol->flags & VF_CREATED) == 0))	    /* this volume exists already */
1494 	return;						    /* don't do anything */
1495     vol->flags &= ~VF_CREATED;				    /* it exists now */
1496 
1497     for (parameter = 2; parameter < tokens; parameter++) {  /* look at all tokens */
1498 	switch (get_keyword(token[parameter], &keyword_set)) {
1499 	case kw_plex:
1500 	    {
1501 		int plexno;				    /* index of this plex */
1502 		int myplexno;				    /* and index if it's already ours */
1503 
1504 		plexno = find_plex(token[++parameter], 1);  /* find a plex */
1505 		if (plexno < 0)				    /* couldn't */
1506 		    break;				    /* we've already had an error message */
1507 		myplexno = my_plex(volno, plexno);	    /* does it already belong to us? */
1508 		if (myplexno > 0)			    /* yes, shouldn't get it again */
1509 		    throw_rude_remark(EINVAL,
1510 			"Plex %s already belongs to volume %s",
1511 			token[parameter],
1512 			vol->name);
1513 		else if (vol->plexes + 1 > 8)		    /* another entry */
1514 		    throw_rude_remark(EINVAL,
1515 			"Too many plexes for volume %s",
1516 			vol->name);
1517 		vol->plex[vol->plexes] = plexno;
1518 		vol->plexes++;
1519 		PLEX[plexno].state = plex_referenced;	    /* we know something about it */
1520 		PLEX[plexno].volno = volno;		    /* and this volume references it */
1521 	    }
1522 	    break;
1523 
1524 	case kw_readpol:
1525 	    switch (get_keyword(token[++parameter], &keyword_set)) { /* decide what to do */
1526 	    case kw_round:
1527 		vol->preferred_plex = ROUND_ROBIN_READPOL;  /* default */
1528 		break;
1529 
1530 	    case kw_prefer:
1531 		{
1532 		    int myplexno;			    /* index of this plex */
1533 
1534 		    myplexno = find_plex(token[++parameter], 1); /* find a plex */
1535 		    if (myplexno < 0)			    /* couldn't */
1536 			break;				    /* we've already had an error message */
1537 		    myplexno = my_plex(volno, myplexno);    /* does it already belong to us? */
1538 		    if (myplexno > 0)			    /* yes */
1539 			vol->preferred_plex = myplexno;	    /* just note the index */
1540 		    else if (++vol->plexes > 8)		    /* another entry */
1541 			throw_rude_remark(EINVAL, "Too many plexes");
1542 		    else {				    /* space for the new plex */
1543 			vol->plex[vol->plexes - 1] = myplexno; /* add it to our list */
1544 			vol->preferred_plex = vol->plexes - 1; /* and note the index */
1545 		    }
1546 		}
1547 		break;
1548 
1549 	    default:
1550 		throw_rude_remark(EINVAL, "Invalid read policy");
1551 	    }
1552 
1553 	case kw_setupstate:
1554 	    vol->flags |= VF_CONFIG_SETUPSTATE;		    /* set the volume up later on */
1555 	    break;
1556 
1557 	case kw_state:
1558 	    parameter++;				    /* skip the keyword */
1559 	    if (vinum_conf.flags & VF_READING_CONFIG) {
1560 		vol->state = VolState(token[parameter]);    /* set the state */
1561 		made_vol(vol);
1562 	    }
1563 	    break;
1564 
1565 	    /*
1566 	     * XXX experimental ideas.  These are not
1567 	     * documented, and will not be until I
1568 	     * decide they're worth keeping
1569 	     */
1570 	case kw_writethrough:				    /* set writethrough mode */
1571 	    vol->flags |= VF_WRITETHROUGH;
1572 	    break;
1573 
1574 	case kw_writeback:				    /* set writeback mode */
1575 	    vol->flags &= ~VF_WRITETHROUGH;
1576 	    break;
1577 
1578 	case kw_raw:
1579 	    vol->flags |= VF_RAW;			    /* raw volume (no label) */
1580 	    break;
1581 
1582 	default:
1583 	    throw_rude_remark(EINVAL, "volume %s, invalid keyword: %s",
1584 		vol->name,
1585 		token[parameter]);
1586 	}
1587     }
1588     current_volume = volno;				    /* note last referred volume */
1589     vol->volno = volno;					    /* also note in volume */
1590 
1591     /*
1592      * Before we can actually use the volume, we need
1593      * a volume label.  We could start to fake one here,
1594      * but it will be a lot easier when we have some
1595      * to copy from the drives, so defer it until we
1596      * set up the configuration. XXX
1597      */
1598     if (vol->state == volume_unallocated) {
1599 	vol->state = volume_down;			    /* now ready to bring up at the end */
1600 	made_vol(vol);
1601     }
1602 
1603     /* Find out how big our volume is */
1604     for (i = 0; i < vol->plexes; i++)
1605 	vol->size = u64max(vol->size, PLEX[vol->plex[i]].length);
1606     vinum_conf.volumes_used++;				    /* one more in use */
1607 }
1608 
1609 /*
1610  * Parse a config entry.  CARE!  This destroys the original contents of the
1611  * config entry, which we don't really need after this.  More specifically, it
1612  * places \0 characters at the end of each token.
1613  *
1614  * Return 0 if all is well, otherwise EINVAL for invalid keyword,
1615  * or ENOENT if 'read' command doesn't find any drives.
1616  */
1617 int
1618 parse_config(char *cptr, struct keywordset *keyset, int update)
1619 {
1620     int status;
1621 
1622     status = 0;						    /* until proven otherwise */
1623     tokens = tokenize(cptr, token);			    /* chop up into tokens */
1624 
1625     if (tokens <= 0)					    /* screwed up or empty line */
1626 	return tokens;					    /* give up */
1627 
1628     if (token[0][0] == '#')				    /* comment line */
1629 	return 0;
1630 
1631     switch (get_keyword(token[0], keyset)) {		    /* decide what to do */
1632     case kw_read:					    /* read config from a specified drive */
1633 	status = vinum_scandisk(&token[1], tokens - 1);	    /* read the config from disk */
1634 	break;
1635 
1636     case kw_drive:
1637 	config_drive(update);
1638 	break;
1639 
1640     case kw_subdisk:
1641 	config_subdisk(update);
1642 	break;
1643 
1644     case kw_plex:
1645 	config_plex(update);
1646 	break;
1647 
1648     case kw_volume:
1649 	config_volume(update);
1650 	break;
1651 
1652 	/* Anything else is invalid in this context */
1653     default:
1654 	throw_rude_remark(EINVAL,			    /* should we die? */
1655 	    "Invalid configuration information: %s",
1656 	    token[0]);
1657     }
1658     return status;
1659 }
1660 
1661 /*
1662  * parse a line handed in from userland via ioctl.
1663  * This differs only by the error reporting mechanism:
1664  * we return the error indication in the reply to the
1665  * ioctl, so we need to set a global static pointer in
1666  * this file.  This technique works because we have
1667  * ensured that configuration is performed in a single-
1668  * threaded manner
1669  */
1670 int
1671 parse_user_config(char *cptr, struct keywordset *keyset)
1672 {
1673     int status;
1674 
1675     ioctl_reply = (struct _ioctl_reply *) cptr;
1676     status = parse_config(cptr, keyset, 0);
1677     if (status == ENOENT)				    /* from scandisk, but it can't tell us */
1678 	strcpy(ioctl_reply->msg, "no drives found");
1679     ioctl_reply = NULL;					    /* don't do this again */
1680     return status;
1681 }
1682 
1683 /* Remove an object */
1684 void
1685 remove(struct vinum_ioctl_msg *msg)
1686 {
1687     struct vinum_ioctl_msg message = *msg;		    /* make a copy to hand on */
1688 
1689     ioctl_reply = (struct _ioctl_reply *) msg;		    /* reinstate the address to reply to */
1690     ioctl_reply->error = 0;				    /* no error, */
1691     ioctl_reply->msg[0] = '\0';				    /* no message */
1692 
1693     switch (message.type) {
1694     case drive_object:
1695 	remove_drive_entry(message.index, message.force);
1696 	updateconfig(0);
1697 	return;
1698 
1699     case sd_object:
1700 	remove_sd_entry(message.index, message.force, message.recurse);
1701 	updateconfig(0);
1702 	return;
1703 
1704     case plex_object:
1705 	remove_plex_entry(message.index, message.force, message.recurse);
1706 	updateconfig(0);
1707 	return;
1708 
1709     case volume_object:
1710 	remove_volume_entry(message.index, message.force, message.recurse);
1711 	updateconfig(0);
1712 	return;
1713 
1714     default:
1715 	ioctl_reply->error = EINVAL;
1716 	strcpy(ioctl_reply->msg, "Invalid object type");
1717     }
1718 }
1719 
1720 /* Remove a drive.  */
1721 void
1722 remove_drive_entry(int driveno, int force)
1723 {
1724     struct drive *drive = &DRIVE[driveno];
1725     int sdno;
1726 
1727     if ((driveno > vinum_conf.drives_allocated)		    /* not a valid drive */
1728     ||(drive->state == drive_unallocated)) {		    /* or nothing there */
1729 	ioctl_reply->error = EINVAL;
1730 	strcpy(ioctl_reply->msg, "No such drive");
1731     } else if (drive->opencount > 0) {			    /* we have subdisks */
1732 	if (force) {					    /* do it at any cost */
1733 	    for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
1734 		if ((SD[sdno].state != sd_unallocated)	    /* subdisk is allocated */
1735 		&&(SD[sdno].driveno == driveno))	    /* and it belongs to this drive */
1736 		    remove_sd_entry(sdno, force, 0);
1737 	    }
1738 	    remove_drive(driveno);			    /* now remove it */
1739 	    vinum_conf.drives_used--;			    /* one less drive */
1740 	} else
1741 	    ioctl_reply->error = EBUSY;			    /* can't do that */
1742     } else {
1743 	remove_drive(driveno);				    /* just remove it */
1744 	vinum_conf.drives_used--;			    /* one less drive */
1745     }
1746 }
1747 
1748 /* remove a subdisk */
1749 void
1750 remove_sd_entry(int sdno, int force, int recurse)
1751 {
1752     struct sd *sd = &SD[sdno];
1753 
1754     if ((sdno > vinum_conf.subdisks_allocated)		    /* not a valid sd */
1755     ||(sd->state == sd_unallocated)) {			    /* or nothing there */
1756 	ioctl_reply->error = EINVAL;
1757 	strcpy(ioctl_reply->msg, "No such subdisk");
1758     } else if (sd->flags & VF_OPEN) {			    /* we're open */
1759 	ioctl_reply->error = EBUSY;			    /* no getting around that */
1760 	return;
1761     } else if (sd->plexno >= 0) {			    /* we have a plex */
1762 	if (force) {					    /* do it at any cost */
1763 	    struct plex *plex = &PLEX[sd->plexno];	    /* point to our plex */
1764 	    int mysdno;
1765 
1766 	    for (mysdno = 0;				    /* look for ourselves */
1767 		mysdno < plex->subdisks && &SD[plex->sdnos[mysdno]] != sd;
1768 		mysdno++);
1769 	    if (mysdno == plex->subdisks)		    /* didn't find it */
1770 		log(LOG_ERR,
1771 		    "Error removing subdisk %s: not found in plex %s\n",
1772 		    SD[mysdno].name,
1773 		    plex->name);
1774 	    else {					    /* remove the subdisk from plex */
1775 		if (mysdno < (plex->subdisks - 1))	    /* not the last subdisk */
1776 		    bcopy(&plex->sdnos[mysdno + 1],
1777 			&plex->sdnos[mysdno],
1778 			(plex->subdisks - 1 - mysdno) * sizeof(int));
1779 		plex->subdisks--;
1780 		sd->plexno = -1;			    /* disown the subdisk */
1781 	    }
1782 
1783 	    /*
1784 	     * Removing a subdisk from a striped or
1785 	     * RAID-4 or RAID-5 plex really tears the
1786 	     * hell out of the structure, and it needs
1787 	     * to be reinitialized.
1788 	     */
1789 	    if (plex->organization != plex_concat)	    /* not concatenated, */
1790 		set_plex_state(plex->plexno, plex_faulty, setstate_force); /* need to reinitialize */
1791 	    log(LOG_INFO, "vinum: removing %s\n", sd->name);
1792 	    free_sd(sdno);
1793 	} else
1794 	    ioctl_reply->error = EBUSY;			    /* can't do that */
1795     } else {
1796 	log(LOG_INFO, "vinum: removing %s\n", sd->name);
1797 	free_sd(sdno);
1798     }
1799 }
1800 
1801 /* remove a plex */
1802 void
1803 remove_plex_entry(int plexno, int force, int recurse)
1804 {
1805     struct plex *plex = &PLEX[plexno];
1806     int sdno;
1807 
1808     if ((plexno > vinum_conf.plexes_allocated)		    /* not a valid plex */
1809     ||(plex->state == plex_unallocated)) {		    /* or nothing there */
1810 	ioctl_reply->error = EINVAL;
1811 	strcpy(ioctl_reply->msg, "No such plex");
1812     } else if (plex->flags & VF_OPEN) {			    /* we're open */
1813 	ioctl_reply->error = EBUSY;			    /* no getting around that */
1814 	return;
1815     }
1816     if (plex->subdisks) {
1817 	if (force) {					    /* do it anyway */
1818 	    if (recurse) {				    /* remove all below */
1819 		int sds = plex->subdisks;
1820 		for (sdno = 0; sdno < sds; sdno++)
1821 		    free_sd(plex->sdnos[sdno]);		    /* free all subdisks */
1822 	    } else {					    /* just tear them out */
1823 		int sds = plex->subdisks;
1824 		for (sdno = 0; sdno < sds; sdno++)
1825 		    SD[plex->sdnos[sdno]].plexno = -1;	    /* no plex any more */
1826 	    }
1827 	} else {					    /* can't do it without force */
1828 	    ioctl_reply->error = EBUSY;			    /* can't do that */
1829 	    return;
1830 	}
1831     }
1832     if (plex->volno >= 0) {				    /* we are part of a volume */
1833 	if (force) {					    /* do it at any cost */
1834 	    struct volume *vol = &VOL[plex->volno];
1835 	    int myplexno;
1836 
1837 	    for (myplexno = 0; myplexno < vol->plexes; myplexno++)
1838 		if (vol->plex[myplexno] == plexno)	    /* found it */
1839 		    break;
1840 	    if (myplexno == vol->plexes)		    /* didn't find it.  Huh? */
1841 		log(LOG_ERR,
1842 		    "Error removing plex %s: not found in volume %s\n",
1843 		    plex->name,
1844 		    vol->name);
1845 	    if (myplexno < (vol->plexes - 1))		    /* not the last plex in the list */
1846 		bcopy(&vol->plex[myplexno + 1],
1847 		    &vol->plex[myplexno],
1848 		    vol->plexes - 1 - myplexno);
1849 	    vol->plexes--;
1850 	} else {
1851 	    ioctl_reply->error = EBUSY;			    /* can't do that */
1852 	    return;
1853 	}
1854     }
1855     log(LOG_INFO, "vinum: removing %s\n", plex->name);
1856     free_plex(plexno);
1857     vinum_conf.plexes_used--;				    /* one less plex */
1858 }
1859 
1860 /* remove a volume */
1861 void
1862 remove_volume_entry(int volno, int force, int recurse)
1863 {
1864     struct volume *vol = &VOL[volno];
1865     int plexno;
1866 
1867     if ((volno > vinum_conf.volumes_allocated)		    /* not a valid volume */
1868     ||(vol->state == volume_unallocated)) {		    /* or nothing there */
1869 	ioctl_reply->error = EINVAL;
1870 	strcpy(ioctl_reply->msg, "No such volume");
1871     } else if (vol->flags & VF_OPEN)			    /* we're open */
1872 	ioctl_reply->error = EBUSY;			    /* no getting around that */
1873     else if (vol->plexes) {
1874 	if (recurse && force) {				    /* remove all below */
1875 	    int plexes = vol->plexes;
1876 
1877 /*       for (plexno = plexes - 1; plexno >= 0; plexno--) */
1878 	    for (plexno = 0; plexno < plexes; plexno++)
1879 		remove_plex_entry(vol->plex[plexno], force, recurse);
1880 	    log(LOG_INFO, "vinum: removing %s\n", vol->name);
1881 	    free_volume(volno);
1882 	    vinum_conf.volumes_used--;			    /* one less volume */
1883 	} else
1884 	    ioctl_reply->error = EBUSY;			    /* can't do that */
1885     } else {
1886 	log(LOG_INFO, "vinum: removing %s\n", vol->name);
1887 	free_volume(volno);
1888 	vinum_conf.volumes_used--;			    /* one less volume */
1889     }
1890 }
1891 
1892 /* Currently called only from ioctl */
1893 void
1894 update_sd_config(int sdno, int diskconfig)
1895 {
1896     if (!diskconfig)
1897 	set_sd_state(sdno, sd_up, setstate_configuring);
1898     SD[sdno].flags &= ~VF_NEWBORN;
1899 }
1900 
1901 void
1902 update_plex_config(int plexno, int diskconfig)
1903 {
1904     u_int64_t size;
1905     int sdno;
1906     struct plex *plex = &PLEX[plexno];
1907     enum plexstate state = plex_up;			    /* state we want the plex in */
1908     int remainder;					    /* size of fractional stripe at end */
1909     int added_plex;					    /* set if we add a plex to a volume */
1910     int required_sds;					    /* number of subdisks we need */
1911     struct sd *sd;
1912     struct volume *vol;
1913     int data_sds = 0;					    /* number of sds carrying data */
1914 
1915     if (plex->state < plex_init)			    /* not a real plex, */
1916 	return;
1917     added_plex = 0;
1918     if (plex->volno >= 0) {				    /* we have a volume */
1919 	vol = &VOL[plex->volno];
1920 
1921 	/*
1922 	 * If we're newly born,
1923 	 * and the volume isn't,
1924 	 * and it has other plexes,
1925 	 * and we didn't read this mess from disk,
1926 	 * we were added later.
1927 	 */
1928 	if ((plex->flags & VF_NEWBORN)
1929 	    && ((vol->flags & VF_NEWBORN) == 0)
1930 	    && (vol->plexes > 0)
1931 	    && (diskconfig == 0)) {
1932 	    added_plex = 1;
1933 	    state = plex_down;				    /* so take ourselves down */
1934 	}
1935     }
1936     /*
1937      * Check that our subdisks make sense.  For
1938      * striped, RAID-4 and RAID-5 plexes, we need at
1939      * least two subdisks, and they must all be the
1940      * same size.
1941      */
1942     if (plex->organization == plex_striped) {
1943 	data_sds = plex->subdisks;
1944 	required_sds = 2;
1945     } else if (isparity(plex)) {			    /* RAID 4 or 5 */
1946 	data_sds = plex->subdisks - 1;
1947 	required_sds = 3;
1948     } else
1949 	required_sds = 0;
1950     if (required_sds > 0) {				    /* striped, RAID-4 or RAID-5 */
1951 	if (plex->subdisks < required_sds) {
1952 	    log(LOG_ERR,
1953 		"vinum: plex %s does not have at least %d subdisks\n",
1954 		plex->name,
1955 		required_sds);
1956 	    state = plex_faulty;
1957 	}
1958 	/*
1959 	 * Now see if the plex size is a multiple of
1960 	 * the stripe size.  If not, trim off the end
1961 	 * of each subdisk and return it to the drive.
1962 	 */
1963 	if (plex->length > 0) {
1964 	    if (data_sds > 0) {
1965 		if (plex->stripesize > 0) {
1966 		    remainder = (int) (plex->length	    /* are we exact? */
1967 			% ((u_int64_t) plex->stripesize * data_sds));
1968 		    if (remainder) {			    /* no */
1969 			log(LOG_INFO, "vinum: removing %d blocks of partial stripe at the end of %s\n",
1970 			    remainder,
1971 			    plex->name);
1972 			plex->length -= remainder;	    /* shorten the plex */
1973 			remainder /= data_sds;		    /* spread the remainder amongst the sds */
1974 			for (sdno = 0; sdno < plex->subdisks; sdno++) {
1975 			    sd = &SD[plex->sdnos[sdno]];    /* point to the subdisk */
1976 			    return_drive_space(sd->driveno, /* return the space */
1977 				sd->driveoffset + sd->sectors - remainder,
1978 				remainder);
1979 			    sd->sectors -= remainder;	    /* and shorten it */
1980 			}
1981 		    }
1982 		} else					    /* no data sds, */
1983 		    plex->length = 0;			    /* reset length */
1984 	    }
1985 	}
1986     }
1987     size = 0;
1988     for (sdno = 0; sdno < plex->subdisks; sdno++) {
1989 	sd = &SD[plex->sdnos[sdno]];
1990 	if (isstriped(plex)
1991 	    && (sdno > 0)
1992 	    && (sd->sectors != SD[plex->sdnos[sdno - 1]].sectors)) {
1993 	    log(LOG_ERR, "vinum: %s must have equal sized subdisks\n", plex->name);
1994 	    state = plex_down;
1995 	}
1996 	size += sd->sectors;
1997 	if (added_plex) {				    /* we were added later */
1998 	    sd->state = sd_stale;			    /* stale until proven otherwise */
1999 	    made_sd(sd);
2000 	}
2001     }
2002 
2003     if (plex->subdisks) {				    /* plex has subdisks, calculate size */
2004 	/*
2005 	 * XXX We shouldn't need to calculate the size any
2006 	 * more.  Check this some time
2007 	 */
2008 	if (isparity(plex))
2009 	    size = size / plex->subdisks * (plex->subdisks - 1); /* less space for RAID-4 and RAID-5 */
2010 	if (plex->length != size)
2011 	    log(LOG_INFO,
2012 		"Correcting length of %s: was %lld, is %lld\n",
2013 		plex->name,
2014 		(long long) plex->length,
2015 		(long long) size);
2016 	plex->length = size;
2017     } else {						    /* no subdisks, */
2018 	plex->length = 0;				    /* no size */
2019 	state = plex_down;				    /* take it down */
2020     }
2021     update_plex_state(plexno);				    /* set the state */
2022     plex->flags &= ~VF_NEWBORN;
2023 }
2024 
2025 void
2026 update_volume_config(int volno, int diskconfig)
2027 {
2028     struct volume *vol = &VOL[volno];
2029     struct plex *plex;
2030     int plexno;
2031 
2032     if (vol->state != volume_unallocated)
2033 	/*
2034 	 * Recalculate the size of the volume,
2035 	 * which might change if the original
2036 	 * plexes were not a multiple of the
2037 	 * stripe size.
2038 	 */
2039     {
2040 	vol->size = 0;
2041 	for (plexno = 0; plexno < vol->plexes; plexno++) {
2042 	    plex = &PLEX[vol->plex[plexno]];
2043 	    vol->size = u64max(plex->length, vol->size);
2044 	    plex->volplexno = plexno;	    /* note it in the plex */
2045 	}
2046     }
2047     vol->flags &= ~VF_NEWBORN;		    /* no longer newly born */
2048 }
2049 
2050 /*
2051  * Update the global configuration.
2052  * diskconfig is != 0 if we're reading in a config
2053  * from disk.  In this case, we don't try to
2054  * bring the devices up, though we will bring
2055  * them down if there's some error which got
2056  * missed when writing to disk.
2057  */
2058 void
2059 updateconfig(int diskconfig)
2060 {
2061     int plexno;
2062     int volno;
2063 
2064     for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++)
2065 	update_plex_config(plexno, diskconfig);
2066 
2067     for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) {
2068 	if (VOL[volno].state > volume_uninit) {
2069 	    VOL[volno].flags &= ~VF_CONFIG_SETUPSTATE;	    /* no more setupstate */
2070 	    update_volume_state(volno);
2071 	    update_volume_config(volno, diskconfig);
2072 	}
2073     }
2074     save_config();
2075 }
2076 
2077 /*
2078  * Start manual changes to the configuration and lock out
2079  * others who may wish to do so.
2080  * XXX why do we need this and lock_config too?
2081  */
2082 int
2083 start_config(int force)
2084 {
2085     int error;
2086 
2087     current_drive = -1;					    /* note the last drive we mention, for
2088 							    * some defaults */
2089     current_plex = -1;					    /* and the same for the last plex */
2090     current_volume = -1;				    /* and the last volume */
2091     while ((vinum_conf.flags & VF_CONFIGURING) != 0) {
2092 	vinum_conf.flags |= VF_WILL_CONFIGURE;
2093 	if ((error = tsleep(&vinum_conf, PCATCH, "vincfg", 0)) != 0)
2094 	    return error;
2095     }
2096     /*
2097      * We need two flags here: VF_CONFIGURING
2098      * tells other processes to hold off (this
2099      * function), and VF_CONFIG_INCOMPLETE
2100      * tells the state change routines not to
2101      * propagate incrememntal state changes
2102      */
2103     vinum_conf.flags |= VF_CONFIGURING | VF_CONFIG_INCOMPLETE;
2104     if (force)
2105 	vinum_conf.flags |= VF_FORCECONFIG;		    /* overwrite differently named drives */
2106     current_drive = -1;					    /* reset the defaults */
2107     current_plex = -1;					    /* and the same for the last plex */
2108     current_volume = -1;				    /* and the last volme */
2109     return 0;
2110 }
2111 
2112 /*
2113  * Update the config if update is 1, and unlock
2114  * it.  We won't update the configuration if we
2115  * are called in a recursive loop via throw_rude_remark.
2116  */
2117 void
2118 finish_config(int update)
2119 {
2120     /* we've finished our config */
2121     vinum_conf.flags &= ~(VF_CONFIG_INCOMPLETE | VF_READING_CONFIG | VF_FORCECONFIG);
2122     if (update)
2123 	updateconfig(0);				    /* so update things */
2124     else
2125 	updateconfig(1);				    /* do some updates only */
2126     vinum_conf.flags &= ~VF_CONFIGURING;		    /* and now other people can take a turn */
2127     if ((vinum_conf.flags & VF_WILL_CONFIGURE) != 0) {
2128 	vinum_conf.flags &= ~VF_WILL_CONFIGURE;
2129 	wakeup_one(&vinum_conf);
2130     }
2131 }
2132 /* Local Variables: */
2133 /* fill-column: 50 */
2134 /* End: */
2135