xref: /dragonfly/sys/dev/raid/vinum/vinumio.c (revision 3f625015)
1 /*-
2  * Copyright (c) 1997, 1998
3  *	Nan Yang Computer Services Limited.  All rights reserved.
4  *
5  *  This software is distributed under the so-called ``Berkeley
6  *  License'':
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by Nan Yang Computer
19  *      Services Limited.
20  * 4. Neither the name of the Company nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * This software is provided ``as is'', and any express or implied
25  * warranties, including, but not limited to, the implied warranties of
26  * merchantability and fitness for a particular purpose are disclaimed.
27  * In no event shall the company or contributors be liable for any
28  * direct, indirect, incidental, special, exemplary, or consequential
29  * damages (including, but not limited to, procurement of substitute
30  * goods or services; loss of use, data, or profits; or business
31  * interruption) however caused and on any theory of liability, whether
32  * in contract, strict liability, or tort (including negligence or
33  * otherwise) arising in any way out of the use of this software, even if
34  * advised of the possibility of such damage.
35  *
36  * $Id: vinumio.c,v 1.30 2000/05/10 23:23:30 grog Exp grog $
37  * $FreeBSD: src/sys/dev/vinum/vinumio.c,v 1.52.2.6 2002/05/02 08:43:44 grog Exp $
38  * $DragonFly: src/sys/dev/raid/vinum/vinumio.c,v 1.22 2007/05/08 02:31:42 dillon Exp $
39  */
40 
41 #include "vinumhdr.h"
42 #include "request.h"
43 #include <vm/vm_zone.h>
44 
/* Copy the string txt to s; returns a pointer to the NUL written into s. */
static char *sappend(char *txt, char *s);
/* Comparison callback taking two untyped pointers (qsort-style signature). */
static int drivecmp(const void *va, const void *vb);
47 
48 /*
49  * Open the device associated with the drive, and set drive's vp.
50  * Return an error number
51  */
52 int
53 open_drive(struct drive *drive, struct proc *p, int verbose)
54 {
55     int devmajor;					    /* major devs for disk device */
56     int devminor;					    /* minor devs for disk device */
57     int unit;
58     char *dname;
59 
60     if (bcmp(drive->devicename, "/dev/", 5))		    /* device name doesn't start with /dev */
61 	return ENOENT;					    /* give up */
62     if (drive->flags & VF_OPEN)				    /* open already, */
63 	return EBUSY;					    /* don't do it again */
64 
65     /*
66      * Yes, Bruce, I know this is horrible, but we
67      * don't have a root file system when we first
68      * try to do this.  If you can come up with a
69      * better solution, I'd really like it.  I'm
70      * just putting it in now to add ammuntion to
71      * moving the system to devfs.
72      */
73     dname = &drive->devicename[5];
74     drive->dev = NULL;					    /* no device yet */
75 
76     /* Find the device */
77     if (bcmp(dname, "ad", 2) == 0)			    /* IDE disk */
78 	devmajor = 116;
79     else if (bcmp(dname, "wd", 2) == 0)			    /* IDE disk */
80 	devmajor = 3;
81     else if (bcmp(dname, "da", 2) == 0)
82 	devmajor = 13;
83     else if (bcmp(dname, "vn", 2) == 0)
84 	devmajor = 43;
85     else if (bcmp(dname, "md", 2) == 0)
86 	devmajor = 95;
87     else if (bcmp(dname, "vkd", 3) == 0) {
88 	devmajor = 97;
89 	dname += 1;
90     } else if (bcmp(dname, "amrd", 4) == 0) {
91 	devmajor = 133;
92 	dname += 2;
93     } else if (bcmp(dname, "mlxd", 4) == 0) {
94 	devmajor = 131;
95 	dname += 2;
96     } else if (bcmp(dname, "idad", 4) == 0) {
97 	devmajor = 109;
98 	dname += 2;
99     } else if (bcmp(dname, "twed", 4) == 0) {               /* 3ware raid */
100       devmajor = 147;
101       dname += 2;
102     } else if (bcmp(dname, "ar", 2) == 0) {
103 	devmajor = 157;
104     } else
105 	return ENODEV;
106     dname += 2;						    /* point past */
107 
108     /*
109      * Found the device.  We can expect one of
110      * two formats for the rest: a unit number,
111      * then either a partition letter for the
112      * compatiblity partition (e.g. h) or a
113      * slice ID and partition (e.g. s2e).
114      * Create a minor number for each of them.
115      */
116     unit = 0;
117     while ((*dname >= '0')				    /* unit number */
118     &&(*dname <= '9')) {
119 	unit = unit * 10 + *dname - '0';
120 	dname++;
121     }
122 
123     if (*dname == 's') {				    /* slice */
124 	if (((dname[1] < '1') || (dname[1] > '4'))	    /* invalid slice */
125 	||((dname[2] < 'a') || (dname[2] > 'p')))	    /* or invalid partition */
126 	    return ENODEV;
127 	devminor = dkmakeminor(unit, dname[1] - '0' + 1, (dname[2] - 'a'));
128     } else {						    /* compatibility partition */
129 	if ((*dname < 'a') || (*dname > 'p'))		    /* or invalid partition */
130 	    return ENODEV;
131 	devminor = dkmakeminor(unit, 0, (dname[0] - 'a'));
132     }
133 
134     /*
135      * Disallow partition c
136      */
137     if ((((devminor >> 17) & 0x08) | (devminor & 7)) == 2)
138 	return ENOTTY;					    /* not buying that */
139 
140     drive->dev = udev2dev(makeudev(devmajor, devminor), 0);
141 
142     if (drive->dev == NULL)
143 	return ENODEV;
144 
145     drive->dev->si_iosize_max = DFLTPHYS;
146     if (dev_is_good(drive->dev))
147 	drive->lasterror = dev_dopen(drive->dev, FWRITE, 0, proc0.p_ucred);
148     else
149 	drive->lasterror = ENOENT;
150 
151     if (drive->lasterror != 0) {			    /* failed */
152 	drive->state = drive_down;			    /* just force it down */
153 	if (verbose)
154 	    log(LOG_WARNING,
155 		"vinum open_drive %s: failed with error %d\n",
156 		drive->devicename, drive->lasterror);
157     } else
158 	drive->flags |= VF_OPEN;			    /* we're open now */
159 
160     return drive->lasterror;
161 }
162 
163 /*
164  * Set some variables in the drive struct
165  * in more convenient form.  Return error indication
166  */
167 int
168 set_drive_parms(struct drive *drive)
169 {
170     drive->blocksize = BLKDEV_IOSIZE;			    /* do we need this? */
171     drive->secsperblock = drive->blocksize		    /* number of sectors per block */
172 	/ drive->partinfo.disklab->d_secsize;
173 
174     /* Now update the label part */
175     bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
176     getmicrotime(&drive->label.date_of_birth);		    /* and current time */
177     drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */
178     *((u_int64_t) drive->partinfo.disklab->d_secsize);
179 #if VINUMDEBUG
180     if (debug & DEBUG_BIGDRIVE)				    /* pretend we're 100 times as big */
181 	drive->label.drive_size *= 100;
182 #endif
183 
184     /* number of sectors available for subdisks */
185     drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
186 
187     /*
188      * Bug in 3.0 as of January 1998: you can open
189      * non-existent slices.  They have a length of 0.
190      */
191     if (drive->label.drive_size < MINVINUMSLICE) {	    /* too small to worry about */
192 	set_drive_state(drive->driveno, drive_down, setstate_force);
193 	drive->lasterror = ENOSPC;
194 	return ENOSPC;
195     }
196     drive->freelist_size = INITIAL_DRIVE_FREELIST;	    /* initial number of entries */
197     drive->freelist = (struct drive_freelist *)
198 	Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
199     if (drive->freelist == NULL)			    /* can't malloc, dammit */
200 	return ENOSPC;
201     drive->freelist_entries = 1;			    /* just (almost) the complete drive */
202     drive->freelist[0].offset = DATASTART;		    /* starts here */
203     drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
204     if (drive->label.name[0] != '\0')			    /* got a name */
205 	set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */
206     else						    /* we know about it, but that's all */
207 	drive->state = drive_referenced;
208     return 0;
209 }
210 
211 /*
212  * Initialize a drive: open the device and add device
213  * information
214  */
215 int
216 init_drive(struct drive *drive, int verbose)
217 {
218     if (drive->devicename[0] != '/') {
219 	drive->lasterror = EINVAL;
220 	log(LOG_ERR, "vinum: Can't open drive without drive name\n");
221 	return EINVAL;
222     }
223     drive->lasterror = open_drive(drive, curproc, verbose); /* open the drive */
224     if (drive->lasterror)
225 	return drive->lasterror;
226 
227     drive->lasterror = dev_dioctl(
228 	drive->dev,
229 	DIOCGPART,
230 	(caddr_t) & drive->partinfo,
231 	FREAD,
232 	proc0.p_ucred);
233     if (drive->lasterror) {
234 	if (verbose)
235 	    log(LOG_WARNING,
236 		"vinum open_drive %s: Can't get partition information, drive->lasterror %d\n",
237 		drive->devicename,
238 		drive->lasterror);
239 	close_drive(drive);
240 	return drive->lasterror;
241     }
242     if (drive->partinfo.part->p_fstype != FS_VINUM) {	    /* not Vinum */
243 	drive->lasterror = EFTYPE;
244 	if (verbose)
245 	    log(LOG_WARNING,
246 		"vinum open_drive %s: Wrong partition type for vinum\n",
247 		drive->devicename);
248 	close_drive(drive);
249 	return EFTYPE;
250     }
251     return set_drive_parms(drive);			    /* set various odds and ends */
252 }
253 
254 /* Close a drive if it's open. */
255 void
256 close_drive(struct drive *drive)
257 {
258     LOCKDRIVE(drive);					    /* keep the daemon out */
259     if (drive->flags & VF_OPEN)
260 	close_locked_drive(drive);			    /* and close it */
261     if (drive->state > drive_down)			    /* if it's up */
262 	drive->state = drive_down;			    /* make sure it's down */
263     unlockdrive(drive);
264 }
265 
266 /*
267  * Real drive close code, called with drive already locked.
268  * We have also checked that the drive is open.  No errors.
269  */
270 void
271 close_locked_drive(struct drive *drive)
272 {
273     /*
274      * If we can't access the drive, we can't flush
275      * the queues, which spec_close() will try to
276      * do.  Get rid of them here first.
277      */
278     drive->lasterror = dev_dclose(drive->dev, 0, 0);
279     drive->flags &= ~VF_OPEN;				    /* no longer open */
280 }
281 
282 /*
283  * Remove drive from the configuration.
284  * Caller must ensure that it isn't active.
285  */
286 void
287 remove_drive(int driveno)
288 {
289     struct drive *drive = &vinum_conf.drive[driveno];
290     struct vinum_hdr *vhdr;				    /* buffer for header */
291     int error;
292 
293     if (drive->state > drive_referenced) {		    /* real drive */
294 	if (drive->state == drive_up) {
295 	    vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);	/* allocate buffer */
296 	    CHECKALLOC(vhdr, "Can't allocate memory");
297 	    error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
298 	    if (error)
299 		drive->lasterror = error;
300 	    else {
301 		vhdr->magic = VINUM_NOMAGIC;		    /* obliterate the magic, but leave the rest */
302 		write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
303 	    }
304 	    Free(vhdr);
305 	}
306 	free_drive(drive);				    /* close it and free resources */
307 	save_config();					    /* and save the updated configuration */
308     }
309 }
310 
311 /*
312  * Transfer drive data.  Usually called from one of these defines;
313  * #define read_drive(a, b, c, d) driveio (a, b, c, d, BUF_CMD_READ)
314  * #define write_drive(a, b, c, d) driveio (a, b, c, d, BUF_CMD_WRITE)
315  *
316  * length and offset are in bytes, but must be multiples of sector
317  * size.  The function *does not check* for this condition, and
318  * truncates ruthlessly.
319  * Return error number
320  */
321 int
322 driveio(struct drive *drive, char *buf, size_t length, off_t offset, buf_cmd_t cmd)
323 {
324     int error;
325     struct buf *bp;
326     caddr_t saveaddr;
327 
328     error = 0;						    /* to keep the compiler happy */
329     while (length) {					    /* divide into small enough blocks */
330 	int len = min(length, MAXBSIZE);		    /* maximum block device transfer is MAXBSIZE */
331 
332 	bp = geteblk(len);				    /* get a buffer header */
333 	bp->b_cmd = cmd;
334 	bp->b_bio1.bio_offset = offset;			    /* disk offset */
335 	saveaddr = bp->b_data;
336 	bp->b_data = buf;
337 	bp->b_bcount = len;
338 	dev_dstrategy(drive->dev, &bp->b_bio1);
339 	error = biowait(bp);
340 	bp->b_data = saveaddr;
341 	bp->b_flags |= B_INVAL | B_AGE;
342 	bp->b_flags &= ~B_ERROR;
343 	brelse(bp);
344 	if (error)
345 	    break;
346 	length -= len;					    /* update pointers */
347 	buf += len;
348 	offset += len;
349     }
350     return error;
351 }
352 
353 /*
354  * Check a drive for a vinum header.  If found,
355  * update the drive information.  We come here
356  * with a partially populated drive structure
357  * which includes the device name.
358  *
359  * Return information on what we found.
360  *
361  * This function is called from two places: check_drive,
362  * which wants to find out whether the drive is a
363  * Vinum drive, and config_drive, which asserts that
364  * it is a vinum drive.  In the first case, we don't
365  * print error messages (verbose==0), in the second
366  * we do (verbose==1).
367  */
368 enum drive_label_info
369 read_drive_label(struct drive *drive, int verbose)
370 {
371     int error;
372     int result;						    /* result of our search */
373     struct vinum_hdr *vhdr;				    /* and as header */
374 
375     error = init_drive(drive, 0);			    /* find the drive */
376     if (error)						    /* find the drive */
377 	return DL_CANT_OPEN;				    /* not ours */
378 
379     vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);	    /* allocate buffers */
380     CHECKALLOC(vhdr, "Can't allocate memory");
381 
382     drive->state = drive_up;				    /* be optimistic */
383     error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
384     if (vhdr->magic == VINUM_MAGIC) {			    /* ours! */
385 	if (drive->label.name[0]			    /* we have a name for this drive */
386 	&&(strcmp(drive->label.name, vhdr->label.name))) {  /* but it doesn't match the real name */
387 	    drive->lasterror = EINVAL;
388 	    result = DL_WRONG_DRIVE;			    /* it's the wrong drive */
389 	    drive->state = drive_unallocated;		    /* put it back, it's not ours */
390 	} else
391 	    result = DL_OURS;
392 	/*
393 	 * We copy the drive anyway so that we have
394 	 * the correct name in the drive info.  This
395 	 * may not be the name specified
396 	 */
397 	drive->label = vhdr->label;			    /* put in the label information */
398     } else if (vhdr->magic == VINUM_NOMAGIC)		    /* was ours, but we gave it away */
399 	result = DL_DELETED_LABEL;			    /* and return the info */
400     else
401 	result = DL_NOT_OURS;				    /* we could have it, but we don't yet */
402     Free(vhdr);						    /* that's all. */
403     return result;
404 }
405 
406 /*
407  * Check a drive for a vinum header.  If found,
408  * read configuration information from the drive and
409  * incorporate the data into the configuration.
410  *
411  * Return drive number.
412  */
413 struct drive *
414 check_drive(char *devicename)
415 {
416     int driveno;
417     int i;
418     struct drive *drive;
419 
420     driveno = find_drive_by_dev(devicename, 1);		    /* if entry doesn't exist, create it */
421     drive = &vinum_conf.drive[driveno];			    /* and get a pointer */
422 
423     if (read_drive_label(drive, 0) == DL_OURS) {	    /* one of ours */
424 	for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */
425 	    if ((i != driveno)				    /* not this drive */
426 	    &&(DRIVE[i].state != drive_unallocated)	    /* and it's allocated */
427 	    &&(strcmp(DRIVE[i].label.name,
428 			DRIVE[driveno].label.name) == 0)) { /* and it has the same name */
429 		struct drive *mydrive = &DRIVE[i];
430 
431 		if (mydrive->devicename[0] == '/') {	    /* we know a device name for it */
432 		    /*
433 		     * set an error, but don't take the
434 		     * drive down: that would cause unneeded
435 		     * error messages.
436 		     */
437 		    drive->lasterror = EEXIST;
438 		    break;
439 		} else {				    /* it's just a place holder, */
440 		    int sdno;
441 
442 		    for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */
443 			if ((SD[sdno].driveno == i)	    /* it's pointing to this one, */
444 			&&(SD[sdno].state != sd_unallocated)) {	/* and it's a real subdisk */
445 			    SD[sdno].driveno = drive->driveno; /* point to the one we found */
446 			    update_sd_state(sdno);	    /* and update its state */
447 			}
448 		    }
449 		    bzero(mydrive, sizeof(struct drive));   /* don't deallocate it, just remove it */
450 		}
451 	    }
452 	}
453     } else {
454 	if (drive->lasterror == 0)
455 	    drive->lasterror = ENODEV;
456 	close_drive(drive);
457 	drive->state = drive_down;
458     }
459     return drive;
460 }
461 
/*
 * Copy the NUL-terminated string txt to s, terminator included, and
 * return a pointer to the terminator just written, i.e. the place
 * where a further append would start.  No bounds are checked.
 */
static char *
sappend(char *txt, char *s)
{
    char c;

    do {
	c = *txt++;
	*s++ = c;
    } while (c != '\0');
    return s - 1;
}
468 
469 void
470 format_config(char *config, int len)
471 {
472     int i;
473     int j;
474     char *s = config;
475     char *configend = &config[len];
476 
477     bzero(config, len);
478 
479     /* First write the volume configuration */
480     for (i = 0; i < vinum_conf.volumes_allocated; i++) {
481 	struct volume *vol;
482 
483 	vol = &vinum_conf.volume[i];
484 	if ((vol->state > volume_uninit)
485 	    && (vol->name[0] != '\0')) {		    /* paranoia */
486 	    ksnprintf(s,
487 		configend - s,
488 		"volume %s state %s",
489 		vol->name,
490 		volume_state(vol->state));
491 	    while (*s)
492 		s++;					    /* find the end */
493 	    if (vol->preferred_plex >= 0)		    /* preferences, */
494 		ksnprintf(s,
495 		    configend - s,
496 		    " readpol prefer %s",
497 		    vinum_conf.plex[vol->preferred_plex].name);
498 	    while (*s)
499 		s++;					    /* find the end */
500 	    s = sappend("\n", s);
501 	}
502     }
503 
504     /* Then the plex configuration */
505     for (i = 0; i < vinum_conf.plexes_allocated; i++) {
506 	struct plex *plex;
507 
508 	plex = &vinum_conf.plex[i];
509 	if ((plex->state > plex_referenced)
510 	    && (plex->name[0] != '\0')) {		    /* paranoia */
511 	    ksnprintf(s,
512 		configend - s,
513 		"plex name %s state %s org %s ",
514 		plex->name,
515 		plex_state(plex->state),
516 		plex_org(plex->organization));
517 	    while (*s)
518 		s++;					    /* find the end */
519 	    if (isstriped(plex)) {
520 		ksnprintf(s,
521 		    configend - s,
522 		    "%ds ",
523 		    (int) plex->stripesize);
524 		while (*s)
525 		    s++;				    /* find the end */
526 	    }
527 	    if (plex->volno >= 0)			    /* we have a volume */
528 		ksnprintf(s,
529 		    configend - s,
530 		    "vol %s ",
531 		    vinum_conf.volume[plex->volno].name);
532 	    while (*s)
533 		s++;					    /* find the end */
534 	    for (j = 0; j < plex->subdisks; j++) {
535 		ksnprintf(s,
536 		    configend - s,
537 		    " sd %s",
538 		    vinum_conf.sd[plex->sdnos[j]].name);
539 	    }
540 	    s = sappend("\n", s);
541 	}
542     }
543 
544     /* And finally the subdisk configuration */
545     for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
546 	struct sd *sd;
547 	char *drivename;
548 
549 	sd = &SD[i];
550 	if ((sd->state != sd_referenced)
551 	    && (sd->state != sd_unallocated)
552 	    && (sd->name[0] != '\0')) {			    /* paranoia */
553 	    drivename = vinum_conf.drive[sd->driveno].label.name;
554 	    /*
555 	     * XXX We've seen cases of dead subdisks
556 	     * which don't have a drive.  If we let them
557 	     * through here, the drive name is null, so
558 	     * they get the drive named 'plex'.
559 	     *
560 	     * This is a breakage limiter, not a fix.
561 	     */
562 	    if (drivename[0] == '\0')
563 		drivename = "*invalid*";
564 	    ksnprintf(s,
565 		configend - s,
566 		"sd name %s drive %s plex %s len %llus driveoffset %llus state %s",
567 		sd->name,
568 		drivename,
569 		vinum_conf.plex[sd->plexno].name,
570 		(unsigned long long) sd->sectors,
571 		(unsigned long long) sd->driveoffset,
572 		sd_state(sd->state));
573 	    while (*s)
574 		s++;					    /* find the end */
575 	    if (sd->plexno >= 0)
576 		ksnprintf(s,
577 		    configend - s,
578 		    " plexoffset %llds",
579 		    (long long) sd->plexoffset);
580 	    else
581 		ksnprintf(s, configend - s, " detached");
582 	    while (*s)
583 		s++;					    /* find the end */
584 	    if (sd->flags & VF_RETRYERRORS) {
585 		ksnprintf(s, configend - s, " retryerrors");
586 		while (*s)
587 		    s++;				    /* find the end */
588 	    }
589 	    ksnprintf(s, configend - s, " \n");
590 	    while (*s)
591 		s++;					    /* find the end */
592 	}
593     }
594     if (s > &config[len - 2])
595 	panic("vinum: configuration data overflow");
596 }
597 
/*
 * Issue a save config request to the daemon.  The actual work
 * is done in process context by daemon_save_config.
 */
void
save_config(void)
{
    queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) NULL);
}
607 
608 /*
609  * Write the configuration to all vinum slices.  This
610  * is performed by the d�mon only
611  */
612 void
613 daemon_save_config(void)
614 {
615     int error;
616     int written_config;					    /* set when we first write the config to disk */
617     int driveno;
618     struct drive *drive;				    /* point to current drive info */
619     struct vinum_hdr *vhdr;				    /* and as header */
620     char *config;					    /* point to config data */
621     int wlabel_on;					    /* to set writing label on/off */
622 
623     /* don't save the configuration while we're still working on it */
624     if (vinum_conf.flags & VF_CONFIGURING)
625 	return;
626     written_config = 0;					    /* no config written yet */
627     /* Build a volume header */
628     vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);	    /* get space for the config data */
629     CHECKALLOC(vhdr, "Can't allocate config data");
630     vhdr->magic = VINUM_MAGIC;				    /* magic number */
631     vhdr->config_length = MAXCONFIG;			    /* length of following config info */
632 
633     config = Malloc(MAXCONFIG);				    /* get space for the config data */
634     CHECKALLOC(config, "Can't allocate config data");
635 
636     format_config(config, MAXCONFIG);
637     error = 0;						    /* no errors yet */
638     for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
639 	drive = &vinum_conf.drive[driveno];		    /* point to drive */
640 	if (drive->state > drive_referenced) {
641 	    LOCKDRIVE(drive);				    /* don't let it change */
642 
643 	    /*
644 	     * First, do some drive consistency checks.  Some
645 	     * of these are kludges, others require a process
646 	     * context and couldn't be done before
647 	     */
648 	    if ((drive->devicename[0] == '\0')
649 		|| (drive->label.name[0] == '\0')) {
650 		unlockdrive(drive);
651 		free_drive(drive);			    /* get rid of it */
652 		break;
653 	    }
654 	    if (((drive->flags & VF_OPEN) == 0)		    /* drive not open */
655 	    &&(drive->state > drive_down)) {		    /* and it thinks it's not down */
656 		unlockdrive(drive);
657 		set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */
658 		continue;
659 	    }
660 	    if ((drive->state == drive_down)		    /* it's down */
661 	    &&(drive->flags & VF_OPEN)) {		    /* but open, */
662 		unlockdrive(drive);
663 		close_drive(drive);			    /* close it */
664 	    } else if (drive->state > drive_down) {
665 		getmicrotime(&drive->label.last_update);    /* time of last update is now */
666 		bcopy((char *) &drive->label,		    /* and the label info from the drive structure */
667 		    (char *) &vhdr->label,
668 		    sizeof(vhdr->label));
669 		if ((drive->state != drive_unallocated)
670 		    && (drive->state != drive_referenced)) { /* and it's a real drive */
671 		    wlabel_on = 1;			    /* enable writing the label */
672 		    error = dev_dioctl(drive->dev, /* make the label writeable */
673 			DIOCWLABEL,
674 			(caddr_t) & wlabel_on,
675 			FWRITE,
676 			proc0.p_ucred);
677 		    if (error == 0)
678 			error = write_drive(drive, (char *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
679 		    if (error == 0)
680 			error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET); /* first config copy */
681 		    if (error == 0)
682 			error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET + MAXCONFIG);	/* second copy */
683 		    wlabel_on = 0;			    /* enable writing the label */
684 		    if (error == 0)
685 			error = dev_dioctl(drive->dev, /* make the label non-writeable again */
686 			    DIOCWLABEL,
687 			    (caddr_t) & wlabel_on,
688 			    FWRITE,
689 			    proc0.p_ucred);
690 		    unlockdrive(drive);
691 		    if (error) {
692 			log(LOG_ERR,
693 			    "vinum: Can't write config to %s, error %d\n",
694 			    drive->devicename,
695 			    error);
696 			set_drive_state(drive->driveno, drive_down, setstate_force);
697 		    } else
698 			written_config = 1;		    /* we've written it on at least one drive */
699 		}
700 	    } else					    /* not worth looking at, */
701 		unlockdrive(drive);			    /* just unlock it again */
702 	}
703     }
704     Free(vhdr);
705     Free(config);
706 }
707 
708 /*
709  * Disk labels are a mess.  The correct way to
710  * access them is with the DIOC[GSW]DINFO ioctls,
711  * but some programs, such as newfs, access the
712  * disk directly, so we have to write things
713  * there.  We do this only on request.  If a user
714  * request tries to read it directly, we fake up
715  * one on the fly.
716  */
717 
718 /*
719  * get_volume_label returns a label structure to lp, which
720  * is allocated by the caller
721  */
722 void
723 get_volume_label(char *name, int plexes, u_int64_t size, struct disklabel *lp)
724 {
725     bzero(lp, sizeof(struct disklabel));
726 
727     strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
728     lp->d_type = DTYPE_VINUM;
729     strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name)));
730     lp->d_rpm = 14400 * plexes;				    /* to keep them guessing */
731     lp->d_interleave = 1;
732     lp->d_flags = 0;
733 
734     /*
735      * A Vinum volume has a single track with all
736      * its sectors.
737      */
738     lp->d_secsize = DEV_BSIZE;				    /* bytes per sector */
739     lp->d_nsectors = size;				    /* data sectors per track */
740     lp->d_ntracks = 1;					    /* tracks per cylinder */
741     lp->d_ncylinders = 1;				    /* data cylinders per unit */
742     lp->d_secpercyl = size;				    /* data sectors per cylinder */
743     lp->d_secperunit = size;				    /* data sectors per unit */
744 
745     lp->d_bbsize = BBSIZE;
746     lp->d_sbsize = SBSIZE;
747 
748     lp->d_magic = DISKMAGIC;
749     lp->d_magic2 = DISKMAGIC;
750 
751     /*
752      * Set up partitions a, b and c to be identical
753      * and the size of the volume.  a is UFS, b is
754      * swap, c is nothing.
755      */
756     lp->d_partitions[0].p_size = size;
757     lp->d_partitions[0].p_fsize = 1024;
758     lp->d_partitions[0].p_fstype = FS_BSDFFS;		    /* FreeBSD File System :-) */
759     lp->d_partitions[0].p_fsize = 1024;			    /* FS fragment size */
760     lp->d_partitions[0].p_frag = 8;			    /* and fragments per block */
761     lp->d_partitions[SWAP_PART].p_size = size;
762     lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP;	    /* swap partition */
763     lp->d_partitions[LABEL_PART].p_size = size;
764     lp->d_npartitions = LABEL_PART + 1;
765     strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name)));
766     lp->d_checksum = dkcksum(lp);
767 }
768 
769 /* Write a volume label.  This implements the VINUM_LABEL ioctl. */
770 int
771 write_volume_label(int volno)
772 {
773     struct disklabel *lp;
774     struct buf *bp;
775     struct disklabel *dlp;
776     struct volume *vol;
777     int error;
778     cdev_t dev;
779 
780     lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1));
781     if (lp == 0)
782 	return ENOMEM;
783 
784     if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_allocated) /* invalid volume */
785 	return ENOENT;
786 
787     vol = &VOL[volno];					    /* volume in question */
788     if (vol->state <= volume_uninit)			    /* nothing there */
789 	return ENXIO;
790     else if (vol->state < volume_up)			    /* not accessible */
791 	return EIO;					    /* I/O error */
792 
793     get_volume_label(vol->name, vol->plexes, vol->size, lp); /* get the label */
794 
795     /*
796      * Now write to disk.  This code is derived from the
797      * system writedisklabel (), which does silly things
798      * like reading the label and refusing to write
799      * unless it's already there.
800      */
801     bp = geteblk((int) lp->d_secsize);			    /* get a buffer */
802     dev = make_adhoc_dev(&vinum_ops, vol->volno);
803     bp->b_bio1.bio_offset = (off_t)LABELSECTOR * lp->d_secsize;
804     bp->b_bcount = lp->d_secsize;
805     bzero(bp->b_data, lp->d_secsize);
806     dlp = (struct disklabel *) bp->b_data;
807     *dlp = *lp;
808     bp->b_flags &= ~B_INVAL;
809     bp->b_cmd = BUF_CMD_WRITE;
810 
811     /*
812      * This should read:
813      *
814      *       vinumstrategy (bp);
815      *
816      * Negotiate with phk to get it fixed.
817      */
818     dev_dstrategy(dev, &bp->b_bio1);
819     error = biowait(bp);
820     bp->b_flags |= B_INVAL | B_AGE;
821     bp->b_flags &= ~B_ERROR;
822     brelse(bp);
823     return error;
824 }
825 
826 /* Look at all disks on the system for vinum slices */
827 int
828 vinum_scandisk(char *devicename[], int drives)
829 {
830     struct drive *volatile drive;
831     volatile int driveno;
832     int firstdrive;					    /* first drive in this list */
833     volatile int gooddrives;				    /* number of usable drives found */
834     int firsttime;					    /* set if we have never configured before */
835     int error;
836     char *config_text;					    /* read the config info from disk into here */
837     char *volatile cptr;				    /* pointer into config information */
838     char *eptr;						    /* end pointer into config information */
839     char *config_line;					    /* copy the config line to */
840     volatile int status;
841     int *volatile drivelist;				    /* list of drive indices */
842 #define DRIVENAMELEN 64
843 #define DRIVEPARTS   35					    /* max partitions per drive, excluding c */
844     char partname[DRIVENAMELEN];			    /* for creating partition names */
845 
846     status = 0;						    /* success indication */
847     vinum_conf.flags |= VF_READING_CONFIG;		    /* reading config from disk */
848 
849     gooddrives = 0;					    /* number of usable drives found */
850     firstdrive = vinum_conf.drives_used;		    /* the first drive */
851     firsttime = vinum_conf.drives_used == 0;		    /* are we a virgin? */
852 
853     /* allocate a drive pointer list */
854     drivelist = (int *) Malloc(drives * DRIVEPARTS * sizeof(int));
855     CHECKALLOC(drivelist, "Can't allocate memory");
856 
857     /* Open all drives and find which was modified most recently */
858     for (driveno = 0; driveno < drives; driveno++) {
859 	char part;					    /* UNIX partition */
860 	int slice;
861 	int founddrive;					    /* flag when we find a vinum drive */
862 	int has_slice = 0;
863 	int has_part = 0;
864 	char *tmp;
865 
866 	founddrive = 0;					    /* no vinum drive found yet on this spindle */
867 
868 	/*
869 	 * If the device path contains a slice we do not try to tack on
870 	 * another slice.  If the device path has a partition we only check
871 	 * that partition.
872 	 */
873 	if ((tmp = rindex(devicename[driveno], '/')) == NULL)
874 	    tmp = devicename[driveno];
875 	while (*tmp && (*tmp < '0' || *tmp > '9'))
876 	    ++tmp;
877 	while (*tmp && *tmp >= '0' && *tmp <= '9')
878 	    ++tmp;
879 	if (*tmp == 's')
880 	    has_slice = strtol(tmp + 1, &tmp, 0);
881 	if (*tmp >= 'a' && *tmp <= 'p')
882 	    has_part = *tmp;
883 
884 	/*
885 	 * Scan slices if no slice was specified, only if no partition was
886 	 * specified.
887 	 */
888 	if (has_slice == 0 && has_part == 0)
889 	for (slice = 1; slice < 5; slice++) {
890 	    if (has_slice && slice != has_slice)
891 		continue;
892 
893 	    for (part = 'a'; part <= 'p'; part++) {
894 		if (has_part && part != has_part)
895 		    continue;
896 		if (part == 'c')
897 		    continue;
898 		ksnprintf(partname, DRIVENAMELEN,
899 			"%ss%d%c", devicename[driveno], slice, part);
900 		drive = check_drive(partname);	    /* try to open it */
901 		if ((drive->lasterror != 0)		    /* didn't work, */
902 		    ||(drive->state != drive_up))
903 		    free_drive(drive);		    /* get rid of it */
904 		else if (drive->flags & VF_CONFIGURED)  /* already read this config, */
905 		    log(LOG_WARNING,
906 			"vinum: already read config from %s\n", /* say so */
907 			drive->label.name);
908 		else {
909 		    drivelist[gooddrives] = drive->driveno;	/* keep the drive index */
910 		    drive->flags &= ~VF_NEWBORN;	    /* which is no longer newly born */
911 		    gooddrives++;
912 		    founddrive++;
913 		}
914 	    }
915 	}
916 	if (founddrive == 0 && has_slice == 0) {	    /* didn't find anything, */
917 	    for (part = 'a'; part <= 'p'; part++) {	    /* try the compatibility partition */
918 		if (has_part && has_part != part)
919 		    continue;
920 		if (part == 'c')
921 		    continue;
922 		if (has_part) {
923 		    ksnprintf(partname, DRIVENAMELEN,
924 			    "%s", devicename[driveno]);
925 		} else {
926 		    ksnprintf(partname, DRIVENAMELEN,
927 			    "%s%c", devicename[driveno], part);
928 		}
929 		drive = check_drive(partname);	    /* try to open it */
930 		if ((drive->lasterror != 0)		    /* didn't work, */
931 		||(drive->state != drive_up))
932 		    free_drive(drive);		    /* get rid of it */
933 		else if (drive->flags & VF_CONFIGURED)  /* already read this config, */
934 		    log(LOG_WARNING,
935 			"vinum: already read config from %s\n", /* say so */
936 			drive->label.name);
937 		else {
938 		    drivelist[gooddrives] = drive->driveno;	/* keep the drive index */
939 		    drive->flags &= ~VF_NEWBORN;	    /* which is no longer newly born */
940 		    gooddrives++;
941 		}
942 	    }
943 	}
944     }
945 
946     if (gooddrives == 0) {
947 	if (firsttime)
948 	    log(LOG_WARNING, "vinum: no drives found\n");
949 	else
950 	    log(LOG_INFO, "vinum: no additional drives found\n");
951 	return ENOENT;
952     }
953     /*
954      * We now have at least one drive
955      * open.  Sort them in order of config time
956      * and merge the config info with what we
957      * have already.
958      */
959     kqsort(drivelist, gooddrives, sizeof(int), drivecmp);
960     config_text = (char *) Malloc(MAXCONFIG * 2);	    /* allocate buffers */
961     CHECKALLOC(config_text, "Can't allocate memory");
962     config_line = (char *) Malloc(MAXCONFIGLINE * 2);	    /* allocate buffers */
963     CHECKALLOC(config_line, "Can't allocate memory");
964     for (driveno = 0; driveno < gooddrives; driveno++) {    /* now include the config */
965 	drive = &DRIVE[drivelist[driveno]];		    /* point to the drive */
966 
967 	if (firsttime && (driveno == 0))		    /* we've never configured before, */
968 	    log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename);
969 	else
970 	    log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename);
971 
972 	if (drive->state == drive_up)
973 	    /* Read in both copies of the configuration information */
974 	    error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
975 	else {
976 	    error = EIO;
977 	    kprintf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state));
978 	}
979 
980 	if (error != 0) {
981 	    log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error);
982 	    free_drive(drive);				    /* give it back */
983 	    status = error;
984 	}
985 	/*
986 	 * At this point, check that the two copies
987 	 * are the same, and do something useful if
988 	 * not.  In particular, consider which is
989 	 * newer, and what this means for the
990 	 * integrity of the data on the drive.
991 	 */
992 	else {
993 	    vinum_conf.drives_used++;			    /* another drive in use */
994 	    /* Parse the configuration, and add it to the global configuration */
995 	    for (cptr = config_text; *cptr != '\0';) {	    /* love this style(9) */
996 		volatile int parse_status;		    /* return value from parse_config */
997 
998 		for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
999 		    *eptr++ = *cptr++;
1000 		*eptr = '\0';				    /* and delimit */
1001 		if (setjmp(command_fail) == 0) {	    /* come back here on error and continue */
1002 		    parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */
1003 		    if (parse_status < 0) {		    /* error in config */
1004 			/*
1005 			   * This config should have been parsed in user
1006 			   * space.  If we run into problems here, something
1007 			   * serious is afoot.  Complain and let the user
1008 			   * snarf the config to see what's wrong.
1009 			 */
1010 			log(LOG_ERR,
1011 			    "vinum: Config error on %s, aborting integration\n",
1012 			    drive->devicename);
1013 			free_drive(drive);		    /* give it back */
1014 			status = EINVAL;
1015 		    }
1016 		}
1017 		while (*cptr == '\n')
1018 		    cptr++;				    /* skip to next line */
1019 	    }
1020 	}
1021 	drive->flags |= VF_CONFIGURED;			    /* read this drive's configuration */
1022     }
1023 
1024     Free(config_line);
1025     Free(config_text);
1026     Free(drivelist);
1027     vinum_conf.flags &= ~VF_READING_CONFIG;		    /* no longer reading from disk */
1028     if (status != 0)
1029 	kprintf("vinum: couldn't read configuration");
1030     else
1031 	updateconfig(VF_READING_CONFIG);		    /* update from disk config */
1032     return status;
1033 }
1034 
1035 /*
1036  * Compare the modification dates of the drives, for qsort.
1037  * Return 1 if a < b, 0 if a == b, 01 if a > b: in other
1038  * words, sort backwards.
1039  */
1040 int
1041 drivecmp(const void *va, const void *vb)
1042 {
1043     const struct drive *a = &DRIVE[*(const int *) va];
1044     const struct drive *b = &DRIVE[*(const int *) vb];
1045 
1046     if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
1047 	&& (a->label.last_update.tv_usec == b->label.last_update.tv_usec))
1048 	return 0;
1049     else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec)
1050 	    || ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
1051 	    && (a->label.last_update.tv_usec > b->label.last_update.tv_usec)))
1052 	return -1;
1053     else
1054 	return 1;
1055 }
1056 /* Local Variables: */
1057 /* fill-column: 50 */
1058 /* End: */
1059