xref: /dragonfly/sys/dev/raid/vinum/vinumio.c (revision 1de703da)
1 /*-
2  * Copyright (c) 1997, 1998
3  *	Nan Yang Computer Services Limited.  All rights reserved.
4  *
5  *  This software is distributed under the so-called ``Berkeley
6  *  License'':
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by Nan Yang Computer
19  *      Services Limited.
20  * 4. Neither the name of the Company nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * This software is provided ``as is'', and any express or implied
25  * warranties, including, but not limited to, the implied warranties of
26  * merchantability and fitness for a particular purpose are disclaimed.
27  * In no event shall the company or contributors be liable for any
28  * direct, indirect, incidental, special, exemplary, or consequential
29  * damages (including, but not limited to, procurement of substitute
30  * goods or services; loss of use, data, or profits; or business
31  * interruption) however caused and on any theory of liability, whether
32  * in contract, strict liability, or tort (including negligence or
33  * otherwise) arising in any way out of the use of this software, even if
34  * advised of the possibility of such damage.
35  *
36  * $Id: vinumio.c,v 1.30 2000/05/10 23:23:30 grog Exp grog $
37  * $FreeBSD: src/sys/dev/vinum/vinumio.c,v 1.52.2.6 2002/05/02 08:43:44 grog Exp $
38  * $DragonFly: src/sys/dev/raid/vinum/vinumio.c,v 1.2 2003/06/17 04:28:33 dillon Exp $
39  */
40 
41 #include <dev/vinum/vinumhdr.h>
42 #include <dev/vinum/request.h>
43 #include <vm/vm_zone.h>
44 
45 static char *sappend(char *txt, char *s);
46 static int drivecmp(const void *va, const void *vb);
47 
48 /*
49  * Open the device associated with the drive, and set drive's vp.
50  * Return an error number
51  */
52 int
53 open_drive(struct drive *drive, struct proc *p, int verbose)
54 {
55     int devmajor;					    /* major devs for disk device */
56     int devminor;					    /* minor devs for disk device */
57     int unit;
58     char *dname;
59     struct cdevsw *dsw;					    /* pointer to cdevsw entry */
60 
61     if (bcmp(drive->devicename, "/dev/", 5))		    /* device name doesn't start with /dev */
62 	return ENOENT;					    /* give up */
63     if (drive->flags & VF_OPEN)				    /* open already, */
64 	return EBUSY;					    /* don't do it again */
65 
66     /*
67      * Yes, Bruce, I know this is horrible, but we
68      * don't have a root file system when we first
69      * try to do this.  If you can come up with a
70      * better solution, I'd really like it.  I'm
71      * just putting it in now to add ammuntion to
72      * moving the system to devfs.
73      */
74     dname = &drive->devicename[5];
75     drive->dev = NULL;					    /* no device yet */
76 
77     /* Find the device */
78     if (bcmp(dname, "ad", 2) == 0)			    /* IDE disk */
79 	devmajor = 116;
80     else if (bcmp(dname, "wd", 2) == 0)			    /* IDE disk */
81 	devmajor = 3;
82     else if (bcmp(dname, "da", 2) == 0)
83 	devmajor = 13;
84     else if (bcmp(dname, "vn", 2) == 0)
85 	devmajor = 43;
86     else if (bcmp(dname, "md", 2) == 0)
87 	devmajor = 95;
88     else if (bcmp(dname, "amrd", 4) == 0) {
89 	devmajor = 133;
90 	dname += 2;
91     } else if (bcmp(dname, "mlxd", 4) == 0) {
92 	devmajor = 131;
93 	dname += 2;
94     } else if (bcmp(dname, "idad", 4) == 0) {
95 	devmajor = 109;
96 	dname += 2;
97     } else if (bcmp(dname, "twed", 4) == 0) {               /* 3ware raid */
98       devmajor = 147;
99       dname += 2;
100     } else
101 	return ENODEV;
102     dname += 2;						    /* point past */
103 
104     /*
105      * Found the device.  We can expect one of
106      * two formats for the rest: a unit number,
107      * then either a partition letter for the
108      * compatiblity partition (e.g. h) or a
109      * slice ID and partition (e.g. s2e).
110      * Create a minor number for each of them.
111      */
112     unit = 0;
113     while ((*dname >= '0')				    /* unit number */
114     &&(*dname <= '9')) {
115 	unit = unit * 10 + *dname - '0';
116 	dname++;
117     }
118 
119     if (*dname == 's') {				    /* slice */
120 	if (((dname[1] < '1') || (dname[1] > '4'))	    /* invalid slice */
121 	||((dname[2] < 'a') || (dname[2] > 'h')))	    /* or invalid partition */
122 	    return ENODEV;
123 	devminor = ((unit & 31) << 3)			    /* unit */
124 	+(dname[2] - 'a')				    /* partition */
125 	+((dname[1] - '0' + 1) << 16)			    /* slice */
126 	+((unit & ~31) << 16);				    /* high-order unit bits */
127     } else {						    /* compatibility partition */
128 	if ((*dname < 'a') || (*dname > 'h'))		    /* or invalid partition */
129 	    return ENODEV;
130 	devminor = (*dname - 'a')			    /* partition */
131 	+((unit & 31) << 3)				    /* unit */
132 	+((unit & ~31) << 16);				    /* high-order unit bits */
133     }
134 
135     if ((devminor & 7) == 2)				    /* partition c */
136 	return ENOTTY;					    /* not buying that */
137 
138     drive->dev = makedev(devmajor, devminor);		    /* find the device */
139     if (drive->dev == NULL)				    /* didn't find anything */
140 	return ENODEV;
141 
142     drive->dev->si_iosize_max = DFLTPHYS;
143     dsw = devsw(drive->dev);
144     if (dsw == NULL)
145 	drive->lasterror = ENOENT;
146     else
147 	drive->lasterror = (dsw->d_open) (drive->dev, FWRITE, 0, NULL);
148 
149     if (drive->lasterror != 0) {			    /* failed */
150 	drive->state = drive_down;			    /* just force it down */
151 	if (verbose)
152 	    log(LOG_WARNING,
153 		"vinum open_drive %s: failed with error %d\n",
154 		drive->devicename, drive->lasterror);
155     } else
156 	drive->flags |= VF_OPEN;			    /* we're open now */
157 
158     return drive->lasterror;
159 }
160 
161 /*
162  * Set some variables in the drive struct
163  * in more convenient form.  Return error indication
164  */
165 int
166 set_drive_parms(struct drive *drive)
167 {
168     drive->blocksize = BLKDEV_IOSIZE;			    /* do we need this? */
169     drive->secsperblock = drive->blocksize		    /* number of sectors per block */
170 	/ drive->partinfo.disklab->d_secsize;
171 
172     /* Now update the label part */
173     bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
174     getmicrotime(&drive->label.date_of_birth);		    /* and current time */
175     drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */
176     *((u_int64_t) drive->partinfo.disklab->d_secsize);
177 #if VINUMDEBUG
178     if (debug & DEBUG_BIGDRIVE)				    /* pretend we're 100 times as big */
179 	drive->label.drive_size *= 100;
180 #endif
181 
182     /* number of sectors available for subdisks */
183     drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
184 
185     /*
186      * Bug in 3.0 as of January 1998: you can open
187      * non-existent slices.  They have a length of 0.
188      */
189     if (drive->label.drive_size < MINVINUMSLICE) {	    /* too small to worry about */
190 	set_drive_state(drive->driveno, drive_down, setstate_force);
191 	drive->lasterror = ENOSPC;
192 	return ENOSPC;
193     }
194     drive->freelist_size = INITIAL_DRIVE_FREELIST;	    /* initial number of entries */
195     drive->freelist = (struct drive_freelist *)
196 	Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
197     if (drive->freelist == NULL)			    /* can't malloc, dammit */
198 	return ENOSPC;
199     drive->freelist_entries = 1;			    /* just (almost) the complete drive */
200     drive->freelist[0].offset = DATASTART;		    /* starts here */
201     drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
202     if (drive->label.name[0] != '\0')			    /* got a name */
203 	set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */
204     else						    /* we know about it, but that's all */
205 	drive->state = drive_referenced;
206     return 0;
207 }
208 
209 /*
210  * Initialize a drive: open the device and add device
211  * information
212  */
213 int
214 init_drive(struct drive *drive, int verbose)
215 {
216     if (drive->devicename[0] != '/') {
217 	drive->lasterror = EINVAL;
218 	log(LOG_ERR, "vinum: Can't open drive without drive name\n");
219 	return EINVAL;
220     }
221     drive->lasterror = open_drive(drive, curproc, verbose); /* open the drive */
222     if (drive->lasterror)
223 	return drive->lasterror;
224 
225     drive->lasterror = (*devsw(drive->dev)->d_ioctl) (drive->dev,
226 	DIOCGPART,
227 	(caddr_t) & drive->partinfo,
228 	FREAD,
229 	curproc);
230     if (drive->lasterror) {
231 	if (verbose)
232 	    log(LOG_WARNING,
233 		"vinum open_drive %s: Can't get partition information, drive->lasterror %d\n",
234 		drive->devicename,
235 		drive->lasterror);
236 	close_drive(drive);
237 	return drive->lasterror;
238     }
239     if (drive->partinfo.part->p_fstype != FS_VINUM) {	    /* not Vinum */
240 	drive->lasterror = EFTYPE;
241 	if (verbose)
242 	    log(LOG_WARNING,
243 		"vinum open_drive %s: Wrong partition type for vinum\n",
244 		drive->devicename);
245 	close_drive(drive);
246 	return EFTYPE;
247     }
248     return set_drive_parms(drive);			    /* set various odds and ends */
249 }
250 
251 /* Close a drive if it's open. */
252 void
253 close_drive(struct drive *drive)
254 {
255     LOCKDRIVE(drive);					    /* keep the daemon out */
256     if (drive->flags & VF_OPEN)
257 	close_locked_drive(drive);			    /* and close it */
258     if (drive->state > drive_down)			    /* if it's up */
259 	drive->state = drive_down;			    /* make sure it's down */
260     unlockdrive(drive);
261 }
262 
263 /*
264  * Real drive close code, called with drive already locked.
265  * We have also checked that the drive is open.  No errors.
266  */
267 void
268 close_locked_drive(struct drive *drive)
269 {
270     /*
271      * If we can't access the drive, we can't flush
272      * the queues, which spec_close() will try to
273      * do.  Get rid of them here first.
274      */
275     drive->lasterror = (*devsw(drive->dev)->d_close) (drive->dev, 0, 0, NULL);
276     drive->flags &= ~VF_OPEN;				    /* no longer open */
277 }
278 
279 /*
280  * Remove drive from the configuration.
281  * Caller must ensure that it isn't active.
282  */
283 void
284 remove_drive(int driveno)
285 {
286     struct drive *drive = &vinum_conf.drive[driveno];
287     struct vinum_hdr *vhdr;				    /* buffer for header */
288     int error;
289 
290     if (drive->state > drive_referenced) {		    /* real drive */
291 	if (drive->state == drive_up) {
292 	    vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);	/* allocate buffer */
293 	    CHECKALLOC(vhdr, "Can't allocate memory");
294 	    error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
295 	    if (error)
296 		drive->lasterror = error;
297 	    else {
298 		vhdr->magic = VINUM_NOMAGIC;		    /* obliterate the magic, but leave the rest */
299 		write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
300 	    }
301 	    Free(vhdr);
302 	}
303 	free_drive(drive);				    /* close it and free resources */
304 	save_config();					    /* and save the updated configuration */
305     }
306 }
307 
308 /*
309  * Transfer drive data.  Usually called from one of these defines;
310  * #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
311  * #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
312  *
313  * length and offset are in bytes, but must be multiples of sector
314  * size.  The function *does not check* for this condition, and
315  * truncates ruthlessly.
316  * Return error number
317  */
318 int
319 driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag)
320 {
321     int error;
322     struct buf *bp;
323 
324     error = 0;						    /* to keep the compiler happy */
325     while (length) {					    /* divide into small enough blocks */
326 	int len = min(length, MAXBSIZE);		    /* maximum block device transfer is MAXBSIZE */
327 
328 	bp = geteblk(len);				    /* get a buffer header */
329 	bp->b_flags = flag;
330 	bp->b_dev = drive->dev;				    /* device */
331 	bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */
332 	bp->b_saveaddr = bp->b_data;
333 	bp->b_data = buf;
334 	bp->b_bcount = len;
335 	BUF_STRATEGY(bp, 0);				    /* initiate the transfer */
336 	error = biowait(bp);
337 	bp->b_data = bp->b_saveaddr;
338 	bp->b_flags |= B_INVAL | B_AGE;
339 	bp->b_flags &= ~B_ERROR;
340 	brelse(bp);
341 	if (error)
342 	    break;
343 	length -= len;					    /* update pointers */
344 	buf += len;
345 	offset += len;
346     }
347     return error;
348 }
349 
350 /*
351  * Check a drive for a vinum header.  If found,
352  * update the drive information.  We come here
353  * with a partially populated drive structure
354  * which includes the device name.
355  *
356  * Return information on what we found.
357  *
358  * This function is called from two places: check_drive,
359  * which wants to find out whether the drive is a
360  * Vinum drive, and config_drive, which asserts that
361  * it is a vinum drive.  In the first case, we don't
362  * print error messages (verbose==0), in the second
363  * we do (verbose==1).
364  */
365 enum drive_label_info
366 read_drive_label(struct drive *drive, int verbose)
367 {
368     int error;
369     int result;						    /* result of our search */
370     struct vinum_hdr *vhdr;				    /* and as header */
371 
372     error = init_drive(drive, 0);			    /* find the drive */
373     if (error)						    /* find the drive */
374 	return DL_CANT_OPEN;				    /* not ours */
375 
376     vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);	    /* allocate buffers */
377     CHECKALLOC(vhdr, "Can't allocate memory");
378 
379     drive->state = drive_up;				    /* be optimistic */
380     error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
381     if (vhdr->magic == VINUM_MAGIC) {			    /* ours! */
382 	if (drive->label.name[0]			    /* we have a name for this drive */
383 	&&(strcmp(drive->label.name, vhdr->label.name))) {  /* but it doesn't match the real name */
384 	    drive->lasterror = EINVAL;
385 	    result = DL_WRONG_DRIVE;			    /* it's the wrong drive */
386 	    drive->state = drive_unallocated;		    /* put it back, it's not ours */
387 	} else
388 	    result = DL_OURS;
389 	/*
390 	 * We copy the drive anyway so that we have
391 	 * the correct name in the drive info.  This
392 	 * may not be the name specified
393 	 */
394 	drive->label = vhdr->label;			    /* put in the label information */
395     } else if (vhdr->magic == VINUM_NOMAGIC)		    /* was ours, but we gave it away */
396 	result = DL_DELETED_LABEL;			    /* and return the info */
397     else
398 	result = DL_NOT_OURS;				    /* we could have it, but we don't yet */
399     Free(vhdr);						    /* that's all. */
400     return result;
401 }
402 
403 /*
404  * Check a drive for a vinum header.  If found,
405  * read configuration information from the drive and
406  * incorporate the data into the configuration.
407  *
408  * Return drive number.
409  */
410 struct drive *
411 check_drive(char *devicename)
412 {
413     int driveno;
414     int i;
415     struct drive *drive;
416 
417     driveno = find_drive_by_dev(devicename, 1);		    /* if entry doesn't exist, create it */
418     drive = &vinum_conf.drive[driveno];			    /* and get a pointer */
419 
420     if (read_drive_label(drive, 0) == DL_OURS) {	    /* one of ours */
421 	for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */
422 	    if ((i != driveno)				    /* not this drive */
423 	    &&(DRIVE[i].state != drive_unallocated)	    /* and it's allocated */
424 	    &&(strcmp(DRIVE[i].label.name,
425 			DRIVE[driveno].label.name) == 0)) { /* and it has the same name */
426 		struct drive *mydrive = &DRIVE[i];
427 
428 		if (mydrive->devicename[0] == '/') {	    /* we know a device name for it */
429 		    /*
430 		     * set an error, but don't take the
431 		     * drive down: that would cause unneeded
432 		     * error messages.
433 		     */
434 		    drive->lasterror = EEXIST;
435 		    break;
436 		} else {				    /* it's just a place holder, */
437 		    int sdno;
438 
439 		    for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */
440 			if ((SD[sdno].driveno == i)	    /* it's pointing to this one, */
441 			&&(SD[sdno].state != sd_unallocated)) {	/* and it's a real subdisk */
442 			    SD[sdno].driveno = drive->driveno; /* point to the one we found */
443 			    update_sd_state(sdno);	    /* and update its state */
444 			}
445 		    }
446 		    bzero(mydrive, sizeof(struct drive));   /* don't deallocate it, just remove it */
447 		}
448 	    }
449 	}
450     } else {
451 	if (drive->lasterror == 0)
452 	    drive->lasterror = ENODEV;
453 	close_drive(drive);
454 	drive->state = drive_down;
455     }
456     return drive;
457 }
458 
/*
 * Copy the string txt, including its terminating NUL, to s.
 * Returns a pointer to the NUL in the copy, so that the next
 * string can be appended there.  No length check is made.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
	char c = *txt++;

	*s = c;
	if (c == '\0')					    /* copied the terminator, */
	    return s;					    /* which is where we point */
	s++;
    }
}
465 
466 void
467 format_config(char *config, int len)
468 {
469     int i;
470     int j;
471     char *s = config;
472     char *configend = &config[len];
473 
474     bzero(config, len);
475 
476     /* First write the volume configuration */
477     for (i = 0; i < vinum_conf.volumes_allocated; i++) {
478 	struct volume *vol;
479 
480 	vol = &vinum_conf.volume[i];
481 	if ((vol->state > volume_uninit)
482 	    && (vol->name[0] != '\0')) {		    /* paranoia */
483 	    snprintf(s,
484 		configend - s,
485 		"volume %s state %s",
486 		vol->name,
487 		volume_state(vol->state));
488 	    while (*s)
489 		s++;					    /* find the end */
490 	    if (vol->preferred_plex >= 0)		    /* preferences, */
491 		snprintf(s,
492 		    configend - s,
493 		    " readpol prefer %s",
494 		    vinum_conf.plex[vol->preferred_plex].name);
495 	    while (*s)
496 		s++;					    /* find the end */
497 	    s = sappend("\n", s);
498 	}
499     }
500 
501     /* Then the plex configuration */
502     for (i = 0; i < vinum_conf.plexes_allocated; i++) {
503 	struct plex *plex;
504 
505 	plex = &vinum_conf.plex[i];
506 	if ((plex->state > plex_referenced)
507 	    && (plex->name[0] != '\0')) {		    /* paranoia */
508 	    snprintf(s,
509 		configend - s,
510 		"plex name %s state %s org %s ",
511 		plex->name,
512 		plex_state(plex->state),
513 		plex_org(plex->organization));
514 	    while (*s)
515 		s++;					    /* find the end */
516 	    if (isstriped(plex)) {
517 		snprintf(s,
518 		    configend - s,
519 		    "%ds ",
520 		    (int) plex->stripesize);
521 		while (*s)
522 		    s++;				    /* find the end */
523 	    }
524 	    if (plex->volno >= 0)			    /* we have a volume */
525 		snprintf(s,
526 		    configend - s,
527 		    "vol %s ",
528 		    vinum_conf.volume[plex->volno].name);
529 	    while (*s)
530 		s++;					    /* find the end */
531 	    for (j = 0; j < plex->subdisks; j++) {
532 		snprintf(s,
533 		    configend - s,
534 		    " sd %s",
535 		    vinum_conf.sd[plex->sdnos[j]].name);
536 	    }
537 	    s = sappend("\n", s);
538 	}
539     }
540 
541     /* And finally the subdisk configuration */
542     for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
543 	struct sd *sd;
544 	char *drivename;
545 
546 	sd = &SD[i];
547 	if ((sd->state != sd_referenced)
548 	    && (sd->state != sd_unallocated)
549 	    && (sd->name[0] != '\0')) {			    /* paranoia */
550 	    drivename = vinum_conf.drive[sd->driveno].label.name;
551 	    /*
552 	     * XXX We've seen cases of dead subdisks
553 	     * which don't have a drive.  If we let them
554 	     * through here, the drive name is null, so
555 	     * they get the drive named 'plex'.
556 	     *
557 	     * This is a breakage limiter, not a fix.
558 	     */
559 	    if (drivename[0] == '\0')
560 		drivename = "*invalid*";
561 	    snprintf(s,
562 		configend - s,
563 		"sd name %s drive %s plex %s len %llus driveoffset %llus state %s",
564 		sd->name,
565 		drivename,
566 		vinum_conf.plex[sd->plexno].name,
567 		(unsigned long long) sd->sectors,
568 		(unsigned long long) sd->driveoffset,
569 		sd_state(sd->state));
570 	    while (*s)
571 		s++;					    /* find the end */
572 	    if (sd->plexno >= 0)
573 		snprintf(s,
574 		    configend - s,
575 		    " plexoffset %llds",
576 		    (long long) sd->plexoffset);
577 	    else
578 		snprintf(s, configend - s, " detached");
579 	    while (*s)
580 		s++;					    /* find the end */
581 	    if (sd->flags & VF_RETRYERRORS) {
582 		snprintf(s, configend - s, " retryerrors");
583 		while (*s)
584 		    s++;				    /* find the end */
585 	    }
586 	    snprintf(s, configend - s, " \n");
587 	    while (*s)
588 		s++;					    /* find the end */
589 	}
590     }
591     if (s > &config[len - 2])
592 	panic("vinum: configuration data overflow");
593 }
594 
595 /*
596  * issue a save config request to the d�mon.  The actual work
597  * is done in process context by daemon_save_config
598  */
599 void
600 save_config(void)
601 {
602     queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) NULL);
603 }
604 
605 /*
606  * Write the configuration to all vinum slices.  This
607  * is performed by the d�mon only
608  */
609 void
610 daemon_save_config(void)
611 {
612     int error;
613     int written_config;					    /* set when we first write the config to disk */
614     int driveno;
615     struct drive *drive;				    /* point to current drive info */
616     struct vinum_hdr *vhdr;				    /* and as header */
617     char *config;					    /* point to config data */
618     int wlabel_on;					    /* to set writing label on/off */
619 
620     /* don't save the configuration while we're still working on it */
621     if (vinum_conf.flags & VF_CONFIGURING)
622 	return;
623     written_config = 0;					    /* no config written yet */
624     /* Build a volume header */
625     vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);	    /* get space for the config data */
626     CHECKALLOC(vhdr, "Can't allocate config data");
627     vhdr->magic = VINUM_MAGIC;				    /* magic number */
628     vhdr->config_length = MAXCONFIG;			    /* length of following config info */
629 
630     config = Malloc(MAXCONFIG);				    /* get space for the config data */
631     CHECKALLOC(config, "Can't allocate config data");
632 
633     format_config(config, MAXCONFIG);
634     error = 0;						    /* no errors yet */
635     for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
636 	drive = &vinum_conf.drive[driveno];		    /* point to drive */
637 	if (drive->state > drive_referenced) {
638 	    LOCKDRIVE(drive);				    /* don't let it change */
639 
640 	    /*
641 	     * First, do some drive consistency checks.  Some
642 	     * of these are kludges, others require a process
643 	     * context and couldn't be done before
644 	     */
645 	    if ((drive->devicename[0] == '\0')
646 		|| (drive->label.name[0] == '\0')) {
647 		unlockdrive(drive);
648 		free_drive(drive);			    /* get rid of it */
649 		break;
650 	    }
651 	    if (((drive->flags & VF_OPEN) == 0)		    /* drive not open */
652 	    &&(drive->state > drive_down)) {		    /* and it thinks it's not down */
653 		unlockdrive(drive);
654 		set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */
655 		continue;
656 	    }
657 	    if ((drive->state == drive_down)		    /* it's down */
658 	    &&(drive->flags & VF_OPEN)) {		    /* but open, */
659 		unlockdrive(drive);
660 		close_drive(drive);			    /* close it */
661 	    } else if (drive->state > drive_down) {
662 		getmicrotime(&drive->label.last_update);    /* time of last update is now */
663 		bcopy((char *) &drive->label,		    /* and the label info from the drive structure */
664 		    (char *) &vhdr->label,
665 		    sizeof(vhdr->label));
666 		if ((drive->state != drive_unallocated)
667 		    && (drive->state != drive_referenced)) { /* and it's a real drive */
668 		    wlabel_on = 1;			    /* enable writing the label */
669 		    error = (*devsw(drive->dev)->d_ioctl) (drive->dev, /* make the label writeable */
670 			DIOCWLABEL,
671 			(caddr_t) & wlabel_on,
672 			FWRITE,
673 			curproc);
674 		    if (error == 0)
675 			error = write_drive(drive, (char *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
676 		    if (error == 0)
677 			error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET); /* first config copy */
678 		    if (error == 0)
679 			error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET + MAXCONFIG);	/* second copy */
680 		    wlabel_on = 0;			    /* enable writing the label */
681 		    if (error == 0)
682 			error = (*devsw(drive->dev)->d_ioctl) (drive->dev, /* make the label non-writeable again */
683 			    DIOCWLABEL,
684 			    (caddr_t) & wlabel_on,
685 			    FWRITE,
686 			    curproc);
687 		    unlockdrive(drive);
688 		    if (error) {
689 			log(LOG_ERR,
690 			    "vinum: Can't write config to %s, error %d\n",
691 			    drive->devicename,
692 			    error);
693 			set_drive_state(drive->driveno, drive_down, setstate_force);
694 		    } else
695 			written_config = 1;		    /* we've written it on at least one drive */
696 		}
697 	    } else					    /* not worth looking at, */
698 		unlockdrive(drive);			    /* just unlock it again */
699 	}
700     }
701     Free(vhdr);
702     Free(config);
703 }
704 
705 /*
706  * Disk labels are a mess.  The correct way to
707  * access them is with the DIOC[GSW]DINFO ioctls,
708  * but some programs, such as newfs, access the
709  * disk directly, so we have to write things
710  * there.  We do this only on request.  If a user
711  * request tries to read it directly, we fake up
712  * one on the fly.
713  */
714 
715 /*
716  * get_volume_label returns a label structure to lp, which
717  * is allocated by the caller
718  */
719 void
720 get_volume_label(char *name, int plexes, u_int64_t size, struct disklabel *lp)
721 {
722     bzero(lp, sizeof(struct disklabel));
723 
724     strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
725     lp->d_type = DTYPE_VINUM;
726     strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name)));
727     lp->d_rpm = 14400 * plexes;				    /* to keep them guessing */
728     lp->d_interleave = 1;
729     lp->d_flags = 0;
730 
731     /*
732      * A Vinum volume has a single track with all
733      * its sectors.
734      */
735     lp->d_secsize = DEV_BSIZE;				    /* bytes per sector */
736     lp->d_nsectors = size;				    /* data sectors per track */
737     lp->d_ntracks = 1;					    /* tracks per cylinder */
738     lp->d_ncylinders = 1;				    /* data cylinders per unit */
739     lp->d_secpercyl = size;				    /* data sectors per cylinder */
740     lp->d_secperunit = size;				    /* data sectors per unit */
741 
742     lp->d_bbsize = BBSIZE;
743     lp->d_sbsize = SBSIZE;
744 
745     lp->d_magic = DISKMAGIC;
746     lp->d_magic2 = DISKMAGIC;
747 
748     /*
749      * Set up partitions a, b and c to be identical
750      * and the size of the volume.  a is UFS, b is
751      * swap, c is nothing.
752      */
753     lp->d_partitions[0].p_size = size;
754     lp->d_partitions[0].p_fsize = 1024;
755     lp->d_partitions[0].p_fstype = FS_BSDFFS;		    /* FreeBSD File System :-) */
756     lp->d_partitions[0].p_fsize = 1024;			    /* FS fragment size */
757     lp->d_partitions[0].p_frag = 8;			    /* and fragments per block */
758     lp->d_partitions[SWAP_PART].p_size = size;
759     lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP;	    /* swap partition */
760     lp->d_partitions[LABEL_PART].p_size = size;
761     lp->d_npartitions = LABEL_PART + 1;
762     strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name)));
763     lp->d_checksum = dkcksum(lp);
764 }
765 
766 /* Write a volume label.  This implements the VINUM_LABEL ioctl. */
767 int
768 write_volume_label(int volno)
769 {
770     struct disklabel *lp;
771     struct buf *bp;
772     struct disklabel *dlp;
773     struct volume *vol;
774     int error;
775 
776     lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1));
777     if (lp == 0)
778 	return ENOMEM;
779 
780     if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_allocated) /* invalid volume */
781 	return ENOENT;
782 
783     vol = &VOL[volno];					    /* volume in question */
784     if (vol->state <= volume_uninit)			    /* nothing there */
785 	return ENXIO;
786     else if (vol->state < volume_up)			    /* not accessible */
787 	return EIO;					    /* I/O error */
788 
789     get_volume_label(vol->name, vol->plexes, vol->size, lp); /* get the label */
790 
791     /*
792      * Now write to disk.  This code is derived from the
793      * system writedisklabel (), which does silly things
794      * like reading the label and refusing to write
795      * unless it's already there.
796      */
797     bp = geteblk((int) lp->d_secsize);			    /* get a buffer */
798     bp->b_dev = makedev(VINUM_CDEV_MAJOR, vol->volno);	    /* our own raw volume */
799     bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE);
800     bp->b_bcount = lp->d_secsize;
801     bzero(bp->b_data, lp->d_secsize);
802     dlp = (struct disklabel *) bp->b_data;
803     *dlp = *lp;
804     bp->b_flags &= ~B_INVAL;
805     bp->b_flags |= B_WRITE;
806 
807     /*
808      * This should read:
809      *
810      *       vinumstrategy (bp);
811      *
812      * Negotiate with phk to get it fixed.
813      */
814     BUF_STRATEGY(bp, 0);
815     error = biowait(bp);
816     bp->b_flags |= B_INVAL | B_AGE;
817     bp->b_flags &= ~B_ERROR;
818 
819     brelse(bp);
820     return error;
821 }
822 
823 /* Look at all disks on the system for vinum slices */
824 int
825 vinum_scandisk(char *devicename[], int drives)
826 {
827     struct drive *volatile drive;
828     volatile int driveno;
829     int firstdrive;					    /* first drive in this list */
830     volatile int gooddrives;				    /* number of usable drives found */
831     int firsttime;					    /* set if we have never configured before */
832     int error;
833     char *config_text;					    /* read the config info from disk into here */
834     char *volatile cptr;				    /* pointer into config information */
835     char *eptr;						    /* end pointer into config information */
836     char *config_line;					    /* copy the config line to */
837     volatile int status;
838     int *volatile drivelist;				    /* list of drive indices */
839 #define DRIVENAMELEN 64
840 #define DRIVEPARTS   35					    /* max partitions per drive, excluding c */
841     char partname[DRIVENAMELEN];			    /* for creating partition names */
842 
843     status = 0;						    /* success indication */
844     vinum_conf.flags |= VF_READING_CONFIG;		    /* reading config from disk */
845 
846     gooddrives = 0;					    /* number of usable drives found */
847     firstdrive = vinum_conf.drives_used;		    /* the first drive */
848     firsttime = vinum_conf.drives_used == 0;		    /* are we a virgin? */
849 
850     /* allocate a drive pointer list */
851     drivelist = (int *) Malloc(drives * DRIVEPARTS * sizeof(int));
852     CHECKALLOC(drivelist, "Can't allocate memory");
853 
854     /* Open all drives and find which was modified most recently */
855     for (driveno = 0; driveno < drives; driveno++) {
856 	char part;					    /* UNIX partition */
857 	int slice;
858 	int founddrive;					    /* flag when we find a vinum drive */
859 
860 	founddrive = 0;					    /* no vinum drive found yet on this spindle */
861 	/* first try the partition table */
862 	for (slice = 1; slice < 5; slice++)
863 	    for (part = 'a'; part < 'i'; part++) {
864 		if (part != 'c') {			    /* don't do the c partition */
865 		    snprintf(partname,
866 			DRIVENAMELEN,
867 			"%ss%d%c",
868 			devicename[driveno],
869 			slice,
870 			part);
871 		    drive = check_drive(partname);	    /* try to open it */
872 		    if ((drive->lasterror != 0)		    /* didn't work, */
873 		    ||(drive->state != drive_up))
874 			free_drive(drive);		    /* get rid of it */
875 		    else if (drive->flags & VF_CONFIGURED)  /* already read this config, */
876 			log(LOG_WARNING,
877 			    "vinum: already read config from %s\n", /* say so */
878 			    drive->label.name);
879 		    else {
880 			drivelist[gooddrives] = drive->driveno;	/* keep the drive index */
881 			drive->flags &= ~VF_NEWBORN;	    /* which is no longer newly born */
882 			gooddrives++;
883 			founddrive++;
884 		    }
885 		}
886 	    }
887 	if (founddrive == 0) {				    /* didn't find anything, */
888 	    for (part = 'a'; part < 'i'; part++)	    /* try the compatibility partition */
889 		if (part != 'c') {			    /* don't do the c partition */
890 		    snprintf(partname,			    /* /dev/sd0a */
891 			DRIVENAMELEN,
892 			"%s%c",
893 			devicename[driveno],
894 			part);
895 		    drive = check_drive(partname);	    /* try to open it */
896 		    if ((drive->lasterror != 0)		    /* didn't work, */
897 		    ||(drive->state != drive_up))
898 			free_drive(drive);		    /* get rid of it */
899 		    else if (drive->flags & VF_CONFIGURED)  /* already read this config, */
900 			log(LOG_WARNING,
901 			    "vinum: already read config from %s\n", /* say so */
902 			    drive->label.name);
903 		    else {
904 			drivelist[gooddrives] = drive->driveno;	/* keep the drive index */
905 			drive->flags &= ~VF_NEWBORN;	    /* which is no longer newly born */
906 			gooddrives++;
907 		    }
908 		}
909 	}
910     }
911 
912     if (gooddrives == 0) {
913 	if (firsttime)
914 	    log(LOG_WARNING, "vinum: no drives found\n");
915 	else
916 	    log(LOG_INFO, "vinum: no additional drives found\n");
917 	return ENOENT;
918     }
919     /*
920      * We now have at least one drive
921      * open.  Sort them in order of config time
922      * and merge the config info with what we
923      * have already.
924      */
925     qsort(drivelist, gooddrives, sizeof(int), drivecmp);
926     config_text = (char *) Malloc(MAXCONFIG * 2);	    /* allocate buffers */
927     CHECKALLOC(config_text, "Can't allocate memory");
928     config_line = (char *) Malloc(MAXCONFIGLINE * 2);	    /* allocate buffers */
929     CHECKALLOC(config_line, "Can't allocate memory");
930     for (driveno = 0; driveno < gooddrives; driveno++) {    /* now include the config */
931 	drive = &DRIVE[drivelist[driveno]];		    /* point to the drive */
932 
933 	if (firsttime && (driveno == 0))		    /* we've never configured before, */
934 	    log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename);
935 	else
936 	    log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename);
937 
938 	if (drive->state == drive_up)
939 	    /* Read in both copies of the configuration information */
940 	    error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
941 	else {
942 	    error = EIO;
943 	    printf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state));
944 	}
945 
946 	if (error != 0) {
947 	    log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error);
948 	    free_drive(drive);				    /* give it back */
949 	    status = error;
950 	}
951 	/*
952 	 * At this point, check that the two copies
953 	 * are the same, and do something useful if
954 	 * not.  In particular, consider which is
955 	 * newer, and what this means for the
956 	 * integrity of the data on the drive.
957 	 */
958 	else {
959 	    vinum_conf.drives_used++;			    /* another drive in use */
960 	    /* Parse the configuration, and add it to the global configuration */
961 	    for (cptr = config_text; *cptr != '\0';) {	    /* love this style(9) */
962 		volatile int parse_status;		    /* return value from parse_config */
963 
964 		for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
965 		    *eptr++ = *cptr++;
966 		*eptr = '\0';				    /* and delimit */
967 		if (setjmp(command_fail) == 0) {	    /* come back here on error and continue */
968 		    parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */
969 		    if (parse_status < 0) {		    /* error in config */
970 			/*
971 			   * This config should have been parsed in user
972 			   * space.  If we run into problems here, something
973 			   * serious is afoot.  Complain and let the user
974 			   * snarf the config to see what's wrong.
975 			 */
976 			log(LOG_ERR,
977 			    "vinum: Config error on %s, aborting integration\n",
978 			    drive->devicename);
979 			free_drive(drive);		    /* give it back */
980 			status = EINVAL;
981 		    }
982 		}
983 		while (*cptr == '\n')
984 		    cptr++;				    /* skip to next line */
985 	    }
986 	}
987 	drive->flags |= VF_CONFIGURED;			    /* read this drive's configuration */
988     }
989 
990     Free(config_text);
991     Free(drivelist);
992     vinum_conf.flags &= ~VF_READING_CONFIG;		    /* no longer reading from disk */
993     if (status != 0)
994 	printf("vinum: couldn't read configuration");
995     else
996 	updateconfig(VF_READING_CONFIG);		    /* update from disk config */
997     return status;
998 }
999 
1000 /*
1001  * Compare the modification dates of the drives, for qsort.
1002  * Return 1 if a < b, 0 if a == b, 01 if a > b: in other
1003  * words, sort backwards.
1004  */
1005 int
1006 drivecmp(const void *va, const void *vb)
1007 {
1008     const struct drive *a = &DRIVE[*(const int *) va];
1009     const struct drive *b = &DRIVE[*(const int *) vb];
1010 
1011     if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
1012 	&& (a->label.last_update.tv_usec == b->label.last_update.tv_usec))
1013 	return 0;
1014     else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec)
1015 	    || ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
1016 	    && (a->label.last_update.tv_usec > b->label.last_update.tv_usec)))
1017 	return -1;
1018     else
1019 	return 1;
1020 }
1021 /* Local Variables: */
1022 /* fill-column: 50 */
1023 /* End: */
1024