xref: /dragonfly/sys/kern/subr_diskslice.c (revision 3d201fd0)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
43  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
45  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
47  * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.12 2005/08/26 12:45:53 hmp Exp $
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/conf.h>
54 #include <sys/disklabel.h>
55 #include <sys/diskslice.h>
56 #include <sys/diskmbr.h>
57 #include <sys/fcntl.h>
58 #include <sys/malloc.h>
59 #include <sys/stat.h>
60 #include <sys/syslog.h>
61 #include <sys/vnode.h>
62 #include <sys/device.h>
63 #include <sys/thread2.h>
64 
65 #include <vfs/ufs/fs.h>
66 
67 #define TRACE(str)	do { if (ds_debug) printf str; } while (0)
68 
69 typedef	u_char	bool_t;
70 
71 static volatile bool_t ds_debug;
72 
73 static struct disklabel *clone_label (struct disklabel *lp);
74 static void dsiodone (struct buf *bp);
75 static char *fixlabel (char *sname, struct diskslice *sp,
76 			   struct disklabel *lp, int writeflag);
77 static void free_ds_label (struct diskslices *ssp, int slice);
78 static void partition_info (char *sname, int part, struct partition *pp);
79 static void slice_info (char *sname, struct diskslice *sp);
80 static void set_ds_label (struct diskslices *ssp, int slice,
81 			      struct disklabel *lp);
82 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
83 
84 /*
85  * Duplicate a label for the whole disk, and initialize defaults in the
86  * copy for fields that are not already initialized.  The caller only
87  * needs to initialize d_secsize and d_secperunit, and zero the fields
88  * that are to be defaulted.
89  */
90 static struct disklabel *
91 clone_label(struct disklabel *lp)
92 {
93 	struct disklabel *lp1;
94 
95 	lp1 = malloc(sizeof *lp1, M_DEVBUF, M_WAITOK);
96 	*lp1 = *lp;
97 	lp = NULL;
98 	if (lp1->d_typename[0] == '\0')
99 		strncpy(lp1->d_typename, "amnesiac", sizeof(lp1->d_typename));
100 	if (lp1->d_packname[0] == '\0')
101 		strncpy(lp1->d_packname, "fictitious", sizeof(lp1->d_packname));
102 	if (lp1->d_nsectors == 0)
103 		lp1->d_nsectors = 32;
104 	if (lp1->d_ntracks == 0)
105 		lp1->d_ntracks = 64;
106 	lp1->d_secpercyl = lp1->d_nsectors * lp1->d_ntracks;
107 	lp1->d_ncylinders = lp1->d_secperunit / lp1->d_secpercyl;
108 	if (lp1->d_rpm == 0)
109 		lp1->d_rpm = 3600;
110 	if (lp1->d_interleave == 0)
111 		lp1->d_interleave = 1;
112 	if (lp1->d_npartitions < RAW_PART + 1)
113 		lp1->d_npartitions = MAXPARTITIONS;
114 	if (lp1->d_bbsize == 0)
115 		lp1->d_bbsize = BBSIZE;
116 	if (lp1->d_sbsize == 0)
117 		lp1->d_sbsize = SBSIZE;
118 	lp1->d_partitions[RAW_PART].p_size = lp1->d_secperunit;
119 	lp1->d_magic = DISKMAGIC;
120 	lp1->d_magic2 = DISKMAGIC;
121 	lp1->d_checksum = dkcksum(lp1);
122 	return (lp1);
123 }
124 
125 /*
126  * Determine the size of the transfer, and make sure it is
127  * within the boundaries of the partition. Adjust transfer
128  * if needed, and signal errors or early completion.
129  *
130  * XXX TODO:
131  *	o Split buffers that are too big for the device.
132  *	o Check for overflow.
133  *	o Finish cleaning this up.
134  *
135  * This function returns 1 on success, 0 if transfer equates
136  * to EOF (end of disk) or -1 on failure.  The appropriate
137  * 'errno' value is also set in bp->b_error and bp->b_flags
138  * is marked with B_ERROR.
139  */
140 int
141 dscheck(struct buf *bp, struct diskslices *ssp)
142 {
143 	daddr_t	blkno;
144 	u_long	endsecno;
145 	daddr_t	labelsect;
146 	struct disklabel *lp;
147 	char *msg;
148 	long nsec;
149 	struct partition *pp;
150 	daddr_t	secno;
151 	daddr_t	slicerel_secno;
152 	struct diskslice *sp;
153 
154 	blkno = bp->b_blkno;
155 	if (blkno < 0) {
156 		printf("dscheck(%s): negative b_blkno %ld\n",
157 		    devtoname(bp->b_dev), (long)blkno);
158 		bp->b_error = EINVAL;
159 		goto bad;
160 	}
161 	sp = &ssp->dss_slices[dkslice(bp->b_dev)];
162 	lp = sp->ds_label;
163 	if (ssp->dss_secmult == 1) {
164 		if (bp->b_bcount % (u_long)DEV_BSIZE)
165 			goto bad_bcount;
166 		secno = blkno;
167 		nsec = bp->b_bcount >> DEV_BSHIFT;
168 	} else if (ssp->dss_secshift != -1) {
169 		if (bp->b_bcount & (ssp->dss_secsize - 1))
170 			goto bad_bcount;
171 		if (blkno & (ssp->dss_secmult - 1))
172 			goto bad_blkno;
173 		secno = blkno >> ssp->dss_secshift;
174 		nsec = bp->b_bcount >> (DEV_BSHIFT + ssp->dss_secshift);
175 	} else {
176 		if (bp->b_bcount % ssp->dss_secsize)
177 			goto bad_bcount;
178 		if (blkno % ssp->dss_secmult)
179 			goto bad_blkno;
180 		secno = blkno / ssp->dss_secmult;
181 		nsec = bp->b_bcount / ssp->dss_secsize;
182 	}
183 	if (lp == NULL) {
184 		labelsect = -LABELSECTOR - 1;
185 		endsecno = sp->ds_size;
186 		slicerel_secno = secno;
187 	} else {
188 		labelsect = lp->d_partitions[LABEL_PART].p_offset;
189 		if (labelsect != 0)
190 			Debugger("labelsect != 0 in dscheck()");
191 		pp = &lp->d_partitions[dkpart(bp->b_dev)];
192 		endsecno = pp->p_size;
193 		slicerel_secno = pp->p_offset + secno;
194 	}
195 
196 	/* overwriting disk label ? */
197 	/* XXX should also protect bootstrap in first 8K */
198 	if (slicerel_secno <= LABELSECTOR + labelsect &&
199 #if LABELSECTOR != 0
200 	    slicerel_secno + nsec > LABELSECTOR + labelsect &&
201 #endif
202 	    (bp->b_flags & B_READ) == 0 && sp->ds_wlabel == 0) {
203 		bp->b_error = EROFS;
204 		goto bad;
205 	}
206 
207 #if defined(DOSBBSECTOR) && defined(notyet)
208 	/* overwriting master boot record? */
209 	if (slicerel_secno <= DOSBBSECTOR && (bp->b_flags & B_READ) == 0 &&
210 	    sp->ds_wlabel == 0) {
211 		bp->b_error = EROFS;
212 		goto bad;
213 	}
214 #endif
215 
216 	/* beyond partition? */
217 	if (secno + nsec > endsecno) {
218 		/* if exactly at end of disk, return an EOF */
219 		if (secno == endsecno) {
220 			bp->b_resid = bp->b_bcount;
221 			return (0);
222 		}
223 		/* or truncate if part of it fits */
224 		nsec = endsecno - secno;
225 		if (nsec <= 0) {
226 			bp->b_error = EINVAL;
227 			goto bad;
228 		}
229 		bp->b_bcount = nsec * ssp->dss_secsize;
230 	}
231 
232 	bp->b_pblkno = sp->ds_offset + slicerel_secno;
233 
234 	/*
235 	 * Snoop on label accesses if the slice offset is nonzero.  Fudge
236 	 * offsets in the label to keep the in-core label coherent with
237 	 * the on-disk one.
238 	 */
239 	if (slicerel_secno <= LABELSECTOR + labelsect
240 #if LABELSECTOR != 0
241 	    && slicerel_secno + nsec > LABELSECTOR + labelsect
242 #endif
243 	    && sp->ds_offset != 0) {
244 		struct iodone_chain *ic;
245 
246 		ic = malloc(sizeof *ic , M_DEVBUF, M_WAITOK);
247 		ic->ic_prev_flags = bp->b_flags;
248 		ic->ic_prev_iodone = bp->b_iodone;
249 		ic->ic_prev_iodone_chain = bp->b_iodone_chain;
250 		ic->ic_args[0].ia_long = (LABELSECTOR + labelsect -
251 		    slicerel_secno) * ssp->dss_secsize;
252 		ic->ic_args[1].ia_ptr = sp;
253 		bp->b_iodone = dsiodone;
254 		bp->b_iodone_chain = ic;
255 		if (!(bp->b_flags & B_READ)) {
256 			/*
257 			 * XXX even disklabel(8) writes directly so we need
258 			 * to adjust writes.  Perhaps we should drop support
259 			 * for DIOCWLABEL (always write protect labels) and
260 			 * require the use of DIOCWDINFO.
261 			 *
262 			 * XXX probably need to copy the data to avoid even
263 			 * temporarily corrupting the in-core copy.
264 			 */
265 			if (bp->b_vp != NULL) {
266 				crit_enter();
267 				bp->b_vp->v_numoutput++;
268 				crit_exit();
269 			}
270 			/* XXX need name here. */
271 			msg = fixlabel((char *)NULL, sp,
272 				       (struct disklabel *)
273 				       (bp->b_data + ic->ic_args[0].ia_long),
274 				       TRUE);
275 			if (msg != NULL) {
276 				printf("dscheck(%s): %s\n",
277 				    devtoname(bp->b_dev), msg);
278 				bp->b_error = EROFS;
279 				goto bad;
280 			}
281 		}
282 	}
283 	return (1);
284 
285 bad_bcount:
286 	printf(
287 	"dscheck(%s): b_bcount %ld is not on a sector boundary (ssize %d)\n",
288 	    devtoname(bp->b_dev), bp->b_bcount, ssp->dss_secsize);
289 	bp->b_error = EINVAL;
290 	goto bad;
291 
292 bad_blkno:
293 	printf(
294 	"dscheck(%s): b_blkno %ld is not on a sector boundary (ssize %d)\n",
295 	    devtoname(bp->b_dev), (long)blkno, ssp->dss_secsize);
296 	bp->b_error = EINVAL;
297 	goto bad;
298 
299 bad:
300 	bp->b_resid = bp->b_bcount;
301 	bp->b_flags |= B_ERROR;
302 	return (-1);
303 }
304 
305 void
306 dsclose(dev_t dev, int mode, struct diskslices *ssp)
307 {
308 	u_char mask;
309 	struct diskslice *sp;
310 
311 	sp = &ssp->dss_slices[dkslice(dev)];
312 	mask = 1 << dkpart(dev);
313 	sp->ds_openmask &= ~mask;
314 }
315 
316 void
317 dsgone(struct diskslices **sspp)
318 {
319 	int slice;
320 	struct diskslice *sp;
321 	struct diskslices *ssp;
322 
323 	for (slice = 0, ssp = *sspp; slice < ssp->dss_nslices; slice++) {
324 		sp = &ssp->dss_slices[slice];
325 		free_ds_label(ssp, slice);
326 	}
327 	free(ssp, M_DEVBUF);
328 	*sspp = NULL;
329 }
330 
331 /*
332  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
333  * is subject to the same restriction as dsopen().
334  */
335 int
336 dsioctl(dev_t dev, u_long cmd, caddr_t data,
337 	int flags, struct diskslices **sspp)
338 {
339 	int error;
340 	struct disklabel *lp;
341 	int old_wlabel;
342 	u_char openmask;
343 	int part;
344 	int slice;
345 	struct diskslice *sp;
346 	struct diskslices *ssp;
347 	struct partition *pp;
348 
349 	slice = dkslice(dev);
350 	ssp = *sspp;
351 	sp = &ssp->dss_slices[slice];
352 	lp = sp->ds_label;
353 	switch (cmd) {
354 
355 	case DIOCGDVIRGIN:
356 		lp = (struct disklabel *)data;
357 		if (ssp->dss_slices[WHOLE_DISK_SLICE].ds_label) {
358 			*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
359 		} else {
360 			bzero(lp, sizeof(struct disklabel));
361 		}
362 
363 		lp->d_magic = DISKMAGIC;
364 		lp->d_magic2 = DISKMAGIC;
365 		pp = &lp->d_partitions[RAW_PART];
366 		pp->p_offset = 0;
367 		pp->p_size = sp->ds_size;
368 
369 		lp->d_npartitions = MAXPARTITIONS;
370 		if (lp->d_interleave == 0)
371 			lp->d_interleave = 1;
372 		if (lp->d_rpm == 0)
373 			lp->d_rpm = 3600;
374 		if (lp->d_nsectors == 0)
375 			lp->d_nsectors = 32;
376 		if (lp->d_ntracks == 0)
377 			lp->d_ntracks = 64;
378 
379 		lp->d_bbsize = BBSIZE;
380 		lp->d_sbsize = SBSIZE;
381 		lp->d_secpercyl = lp->d_nsectors * lp->d_ntracks;
382 		lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
383 		lp->d_secperunit = sp->ds_size;
384 		lp->d_checksum = 0;
385 		lp->d_checksum = dkcksum(lp);
386 		return (0);
387 
388 	case DIOCGDINFO:
389 		if (lp == NULL)
390 			return (EINVAL);
391 		*(struct disklabel *)data = *lp;
392 		return (0);
393 
394 #ifdef notyet
395 	case DIOCGDINFOP:
396 		if (lp == NULL)
397 			return (EINVAL);
398 		*(struct disklabel **)data = lp;
399 		return (0);
400 #endif
401 
402 	case DIOCGPART:
403 		if (lp == NULL)
404 			return (EINVAL);
405 		((struct partinfo *)data)->disklab = lp;
406 		((struct partinfo *)data)->part
407 			= &lp->d_partitions[dkpart(dev)];
408 		return (0);
409 
410 	case DIOCGSLICEINFO:
411 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
412 				 (char *)ssp);
413 		return (0);
414 
415 	case DIOCSDINFO:
416 		if (slice == WHOLE_DISK_SLICE)
417 			return (ENODEV);
418 		if (!(flags & FWRITE))
419 			return (EBADF);
420 		lp = malloc(sizeof *lp, M_DEVBUF, M_WAITOK);
421 		if (sp->ds_label == NULL)
422 			bzero(lp, sizeof *lp);
423 		else
424 			bcopy(sp->ds_label, lp, sizeof *lp);
425 		if (sp->ds_label == NULL)
426 			openmask = 0;
427 		else {
428 			openmask = sp->ds_openmask;
429 			if (slice == COMPATIBILITY_SLICE)
430 				openmask |= ssp->dss_slices[
431 				    ssp->dss_first_bsd_slice].ds_openmask;
432 			else if (slice == ssp->dss_first_bsd_slice)
433 				openmask |= ssp->dss_slices[
434 				    COMPATIBILITY_SLICE].ds_openmask;
435 		}
436 		error = setdisklabel(lp, (struct disklabel *)data,
437 				     (u_long)openmask);
438 		/* XXX why doesn't setdisklabel() check this? */
439 		if (error == 0 && lp->d_partitions[RAW_PART].p_offset != 0)
440 			error = EXDEV;
441 		if (error == 0) {
442 			if (lp->d_secperunit > sp->ds_size)
443 				error = ENOSPC;
444 			for (part = 0; part < lp->d_npartitions; part++)
445 				if (lp->d_partitions[part].p_size > sp->ds_size)
446 					error = ENOSPC;
447 		}
448 		if (error != 0) {
449 			free(lp, M_DEVBUF);
450 			return (error);
451 		}
452 		free_ds_label(ssp, slice);
453 		set_ds_label(ssp, slice, lp);
454 		return (0);
455 
456 	case DIOCSYNCSLICEINFO:
457 		if (slice != WHOLE_DISK_SLICE || dkpart(dev) != RAW_PART)
458 			return (EINVAL);
459 		if (!*(int *)data)
460 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
461 				openmask = ssp->dss_slices[slice].ds_openmask;
462 				if (openmask
463 				    && (slice != WHOLE_DISK_SLICE
464 					|| openmask & ~(1 << RAW_PART)))
465 					return (EBUSY);
466 			}
467 
468 		/*
469 		 * Temporarily forget the current slices struct and read
470 		 * the current one.
471 		 * XXX should wait for current accesses on this disk to
472 		 * complete, then lock out future accesses and opens.
473 		 */
474 		*sspp = NULL;
475 		lp = malloc(sizeof *lp, M_DEVBUF, M_WAITOK);
476 		*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
477 		error = dsopen(dev, S_IFCHR, ssp->dss_oflags, sspp, lp);
478 		if (error != 0) {
479 			free(lp, M_DEVBUF);
480 			*sspp = ssp;
481 			return (error);
482 		}
483 
484 		/*
485 		 * Reopen everything.  This is a no-op except in the "force"
486 		 * case and when the raw bdev and cdev are both open.  Abort
487 		 * if anything fails.
488 		 */
489 		for (slice = 0; slice < ssp->dss_nslices; slice++) {
490 			for (openmask = ssp->dss_slices[slice].ds_openmask,
491 			     part = 0; openmask; openmask >>= 1, part++) {
492 				if (!(openmask & 1))
493 					continue;
494 				error = dsopen(dkmodslice(dkmodpart(dev, part),
495 							  slice),
496 					       S_IFCHR, ssp->dss_oflags, sspp,
497 					       lp);
498 				if (error != 0) {
499 					free(lp, M_DEVBUF);
500 					*sspp = ssp;
501 					return (EBUSY);
502 				}
503 			}
504 		}
505 
506 		free(lp, M_DEVBUF);
507 		dsgone(&ssp);
508 		return (0);
509 
510 	case DIOCWDINFO:
511 		error = dsioctl(dev, DIOCSDINFO, data, flags, &ssp);
512 		if (error != 0)
513 			return (error);
514 		/*
515 		 * XXX this used to hack on dk_openpart to fake opening
516 		 * partition 0 in case that is used instead of dkpart(dev).
517 		 */
518 		old_wlabel = sp->ds_wlabel;
519 		set_ds_wlabel(ssp, slice, TRUE);
520 		error = writedisklabel(dev, sp->ds_label);
521 		/* XXX should invalidate in-core label if write failed. */
522 		set_ds_wlabel(ssp, slice, old_wlabel);
523 		return (error);
524 
525 	case DIOCWLABEL:
526 		if (slice == WHOLE_DISK_SLICE)
527 			return (ENODEV);
528 		if (!(flags & FWRITE))
529 			return (EBADF);
530 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
531 		return (0);
532 
533 	default:
534 		return (ENOIOCTL);
535 	}
536 }
537 
538 static void
539 dsiodone(struct buf *bp)
540 {
541 	struct iodone_chain *ic;
542 	char *msg;
543 
544 	ic = bp->b_iodone_chain;
545 	bp->b_flags = bp->b_flags & ~B_DONE;
546 	bp->b_iodone = ic->ic_prev_iodone;
547 	bp->b_iodone_chain = ic->ic_prev_iodone_chain;
548 	if (!(bp->b_flags & B_READ)
549 	    || (!(bp->b_flags & B_ERROR) && bp->b_error == 0)) {
550 		msg = fixlabel((char *)NULL, ic->ic_args[1].ia_ptr,
551 			       (struct disklabel *)
552 			       (bp->b_data + ic->ic_args[0].ia_long),
553 			       FALSE);
554 		if (msg != NULL)
555 			printf("%s\n", msg);
556 	}
557 	free(ic, M_DEVBUF);
558 	biodone(bp);
559 }
560 
561 int
562 dsisopen(struct diskslices *ssp)
563 {
564 	int slice;
565 
566 	if (ssp == NULL)
567 		return (0);
568 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
569 		if (ssp->dss_slices[slice].ds_openmask)
570 			return (1);
571 	}
572 	return (0);
573 }
574 
575 /*
576  * Allocate a slices "struct" and initialize it to contain only an empty
577  * compatibility slice (pointing to itself), a whole disk slice (covering
578  * the disk as described by the label), and (nslices - BASE_SLICES) empty
579  * slices beginning at BASE_SLICE.
580  */
581 struct diskslices *
582 dsmakeslicestruct(int nslices, struct disklabel *lp)
583 {
584 	struct diskslice *sp;
585 	struct diskslices *ssp;
586 
587 	ssp = malloc(offsetof(struct diskslices, dss_slices) +
588 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
589 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
590 	ssp->dss_nslices = nslices;
591 	ssp->dss_oflags = 0;
592 	ssp->dss_secmult = lp->d_secsize / DEV_BSIZE;
593 	if (ssp->dss_secmult & (ssp->dss_secmult - 1))
594 		ssp->dss_secshift = -1;
595 	else
596 		ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
597 	ssp->dss_secsize = lp->d_secsize;
598 	sp = &ssp->dss_slices[0];
599 	bzero(sp, nslices * sizeof *sp);
600 	sp[WHOLE_DISK_SLICE].ds_size = lp->d_secperunit;
601 	return (ssp);
602 }
603 
604 char *
605 dsname(dev_t dev, int unit, int slice, int part, char *partname)
606 {
607 	static char name[32];
608 	const char *dname;
609 
610 	dname = dev_dname(dev);
611 	if (strlen(dname) > 16)
612 		dname = "nametoolong";
613 	snprintf(name, sizeof(name), "%s%d", dname, unit);
614 	partname[0] = '\0';
615 	if (slice != WHOLE_DISK_SLICE || part != RAW_PART) {
616 		partname[0] = 'a' + part;
617 		partname[1] = '\0';
618 		if (slice != COMPATIBILITY_SLICE) {
619 			snprintf(name + strlen(name),
620 			    sizeof(name) - strlen(name), "s%d", slice - 1);
621 		}
622 	}
623 	return (name);
624 }
625 
626 /*
627  * This should only be called when the unit is inactive and the strategy
628  * routine should not allow it to become active unless we call it.  Our
629  * strategy routine must be special to allow activity.
630  */
631 int
632 dsopen(dev_t dev, int mode, u_int flags,
633 	struct diskslices **sspp, struct disklabel *lp)
634 {
635 	dev_t dev1;
636 	int error;
637 	struct disklabel *lp1;
638 	char *msg;
639 	u_char mask;
640 	bool_t need_init;
641 	int part;
642 	char partname[2];
643 	int slice;
644 	char *sname;
645 	struct diskslice *sp;
646 	struct diskslices *ssp;
647 	int unit;
648 
649 	dev->si_bsize_phys = lp->d_secsize;
650 
651 	unit = dkunit(dev);
652 	if (lp->d_secsize % DEV_BSIZE) {
653 		printf("%s: invalid sector size %lu\n", devtoname(dev),
654 		    (u_long)lp->d_secsize);
655 		return (EINVAL);
656 	}
657 
658 	/*
659 	 * XXX reinitialize the slice table unless there is an open device
660 	 * on the unit.  This should only be done if the media has changed.
661 	 */
662 	ssp = *sspp;
663 	need_init = !dsisopen(ssp);
664 	if (ssp != NULL && need_init)
665 		dsgone(sspp);
666 	if (need_init) {
667 		/*
668 		 * Allocate a minimal slices "struct".  This will become
669 		 * the final slices "struct" if we don't want real slices
670 		 * or if we can't find any real slices.
671 		 */
672 		*sspp = dsmakeslicestruct(BASE_SLICE, lp);
673 
674 		if (!(flags & DSO_ONESLICE)) {
675 			TRACE(("dsinit\n"));
676 			error = dsinit(dev, lp, sspp);
677 			if (error != 0) {
678 				dsgone(sspp);
679 				return (error);
680 			}
681 		}
682 		ssp = *sspp;
683 		ssp->dss_oflags = flags;
684 
685 		/*
686 		 * If there are no real slices, then make the compatiblity
687 		 * slice cover the whole disk.
688 		 */
689 		if (ssp->dss_nslices == BASE_SLICE)
690 			ssp->dss_slices[COMPATIBILITY_SLICE].ds_size
691 				= lp->d_secperunit;
692 
693 		/* Point the compatibility slice at the BSD slice, if any. */
694 		for (slice = BASE_SLICE; slice < ssp->dss_nslices; slice++) {
695 			sp = &ssp->dss_slices[slice];
696 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */) {
697 				ssp->dss_first_bsd_slice = slice;
698 				ssp->dss_slices[COMPATIBILITY_SLICE].ds_offset
699 					= sp->ds_offset;
700 				ssp->dss_slices[COMPATIBILITY_SLICE].ds_size
701 					= sp->ds_size;
702 				ssp->dss_slices[COMPATIBILITY_SLICE].ds_type
703 					= sp->ds_type;
704 				break;
705 			}
706 		}
707 
708 		ssp->dss_slices[WHOLE_DISK_SLICE].ds_label = clone_label(lp);
709 		ssp->dss_slices[WHOLE_DISK_SLICE].ds_wlabel = TRUE;
710 	}
711 
712 	/*
713 	 * Initialize secondary info for all slices.  It is needed for more
714 	 * than the current slice in the DEVFS case.  XXX DEVFS is no more.
715 	 */
716 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
717 		sp = &ssp->dss_slices[slice];
718 		if (sp->ds_label != NULL)
719 			continue;
720 		dev1 = dkmodslice(dkmodpart(dev, RAW_PART), slice);
721 		sname = dsname(dev, unit, slice, RAW_PART, partname);
722 		/*
723 		 * XXX this should probably only be done for the need_init
724 		 * case, but there may be a problem with DIOCSYNCSLICEINFO.
725 		 */
726 		set_ds_wlabel(ssp, slice, TRUE);	/* XXX invert */
727 		lp1 = clone_label(lp);
728 		TRACE(("readdisklabel\n"));
729 		if (flags & DSO_NOLABELS)
730 			msg = NULL;
731 		else {
732 			msg = readdisklabel(dev1, lp1);
733 
734 			/*
735 			 * readdisklabel() returns NULL for success, and an
736 			 * error string for failure.
737 			 *
738 			 * If there isn't a label on the disk, and if the
739 			 * DSO_COMPATLABEL is set, we want to use the
740 			 * faked-up label provided by the caller.
741 			 *
742 			 * So we set msg to NULL to indicate that there is
743 			 * no failure (since we have a faked-up label),
744 			 * free lp1, and then clone it again from lp.
745 			 * (In case readdisklabel() modified lp1.)
746 			 */
747 			if (msg != NULL && (flags & DSO_COMPATLABEL)) {
748 				msg = NULL;
749 				free(lp1, M_DEVBUF);
750 				lp1 = clone_label(lp);
751 			}
752 		}
753 		if (msg == NULL)
754 			msg = fixlabel(sname, sp, lp1, FALSE);
755 		if (msg == NULL && lp1->d_secsize != ssp->dss_secsize)
756 			msg = "inconsistent sector size";
757 		if (msg != NULL) {
758 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
759 				log(LOG_WARNING, "%s: cannot find label (%s)\n",
760 				    sname, msg);
761 			free(lp1, M_DEVBUF);
762 			continue;
763 		}
764 		if (lp1->d_flags & D_BADSECT) {
765 			log(LOG_ERR, "%s: bad sector table not supported\n",
766 			    sname);
767 			free(lp1, M_DEVBUF);
768 			continue;
769 		}
770 		set_ds_label(ssp, slice, lp1);
771 		set_ds_wlabel(ssp, slice, FALSE);
772 	}
773 
774 	slice = dkslice(dev);
775 	if (slice >= ssp->dss_nslices)
776 		return (ENXIO);
777 	sp = &ssp->dss_slices[slice];
778 	part = dkpart(dev);
779 	if (part != RAW_PART
780 	    && (sp->ds_label == NULL || part >= sp->ds_label->d_npartitions))
781 		return (EINVAL);	/* XXX needs translation */
782 	mask = 1 << part;
783 	sp->ds_openmask |= mask;
784 	return (0);
785 }
786 
787 int
788 dssize(dev_t dev, struct diskslices **sspp)
789 {
790 	struct disklabel *lp;
791 	int part;
792 	int slice;
793 	struct diskslices *ssp;
794 
795 	slice = dkslice(dev);
796 	part = dkpart(dev);
797 	ssp = *sspp;
798 	if (ssp == NULL || slice >= ssp->dss_nslices
799 	    || !(ssp->dss_slices[slice].ds_openmask & (1 << part))) {
800 		if (dev_dopen(dev, FREAD, S_IFCHR, NULL) != 0)
801 			return (-1);
802 		dev_dclose(dev, FREAD, S_IFCHR, NULL);
803 		ssp = *sspp;
804 	}
805 	lp = ssp->dss_slices[slice].ds_label;
806 	if (lp == NULL)
807 		return (-1);
808 	return ((int)lp->d_partitions[part].p_size);
809 }
810 
811 static void
812 free_ds_label(struct diskslices *ssp, int slice)
813 {
814 	struct disklabel *lp;
815 	struct diskslice *sp;
816 
817 	sp = &ssp->dss_slices[slice];
818 	lp = sp->ds_label;
819 	if (lp == NULL)
820 		return;
821 	free(lp, M_DEVBUF);
822 	set_ds_label(ssp, slice, (struct disklabel *)NULL);
823 }
824 
825 static char *
826 fixlabel(char *sname, struct diskslice *sp, struct disklabel *lp, int writeflag)
827 {
828 	u_long end;
829 	u_long offset;
830 	int part;
831 	struct partition *pp;
832 	u_long start;
833 	bool_t warned;
834 
835 	/* These errors "can't happen" so don't bother reporting details. */
836 	if (lp->d_magic != DISKMAGIC || lp->d_magic2 != DISKMAGIC)
837 		return ("fixlabel: invalid magic");
838 	if (dkcksum(lp) != 0)
839 		return ("fixlabel: invalid checksum");
840 
841 	pp = &lp->d_partitions[RAW_PART];
842 	if (writeflag) {
843 		start = 0;
844 		offset = sp->ds_offset;
845 	} else {
846 		start = sp->ds_offset;
847 		offset = -sp->ds_offset;
848 	}
849 	if (pp->p_offset != start) {
850 		if (sname != NULL) {
851 			printf(
852 "%s: rejecting BSD label: raw partition offset != slice offset\n",
853 			       sname);
854 			slice_info(sname, sp);
855 			partition_info(sname, RAW_PART, pp);
856 		}
857 		return ("fixlabel: raw partition offset != slice offset");
858 	}
859 	if (pp->p_size != sp->ds_size) {
860 		if (sname != NULL) {
861 			printf("%s: raw partition size != slice size\n", sname);
862 			slice_info(sname, sp);
863 			partition_info(sname, RAW_PART, pp);
864 		}
865 		if (pp->p_size > sp->ds_size) {
866 			if (sname == NULL)
867 				return ("fixlabel: raw partition size > slice size");
868 			printf("%s: truncating raw partition\n", sname);
869 			pp->p_size = sp->ds_size;
870 		}
871 	}
872 	end = start + sp->ds_size;
873 	if (start > end)
874 		return ("fixlabel: slice wraps");
875 	if (lp->d_secpercyl <= 0)
876 		return ("fixlabel: d_secpercyl <= 0");
877 	pp -= RAW_PART;
878 	warned = FALSE;
879 	for (part = 0; part < lp->d_npartitions; part++, pp++) {
880 		if (pp->p_offset != 0 || pp->p_size != 0) {
881 			if (pp->p_offset < start
882 			    || pp->p_offset + pp->p_size > end
883 			    || pp->p_offset + pp->p_size < pp->p_offset) {
884 				if (sname != NULL) {
885 					printf(
886 "%s: rejecting partition in BSD label: it isn't entirely within the slice\n",
887 					       sname);
888 					if (!warned) {
889 						slice_info(sname, sp);
890 						warned = TRUE;
891 					}
892 					partition_info(sname, part, pp);
893 				}
894 				/* XXX else silently discard junk. */
895 				bzero(pp, sizeof *pp);
896 			} else
897 				pp->p_offset += offset;
898 		}
899 	}
900 	lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
901 	lp->d_secperunit = sp->ds_size;
902  	lp->d_checksum = 0;
903  	lp->d_checksum = dkcksum(lp);
904 	return (NULL);
905 }
906 
907 static void
908 partition_info(char *sname, int part, struct partition *pp)
909 {
910 	printf("%s%c: start %lu, end %lu, size %lu\n", sname, 'a' + part,
911 	       (u_long)pp->p_offset, (u_long)(pp->p_offset + pp->p_size - 1),
912 	       (u_long)pp->p_size);
913 }
914 
915 static void
916 slice_info(char *sname, struct diskslice *sp)
917 {
918 	printf("%s: start %lu, end %lu, size %lu\n", sname,
919 	       sp->ds_offset, sp->ds_offset + sp->ds_size - 1, sp->ds_size);
920 }
921 
922 static void
923 set_ds_label(struct diskslices *ssp, int slice, struct disklabel *lp)
924 {
925 	ssp->dss_slices[slice].ds_label = lp;
926 	if (slice == COMPATIBILITY_SLICE)
927 		ssp->dss_slices[ssp->dss_first_bsd_slice].ds_label = lp;
928 	else if (slice == ssp->dss_first_bsd_slice)
929 		ssp->dss_slices[COMPATIBILITY_SLICE].ds_label = lp;
930 }
931 
932 static void
933 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
934 {
935 	ssp->dss_slices[slice].ds_wlabel = wlabel;
936 	if (slice == COMPATIBILITY_SLICE)
937 		ssp->dss_slices[ssp->dss_first_bsd_slice].ds_wlabel = wlabel;
938 	else if (slice == ssp->dss_first_bsd_slice)
939 		ssp->dss_slices[COMPATIBILITY_SLICE].ds_wlabel = wlabel;
940 }
941