xref: /dragonfly/sys/kern/subr_diskslice.c (revision 23265324)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
43  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
45  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
47  * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.26 2006/12/23 00:35:04 swildner Exp $
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/conf.h>
54 #include <sys/disklabel.h>
55 #include <sys/diskslice.h>
56 #include <sys/diskmbr.h>
57 #include <sys/fcntl.h>
58 #include <sys/malloc.h>
59 #include <sys/stat.h>
60 #include <sys/syslog.h>
61 #include <sys/proc.h>
62 #include <sys/vnode.h>
63 #include <sys/device.h>
64 #include <sys/thread2.h>
65 
66 #include <vfs/ufs/dinode.h>	/* XXX used only for fs.h */
67 #include <vfs/ufs/fs.h>		/* XXX used only to get BBSIZE/SBSIZE */
68 
69 #define TRACE(str)	do { if (ds_debug) kprintf str; } while (0)
70 
71 typedef	u_char	bool_t;
72 
73 static volatile bool_t ds_debug;
74 
75 static struct disklabel *clone_label (struct disklabel *lp);
76 static void dsiodone (struct bio *bio);
77 static char *fixlabel (char *sname, struct diskslice *sp,
78 			   struct disklabel *lp, int writeflag);
79 static void free_ds_label (struct diskslices *ssp, int slice);
80 static void partition_info (char *sname, int part, struct partition *pp);
81 static void slice_info (char *sname, struct diskslice *sp);
82 static void set_ds_label (struct diskslices *ssp, int slice,
83 			      struct disklabel *lp);
84 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
85 
86 /*
87  * Duplicate a label for the whole disk, and initialize defaults in the
88  * copy for fields that are not already initialized.  The caller only
89  * needs to initialize d_secsize and d_secperunit, and zero the fields
90  * that are to be defaulted.
91  */
92 static struct disklabel *
93 clone_label(struct disklabel *lp)
94 {
95 	struct disklabel *lp1;
96 
97 	lp1 = kmalloc(sizeof *lp1, M_DEVBUF, M_WAITOK);
98 	*lp1 = *lp;
99 	lp = NULL;
100 	if (lp1->d_typename[0] == '\0')
101 		strncpy(lp1->d_typename, "amnesiac", sizeof(lp1->d_typename));
102 	if (lp1->d_packname[0] == '\0')
103 		strncpy(lp1->d_packname, "fictitious", sizeof(lp1->d_packname));
104 	if (lp1->d_nsectors == 0)
105 		lp1->d_nsectors = 32;
106 	if (lp1->d_ntracks == 0)
107 		lp1->d_ntracks = 64;
108 	lp1->d_secpercyl = lp1->d_nsectors * lp1->d_ntracks;
109 	lp1->d_ncylinders = lp1->d_secperunit / lp1->d_secpercyl;
110 	if (lp1->d_rpm == 0)
111 		lp1->d_rpm = 3600;
112 	if (lp1->d_interleave == 0)
113 		lp1->d_interleave = 1;
114 	if (lp1->d_npartitions < RAW_PART + 1)
115 		lp1->d_npartitions = MAXPARTITIONS;
116 	if (lp1->d_bbsize == 0)
117 		lp1->d_bbsize = BBSIZE;
118 	if (lp1->d_sbsize == 0)
119 		lp1->d_sbsize = SBSIZE;
120 	lp1->d_partitions[RAW_PART].p_size = lp1->d_secperunit;
121 	lp1->d_magic = DISKMAGIC;
122 	lp1->d_magic2 = DISKMAGIC;
123 	lp1->d_checksum = dkcksum(lp1);
124 	return (lp1);
125 }
126 
127 /*
128  * Determine the size of the transfer, and make sure it is
129  * within the boundaries of the partition. Adjust transfer
130  * if needed, and signal errors or early completion.
131  *
132  * XXX TODO:
133  *	o Split buffers that are too big for the device.
134  *	o Check for overflow.
135  *	o Finish cleaning this up.
136  *
137  * This function returns 1 on success, 0 if transfer equates
138  * to EOF (end of disk) or -1 on failure.  The appropriate
139  * 'errno' value is also set in bp->b_error and bp->b_flags
140  * is marked with B_ERROR.
141  */
142 struct bio *
143 dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
144 {
145 	struct buf *bp = bio->bio_buf;
146 	struct bio *nbio;
147 	u_long	endsecno;
148 	daddr_t	labelsect;
149 	struct disklabel *lp;
150 	char *msg;
151 	long nsec;
152 	struct partition *pp;
153 	daddr_t	secno;
154 	daddr_t	slicerel_secno;
155 	struct diskslice *sp;
156 	int shift;
157 	int mask;
158 
159 	if (bio->bio_offset < 0) {
160 		kprintf("dscheck(%s): negative bio_offset %lld\n",
161 		    devtoname(dev), bio->bio_offset);
162 		goto bad;
163 	}
164 	sp = &ssp->dss_slices[dkslice(dev)];
165 	lp = sp->ds_label;
166 
167 	if (ssp->dss_secmult == 1) {
168 		shift = DEV_BSHIFT;
169 		goto doshift;
170 	} else if (ssp->dss_secshift != -1) {
171 		shift = DEV_BSHIFT + ssp->dss_secshift;
172 doshift:
173 		mask = (1 << shift) - 1;
174 		if ((int)bp->b_bcount & mask)
175 			goto bad_bcount;
176 		if ((int)bio->bio_offset & mask)
177 			goto bad_blkno;
178 		secno = (daddr_t)(bio->bio_offset >> shift);
179 		nsec = bp->b_bcount >> shift;
180 	} else {
181 		if (bp->b_bcount % ssp->dss_secsize)
182 			goto bad_bcount;
183 		if (bio->bio_offset % ssp->dss_secsize)
184 			goto bad_blkno;
185 		secno = (daddr_t)(bio->bio_offset / ssp->dss_secsize);
186 		nsec = bp->b_bcount / ssp->dss_secsize;
187 	}
188 	if (lp == NULL) {
189 		labelsect = -LABELSECTOR - 1;
190 		endsecno = sp->ds_size;
191 		slicerel_secno = secno;
192 	} else {
193 		labelsect = lp->d_partitions[LABEL_PART].p_offset;
194 		if (labelsect != 0)
195 			Debugger("labelsect != 0 in dscheck()");
196 		pp = &lp->d_partitions[dkpart(dev)];
197 		endsecno = pp->p_size;
198 		slicerel_secno = pp->p_offset + secno;
199 	}
200 
201 	/* overwriting disk label ? */
202 	/* XXX should also protect bootstrap in first 8K */
203 	if (slicerel_secno <= LABELSECTOR + labelsect &&
204 #if LABELSECTOR != 0
205 	    slicerel_secno + nsec > LABELSECTOR + labelsect &&
206 #endif
207 	    bp->b_cmd != BUF_CMD_READ && sp->ds_wlabel == 0) {
208 		bp->b_error = EROFS;
209 		goto error;
210 	}
211 
212 #if defined(DOSBBSECTOR) && defined(notyet)
213 	/* overwriting master boot record? */
214 	if (slicerel_secno <= DOSBBSECTOR && bp->b_cmd != BUF_CMD_READ &&
215 	    sp->ds_wlabel == 0) {
216 		bp->b_error = EROFS;
217 		goto error;
218 	}
219 #endif
220 
221 	/*
222 	 * EOF handling
223 	 */
224 	if (secno + nsec > endsecno) {
225 		/*
226 		 * Return an error if beyond the end of the disk, or
227 		 * if B_BNOCLIP is set.  Tell the system that we do not
228 		 * need to keep the buffer around.
229 		 */
230 		if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
231 			goto bad;
232 
233 		/*
234 		 * If exactly at end of disk, return an EOF.  Throw away
235 		 * the buffer contents, if any, by setting B_INVAL.
236 		 */
237 		if (secno == endsecno) {
238 			bp->b_resid = bp->b_bcount;
239 			bp->b_flags |= B_INVAL;
240 			goto done;
241 		}
242 
243 		/*
244 		 * Else truncate
245 		 */
246 		nsec = endsecno - secno;
247 		bp->b_bcount = nsec * ssp->dss_secsize;
248 	}
249 
250 	nbio = push_bio(bio);
251 	nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
252 			   ssp->dss_secsize;
253 
254 	/*
255 	 * Snoop on label accesses if the slice offset is nonzero.  Fudge
256 	 * offsets in the label to keep the in-core label coherent with
257 	 * the on-disk one.
258 	 */
259 	if (slicerel_secno <= LABELSECTOR + labelsect
260 #if LABELSECTOR != 0
261 	    && slicerel_secno + nsec > LABELSECTOR + labelsect
262 #endif
263 	    && sp->ds_offset != 0) {
264 		nbio->bio_done = dsiodone;
265 		nbio->bio_caller_info1.ptr = sp;
266 		nbio->bio_caller_info2.offset = (off_t)(LABELSECTOR + labelsect -
267 					 slicerel_secno) * ssp->dss_secsize;
268 		if (bp->b_cmd != BUF_CMD_READ) {
269 			/*
270 			 * XXX even disklabel(8) writes directly so we need
271 			 * to adjust writes.  Perhaps we should drop support
272 			 * for DIOCWLABEL (always write protect labels) and
273 			 * require the use of DIOCWDINFO.
274 			 *
275 			 * XXX probably need to copy the data to avoid even
276 			 * temporarily corrupting the in-core copy.
277 			 */
278 			/* XXX need name here. */
279 			msg = fixlabel(
280 				NULL, sp,
281 			       (struct disklabel *)
282 			       (bp->b_data + (int)nbio->bio_caller_info2.offset),
283 			       TRUE);
284 			if (msg != NULL) {
285 				kprintf("dscheck(%s): %s\n",
286 				    devtoname(dev), msg);
287 				bp->b_error = EROFS;
288 				pop_bio(nbio);
289 				goto error;
290 			}
291 		}
292 	}
293 	return (nbio);
294 
295 bad_bcount:
296 	kprintf(
297 	"dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
298 	    devtoname(dev), bp->b_bcount, ssp->dss_secsize);
299 	goto bad;
300 
301 bad_blkno:
302 	kprintf(
303 	"dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
304 	    devtoname(dev), bio->bio_offset, ssp->dss_secsize);
305 bad:
306 	bp->b_error = EINVAL;
307 	/* fall through */
308 error:
309 	/*
310 	 * Terminate the I/O with a ranging error.  Since the buffer is
311 	 * either illegal or beyond the file EOF, mark it B_INVAL as well.
312 	 */
313 	bp->b_resid = bp->b_bcount;
314 	bp->b_flags |= B_ERROR | B_INVAL;
315 done:
316 	/*
317 	 * Caller must biodone() the originally passed bio if NULL is
318 	 * returned.
319 	 */
320 	return (NULL);
321 }
322 
323 void
324 dsclose(cdev_t dev, int mode, struct diskslices *ssp)
325 {
326 	u_char mask;
327 	struct diskslice *sp;
328 
329 	sp = &ssp->dss_slices[dkslice(dev)];
330 	mask = 1 << dkpart(dev);
331 	sp->ds_openmask &= ~mask;
332 }
333 
334 void
335 dsgone(struct diskslices **sspp)
336 {
337 	int slice;
338 	struct diskslice *sp;
339 	struct diskslices *ssp;
340 
341 	for (slice = 0, ssp = *sspp; slice < ssp->dss_nslices; slice++) {
342 		sp = &ssp->dss_slices[slice];
343 		free_ds_label(ssp, slice);
344 	}
345 	kfree(ssp, M_DEVBUF);
346 	*sspp = NULL;
347 }
348 
349 /*
350  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
351  * is subject to the same restriction as dsopen().
352  */
353 int
354 dsioctl(cdev_t dev, u_long cmd, caddr_t data,
355 	int flags, struct diskslices **sspp)
356 {
357 	int error;
358 	struct disklabel *lp;
359 	int old_wlabel;
360 	u_char openmask;
361 	int part;
362 	int slice;
363 	struct diskslice *sp;
364 	struct diskslices *ssp;
365 	struct partition *pp;
366 
367 	slice = dkslice(dev);
368 	ssp = *sspp;
369 	sp = &ssp->dss_slices[slice];
370 	lp = sp->ds_label;
371 	switch (cmd) {
372 
373 	case DIOCGDVIRGIN:
374 		lp = (struct disklabel *)data;
375 		if (ssp->dss_slices[WHOLE_DISK_SLICE].ds_label) {
376 			*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
377 		} else {
378 			bzero(lp, sizeof(struct disklabel));
379 		}
380 
381 		lp->d_magic = DISKMAGIC;
382 		lp->d_magic2 = DISKMAGIC;
383 		pp = &lp->d_partitions[RAW_PART];
384 		pp->p_offset = 0;
385 		pp->p_size = sp->ds_size;
386 
387 		lp->d_npartitions = MAXPARTITIONS;
388 		if (lp->d_interleave == 0)
389 			lp->d_interleave = 1;
390 		if (lp->d_rpm == 0)
391 			lp->d_rpm = 3600;
392 		if (lp->d_nsectors == 0)
393 			lp->d_nsectors = 32;
394 		if (lp->d_ntracks == 0)
395 			lp->d_ntracks = 64;
396 
397 		lp->d_bbsize = BBSIZE;
398 		lp->d_sbsize = SBSIZE;
399 		lp->d_secpercyl = lp->d_nsectors * lp->d_ntracks;
400 		lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
401 		lp->d_secperunit = sp->ds_size;
402 		lp->d_checksum = 0;
403 		lp->d_checksum = dkcksum(lp);
404 		return (0);
405 
406 	case DIOCGDINFO:
407 		if (lp == NULL)
408 			return (EINVAL);
409 		*(struct disklabel *)data = *lp;
410 		return (0);
411 
412 #ifdef notyet
413 	case DIOCGDINFOP:
414 		if (lp == NULL)
415 			return (EINVAL);
416 		*(struct disklabel **)data = lp;
417 		return (0);
418 #endif
419 
420 	case DIOCGPART:
421 		if (lp == NULL)
422 			return (EINVAL);
423 		((struct partinfo *)data)->disklab = lp;
424 		((struct partinfo *)data)->part
425 			= &lp->d_partitions[dkpart(dev)];
426 		return (0);
427 
428 	case DIOCGSLICEINFO:
429 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
430 				 (char *)ssp);
431 		return (0);
432 
433 	case DIOCSDINFO:
434 		if (slice == WHOLE_DISK_SLICE)
435 			return (ENODEV);
436 		if (!(flags & FWRITE))
437 			return (EBADF);
438 		lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK);
439 		if (sp->ds_label == NULL)
440 			bzero(lp, sizeof *lp);
441 		else
442 			bcopy(sp->ds_label, lp, sizeof *lp);
443 		if (sp->ds_label == NULL)
444 			openmask = 0;
445 		else {
446 			openmask = sp->ds_openmask;
447 			if (slice == COMPATIBILITY_SLICE)
448 				openmask |= ssp->dss_slices[
449 				    ssp->dss_first_bsd_slice].ds_openmask;
450 			else if (slice == ssp->dss_first_bsd_slice)
451 				openmask |= ssp->dss_slices[
452 				    COMPATIBILITY_SLICE].ds_openmask;
453 		}
454 		error = setdisklabel(lp, (struct disklabel *)data,
455 				     (u_long)openmask);
456 		/* XXX why doesn't setdisklabel() check this? */
457 		if (error == 0 && lp->d_partitions[RAW_PART].p_offset != 0)
458 			error = EXDEV;
459 		if (error == 0) {
460 			if (lp->d_secperunit > sp->ds_size)
461 				error = ENOSPC;
462 			for (part = 0; part < lp->d_npartitions; part++)
463 				if (lp->d_partitions[part].p_size > sp->ds_size)
464 					error = ENOSPC;
465 		}
466 		if (error != 0) {
467 			kfree(lp, M_DEVBUF);
468 			return (error);
469 		}
470 		free_ds_label(ssp, slice);
471 		set_ds_label(ssp, slice, lp);
472 		return (0);
473 
474 	case DIOCSYNCSLICEINFO:
475 		if (slice != WHOLE_DISK_SLICE || dkpart(dev) != RAW_PART)
476 			return (EINVAL);
477 		if (!*(int *)data)
478 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
479 				openmask = ssp->dss_slices[slice].ds_openmask;
480 				if (openmask
481 				    && (slice != WHOLE_DISK_SLICE
482 					|| openmask & ~(1 << RAW_PART)))
483 					return (EBUSY);
484 			}
485 
486 		/*
487 		 * Temporarily forget the current slices struct and read
488 		 * the current one.
489 		 * XXX should wait for current accesses on this disk to
490 		 * complete, then lock out future accesses and opens.
491 		 */
492 		*sspp = NULL;
493 		lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK);
494 		*lp = *ssp->dss_slices[WHOLE_DISK_SLICE].ds_label;
495 		error = dsopen(dev, S_IFCHR, ssp->dss_oflags, sspp, lp);
496 		if (error != 0) {
497 			kfree(lp, M_DEVBUF);
498 			*sspp = ssp;
499 			return (error);
500 		}
501 
502 		/*
503 		 * Reopen everything.  This is a no-op except in the "force"
504 		 * case and when the raw bdev and cdev are both open.  Abort
505 		 * if anything fails.
506 		 */
507 		for (slice = 0; slice < ssp->dss_nslices; slice++) {
508 			for (openmask = ssp->dss_slices[slice].ds_openmask,
509 			     part = 0; openmask; openmask >>= 1, part++) {
510 				if (!(openmask & 1))
511 					continue;
512 				error = dsopen(dkmodslice(dkmodpart(dev, part),
513 							  slice),
514 					       S_IFCHR, ssp->dss_oflags, sspp,
515 					       lp);
516 				if (error != 0) {
517 					kfree(lp, M_DEVBUF);
518 					*sspp = ssp;
519 					return (EBUSY);
520 				}
521 			}
522 		}
523 
524 		kfree(lp, M_DEVBUF);
525 		dsgone(&ssp);
526 		return (0);
527 
528 	case DIOCWDINFO:
529 		error = dsioctl(dev, DIOCSDINFO, data, flags, &ssp);
530 		if (error != 0)
531 			return (error);
532 		/*
533 		 * XXX this used to hack on dk_openpart to fake opening
534 		 * partition 0 in case that is used instead of dkpart(dev).
535 		 */
536 		old_wlabel = sp->ds_wlabel;
537 		set_ds_wlabel(ssp, slice, TRUE);
538 		error = writedisklabel(dev, sp->ds_label);
539 		/* XXX should invalidate in-core label if write failed. */
540 		set_ds_wlabel(ssp, slice, old_wlabel);
541 		return (error);
542 
543 	case DIOCWLABEL:
544 		if (slice == WHOLE_DISK_SLICE)
545 			return (ENODEV);
546 		if (!(flags & FWRITE))
547 			return (EBADF);
548 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
549 		return (0);
550 
551 	default:
552 		return (ENOIOCTL);
553 	}
554 }
555 
556 /*
557  * Chain the bio_done.  b_cmd remains valid through such chaining.
558  */
559 static void
560 dsiodone(struct bio *bio)
561 {
562 	struct buf *bp = bio->bio_buf;
563 	char *msg;
564 
565 	if (bp->b_cmd != BUF_CMD_READ
566 	    || (!(bp->b_flags & B_ERROR) && bp->b_error == 0)) {
567 		msg = fixlabel(NULL, bio->bio_caller_info1.ptr,
568 			       (struct disklabel *)
569 			       (bp->b_data + (int)bio->bio_caller_info2.offset),
570 			       FALSE);
571 		if (msg != NULL)
572 			kprintf("%s\n", msg);
573 	}
574 	biodone(bio->bio_prev);
575 }
576 
577 int
578 dsisopen(struct diskslices *ssp)
579 {
580 	int slice;
581 
582 	if (ssp == NULL)
583 		return (0);
584 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
585 		if (ssp->dss_slices[slice].ds_openmask)
586 			return (1);
587 	}
588 	return (0);
589 }
590 
591 /*
592  * Allocate a slices "struct" and initialize it to contain only an empty
593  * compatibility slice (pointing to itself), a whole disk slice (covering
594  * the disk as described by the label), and (nslices - BASE_SLICES) empty
595  * slices beginning at BASE_SLICE.
596  */
597 struct diskslices *
598 dsmakeslicestruct(int nslices, struct disklabel *lp)
599 {
600 	struct diskslice *sp;
601 	struct diskslices *ssp;
602 
603 	ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
604 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
605 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
606 	ssp->dss_nslices = nslices;
607 	ssp->dss_oflags = 0;
608 	ssp->dss_secmult = lp->d_secsize / DEV_BSIZE;
609 	if (ssp->dss_secmult & (ssp->dss_secmult - 1))
610 		ssp->dss_secshift = -1;
611 	else
612 		ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
613 	ssp->dss_secsize = lp->d_secsize;
614 	sp = &ssp->dss_slices[0];
615 	bzero(sp, nslices * sizeof *sp);
616 	sp[WHOLE_DISK_SLICE].ds_size = lp->d_secperunit;
617 	return (ssp);
618 }
619 
620 char *
621 dsname(cdev_t dev, int unit, int slice, int part, char *partname)
622 {
623 	static char name[32];
624 	const char *dname;
625 
626 	dname = dev_dname(dev);
627 	if (strlen(dname) > 16)
628 		dname = "nametoolong";
629 	ksnprintf(name, sizeof(name), "%s%d", dname, unit);
630 	partname[0] = '\0';
631 	if (slice != WHOLE_DISK_SLICE || part != RAW_PART) {
632 		partname[0] = 'a' + part;
633 		partname[1] = '\0';
634 		if (slice != COMPATIBILITY_SLICE) {
635 			ksnprintf(name + strlen(name),
636 			    sizeof(name) - strlen(name), "s%d", slice - 1);
637 		}
638 	}
639 	return (name);
640 }
641 
642 /*
643  * This should only be called when the unit is inactive and the strategy
644  * routine should not allow it to become active unless we call it.  Our
645  * strategy routine must be special to allow activity.
646  */
647 int
648 dsopen(cdev_t dev, int mode, u_int flags,
649 	struct diskslices **sspp, struct disklabel *lp)
650 {
651 	cdev_t dev1;
652 	int error;
653 	struct disklabel *lp1;
654 	char *msg;
655 	u_char mask;
656 	bool_t need_init;
657 	int part;
658 	char partname[2];
659 	int slice;
660 	char *sname;
661 	struct diskslice *sp;
662 	struct diskslices *ssp;
663 	int unit;
664 
665 	dev->si_bsize_phys = lp->d_secsize;
666 
667 	unit = dkunit(dev);
668 	if (lp->d_secsize % DEV_BSIZE) {
669 		kprintf("%s: invalid sector size %lu\n", devtoname(dev),
670 		    (u_long)lp->d_secsize);
671 		return (EINVAL);
672 	}
673 
674 	/*
675 	 * Do not attempt to read the slice table or disk label when
676 	 * accessing the raw disk.
677 	 */
678 	if (dkslice(dev) == WHOLE_DISK_SLICE && dkpart(dev) == RAW_PART) {
679 		flags |= DSO_ONESLICE | DSO_NOLABELS;
680 	}
681 
682 	/*
683 	 * XXX reinitialize the slice table unless there is an open device
684 	 * on the unit.  This should only be done if the media has changed.
685 	 */
686 	ssp = *sspp;
687 	need_init = !dsisopen(ssp);
688 	if (ssp != NULL && need_init)
689 		dsgone(sspp);
690 	if (need_init) {
691 		/*
692 		 * Allocate a minimal slices "struct".  This will become
693 		 * the final slices "struct" if we don't want real slices
694 		 * or if we can't find any real slices.
695 		 */
696 		*sspp = dsmakeslicestruct(BASE_SLICE, lp);
697 
698 		if (!(flags & DSO_ONESLICE)) {
699 			TRACE(("dsinit\n"));
700 			error = dsinit(dev, lp, sspp);
701 			if (error != 0) {
702 				dsgone(sspp);
703 				return (error);
704 			}
705 		}
706 		ssp = *sspp;
707 		ssp->dss_oflags = flags;
708 
709 		/*
710 		 * If there are no real slices, then make the compatiblity
711 		 * slice cover the whole disk.
712 		 */
713 		if (ssp->dss_nslices == BASE_SLICE)
714 			ssp->dss_slices[COMPATIBILITY_SLICE].ds_size
715 				= lp->d_secperunit;
716 
717 		/* Point the compatibility slice at the BSD slice, if any. */
718 		for (slice = BASE_SLICE; slice < ssp->dss_nslices; slice++) {
719 			sp = &ssp->dss_slices[slice];
720 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */) {
721 				ssp->dss_first_bsd_slice = slice;
722 				ssp->dss_slices[COMPATIBILITY_SLICE].ds_offset
723 					= sp->ds_offset;
724 				ssp->dss_slices[COMPATIBILITY_SLICE].ds_size
725 					= sp->ds_size;
726 				ssp->dss_slices[COMPATIBILITY_SLICE].ds_type
727 					= sp->ds_type;
728 				break;
729 			}
730 		}
731 
732 		ssp->dss_slices[WHOLE_DISK_SLICE].ds_label = clone_label(lp);
733 		ssp->dss_slices[WHOLE_DISK_SLICE].ds_wlabel = TRUE;
734 	}
735 
736 	/*
737 	 * Initialize secondary info for all slices.  It is needed for more
738 	 * than the current slice in the DEVFS case.  XXX DEVFS is no more.
739 	 */
740 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
741 		sp = &ssp->dss_slices[slice];
742 		if (sp->ds_label != NULL)
743 			continue;
744 		dev1 = dkmodslice(dkmodpart(dev, RAW_PART), slice);
745 		sname = dsname(dev, unit, slice, RAW_PART, partname);
746 		/*
747 		 * XXX this should probably only be done for the need_init
748 		 * case, but there may be a problem with DIOCSYNCSLICEINFO.
749 		 */
750 		set_ds_wlabel(ssp, slice, TRUE);	/* XXX invert */
751 		lp1 = clone_label(lp);
752 		TRACE(("readdisklabel\n"));
753 		if (flags & DSO_NOLABELS)
754 			msg = NULL;
755 		else {
756 			msg = readdisklabel(dev1, lp1);
757 
758 			/*
759 			 * readdisklabel() returns NULL for success, and an
760 			 * error string for failure.
761 			 *
762 			 * If there isn't a label on the disk, and if the
763 			 * DSO_COMPATLABEL is set, we want to use the
764 			 * faked-up label provided by the caller.
765 			 *
766 			 * So we set msg to NULL to indicate that there is
767 			 * no failure (since we have a faked-up label),
768 			 * free lp1, and then clone it again from lp.
769 			 * (In case readdisklabel() modified lp1.)
770 			 */
771 			if (msg != NULL && (flags & DSO_COMPATLABEL)) {
772 				msg = NULL;
773 				kfree(lp1, M_DEVBUF);
774 				lp1 = clone_label(lp);
775 			}
776 		}
777 		if (msg == NULL)
778 			msg = fixlabel(sname, sp, lp1, FALSE);
779 		if (msg == NULL && lp1->d_secsize != ssp->dss_secsize)
780 			msg = "inconsistent sector size";
781 		if (msg != NULL) {
782 			if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
783 				log(LOG_WARNING, "%s: cannot find label (%s)\n",
784 				    sname, msg);
785 			kfree(lp1, M_DEVBUF);
786 			continue;
787 		}
788 		if (lp1->d_flags & D_BADSECT) {
789 			log(LOG_ERR, "%s: bad sector table not supported\n",
790 			    sname);
791 			kfree(lp1, M_DEVBUF);
792 			continue;
793 		}
794 		set_ds_label(ssp, slice, lp1);
795 		set_ds_wlabel(ssp, slice, FALSE);
796 	}
797 
798 	slice = dkslice(dev);
799 	if (slice >= ssp->dss_nslices)
800 		return (ENXIO);
801 	sp = &ssp->dss_slices[slice];
802 	part = dkpart(dev);
803 	if (part != RAW_PART
804 	    && (sp->ds_label == NULL || part >= sp->ds_label->d_npartitions))
805 		return (EINVAL);	/* XXX needs translation */
806 	mask = 1 << part;
807 	sp->ds_openmask |= mask;
808 	return (0);
809 }
810 
811 int
812 dssize(cdev_t dev, struct diskslices **sspp)
813 {
814 	struct disklabel *lp;
815 	int part;
816 	int slice;
817 	struct diskslices *ssp;
818 
819 	slice = dkslice(dev);
820 	part = dkpart(dev);
821 	ssp = *sspp;
822 	if (ssp == NULL || slice >= ssp->dss_nslices
823 	    || !(ssp->dss_slices[slice].ds_openmask & (1 << part))) {
824 		if (dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred) != 0)
825 			return (-1);
826 		dev_dclose(dev, FREAD, S_IFCHR);
827 		ssp = *sspp;
828 	}
829 	lp = ssp->dss_slices[slice].ds_label;
830 	if (lp == NULL)
831 		return (-1);
832 	return ((int)lp->d_partitions[part].p_size);
833 }
834 
835 static void
836 free_ds_label(struct diskslices *ssp, int slice)
837 {
838 	struct disklabel *lp;
839 	struct diskslice *sp;
840 
841 	sp = &ssp->dss_slices[slice];
842 	lp = sp->ds_label;
843 	if (lp == NULL)
844 		return;
845 	kfree(lp, M_DEVBUF);
846 	set_ds_label(ssp, slice, (struct disklabel *)NULL);
847 }
848 
849 static char *
850 fixlabel(char *sname, struct diskslice *sp, struct disklabel *lp, int writeflag)
851 {
852 	u_long end;
853 	u_long offset;
854 	int part;
855 	struct partition *pp;
856 	u_long start;
857 	bool_t warned;
858 
859 	/* These errors "can't happen" so don't bother reporting details. */
860 	if (lp->d_magic != DISKMAGIC || lp->d_magic2 != DISKMAGIC)
861 		return ("fixlabel: invalid magic");
862 	if (dkcksum(lp) != 0)
863 		return ("fixlabel: invalid checksum");
864 
865 	pp = &lp->d_partitions[RAW_PART];
866 	if (writeflag) {
867 		start = 0;
868 		offset = sp->ds_offset;
869 	} else {
870 		start = sp->ds_offset;
871 		offset = -sp->ds_offset;
872 	}
873 	if (pp->p_offset != start) {
874 		if (sname != NULL) {
875 			kprintf(
876 "%s: rejecting BSD label: raw partition offset != slice offset\n",
877 			       sname);
878 			slice_info(sname, sp);
879 			partition_info(sname, RAW_PART, pp);
880 		}
881 		return ("fixlabel: raw partition offset != slice offset");
882 	}
883 	if (pp->p_size != sp->ds_size) {
884 		if (sname != NULL) {
885 			kprintf("%s: raw partition size != slice size\n", sname);
886 			slice_info(sname, sp);
887 			partition_info(sname, RAW_PART, pp);
888 		}
889 		if (pp->p_size > sp->ds_size) {
890 			if (sname == NULL)
891 				return ("fixlabel: raw partition size > slice size");
892 			kprintf("%s: truncating raw partition\n", sname);
893 			pp->p_size = sp->ds_size;
894 		}
895 	}
896 	end = start + sp->ds_size;
897 	if (start > end)
898 		return ("fixlabel: slice wraps");
899 	if (lp->d_secpercyl <= 0)
900 		return ("fixlabel: d_secpercyl <= 0");
901 	pp -= RAW_PART;
902 	warned = FALSE;
903 	for (part = 0; part < lp->d_npartitions; part++, pp++) {
904 		if (pp->p_offset != 0 || pp->p_size != 0) {
905 			if (pp->p_offset < start
906 			    || pp->p_offset + pp->p_size > end
907 			    || pp->p_offset + pp->p_size < pp->p_offset) {
908 				if (sname != NULL) {
909 					kprintf(
910 "%s: rejecting partition in BSD label: it isn't entirely within the slice\n",
911 					       sname);
912 					if (!warned) {
913 						slice_info(sname, sp);
914 						warned = TRUE;
915 					}
916 					partition_info(sname, part, pp);
917 				}
918 				/* XXX else silently discard junk. */
919 				bzero(pp, sizeof *pp);
920 			} else
921 				pp->p_offset += offset;
922 		}
923 	}
924 	lp->d_ncylinders = sp->ds_size / lp->d_secpercyl;
925 	lp->d_secperunit = sp->ds_size;
926  	lp->d_checksum = 0;
927  	lp->d_checksum = dkcksum(lp);
928 	return (NULL);
929 }
930 
931 static void
932 partition_info(char *sname, int part, struct partition *pp)
933 {
934 	kprintf("%s%c: start %lu, end %lu, size %lu\n", sname, 'a' + part,
935 	       (u_long)pp->p_offset, (u_long)(pp->p_offset + pp->p_size - 1),
936 	       (u_long)pp->p_size);
937 }
938 
939 static void
940 slice_info(char *sname, struct diskslice *sp)
941 {
942 	kprintf("%s: start %lu, end %lu, size %lu\n", sname,
943 	       sp->ds_offset, sp->ds_offset + sp->ds_size - 1, sp->ds_size);
944 }
945 
946 static void
947 set_ds_label(struct diskslices *ssp, int slice, struct disklabel *lp)
948 {
949 	ssp->dss_slices[slice].ds_label = lp;
950 	if (slice == COMPATIBILITY_SLICE)
951 		ssp->dss_slices[ssp->dss_first_bsd_slice].ds_label = lp;
952 	else if (slice == ssp->dss_first_bsd_slice)
953 		ssp->dss_slices[COMPATIBILITY_SLICE].ds_label = lp;
954 }
955 
956 static void
957 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
958 {
959 	ssp->dss_slices[slice].ds_wlabel = wlabel;
960 	if (slice == COMPATIBILITY_SLICE)
961 		ssp->dss_slices[ssp->dss_first_bsd_slice].ds_wlabel = wlabel;
962 	else if (slice == ssp->dss_first_bsd_slice)
963 		ssp->dss_slices[COMPATIBILITY_SLICE].ds_wlabel = wlabel;
964 }
965