xref: /dragonfly/sys/kern/subr_diskslice.c (revision 25a2db75)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
43  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
45  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
47  */
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/buf.h>
52 #include <sys/conf.h>
53 #include <sys/disklabel.h>
54 #include <sys/disklabel32.h>
55 #include <sys/disklabel64.h>
56 #include <sys/diskslice.h>
57 #include <sys/disk.h>
58 #include <sys/diskmbr.h>
59 #include <sys/fcntl.h>
60 #include <sys/malloc.h>
61 #include <sys/stat.h>
62 #include <sys/syslog.h>
63 #include <sys/proc.h>
64 #include <sys/vnode.h>
65 #include <sys/device.h>
66 #include <sys/thread2.h>
67 
68 #include <vfs/ufs/dinode.h>	/* XXX used only for fs.h */
69 #include <vfs/ufs/fs.h>		/* XXX used only to get BBSIZE/SBSIZE */
70 #include <sys/devfs.h>
71 
72 static int  dsreadandsetlabel(cdev_t dev, u_int flags,
73 			   struct diskslices *ssp, struct diskslice *sp,
74 			   struct disk_info *info);
75 static void free_ds_label (struct diskslices *ssp, int slice);
76 static void set_ds_label (struct diskslices *ssp, int slice, disklabel_t lp,
77 			   disklabel_ops_t ops);
78 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
79 
80 /*
81  * Determine the size of the transfer, and make sure it is
82  * within the boundaries of the partition. Adjust transfer
83  * if needed, and signal errors or early completion.
84  *
85  * XXX TODO:
86  *	o Split buffers that are too big for the device.
87  *	o Check for overflow.
88  *	o Finish cleaning this up.
89  *
90  * This function returns 1 on success, 0 if transfer equates
91  * to EOF (end of disk) or -1 on failure.  The appropriate
92  * 'errno' value is also set in bp->b_error and bp->b_flags
93  * is marked with B_ERROR.
94  */
95 struct bio *
96 dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
97 {
98 	struct buf *bp = bio->bio_buf;
99 	struct bio *nbio;
100 	disklabel_t lp;
101 	disklabel_ops_t ops;
102 	long nsec;
103 	u_int64_t secno;
104 	u_int64_t endsecno;
105 	u_int64_t slicerel_secno;
106 	struct diskslice *sp;
107 	u_int32_t part;
108 	u_int32_t slice;
109 	int shift;
110 	int mask;
111 
112 	slice = dkslice(dev);
113 	part  = dkpart(dev);
114 
115 	if (bio->bio_offset < 0) {
116 		kprintf("dscheck(%s): negative bio_offset %lld\n",
117 			devtoname(dev), (long long)bio->bio_offset);
118 		goto bad;
119 	}
120 	if (slice >= ssp->dss_nslices) {
121 		kprintf("dscheck(%s): slice too large %d/%d\n",
122 			devtoname(dev), slice, ssp->dss_nslices);
123 		goto bad;
124 	}
125 	sp = &ssp->dss_slices[slice];
126 	/*
127 	 * Calculate secno and nsec
128 	 */
129 	if (ssp->dss_secmult == 1) {
130 		shift = DEV_BSHIFT;
131 		goto doshift;
132 	} else if (ssp->dss_secshift != -1) {
133 		shift = DEV_BSHIFT + ssp->dss_secshift;
134 doshift:
135 		mask = (1 << shift) - 1;
136 		if ((int)bp->b_bcount & mask)
137 			goto bad_bcount;
138 		if ((int)bio->bio_offset & mask)
139 			goto bad_blkno;
140 		secno = bio->bio_offset >> shift;
141 		nsec = bp->b_bcount >> shift;
142 	} else {
143 		if (bp->b_bcount % ssp->dss_secsize)
144 			goto bad_bcount;
145 		if (bio->bio_offset % ssp->dss_secsize)
146 			goto bad_blkno;
147 		secno = bio->bio_offset / ssp->dss_secsize;
148 		nsec = bp->b_bcount / ssp->dss_secsize;
149 	}
150 
151 	/*
152 	 * Calculate slice-relative sector number end slice-relative
153 	 * limit.
154 	 */
155 	if (slice == WHOLE_DISK_SLICE) {
156 		/*
157 		 * Labels have not been allowed on whole-disks for a while.
158 		 * This really puts the nail in the coffin.
159 		 *
160 		 * Accesses to the WHOLE_DISK_SLICE do not use a disklabel
161 		 * and partition numbers are special-cased.  Currently numbers
162 		 * less then 128 are not allowed.  Partition numbers >= 128
163 		 * are encoded in the high 8 bits of the 64 bit buffer offset
164 		 * and are fed directly through to the device with no
165 		 * further interpretation.  In particular, no sector
166 		 * translation interpretation should occur because the
167 		 * sector size for the special raw access may not be the
168 		 * same as the nominal sector size for the device.
169 		 */
170 		lp.opaque = NULL;
171 		if (part < 128) {
172 			kprintf("dscheck(%s): illegal partition number (%d) "
173 				"for WHOLE_DISK_SLICE access\n",
174 				devtoname(dev), part);
175 			goto bad;
176 		} else if (part != WHOLE_SLICE_PART) {
177 			nbio = push_bio(bio);
178 			nbio->bio_offset = bio->bio_offset |
179 					   (u_int64_t)part << 56;
180 			return(nbio);
181 		} else {
182 			/*
183 			 * If writing to the raw disk request a
184 			 * reprobe on the last close.
185 			 */
186 			if (bp->b_cmd == BUF_CMD_WRITE)
187 				sp->ds_flags |= DSF_REPROBE;
188 		}
189 
190 		/*
191 		 * sp->ds_size is for the whole disk in the WHOLE_DISK_SLICE,
192 		 * there are no reserved areas.
193 		 */
194 		endsecno = sp->ds_size;
195 		slicerel_secno = secno;
196 	} else if (part == WHOLE_SLICE_PART) {
197 		/*
198 		 * NOTE! opens on a whole-slice partition will not attempt
199 		 * to read a disklabel in, so there may not be an in-core
200 		 * disklabel even if there is one on the disk.
201 		 */
202 		endsecno = sp->ds_size;
203 		slicerel_secno = secno;
204 	} else if ((lp = sp->ds_label).opaque != NULL) {
205 		/*
206 		 * A label is present, extract the partition.  Snooping of
207 		 * the disklabel is not supported even if accessible.  Of
208 		 * course, the reserved area is still write protected.
209 		 */
210 		ops = sp->ds_ops;
211 		if (ops->op_getpartbounds(ssp, lp, part,
212 					  &slicerel_secno, &endsecno)) {
213 			kprintf("dscheck(%s): partition %d out of bounds\n",
214 				devtoname(dev), part);
215 			goto bad;
216 		}
217 		slicerel_secno += secno;
218 	} else {
219 		/*
220 		 * Attempt to access partition when no disklabel present
221 		 */
222 		kprintf("dscheck(%s): attempt to access non-existent partition\n",
223 			devtoname(dev));
224 		goto bad;
225 	}
226 
227 	/*
228 	 * Disallow writes to reserved areas unless ds_wlabel allows it.
229 	 * If the reserved area is written to request a reprobe of the
230 	 * disklabel when the slice is closed.
231 	 */
232 	if (slicerel_secno < sp->ds_reserved && nsec &&
233 	    bp->b_cmd == BUF_CMD_WRITE) {
234 		if (sp->ds_wlabel == 0) {
235 			bp->b_error = EROFS;
236 			goto error;
237 		}
238 		sp->ds_flags |= DSF_REPROBE;
239 	}
240 
241 	/*
242 	 * If we get here, bio_offset must be on a block boundary and
243 	 * the sector size must be a power of 2.
244 	 */
245 	if ((bio->bio_offset & (ssp->dss_secsize - 1)) ||
246 	    (ssp->dss_secsize ^ (ssp->dss_secsize - 1)) !=
247 	    ((ssp->dss_secsize << 1) - 1)) {
248 		kprintf("%s: invalid BIO offset, not sector aligned or"
249 			" invalid sector size (not power of 2) %08llx %d\n",
250 			devtoname(dev), (long long)bio->bio_offset,
251 			ssp->dss_secsize);
252 		goto bad;
253 	}
254 
255 	/*
256 	 * EOF handling
257 	 */
258 	if (secno + nsec > endsecno) {
259 		/*
260 		 * Return an error if beyond the end of the disk, or
261 		 * if B_BNOCLIP is set.  Tell the system that we do not
262 		 * need to keep the buffer around.
263 		 */
264 		if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
265 			goto bad;
266 
267 		/*
268 		 * If exactly at end of disk, return an EOF.  Throw away
269 		 * the buffer contents, if any, by setting B_INVAL.
270 		 */
271 		if (secno == endsecno) {
272 			bp->b_resid = bp->b_bcount;
273 			bp->b_flags |= B_INVAL;
274 			goto done;
275 		}
276 
277 		/*
278 		 * Else truncate
279 		 */
280 		nsec = endsecno - secno;
281 		bp->b_bcount = nsec * ssp->dss_secsize;
282 	}
283 
284 	nbio = push_bio(bio);
285 	nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
286 			   ssp->dss_secsize;
287 	return (nbio);
288 
289 bad_bcount:
290 	kprintf(
291 	"dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
292 	    devtoname(dev), bp->b_bcount, ssp->dss_secsize);
293 	goto bad;
294 
295 bad_blkno:
296 	kprintf(
297 	"dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
298 	    devtoname(dev), (long long)bio->bio_offset, ssp->dss_secsize);
299 bad:
300 	bp->b_error = EINVAL;
301 	/* fall through */
302 error:
303 	/*
304 	 * Terminate the I/O with a ranging error.  Since the buffer is
305 	 * either illegal or beyond the file EOF, mark it B_INVAL as well.
306 	 */
307 	bp->b_resid = bp->b_bcount;
308 	bp->b_flags |= B_ERROR | B_INVAL;
309 done:
310 	/*
311 	 * Caller must biodone() the originally passed bio if NULL is
312 	 * returned.
313 	 */
314 	return (NULL);
315 }
316 
317 /*
318  * dsclose() - close a cooked disk slice.
319  *
320  * WARNING!  The passed diskslices and related diskslice structures may
321  *	     be invalidated or replaced by this function, callers must
322  *	     reload from the disk structure for continued access.
323  */
324 void
325 dsclose(cdev_t dev, int mode, struct diskslices *ssp)
326 {
327 	u_int32_t part;
328 	u_int32_t slice;
329 	struct diskslice *sp;
330 
331 	slice = dkslice(dev);
332 	part  = dkpart(dev);
333 	if (slice < ssp->dss_nslices) {
334 		sp = &ssp->dss_slices[slice];
335 		dsclrmask(sp, part);
336 		if (sp->ds_flags & DSF_REPROBE) {
337 			sp->ds_flags &= ~DSF_REPROBE;
338 			if (slice == WHOLE_DISK_SLICE) {
339 				disk_msg_send_sync(DISK_DISK_REPROBE,
340 						   dev->si_disk, NULL);
341 				devfs_config();
342 			} else {
343 				disk_msg_send_sync(DISK_SLICE_REPROBE,
344 						   dev->si_disk, sp);
345 				devfs_config();
346 			}
347 			/* ssp and sp may both be invalid after reprobe */
348 		}
349 	}
350 }
351 
352 void
353 dsgone(struct diskslices **sspp)
354 {
355 	int slice;
356 	struct diskslices *ssp;
357 
358 	if ((ssp = *sspp) != NULL) {
359 		for (slice = 0; slice < ssp->dss_nslices; slice++)
360 			free_ds_label(ssp, slice);
361 		kfree(ssp, M_DEVBUF);
362 		*sspp = NULL;
363 	}
364 }
365 
366 /*
367  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
368  * is subject to the same restriction as dsopen().
369  */
370 int
371 dsioctl(cdev_t dev, u_long cmd, caddr_t data, int flags,
372 	struct diskslices **sspp, struct disk_info *info)
373 {
374 	int error;
375 	disklabel_t lp;
376 	disklabel_t lptmp;
377 	disklabel_ops_t ops;
378 	int old_wlabel;
379 	u_int32_t openmask[DKMAXPARTITIONS/(sizeof(u_int32_t)*8)];
380 	int part;
381 	int slice;
382 	struct diskslice *sp;
383 	struct diskslices *ssp;
384 
385 	slice = dkslice(dev);
386 	part = dkpart(dev);
387 	ssp = *sspp;
388 	if (ssp == NULL)
389 		return (EINVAL);
390 	if (slice >= ssp->dss_nslices)
391 		return (EINVAL);
392 	sp = &ssp->dss_slices[slice];
393 	lp = sp->ds_label;
394 	ops = sp->ds_ops;	/* may be NULL if no label */
395 
396 	switch (cmd) {
397 	case DIOCGDVIRGIN32:
398 		ops = &disklabel32_ops;
399 		/* fall through */
400 	case DIOCGDVIRGIN64:
401 		if (cmd != DIOCGDVIRGIN32)
402 			ops = &disklabel64_ops;
403 		/*
404 		 * You can only retrieve a virgin disklabel on the whole
405 		 * disk slice or whole-slice partition.
406 		 */
407 		if (slice != WHOLE_DISK_SLICE &&
408 		    part != WHOLE_SLICE_PART) {
409 			return(EINVAL);
410 		}
411 
412 		lp.opaque = data;
413 		ops->op_makevirginlabel(lp, ssp, sp, info);
414 		return (0);
415 
416 	case DIOCGDINFO32:
417 	case DIOCGDINFO64:
418 		/*
419 		 * You can only retrieve a disklabel on the whole
420 		 * slice partition.
421 		 *
422 		 * We do not support labels directly on whole-disks
423 		 * any more (that is, disks without slices), unless the
424 		 * device driver has asked for a compatible label (e.g.
425 		 * for a CD) to allow booting off of storage that is
426 		 * otherwise unlabeled.
427 		 */
428 		error = 0;
429 		if (part != WHOLE_SLICE_PART)
430 			return(EINVAL);
431 		if (slice == WHOLE_DISK_SLICE &&
432 		    (info->d_dsflags & DSO_COMPATLABEL) == 0) {
433 			return (ENODEV);
434 		}
435 		if (sp->ds_label.opaque == NULL) {
436 			error = dsreadandsetlabel(dev, info->d_dsflags,
437 						  ssp, sp, info);
438 			ops = sp->ds_ops;	/* may be NULL */
439 		}
440 
441 		/*
442 		 * The type of label we found must match the type of
443 		 * label requested.
444 		 */
445 		if (error == 0 && IOCPARM_LEN(cmd) != ops->labelsize)
446 			error = ENOATTR;
447 		if (error == 0)
448 			bcopy(sp->ds_label.opaque, data, ops->labelsize);
449 		return (error);
450 
451 	case DIOCGPART:
452 		{
453 			struct partinfo *dpart = (void *)data;
454 
455 			/*
456 			 * The disk management layer may not have read the
457 			 * disklabel yet because simply opening a slice no
458 			 * longer 'probes' the disk that way.  Be sure we
459 			 * have tried.
460 			 *
461 			 * We ignore any error.
462 			 */
463 			if (sp->ds_label.opaque == NULL &&
464 			    part == WHOLE_SLICE_PART &&
465 			    slice != WHOLE_DISK_SLICE) {
466 				dsreadandsetlabel(dev, info->d_dsflags,
467 						  ssp, sp, info);
468 				ops = sp->ds_ops;	/* may be NULL */
469 			}
470 
471 			bzero(dpart, sizeof(*dpart));
472 			dpart->media_offset   = (u_int64_t)sp->ds_offset *
473 						info->d_media_blksize;
474 			dpart->media_size     = (u_int64_t)sp->ds_size *
475 						info->d_media_blksize;
476 			dpart->media_blocks   = sp->ds_size;
477 			dpart->media_blksize  = info->d_media_blksize;
478 			dpart->reserved_blocks= sp->ds_reserved;
479 			dpart->fstype_uuid = sp->ds_type_uuid;
480 			dpart->storage_uuid = sp->ds_stor_uuid;
481 
482 			if (slice != WHOLE_DISK_SLICE &&
483 			    part != WHOLE_SLICE_PART) {
484 				u_int64_t start;
485 				u_int64_t blocks;
486 				if (lp.opaque == NULL)
487 					return(EINVAL);
488 				if (ops->op_getpartbounds(ssp, lp, part,
489 							  &start, &blocks)) {
490 					return(EINVAL);
491 				}
492 				ops->op_loadpartinfo(lp, part, dpart);
493 				dpart->media_offset += start *
494 						       info->d_media_blksize;
495 				dpart->media_size = blocks *
496 						    info->d_media_blksize;
497 				dpart->media_blocks = blocks;
498 
499 				/*
500 				 * partition starting sector (p_offset)
501 				 * requires slice's reserved areas to be
502 				 * adjusted.
503 				 */
504 				if (dpart->reserved_blocks > start)
505 					dpart->reserved_blocks -= start;
506 				else
507 					dpart->reserved_blocks = 0;
508 			}
509 
510 			/*
511 			 * Load remaining fields from the info structure
512 			 */
513 			dpart->d_nheads =	info->d_nheads;
514 			dpart->d_ncylinders =	info->d_ncylinders;
515 			dpart->d_secpertrack =	info->d_secpertrack;
516 			dpart->d_secpercyl =	info->d_secpercyl;
517 		}
518 		return (0);
519 
520 	case DIOCGSLICEINFO:
521 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
522 				 (char *)ssp);
523 		return (0);
524 
525 	case DIOCSDINFO32:
526 		ops = &disklabel32_ops;
527 		/* fall through */
528 	case DIOCSDINFO64:
529 		if (cmd != DIOCSDINFO32)
530 			ops = &disklabel64_ops;
531 		/*
532 		 * You can write a disklabel on the whole disk slice or
533 		 * whole-slice partition.
534 		 */
535 		if (slice != WHOLE_DISK_SLICE &&
536 		    part != WHOLE_SLICE_PART) {
537 			return(EINVAL);
538 		}
539 
540 		/*
541 		 * We no longer support writing disklabels directly to media
542 		 * without there being a slice.  Keep this as a separate
543 		 * conditional.
544 		 */
545 		if (slice == WHOLE_DISK_SLICE)
546 			return (ENODEV);
547 		if (!(flags & FWRITE))
548 			return (EBADF);
549 
550 		/*
551 		 * If an existing label is present it must be the same
552 		 * type as the label being passed by the ioctl.
553 		 */
554 		if (sp->ds_label.opaque && sp->ds_ops != ops)
555 			return (ENOATTR);
556 
557 		/*
558 		 * Create a temporary copy of the existing label
559 		 * (if present) so setdisklabel can compare it against
560 		 * the new label.
561 		 */
562 		lp.opaque = kmalloc(ops->labelsize, M_DEVBUF, M_WAITOK);
563 		if (sp->ds_label.opaque == NULL)
564 			bzero(lp.opaque, ops->labelsize);
565 		else
566 			bcopy(sp->ds_label.opaque, lp.opaque, ops->labelsize);
567 		if (sp->ds_label.opaque == NULL) {
568 			bzero(openmask, sizeof(openmask));
569 		} else {
570 			bcopy(sp->ds_openmask, openmask, sizeof(openmask));
571 		}
572 		lptmp.opaque = data;
573 		error = ops->op_setdisklabel(lp, lptmp, ssp, sp, openmask);
574 		disk_msg_send_sync(DISK_SLICE_REPROBE, dev->si_disk, sp);
575 		devfs_config();
576 		if (error != 0) {
577 			kfree(lp.opaque, M_DEVBUF);
578 			return (error);
579 		}
580 		free_ds_label(ssp, slice);
581 		set_ds_label(ssp, slice, lp, ops);
582 		return (0);
583 
584 	case DIOCSYNCSLICEINFO:
585 		/*
586 		 * This ioctl can only be done on the whole disk
587 		 */
588 		if (slice != WHOLE_DISK_SLICE || part != WHOLE_SLICE_PART)
589 			return (EINVAL);
590 
591 		if (*(int *)data == 0) {
592 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
593 				struct diskslice *ds = &ssp->dss_slices[slice];
594 
595 				switch(dscountmask(ds)) {
596 				case 0:
597 					break;
598 				case 1:
599 					if (slice != WHOLE_DISK_SLICE)
600 						return (EBUSY);
601 					if (!dschkmask(ds, RAW_PART))
602 						return (EBUSY);
603 					break;
604 				default:
605 					return (EBUSY);
606 				}
607 			}
608 		}
609 
610 		disk_msg_send_sync(DISK_DISK_REPROBE, dev->si_disk, NULL);
611 		devfs_config();
612 		return 0;
613 
614 	case DIOCWDINFO32:
615 	case DIOCWDINFO64:
616 		error = dsioctl(dev, ((cmd == DIOCWDINFO32) ?
617 					DIOCSDINFO32 : DIOCSDINFO64),
618 				data, flags, &ssp, info);
619 		if (error == 0 && sp->ds_label.opaque == NULL)
620 			error = EINVAL;
621 		if (part != WHOLE_SLICE_PART)
622 			error = EINVAL;
623 		if (error != 0)
624 			return (error);
625 
626 		/*
627 		 * Allow the reserved area to be written, reload ops
628 		 * because the DIOCSDINFO op above may have installed
629 		 * a new label type.
630 		 */
631 		ops = sp->ds_ops;
632 		old_wlabel = sp->ds_wlabel;
633 		set_ds_wlabel(ssp, slice, TRUE);
634 		error = ops->op_writedisklabel(dev, ssp, sp, sp->ds_label);
635 		disk_msg_send_sync(DISK_SLICE_REPROBE, dev->si_disk, sp);
636 		devfs_config();
637 		set_ds_wlabel(ssp, slice, old_wlabel);
638 		/* XXX should invalidate in-core label if write failed. */
639 		return (error);
640 
641 	case DIOCWLABEL:
642 		if (slice == WHOLE_DISK_SLICE)
643 			return (ENODEV);
644 		if (!(flags & FWRITE))
645 			return (EBADF);
646 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
647 		return (0);
648 
649 	default:
650 		return (ENOIOCTL);
651 	}
652 }
653 
654 int
655 dsisopen(struct diskslices *ssp)
656 {
657 	int slice;
658 
659 	if (ssp == NULL)
660 		return (0);
661 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
662 		if (dscountmask(&ssp->dss_slices[slice]))
663 			return (1);
664 	}
665 	return (0);
666 }
667 
668 /*
669  * Allocate a slices "struct" and initialize it to contain only an empty
670  * compatibility slice (pointing to itself), a whole disk slice (covering
671  * the disk as described by the label), and (nslices - BASE_SLICES) empty
672  * slices beginning at BASE_SLICE.
673  *
674  * Note that the compatibility slice is no longer really a compatibility
675  * slice.  It is slice 0 if a GPT label is present, and the dangerously
676  * dedicated slice if no slice table otherwise exists.  Else it is 0-sized.
677  */
678 struct diskslices *
679 dsmakeslicestruct(int nslices, struct disk_info *info)
680 {
681 	struct diskslice *sp;
682 	struct diskslices *ssp;
683 
684 	ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
685 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
686 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
687 	ssp->dss_nslices = nslices;
688 	ssp->dss_oflags = 0;
689 
690 	/*
691 	 * Figure out if we can use shifts or whether we have to
692 	 * use mod/multply to translate byte offsets into sector numbers.
693 	 */
694 	if ((info->d_media_blksize ^ (info->d_media_blksize - 1)) ==
695 	     (info->d_media_blksize << 1) - 1) {
696 		ssp->dss_secmult = info->d_media_blksize / DEV_BSIZE;
697 		if (ssp->dss_secmult & (ssp->dss_secmult - 1))
698 			ssp->dss_secshift = -1;
699 		else
700 			ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
701 	} else {
702 		ssp->dss_secmult = 0;
703 		ssp->dss_secshift = -1;
704 	}
705 	ssp->dss_secsize = info->d_media_blksize;
706 	sp = &ssp->dss_slices[0];
707 	bzero(sp, nslices * sizeof *sp);
708 	sp[WHOLE_DISK_SLICE].ds_size = info->d_media_blocks;
709 	return (ssp);
710 }
711 
712 char *
713 dsname(cdev_t dev, int unit, int slice, int part, char *partname)
714 {
715 	return dev->si_name;
716 }
717 
718 /*
719  * This should only be called when the unit is inactive and the strategy
720  * routine should not allow it to become active unless we call it.  Our
721  * strategy routine must be special to allow activity.
722  */
723 int
724 dsopen(cdev_t dev, int mode, u_int flags,
725        struct diskslices **sspp, struct disk_info *info)
726 {
727 	struct diskslice *sp;
728 	struct diskslices *ssp;
729 	int slice;
730 	int part;
731 
732 	ssp = *sspp;
733 	dev->si_bsize_phys = info->d_media_blksize;
734 	slice = dkslice(dev);
735 	part = dkpart(dev);
736 	sp = &ssp->dss_slices[slice];
737 	dssetmask(sp, part);
738 
739 	return 0;
740 }
741 
742 /*
743  * Attempt to read the disklabel.  If successful, store it in sp->ds_label.
744  *
745  * If we cannot read the disklabel and DSO_COMPATLABEL is set, we construct
746  * a fake label covering the whole disk.
747  */
748 static
749 int
750 dsreadandsetlabel(cdev_t dev, u_int flags,
751 		  struct diskslices *ssp, struct diskslice *sp,
752 		  struct disk_info *info)
753 {
754 	disklabel_t lp;
755 	disklabel_ops_t ops;
756 	const char *msg;
757 	const char *sname;
758 	char partname[2];
759 	int slice = dkslice(dev);
760 
761 	/*
762 	 * Probe the disklabel
763 	 */
764 	lp.opaque = NULL;
765 	sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART, partname);
766 	ops = &disklabel32_ops;
767 	msg = ops->op_readdisklabel(dev, sp, &lp, info);
768 	if (msg && strcmp(msg, "no disk label") == 0) {
769 		ops = &disklabel64_ops;
770 		msg = disklabel64_ops.op_readdisklabel(dev, sp, &lp, info);
771 	}
772 
773 	/*
774 	 * If we failed and COMPATLABEL is set, create a dummy disklabel.
775 	 */
776 	if (msg != NULL && (flags & DSO_COMPATLABEL)) {
777 		msg = NULL;
778 		if (sp->ds_size >= 0x100000000ULL)
779 			ops = &disklabel64_ops;
780 		else
781 			ops = &disklabel32_ops;
782 		lp = ops->op_clone_label(info, sp);
783 	}
784 	if (msg != NULL) {
785 		if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
786 			log(LOG_WARNING, "%s: cannot find label (%s)\n",
787 			    sname, msg);
788 		if (lp.opaque)
789 			kfree(lp.opaque, M_DEVBUF);
790 	} else {
791 		set_ds_label(ssp, slice, lp, ops);
792 		set_ds_wlabel(ssp, slice, FALSE);
793 	}
794 	return (msg ? EINVAL : 0);
795 }
796 
797 int64_t
798 dssize(cdev_t dev, struct diskslices **sspp)
799 {
800 	disklabel_t lp;
801 	disklabel_ops_t ops;
802 	int part;
803 	int slice;
804 	struct diskslices *ssp;
805 	u_int64_t start;
806 	u_int64_t blocks;
807 
808 	slice = dkslice(dev);
809 	part = dkpart(dev);
810 	ssp = *sspp;
811 	if (ssp == NULL || slice >= ssp->dss_nslices
812 	    || !dschkmask(&ssp->dss_slices[slice], part)) {
813 		if (dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred) != 0)
814 			return (-1);
815 		dev_dclose(dev, FREAD, S_IFCHR);
816 		ssp = *sspp;
817 	}
818 	lp = ssp->dss_slices[slice].ds_label;
819 	if (lp.opaque == NULL)
820 		return (-1);
821 	ops = ssp->dss_slices[slice].ds_ops;
822 	if (ops->op_getpartbounds(ssp, lp, part, &start, &blocks))
823 		return (-1);
824 	return ((int64_t)blocks);
825 }
826 
827 static void
828 free_ds_label(struct diskslices *ssp, int slice)
829 {
830 	struct diskslice *sp;
831 	disklabel_t lp;
832 
833 	sp = &ssp->dss_slices[slice];
834 	lp = sp->ds_label;
835 	if (lp.opaque != NULL) {
836 		kfree(lp.opaque, M_DEVBUF);
837 		lp.opaque = NULL;
838 		set_ds_label(ssp, slice, lp, NULL);
839 	}
840 }
841 
842 static void
843 set_ds_label(struct diskslices *ssp, int slice,
844 	     disklabel_t lp, disklabel_ops_t ops)
845 {
846 	struct diskslice *sp = &ssp->dss_slices[slice];
847 
848 	sp->ds_label = lp;
849 	sp->ds_ops = ops;
850 	if (lp.opaque && slice != WHOLE_DISK_SLICE)
851 		ops->op_adjust_label_reserved(ssp, slice, sp);
852 	else
853 		sp->ds_reserved = 0;
854 }
855 
856 static void
857 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
858 {
859 	ssp->dss_slices[slice].ds_wlabel = wlabel;
860 }
861 
862