xref: /dragonfly/sys/kern/subr_diskslice.c (revision 631c21f2)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
39  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
40  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
41  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
42  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/buf.h>
48 #include <sys/conf.h>
49 #include <sys/disklabel.h>
50 #include <sys/disklabel32.h>
51 #include <sys/disklabel64.h>
52 #include <sys/diskslice.h>
53 #include <sys/disk.h>
54 #include <sys/diskmbr.h>
55 #include <sys/fcntl.h>
56 #include <sys/malloc.h>
57 #include <sys/stat.h>
58 #include <sys/syslog.h>
59 #include <sys/proc.h>
60 #include <sys/vnode.h>
61 #include <sys/device.h>
62 
63 #include <vfs/ufs/dinode.h>	/* XXX used only for fs.h */
64 #include <vfs/ufs/fs.h>		/* XXX used only to get BBSIZE/SBSIZE */
65 #include <sys/devfs.h>
66 
67 static int  dsreadandsetlabel(cdev_t dev, u_int flags,
68 			   struct diskslices *ssp, struct diskslice *sp,
69 			   struct disk_info *info);
70 static void free_ds_label (struct diskslices *ssp, int slice);
71 static void set_ds_label (struct diskslices *ssp, int slice, disklabel_t lp,
72 			   disklabel_ops_t ops);
73 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
74 
75 /*
76  * Determine the size of the transfer, and make sure it is
77  * within the boundaries of the partition. Adjust transfer
78  * if needed, and signal errors or early completion.
79  *
80  * XXX TODO:
81  *	o Split buffers that are too big for the device.
82  *	o Check for overflow.
83  *	o Finish cleaning this up.
84  *
85  * This function returns 1 on success, 0 if transfer equates
86  * to EOF (end of disk) or -1 on failure.  The appropriate
87  * 'errno' value is also set in bp->b_error and bp->b_flags
88  * is marked with B_ERROR.
89  */
90 struct bio *
91 dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
92 {
93 	struct buf *bp = bio->bio_buf;
94 	struct bio *nbio;
95 	disklabel_t lp;
96 	disklabel_ops_t ops;
97 	long nsec;
98 	u_int64_t secno;
99 	u_int64_t endsecno;
100 	u_int64_t slicerel_secno;
101 	struct diskslice *sp;
102 	u_int32_t part;
103 	u_int32_t slice;
104 	int shift;
105 	int mask;
106 
107 	slice = dkslice(dev);
108 	part  = dkpart(dev);
109 
110 	if (bio->bio_offset < 0) {
111 		kprintf("dscheck(%s): negative bio_offset %lld\n",
112 			devtoname(dev), (long long)bio->bio_offset);
113 		goto bad;
114 	}
115 	if (slice >= ssp->dss_nslices) {
116 		kprintf("dscheck(%s): slice too large %d/%d\n",
117 			devtoname(dev), slice, ssp->dss_nslices);
118 		goto bad;
119 	}
120 	sp = &ssp->dss_slices[slice];
121 	/*
122 	 * Calculate secno and nsec
123 	 */
124 	if (ssp->dss_secmult == 1) {
125 		shift = DEV_BSHIFT;
126 		goto doshift;
127 	} else if (ssp->dss_secshift != -1) {
128 		shift = DEV_BSHIFT + ssp->dss_secshift;
129 doshift:
130 		mask = (1 << shift) - 1;
131 		if ((int)bp->b_bcount & mask)
132 			goto bad_bcount;
133 		if ((int)bio->bio_offset & mask)
134 			goto bad_blkno;
135 		secno = bio->bio_offset >> shift;
136 		nsec = bp->b_bcount >> shift;
137 	} else {
138 		if (bp->b_bcount % ssp->dss_secsize)
139 			goto bad_bcount;
140 		if (bio->bio_offset % ssp->dss_secsize)
141 			goto bad_blkno;
142 		secno = bio->bio_offset / ssp->dss_secsize;
143 		nsec = bp->b_bcount / ssp->dss_secsize;
144 	}
145 
146 	/*
147 	 * Calculate slice-relative sector number end slice-relative
148 	 * limit.
149 	 */
150 	if (slice == WHOLE_DISK_SLICE) {
151 		/*
152 		 * Labels have not been allowed on whole-disks for a while.
153 		 * This really puts the nail in the coffin.
154 		 *
155 		 * Accesses to the WHOLE_DISK_SLICE do not use a disklabel
156 		 * and partition numbers are special-cased.  Currently numbers
157 		 * less then 128 are not allowed.  Partition numbers >= 128
158 		 * are encoded in the high 8 bits of the 64 bit buffer offset
159 		 * and are fed directly through to the device with no
160 		 * further interpretation.  In particular, no sector
161 		 * translation interpretation should occur because the
162 		 * sector size for the special raw access may not be the
163 		 * same as the nominal sector size for the device.
164 		 */
165 		lp.opaque = NULL;
166 		if (part < 128) {
167 			kprintf("dscheck(%s): illegal partition number (%d) "
168 				"for WHOLE_DISK_SLICE access\n",
169 				devtoname(dev), part);
170 			goto bad;
171 		} else if (part != WHOLE_SLICE_PART) {
172 			nbio = push_bio(bio);
173 			nbio->bio_offset = bio->bio_offset |
174 					   (u_int64_t)part << 56;
175 			return(nbio);
176 		} else {
177 			/*
178 			 * If writing to the raw disk request a
179 			 * reprobe on the last close.
180 			 */
181 			if (bp->b_cmd == BUF_CMD_WRITE)
182 				sp->ds_flags |= DSF_REPROBE;
183 		}
184 
185 		/*
186 		 * sp->ds_size is for the whole disk in the WHOLE_DISK_SLICE,
187 		 * there are no reserved areas.
188 		 */
189 		endsecno = sp->ds_size;
190 		slicerel_secno = secno;
191 	} else if (part == WHOLE_SLICE_PART) {
192 		/*
193 		 * NOTE! opens on a whole-slice partition will not attempt
194 		 * to read a disklabel in, so there may not be an in-core
195 		 * disklabel even if there is one on the disk.
196 		 */
197 		endsecno = sp->ds_size;
198 		slicerel_secno = secno;
199 	} else if ((lp = sp->ds_label).opaque != NULL) {
200 		/*
201 		 * A label is present, extract the partition.  Snooping of
202 		 * the disklabel is not supported even if accessible.  Of
203 		 * course, the reserved area is still write protected.
204 		 */
205 		ops = sp->ds_ops;
206 		if (ops->op_getpartbounds(ssp, lp, part,
207 					  &slicerel_secno, &endsecno)) {
208 			kprintf("dscheck(%s): partition %d out of bounds\n",
209 				devtoname(dev), part);
210 			goto bad;
211 		}
212 		slicerel_secno += secno;
213 	} else {
214 		/*
215 		 * Attempt to access partition when no disklabel present
216 		 */
217 		kprintf("dscheck(%s): attempt to access non-existent partition\n",
218 			devtoname(dev));
219 		goto bad;
220 	}
221 
222 	/*
223 	 * Disallow writes to reserved areas unless ds_wlabel allows it.
224 	 * If the reserved area is written to request a reprobe of the
225 	 * disklabel when the slice is closed.
226 	 */
227 	if (slicerel_secno < sp->ds_reserved && nsec &&
228 	    bp->b_cmd == BUF_CMD_WRITE) {
229 		if (sp->ds_wlabel == 0) {
230 			bp->b_error = EROFS;
231 			goto error;
232 		}
233 		sp->ds_flags |= DSF_REPROBE;
234 	}
235 
236 	/*
237 	 * If we get here, bio_offset must be on a block boundary and
238 	 * the sector size must be a power of 2.
239 	 */
240 	if ((bio->bio_offset & (ssp->dss_secsize - 1)) ||
241 	    (ssp->dss_secsize ^ (ssp->dss_secsize - 1)) !=
242 	    ((ssp->dss_secsize << 1) - 1)) {
243 		kprintf("%s: invalid BIO offset, not sector aligned or"
244 			" invalid sector size (not power of 2) %08llx %d\n",
245 			devtoname(dev), (long long)bio->bio_offset,
246 			ssp->dss_secsize);
247 		goto bad;
248 	}
249 
250 	/*
251 	 * EOF handling
252 	 */
253 	if (secno + nsec > endsecno) {
254 		/*
255 		 * Return an error if beyond the end of the disk, or
256 		 * if B_BNOCLIP is set.  Tell the system that we do not
257 		 * need to keep the buffer around.
258 		 */
259 		if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
260 			goto bad;
261 
262 		/*
263 		 * If exactly at end of disk, return an EOF.  Throw away
264 		 * the buffer contents, if any, by setting B_INVAL.
265 		 */
266 		if (secno == endsecno) {
267 			bp->b_resid = bp->b_bcount;
268 			bp->b_flags |= B_INVAL;
269 			goto done;
270 		}
271 
272 		/*
273 		 * Else truncate
274 		 */
275 		nsec = endsecno - secno;
276 		bp->b_bcount = nsec * ssp->dss_secsize;
277 	}
278 
279 	nbio = push_bio(bio);
280 	nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
281 			   ssp->dss_secsize;
282 	return (nbio);
283 
284 bad_bcount:
285 	kprintf(
286 	"dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
287 	    devtoname(dev), bp->b_bcount, ssp->dss_secsize);
288 	goto bad;
289 
290 bad_blkno:
291 	kprintf(
292 	"dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
293 	    devtoname(dev), (long long)bio->bio_offset, ssp->dss_secsize);
294 bad:
295 	bp->b_error = EINVAL;
296 	/* fall through */
297 error:
298 	/*
299 	 * Terminate the I/O with a ranging error.  Since the buffer is
300 	 * either illegal or beyond the file EOF, mark it B_INVAL as well.
301 	 */
302 	bp->b_resid = bp->b_bcount;
303 	bp->b_flags |= B_ERROR | B_INVAL;
304 done:
305 	/*
306 	 * Caller must biodone() the originally passed bio if NULL is
307 	 * returned.
308 	 */
309 	return (NULL);
310 }
311 
312 /*
313  * dsclose() - close a cooked disk slice.
314  *
315  * WARNING!  The passed diskslices and related diskslice structures may
316  *	     be invalidated or replaced by this function, callers must
317  *	     reload from the disk structure for continued access.
318  */
319 void
320 dsclose(cdev_t dev, int mode, struct diskslices *ssp)
321 {
322 	u_int32_t part;
323 	u_int32_t slice;
324 	struct diskslice *sp;
325 
326 	slice = dkslice(dev);
327 	part  = dkpart(dev);
328 	if (slice < ssp->dss_nslices) {
329 		sp = &ssp->dss_slices[slice];
330 		dsclrmask(sp, part);
331 		if (sp->ds_flags & DSF_REPROBE) {
332 			sp->ds_flags &= ~DSF_REPROBE;
333 			if (slice == WHOLE_DISK_SLICE) {
334 				disk_msg_send_sync(DISK_DISK_REPROBE,
335 						   dev->si_disk, NULL);
336 				devfs_config();
337 			} else {
338 				disk_msg_send_sync(DISK_SLICE_REPROBE,
339 						   dev->si_disk, sp);
340 				devfs_config();
341 			}
342 			/* ssp and sp may both be invalid after reprobe */
343 		}
344 	}
345 }
346 
347 void
348 dsgone(struct diskslices **sspp)
349 {
350 	int slice;
351 	struct diskslices *ssp;
352 
353 	if ((ssp = *sspp) != NULL) {
354 		for (slice = 0; slice < ssp->dss_nslices; slice++)
355 			free_ds_label(ssp, slice);
356 		kfree(ssp, M_DEVBUF);
357 		*sspp = NULL;
358 	}
359 }
360 
361 /*
362  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
363  * is subject to the same restriction as dsopen().
364  */
365 int
366 dsioctl(cdev_t dev, u_long cmd, caddr_t data, int flags,
367 	struct diskslices **sspp, struct disk_info *info)
368 {
369 	int error;
370 	disklabel_t lp;
371 	disklabel_t lptmp;
372 	disklabel_ops_t ops;
373 	int old_wlabel;
374 	u_int32_t openmask[DKMAXPARTITIONS/(sizeof(u_int32_t)*8)];
375 	int part;
376 	int slice;
377 	struct diskslice *sp;
378 	struct diskslices *ssp;
379 
380 	slice = dkslice(dev);
381 	part = dkpart(dev);
382 	ssp = *sspp;
383 	if (ssp == NULL)
384 		return (EINVAL);
385 	if (slice >= ssp->dss_nslices)
386 		return (EINVAL);
387 	sp = &ssp->dss_slices[slice];
388 	lp = sp->ds_label;
389 	ops = sp->ds_ops;	/* may be NULL if no label */
390 
391 	switch (cmd) {
392 	case DIOCGDVIRGIN32:
393 		ops = &disklabel32_ops;
394 		/* fall through */
395 	case DIOCGDVIRGIN64:
396 		if (cmd != DIOCGDVIRGIN32)
397 			ops = &disklabel64_ops;
398 		/*
399 		 * You can only retrieve a virgin disklabel on the whole
400 		 * disk slice or whole-slice partition.
401 		 */
402 		if (slice != WHOLE_DISK_SLICE &&
403 		    part != WHOLE_SLICE_PART) {
404 			return(EINVAL);
405 		}
406 
407 		lp.opaque = data;
408 		ops->op_makevirginlabel(lp, ssp, sp, info);
409 		return (0);
410 
411 	case DIOCGDINFO32:
412 	case DIOCGDINFO64:
413 		/*
414 		 * You can only retrieve a disklabel on the whole
415 		 * slice partition.
416 		 *
417 		 * We do not support labels directly on whole-disks
418 		 * any more (that is, disks without slices), unless the
419 		 * device driver has asked for a compatible label (e.g.
420 		 * for a CD) to allow booting off of storage that is
421 		 * otherwise unlabeled.
422 		 */
423 		error = 0;
424 		if (part != WHOLE_SLICE_PART)
425 			return(EINVAL);
426 		if (slice == WHOLE_DISK_SLICE &&
427 		    (info->d_dsflags & DSO_COMPATLABEL) == 0) {
428 			return (ENODEV);
429 		}
430 		if (sp->ds_label.opaque == NULL) {
431 			error = dsreadandsetlabel(dev, info->d_dsflags,
432 						  ssp, sp, info);
433 			ops = sp->ds_ops;	/* may be NULL */
434 		}
435 
436 		/*
437 		 * The type of label we found must match the type of
438 		 * label requested.
439 		 */
440 		if (error == 0 && IOCPARM_LEN(cmd) != ops->labelsize)
441 			error = ENOATTR;
442 		if (error == 0)
443 			bcopy(sp->ds_label.opaque, data, ops->labelsize);
444 		return (error);
445 
446 	case DIOCGPART:
447 		{
448 			struct partinfo *dpart = (void *)data;
449 
450 			/*
451 			 * The disk management layer may not have read the
452 			 * disklabel yet because simply opening a slice no
453 			 * longer 'probes' the disk that way.  Be sure we
454 			 * have tried.
455 			 *
456 			 * We ignore any error.
457 			 */
458 			if (sp->ds_label.opaque == NULL &&
459 			    part == WHOLE_SLICE_PART &&
460 			    slice != WHOLE_DISK_SLICE) {
461 				dsreadandsetlabel(dev, info->d_dsflags,
462 						  ssp, sp, info);
463 				ops = sp->ds_ops;	/* may be NULL */
464 			}
465 
466 			bzero(dpart, sizeof(*dpart));
467 			dpart->media_offset   = (u_int64_t)sp->ds_offset *
468 						info->d_media_blksize;
469 			dpart->media_size     = (u_int64_t)sp->ds_size *
470 						info->d_media_blksize;
471 			dpart->media_blocks   = sp->ds_size;
472 			dpart->media_blksize  = info->d_media_blksize;
473 			dpart->reserved_blocks= sp->ds_reserved;
474 			dpart->fstype_uuid = sp->ds_type_uuid;
475 			dpart->storage_uuid = sp->ds_stor_uuid;
476 
477 			if (slice != WHOLE_DISK_SLICE &&
478 			    part != WHOLE_SLICE_PART) {
479 				u_int64_t start;
480 				u_int64_t blocks;
481 				if (lp.opaque == NULL)
482 					return(EINVAL);
483 				if (ops->op_getpartbounds(ssp, lp, part,
484 							  &start, &blocks)) {
485 					return(EINVAL);
486 				}
487 				ops->op_loadpartinfo(lp, part, dpart);
488 				dpart->media_offset += start *
489 						       info->d_media_blksize;
490 				dpart->media_size = blocks *
491 						    info->d_media_blksize;
492 				dpart->media_blocks = blocks;
493 
494 				/*
495 				 * partition starting sector (p_offset)
496 				 * requires slice's reserved areas to be
497 				 * adjusted.
498 				 */
499 				if (dpart->reserved_blocks > start)
500 					dpart->reserved_blocks -= start;
501 				else
502 					dpart->reserved_blocks = 0;
503 			}
504 
505 			/*
506 			 * Load remaining fields from the info structure
507 			 */
508 			dpart->d_nheads =	info->d_nheads;
509 			dpart->d_ncylinders =	info->d_ncylinders;
510 			dpart->d_secpertrack =	info->d_secpertrack;
511 			dpart->d_secpercyl =	info->d_secpercyl;
512 		}
513 		return (0);
514 
515 	case DIOCGSLICEINFO:
516 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
517 				 (char *)ssp);
518 		return (0);
519 
520 	case DIOCSDINFO32:
521 		ops = &disklabel32_ops;
522 		/* fall through */
523 	case DIOCSDINFO64:
524 		if (cmd != DIOCSDINFO32)
525 			ops = &disklabel64_ops;
526 		/*
527 		 * You can write a disklabel on the whole disk slice or
528 		 * whole-slice partition.
529 		 */
530 		if (slice != WHOLE_DISK_SLICE &&
531 		    part != WHOLE_SLICE_PART) {
532 			return(EINVAL);
533 		}
534 
535 		/*
536 		 * We no longer support writing disklabels directly to media
537 		 * without there being a slice.  Keep this as a separate
538 		 * conditional.
539 		 */
540 		if (slice == WHOLE_DISK_SLICE)
541 			return (ENODEV);
542 		if (!(flags & FWRITE))
543 			return (EBADF);
544 
545 		/*
546 		 * If an existing label is present it must be the same
547 		 * type as the label being passed by the ioctl.
548 		 */
549 		if (sp->ds_label.opaque && sp->ds_ops != ops)
550 			return (ENOATTR);
551 
552 		/*
553 		 * Create a temporary copy of the existing label
554 		 * (if present) so setdisklabel can compare it against
555 		 * the new label.
556 		 */
557 		lp.opaque = kmalloc(ops->labelsize, M_DEVBUF, M_WAITOK);
558 		if (sp->ds_label.opaque == NULL)
559 			bzero(lp.opaque, ops->labelsize);
560 		else
561 			bcopy(sp->ds_label.opaque, lp.opaque, ops->labelsize);
562 		if (sp->ds_label.opaque == NULL) {
563 			bzero(openmask, sizeof(openmask));
564 		} else {
565 			bcopy(sp->ds_openmask, openmask, sizeof(openmask));
566 		}
567 		lptmp.opaque = data;
568 		error = ops->op_setdisklabel(lp, lptmp, ssp, sp, openmask);
569 		disk_msg_send_sync(DISK_SLICE_REPROBE, dev->si_disk, sp);
570 		devfs_config();
571 		if (error != 0) {
572 			kfree(lp.opaque, M_DEVBUF);
573 			return (error);
574 		}
575 		free_ds_label(ssp, slice);
576 		set_ds_label(ssp, slice, lp, ops);
577 		return (0);
578 
579 	case DIOCSYNCSLICEINFO:
580 		/*
581 		 * This ioctl can only be done on the whole disk
582 		 */
583 		if (slice != WHOLE_DISK_SLICE || part != WHOLE_SLICE_PART)
584 			return (EINVAL);
585 
586 		if (*(int *)data == 0) {
587 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
588 				struct diskslice *ds = &ssp->dss_slices[slice];
589 
590 				switch(dscountmask(ds)) {
591 				case 0:
592 					break;
593 				case 1:
594 					if (slice != WHOLE_DISK_SLICE)
595 						return (EBUSY);
596 					if (!dschkmask(ds, RAW_PART))
597 						return (EBUSY);
598 					break;
599 				default:
600 					return (EBUSY);
601 				}
602 			}
603 		}
604 
605 		disk_msg_send_sync(DISK_DISK_REPROBE, dev->si_disk, NULL);
606 		devfs_config();
607 		return 0;
608 
609 	case DIOCWDINFO32:
610 	case DIOCWDINFO64:
611 		error = dsioctl(dev, ((cmd == DIOCWDINFO32) ?
612 					DIOCSDINFO32 : DIOCSDINFO64),
613 				data, flags, &ssp, info);
614 		if (error == 0 && sp->ds_label.opaque == NULL)
615 			error = EINVAL;
616 		if (part != WHOLE_SLICE_PART)
617 			error = EINVAL;
618 		if (error != 0)
619 			return (error);
620 
621 		/*
622 		 * Allow the reserved area to be written, reload ops
623 		 * because the DIOCSDINFO op above may have installed
624 		 * a new label type.
625 		 */
626 		ops = sp->ds_ops;
627 		old_wlabel = sp->ds_wlabel;
628 		set_ds_wlabel(ssp, slice, TRUE);
629 		error = ops->op_writedisklabel(dev, ssp, sp, sp->ds_label);
630 		disk_msg_send_sync(DISK_SLICE_REPROBE, dev->si_disk, sp);
631 		devfs_config();
632 		set_ds_wlabel(ssp, slice, old_wlabel);
633 		/* XXX should invalidate in-core label if write failed. */
634 		return (error);
635 
636 	case DIOCWLABEL:
637 		if (slice == WHOLE_DISK_SLICE)
638 			return (ENODEV);
639 		if (!(flags & FWRITE))
640 			return (EBADF);
641 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
642 		return (0);
643 
644 	default:
645 		return (ENOIOCTL);
646 	}
647 }
648 
649 int
650 dsisopen(struct diskslices *ssp)
651 {
652 	int slice;
653 
654 	if (ssp == NULL)
655 		return (0);
656 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
657 		if (dscountmask(&ssp->dss_slices[slice]))
658 			return (1);
659 	}
660 	return (0);
661 }
662 
663 /*
664  * Allocate a slices "struct" and initialize it to contain only an empty
665  * compatibility slice (pointing to itself), a whole disk slice (covering
666  * the disk as described by the label), and (nslices - BASE_SLICES) empty
667  * slices beginning at BASE_SLICE.
668  *
669  * Note that the compatibility slice is no longer really a compatibility
670  * slice.  It is slice 0 if a GPT label is present, and the dangerously
671  * dedicated slice if no slice table otherwise exists.  Else it is 0-sized.
672  */
673 struct diskslices *
674 dsmakeslicestruct(int nslices, struct disk_info *info)
675 {
676 	struct diskslice *sp;
677 	struct diskslices *ssp;
678 
679 	ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
680 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
681 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
682 	ssp->dss_nslices = nslices;
683 	ssp->dss_oflags = 0;
684 
685 	/*
686 	 * Figure out if we can use shifts or whether we have to
687 	 * use mod/multply to translate byte offsets into sector numbers.
688 	 */
689 	if ((info->d_media_blksize ^ (info->d_media_blksize - 1)) ==
690 	     (info->d_media_blksize << 1) - 1) {
691 		ssp->dss_secmult = info->d_media_blksize / DEV_BSIZE;
692 		if (ssp->dss_secmult & (ssp->dss_secmult - 1))
693 			ssp->dss_secshift = -1;
694 		else
695 			ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
696 	} else {
697 		ssp->dss_secmult = 0;
698 		ssp->dss_secshift = -1;
699 	}
700 	ssp->dss_secsize = info->d_media_blksize;
701 	sp = &ssp->dss_slices[0];
702 	bzero(sp, nslices * sizeof *sp);
703 	sp[WHOLE_DISK_SLICE].ds_size = info->d_media_blocks;
704 	return (ssp);
705 }
706 
707 char *
708 dsname(cdev_t dev, int unit, int slice, int part, char *partname)
709 {
710 	return dev->si_name;
711 }
712 
713 /*
714  * This should only be called when the unit is inactive and the strategy
715  * routine should not allow it to become active unless we call it.  Our
716  * strategy routine must be special to allow activity.
717  */
718 int
719 dsopen(cdev_t dev, int mode, u_int flags,
720        struct diskslices **sspp, struct disk_info *info)
721 {
722 	struct diskslice *sp;
723 	struct diskslices *ssp;
724 	int slice;
725 	int part;
726 
727 	ssp = *sspp;
728 	dev->si_bsize_phys = info->d_media_blksize;
729 	slice = dkslice(dev);
730 	part = dkpart(dev);
731 	sp = &ssp->dss_slices[slice];
732 	dssetmask(sp, part);
733 
734 	return 0;
735 }
736 
737 /*
738  * Attempt to read the disklabel.  If successful, store it in sp->ds_label.
739  *
740  * If we cannot read the disklabel and DSO_COMPATLABEL is set, we construct
741  * a fake label covering the whole disk.
742  */
743 static
744 int
745 dsreadandsetlabel(cdev_t dev, u_int flags,
746 		  struct diskslices *ssp, struct diskslice *sp,
747 		  struct disk_info *info)
748 {
749 	disklabel_t lp;
750 	disklabel_ops_t ops;
751 	const char *msg;
752 	const char *sname;
753 	char partname[2];
754 	int slice = dkslice(dev);
755 
756 	/*
757 	 * Probe the disklabel
758 	 */
759 	lp.opaque = NULL;
760 	sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART, partname);
761 	ops = &disklabel32_ops;
762 	msg = ops->op_readdisklabel(dev, sp, &lp, info);
763 	if (msg && strcmp(msg, "no disk label") == 0) {
764 		ops = &disklabel64_ops;
765 		msg = disklabel64_ops.op_readdisklabel(dev, sp, &lp, info);
766 	}
767 
768 	/*
769 	 * If we failed and COMPATLABEL is set, create a dummy disklabel.
770 	 */
771 	if (msg != NULL && (flags & DSO_COMPATLABEL)) {
772 		msg = NULL;
773 		if (sp->ds_size >= 0x100000000ULL)
774 			ops = &disklabel64_ops;
775 		else
776 			ops = &disklabel32_ops;
777 		lp = ops->op_clone_label(info, sp);
778 	}
779 	if (msg != NULL) {
780 		if (sp->ds_type == DOSPTYP_386BSD ||
781 		    sp->ds_type == DOSPTYP_DFLYBSD) {
782 			log(LOG_WARNING, "%s: cannot find label (%s)\n",
783 			    sname, msg);
784 		}
785 		if (lp.opaque)
786 			kfree(lp.opaque, M_DEVBUF);
787 	} else {
788 		set_ds_label(ssp, slice, lp, ops);
789 		set_ds_wlabel(ssp, slice, FALSE);
790 	}
791 	return (msg ? EINVAL : 0);
792 }
793 
794 int64_t
795 dssize(cdev_t dev, struct diskslices **sspp)
796 {
797 	disklabel_t lp;
798 	disklabel_ops_t ops;
799 	int part;
800 	int slice;
801 	struct diskslices *ssp;
802 	u_int64_t start;
803 	u_int64_t blocks;
804 
805 	slice = dkslice(dev);
806 	part = dkpart(dev);
807 	ssp = *sspp;
808 	if (ssp == NULL || slice >= ssp->dss_nslices
809 	    || !dschkmask(&ssp->dss_slices[slice], part)) {
810 		if (dev_dopen(dev, FREAD, S_IFCHR,
811 			      proc0.p_ucred, NULL, NULL) != 0)
812 		{
813 			return (-1);
814 		}
815 		dev_dclose(dev, FREAD, S_IFCHR, NULL);
816 		ssp = *sspp;
817 	}
818 	lp = ssp->dss_slices[slice].ds_label;
819 	if (part == WHOLE_SLICE_PART) {
820 		blocks = ssp->dss_slices[slice].ds_size;
821 	} else if (lp.opaque == NULL) {
822 		blocks = (u_int64_t)-1;
823 	} else {
824 		ops = ssp->dss_slices[slice].ds_ops;
825 		if (ops->op_getpartbounds(ssp, lp, part, &start, &blocks))
826 			return (-1);
827 	}
828 	return ((int64_t)blocks);
829 }
830 
831 static void
832 free_ds_label(struct diskslices *ssp, int slice)
833 {
834 	struct diskslice *sp;
835 	disklabel_t lp;
836 
837 	sp = &ssp->dss_slices[slice];
838 	lp = sp->ds_label;
839 	if (lp.opaque != NULL) {
840 		kfree(lp.opaque, M_DEVBUF);
841 		lp.opaque = NULL;
842 		set_ds_label(ssp, slice, lp, NULL);
843 	}
844 }
845 
846 static void
847 set_ds_label(struct diskslices *ssp, int slice,
848 	     disklabel_t lp, disklabel_ops_t ops)
849 {
850 	struct diskslice *sp = &ssp->dss_slices[slice];
851 
852 	sp->ds_label = lp;
853 	sp->ds_ops = ops;
854 	if (lp.opaque && slice != WHOLE_DISK_SLICE)
855 		ops->op_adjust_label_reserved(ssp, slice, sp);
856 	else
857 		sp->ds_reserved = 0;
858 }
859 
860 static void
861 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
862 {
863 	ssp->dss_slices[slice].ds_wlabel = wlabel;
864 }
865 
866