xref: /dragonfly/sys/kern/subr_diskslice.c (revision 19b217af)
1 /*-
2  * Copyright (c) 1994 Bruce D. Evans.
3  * All rights reserved.
4  *
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * William Jolitz.
10  *
11  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	from: @(#)wd.c	7.2 (Berkeley) 5/9/91
39  *	from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
40  *	from: @(#)ufs_disksubr.c	7.16 (Berkeley) 5/4/91
41  *	from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
42  * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/buf.h>
48 #include <sys/conf.h>
49 #include <sys/disklabel.h>
50 #include <sys/disklabel32.h>
51 #include <sys/disklabel64.h>
52 #include <sys/diskslice.h>
53 #include <sys/disk.h>
54 #include <sys/diskmbr.h>
55 #include <sys/fcntl.h>
56 #include <sys/malloc.h>
57 #include <sys/stat.h>
58 #include <sys/syslog.h>
59 #include <sys/proc.h>
60 #include <sys/vnode.h>
61 #include <sys/device.h>
62 #include <sys/thread2.h>
63 
64 #include <vfs/ufs/dinode.h>	/* XXX used only for fs.h */
65 #include <vfs/ufs/fs.h>		/* XXX used only to get BBSIZE/SBSIZE */
66 #include <sys/devfs.h>
67 
68 static int  dsreadandsetlabel(cdev_t dev, u_int flags,
69 			   struct diskslices *ssp, struct diskslice *sp,
70 			   struct disk_info *info);
71 static void free_ds_label (struct diskslices *ssp, int slice);
72 static void set_ds_label (struct diskslices *ssp, int slice, disklabel_t lp,
73 			   disklabel_ops_t ops);
74 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
75 
76 /*
77  * Determine the size of the transfer, and make sure it is
78  * within the boundaries of the partition. Adjust transfer
79  * if needed, and signal errors or early completion.
80  *
81  * XXX TODO:
82  *	o Split buffers that are too big for the device.
83  *	o Check for overflow.
84  *	o Finish cleaning this up.
85  *
86  * This function returns 1 on success, 0 if transfer equates
87  * to EOF (end of disk) or -1 on failure.  The appropriate
88  * 'errno' value is also set in bp->b_error and bp->b_flags
89  * is marked with B_ERROR.
90  */
91 struct bio *
92 dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
93 {
94 	struct buf *bp = bio->bio_buf;
95 	struct bio *nbio;
96 	disklabel_t lp;
97 	disklabel_ops_t ops;
98 	long nsec;
99 	u_int64_t secno;
100 	u_int64_t endsecno;
101 	u_int64_t slicerel_secno;
102 	struct diskslice *sp;
103 	u_int32_t part;
104 	u_int32_t slice;
105 	int shift;
106 	int mask;
107 
108 	slice = dkslice(dev);
109 	part  = dkpart(dev);
110 
111 	if (bio->bio_offset < 0) {
112 		kprintf("dscheck(%s): negative bio_offset %lld\n",
113 			devtoname(dev), (long long)bio->bio_offset);
114 		goto bad;
115 	}
116 	if (slice >= ssp->dss_nslices) {
117 		kprintf("dscheck(%s): slice too large %d/%d\n",
118 			devtoname(dev), slice, ssp->dss_nslices);
119 		goto bad;
120 	}
121 	sp = &ssp->dss_slices[slice];
122 	/*
123 	 * Calculate secno and nsec
124 	 */
125 	if (ssp->dss_secmult == 1) {
126 		shift = DEV_BSHIFT;
127 		goto doshift;
128 	} else if (ssp->dss_secshift != -1) {
129 		shift = DEV_BSHIFT + ssp->dss_secshift;
130 doshift:
131 		mask = (1 << shift) - 1;
132 		if ((int)bp->b_bcount & mask)
133 			goto bad_bcount;
134 		if ((int)bio->bio_offset & mask)
135 			goto bad_blkno;
136 		secno = bio->bio_offset >> shift;
137 		nsec = bp->b_bcount >> shift;
138 	} else {
139 		if (bp->b_bcount % ssp->dss_secsize)
140 			goto bad_bcount;
141 		if (bio->bio_offset % ssp->dss_secsize)
142 			goto bad_blkno;
143 		secno = bio->bio_offset / ssp->dss_secsize;
144 		nsec = bp->b_bcount / ssp->dss_secsize;
145 	}
146 
147 	/*
148 	 * Calculate slice-relative sector number end slice-relative
149 	 * limit.
150 	 */
151 	if (slice == WHOLE_DISK_SLICE) {
152 		/*
153 		 * Labels have not been allowed on whole-disks for a while.
154 		 * This really puts the nail in the coffin.
155 		 *
156 		 * Accesses to the WHOLE_DISK_SLICE do not use a disklabel
157 		 * and partition numbers are special-cased.  Currently numbers
158 		 * less then 128 are not allowed.  Partition numbers >= 128
159 		 * are encoded in the high 8 bits of the 64 bit buffer offset
160 		 * and are fed directly through to the device with no
161 		 * further interpretation.  In particular, no sector
162 		 * translation interpretation should occur because the
163 		 * sector size for the special raw access may not be the
164 		 * same as the nominal sector size for the device.
165 		 */
166 		lp.opaque = NULL;
167 		if (part < 128) {
168 			kprintf("dscheck(%s): illegal partition number (%d) "
169 				"for WHOLE_DISK_SLICE access\n",
170 				devtoname(dev), part);
171 			goto bad;
172 		} else if (part != WHOLE_SLICE_PART) {
173 			nbio = push_bio(bio);
174 			nbio->bio_offset = bio->bio_offset |
175 					   (u_int64_t)part << 56;
176 			return(nbio);
177 		} else {
178 			/*
179 			 * If writing to the raw disk request a
180 			 * reprobe on the last close.
181 			 */
182 			if (bp->b_cmd == BUF_CMD_WRITE)
183 				sp->ds_flags |= DSF_REPROBE;
184 		}
185 
186 		/*
187 		 * sp->ds_size is for the whole disk in the WHOLE_DISK_SLICE,
188 		 * there are no reserved areas.
189 		 */
190 		endsecno = sp->ds_size;
191 		slicerel_secno = secno;
192 	} else if (part == WHOLE_SLICE_PART) {
193 		/*
194 		 * NOTE! opens on a whole-slice partition will not attempt
195 		 * to read a disklabel in, so there may not be an in-core
196 		 * disklabel even if there is one on the disk.
197 		 */
198 		endsecno = sp->ds_size;
199 		slicerel_secno = secno;
200 	} else if ((lp = sp->ds_label).opaque != NULL) {
201 		/*
202 		 * A label is present, extract the partition.  Snooping of
203 		 * the disklabel is not supported even if accessible.  Of
204 		 * course, the reserved area is still write protected.
205 		 */
206 		ops = sp->ds_ops;
207 		if (ops->op_getpartbounds(ssp, lp, part,
208 					  &slicerel_secno, &endsecno)) {
209 			kprintf("dscheck(%s): partition %d out of bounds\n",
210 				devtoname(dev), part);
211 			goto bad;
212 		}
213 		slicerel_secno += secno;
214 	} else {
215 		/*
216 		 * Attempt to access partition when no disklabel present
217 		 */
218 		kprintf("dscheck(%s): attempt to access non-existent partition\n",
219 			devtoname(dev));
220 		goto bad;
221 	}
222 
223 	/*
224 	 * Disallow writes to reserved areas unless ds_wlabel allows it.
225 	 * If the reserved area is written to request a reprobe of the
226 	 * disklabel when the slice is closed.
227 	 */
228 	if (slicerel_secno < sp->ds_reserved && nsec &&
229 	    bp->b_cmd == BUF_CMD_WRITE) {
230 		if (sp->ds_wlabel == 0) {
231 			bp->b_error = EROFS;
232 			goto error;
233 		}
234 		sp->ds_flags |= DSF_REPROBE;
235 	}
236 
237 	/*
238 	 * If we get here, bio_offset must be on a block boundary and
239 	 * the sector size must be a power of 2.
240 	 */
241 	if ((bio->bio_offset & (ssp->dss_secsize - 1)) ||
242 	    (ssp->dss_secsize ^ (ssp->dss_secsize - 1)) !=
243 	    ((ssp->dss_secsize << 1) - 1)) {
244 		kprintf("%s: invalid BIO offset, not sector aligned or"
245 			" invalid sector size (not power of 2) %08llx %d\n",
246 			devtoname(dev), (long long)bio->bio_offset,
247 			ssp->dss_secsize);
248 		goto bad;
249 	}
250 
251 	/*
252 	 * EOF handling
253 	 */
254 	if (secno + nsec > endsecno) {
255 		/*
256 		 * Return an error if beyond the end of the disk, or
257 		 * if B_BNOCLIP is set.  Tell the system that we do not
258 		 * need to keep the buffer around.
259 		 */
260 		if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
261 			goto bad;
262 
263 		/*
264 		 * If exactly at end of disk, return an EOF.  Throw away
265 		 * the buffer contents, if any, by setting B_INVAL.
266 		 */
267 		if (secno == endsecno) {
268 			bp->b_resid = bp->b_bcount;
269 			bp->b_flags |= B_INVAL;
270 			goto done;
271 		}
272 
273 		/*
274 		 * Else truncate
275 		 */
276 		nsec = endsecno - secno;
277 		bp->b_bcount = nsec * ssp->dss_secsize;
278 	}
279 
280 	nbio = push_bio(bio);
281 	nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
282 			   ssp->dss_secsize;
283 	return (nbio);
284 
285 bad_bcount:
286 	kprintf(
287 	"dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
288 	    devtoname(dev), bp->b_bcount, ssp->dss_secsize);
289 	goto bad;
290 
291 bad_blkno:
292 	kprintf(
293 	"dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
294 	    devtoname(dev), (long long)bio->bio_offset, ssp->dss_secsize);
295 bad:
296 	bp->b_error = EINVAL;
297 	/* fall through */
298 error:
299 	/*
300 	 * Terminate the I/O with a ranging error.  Since the buffer is
301 	 * either illegal or beyond the file EOF, mark it B_INVAL as well.
302 	 */
303 	bp->b_resid = bp->b_bcount;
304 	bp->b_flags |= B_ERROR | B_INVAL;
305 done:
306 	/*
307 	 * Caller must biodone() the originally passed bio if NULL is
308 	 * returned.
309 	 */
310 	return (NULL);
311 }
312 
313 /*
314  * dsclose() - close a cooked disk slice.
315  *
316  * WARNING!  The passed diskslices and related diskslice structures may
317  *	     be invalidated or replaced by this function, callers must
318  *	     reload from the disk structure for continued access.
319  */
320 void
321 dsclose(cdev_t dev, int mode, struct diskslices *ssp)
322 {
323 	u_int32_t part;
324 	u_int32_t slice;
325 	struct diskslice *sp;
326 
327 	slice = dkslice(dev);
328 	part  = dkpart(dev);
329 	if (slice < ssp->dss_nslices) {
330 		sp = &ssp->dss_slices[slice];
331 		dsclrmask(sp, part);
332 		if (sp->ds_flags & DSF_REPROBE) {
333 			sp->ds_flags &= ~DSF_REPROBE;
334 			if (slice == WHOLE_DISK_SLICE) {
335 				disk_msg_send_sync(DISK_DISK_REPROBE,
336 						   dev->si_disk, NULL);
337 				devfs_config();
338 			} else {
339 				disk_msg_send_sync(DISK_SLICE_REPROBE,
340 						   dev->si_disk, sp);
341 				devfs_config();
342 			}
343 			/* ssp and sp may both be invalid after reprobe */
344 		}
345 	}
346 }
347 
348 void
349 dsgone(struct diskslices **sspp)
350 {
351 	int slice;
352 	struct diskslices *ssp;
353 
354 	if ((ssp = *sspp) != NULL) {
355 		for (slice = 0; slice < ssp->dss_nslices; slice++)
356 			free_ds_label(ssp, slice);
357 		kfree(ssp, M_DEVBUF);
358 		*sspp = NULL;
359 	}
360 }
361 
362 /*
363  * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
364  * is subject to the same restriction as dsopen().
365  */
366 int
367 dsioctl(cdev_t dev, u_long cmd, caddr_t data, int flags,
368 	struct diskslices **sspp, struct disk_info *info)
369 {
370 	int error;
371 	disklabel_t lp;
372 	disklabel_t lptmp;
373 	disklabel_ops_t ops;
374 	int old_wlabel;
375 	u_int32_t openmask[DKMAXPARTITIONS/(sizeof(u_int32_t)*8)];
376 	int part;
377 	int slice;
378 	struct diskslice *sp;
379 	struct diskslices *ssp;
380 
381 	slice = dkslice(dev);
382 	part = dkpart(dev);
383 	ssp = *sspp;
384 	if (ssp == NULL)
385 		return (EINVAL);
386 	if (slice >= ssp->dss_nslices)
387 		return (EINVAL);
388 	sp = &ssp->dss_slices[slice];
389 	lp = sp->ds_label;
390 	ops = sp->ds_ops;	/* may be NULL if no label */
391 
392 	switch (cmd) {
393 	case DIOCGDVIRGIN32:
394 		ops = &disklabel32_ops;
395 		/* fall through */
396 	case DIOCGDVIRGIN64:
397 		if (cmd != DIOCGDVIRGIN32)
398 			ops = &disklabel64_ops;
399 		/*
400 		 * You can only retrieve a virgin disklabel on the whole
401 		 * disk slice or whole-slice partition.
402 		 */
403 		if (slice != WHOLE_DISK_SLICE &&
404 		    part != WHOLE_SLICE_PART) {
405 			return(EINVAL);
406 		}
407 
408 		lp.opaque = data;
409 		ops->op_makevirginlabel(lp, ssp, sp, info);
410 		return (0);
411 
412 	case DIOCGDINFO32:
413 	case DIOCGDINFO64:
414 		/*
415 		 * You can only retrieve a disklabel on the whole
416 		 * slice partition.
417 		 *
418 		 * We do not support labels directly on whole-disks
419 		 * any more (that is, disks without slices), unless the
420 		 * device driver has asked for a compatible label (e.g.
421 		 * for a CD) to allow booting off of storage that is
422 		 * otherwise unlabeled.
423 		 */
424 		error = 0;
425 		if (part != WHOLE_SLICE_PART)
426 			return(EINVAL);
427 		if (slice == WHOLE_DISK_SLICE &&
428 		    (info->d_dsflags & DSO_COMPATLABEL) == 0) {
429 			return (ENODEV);
430 		}
431 		if (sp->ds_label.opaque == NULL) {
432 			error = dsreadandsetlabel(dev, info->d_dsflags,
433 						  ssp, sp, info);
434 			ops = sp->ds_ops;	/* may be NULL */
435 		}
436 
437 		/*
438 		 * The type of label we found must match the type of
439 		 * label requested.
440 		 */
441 		if (error == 0 && IOCPARM_LEN(cmd) != ops->labelsize)
442 			error = ENOATTR;
443 		if (error == 0)
444 			bcopy(sp->ds_label.opaque, data, ops->labelsize);
445 		return (error);
446 
447 	case DIOCGPART:
448 		{
449 			struct partinfo *dpart = (void *)data;
450 
451 			/*
452 			 * The disk management layer may not have read the
453 			 * disklabel yet because simply opening a slice no
454 			 * longer 'probes' the disk that way.  Be sure we
455 			 * have tried.
456 			 *
457 			 * We ignore any error.
458 			 */
459 			if (sp->ds_label.opaque == NULL &&
460 			    part == WHOLE_SLICE_PART &&
461 			    slice != WHOLE_DISK_SLICE) {
462 				dsreadandsetlabel(dev, info->d_dsflags,
463 						  ssp, sp, info);
464 				ops = sp->ds_ops;	/* may be NULL */
465 			}
466 
467 			bzero(dpart, sizeof(*dpart));
468 			dpart->media_offset   = (u_int64_t)sp->ds_offset *
469 						info->d_media_blksize;
470 			dpart->media_size     = (u_int64_t)sp->ds_size *
471 						info->d_media_blksize;
472 			dpart->media_blocks   = sp->ds_size;
473 			dpart->media_blksize  = info->d_media_blksize;
474 			dpart->reserved_blocks= sp->ds_reserved;
475 			dpart->fstype_uuid = sp->ds_type_uuid;
476 			dpart->storage_uuid = sp->ds_stor_uuid;
477 
478 			if (slice != WHOLE_DISK_SLICE &&
479 			    part != WHOLE_SLICE_PART) {
480 				u_int64_t start;
481 				u_int64_t blocks;
482 				if (lp.opaque == NULL)
483 					return(EINVAL);
484 				if (ops->op_getpartbounds(ssp, lp, part,
485 							  &start, &blocks)) {
486 					return(EINVAL);
487 				}
488 				ops->op_loadpartinfo(lp, part, dpart);
489 				dpart->media_offset += start *
490 						       info->d_media_blksize;
491 				dpart->media_size = blocks *
492 						    info->d_media_blksize;
493 				dpart->media_blocks = blocks;
494 
495 				/*
496 				 * partition starting sector (p_offset)
497 				 * requires slice's reserved areas to be
498 				 * adjusted.
499 				 */
500 				if (dpart->reserved_blocks > start)
501 					dpart->reserved_blocks -= start;
502 				else
503 					dpart->reserved_blocks = 0;
504 			}
505 
506 			/*
507 			 * Load remaining fields from the info structure
508 			 */
509 			dpart->d_nheads =	info->d_nheads;
510 			dpart->d_ncylinders =	info->d_ncylinders;
511 			dpart->d_secpertrack =	info->d_secpertrack;
512 			dpart->d_secpercyl =	info->d_secpercyl;
513 		}
514 		return (0);
515 
516 	case DIOCGSLICEINFO:
517 		bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
518 				 (char *)ssp);
519 		return (0);
520 
521 	case DIOCSDINFO32:
522 		ops = &disklabel32_ops;
523 		/* fall through */
524 	case DIOCSDINFO64:
525 		if (cmd != DIOCSDINFO32)
526 			ops = &disklabel64_ops;
527 		/*
528 		 * You can write a disklabel on the whole disk slice or
529 		 * whole-slice partition.
530 		 */
531 		if (slice != WHOLE_DISK_SLICE &&
532 		    part != WHOLE_SLICE_PART) {
533 			return(EINVAL);
534 		}
535 
536 		/*
537 		 * We no longer support writing disklabels directly to media
538 		 * without there being a slice.  Keep this as a separate
539 		 * conditional.
540 		 */
541 		if (slice == WHOLE_DISK_SLICE)
542 			return (ENODEV);
543 		if (!(flags & FWRITE))
544 			return (EBADF);
545 
546 		/*
547 		 * If an existing label is present it must be the same
548 		 * type as the label being passed by the ioctl.
549 		 */
550 		if (sp->ds_label.opaque && sp->ds_ops != ops)
551 			return (ENOATTR);
552 
553 		/*
554 		 * Create a temporary copy of the existing label
555 		 * (if present) so setdisklabel can compare it against
556 		 * the new label.
557 		 */
558 		lp.opaque = kmalloc(ops->labelsize, M_DEVBUF, M_WAITOK);
559 		if (sp->ds_label.opaque == NULL)
560 			bzero(lp.opaque, ops->labelsize);
561 		else
562 			bcopy(sp->ds_label.opaque, lp.opaque, ops->labelsize);
563 		if (sp->ds_label.opaque == NULL) {
564 			bzero(openmask, sizeof(openmask));
565 		} else {
566 			bcopy(sp->ds_openmask, openmask, sizeof(openmask));
567 		}
568 		lptmp.opaque = data;
569 		error = ops->op_setdisklabel(lp, lptmp, ssp, sp, openmask);
570 		disk_msg_send_sync(DISK_SLICE_REPROBE, dev->si_disk, sp);
571 		devfs_config();
572 		if (error != 0) {
573 			kfree(lp.opaque, M_DEVBUF);
574 			return (error);
575 		}
576 		free_ds_label(ssp, slice);
577 		set_ds_label(ssp, slice, lp, ops);
578 		return (0);
579 
580 	case DIOCSYNCSLICEINFO:
581 		/*
582 		 * This ioctl can only be done on the whole disk
583 		 */
584 		if (slice != WHOLE_DISK_SLICE || part != WHOLE_SLICE_PART)
585 			return (EINVAL);
586 
587 		if (*(int *)data == 0) {
588 			for (slice = 0; slice < ssp->dss_nslices; slice++) {
589 				struct diskslice *ds = &ssp->dss_slices[slice];
590 
591 				switch(dscountmask(ds)) {
592 				case 0:
593 					break;
594 				case 1:
595 					if (slice != WHOLE_DISK_SLICE)
596 						return (EBUSY);
597 					if (!dschkmask(ds, RAW_PART))
598 						return (EBUSY);
599 					break;
600 				default:
601 					return (EBUSY);
602 				}
603 			}
604 		}
605 
606 		disk_msg_send_sync(DISK_DISK_REPROBE, dev->si_disk, NULL);
607 		devfs_config();
608 		return 0;
609 
610 	case DIOCWDINFO32:
611 	case DIOCWDINFO64:
612 		error = dsioctl(dev, ((cmd == DIOCWDINFO32) ?
613 					DIOCSDINFO32 : DIOCSDINFO64),
614 				data, flags, &ssp, info);
615 		if (error == 0 && sp->ds_label.opaque == NULL)
616 			error = EINVAL;
617 		if (part != WHOLE_SLICE_PART)
618 			error = EINVAL;
619 		if (error != 0)
620 			return (error);
621 
622 		/*
623 		 * Allow the reserved area to be written, reload ops
624 		 * because the DIOCSDINFO op above may have installed
625 		 * a new label type.
626 		 */
627 		ops = sp->ds_ops;
628 		old_wlabel = sp->ds_wlabel;
629 		set_ds_wlabel(ssp, slice, TRUE);
630 		error = ops->op_writedisklabel(dev, ssp, sp, sp->ds_label);
631 		disk_msg_send_sync(DISK_SLICE_REPROBE, dev->si_disk, sp);
632 		devfs_config();
633 		set_ds_wlabel(ssp, slice, old_wlabel);
634 		/* XXX should invalidate in-core label if write failed. */
635 		return (error);
636 
637 	case DIOCWLABEL:
638 		if (slice == WHOLE_DISK_SLICE)
639 			return (ENODEV);
640 		if (!(flags & FWRITE))
641 			return (EBADF);
642 		set_ds_wlabel(ssp, slice, *(int *)data != 0);
643 		return (0);
644 
645 	default:
646 		return (ENOIOCTL);
647 	}
648 }
649 
650 int
651 dsisopen(struct diskslices *ssp)
652 {
653 	int slice;
654 
655 	if (ssp == NULL)
656 		return (0);
657 	for (slice = 0; slice < ssp->dss_nslices; slice++) {
658 		if (dscountmask(&ssp->dss_slices[slice]))
659 			return (1);
660 	}
661 	return (0);
662 }
663 
664 /*
665  * Allocate a slices "struct" and initialize it to contain only an empty
666  * compatibility slice (pointing to itself), a whole disk slice (covering
667  * the disk as described by the label), and (nslices - BASE_SLICES) empty
668  * slices beginning at BASE_SLICE.
669  *
670  * Note that the compatibility slice is no longer really a compatibility
671  * slice.  It is slice 0 if a GPT label is present, and the dangerously
672  * dedicated slice if no slice table otherwise exists.  Else it is 0-sized.
673  */
674 struct diskslices *
675 dsmakeslicestruct(int nslices, struct disk_info *info)
676 {
677 	struct diskslice *sp;
678 	struct diskslices *ssp;
679 
680 	ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
681 		     nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
682 	ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
683 	ssp->dss_nslices = nslices;
684 	ssp->dss_oflags = 0;
685 
686 	/*
687 	 * Figure out if we can use shifts or whether we have to
688 	 * use mod/multply to translate byte offsets into sector numbers.
689 	 */
690 	if ((info->d_media_blksize ^ (info->d_media_blksize - 1)) ==
691 	     (info->d_media_blksize << 1) - 1) {
692 		ssp->dss_secmult = info->d_media_blksize / DEV_BSIZE;
693 		if (ssp->dss_secmult & (ssp->dss_secmult - 1))
694 			ssp->dss_secshift = -1;
695 		else
696 			ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
697 	} else {
698 		ssp->dss_secmult = 0;
699 		ssp->dss_secshift = -1;
700 	}
701 	ssp->dss_secsize = info->d_media_blksize;
702 	sp = &ssp->dss_slices[0];
703 	bzero(sp, nslices * sizeof *sp);
704 	sp[WHOLE_DISK_SLICE].ds_size = info->d_media_blocks;
705 	return (ssp);
706 }
707 
708 char *
709 dsname(cdev_t dev, int unit, int slice, int part, char *partname)
710 {
711 	return dev->si_name;
712 }
713 
714 /*
715  * This should only be called when the unit is inactive and the strategy
716  * routine should not allow it to become active unless we call it.  Our
717  * strategy routine must be special to allow activity.
718  */
719 int
720 dsopen(cdev_t dev, int mode, u_int flags,
721        struct diskslices **sspp, struct disk_info *info)
722 {
723 	struct diskslice *sp;
724 	struct diskslices *ssp;
725 	int slice;
726 	int part;
727 
728 	ssp = *sspp;
729 	dev->si_bsize_phys = info->d_media_blksize;
730 	slice = dkslice(dev);
731 	part = dkpart(dev);
732 	sp = &ssp->dss_slices[slice];
733 	dssetmask(sp, part);
734 
735 	return 0;
736 }
737 
738 /*
739  * Attempt to read the disklabel.  If successful, store it in sp->ds_label.
740  *
741  * If we cannot read the disklabel and DSO_COMPATLABEL is set, we construct
742  * a fake label covering the whole disk.
743  */
744 static
745 int
746 dsreadandsetlabel(cdev_t dev, u_int flags,
747 		  struct diskslices *ssp, struct diskslice *sp,
748 		  struct disk_info *info)
749 {
750 	disklabel_t lp;
751 	disklabel_ops_t ops;
752 	const char *msg;
753 	const char *sname;
754 	char partname[2];
755 	int slice = dkslice(dev);
756 
757 	/*
758 	 * Probe the disklabel
759 	 */
760 	lp.opaque = NULL;
761 	sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART, partname);
762 	ops = &disklabel32_ops;
763 	msg = ops->op_readdisklabel(dev, sp, &lp, info);
764 	if (msg && strcmp(msg, "no disk label") == 0) {
765 		ops = &disklabel64_ops;
766 		msg = disklabel64_ops.op_readdisklabel(dev, sp, &lp, info);
767 	}
768 
769 	/*
770 	 * If we failed and COMPATLABEL is set, create a dummy disklabel.
771 	 */
772 	if (msg != NULL && (flags & DSO_COMPATLABEL)) {
773 		msg = NULL;
774 		if (sp->ds_size >= 0x100000000ULL)
775 			ops = &disklabel64_ops;
776 		else
777 			ops = &disklabel32_ops;
778 		lp = ops->op_clone_label(info, sp);
779 	}
780 	if (msg != NULL) {
781 		if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
782 			log(LOG_WARNING, "%s: cannot find label (%s)\n",
783 			    sname, msg);
784 		if (lp.opaque)
785 			kfree(lp.opaque, M_DEVBUF);
786 	} else {
787 		set_ds_label(ssp, slice, lp, ops);
788 		set_ds_wlabel(ssp, slice, FALSE);
789 	}
790 	return (msg ? EINVAL : 0);
791 }
792 
793 int64_t
794 dssize(cdev_t dev, struct diskslices **sspp)
795 {
796 	disklabel_t lp;
797 	disklabel_ops_t ops;
798 	int part;
799 	int slice;
800 	struct diskslices *ssp;
801 	u_int64_t start;
802 	u_int64_t blocks;
803 
804 	slice = dkslice(dev);
805 	part = dkpart(dev);
806 	ssp = *sspp;
807 	if (ssp == NULL || slice >= ssp->dss_nslices
808 	    || !dschkmask(&ssp->dss_slices[slice], part)) {
809 		if (dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred) != 0)
810 			return (-1);
811 		dev_dclose(dev, FREAD, S_IFCHR);
812 		ssp = *sspp;
813 	}
814 	lp = ssp->dss_slices[slice].ds_label;
815 	if (part == WHOLE_SLICE_PART) {
816 		blocks = ssp->dss_slices[slice].ds_size;
817 	} else if (lp.opaque == NULL) {
818 		blocks = (u_int64_t)-1;
819 	} else {
820 		ops = ssp->dss_slices[slice].ds_ops;
821 		if (ops->op_getpartbounds(ssp, lp, part, &start, &blocks))
822 			return (-1);
823 	}
824 	return ((int64_t)blocks);
825 }
826 
827 static void
828 free_ds_label(struct diskslices *ssp, int slice)
829 {
830 	struct diskslice *sp;
831 	disklabel_t lp;
832 
833 	sp = &ssp->dss_slices[slice];
834 	lp = sp->ds_label;
835 	if (lp.opaque != NULL) {
836 		kfree(lp.opaque, M_DEVBUF);
837 		lp.opaque = NULL;
838 		set_ds_label(ssp, slice, lp, NULL);
839 	}
840 }
841 
842 static void
843 set_ds_label(struct diskslices *ssp, int slice,
844 	     disklabel_t lp, disklabel_ops_t ops)
845 {
846 	struct diskslice *sp = &ssp->dss_slices[slice];
847 
848 	sp->ds_label = lp;
849 	sp->ds_ops = ops;
850 	if (lp.opaque && slice != WHOLE_DISK_SLICE)
851 		ops->op_adjust_label_reserved(ssp, slice, sp);
852 	else
853 		sp->ds_reserved = 0;
854 }
855 
856 static void
857 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
858 {
859 	ssp->dss_slices[slice].ds_wlabel = wlabel;
860 }
861 
862