xref: /dragonfly/sys/kern/subr_disklabel64.c (revision e6d22e9b)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/conf.h>
40 #include <sys/disklabel.h>
41 #include <sys/disklabel64.h>
42 #include <sys/diskslice.h>
43 #include <sys/disk.h>
44 #include <sys/kern_syscall.h>
45 #include <sys/buf2.h>
46 
/*
 * Alignment against physical start (versus slice start).  We use a megabyte
 * here.  Why do we use a megabyte?  Because SSDs already use large 128K
 * blocks internally (for MLC) and who the hell knows in the future.
 *
 * This way if the sysop picks sane values for partition sizes everything
 * will be nicely aligned, particularly swap for e.g. swapcache, and
 * clustered operations against larger physical sector sizes for newer HDs,
 * and so forth.
 *
 * PALIGN_MASK is PALIGN_SIZE - 1.  It is usable as a bit mask for rounding
 * because PALIGN_SIZE is a power of 2.
 */
#define PALIGN_SIZE	(1024 * 1024)
#define PALIGN_MASK	(PALIGN_SIZE - 1)
59 
60 /*
61  * Retrieve the partition start and extent, in blocks.  Return 0 on success,
62  * EINVAL on error.
63  */
64 static int
65 l64_getpartbounds(struct diskslices *ssp, disklabel_t lp, u_int32_t part,
66 		  u_int64_t *start, u_int64_t *blocks)
67 {
68 	struct partition64 *pp;
69 
70 	if (part >= lp.lab64->d_npartitions)
71 		return (EINVAL);
72 
73 	pp = &lp.lab64->d_partitions[part];
74 
75 	if ((pp->p_boffset & (ssp->dss_secsize - 1)) ||
76 	    (pp->p_bsize & (ssp->dss_secsize - 1))) {
77 		return (EINVAL);
78 	}
79 	*start = pp->p_boffset / ssp->dss_secsize;
80 	*blocks = pp->p_bsize / ssp->dss_secsize;
81 	return(0);
82 }
83 
84 /*
85  * Get the filesystem type XXX - diskslices code needs to use uuids
86  */
87 static void
88 l64_loadpartinfo(disklabel_t lp, u_int32_t part, struct partinfo *dpart)
89 {
90 	struct partition64 *pp;
91 	const size_t uuid_size = sizeof(struct uuid);
92 
93 	if (part < lp.lab64->d_npartitions) {
94 		pp = &lp.lab64->d_partitions[part];
95 		dpart->fstype_uuid = pp->p_type_uuid;
96 		dpart->storage_uuid = pp->p_stor_uuid;
97 		dpart->fstype = pp->p_fstype;
98 	} else {
99 		bzero(&dpart->fstype_uuid, uuid_size);
100 		bzero(&dpart->storage_uuid, uuid_size);
101 		dpart->fstype = 0;
102 	}
103 }
104 
105 /*
106  * Get the number of partitions
107  */
108 static u_int32_t
109 l64_getnumparts(disklabel_t lp)
110 {
111 	return(lp.lab64->d_npartitions);
112 }
113 
114 static int
115 l64_getpackname(disklabel_t lp, char *buf, size_t bytes)
116 {
117 	size_t slen;
118 
119 	if (lp.lab64->d_packname[0] == 0) {
120 		buf[0] = 0;
121 		return -1;
122 	}
123 	slen = strnlen(lp.lab64->d_packname, sizeof(lp.lab64->d_packname));
124 	if (slen >= bytes)
125 		slen = bytes - 1;
126 	bcopy(lp.lab64->d_packname, buf, slen);
127 	buf[slen] = 0;
128 
129 	return 0;
130 }
131 
132 static void
133 l64_freedisklabel(disklabel_t *lpp)
134 {
135 	kfree((*lpp).lab64, M_DEVBUF);
136 	(*lpp).lab64 = NULL;
137 }
138 
139 /*
140  * Attempt to read a disk label from a device.  64 bit disklabels are
141  * sector-agnostic and begin at offset 0 on the device.
142  *
143  * Returns NULL on sucess, and an error string on failure.
144  */
145 static const char *
146 l64_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp,
147 		  struct disk_info *info)
148 {
149 	struct buf *bp;
150 	struct disklabel64 *dlp;
151 	const char *msg;
152 	uint32_t savecrc;
153 	size_t dlpcrcsize;
154 	size_t bpsize;
155 	int secsize;
156 
157 	/*
158 	 * XXX I/O size is subject to device DMA limitations
159 	 */
160 	secsize = info->d_media_blksize;
161 	bpsize = roundup2(sizeof(*dlp), secsize);
162 
163 	bp = getpbuf_mem(NULL);
164 	KKASSERT(bpsize <= bp->b_bufsize);
165 	bp->b_bio1.bio_offset = 0;
166 	bp->b_bio1.bio_done = biodone_sync;
167 	bp->b_bio1.bio_flags |= BIO_SYNC;
168 	bp->b_bcount = bpsize;
169 	bp->b_flags &= ~B_INVAL;
170 	bp->b_flags |= B_FAILONDIS;
171 	bp->b_cmd = BUF_CMD_READ;
172 	dev_dstrategy(dev, &bp->b_bio1);
173 
174 	if (biowait(&bp->b_bio1, "labrd")) {
175 		msg = "I/O error";
176 	} else {
177 		dlp = (struct disklabel64 *)bp->b_data;
178 		dlpcrcsize = offsetof(struct disklabel64,
179 				      d_partitions[dlp->d_npartitions]) -
180 			     offsetof(struct disklabel64, d_magic);
181 		savecrc = dlp->d_crc;
182 		dlp->d_crc = 0;
183 		if (dlp->d_magic != DISKMAGIC64) {
184 			msg = "no disk label";
185 		} else if (dlp->d_npartitions > MAXPARTITIONS64) {
186 			msg = "disklabel64 corrupted, too many partitions";
187 		} else if (savecrc != crc32(&dlp->d_magic, dlpcrcsize)) {
188 			msg = "disklabel64 corrupted, bad CRC";
189 		} else {
190 			dlp->d_crc = savecrc;
191 			(*lpp).lab64 = kmalloc(sizeof(*dlp),
192 					       M_DEVBUF, M_WAITOK|M_ZERO);
193 			*(*lpp).lab64 = *dlp;
194 			msg = NULL;
195 		}
196 	}
197 	bp->b_flags |= B_INVAL | B_AGE;
198 	relpbuf(bp, NULL);
199 
200 	return (msg);
201 }
202 
203 /*
204  * If everything is good, copy olpx to nlpx.  Check to see if any
205  * open partitions would change.
206  */
207 static int
208 l64_setdisklabel(disklabel_t olpx, disklabel_t nlpx, struct diskslices *ssp,
209 		 struct diskslice *sp, u_int32_t *openmask)
210 {
211 	struct disklabel64 *olp, *nlp;
212 	struct partition64 *opp, *npp;
213 	uint32_t savecrc;
214 	uint64_t slicebsize;
215 	size_t nlpcrcsize;
216 	int i;
217 
218 	olp = olpx.lab64;
219 	nlp = nlpx.lab64;
220 
221 	slicebsize = (uint64_t)sp->ds_size * ssp->dss_secsize;
222 
223 	if (nlp->d_magic != DISKMAGIC64)
224 		return (EINVAL);
225 	if (nlp->d_npartitions > MAXPARTITIONS64)
226 		return (EINVAL);
227 	savecrc = nlp->d_crc;
228 	nlp->d_crc = 0;
229 	nlpcrcsize = offsetof(struct disklabel64,
230 			      d_partitions[nlp->d_npartitions]) -
231 		     offsetof(struct disklabel64, d_magic);
232 	if (crc32(&nlp->d_magic, nlpcrcsize) != savecrc) {
233 		nlp->d_crc = savecrc;
234 		return (EINVAL);
235 	}
236 	nlp->d_crc = savecrc;
237 
238 	/*
239 	 * Check if open partitions have changed
240 	 */
241 	i = 0;
242 	while (i < MAXPARTITIONS64) {
243 		if (openmask[i >> 5] == 0) {
244 			i += 32;
245 			continue;
246 		}
247 		if ((openmask[i >> 5] & (1 << (i & 31))) == 0) {
248 			++i;
249 			continue;
250 		}
251 		if (nlp->d_npartitions <= i)
252 			return (EBUSY);
253 		opp = &olp->d_partitions[i];
254 		npp = &nlp->d_partitions[i];
255 		if (npp->p_boffset != opp->p_boffset ||
256 		    npp->p_bsize < opp->p_bsize) {
257 			return (EBUSY);
258 		}
259 
260 		/*
261 		 * Do not allow p_type_uuid or p_stor_uuid to change if
262 		 * the partition is currently open.
263 		 */
264 		if (bcmp(&npp->p_type_uuid, &opp->p_type_uuid,
265 		     sizeof(npp->p_type_uuid)) != 0) {
266 			return (EBUSY);
267 		}
268 		if (bcmp(&npp->p_stor_uuid, &opp->p_stor_uuid,
269 		     sizeof(npp->p_stor_uuid)) != 0) {
270 			return (EBUSY);
271 		}
272 		++i;
273 	}
274 
275 	/*
276 	 * Make sure the label and partition offsets and sizes are sane.
277 	 */
278 	if (nlp->d_total_size > slicebsize)
279 		return (ENOSPC);
280 	if (nlp->d_total_size & (ssp->dss_secsize - 1))
281 		return (EINVAL);
282 	if (nlp->d_bbase & (ssp->dss_secsize - 1))
283 		return (EINVAL);
284 	if (nlp->d_pbase & (ssp->dss_secsize - 1))
285 		return (EINVAL);
286 	if (nlp->d_pstop & (ssp->dss_secsize - 1))
287 		return (EINVAL);
288 	if (nlp->d_abase & (ssp->dss_secsize - 1))
289 		return (EINVAL);
290 
291 	for (i = 0; i < nlp->d_npartitions; ++i) {
292 		npp = &nlp->d_partitions[i];
293 		if (npp->p_bsize == 0) {
294 			if (npp->p_boffset != 0)
295 				return (EINVAL);
296 			continue;
297 		}
298 		if (npp->p_boffset & (ssp->dss_secsize - 1))
299 			return (EINVAL);
300 		if (npp->p_bsize & (ssp->dss_secsize - 1))
301 			return (EINVAL);
302 		if (npp->p_boffset < nlp->d_pbase)
303 			return (ENOSPC);
304 		if (npp->p_boffset + npp->p_bsize > nlp->d_total_size)
305 			return (ENOSPC);
306 	}
307 
308 	/*
309 	 * Structurally we may add code to make modifications above in the
310 	 * future, so regenerate the crc anyway.
311 	 */
312 	nlp->d_crc = 0;
313 	nlp->d_crc = crc32(&nlp->d_magic, nlpcrcsize);
314 	*olp = *nlp;
315 
316 	return (0);
317 }
318 
319 /*
320  * Write disk label back to device after modification.
321  */
322 static int
323 l64_writedisklabel(cdev_t dev, struct diskslices *ssp,
324 		   struct diskslice *sp, disklabel_t lpx)
325 {
326 	struct disklabel64 *lp;
327 	struct disklabel64 *dlp;
328 	struct buf *bp;
329 	int error = 0;
330 	size_t bpsize;
331 	int secsize;
332 
333 	lp = lpx.lab64;
334 
335 	/*
336 	 * XXX I/O size is subject to device DMA limitations
337 	 */
338 	secsize = ssp->dss_secsize;
339 	bpsize = roundup2(sizeof(*lp), secsize);
340 
341 	bp = getpbuf_mem(NULL);
342 	KKASSERT(bpsize <= bp->b_bufsize);
343 	bp->b_bio1.bio_offset = 0;
344 	bp->b_bio1.bio_done = biodone_sync;
345 	bp->b_bio1.bio_flags |= BIO_SYNC;
346 	bp->b_bcount = bpsize;
347 	bp->b_flags |= B_FAILONDIS;
348 
349 	/*
350 	 * Because our I/O is larger then the label, and because we do not
351 	 * write the d_reserved0[] area, do a read-modify-write.
352 	 */
353 	bp->b_flags &= ~B_INVAL;
354 	bp->b_cmd = BUF_CMD_READ;
355 	KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
356 	dev_dstrategy(dev, &bp->b_bio1);
357 	error = biowait(&bp->b_bio1, "labrd");
358 	if (error)
359 		goto done;
360 
361 	dlp = (void *)bp->b_data;
362 	bcopy(&lp->d_magic, &dlp->d_magic,
363 	      sizeof(*lp) - offsetof(struct disklabel64, d_magic));
364 	bp->b_cmd = BUF_CMD_WRITE;
365 	bp->b_bio1.bio_done = biodone_sync;
366 	bp->b_bio1.bio_flags |= BIO_SYNC;
367 	KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
368 	dev_dstrategy(dev, &bp->b_bio1);
369 	error = biowait(&bp->b_bio1, "labwr");
370 done:
371 	bp->b_flags |= B_INVAL | B_AGE;
372 	relpbuf(bp, NULL);
373 
374 	return (error);
375 }
376 
377 /*
378  * Create a disklabel based on a disk_info structure for the purposes of
379  * DSO_COMPATLABEL - cases where no real label exists on the storage medium.
380  *
381  * If a diskslice is passed, the label is truncated to the slice.
382  *
383  * NOTE!  This is not a legal label because d_bbase and d_pbase are both
384  * set to 0.
385  */
386 static disklabel_t
387 l64_clone_label(struct disk_info *info, struct diskslice *sp)
388 {
389 	struct disklabel64 *lp;
390 	disklabel_t res;
391 	uint32_t blksize = info->d_media_blksize;
392 	size_t lpcrcsize;
393 
394 	lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK | M_ZERO);
395 
396 	if (sp)
397 		lp->d_total_size = (uint64_t)sp->ds_size * blksize;
398 	else
399 		lp->d_total_size = info->d_media_blocks * blksize;
400 
401 	lp->d_magic = DISKMAGIC64;
402 	lp->d_align = blksize;
403 	lp->d_npartitions = MAXPARTITIONS64;
404 	lp->d_pstop = lp->d_total_size;
405 
406 	/*
407 	 * Create a dummy 'c' part and a dummy 'a' part (if requested).
408 	 * Note that the 'c' part is really a hack.  64 bit disklabels
409 	 * do not use 'c' to mean the raw partition.
410 	 */
411 
412 	lp->d_partitions[2].p_boffset = 0;
413 	lp->d_partitions[2].p_bsize = lp->d_total_size;
414 	/* XXX SET FS TYPE */
415 
416 	if (info->d_dsflags & DSO_COMPATPARTA) {
417 		lp->d_partitions[0].p_boffset = 0;
418 		lp->d_partitions[0].p_bsize = lp->d_total_size;
419 		/* XXX SET FS TYPE */
420 	}
421 
422 	lpcrcsize = offsetof(struct disklabel64,
423 			     d_partitions[lp->d_npartitions]) -
424 		    offsetof(struct disklabel64, d_magic);
425 
426 	lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
427 	res.lab64 = lp;
428 	return (res);
429 }
430 
431 /*
432  * Create a virgin disklabel64 suitable for writing to the media.
433  *
434  * disklabel64 always reserves 32KB for a boot area and leaves room
435  * for up to RESPARTITIONS64 partitions.
436  */
437 static void
438 l64_makevirginlabel(disklabel_t lpx, struct diskslices *ssp,
439 		    struct diskslice *sp, struct disk_info *info)
440 {
441 	struct disklabel64 *lp = lpx.lab64;
442 	struct partition64 *pp;
443 	uint32_t blksize;
444 	uint32_t ressize;
445 	uint64_t blkmask;	/* 64 bits so we can ~ */
446 	uint64_t doffset;
447 	size_t lpcrcsize;
448 
449 	doffset = sp->ds_offset * info->d_media_blksize;
450 
451 	/*
452 	 * Setup the initial label.  Use of a block size of at least 4KB
453 	 * for calculating the initial reserved areas to allow some degree
454 	 * of portability between media with different sector sizes.
455 	 *
456 	 * Note that the modified blksize is stored in d_align as a hint
457 	 * to the disklabeling program.
458 	 */
459 	bzero(lp, sizeof(*lp));
460 	if ((blksize = info->d_media_blksize) < 4096)
461 		blksize = 4096;
462 	blkmask = blksize - 1;
463 
464 	if (sp)
465 		lp->d_total_size = (uint64_t)sp->ds_size * ssp->dss_secsize;
466 	else
467 		lp->d_total_size = info->d_media_blocks * info->d_media_blksize;
468 
469 	lp->d_magic = DISKMAGIC64;
470 	lp->d_align = blksize;
471 	lp->d_npartitions = MAXPARTITIONS64;
472 	kern_uuidgen(&lp->d_stor_uuid, 1);
473 
474 	ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]);
475 	ressize = (ressize + (uint32_t)blkmask) & ~blkmask;
476 
477 	/* Reserve space for the stage2 boot code */
478 	lp->d_bbase = ressize;
479 	lp->d_pbase = lp->d_bbase + ((BOOT2SIZE64 + blkmask) & ~blkmask);
480 
481 	/* Reserve space for the backup label at the slice end */
482 	lp->d_abase = lp->d_total_size - ressize;
483 
484 	/*
485 	 * NOTE: The pbase and pstop are calculated to align to PALIGN_SIZE
486 	 *	 and adjusted with the slice offset, so the partitions are
487 	 *	 aligned relative to the start of the physical disk.
488 	 */
489 	lp->d_pbase = ((doffset + lp->d_pbase + PALIGN_MASK) &
490 		       ~(uint64_t)PALIGN_MASK) - doffset;
491 	lp->d_pstop = ((lp->d_abase - lp->d_pbase) &
492 		       ~(uint64_t)PALIGN_MASK) + lp->d_pbase;
493 
494 	/*
495 	 * All partitions are left empty unless DSO_COMPATPARTA is set
496 	 */
497 
498 	if (info->d_dsflags & DSO_COMPATPARTA) {
499 		pp = &lp->d_partitions[0];
500 		pp->p_boffset = lp->d_pbase;
501 		pp->p_bsize = lp->d_pstop - lp->d_pbase;
502 		/* XXX SET FS TYPE */
503 	}
504 
505 	lpcrcsize = offsetof(struct disklabel64,
506 			     d_partitions[lp->d_npartitions]) -
507 		    offsetof(struct disklabel64, d_magic);
508 	lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
509 }
510 
511 /*
512  * Set the number of blocks at the beginning of the slice which have
513  * been reserved for label operations.  This area will be write-protected
514  * when accessed via the slice.
515  *
516  * For now just protect the label area proper.  Do not protect the
517  * boot area.  Note partitions in 64 bit disklabels do not overlap
518  * the disklabel or boot area.
519  */
520 static void
521 l64_adjust_label_reserved(struct diskslices *ssp, int slice,
522 			  struct diskslice *sp)
523 {
524 	struct disklabel64 *lp = sp->ds_label.lab64;
525 
526 	sp->ds_reserved = lp->d_bbase / ssp->dss_secsize;
527 }
528 
529 struct disklabel_ops disklabel64_ops = {
530 	.labelsize = sizeof(struct disklabel64),
531 	.op_readdisklabel = l64_readdisklabel,
532 	.op_setdisklabel = l64_setdisklabel,
533 	.op_writedisklabel = l64_writedisklabel,
534 	.op_clone_label = l64_clone_label,
535 	.op_adjust_label_reserved = l64_adjust_label_reserved,
536 	.op_getpartbounds = l64_getpartbounds,
537 	.op_loadpartinfo = l64_loadpartinfo,
538 	.op_getnumparts = l64_getnumparts,
539 	.op_getpackname = l64_getpackname,
540 	.op_makevirginlabel = l64_makevirginlabel,
541 	.op_freedisklabel = l64_freedisklabel
542 };
543 
544