xref: /dragonfly/sys/kern/subr_disklabel64.c (revision 6877ea2d)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/conf.h>
39 #include <sys/disklabel.h>
40 #include <sys/disklabel64.h>
41 #include <sys/diskslice.h>
42 #include <sys/disk.h>
43 #include <sys/kern_syscall.h>
44 #include <sys/buf2.h>
45 
/*
 * Partition alignment granularity, measured against the physical start of
 * the media (versus the start of the slice).
 *
 * One megabyte is used because SSDs already work with large 128K blocks
 * internally (for MLC) and future devices may well use something larger.
 *
 * As long as the sysop picks sane partition sizes everything ends up
 * nicely aligned: swap (e.g. for swapcache), clustered operations against
 * the larger physical sector sizes of newer HDs, and so forth.
 */
#define PALIGN_SIZE	(1 << 20)		/* 1MB */
#define PALIGN_MASK	(PALIGN_SIZE - 1)
58 
59 /*
60  * Retrieve the partition start and extent, in blocks.  Return 0 on success,
61  * EINVAL on error.
62  */
63 static int
64 l64_getpartbounds(struct diskslices *ssp, disklabel_t lp, u_int32_t part,
65 		  u_int64_t *start, u_int64_t *blocks)
66 {
67 	struct partition64 *pp;
68 
69 	if (part >= lp.lab64->d_npartitions)
70 		return (EINVAL);
71 
72 	pp = &lp.lab64->d_partitions[part];
73 
74 	if ((pp->p_boffset & (ssp->dss_secsize - 1)) ||
75 	    (pp->p_bsize & (ssp->dss_secsize - 1))) {
76 		return (EINVAL);
77 	}
78 	*start = pp->p_boffset / ssp->dss_secsize;
79 	*blocks = pp->p_bsize / ssp->dss_secsize;
80 	return(0);
81 }
82 
83 /*
84  * Get the filesystem type XXX - diskslices code needs to use uuids
85  */
86 static void
87 l64_loadpartinfo(disklabel_t lp, u_int32_t part, struct partinfo *dpart)
88 {
89 	struct partition64 *pp;
90 	const size_t uuid_size = sizeof(struct uuid);
91 
92 	if (part < lp.lab64->d_npartitions) {
93 		pp = &lp.lab64->d_partitions[part];
94 		dpart->fstype_uuid = pp->p_type_uuid;
95 		dpart->storage_uuid = pp->p_stor_uuid;
96 		dpart->fstype = pp->p_fstype;
97 	} else {
98 		bzero(&dpart->fstype_uuid, uuid_size);
99 		bzero(&dpart->storage_uuid, uuid_size);
100 		dpart->fstype = 0;
101 	}
102 }
103 
104 /*
105  * Get the number of partitions
106  */
107 static u_int32_t
108 l64_getnumparts(disklabel_t lp)
109 {
110 	return(lp.lab64->d_npartitions);
111 }
112 
113 static void
114 l64_freedisklabel(disklabel_t *lpp)
115 {
116 	kfree((*lpp).lab64, M_DEVBUF);
117 	(*lpp).lab64 = NULL;
118 }
119 
120 /*
121  * Attempt to read a disk label from a device.  64 bit disklabels are
122  * sector-agnostic and begin at offset 0 on the device.
123  *
124  * Returns NULL on sucess, and an error string on failure.
125  */
126 static const char *
127 l64_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp,
128 		  struct disk_info *info)
129 {
130 	struct buf *bp;
131 	struct disklabel64 *dlp;
132 	const char *msg;
133 	uint32_t savecrc;
134 	size_t dlpcrcsize;
135 	size_t bpsize;
136 	int secsize;
137 
138 	/*
139 	 * XXX I/O size is subject to device DMA limitations
140 	 */
141 	secsize = info->d_media_blksize;
142 	bpsize = roundup2(sizeof(*dlp), secsize);
143 
144 	bp = getpbuf_mem(NULL);
145 	KKASSERT(bpsize <= bp->b_bufsize);
146 	bp->b_bio1.bio_offset = 0;
147 	bp->b_bio1.bio_done = biodone_sync;
148 	bp->b_bio1.bio_flags |= BIO_SYNC;
149 	bp->b_bcount = bpsize;
150 	bp->b_flags &= ~B_INVAL;
151 	bp->b_flags |= B_FAILONDIS;
152 	bp->b_cmd = BUF_CMD_READ;
153 	dev_dstrategy(dev, &bp->b_bio1);
154 
155 	if (biowait(&bp->b_bio1, "labrd")) {
156 		msg = "I/O error";
157 	} else {
158 		dlp = (struct disklabel64 *)bp->b_data;
159 		dlpcrcsize = offsetof(struct disklabel64,
160 				      d_partitions[dlp->d_npartitions]) -
161 			     offsetof(struct disklabel64, d_magic);
162 		savecrc = dlp->d_crc;
163 		dlp->d_crc = 0;
164 		if (dlp->d_magic != DISKMAGIC64) {
165 			msg = "no disk label";
166 		} else if (dlp->d_npartitions > MAXPARTITIONS64) {
167 			msg = "disklabel64 corrupted, too many partitions";
168 		} else if (savecrc != crc32(&dlp->d_magic, dlpcrcsize)) {
169 			msg = "disklabel64 corrupted, bad CRC";
170 		} else {
171 			dlp->d_crc = savecrc;
172 			(*lpp).lab64 = kmalloc(sizeof(*dlp),
173 					       M_DEVBUF, M_WAITOK|M_ZERO);
174 			*(*lpp).lab64 = *dlp;
175 			msg = NULL;
176 		}
177 	}
178 	bp->b_flags |= B_INVAL | B_AGE;
179 	relpbuf(bp, NULL);
180 
181 	return (msg);
182 }
183 
184 /*
185  * If everything is good, copy olpx to nlpx.  Check to see if any
186  * open partitions would change.
187  */
188 static int
189 l64_setdisklabel(disklabel_t olpx, disklabel_t nlpx, struct diskslices *ssp,
190 		 struct diskslice *sp, u_int32_t *openmask)
191 {
192 	struct disklabel64 *olp, *nlp;
193 	struct partition64 *opp, *npp;
194 	uint32_t savecrc;
195 	uint64_t slicebsize;
196 	size_t nlpcrcsize;
197 	int i;
198 
199 	olp = olpx.lab64;
200 	nlp = nlpx.lab64;
201 
202 	slicebsize = (uint64_t)sp->ds_size * ssp->dss_secsize;
203 
204 	if (nlp->d_magic != DISKMAGIC64)
205 		return (EINVAL);
206 	if (nlp->d_npartitions > MAXPARTITIONS64)
207 		return (EINVAL);
208 	savecrc = nlp->d_crc;
209 	nlp->d_crc = 0;
210 	nlpcrcsize = offsetof(struct disklabel64,
211 			      d_partitions[nlp->d_npartitions]) -
212 		     offsetof(struct disklabel64, d_magic);
213 	if (crc32(&nlp->d_magic, nlpcrcsize) != savecrc) {
214 		nlp->d_crc = savecrc;
215 		return (EINVAL);
216 	}
217 	nlp->d_crc = savecrc;
218 
219 	/*
220 	 * Check if open partitions have changed
221 	 */
222 	i = 0;
223 	while (i < MAXPARTITIONS64) {
224 		if (openmask[i >> 5] == 0) {
225 			i += 32;
226 			continue;
227 		}
228 		if ((openmask[i >> 5] & (1 << (i & 31))) == 0) {
229 			++i;
230 			continue;
231 		}
232 		if (nlp->d_npartitions <= i)
233 			return (EBUSY);
234 		opp = &olp->d_partitions[i];
235 		npp = &nlp->d_partitions[i];
236 		if (npp->p_boffset != opp->p_boffset ||
237 		    npp->p_bsize < opp->p_bsize) {
238 			return (EBUSY);
239 		}
240 
241 		/*
242 		 * Do not allow p_type_uuid or p_stor_uuid to change if
243 		 * the partition is currently open.
244 		 */
245 		if (bcmp(&npp->p_type_uuid, &opp->p_type_uuid,
246 		     sizeof(npp->p_type_uuid)) != 0) {
247 			return (EBUSY);
248 		}
249 		if (bcmp(&npp->p_stor_uuid, &opp->p_stor_uuid,
250 		     sizeof(npp->p_stor_uuid)) != 0) {
251 			return (EBUSY);
252 		}
253 		++i;
254 	}
255 
256 	/*
257 	 * Make sure the label and partition offsets and sizes are sane.
258 	 */
259 	if (nlp->d_total_size > slicebsize)
260 		return (ENOSPC);
261 	if (nlp->d_total_size & (ssp->dss_secsize - 1))
262 		return (EINVAL);
263 	if (nlp->d_bbase & (ssp->dss_secsize - 1))
264 		return (EINVAL);
265 	if (nlp->d_pbase & (ssp->dss_secsize - 1))
266 		return (EINVAL);
267 	if (nlp->d_pstop & (ssp->dss_secsize - 1))
268 		return (EINVAL);
269 	if (nlp->d_abase & (ssp->dss_secsize - 1))
270 		return (EINVAL);
271 
272 	for (i = 0; i < nlp->d_npartitions; ++i) {
273 		npp = &nlp->d_partitions[i];
274 		if (npp->p_bsize == 0) {
275 			if (npp->p_boffset != 0)
276 				return (EINVAL);
277 			continue;
278 		}
279 		if (npp->p_boffset & (ssp->dss_secsize - 1))
280 			return (EINVAL);
281 		if (npp->p_bsize & (ssp->dss_secsize - 1))
282 			return (EINVAL);
283 		if (npp->p_boffset < nlp->d_pbase)
284 			return (ENOSPC);
285 		if (npp->p_boffset + npp->p_bsize > nlp->d_total_size)
286 			return (ENOSPC);
287 	}
288 
289 	/*
290 	 * Structurally we may add code to make modifications above in the
291 	 * future, so regenerate the crc anyway.
292 	 */
293 	nlp->d_crc = 0;
294 	nlp->d_crc = crc32(&nlp->d_magic, nlpcrcsize);
295 	*olp = *nlp;
296 
297 	return (0);
298 }
299 
300 /*
301  * Write disk label back to device after modification.
302  */
303 static int
304 l64_writedisklabel(cdev_t dev, struct diskslices *ssp,
305 		   struct diskslice *sp, disklabel_t lpx)
306 {
307 	struct disklabel64 *lp;
308 	struct disklabel64 *dlp;
309 	struct buf *bp;
310 	int error = 0;
311 	size_t bpsize;
312 	int secsize;
313 
314 	lp = lpx.lab64;
315 
316 	/*
317 	 * XXX I/O size is subject to device DMA limitations
318 	 */
319 	secsize = ssp->dss_secsize;
320 	bpsize = roundup2(sizeof(*lp), secsize);
321 
322 	bp = getpbuf_mem(NULL);
323 	KKASSERT(bpsize <= bp->b_bufsize);
324 	bp->b_bio1.bio_offset = 0;
325 	bp->b_bio1.bio_done = biodone_sync;
326 	bp->b_bio1.bio_flags |= BIO_SYNC;
327 	bp->b_bcount = bpsize;
328 	bp->b_flags |= B_FAILONDIS;
329 
330 	/*
331 	 * Because our I/O is larger then the label, and because we do not
332 	 * write the d_reserved0[] area, do a read-modify-write.
333 	 */
334 	bp->b_flags &= ~B_INVAL;
335 	bp->b_cmd = BUF_CMD_READ;
336 	KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
337 	dev_dstrategy(dev, &bp->b_bio1);
338 	error = biowait(&bp->b_bio1, "labrd");
339 	if (error)
340 		goto done;
341 
342 	dlp = (void *)bp->b_data;
343 	bcopy(&lp->d_magic, &dlp->d_magic,
344 	      sizeof(*lp) - offsetof(struct disklabel64, d_magic));
345 	bp->b_cmd = BUF_CMD_WRITE;
346 	bp->b_bio1.bio_done = biodone_sync;
347 	bp->b_bio1.bio_flags |= BIO_SYNC;
348 	KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
349 	dev_dstrategy(dev, &bp->b_bio1);
350 	error = biowait(&bp->b_bio1, "labwr");
351 done:
352 	bp->b_flags |= B_INVAL | B_AGE;
353 	relpbuf(bp, NULL);
354 
355 	return (error);
356 }
357 
358 /*
359  * Create a disklabel based on a disk_info structure for the purposes of
360  * DSO_COMPATLABEL - cases where no real label exists on the storage medium.
361  *
362  * If a diskslice is passed, the label is truncated to the slice.
363  *
364  * NOTE!  This is not a legal label because d_bbase and d_pbase are both
365  * set to 0.
366  */
367 static disklabel_t
368 l64_clone_label(struct disk_info *info, struct diskslice *sp)
369 {
370 	struct disklabel64 *lp;
371 	disklabel_t res;
372 	uint32_t blksize = info->d_media_blksize;
373 	size_t lpcrcsize;
374 
375 	lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK | M_ZERO);
376 
377 	if (sp)
378 		lp->d_total_size = (uint64_t)sp->ds_size * blksize;
379 	else
380 		lp->d_total_size = info->d_media_blocks * blksize;
381 
382 	lp->d_magic = DISKMAGIC64;
383 	lp->d_align = blksize;
384 	lp->d_npartitions = MAXPARTITIONS64;
385 	lp->d_pstop = lp->d_total_size;
386 
387 	/*
388 	 * Create a dummy 'c' part and a dummy 'a' part (if requested).
389 	 * Note that the 'c' part is really a hack.  64 bit disklabels
390 	 * do not use 'c' to mean the raw partition.
391 	 */
392 
393 	lp->d_partitions[2].p_boffset = 0;
394 	lp->d_partitions[2].p_bsize = lp->d_total_size;
395 	/* XXX SET FS TYPE */
396 
397 	if (info->d_dsflags & DSO_COMPATPARTA) {
398 		lp->d_partitions[0].p_boffset = 0;
399 		lp->d_partitions[0].p_bsize = lp->d_total_size;
400 		/* XXX SET FS TYPE */
401 	}
402 
403 	lpcrcsize = offsetof(struct disklabel64,
404 			     d_partitions[lp->d_npartitions]) -
405 		    offsetof(struct disklabel64, d_magic);
406 
407 	lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
408 	res.lab64 = lp;
409 	return (res);
410 }
411 
412 /*
413  * Create a virgin disklabel64 suitable for writing to the media.
414  *
415  * disklabel64 always reserves 32KB for a boot area and leaves room
416  * for up to RESPARTITIONS64 partitions.
417  */
418 static void
419 l64_makevirginlabel(disklabel_t lpx, struct diskslices *ssp,
420 		    struct diskslice *sp, struct disk_info *info)
421 {
422 	struct disklabel64 *lp = lpx.lab64;
423 	struct partition64 *pp;
424 	uint32_t blksize;
425 	uint32_t ressize;
426 	uint64_t blkmask;	/* 64 bits so we can ~ */
427 	uint64_t doffset;
428 	size_t lpcrcsize;
429 
430 	doffset = sp->ds_offset * info->d_media_blksize;
431 
432 	/*
433 	 * Setup the initial label.  Use of a block size of at least 4KB
434 	 * for calculating the initial reserved areas to allow some degree
435 	 * of portability between media with different sector sizes.
436 	 *
437 	 * Note that the modified blksize is stored in d_align as a hint
438 	 * to the disklabeling program.
439 	 */
440 	bzero(lp, sizeof(*lp));
441 	if ((blksize = info->d_media_blksize) < 4096)
442 		blksize = 4096;
443 	blkmask = blksize - 1;
444 
445 	if (sp)
446 		lp->d_total_size = (uint64_t)sp->ds_size * ssp->dss_secsize;
447 	else
448 		lp->d_total_size = info->d_media_blocks * info->d_media_blksize;
449 
450 	lp->d_magic = DISKMAGIC64;
451 	lp->d_align = blksize;
452 	lp->d_npartitions = MAXPARTITIONS64;
453 	kern_uuidgen(&lp->d_stor_uuid, 1);
454 
455 	ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]);
456 	ressize = (ressize + (uint32_t)blkmask) & ~blkmask;
457 
458 	/* Reserve space for the stage2 boot code */
459 	lp->d_bbase = ressize;
460 	lp->d_pbase = lp->d_bbase + ((BOOT2SIZE64 + blkmask) & ~blkmask);
461 
462 	/* Reserve space for the backup label at the slice end */
463 	lp->d_abase = lp->d_total_size - ressize;
464 
465 	/*
466 	 * NOTE: The pbase and pstop are calculated to align to PALIGN_SIZE
467 	 *	 and adjusted with the slice offset, so the partitions are
468 	 *	 aligned relative to the start of the physical disk.
469 	 */
470 	lp->d_pbase = ((doffset + lp->d_pbase + PALIGN_MASK) &
471 		       ~(uint64_t)PALIGN_MASK) - doffset;
472 	lp->d_pstop = ((lp->d_abase - lp->d_pbase) &
473 		       ~(uint64_t)PALIGN_MASK) + lp->d_pbase;
474 
475 	/*
476 	 * All partitions are left empty unless DSO_COMPATPARTA is set
477 	 */
478 
479 	if (info->d_dsflags & DSO_COMPATPARTA) {
480 		pp = &lp->d_partitions[0];
481 		pp->p_boffset = lp->d_pbase;
482 		pp->p_bsize = lp->d_pstop - lp->d_pbase;
483 		/* XXX SET FS TYPE */
484 	}
485 
486 	lpcrcsize = offsetof(struct disklabel64,
487 			     d_partitions[lp->d_npartitions]) -
488 		    offsetof(struct disklabel64, d_magic);
489 	lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
490 }
491 
492 /*
493  * Set the number of blocks at the beginning of the slice which have
494  * been reserved for label operations.  This area will be write-protected
495  * when accessed via the slice.
496  *
497  * For now just protect the label area proper.  Do not protect the
498  * boot area.  Note partitions in 64 bit disklabels do not overlap
499  * the disklabel or boot area.
500  */
501 static void
502 l64_adjust_label_reserved(struct diskslices *ssp, int slice,
503 			  struct diskslice *sp)
504 {
505 	struct disklabel64 *lp = sp->ds_label.lab64;
506 
507 	sp->ds_reserved = lp->d_bbase / ssp->dss_secsize;
508 }
509 
510 struct disklabel_ops disklabel64_ops = {
511 	.labelsize = sizeof(struct disklabel64),
512 	.op_readdisklabel = l64_readdisklabel,
513 	.op_setdisklabel = l64_setdisklabel,
514 	.op_writedisklabel = l64_writedisklabel,
515 	.op_clone_label = l64_clone_label,
516 	.op_adjust_label_reserved = l64_adjust_label_reserved,
517 	.op_getpartbounds = l64_getpartbounds,
518 	.op_loadpartinfo = l64_loadpartinfo,
519 	.op_getnumparts = l64_getnumparts,
520 	.op_makevirginlabel = l64_makevirginlabel,
521 	.op_freedisklabel = l64_freedisklabel
522 };
523 
524