xref: /dragonfly/sys/kern/subr_disklabel64.c (revision 92fc8b5c)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/kern/subr_disklabel64.c,v 1.5 2007/07/20 17:21:51 dillon Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/conf.h>
41 #include <sys/disklabel.h>
42 #include <sys/disklabel64.h>
43 #include <sys/diskslice.h>
44 #include <sys/disk.h>
45 #include <sys/kern_syscall.h>
46 #include <sys/buf2.h>
47 
/*
 * Physical alignment granule for partition bases, measured against the
 * start of the physical media (versus the start of the slice).  A
 * megabyte is used because SSDs already use large 128K blocks
 * internally (for MLC) and future devices may use even larger ones.
 *
 * As long as the sysop picks sane partition sizes everything ends up
 * nicely aligned, in particular swap (e.g. for swapcache) and clustered
 * operations against the larger physical sector sizes of newer HDs.
 *
 * PALIGN_MASK is the matching low-bit mask used for round-up arithmetic.
 */
#define PALIGN_SIZE	(1024 * 1024)
#define PALIGN_MASK	(PALIGN_SIZE - 1)
60 
61 /*
62  * Retrieve the partition start and extent, in blocks.  Return 0 on success,
63  * EINVAL on error.
64  */
65 static int
66 l64_getpartbounds(struct diskslices *ssp, disklabel_t lp, u_int32_t part,
67 		  u_int64_t *start, u_int64_t *blocks)
68 {
69 	struct partition64 *pp;
70 
71 	if (part >= lp.lab64->d_npartitions)
72 		return (EINVAL);
73 
74 	pp = &lp.lab64->d_partitions[part];
75 
76 	if ((pp->p_boffset & (ssp->dss_secsize - 1)) ||
77 	    (pp->p_bsize & (ssp->dss_secsize - 1))) {
78 		return (EINVAL);
79 	}
80 	*start = pp->p_boffset / ssp->dss_secsize;
81 	*blocks = pp->p_bsize / ssp->dss_secsize;
82 	return(0);
83 }
84 
85 /*
86  * Get the filesystem type XXX - diskslices code needs to use uuids
87  */
88 static void
89 l64_loadpartinfo(disklabel_t lp, u_int32_t part, struct partinfo *dpart)
90 {
91 	struct partition64 *pp;
92 	const size_t uuid_size = sizeof(struct uuid);
93 
94 	if (part < lp.lab64->d_npartitions) {
95 		pp = &lp.lab64->d_partitions[part];
96 		dpart->fstype_uuid = pp->p_type_uuid;
97 		dpart->storage_uuid = pp->p_stor_uuid;
98 		dpart->fstype = pp->p_fstype;
99 	} else {
100 		bzero(&dpart->fstype_uuid, uuid_size);
101 		bzero(&dpart->storage_uuid, uuid_size);
102 		dpart->fstype = 0;
103 	}
104 }
105 
106 /*
107  * Get the number of partitions
108  */
109 static u_int32_t
110 l64_getnumparts(disklabel_t lp)
111 {
112 	return(lp.lab64->d_npartitions);
113 }
114 
115 /*
116  * Attempt to read a disk label from a device.  64 bit disklabels are
117  * sector-agnostic and begin at offset 0 on the device.  64 bit disklabels
118  * may only be used with GPT partitioning schemes.
119  *
120  * Returns NULL on sucess, and an error string on failure.
121  */
122 static const char *
123 l64_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp,
124 		  struct disk_info *info)
125 {
126 	struct buf *bp;
127 	struct disklabel64 *dlp;
128 	const char *msg;
129 	uint32_t savecrc;
130 	size_t dlpcrcsize;
131 	size_t bpsize;
132 	int secsize;
133 
134 	/*
135 	 * XXX I/O size is subject to device DMA limitations
136 	 */
137 	secsize = info->d_media_blksize;
138 	bpsize = (sizeof(*dlp) + secsize - 1) & ~(secsize - 1);
139 
140 	bp = geteblk(bpsize);
141 	bp->b_bio1.bio_offset = 0;
142 	bp->b_bio1.bio_done = biodone_sync;
143 	bp->b_bio1.bio_flags |= BIO_SYNC;
144 	bp->b_bcount = bpsize;
145 	bp->b_flags &= ~B_INVAL;
146 	bp->b_cmd = BUF_CMD_READ;
147 	dev_dstrategy(dev, &bp->b_bio1);
148 
149 	if (biowait(&bp->b_bio1, "labrd")) {
150 		msg = "I/O error";
151 	} else {
152 		dlp = (struct disklabel64 *)bp->b_data;
153 		dlpcrcsize = offsetof(struct disklabel64,
154 				      d_partitions[dlp->d_npartitions]) -
155 			     offsetof(struct disklabel64, d_magic);
156 		savecrc = dlp->d_crc;
157 		dlp->d_crc = 0;
158 		if (dlp->d_magic != DISKMAGIC64) {
159 			msg = "no disk label";
160 		} else if (dlp->d_npartitions > MAXPARTITIONS64) {
161 			msg = "disklabel64 corrupted, too many partitions";
162 		} else if (savecrc != crc32(&dlp->d_magic, dlpcrcsize)) {
163 			msg = "disklabel64 corrupted, bad CRC";
164 		} else {
165 			dlp->d_crc = savecrc;
166 			(*lpp).lab64 = kmalloc(sizeof(*dlp),
167 					       M_DEVBUF, M_WAITOK|M_ZERO);
168 			*(*lpp).lab64 = *dlp;
169 			msg = NULL;
170 		}
171 	}
172 	bp->b_flags |= B_INVAL | B_AGE;
173 	brelse(bp);
174 	return (msg);
175 }
176 
177 /*
178  * If everything is good, copy olpx to nlpx.  Check to see if any
179  * open partitions would change.
180  */
181 static int
182 l64_setdisklabel(disklabel_t olpx, disklabel_t nlpx, struct diskslices *ssp,
183 		 struct diskslice *sp, u_int32_t *openmask)
184 {
185 	struct disklabel64 *olp, *nlp;
186 	struct partition64 *opp, *npp;
187 	uint32_t savecrc;
188 	uint64_t slicebsize;
189 	size_t nlpcrcsize;
190 	int i;
191 
192 	olp = olpx.lab64;
193 	nlp = nlpx.lab64;
194 
195 	slicebsize = (uint64_t)sp->ds_size * ssp->dss_secsize;
196 
197 	if (nlp->d_magic != DISKMAGIC64)
198 		return (EINVAL);
199 	if (nlp->d_npartitions > MAXPARTITIONS64)
200 		return (EINVAL);
201 	savecrc = nlp->d_crc;
202 	nlp->d_crc = 0;
203 	nlpcrcsize = offsetof(struct disklabel64,
204 			      d_partitions[nlp->d_npartitions]) -
205 		     offsetof(struct disklabel64, d_magic);
206 	if (crc32(&nlp->d_magic, nlpcrcsize) != savecrc) {
207 		nlp->d_crc = savecrc;
208 		return (EINVAL);
209 	}
210 	nlp->d_crc = savecrc;
211 
212 	/*
213 	 * Check if open partitions have changed
214 	 */
215 	i = 0;
216 	while (i < MAXPARTITIONS64) {
217 		if (openmask[i >> 5] == 0) {
218 			i += 32;
219 			continue;
220 		}
221 		if ((openmask[i >> 5] & (1 << (i & 31))) == 0) {
222 			++i;
223 			continue;
224 		}
225 		if (nlp->d_npartitions <= i)
226 			return (EBUSY);
227 		opp = &olp->d_partitions[i];
228 		npp = &nlp->d_partitions[i];
229 		if (npp->p_boffset != opp->p_boffset ||
230 		    npp->p_bsize < opp->p_bsize) {
231 			return (EBUSY);
232 		}
233 
234 		/*
235 		 * Do not allow p_type_uuid or p_stor_uuid to change if
236 		 * the partition is currently open.
237 		 */
238 		if (bcmp(&npp->p_type_uuid, &opp->p_type_uuid,
239 		     sizeof(npp->p_type_uuid)) != 0) {
240 			return (EBUSY);
241 		}
242 		if (bcmp(&npp->p_stor_uuid, &opp->p_stor_uuid,
243 		     sizeof(npp->p_stor_uuid)) != 0) {
244 			return (EBUSY);
245 		}
246 		++i;
247 	}
248 
249 	/*
250 	 * Make sure the label and partition offsets and sizes are sane.
251 	 */
252 	if (nlp->d_total_size > slicebsize)
253 		return (ENOSPC);
254 	if (nlp->d_total_size & (ssp->dss_secsize - 1))
255 		return (EINVAL);
256 	if (nlp->d_bbase & (ssp->dss_secsize - 1))
257 		return (EINVAL);
258 	if (nlp->d_pbase & (ssp->dss_secsize - 1))
259 		return (EINVAL);
260 	if (nlp->d_pstop & (ssp->dss_secsize - 1))
261 		return (EINVAL);
262 	if (nlp->d_abase & (ssp->dss_secsize - 1))
263 		return (EINVAL);
264 
265 	for (i = 0; i < nlp->d_npartitions; ++i) {
266 		npp = &nlp->d_partitions[i];
267 		if (npp->p_bsize == 0) {
268 			if (npp->p_boffset != 0)
269 				return (EINVAL);
270 			continue;
271 		}
272 		if (npp->p_boffset & (ssp->dss_secsize - 1))
273 			return (EINVAL);
274 		if (npp->p_bsize & (ssp->dss_secsize - 1))
275 			return (EINVAL);
276 		if (npp->p_boffset < nlp->d_pbase)
277 			return (ENOSPC);
278 		if (npp->p_boffset + npp->p_bsize > nlp->d_total_size)
279 			return (ENOSPC);
280 	}
281 
282 	/*
283 	 * Structurally we may add code to make modifications above in the
284 	 * future, so regenerate the crc anyway.
285 	 */
286 	nlp->d_crc = 0;
287 	nlp->d_crc = crc32(&nlp->d_magic, nlpcrcsize);
288 	*olp = *nlp;
289 
290 	return (0);
291 }
292 
293 /*
294  * Write disk label back to device after modification.
295  */
296 static int
297 l64_writedisklabel(cdev_t dev, struct diskslices *ssp,
298 		   struct diskslice *sp, disklabel_t lpx)
299 {
300 	struct disklabel64 *lp;
301 	struct disklabel64 *dlp;
302 	struct buf *bp;
303 	int error = 0;
304 	size_t bpsize;
305 	int secsize;
306 
307 	lp = lpx.lab64;
308 
309 	/*
310 	 * XXX I/O size is subject to device DMA limitations
311 	 */
312 	secsize = ssp->dss_secsize;
313 	bpsize = (sizeof(*lp) + secsize - 1) & ~(secsize - 1);
314 
315 	bp = geteblk(bpsize);
316 	bp->b_bio1.bio_offset = 0;
317 	bp->b_bio1.bio_done = biodone_sync;
318 	bp->b_bio1.bio_flags |= BIO_SYNC;
319 	bp->b_bcount = bpsize;
320 
321 	/*
322 	 * Because our I/O is larger then the label, and because we do not
323 	 * write the d_reserved0[] area, do a read-modify-write.
324 	 */
325 	bp->b_flags &= ~B_INVAL;
326 	bp->b_cmd = BUF_CMD_READ;
327 	KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
328 	dev_dstrategy(dev, &bp->b_bio1);
329 	error = biowait(&bp->b_bio1, "labrd");
330 	if (error)
331 		goto done;
332 
333 	dlp = (void *)bp->b_data;
334 	bcopy(&lp->d_magic, &dlp->d_magic,
335 	      sizeof(*lp) - offsetof(struct disklabel64, d_magic));
336 	bp->b_cmd = BUF_CMD_WRITE;
337 	bp->b_bio1.bio_done = biodone_sync;
338 	bp->b_bio1.bio_flags |= BIO_SYNC;
339 	KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
340 	dev_dstrategy(dev, &bp->b_bio1);
341 	error = biowait(&bp->b_bio1, "labwr");
342 done:
343 	bp->b_flags |= B_INVAL | B_AGE;
344 	brelse(bp);
345 	return (error);
346 }
347 
348 /*
349  * Create a disklabel based on a disk_info structure for the purposes of
350  * DSO_COMPATLABEL - cases where no real label exists on the storage medium.
351  *
352  * If a diskslice is passed, the label is truncated to the slice.
353  *
354  * NOTE!  This is not a legal label because d_bbase and d_pbase are both
355  * set to 0.
356  */
357 static disklabel_t
358 l64_clone_label(struct disk_info *info, struct diskslice *sp)
359 {
360 	struct disklabel64 *lp;
361 	disklabel_t res;
362 	uint32_t blksize = info->d_media_blksize;
363 	size_t lpcrcsize;
364 
365 	lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK | M_ZERO);
366 
367 	if (sp)
368 		lp->d_total_size = (uint64_t)sp->ds_size * blksize;
369 	else
370 		lp->d_total_size = info->d_media_blocks * blksize;
371 
372 	lp->d_magic = DISKMAGIC64;
373 	lp->d_align = blksize;
374 	lp->d_npartitions = MAXPARTITIONS64;
375 	lp->d_pstop = lp->d_total_size;
376 
377 	/*
378 	 * Create a dummy 'c' part and a dummy 'a' part (if requested).
379 	 * Note that the 'c' part is really a hack.  64 bit disklabels
380 	 * do not use 'c' to mean the raw partition.
381 	 */
382 
383 	lp->d_partitions[2].p_boffset = 0;
384 	lp->d_partitions[2].p_bsize = lp->d_total_size;
385 	/* XXX SET FS TYPE */
386 
387 	if (info->d_dsflags & DSO_COMPATPARTA) {
388 		lp->d_partitions[0].p_boffset = 0;
389 		lp->d_partitions[0].p_bsize = lp->d_total_size;
390 		/* XXX SET FS TYPE */
391 	}
392 
393 	lpcrcsize = offsetof(struct disklabel64,
394 			     d_partitions[lp->d_npartitions]) -
395 		    offsetof(struct disklabel64, d_magic);
396 
397 	lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
398 	res.lab64 = lp;
399 	return (res);
400 }
401 
402 /*
403  * Create a virgin disklabel64 suitable for writing to the media.
404  *
405  * disklabel64 always reserves 32KB for a boot area and leaves room
406  * for up to RESPARTITIONS64 partitions.
407  */
408 static void
409 l64_makevirginlabel(disklabel_t lpx, struct diskslices *ssp,
410 		    struct diskslice *sp, struct disk_info *info)
411 {
412 	struct disklabel64 *lp = lpx.lab64;
413 	struct partition64 *pp;
414 	uint32_t blksize;
415 	uint32_t ressize;
416 	uint64_t blkmask;	/* 64 bits so we can ~ */
417 	size_t lpcrcsize;
418 
419 	/*
420 	 * Setup the initial label.  Use of a block size of at least 4KB
421 	 * for calculating the initial reserved areas to allow some degree
422 	 * of portability between media with different sector sizes.
423 	 *
424 	 * Note that the modified blksize is stored in d_align as a hint
425 	 * to the disklabeling program.
426 	 */
427 	bzero(lp, sizeof(*lp));
428 	if ((blksize = info->d_media_blksize) < 4096)
429 		blksize = 4096;
430 	blkmask = blksize - 1;
431 
432 	if (sp)
433 		lp->d_total_size = (uint64_t)sp->ds_size * ssp->dss_secsize;
434 	else
435 		lp->d_total_size = info->d_media_blocks * info->d_media_blksize;
436 
437 	lp->d_magic = DISKMAGIC64;
438 	lp->d_align = blksize;
439 	lp->d_npartitions = MAXPARTITIONS64;
440 	kern_uuidgen(&lp->d_stor_uuid, 1);
441 
442 	ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]);
443 	ressize = (ressize + (uint32_t)blkmask) & ~blkmask;
444 
445 	/*
446 	 * NOTE: When calculating pbase take into account the slice offset
447 	 *	 so the partitions are at least 32K-aligned relative to the
448 	 *	 start of the physical disk.  This will accomodate efficient
449 	 *	 access to 4096 byte physical sector drives.
450 	 */
451 	lp->d_bbase = ressize;
452 	lp->d_pbase = lp->d_bbase + ((32768 + blkmask) & ~blkmask);
453 	lp->d_pbase = (lp->d_pbase + PALIGN_MASK) & ~(uint64_t)PALIGN_MASK;
454 
455 	/* adjust for slice offset so we are physically aligned */
456 	lp->d_pbase += 32768 - (sp->ds_offset * info->d_media_blksize) % 32768;
457 
458 	lp->d_pstop = (lp->d_total_size - lp->d_bbase) & ~blkmask;
459 	lp->d_abase = lp->d_pstop;
460 
461 	/*
462 	 * All partitions are left empty unless DSO_COMPATPARTA is set
463 	 */
464 
465 	if (info->d_dsflags & DSO_COMPATPARTA) {
466 		pp = &lp->d_partitions[0];
467 		pp->p_boffset = lp->d_pbase;
468 		pp->p_bsize = lp->d_pstop - lp->d_pbase;
469 		/* XXX SET FS TYPE */
470 	}
471 
472 	lpcrcsize = offsetof(struct disklabel64,
473 			     d_partitions[lp->d_npartitions]) -
474 		    offsetof(struct disklabel64, d_magic);
475 	lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
476 }
477 
478 /*
479  * Set the number of blocks at the beginning of the slice which have
480  * been reserved for label operations.  This area will be write-protected
481  * when accessed via the slice.
482  *
483  * For now just protect the label area proper.  Do not protect the
484  * boot area.  Note partitions in 64 bit disklabels do not overlap
485  * the disklabel or boot area.
486  */
487 static void
488 l64_adjust_label_reserved(struct diskslices *ssp, int slice,
489 			  struct diskslice *sp)
490 {
491 	struct disklabel64 *lp = sp->ds_label.lab64;
492 
493 	sp->ds_reserved = lp->d_bbase / ssp->dss_secsize;
494 }
495 
496 struct disklabel_ops disklabel64_ops = {
497 	.labelsize = sizeof(struct disklabel64),
498 	.op_readdisklabel = l64_readdisklabel,
499 	.op_setdisklabel = l64_setdisklabel,
500 	.op_writedisklabel = l64_writedisklabel,
501 	.op_clone_label = l64_clone_label,
502 	.op_adjust_label_reserved = l64_adjust_label_reserved,
503 	.op_getpartbounds = l64_getpartbounds,
504 	.op_loadpartinfo = l64_loadpartinfo,
505 	.op_getnumparts = l64_getnumparts,
506 	.op_makevirginlabel = l64_makevirginlabel
507 };
508 
509