1 /*	$OpenBSD: softraid_amd64.c,v 1.2 2016/09/11 17:51:21 jsing Exp $	*/
2 
3 /*
4  * Copyright (c) 2012 Joel Sing <jsing@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/queue.h>
21 #include <sys/disklabel.h>
22 #include <sys/reboot.h>
23 
24 #include <dev/biovar.h>
25 #include <dev/softraidvar.h>
26 
27 #include <lib/libsa/aes_xts.h>
28 #include <lib/libsa/softraid.h>
29 #include <lib/libz/zlib.h>
30 
31 #include "libsa.h"
32 #include "disk.h"
33 #include "softraid_amd64.h"
34 
35 static int gpt_chk_mbr(struct dos_partition *, u_int64_t);
36 static uint64_t findopenbsd_gpt(struct sr_boot_volume *, const char **);
37 
38 void
39 srprobe_meta_opt_load(struct sr_metadata *sm, struct sr_meta_opt_head *som)
40 {
41 	struct sr_meta_opt_hdr	*omh;
42 	struct sr_meta_opt_item *omi;
43 #if 0
44 	u_int8_t checksum[MD5_DIGEST_LENGTH];
45 #endif
46 	int			i;
47 
48 	/* Process optional metadata. */
49 	omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) +
50 	    sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no);
51 	for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
52 
53 #ifdef BIOS_DEBUG
54 		printf("Found optional metadata of type %u, length %u\n",
55 		    omh->som_type, omh->som_length);
56 #endif
57 
58 		/* Unsupported old fixed length optional metadata. */
59 		if (omh->som_length == 0) {
60 			omh = (struct sr_meta_opt_hdr *)((void *)omh +
61 			    SR_OLD_META_OPT_SIZE);
62 			continue;
63 		}
64 
65 		/* Load variable length optional metadata. */
66 		omi = alloc(sizeof(struct sr_meta_opt_item));
67 		bzero(omi, sizeof(struct sr_meta_opt_item));
68 		SLIST_INSERT_HEAD(som, omi, omi_link);
69 		omi->omi_som = alloc(omh->som_length);
70 		bzero(omi->omi_som, omh->som_length);
71 		bcopy(omh, omi->omi_som, omh->som_length);
72 
73 #if 0
74 		/* XXX - Validate checksum. */
75 		bcopy(&omi->omi_som->som_checksum, &checksum,
76 		    MD5_DIGEST_LENGTH);
77 		bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH);
78 		sr_checksum(sc, omi->omi_som,
79 		    &omi->omi_som->som_checksum, omh->som_length);
80 		if (bcmp(&checksum, &omi->omi_som->som_checksum,
81 		    sizeof(checksum)))
82 			panic("%s: invalid optional metadata checksum",
83 			    DEVNAME(sc));
84 #endif
85 
86 		omh = (struct sr_meta_opt_hdr *)((void *)omh +
87 		    omh->som_length);
88 	}
89 }
90 
91 void
92 srprobe_keydisk_load(struct sr_metadata *sm)
93 {
94 	struct sr_meta_opt_hdr	*omh;
95 	struct sr_meta_keydisk	*skm;
96 	struct sr_boot_keydisk	*kd;
97 	int i;
98 
99 	/* Process optional metadata. */
100 	omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) +
101 	    sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no);
102 	for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
103 
104 		/* Unsupported old fixed length optional metadata. */
105 		if (omh->som_length == 0) {
106 			omh = (struct sr_meta_opt_hdr *)((void *)omh +
107 			    SR_OLD_META_OPT_SIZE);
108 			continue;
109 		}
110 
111 		if (omh->som_type != SR_OPT_KEYDISK) {
112 			omh = (struct sr_meta_opt_hdr *)((void *)omh +
113 			    omh->som_length);
114 			continue;
115 		}
116 
117 		kd = alloc(sizeof(struct sr_boot_keydisk));
118 		bcopy(&sm->ssdi.ssd_uuid, &kd->kd_uuid, sizeof(kd->kd_uuid));
119 		skm = (struct sr_meta_keydisk*)omh;
120 		bcopy(&skm->skm_maskkey, &kd->kd_key, sizeof(kd->kd_key));
121 		SLIST_INSERT_HEAD(&sr_keydisks, kd, kd_link);
122 	}
123 }
124 
125 void
126 srprobe(void)
127 {
128 	struct sr_boot_volume *bv, *bv1, *bv2;
129 	struct sr_boot_chunk *bc, *bc1, *bc2;
130 	struct sr_meta_chunk *mc;
131 	struct sr_metadata *md;
132 	struct diskinfo *dip;
133 	struct partition *pp;
134 	int i, error, volno;
135 	dev_t bsd_dev;
136 	daddr_t off;
137 
138 	/* Probe for softraid volumes. */
139 	SLIST_INIT(&sr_volumes);
140 	SLIST_INIT(&sr_keydisks);
141 
142 	md = alloc(SR_META_SIZE * DEV_BSIZE);
143 
144 	TAILQ_FOREACH(dip, &disklist, list) {
145 
146 		/* Only check hard disks, skip those with I/O errors. */
147 		if ((dip->bios_info.bios_number & 0x80) == 0 ||
148 		    (dip->bios_info.flags & BDI_INVALID))
149 			continue;
150 
151 		/* Make sure disklabel has been read. */
152 		if ((dip->bios_info.flags & (BDI_BADLABEL|BDI_GOODLABEL)) == 0)
153 			continue;
154 
155 		for (i = 0; i < MAXPARTITIONS; i++) {
156 
157 			pp = &dip->disklabel.d_partitions[i];
158 			if (pp->p_fstype != FS_RAID || pp->p_size == 0)
159 				continue;
160 
161 			/* Read softraid metadata. */
162 			bzero(md, SR_META_SIZE * DEV_BSIZE);
163 			off = DL_SECTOBLK(&dip->disklabel, DL_GETPOFFSET(pp));
164 			off += SR_META_OFFSET;
165 			error = dip->diskio(F_READ, dip, off, SR_META_SIZE, md);
166 			if (error)
167 				continue;
168 
169 			/* Is this valid softraid metadata? */
170 			if (md->ssdi.ssd_magic != SR_MAGIC)
171 				continue;
172 
173 			/* XXX - validate checksum. */
174 
175 			/* Handle key disks separately... */
176 			if (md->ssdi.ssd_level == SR_KEYDISK_LEVEL) {
177 				srprobe_keydisk_load(md);
178 				continue;
179 			}
180 
181 			/* Locate chunk-specific metadata for this chunk. */
182 			mc = (struct sr_meta_chunk *)(md + 1);
183 			mc += md->ssdi.ssd_chunk_id;
184 
185 			bc = alloc(sizeof(struct sr_boot_chunk));
186 			bc->sbc_diskinfo = dip;
187 			bc->sbc_disk = dip->bios_info.bios_number;
188 			bc->sbc_part = 'a' + i;
189 
190 			bsd_dev = dip->bios_info.bsd_dev;
191 			bc->sbc_mm = MAKEBOOTDEV(B_TYPE(bsd_dev),
192 			    B_ADAPTOR(bsd_dev), B_CONTROLLER(bsd_dev),
193 			    B_UNIT(bsd_dev), bc->sbc_part - 'a');
194 
195 			bc->sbc_chunk_id = md->ssdi.ssd_chunk_id;
196 			bc->sbc_ondisk = md->ssd_ondisk;
197 			bc->sbc_state = mc->scm_status;
198 
199 			SLIST_FOREACH(bv, &sr_volumes, sbv_link) {
200 				if (bcmp(&md->ssdi.ssd_uuid, &bv->sbv_uuid,
201 				    sizeof(md->ssdi.ssd_uuid)) == 0)
202 					break;
203 			}
204 
205 			if (bv == NULL) {
206 				bv = alloc(sizeof(struct sr_boot_volume));
207 				bzero(bv, sizeof(struct sr_boot_volume));
208 				bv->sbv_level = md->ssdi.ssd_level;
209 				bv->sbv_volid = md->ssdi.ssd_volid;
210 				bv->sbv_chunk_no = md->ssdi.ssd_chunk_no;
211 				bv->sbv_flags = md->ssdi.ssd_vol_flags;
212 				bv->sbv_size = md->ssdi.ssd_size;
213 				bv->sbv_data_blkno = md->ssd_data_blkno;
214 				bcopy(&md->ssdi.ssd_uuid, &bv->sbv_uuid,
215 				    sizeof(md->ssdi.ssd_uuid));
216 				SLIST_INIT(&bv->sbv_chunks);
217 				SLIST_INIT(&bv->sbv_meta_opt);
218 
219 				/* Load optional metadata for this volume. */
220 				srprobe_meta_opt_load(md, &bv->sbv_meta_opt);
221 
222 				/* Maintain volume order. */
223 				bv2 = NULL;
224 				SLIST_FOREACH(bv1, &sr_volumes, sbv_link) {
225 					if (bv1->sbv_volid > bv->sbv_volid)
226 						break;
227 					bv2 = bv1;
228 				}
229 				if (bv2 == NULL)
230 					SLIST_INSERT_HEAD(&sr_volumes, bv,
231 					    sbv_link);
232 				else
233 					SLIST_INSERT_AFTER(bv2, bv, sbv_link);
234 			}
235 
236 			/* Maintain chunk order. */
237 			bc2 = NULL;
238 			SLIST_FOREACH(bc1, &bv->sbv_chunks, sbc_link) {
239 				if (bc1->sbc_chunk_id > bc->sbc_chunk_id)
240 					break;
241 				bc2 = bc1;
242 			}
243 			if (bc2 == NULL)
244 				SLIST_INSERT_HEAD(&bv->sbv_chunks,
245 				    bc, sbc_link);
246 			else
247 				SLIST_INSERT_AFTER(bc2, bc, sbc_link);
248 
249 			bv->sbv_chunks_found++;
250 		}
251 	}
252 
253 	/*
254 	 * Assemble RAID volumes.
255 	 */
256 	volno = 0;
257 	SLIST_FOREACH(bv, &sr_volumes, sbv_link) {
258 
259 		/* Skip if this is a hotspare "volume". */
260 		if (bv->sbv_level == SR_HOTSPARE_LEVEL &&
261 		    bv->sbv_chunk_no == 1)
262 			continue;
263 
264 		/* Determine current ondisk version. */
265 		bv->sbv_ondisk = 0;
266 		SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) {
267 			if (bc->sbc_ondisk > bv->sbv_ondisk)
268 				bv->sbv_ondisk = bc->sbc_ondisk;
269 		}
270 		SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) {
271 			if (bc->sbc_ondisk != bv->sbv_ondisk)
272 				bc->sbc_state = BIOC_SDOFFLINE;
273 		}
274 
275 		/* XXX - Check for duplicate chunks. */
276 
277 		/*
278 		 * Validate that volume has sufficient chunks for
279 		 * read-only access.
280 		 *
281 		 * XXX - check chunk states.
282 		 */
283 		bv->sbv_state = BIOC_SVOFFLINE;
284 		switch (bv->sbv_level) {
285 		case 0:
286 		case 'C':
287 		case 'c':
288 			if (bv->sbv_chunk_no == bv->sbv_chunks_found)
289 				bv->sbv_state = BIOC_SVONLINE;
290 			break;
291 
292 		case 1:
293 			if (bv->sbv_chunk_no == bv->sbv_chunks_found)
294 				bv->sbv_state = BIOC_SVONLINE;
295 			else if (bv->sbv_chunks_found > 0)
296 				bv->sbv_state = BIOC_SVDEGRADED;
297 			break;
298 		}
299 
300 		bv->sbv_unit = volno++;
301 		if (bv->sbv_state != BIOC_SVOFFLINE)
302 			printf(" sr%d%s", bv->sbv_unit,
303 			    bv->sbv_flags & BIOC_SCBOOTABLE ? "*" : "");
304 	}
305 
306 	explicit_bzero(md, SR_META_SIZE * DEV_BSIZE);
307 	free(md, 0);
308 }
309 
310 int
311 sr_strategy(struct sr_boot_volume *bv, int rw, daddr32_t blk, size_t size,
312     void *buf, size_t *rsize)
313 {
314 	struct diskinfo *sr_dip, *dip;
315 	struct sr_boot_chunk *bc;
316 	struct aes_xts_ctx ctx;
317 	size_t i, j, nsect;
318 	daddr_t blkno;
319 	u_char iv[8];
320 	u_char *bp;
321 	int err;
322 
323 	/* We only support read-only softraid. */
324 	if (rw != F_READ)
325 		return ENOTSUP;
326 
327 	/* Partition offset within softraid volume. */
328 	sr_dip = (struct diskinfo *)bv->sbv_diskinfo;
329 	blk += sr_dip->disklabel.d_partitions[bv->sbv_part - 'a'].p_offset;
330 
331 	if (bv->sbv_level == 0) {
332 		return ENOTSUP;
333 	} else if (bv->sbv_level == 1) {
334 
335 		/* Select first online chunk. */
336 		SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link)
337 			if (bc->sbc_state == BIOC_SDONLINE)
338 				break;
339 		if (bc == NULL)
340 			return EIO;
341 
342 		dip = (struct diskinfo *)bc->sbc_diskinfo;
343 		dip->bsddev = bc->sbc_mm;
344 		blk += bv->sbv_data_blkno;
345 
346 		/* XXX - If I/O failed we should try another chunk... */
347 		return dip->strategy(dip, rw, blk, size, buf, rsize);
348 
349 	} else if (bv->sbv_level == 'C') {
350 
351 		/* Select first online chunk. */
352 		SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link)
353 			if (bc->sbc_state == BIOC_SDONLINE)
354 				break;
355 		if (bc == NULL)
356 			return EIO;
357 
358 		dip = (struct diskinfo *)bc->sbc_diskinfo;
359 		dip->bsddev = bc->sbc_mm;
360 
361 		/* XXX - select correct key. */
362 		aes_xts_setkey(&ctx, (u_char *)bv->sbv_keys, 64);
363 
364 		nsect = (size + DEV_BSIZE - 1) / DEV_BSIZE;
365 		for (i = 0; i < nsect; i++) {
366 			blkno = blk + i;
367 			bp = ((u_char *)buf) + i * DEV_BSIZE;
368 			err = dip->strategy(dip, rw, bv->sbv_data_blkno + blkno,
369 			    DEV_BSIZE, bp, NULL);
370 			if (err != 0)
371 				return err;
372 
373 			bcopy(&blkno, iv, sizeof(blkno));
374 			aes_xts_reinit(&ctx, iv);
375 			for (j = 0; j < DEV_BSIZE; j += AES_XTS_BLOCKSIZE)
376 				aes_xts_decrypt(&ctx, bp + j);
377 		}
378 		if (rsize != NULL)
379 			*rsize = nsect * DEV_BSIZE;
380 
381 		return err;
382 
383 	} else
384 		return ENOTSUP;
385 }
386 
387 /*
388  * Returns 0 if the MBR with the provided partition array is a GPT protective
389  * MBR, and returns 1 otherwise. A GPT protective MBR would have one and only
390  * one MBR partition, an EFI partition that either covers the whole disk or as
391  * much of it as is possible with a 32bit size field.
392  *
393  * Taken from kern/subr_disk.c.
394  *
395  * NOTE: MS always uses a size of UINT32_MAX for the EFI partition!**
396  */
397 static int
398 gpt_chk_mbr(struct dos_partition *dp, u_int64_t dsize)
399 {
400 	struct dos_partition *dp2;
401 	int efi, found, i;
402 	u_int32_t psize;
403 
404 	found = efi = 0;
405 	for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) {
406 		if (dp2->dp_typ == DOSPTYP_UNUSED)
407 			continue;
408 		found++;
409 		if (dp2->dp_typ != DOSPTYP_EFI)
410 			continue;
411 		psize = letoh32(dp2->dp_size);
412 		if (psize == (dsize - 1) ||
413 		    psize == UINT32_MAX) {
414 			if (letoh32(dp2->dp_start) == 1)
415 				efi++;
416 		}
417 	}
418 	if (found == 1 && efi == 1)
419 		return (0);
420 
421 	return (1);
422 }
423 
424 static uint64_t
425 findopenbsd_gpt(struct sr_boot_volume *bv, const char **err)
426 {
427 	struct			 gpt_header gh;
428 	int			 i, part, found;
429 	uint64_t		 lba;
430 	uint32_t		 orig_csum, new_csum;
431 	uint32_t		 ghsize, ghpartsize, ghpartnum, ghpartspersec;
432 	uint32_t		 gpsectors;
433 	const char		 openbsd_uuid_code[] = GPT_UUID_OPENBSD;
434 	struct gpt_partition	 gp;
435 	static struct uuid	*openbsd_uuid = NULL, openbsd_uuid_space;
436 	static u_char		 buf[DEV_BSIZE];
437 
438 	/* Prepare OpenBSD UUID */
439 	if (openbsd_uuid == NULL) {
440 		/* XXX: should be replaced by uuid_dec_be() */
441 		memcpy(&openbsd_uuid_space, openbsd_uuid_code,
442 		    sizeof(openbsd_uuid_space));
443 		openbsd_uuid_space.time_low =
444 		    betoh32(openbsd_uuid_space.time_low);
445 		openbsd_uuid_space.time_mid =
446 		    betoh16(openbsd_uuid_space.time_mid);
447 		openbsd_uuid_space.time_hi_and_version =
448 		    betoh16(openbsd_uuid_space.time_hi_and_version);
449 
450 		openbsd_uuid = &openbsd_uuid_space;
451 	}
452 
453 	/* LBA1: GPT Header */
454 	lba = 1;
455 	sr_strategy(bv, F_READ, lba, DEV_BSIZE, buf, NULL);
456 	memcpy(&gh, buf, sizeof(gh));
457 
458 	/* Check signature */
459 	if (letoh64(gh.gh_sig) != GPTSIGNATURE) {
460 		*err = "bad GPT signature\n";
461 		return (-1);
462 	}
463 
464 	if (letoh32(gh.gh_rev) != GPTREVISION) {
465 		*err = "bad GPT revision\n";
466 		return (-1);
467 	}
468 
469 	ghsize = letoh32(gh.gh_size);
470 	if (ghsize < GPTMINHDRSIZE || ghsize > sizeof(struct gpt_header)) {
471 		*err = "bad GPT header size\n";
472 		return (-1);
473 	}
474 
475 	/* Check checksum */
476 	orig_csum = gh.gh_csum;
477 	gh.gh_csum = 0;
478 	new_csum = crc32(0, (unsigned char *)&gh, ghsize);
479 	gh.gh_csum = orig_csum;
480 	if (letoh32(orig_csum) != new_csum) {
481 		*err = "bad GPT header checksum\n";
482 		return (-1);
483 	}
484 
485 	lba = letoh64(gh.gh_part_lba);
486 	ghpartsize = letoh32(gh.gh_part_size);
487 	ghpartspersec = DEV_BSIZE / ghpartsize;
488 	ghpartnum = letoh32(gh.gh_part_num);
489 	gpsectors = (ghpartnum + ghpartspersec - 1) / ghpartspersec;
490 	new_csum = crc32(0L, Z_NULL, 0);
491 	found = 0;
492 	for (i = 0; i < gpsectors; i++, lba++) {
493 		sr_strategy(bv, F_READ, lba, DEV_BSIZE, buf, NULL);
494 		for (part = 0; part < ghpartspersec; part++) {
495 			if (ghpartnum == 0)
496 				break;
497 			new_csum = crc32(new_csum, buf + part * sizeof(gp),
498 			    sizeof(gp));
499 			ghpartnum--;
500 			if (found)
501 				continue;
502 			memcpy(&gp, buf + part * sizeof(gp), sizeof(gp));
503 			if (memcmp(&gp.gp_type, openbsd_uuid,
504 			    sizeof(struct uuid)) == 0)
505 				found = 1;
506 		}
507 	}
508 	if (new_csum != letoh32(gh.gh_part_csum)) {
509 		*err = "bad GPT entries checksum\n";
510 		return (-1);
511 	}
512 	if (found)
513 		return (letoh64(gp.gp_lba_start));
514 
515 	return (-1);
516 }
517 
518 const char *
519 sr_getdisklabel(struct sr_boot_volume *bv, struct disklabel *label)
520 {
521 	struct dos_partition *dp;
522 	struct dos_mbr mbr;
523 	const char *err = NULL;
524 	u_int start = 0;
525 	char buf[DEV_BSIZE];
526 	int i;
527 
528 	/* Check for MBR to determine partition offset. */
529 	bzero(&mbr, sizeof(mbr));
530 	sr_strategy(bv, F_READ, DOSBBSECTOR, sizeof(mbr), &mbr, NULL);
531 	if (gpt_chk_mbr(mbr.dmbr_parts, bv->sbv_size) == 0) {
532 		start = findopenbsd_gpt(bv, &err);
533 		if (start == (u_int)-1) {
534 			if (err != NULL)
535 				return (err);
536 			return "no OpenBSD partition\n";
537 		}
538 	} else if (mbr.dmbr_sign == DOSMBR_SIGNATURE) {
539 
540 		/* Search for OpenBSD partition */
541 		for (i = 0; i < NDOSPART; i++) {
542 			dp = &mbr.dmbr_parts[i];
543 			if (!dp->dp_size)
544 				continue;
545 			if (dp->dp_typ == DOSPTYP_OPENBSD) {
546 				start = dp->dp_start;
547 				break;
548 			}
549 		}
550 	}
551 
552 	/* Read the disklabel. */
553 	sr_strategy(bv, F_READ, start + DOS_LABELSECTOR,
554 	    sizeof(struct disklabel), buf, NULL);
555 
556 #ifdef BIOS_DEBUG
557 	printf("sr_getdisklabel: magic %lx\n",
558 	    ((struct disklabel *)buf)->d_magic);
559 	for (i = 0; i < MAXPARTITIONS; i++)
560 		printf("part %c: type = %d, size = %d, offset = %d\n", 'a' + i,
561 		    (int)((struct disklabel *)buf)->d_partitions[i].p_fstype,
562 		    (int)((struct disklabel *)buf)->d_partitions[i].p_size,
563 		    (int)((struct disklabel *)buf)->d_partitions[i].p_offset);
564 #endif
565 
566 	/* Fill in disklabel */
567 	return (getdisklabel(buf, label));
568 }
569