xref: /freebsd/sys/geom/part/g_part_ldm.c (revision 1f474190)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/bio.h>
34 #include <sys/diskmbr.h>
35 #include <sys/endian.h>
36 #include <sys/gpt.h>
37 #include <sys/kernel.h>
38 #include <sys/kobj.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mutex.h>
43 #include <sys/queue.h>
44 #include <sys/sbuf.h>
45 #include <sys/systm.h>
46 #include <sys/sysctl.h>
47 #include <sys/uuid.h>
48 #include <geom/geom.h>
49 #include <geom/part/g_part.h>
50 
51 #include "g_part_if.h"
52 
53 FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support");
54 
/*
 * Tunables for this scheme live under the kern.geom.part.ldm sysctl node,
 * which is created below as a child of kern.geom.part.
 */
SYSCTL_DECL(_kern_geom_part);
static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm,
    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_PART_LDM Logical Disk Manager");

/*
 * Debug verbosity. LDM_DEBUG(lvl, ...) prints when ldm_debug >= lvl and
 * LDM_DUMP() produces a hexdump when ldm_debug > 1.
 */
static u_int ldm_debug = 0;
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug,
    CTLFLAG_RWTUN, &ldm_debug, 0, "Debug level");

/*
 * This allows access to mirrored LDM volumes. Since we do not
 * do any mirroring here, it is not enabled by default.
 */
static u_int show_mirrors = 0;
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors,
    CTLFLAG_RWTUN, &show_mirrors, 0, "Show mirrored volumes");
71 
/*
 * Print a "GEOM_PART: "-prefixed message when the ldm_debug level is at
 * least 'lvl'. The macro appends the trailing newline itself and, because
 * of the comma before __VA_ARGS__, needs at least one argument after 'fmt'.
 */
#define	LDM_DEBUG(lvl, fmt, ...)	do {				\
	if (ldm_debug >= (lvl)) {					\
		printf("GEOM_PART: " fmt "\n", __VA_ARGS__);		\
	}								\
} while (0)
/*
 * Hexdump 'size' bytes starting at 'buf', but only when ldm_debug is
 * greater than 1.
 */
#define	LDM_DUMP(buf, size)	do {					\
	if (ldm_debug > 1) {						\
		hexdump(buf, size, NULL, 0);				\
	}								\
} while (0)
82 
/*
 * These are the internal representations of the LDM structures.
 *
 * We do not keep all fields of the on-disk structures, only the most
 * useful ones. All numbers in the on-disk structures are in big-endian
 * format.
 */
89 
/*
 * The private header is 512 bytes long. There are three copies on each
 * disk. Offsets and sizes are in sectors. Location of each copy:
 * - the first offset is relative to the disk start;
 * - the second and third offsets are relative to the LDM database start.
 *
 * On a disk partitioned with GPT, the LDM has no first private header.
 */
/* Indices into ldm_ph_off[]. */
#define	LDM_PH_MBRINDEX		0
#define	LDM_PH_GPTINDEX		2
/* Sector offsets of the three private header copies. */
static const uint64_t	ldm_ph_off[] = {6, 1856, 2047};
/* Private header versions accepted by ldm_privhdr_parse(). */
#define	LDM_VERSION_2K		0x2000b
#define	LDM_VERSION_VISTA	0x2000c
/* Byte offsets of the fields inside the on-disk private header. */
#define	LDM_PH_VERSION_OFF	0x00c
#define	LDM_PH_DISKGUID_OFF	0x030
#define	LDM_PH_DGGUID_OFF	0x0b0
#define	LDM_PH_DGNAME_OFF	0x0f0
#define	LDM_PH_START_OFF	0x11b
#define	LDM_PH_SIZE_OFF		0x123
#define	LDM_PH_DB_OFF		0x12b
#define	LDM_PH_DBSIZE_OFF	0x133
#define	LDM_PH_TH1_OFF		0x13b
#define	LDM_PH_TH2_OFF		0x143
#define	LDM_PH_CONFSIZE_OFF	0x153
#define	LDM_PH_LOGSIZE_OFF	0x15b
/* Signature expected at the very beginning of a private header sector. */
#define	LDM_PH_SIGN		"PRIVHEAD"
/* In-memory copy of the private header, filled by ldm_privhdr_parse(). */
struct ldm_privhdr {
	struct uuid	disk_guid;
	struct uuid	dg_guid;
	u_char		dg_name[32];
	uint64_t	start;		/* logical disk start */
	uint64_t	size;		/* logical disk size */
	uint64_t	db_offset;	/* LDM database start */
/* The only database size (in sectors) accepted by ldm_privhdr_check(). */
#define	LDM_DB_SIZE		2048
	uint64_t	db_size;	/* LDM database size */
/* Number of TOC header copies referenced by the private header. */
#define	LDM_TH_COUNT		2
	uint64_t	th_offset[LDM_TH_COUNT]; /* TOC header offsets */
	uint64_t	conf_size;	/* configuration size */
	uint64_t	log_size;	/* size of log */
};
130 
131 /*
132  * Table of contents header is 512 bytes long.
133  * There are two identical copies at offsets from the private header.
134  * Offsets are relative to the LDM database start.
135  */
/* Signature and section names that ldm_tochdr_check() expects to find. */
#define	LDM_TH_SIGN		"TOCBLOCK"
#define	LDM_TH_NAME1		"config"
#define	LDM_TH_NAME2		"log"
/* Byte offsets of the fields inside the on-disk TOC header. */
#define	LDM_TH_NAME1_OFF	0x024
#define	LDM_TH_CONF_OFF		0x02e
#define	LDM_TH_CONFSIZE_OFF	0x036
#define	LDM_TH_NAME2_OFF	0x046
#define	LDM_TH_LOG_OFF		0x050
#define	LDM_TH_LOGSIZE_OFF	0x058
/*
 * In-memory copy of the TOC header. The section sizes are only compared
 * against the private header during validation and are not stored here.
 */
struct ldm_tochdr {
	uint64_t	conf_offset;	/* configuration offset */
	uint64_t	log_offset;	/* log offset */
};
149 
150 /*
151  * LDM database header is 512 bytes long.
152  */
/* Signature expected at the beginning of the VMDB header sector. */
#define	LDM_VMDB_SIGN		"VMDB"
/*
 * Byte offsets of the fields inside the on-disk VMDB header.
 * Note that LDM_DB_SIZE_OFF locates the VBLK size field and is unrelated
 * to LDM_DB_SIZE, which is the size of the whole database in sectors.
 */
#define	LDM_DB_LASTSEQ_OFF	0x004
#define	LDM_DB_SIZE_OFF		0x008
#define	LDM_DB_STATUS_OFF	0x010
#define	LDM_DB_VERSION_OFF	0x012
#define	LDM_DB_DGNAME_OFF	0x016
#define	LDM_DB_DGGUID_OFF	0x035
/* In-memory copy of the VMDB header, filled by ldm_vmdbhdr_check(). */
struct ldm_vmdbhdr {
	uint32_t	last_seq;	/* sequence number of last VBLK */
	uint32_t	size;		/* size of VBLK */
};
164 
/*
 * The LDM database configuration section contains the VMDB header and
 * many VBLKs. Each VBLK represents a disk group, disk, partition,
 * component or volume.
 *
 * The most interesting for us are volumes: they represent partitions
 * in the GEOM_PART meaning. But a volume VBLK does not contain all the
 * information needed to create a GEOM provider, so we have to get the
 * rest from the related VBLKs. This is how VBLKs are related:
 *	Volumes <- Components <- Partitions -> Disks
 *
 * One volume can contain several components. In this case LDM
 * mirrors the volume data to each component.
 *
 * Also, each component can contain several partitions (spanned or
 * striped volumes).
 */
183 
/*
 * Component object. Only the object id and the parent volume id are
 * decoded from the VBLK by ldm_vblk_handle().
 */
struct ldm_component {
	uint64_t	id;		/* object id */
	uint64_t	vol_id;		/* parent volume object id */

	/* NOTE(review): presumably the number of linked partitions - confirm */
	int		count;
	LIST_HEAD(, ldm_partition) partitions;
	LIST_ENTRY(ldm_component) entry;
};
192 
/*
 * Volume object. ldm_vblk_handle() keeps the database's volume list
 * sorted by ascending 'number'.
 */
struct ldm_volume {
	uint64_t	id;		/* object id */
	uint64_t	size;		/* volume size */
	uint8_t		number;		/* used for ordering */
	uint8_t		part_type;	/* partition type */

	/* NOTE(review): presumably the number of linked components - confirm */
	int		count;
	LIST_HEAD(, ldm_component) components;
	LIST_ENTRY(ldm_volume)	entry;
};
203 
/*
 * Disk object. The GUID is decoded either from its string form
 * (LDM_VBLK_T_DISK) or from its binary form (LDM_VBLK_T_DISK4).
 */
struct ldm_disk {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk guid */

	LIST_ENTRY(ldm_disk) entry;
};
210 
/* Disk group objects are currently not tracked; the code is compiled out. */
#if 0
struct ldm_disk_group {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk group guid */
	u_char		name[32];	/* disk group name */

	LIST_ENTRY(ldm_disk_group) entry;
};
#endif
220 
/*
 * Partition object: a contiguous extent on one disk that belongs to a
 * component. All fields except 'entry' are decoded by ldm_vblk_handle().
 */
struct ldm_partition {
	uint64_t	id;		/* object id */
	uint64_t	disk_id;	/* disk object id */
	uint64_t	comp_id;	/* parent component object id */
	uint64_t	start;		/* offset relative to disk start */
	uint64_t	offset;		/* offset for spanned volumes */
	uint64_t	size;		/* partition size */

	LIST_ENTRY(ldm_partition) entry;
};
231 
/*
 * Each VBLK is 128 bytes long and has a standard 16-byte header.
 * Some of the VBLK's fields have a fixed size, but others have a
 * variable size. Variable-size fields are prefixed with a one-byte
 * length marker. Strings can also be either fixed-size or variable-size.
 * Fixed-size strings are NUL-terminated, the others are not.
 * All VBLKs share the same first several fields:
 *	Offset		Size		Description
 *	---------------+---------------+--------------------------
 *	0x00		16		standard VBLK header
 *	0x10		2		update status
 *	0x13		1		VBLK type
 *	0x18		PS		object id
 *	0x18+		PN		object name
 *
 *  o Offset 0x18+ means '0x18 + length of all variable-width fields'
 *  o 'P' in size column means 'prefixed' (variable-width),
 *    'S' - string, 'N' - number.
 */
/* Signature expected at the beginning of every VBLK. */
#define	LDM_VBLK_SIGN		"VBLK"
/* Byte offsets of the fixed-position fields inside a VBLK. */
#define	LDM_VBLK_SEQ_OFF	0x04
#define	LDM_VBLK_GROUP_OFF	0x08
#define	LDM_VBLK_INDEX_OFF	0x0c
#define	LDM_VBLK_COUNT_OFF	0x0e
#define	LDM_VBLK_TYPE_OFF	0x13
#define	LDM_VBLK_OID_OFF	0x18
/* Decoded fields of the standard VBLK header. */
struct ldm_vblkhdr {
	uint32_t	seq;		/* sequence number */
	uint32_t	group;		/* group number */
	uint16_t	index;		/* index in the group */
	uint16_t	count;		/* number of entries in the group */
};
264 
/* VBLK type codes recognized by ldm_vblk_handle(). */
#define	LDM_VBLK_T_COMPONENT	0x32
#define	LDM_VBLK_T_PARTITION	0x33
#define	LDM_VBLK_T_DISK		0x34
#define	LDM_VBLK_T_DISKGROUP	0x35
#define	LDM_VBLK_T_DISK4	0x44
#define	LDM_VBLK_T_DISKGROUP4	0x45
#define	LDM_VBLK_T_VOLUME	0x51
/*
 * One parsed VBLK. 'type' selects the active union member. Every member
 * structure starts with its 64-bit object id, so 'u.id' aliases that id
 * regardless of the type.
 */
struct ldm_vblk {
	uint8_t		type;		/* VBLK type */
	union {
		uint64_t		id;
		struct ldm_volume	vol;
		struct ldm_component	comp;
		struct ldm_disk		disk;
		struct ldm_partition	part;
#if 0
		struct ldm_disk_group	disk_group;
#endif
	} u;
	LIST_ENTRY(ldm_vblk) entry;
};
286 
/*
 * Some VBLKs contain a bit more data than can fit into 128 bytes. These
 * VBLKs are called eXtended VBLKs. Before parsing, the data from these
 * VBLKs should be placed into a contiguous memory buffer. We can identify
 * an xVBLK by the count field in the standard VBLK header (count > 1).
 */
/*
 * Reassembly state of one extended VBLK, maintained by ldm_xvblk_handle().
 * Bit N of 'map' is set once the fragment with index N has been stored;
 * the bits at and above the fragment count are set when the entry is
 * created.
 */
struct ldm_xvblk {
	uint32_t	group;		/* xVBLK group number */
	uint32_t	size;		/* the total size of xVBLK */
	uint8_t		map;		/* bitmask of currently saved VBLKs */
	u_char		*data;		/* xVBLK data */

	LIST_ENTRY(ldm_xvblk)	entry;
};
301 
/*
 * The internal representation of LDM database.
 *
 * The 'volumes' and 'disks' lists link structures that are embedded in
 * the entries of the 'vblks' list (see ldm_vblk_handle()); the 'vblks'
 * and 'xvblks' lists own their entries and are released by
 * ldm_vmdb_free().
 */
struct ldm_db {
	struct ldm_privhdr		ph;	/* private header */
	struct ldm_tochdr		th;	/* TOC header */
	struct ldm_vmdbhdr		dh;	/* VMDB header */

	LIST_HEAD(, ldm_volume)		volumes;
	LIST_HEAD(, ldm_disk)		disks;
	LIST_HEAD(, ldm_vblk)		vblks;
	LIST_HEAD(, ldm_xvblk)		xvblks;
};
313 
/* GPT partition type UUID of the LDM metadata partition. */
static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;

/* Scheme-specific partition table; 'base' must stay the first member. */
struct g_part_ldm_table {
	struct g_part_table	base;
	uint64_t		db_offset;
	int			is_gpt;
};
/* Scheme-specific partition entry; 'base' must stay the first member. */
struct g_part_ldm_entry {
	struct g_part_entry	base;
	uint8_t			type;
};
325 
/*
 * Forward declarations of the scheme methods that are referenced by the
 * g_part_ldm_methods[] table below.
 */
static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *);
static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *,
    struct sbuf *, const char *);
static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *);
static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *);
static int g_part_ldm_read(struct g_part_table *, struct g_consumer *);
static const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_write(struct g_part_table *, struct g_consumer *);
343 
/*
 * kobj method table that binds the g_part interface methods to their LDM
 * implementations. The all-zero entry terminates the table.
 */
static kobj_method_t g_part_ldm_methods[] = {
	KOBJMETHOD(g_part_add,		g_part_ldm_add),
	KOBJMETHOD(g_part_bootcode,	g_part_ldm_bootcode),
	KOBJMETHOD(g_part_create,	g_part_ldm_create),
	KOBJMETHOD(g_part_destroy,	g_part_ldm_destroy),
	KOBJMETHOD(g_part_dumpconf,	g_part_ldm_dumpconf),
	KOBJMETHOD(g_part_dumpto,	g_part_ldm_dumpto),
	KOBJMETHOD(g_part_modify,	g_part_ldm_modify),
	KOBJMETHOD(g_part_name,		g_part_ldm_name),
	KOBJMETHOD(g_part_probe,	g_part_ldm_probe),
	KOBJMETHOD(g_part_read,		g_part_ldm_read),
	KOBJMETHOD(g_part_type,		g_part_ldm_type),
	KOBJMETHOD(g_part_write,	g_part_ldm_write),
	{ 0, 0 }
};
359 
/*
 * Scheme descriptor: the scheme name, its method table and the size of
 * the scheme-specific table structure, followed by the size of the
 * scheme-specific entry structure.
 */
static struct g_part_scheme g_part_ldm_scheme = {
	"LDM",
	g_part_ldm_methods,
	sizeof(struct g_part_ldm_table),
	.gps_entrysz = sizeof(struct g_part_ldm_entry)
};
/* Declare the scheme to GEOM_PART and set the module version. */
G_PART_SCHEME_DECLARE(g_part_ldm);
MODULE_VERSION(geom_part_ldm, 0);
368 
/*
 * Mapping between one-byte partition type codes (DOSPTYP_*) and the
 * generic GEOM_PART aliases (G_PART_ALIAS_*).
 * NOTE(review): presumably consulted by the type/dumpconf methods, which
 * are outside this part of the file - confirm.
 */
static struct g_part_ldm_alias {
	u_char		typ;
	int		alias;
} ldm_alias_match[] = {
	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
	{ DOSPTYP_FAT32LBA,	G_PART_ALIAS_MS_FAT32LBA },
	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
};
383 
384 static u_char*
385 ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error)
386 {
387 	struct g_provider *pp;
388 	u_char *buf;
389 
390 	pp = cp->provider;
391 	buf = g_read_data(cp, off, pp->sectorsize, error);
392 	if (buf == NULL)
393 		return (NULL);
394 
395 	if (memcmp(buf, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) != 0) {
396 		LDM_DEBUG(1, "%s: invalid LDM private header signature",
397 		    pp->name);
398 		g_free(buf);
399 		buf = NULL;
400 		*error = EINVAL;
401 	}
402 	return (buf);
403 }
404 
405 static int
406 ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr,
407     const u_char *buf)
408 {
409 	uint32_t version;
410 	int error;
411 
412 	memset(hdr, 0, sizeof(*hdr));
413 	version = be32dec(buf + LDM_PH_VERSION_OFF);
414 	if (version != LDM_VERSION_2K &&
415 	    version != LDM_VERSION_VISTA) {
416 		LDM_DEBUG(0, "%s: unsupported LDM version %u.%u",
417 		    cp->provider->name, version >> 16,
418 		    version & 0xFFFF);
419 		return (ENXIO);
420 	}
421 	error = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid);
422 	if (error != 0)
423 		return (error);
424 	error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid);
425 	if (error != 0)
426 		return (error);
427 	strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF, sizeof(hdr->dg_name));
428 	hdr->start = be64dec(buf + LDM_PH_START_OFF);
429 	hdr->size = be64dec(buf + LDM_PH_SIZE_OFF);
430 	hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF);
431 	hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF);
432 	hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF);
433 	hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF);
434 	hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF);
435 	hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF);
436 	return (0);
437 }
438 
439 static int
440 ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt)
441 {
442 	struct g_consumer *cp2;
443 	struct g_provider *pp;
444 	struct ldm_privhdr hdr;
445 	uint64_t offset, last;
446 	int error, found, i;
447 	u_char *buf;
448 
449 	pp = cp->provider;
450 	if (is_gpt) {
451 		/*
452 		 * The last LBA is used in several checks below, for the
453 		 * GPT case it should be calculated relative to the whole
454 		 * disk.
455 		 */
456 		cp2 = LIST_FIRST(&pp->geom->consumer);
457 		last =
458 		    cp2->provider->mediasize / cp2->provider->sectorsize - 1;
459 	} else
460 		last = pp->mediasize / pp->sectorsize - 1;
461 	for (found = 0, i = is_gpt; i < nitems(ldm_ph_off); i++) {
462 		offset = ldm_ph_off[i];
463 		/*
464 		 * In the GPT case consumer is attached to the LDM metadata
465 		 * partition and we don't need add db_offset.
466 		 */
467 		if (!is_gpt)
468 			offset += db->ph.db_offset;
469 		if (i == LDM_PH_MBRINDEX) {
470 			/*
471 			 * Prepare to errors and setup new base offset
472 			 * to read backup private headers. Assume that LDM
473 			 * database is in the last 1Mbyte area.
474 			 */
475 			db->ph.db_offset = last - LDM_DB_SIZE;
476 		}
477 		buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error);
478 		if (buf == NULL) {
479 			LDM_DEBUG(1, "%s: failed to read private header "
480 			    "%d at LBA %ju", pp->name, i, (uintmax_t)offset);
481 			continue;
482 		}
483 		error = ldm_privhdr_parse(cp, &hdr, buf);
484 		if (error != 0) {
485 			LDM_DEBUG(1, "%s: failed to parse private "
486 			    "header %d", pp->name, i);
487 			LDM_DUMP(buf, pp->sectorsize);
488 			g_free(buf);
489 			continue;
490 		}
491 		g_free(buf);
492 		if (hdr.start > last ||
493 		    hdr.start + hdr.size - 1 > last ||
494 		    (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) ||
495 		    hdr.db_size != LDM_DB_SIZE ||
496 		    hdr.db_offset + LDM_DB_SIZE - 1 > last ||
497 		    hdr.th_offset[0] >= LDM_DB_SIZE ||
498 		    hdr.th_offset[1] >= LDM_DB_SIZE ||
499 		    hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) {
500 			LDM_DEBUG(1, "%s: invalid values in the "
501 			    "private header %d", pp->name, i);
502 			LDM_DEBUG(2, "%s: start: %jd, size: %jd, "
503 			    "db_offset: %jd, db_size: %jd, th_offset0: %jd, "
504 			    "th_offset1: %jd, conf_size: %jd, log_size: %jd, "
505 			    "last: %jd", pp->name, hdr.start, hdr.size,
506 			    hdr.db_offset, hdr.db_size, hdr.th_offset[0],
507 			    hdr.th_offset[1], hdr.conf_size, hdr.log_size,
508 			    last);
509 			continue;
510 		}
511 		if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) {
512 			LDM_DEBUG(0, "%s: private headers are not equal",
513 			    pp->name);
514 			if (i > 1) {
515 				/*
516 				 * We have different headers in the LDM.
517 				 * We can not trust this metadata.
518 				 */
519 				LDM_DEBUG(0, "%s: refuse LDM metadata",
520 				    pp->name);
521 				return (EINVAL);
522 			}
523 			/*
524 			 * We already have read primary private header
525 			 * and it differs from this backup one.
526 			 * Prefer the backup header and save it.
527 			 */
528 			found = 0;
529 		}
530 		if (found == 0)
531 			memcpy(&db->ph, &hdr, sizeof(hdr));
532 		found = 1;
533 	}
534 	if (found == 0) {
535 		LDM_DEBUG(1, "%s: valid LDM private header not found",
536 		    pp->name);
537 		return (ENXIO);
538 	}
539 	return (0);
540 }
541 
542 static int
543 ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp)
544 {
545 	struct g_part_table *gpt;
546 	struct g_part_entry *e;
547 	struct g_consumer *cp2;
548 	int error;
549 
550 	cp2 = LIST_NEXT(cp, consumer);
551 	g_topology_lock();
552 	gpt = cp->provider->geom->softc;
553 	error = 0;
554 	LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) {
555 		if (cp->provider == e->gpe_pp) {
556 			/* ms-ldm-metadata partition */
557 			if (e->gpe_start != db->ph.db_offset ||
558 			    e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1)
559 				error++;
560 		} else if (cp2->provider == e->gpe_pp) {
561 			/* ms-ldm-data partition */
562 			if (e->gpe_start != db->ph.start ||
563 			    e->gpe_end != db->ph.start + db->ph.size - 1)
564 				error++;
565 		}
566 		if (error != 0) {
567 			LDM_DEBUG(0, "%s: GPT partition %d boundaries "
568 			    "do not match with the LDM metadata",
569 			    e->gpe_pp->name, e->gpe_index);
570 			error = ENXIO;
571 			break;
572 		}
573 	}
574 	g_topology_unlock();
575 	return (error);
576 }
577 
578 static int
579 ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp)
580 {
581 	struct g_provider *pp;
582 	struct ldm_tochdr hdr;
583 	uint64_t offset, conf_size, log_size;
584 	int error, found, i;
585 	u_char *buf;
586 
587 	pp = cp->provider;
588 	for (i = 0, found = 0; i < LDM_TH_COUNT; i++) {
589 		offset = db->ph.db_offset + db->ph.th_offset[i];
590 		buf = g_read_data(cp,
591 		    offset * pp->sectorsize, pp->sectorsize, &error);
592 		if (buf == NULL) {
593 			LDM_DEBUG(1, "%s: failed to read TOC header "
594 			    "at LBA %ju", pp->name, (uintmax_t)offset);
595 			continue;
596 		}
597 		if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 ||
598 		    memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1,
599 		    strlen(LDM_TH_NAME1)) != 0 ||
600 		    memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2,
601 		    strlen(LDM_TH_NAME2)) != 0) {
602 			LDM_DEBUG(1, "%s: failed to parse TOC header "
603 			    "at LBA %ju", pp->name, (uintmax_t)offset);
604 			LDM_DUMP(buf, pp->sectorsize);
605 			g_free(buf);
606 			continue;
607 		}
608 		hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF);
609 		hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF);
610 		conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF);
611 		log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF);
612 		if (conf_size != db->ph.conf_size ||
613 		    hdr.conf_offset + conf_size >= LDM_DB_SIZE ||
614 		    log_size != db->ph.log_size ||
615 		    hdr.log_offset + log_size >= LDM_DB_SIZE) {
616 			LDM_DEBUG(1, "%s: invalid values in the "
617 			    "TOC header at LBA %ju", pp->name,
618 			    (uintmax_t)offset);
619 			LDM_DUMP(buf, pp->sectorsize);
620 			g_free(buf);
621 			continue;
622 		}
623 		g_free(buf);
624 		if (found == 0)
625 			memcpy(&db->th, &hdr, sizeof(hdr));
626 		found = 1;
627 	}
628 	if (found == 0) {
629 		LDM_DEBUG(0, "%s: valid LDM TOC header not found.",
630 		    pp->name);
631 		return (ENXIO);
632 	}
633 	return (0);
634 }
635 
636 static int
637 ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp)
638 {
639 	struct g_provider *pp;
640 	struct uuid dg_guid;
641 	uint64_t offset;
642 	uint32_t version;
643 	int error;
644 	u_char *buf;
645 
646 	pp = cp->provider;
647 	offset = db->ph.db_offset + db->th.conf_offset;
648 	buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize,
649 	    &error);
650 	if (buf == NULL) {
651 		LDM_DEBUG(0, "%s: failed to read VMDB header at "
652 		    "LBA %ju", pp->name, (uintmax_t)offset);
653 		return (error);
654 	}
655 	if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) {
656 		g_free(buf);
657 		LDM_DEBUG(0, "%s: failed to parse VMDB header at "
658 		    "LBA %ju", pp->name, (uintmax_t)offset);
659 		return (ENXIO);
660 	}
661 	/* Check version. */
662 	version = be32dec(buf + LDM_DB_VERSION_OFF);
663 	if (version != 0x4000A) {
664 		g_free(buf);
665 		LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u",
666 		    pp->name, version >> 16, version & 0xFFFF);
667 		return (ENXIO);
668 	}
669 	/*
670 	 * Check VMDB update status:
671 	 *	1 - in a consistent state;
672 	 *	2 - in a creation phase;
673 	 *	3 - in a deletion phase;
674 	 */
675 	if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) {
676 		g_free(buf);
677 		LDM_DEBUG(0, "%s: VMDB is not in a consistent state",
678 		    pp->name);
679 		return (ENXIO);
680 	}
681 	db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF);
682 	db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF);
683 	error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid);
684 	/* Compare disk group name and guid from VMDB and private headers */
685 	if (error != 0 || db->dh.size == 0 ||
686 	    pp->sectorsize % db->dh.size != 0 ||
687 	    strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 ||
688 	    memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 ||
689 	    db->dh.size * db->dh.last_seq >
690 	    db->ph.conf_size * pp->sectorsize) {
691 		LDM_DEBUG(0, "%s: invalid values in the VMDB header",
692 		    pp->name);
693 		LDM_DUMP(buf, pp->sectorsize);
694 		g_free(buf);
695 		return (EINVAL);
696 	}
697 	g_free(buf);
698 	return (0);
699 }
700 
701 static int
702 ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p)
703 {
704 	struct ldm_xvblk *blk;
705 	size_t size;
706 
707 	size = db->dh.size - 16;
708 	LIST_FOREACH(blk, &db->xvblks, entry)
709 		if (blk->group == vh->group)
710 			break;
711 	if (blk == NULL) {
712 		blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
713 		blk->group = vh->group;
714 		blk->size = size * vh->count + 16;
715 		blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO);
716 		blk->map = 0xFF << vh->count;
717 		LIST_INSERT_HEAD(&db->xvblks, blk, entry);
718 	}
719 	if ((blk->map & (1 << vh->index)) != 0) {
720 		/* Block with given index has been already saved. */
721 		return (EINVAL);
722 	}
723 	/* Copy the data block to the place related to index. */
724 	memcpy(blk->data + size * vh->index + 16, p + 16, size);
725 	blk->map |= 1 << vh->index;
726 	return (0);
727 }
728 
/*
 * Read the variable-width numeric field and return new offset.
 *
 * The field starts at buf[offset] with a one-byte length that is followed
 * by that many bytes of a big-endian number. 'range' is the size of the
 * data 'buf' points to.
 *
 * Returns the offset of the byte that follows the field and stores the
 * number in *result. Returns -1, leaving *result untouched, when the
 * offset is outside of the range, when the number is wider than 64 bits,
 * or when the field does not end before the last byte of the range.
 */
static int
ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
{
	uint64_t num;
	uint8_t len;

	/* Do not touch the length byte unless it is inside of the range. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len > sizeof(uint64_t) || len + offset >= range)
		return (-1);
	for (num = 0; len > 0; len--)
		num = (num << 8) | buf[offset++];
	*result = num;
	return (offset);
}
744 
/*
 * Read the variable-width string and return new offset.
 *
 * The field starts at buf[offset] with a one-byte length that is followed
 * by that many bytes of string data without a terminator. The string is
 * copied into 'result', which can hold 'maxlen' bytes, and is
 * NUL-terminated there. 'range' is the size of the data 'buf' points to.
 *
 * Returns the offset of the byte that follows the field, or -1 when the
 * offset is outside of the range, when the string plus its terminator
 * does not fit into 'result', or when the field does not end before the
 * last byte of the range.
 */
static int
ldm_vstr_get(const u_char *buf, int offset, u_char *result,
    size_t maxlen, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside of the range. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len >= maxlen || len + offset >= range)
		return (-1);
	memcpy(result, buf + offset, len);
	result[len] = '\0';
	return (offset + len);
}
759 
/*
 * Just skip the variable-width variable and return new offset.
 *
 * The field starts at buf[offset] with a one-byte length that is followed
 * by that many bytes of data. 'range' is the size of the data 'buf'
 * points to.
 *
 * Returns the offset of the byte that follows the field, or -1 when the
 * offset is outside of the range or when the field does not end before
 * the last byte of the range.
 */
static int
ldm_vparm_skip(const u_char *buf, int offset, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside of the range. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (offset + len >= range)
		return (-1);

	return (offset + len);
}
772 
773 static int
774 ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size)
775 {
776 	struct ldm_vblk *blk;
777 	struct ldm_volume *volume, *last;
778 	const char *errstr;
779 	u_char vstr[64];
780 	int error, offset;
781 
782 	blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
783 	blk->type = p[LDM_VBLK_TYPE_OFF];
784 	offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size);
785 	if (offset < 0) {
786 		errstr = "object id";
787 		goto fail;
788 	}
789 	offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
790 	if (offset < 0) {
791 		errstr = "object name";
792 		goto fail;
793 	}
794 	switch (blk->type) {
795 	/*
796 	 * Component VBLK fields:
797 	 * Offset	Size	Description
798 	 * ------------+-------+------------------------
799 	 *  0x18+	PS	volume state
800 	 *  0x18+5	PN	component children count
801 	 *  0x1D+16	PN	parent's volume object id
802 	 *  0x2D+1	PN	stripe size
803 	 */
804 	case LDM_VBLK_T_COMPONENT:
805 		offset = ldm_vparm_skip(p, offset, size);
806 		if (offset < 0) {
807 			errstr = "volume state";
808 			goto fail;
809 		}
810 		offset = ldm_vparm_skip(p, offset + 5, size);
811 		if (offset < 0) {
812 			errstr = "children count";
813 			goto fail;
814 		}
815 		offset = ldm_vnum_get(p, offset + 16,
816 		    &blk->u.comp.vol_id, size);
817 		if (offset < 0) {
818 			errstr = "volume id";
819 			goto fail;
820 		}
821 		break;
822 	/*
823 	 * Partition VBLK fields:
824 	 * Offset	Size	Description
825 	 * ------------+-------+------------------------
826 	 *  0x18+12	8	partition start offset
827 	 *  0x18+20	8	volume offset
828 	 *  0x18+28	PN	partition size
829 	 *  0x34+	PN	parent's component object id
830 	 *  0x34+	PN	disk's object id
831 	 */
832 	case LDM_VBLK_T_PARTITION:
833 		if (offset + 28 >= size) {
834 			errstr = "too small buffer";
835 			goto fail;
836 		}
837 		blk->u.part.start = be64dec(p + offset + 12);
838 		blk->u.part.offset = be64dec(p + offset + 20);
839 		offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size);
840 		if (offset < 0) {
841 			errstr = "partition size";
842 			goto fail;
843 		}
844 		offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size);
845 		if (offset < 0) {
846 			errstr = "component id";
847 			goto fail;
848 		}
849 		offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size);
850 		if (offset < 0) {
851 			errstr = "disk id";
852 			goto fail;
853 		}
854 		break;
855 	/*
856 	 * Disk VBLK fields:
857 	 * Offset	Size	Description
858 	 * ------------+-------+------------------------
859 	 *  0x18+	PS	disk GUID
860 	 */
861 	case LDM_VBLK_T_DISK:
862 		errstr = "disk guid";
863 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
864 		if (offset < 0)
865 			goto fail;
866 		error = parse_uuid(vstr, &blk->u.disk.guid);
867 		if (error != 0)
868 			goto fail;
869 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
870 		break;
871 	/*
872 	 * Disk group VBLK fields:
873 	 * Offset	Size	Description
874 	 * ------------+-------+------------------------
875 	 *  0x18+	PS	disk group GUID
876 	 */
877 	case LDM_VBLK_T_DISKGROUP:
878 #if 0
879 		strncpy(blk->u.disk_group.name, vstr,
880 		    sizeof(blk->u.disk_group.name));
881 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
882 		if (offset < 0) {
883 			errstr = "disk group guid";
884 			goto fail;
885 		}
886 		error = parse_uuid(name, &blk->u.disk_group.guid);
887 		if (error != 0) {
888 			errstr = "disk group guid";
889 			goto fail;
890 		}
891 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
892 #endif
893 		break;
894 	/*
895 	 * Disk VBLK fields:
896 	 * Offset	Size	Description
897 	 * ------------+-------+------------------------
898 	 *  0x18+	16	disk GUID
899 	 */
900 	case LDM_VBLK_T_DISK4:
901 		be_uuid_dec(p + offset, &blk->u.disk.guid);
902 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
903 		break;
904 	/*
905 	 * Disk group VBLK fields:
906 	 * Offset	Size	Description
907 	 * ------------+-------+------------------------
908 	 *  0x18+	16	disk GUID
909 	 */
910 	case LDM_VBLK_T_DISKGROUP4:
911 #if 0
912 		strncpy(blk->u.disk_group.name, vstr,
913 		    sizeof(blk->u.disk_group.name));
914 		be_uuid_dec(p + offset, &blk->u.disk.guid);
915 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
916 #endif
917 		break;
918 	/*
919 	 * Volume VBLK fields:
920 	 * Offset	Size	Description
921 	 * ------------+-------+------------------------
922 	 *  0x18+	PS	volume type
923 	 *  0x18+	PS	unknown
924 	 *  0x18+	14(S)	volume state
925 	 *  0x18+16	1	volume number
926 	 *  0x18+21	PN	volume children count
927 	 *  0x2D+16	PN	volume size
928 	 *  0x3D+4	1	partition type
929 	 */
930 	case LDM_VBLK_T_VOLUME:
931 		offset = ldm_vparm_skip(p, offset, size);
932 		if (offset < 0) {
933 			errstr = "volume type";
934 			goto fail;
935 		}
936 		offset = ldm_vparm_skip(p, offset, size);
937 		if (offset < 0) {
938 			errstr = "unknown param";
939 			goto fail;
940 		}
941 		if (offset + 21 >= size) {
942 			errstr = "too small buffer";
943 			goto fail;
944 		}
945 		blk->u.vol.number = p[offset + 16];
946 		offset = ldm_vparm_skip(p, offset + 21, size);
947 		if (offset < 0) {
948 			errstr = "children count";
949 			goto fail;
950 		}
951 		offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size);
952 		if (offset < 0) {
953 			errstr = "volume size";
954 			goto fail;
955 		}
956 		if (offset + 4 >= size) {
957 			errstr = "too small buffer";
958 			goto fail;
959 		}
960 		blk->u.vol.part_type = p[offset + 4];
961 		/* keep volumes ordered by volume number */
962 		last = NULL;
963 		LIST_FOREACH(volume, &db->volumes, entry) {
964 			if (volume->number > blk->u.vol.number)
965 				break;
966 			last = volume;
967 		}
968 		if (last != NULL)
969 			LIST_INSERT_AFTER(last, &blk->u.vol, entry);
970 		else
971 			LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry);
972 		break;
973 	default:
974 		LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type);
975 		LDM_DUMP(p, size);
976 	}
977 	LIST_INSERT_HEAD(&db->vblks, blk, entry);
978 	return (0);
979 fail:
980 	LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n",
981 	    errstr, blk->type);
982 	LDM_DUMP(p, size);
983 	g_free(blk);
984 	return (EINVAL);
985 }
986 
987 static void
988 ldm_vmdb_free(struct ldm_db *db)
989 {
990 	struct ldm_vblk *vblk;
991 	struct ldm_xvblk *xvblk;
992 
993 	while (!LIST_EMPTY(&db->xvblks)) {
994 		xvblk = LIST_FIRST(&db->xvblks);
995 		LIST_REMOVE(xvblk, entry);
996 		g_free(xvblk->data);
997 		g_free(xvblk);
998 	}
999 	while (!LIST_EMPTY(&db->vblks)) {
1000 		vblk = LIST_FIRST(&db->vblks);
1001 		LIST_REMOVE(vblk, entry);
1002 		g_free(vblk);
1003 	}
1004 }
1005 
1006 static int
1007 ldm_vmdb_parse(struct ldm_db *db, struct g_consumer *cp)
1008 {
1009 	struct g_provider *pp;
1010 	struct ldm_vblk *vblk;
1011 	struct ldm_xvblk *xvblk;
1012 	struct ldm_volume *volume;
1013 	struct ldm_component *comp;
1014 	struct ldm_vblkhdr vh;
1015 	u_char *buf, *p;
1016 	size_t size, n, sectors;
1017 	uint64_t offset;
1018 	int error;
1019 
1020 	pp = cp->provider;
1021 	size = howmany(db->dh.last_seq * db->dh.size, pp->sectorsize);
1022 	size -= 1; /* one sector takes vmdb header */
1023 	for (n = 0; n < size; n += MAXPHYS / pp->sectorsize) {
1024 		offset = db->ph.db_offset + db->th.conf_offset + n + 1;
1025 		sectors = (size - n) > (MAXPHYS / pp->sectorsize) ?
1026 		    MAXPHYS / pp->sectorsize: size - n;
1027 		/* read VBLKs */
1028 		buf = g_read_data(cp, offset * pp->sectorsize,
1029 		    sectors * pp->sectorsize, &error);
1030 		if (buf == NULL) {
1031 			LDM_DEBUG(0, "%s: failed to read VBLK\n",
1032 			    pp->name);
1033 			goto fail;
1034 		}
1035 		for (p = buf; p < buf + sectors * pp->sectorsize;
1036 		    p += db->dh.size) {
1037 			if (memcmp(p, LDM_VBLK_SIGN,
1038 			    strlen(LDM_VBLK_SIGN)) != 0) {
1039 				LDM_DEBUG(0, "%s: no VBLK signature\n",
1040 				    pp->name);
1041 				LDM_DUMP(p, db->dh.size);
1042 				goto fail;
1043 			}
1044 			vh.seq = be32dec(p + LDM_VBLK_SEQ_OFF);
1045 			vh.group = be32dec(p + LDM_VBLK_GROUP_OFF);
1046 			/* skip empty blocks */
1047 			if (vh.seq == 0 || vh.group == 0)
1048 				continue;
1049 			vh.index = be16dec(p + LDM_VBLK_INDEX_OFF);
1050 			vh.count = be16dec(p + LDM_VBLK_COUNT_OFF);
1051 			if (vh.count == 0 || vh.count > 4 ||
1052 			    vh.seq > db->dh.last_seq) {
1053 				LDM_DEBUG(0, "%s: invalid values "
1054 				    "in the VBLK header\n", pp->name);
1055 				LDM_DUMP(p, db->dh.size);
1056 				goto fail;
1057 			}
1058 			if (vh.count > 1) {
1059 				error = ldm_xvblk_handle(db, &vh, p);
1060 				if (error != 0) {
1061 					LDM_DEBUG(0, "%s: xVBLK "
1062 					    "is corrupted\n", pp->name);
1063 					LDM_DUMP(p, db->dh.size);
1064 					goto fail;
1065 				}
1066 				continue;
1067 			}
1068 			if (be16dec(p + 16) != 0)
1069 				LDM_DEBUG(1, "%s: VBLK update"
1070 				    " status is %u\n", pp->name,
1071 				    be16dec(p + 16));
1072 			error = ldm_vblk_handle(db, p, db->dh.size);
1073 			if (error != 0)
1074 				goto fail;
1075 		}
1076 		g_free(buf);
1077 		buf = NULL;
1078 	}
1079 	/* Parse xVBLKs */
1080 	while (!LIST_EMPTY(&db->xvblks)) {
1081 		xvblk = LIST_FIRST(&db->xvblks);
1082 		if (xvblk->map == 0xFF) {
1083 			error = ldm_vblk_handle(db, xvblk->data, xvblk->size);
1084 			if (error != 0)
1085 				goto fail;
1086 		} else {
1087 			LDM_DEBUG(0, "%s: incomplete or corrupt "
1088 			    "xVBLK found\n", pp->name);
1089 			goto fail;
1090 		}
1091 		LIST_REMOVE(xvblk, entry);
1092 		g_free(xvblk->data);
1093 		g_free(xvblk);
1094 	}
1095 	/* construct all VBLKs relations */
1096 	LIST_FOREACH(volume, &db->volumes, entry) {
1097 		LIST_FOREACH(vblk, &db->vblks, entry)
1098 			if (vblk->type == LDM_VBLK_T_COMPONENT &&
1099 			    vblk->u.comp.vol_id == volume->id) {
1100 				LIST_INSERT_HEAD(&volume->components,
1101 				    &vblk->u.comp, entry);
1102 				volume->count++;
1103 			}
1104 		LIST_FOREACH(comp, &volume->components, entry)
1105 			LIST_FOREACH(vblk, &db->vblks, entry)
1106 				if (vblk->type == LDM_VBLK_T_PARTITION &&
1107 				    vblk->u.part.comp_id == comp->id) {
1108 					LIST_INSERT_HEAD(&comp->partitions,
1109 					    &vblk->u.part, entry);
1110 					comp->count++;
1111 				}
1112 	}
1113 	return (0);
1114 fail:
1115 	ldm_vmdb_free(db);
1116 	g_free(buf);
1117 	return (ENXIO);
1118 }
1119 
/*
 * Adding a partition is not implemented for the LDM scheme: the request
 * is always rejected with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
    struct g_part_parms *gpp)
{
	return (ENOSYS);
}
1127 
/*
 * Installing boot code is not implemented for the LDM scheme: the request
 * is always rejected with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
{
	return (ENOSYS);
}
1134 
/*
 * Creating a new LDM table is not implemented: the request is always
 * rejected with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_create(struct g_part_table *basetable, struct g_part_parms *gpp)
{
	return (ENOSYS);
}
1141 
1142 static int
1143 g_part_ldm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
1144 {
1145 	struct g_part_ldm_table *table;
1146 	struct g_provider *pp;
1147 
1148 	table = (struct g_part_ldm_table *)basetable;
1149 	/*
1150 	 * To destroy LDM on a disk partitioned with GPT we should delete
1151 	 * ms-ldm-metadata partition, but we can't do this via standard
1152 	 * GEOM_PART method.
1153 	 */
1154 	if (table->is_gpt)
1155 		return (ENOSYS);
1156 	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
1157 	/*
1158 	 * To destroy LDM we should wipe MBR, first private header and
1159 	 * backup private headers.
1160 	 */
1161 	basetable->gpt_smhead = (1 << ldm_ph_off[0]) | 1;
1162 	/*
1163 	 * Don't touch last backup private header when LDM database is
1164 	 * not located in the last 1MByte area.
1165 	 * XXX: can't remove all blocks.
1166 	 */
1167 	if (table->db_offset + LDM_DB_SIZE ==
1168 	    pp->mediasize / pp->sectorsize)
1169 		basetable->gpt_smtail = 1;
1170 	return (0);
1171 }
1172 
1173 static void
1174 g_part_ldm_dumpconf(struct g_part_table *basetable,
1175     struct g_part_entry *baseentry, struct sbuf *sb, const char *indent)
1176 {
1177 	struct g_part_ldm_entry *entry;
1178 
1179 	entry = (struct g_part_ldm_entry *)baseentry;
1180 	if (indent == NULL) {
1181 		/* conftxt: libdisk compatibility */
1182 		sbuf_printf(sb, " xs LDM xt %u", entry->type);
1183 	} else if (entry != NULL) {
1184 		/* confxml: partition entry information */
1185 		sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
1186 		    entry->type);
1187 	} else {
1188 		/* confxml: scheme information */
1189 	}
1190 }
1191 
/*
 * Always returns 0, whatever the entry is.
 * NOTE(review): presumably this tells GEOM_PART that no LDM partition is
 * suitable as a kernel dump device - confirm against the g_part interface.
 */
static int
g_part_ldm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
{
	return (0);
}
1198 
/*
 * Modifying a partition is not implemented for the LDM scheme: the request
 * is always rejected with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_modify(struct g_part_table *basetable,
    struct g_part_entry *baseentry, struct g_part_parms *gpp)
{
	return (ENOSYS);
}
1206 
1207 static const char *
1208 g_part_ldm_name(struct g_part_table *table, struct g_part_entry *baseentry,
1209     char *buf, size_t bufsz)
1210 {
1211 
1212 	snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
1213 	return (buf);
1214 }
1215 
1216 static int
1217 ldm_gpt_probe(struct g_part_table *basetable, struct g_consumer *cp)
1218 {
1219 	struct g_part_ldm_table *table;
1220 	struct g_part_table *gpt;
1221 	struct g_part_entry *entry;
1222 	struct g_consumer *cp2;
1223 	struct gpt_ent *part;
1224 	u_char *buf;
1225 	int error;
1226 
1227 	/*
1228 	 * XXX: We use some knowledge about GEOM_PART_GPT internal
1229 	 * structures, but it is easier than parse GPT by himself.
1230 	 */
1231 	g_topology_lock();
1232 	gpt = cp->provider->geom->softc;
1233 	LIST_FOREACH(entry, &gpt->gpt_entry, gpe_entry) {
1234 		part = (struct gpt_ent *)(entry + 1);
1235 		/* Search ms-ldm-metadata partition */
1236 		if (memcmp(&part->ent_type,
1237 		    &gpt_uuid_ms_ldm_metadata, sizeof(struct uuid)) != 0 ||
1238 		    entry->gpe_end - entry->gpe_start < LDM_DB_SIZE - 1)
1239 			continue;
1240 
1241 		/* Create new consumer and attach it to metadata partition */
1242 		cp2 = g_new_consumer(cp->geom);
1243 		error = g_attach(cp2, entry->gpe_pp);
1244 		if (error != 0) {
1245 			g_destroy_consumer(cp2);
1246 			g_topology_unlock();
1247 			return (ENXIO);
1248 		}
1249 		error = g_access(cp2, 1, 0, 0);
1250 		if (error != 0) {
1251 			g_detach(cp2);
1252 			g_destroy_consumer(cp2);
1253 			g_topology_unlock();
1254 			return (ENXIO);
1255 		}
1256 		g_topology_unlock();
1257 
1258 		LDM_DEBUG(2, "%s: LDM metadata partition %s found in the GPT",
1259 		    cp->provider->name, cp2->provider->name);
1260 		/* Read the LDM private header */
1261 		buf = ldm_privhdr_read(cp2,
1262 		    ldm_ph_off[LDM_PH_GPTINDEX] * cp2->provider->sectorsize,
1263 		    &error);
1264 		if (buf != NULL) {
1265 			table = (struct g_part_ldm_table *)basetable;
1266 			table->is_gpt = 1;
1267 			g_free(buf);
1268 			return (G_PART_PROBE_PRI_HIGH);
1269 		}
1270 
1271 		/* second consumer is no longer needed. */
1272 		g_topology_lock();
1273 		g_access(cp2, -1, 0, 0);
1274 		g_detach(cp2);
1275 		g_destroy_consumer(cp2);
1276 		break;
1277 	}
1278 	g_topology_unlock();
1279 	return (ENXIO);
1280 }
1281 
1282 static int
1283 g_part_ldm_probe(struct g_part_table *basetable, struct g_consumer *cp)
1284 {
1285 	struct g_provider *pp;
1286 	u_char *buf, type[64];
1287 	int error, idx;
1288 
1289 	pp = cp->provider;
1290 	if (pp->sectorsize != 512)
1291 		return (ENXIO);
1292 
1293 	error = g_getattr("PART::scheme", cp, &type);
1294 	if (error == 0 && strcmp(type, "GPT") == 0) {
1295 		if (g_getattr("PART::type", cp, &type) != 0 ||
1296 		    strcmp(type, "ms-ldm-data") != 0)
1297 			return (ENXIO);
1298 		error = ldm_gpt_probe(basetable, cp);
1299 		return (error);
1300 	}
1301 
1302 	if (basetable->gpt_depth != 0)
1303 		return (ENXIO);
1304 
1305 	/* LDM has 1M metadata area */
1306 	if (pp->mediasize <= 1024 * 1024)
1307 		return (ENOSPC);
1308 
1309 	/* Check that there's a MBR */
1310 	buf = g_read_data(cp, 0, pp->sectorsize, &error);
1311 	if (buf == NULL)
1312 		return (error);
1313 
1314 	if (le16dec(buf + DOSMAGICOFFSET) != DOSMAGIC) {
1315 		g_free(buf);
1316 		return (ENXIO);
1317 	}
1318 	error = ENXIO;
1319 	/* Check that we have LDM partitions in the MBR */
1320 	for (idx = 0; idx < NDOSPART && error != 0; idx++) {
1321 		if (buf[DOSPARTOFF + idx * DOSPARTSIZE + 4] == DOSPTYP_LDM)
1322 			error = 0;
1323 	}
1324 	g_free(buf);
1325 	if (error == 0) {
1326 		LDM_DEBUG(2, "%s: LDM data partitions found in MBR",
1327 		    pp->name);
1328 		/* Read the LDM private header */
1329 		buf = ldm_privhdr_read(cp,
1330 		    ldm_ph_off[LDM_PH_MBRINDEX] * pp->sectorsize, &error);
1331 		if (buf == NULL)
1332 			return (error);
1333 		g_free(buf);
1334 		return (G_PART_PROBE_PRI_HIGH);
1335 	}
1336 	return (error);
1337 }
1338 
1339 static int
1340 g_part_ldm_read(struct g_part_table *basetable, struct g_consumer *cp)
1341 {
1342 	struct g_part_ldm_table *table;
1343 	struct g_part_ldm_entry *entry;
1344 	struct g_consumer *cp2;
1345 	struct ldm_component *comp;
1346 	struct ldm_partition *part;
1347 	struct ldm_volume *vol;
1348 	struct ldm_disk *disk;
1349 	struct ldm_db db;
1350 	int error, index, skipped;
1351 
1352 	table = (struct g_part_ldm_table *)basetable;
1353 	memset(&db, 0, sizeof(db));
1354 	cp2 = cp;					/* ms-ldm-data */
1355 	if (table->is_gpt)
1356 		cp = LIST_FIRST(&cp->geom->consumer);	/* ms-ldm-metadata */
1357 	/* Read and parse LDM private headers. */
1358 	error = ldm_privhdr_check(&db, cp, table->is_gpt);
1359 	if (error != 0)
1360 		goto gpt_cleanup;
1361 	basetable->gpt_first = table->is_gpt ? 0: db.ph.start;
1362 	basetable->gpt_last = basetable->gpt_first + db.ph.size - 1;
1363 	table->db_offset = db.ph.db_offset;
1364 	/* Make additional checks for GPT */
1365 	if (table->is_gpt) {
1366 		error = ldm_gpt_check(&db, cp);
1367 		if (error != 0)
1368 			goto gpt_cleanup;
1369 		/*
1370 		 * Now we should reset database offset to zero, because our
1371 		 * consumer cp is attached to the ms-ldm-metadata partition
1372 		 * and we don't need add db_offset to read from it.
1373 		 */
1374 		db.ph.db_offset = 0;
1375 	}
1376 	/* Read and parse LDM TOC headers. */
1377 	error = ldm_tochdr_check(&db, cp);
1378 	if (error != 0)
1379 		goto gpt_cleanup;
1380 	/* Read and parse LDM VMDB header. */
1381 	error = ldm_vmdbhdr_check(&db, cp);
1382 	if (error != 0)
1383 		goto gpt_cleanup;
1384 	error = ldm_vmdb_parse(&db, cp);
1385 	/*
1386 	 * For the GPT case we must detach and destroy
1387 	 * second consumer before return.
1388 	 */
1389 gpt_cleanup:
1390 	if (table->is_gpt) {
1391 		g_topology_lock();
1392 		g_access(cp, -1, 0, 0);
1393 		g_detach(cp);
1394 		g_destroy_consumer(cp);
1395 		g_topology_unlock();
1396 		cp = cp2;
1397 	}
1398 	if (error != 0)
1399 		return (error);
1400 	/* Search current disk in the disk list. */
1401 	LIST_FOREACH(disk, &db.disks, entry)
1402 	    if (memcmp(&disk->guid, &db.ph.disk_guid,
1403 		sizeof(struct uuid)) == 0)
1404 		    break;
1405 	if (disk == NULL) {
1406 		LDM_DEBUG(1, "%s: no LDM volumes on this disk",
1407 		    cp->provider->name);
1408 		ldm_vmdb_free(&db);
1409 		return (ENXIO);
1410 	}
1411 	index = 1;
1412 	LIST_FOREACH(vol, &db.volumes, entry) {
1413 		LIST_FOREACH(comp, &vol->components, entry) {
1414 			/* Skip volumes from different disks. */
1415 			part = LIST_FIRST(&comp->partitions);
1416 			if (part->disk_id != disk->id)
1417 				continue;
1418 			skipped = 0;
1419 			/* We don't support spanned and striped volumes. */
1420 			if (comp->count > 1 || part->offset != 0) {
1421 				LDM_DEBUG(1, "%s: LDM volume component "
1422 				    "%ju has %u partitions. Skipped",
1423 				    cp->provider->name, (uintmax_t)comp->id,
1424 				    comp->count);
1425 				skipped = 1;
1426 			}
1427 			/*
1428 			 * Allow mirrored volumes only when they are explicitly
1429 			 * allowed with kern.geom.part.ldm.show_mirrors=1.
1430 			 */
1431 			if (vol->count > 1 && show_mirrors == 0) {
1432 				LDM_DEBUG(1, "%s: LDM volume %ju has %u "
1433 				    "components. Skipped",
1434 				    cp->provider->name, (uintmax_t)vol->id,
1435 				    vol->count);
1436 				skipped = 1;
1437 			}
1438 			entry = (struct g_part_ldm_entry *)g_part_new_entry(
1439 			    basetable, index++,
1440 			    basetable->gpt_first + part->start,
1441 			    basetable->gpt_first + part->start +
1442 			    part->size - 1);
1443 			/*
1444 			 * Mark skipped partition as ms-ldm-data partition.
1445 			 * We do not support them, but it is better to show
1446 			 * that we have something there, than just show
1447 			 * free space.
1448 			 */
1449 			if (skipped == 0)
1450 				entry->type = vol->part_type;
1451 			else
1452 				entry->type = DOSPTYP_LDM;
1453 			LDM_DEBUG(1, "%s: new volume id: %ju, start: %ju,"
1454 			    " end: %ju, type: 0x%02x\n", cp->provider->name,
1455 			    (uintmax_t)part->id,(uintmax_t)part->start +
1456 			    basetable->gpt_first, (uintmax_t)part->start +
1457 			    part->size + basetable->gpt_first - 1,
1458 			    vol->part_type);
1459 		}
1460 	}
1461 	ldm_vmdb_free(&db);
1462 	return (error);
1463 }
1464 
1465 static const char *
1466 g_part_ldm_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
1467     char *buf, size_t bufsz)
1468 {
1469 	struct g_part_ldm_entry *entry;
1470 	int i;
1471 
1472 	entry = (struct g_part_ldm_entry *)baseentry;
1473 	for (i = 0; i < nitems(ldm_alias_match); i++) {
1474 		if (ldm_alias_match[i].typ == entry->type)
1475 			return (g_part_alias_name(ldm_alias_match[i].alias));
1476 	}
1477 	snprintf(buf, bufsz, "!%d", entry->type);
1478 	return (buf);
1479 }
1480 
/*
 * Writing LDM metadata back to disk is not implemented: the request is
 * always rejected with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_write(struct g_part_table *basetable, struct g_consumer *cp)
{
	return (ENOSYS);
}
1487