xref: /openbsd/usr.sbin/ldomctl/config.c (revision e5dd7070)
1 /*	$OpenBSD: config.c,v 1.41 2020/06/29 18:25:26 kn Exp $	*/
2 
3 /*
4  * Copyright (c) 2012, 2018 Mark Kettenis
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/queue.h>
21 #include <assert.h>
22 #include <err.h>
23 #include <stdarg.h>
24 #include <stdbool.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include "mdesc.h"
30 #include "ldomctl.h"
31 #include "ldom_util.h"
32 
/*
 * LDC endpoint target kinds.
 * NOTE(review): presumably compared against an endpoint's target_type;
 * the users are outside this excerpt -- confirm.
 */
#define LDC_GUEST	0
#define LDC_HV		1
#define LDC_SP		2

/* Service identifiers (their users are outside this excerpt). */
#define LDC_HVCTL_SVC	1
#define LDC_CONSOLE_SVC	2

/* Number of guest slots tracked per core; sizes struct core's guests[]. */
#define MAX_STRANDS_PER_CORE	16
41 
/*
 * A core discovered in the PRI.  Each slot of guests[] records a guest
 * that owns at least one CPU linked to this core; hvmd_finalize_cpu()
 * uses the slot index plus one as the CPU's partid.
 */
struct core {
	struct guest *guests[MAX_STRANDS_PER_CORE];
	TAILQ_ENTRY(core) link;		/* entry on the global cores list */
};

/* All cores created by pri_add_core(), in discovery order. */
TAILQ_HEAD(, core) cores;
48 
/*
 * A PRI "component" node that carries an "assignable-path" property
 * (see pri_init_components()).
 */
struct component {
	const char *path;	/* value of "assignable-path" */
	const char *nac;	/* value of "nac", or "-" when absent */
	int assigned;		/* not touched in this part of the file */

	struct md_node *hv_node;
	TAILQ_ENTRY(component) link;
};

/* All assignable components, in PRI node order. */
TAILQ_HEAD(, component) components;
59 
/*
 * A PRI "component" node whose "type" is "hostbridge"
 * (see pri_add_hostbridge()).
 */
struct hostbridge {
	const char *path;	/* "path" property */

	uint64_t num_msi_eqs;	/* "#msi-eqs" property */
	uint64_t num_msis;	/* "#msi" property */
	uint64_t max_vpcis;	/* "#max-vpcis" property, 10 when absent */
	TAILQ_ENTRY(hostbridge) link;
};

/* All host bridges, in PRI node order. */
TAILQ_HEAD(, hostbridge) hostbridges;
70 
/* One fragsize-sized slot of fragment space, queued on free_frags. */
struct frag {
	TAILQ_ENTRY(frag) link;
	uint64_t base;		/* start address of the slot */
};
75 
/*
 * Resource tables, indexed by resource_id.  Allocated in pri_init():
 * guests and consoles hold max_guests entries, cpus max_cpus,
 * pcie_busses and network_devices max_devices, mblocks max_mblocks and
 * ldc_endpoints max_guest_ldcs.
 */
struct guest **guests;
struct console **consoles;
struct cpu **cpus;
struct device **pcie_busses;
struct device **network_devices;
struct mblock **mblocks;
struct ldc_endpoint **ldc_endpoints;

/* PCIe root complexes found by pri_init_phys_io(). */
TAILQ_HEAD(, rootcomplex) rootcomplexes;

/* "max-cpus" from the PRI "platform" node. */
uint64_t max_cpus;
/* Whether the hypervisor MD has "cwqs" / "rngs" nodes (set in hvmd_init()). */
bool have_cwqs;
bool have_rngs;

/*
 * Limits read from the PRI "firmware" node in pri_init(); md_maxsize is
 * read from the "hvmd_mblock" node in hvmd_init().
 */
uint64_t max_guests;
uint64_t max_hv_ldcs;
uint64_t max_guest_ldcs;
uint64_t md_maxsize;
uint64_t md_elbow_room;
uint64_t max_mblocks;
uint64_t directio_capability;

/* Fixed size of the pcie_busses and network_devices tables. */
uint64_t max_devices = 16;

/* Guest-usable firmware image located in "read_only_memory" (pri_init()). */
uint64_t rombase;
uint64_t romsize;
/* "uartbase" from the hypervisor MD root node (hvmd_init()). */
uint64_t uartbase;

/* Largest page size derived from any CPU's "mmu-page-size-list". */
uint64_t max_page_size;

/* Properties copied from the hypervisor MD root node in hvmd_init(). */
uint64_t content_version;
uint64_t stick_frequency;
uint64_t tod_frequency;
uint64_t tod;
uint64_t erpt_pa;
uint64_t erpt_size;

/*
 * Machine descriptions.  protomd is the pruned copy of the PRI built in
 * pri_init(); pri and hvmd are assigned outside this excerpt.
 */
struct md *pri;
struct md *hvmd;
struct md *protomd;

/* Forward declarations; the definitions are outside this excerpt. */
struct guest *guest_lookup(const char *);
void guest_prune_phys_io(struct guest *);
void guest_prune_pcie(struct guest *, struct md_node *, const char *);
void guest_add_vpcie(struct guest *, uint64_t);
void guest_fixup_phys_io(struct guest *);

/* Pools of not-yet-assigned resources and their running totals. */
TAILQ_HEAD(, frag) free_frags = TAILQ_HEAD_INITIALIZER(free_frags);
TAILQ_HEAD(, cpu) free_cpus = TAILQ_HEAD_INITIALIZER(free_cpus);
int total_cpus;
TAILQ_HEAD(, mblock) free_memory = TAILQ_HEAD_INITIALIZER(free_memory);
uint64_t total_memory;
128 
129 struct cpu *
130 pri_find_cpu(uint64_t pid)
131 {
132 	struct cpu *cpu = NULL;
133 
134 	TAILQ_FOREACH(cpu, &free_cpus, link) {
135 		if (cpu->pid == pid)
136 			break;
137 	}
138 
139 	return cpu;
140 }
141 
142 void
143 pri_link_core(struct md *md, struct md_node *node, struct core *core)
144 {
145 	struct md_node *node2;
146 	struct md_prop *prop;
147 	struct cpu *cpu;
148 	uint64_t pid;
149 
150 	TAILQ_FOREACH(prop, &node->prop_list, link) {
151 		if (prop->tag == MD_PROP_ARC &&
152 		    strcmp(prop->name->str, "back") == 0) {
153 			node2 = prop->d.arc.node;
154 			if (strcmp(node2->name->str, "cpu") != 0) {
155 				pri_link_core(md, node2, core);
156 				continue;
157 			}
158 
159 			pid = -1;
160 			if (!md_get_prop_val(md, node2, "pid", &pid))
161 				md_get_prop_val(md, node2, "id", &pid);
162 
163 			cpu = pri_find_cpu(pid);
164 			if (cpu == NULL)
165 				errx(1, "couldn't determine core for VCPU %lld\n", pid);
166 			cpu->core = core;
167 		}
168 	}
169 }
170 
171 void
172 pri_add_core(struct md *md, struct md_node *node)
173 {
174 	struct core *core;
175 
176 	core = xzalloc(sizeof(*core));
177 	TAILQ_INSERT_TAIL(&cores, core, link);
178 
179 	pri_link_core(md, node, core);
180 }
181 
182 void
183 pri_init_cores(struct md *md)
184 {
185 	struct md_node *node;
186 	const void *type;
187 	size_t len;
188 
189 	TAILQ_INIT(&cores);
190 
191 	TAILQ_FOREACH(node, &md->node_list, link) {
192 		if (strcmp(node->name->str, "tlb") == 0 &&
193 		    md_get_prop_data(md, node, "type", &type, &len) &&
194 		    strcmp(type, "data") == 0) {
195 			pri_add_core(md, node);
196 		}
197 	}
198 }
199 
200 void
201 pri_add_hostbridge(struct md *md, struct md_node *node)
202 {
203 	struct hostbridge *hostbridge;
204 
205 	hostbridge = xzalloc(sizeof(*hostbridge));
206 	md_get_prop_str(md, node, "path", &hostbridge->path);
207 	md_get_prop_val(md, node, "#msi-eqs", &hostbridge->num_msi_eqs);
208 	md_get_prop_val(md, node, "#msi", &hostbridge->num_msis);
209 	if (!md_get_prop_val(md, node, "#max-vpcis", &hostbridge->max_vpcis))
210 		hostbridge->max_vpcis = 10;
211 	TAILQ_INSERT_TAIL(&hostbridges, hostbridge, link);
212 }
213 
214 void
215 pri_init_components(struct md *md)
216 {
217 	struct component *component;
218 	struct md_node *node;
219 	const char *path;
220 	const char *nac;
221 	const char *type;
222 
223 	TAILQ_INIT(&components);
224 	TAILQ_INIT(&hostbridges);
225 
226 	TAILQ_FOREACH(node, &md->node_list, link) {
227 		if (strcmp(node->name->str, "component") != 0)
228 			continue;
229 
230 		if (md_get_prop_str(md, node, "assignable-path", &path)) {
231 			component = xzalloc(sizeof(*component));
232 			component->path = path;
233 			if (md_get_prop_str(md, node, "nac", &nac))
234 				component->nac = nac;
235 			else
236 				component->nac = "-";
237 			TAILQ_INSERT_TAIL(&components, component, link);
238 		}
239 
240 		if (md_get_prop_str(md, node, "type", &type) &&
241 		    strcmp(type, "hostbridge") == 0)
242 			pri_add_hostbridge(md, node);
243 	}
244 }
245 
246 void
247 pri_init_phys_io(struct md *md)
248 {
249 	struct md_node *node;
250 	const char *device_type;
251 	uint64_t cfg_handle;
252 	struct rootcomplex *rootcomplex;
253 	char *path;
254 	size_t len;
255 
256 	TAILQ_INIT(&rootcomplexes);
257 
258 	TAILQ_FOREACH(node, &md->node_list, link) {
259 		if (strcmp(node->name->str, "iodevice") == 0 &&
260 		    md_get_prop_str(md, node, "device-type", &device_type) &&
261 		    strcmp(device_type, "pciex") == 0) {
262 			if (!md_get_prop_val(md, node, "cfg-handle",
263 					     &cfg_handle))
264 				continue;
265 
266 			rootcomplex = xzalloc(sizeof(*rootcomplex));
267 			md_get_prop_val(md, node, "#msi-eqs",
268 			    &rootcomplex->num_msi_eqs);
269 			md_get_prop_val(md, node, "#msi",
270 			    &rootcomplex->num_msis);
271 			md_get_prop_data(md, node, "msi-ranges",
272 			    &rootcomplex->msi_ranges, &len);
273 			rootcomplex->num_msi_ranges =
274 			    len / (2 * sizeof(uint64_t));
275 			md_get_prop_data(md, node, "virtual-dma",
276 			    &rootcomplex->vdma_ranges, &len);
277 			rootcomplex->num_vdma_ranges =
278 			    len / (2 * sizeof(uint64_t));
279 			rootcomplex->cfghandle = cfg_handle;
280 			xasprintf(&path, "/@%llx", cfg_handle);
281 			rootcomplex->path = path;
282 			TAILQ_INSERT_TAIL(&rootcomplexes, rootcomplex, link);
283 		}
284 	}
285 }
286 
287 void
288 pri_add_cpu(struct md *md, struct md_node *node)
289 {
290 	struct cpu *cpu;
291 	uint64_t mmu_page_size_list;
292 	uint64_t page_size;
293 
294 	cpu = xzalloc(sizeof(*cpu));
295 	/*
296 	 * Only UltraSPARC T1 CPUs have a "pid" property.  All other
297 	 * just have a "id" property that can be used as the physical ID.
298 	 */
299 	if (!md_get_prop_val(md, node, "pid", &cpu->pid))
300 		md_get_prop_val(md, node, "id", &cpu->pid);
301 	cpu->vid = -1;
302 	cpu->gid = -1;
303 	cpu->partid = -1;
304 	cpu->resource_id = -1;
305 	TAILQ_INSERT_TAIL(&free_cpus, cpu, link);
306 	total_cpus++;
307 
308 	mmu_page_size_list = 0x9;
309 	md_get_prop_val(md, node, "mmu-page-size-list", &mmu_page_size_list);
310 
311 	page_size = 1024;
312 	while (mmu_page_size_list) {
313 		page_size *= 8;
314 		mmu_page_size_list >>= 1;
315 	}
316 
317 	if (page_size > max_page_size)
318 		max_page_size = page_size;
319 }
320 
321 struct cpu *
322 pri_alloc_cpu(uint64_t pid)
323 {
324 	struct cpu *cpu;
325 
326 	if (pid == -1 && !TAILQ_EMPTY(&free_cpus)) {
327 		cpu = TAILQ_FIRST(&free_cpus);
328 		TAILQ_REMOVE(&free_cpus, cpu, link);
329 		return cpu;
330 	}
331 
332 	TAILQ_FOREACH(cpu, &free_cpus, link) {
333 		if (cpu->pid == pid) {
334 			TAILQ_REMOVE(&free_cpus, cpu, link);
335 			return cpu;
336 		}
337 	}
338 
339 	return NULL;
340 }
341 
342 void
343 pri_free_cpu(struct cpu *cpu)
344 {
345 	TAILQ_INSERT_TAIL(&free_cpus, cpu, link);
346 }
347 
348 void
349 pri_add_mblock(struct md *md, struct md_node *node)
350 {
351 	struct mblock *mblock;
352 
353 	mblock = xzalloc(sizeof(*mblock));
354 	md_get_prop_val(md, node, "base", &mblock->membase);
355 	md_get_prop_val(md, node, "size", &mblock->memsize);
356 	mblock->resource_id = -1;
357 	TAILQ_INSERT_TAIL(&free_memory, mblock, link);
358 	total_memory += mblock->memsize;
359 }
360 
361 struct mblock *
362 pri_alloc_memory(uint64_t base, uint64_t size)
363 {
364 	struct mblock *mblock, *new_mblock;
365 	uint64_t memend;
366 
367 	if (base == -1 && !TAILQ_EMPTY(&free_memory)) {
368 		mblock = TAILQ_FIRST(&free_memory);
369 		base = mblock->membase;
370 	}
371 
372 	TAILQ_FOREACH(mblock, &free_memory, link) {
373 		if (base >= mblock->membase &&
374 		    base < mblock->membase + mblock->memsize) {
375 			if (base > mblock->membase) {
376 				new_mblock = xzalloc(sizeof(*new_mblock));
377 				new_mblock->membase = mblock->membase;
378 				new_mblock->memsize = base - mblock->membase;
379 				new_mblock->resource_id = -1;
380 				TAILQ_INSERT_BEFORE(mblock, new_mblock, link);
381 			}
382 
383 			memend = mblock->membase + mblock->memsize;
384 			mblock->membase = base + size;
385 			mblock->memsize = memend - mblock->membase;
386 			if (mblock->memsize == 0) {
387 				TAILQ_REMOVE(&free_memory, mblock, link);
388 				free(mblock);
389 			}
390 
391 			total_memory -= size;
392 
393 			new_mblock = xzalloc(sizeof(*new_mblock));
394 			new_mblock->membase = base;
395 			new_mblock->memsize = size;
396 			new_mblock->resource_id = -1;
397 			return new_mblock;
398 		}
399 	}
400 
401 	return NULL;
402 }
403 
/*
 * Remove the "devalias" node that lives under the "openboot" node of
 * the given machine description.
 *
 * Operates on `md` itself rather than on the global protomd, so the
 * function does what its signature promises.
 */
void
pri_delete_devalias(struct md *md)
{
	struct md_node *node;

	/*
	 * There may be multiple "devalias" nodes.  Only remove the one
	 * that resides under the "openboot" node.
	 */
	node = md_find_node(md, "openboot");
	assert(node);
	node = md_find_subnode(md, node, "devalias");
	if (node)
		md_delete_node(md, node);
}
419 
420 void
421 pri_init(struct md *md)
422 {
423 	struct md_node *node, *node2;
424 	struct md_prop *prop;
425 	uint64_t base, size;
426 	uint64_t offset, guest_use;
427 
428 	node = md_find_node(pri, "platform");
429 	if (node == NULL)
430 		errx(1, "platform node not found");
431 
432 	md_get_prop_val(md, node, "max-cpus", &max_cpus);
433 
434 	node = md_find_node(pri, "firmware");
435 	if (node == NULL)
436 		errx(1, "firmware node not found");
437 
438 	md_get_prop_val(md, node, "max_guests", &max_guests);
439 	md_get_prop_val(md, node, "max_hv_ldcs", &max_hv_ldcs);
440 	md_get_prop_val(md, node, "max_guest_ldcs", &max_guest_ldcs);
441 	md_get_prop_val(md, node, "md_elbow_room", &md_elbow_room);
442 	md_get_prop_val(md, node, "max_mblocks", &max_mblocks);
443 	md_get_prop_val(md, node, "directio_capability", &directio_capability);
444 
445 	node = md_find_node(md, "read_only_memory");
446 	if (node == NULL)
447 		errx(1, "read_only_memory node not found");
448 	if (!md_get_prop_val(md, node, "base", &base))
449 		errx(1, "missing base property in read_only_memory node");
450 	if (!md_get_prop_val(md, node, "size", &size))
451 		errx(1, "missing size property in read_only_memory node");
452 	TAILQ_FOREACH(prop, &node->prop_list, link) {
453 		if (prop->tag == MD_PROP_ARC &&
454 		    strcmp(prop->name->str, "fwd") == 0) {
455 			node2 = prop->d.arc.node;
456 			if (!md_get_prop_val(md, node2, "guest_use",
457 			    &guest_use) || guest_use == 0)
458 				continue;
459 			if (!md_get_prop_val(md, node2, "offset", &offset) ||
460 			    !md_get_prop_val(md, node2, "size", &size))
461 				continue;
462 			rombase = base + offset;
463 			romsize = size;
464 		}
465 	}
466 	if (romsize == 0)
467 		errx(1, "no suitable firmware image found");
468 
469 	node = md_find_node(md, "platform");
470 	assert(node);
471 	md_set_prop_val(md, node, "domaining-enabled", 0x1);
472 
473 	md_write(md, "pri");
474 
475 	protomd = md_copy(md);
476 	md_find_delete_node(protomd, "components");
477 	md_find_delete_node(protomd, "domain-services");
478 	md_find_delete_node(protomd, "channel-devices");
479 	md_find_delete_node(protomd, "channel-endpoints");
480 	md_find_delete_node(protomd, "firmware");
481 	md_find_delete_node(protomd, "ldc_endpoints");
482 	md_find_delete_node(protomd, "memory-segments");
483 	pri_delete_devalias(protomd);
484 	md_collect_garbage(protomd);
485 	md_write(protomd, "protomd");
486 
487 	guests = xzalloc(max_guests * sizeof(*guests));
488 	consoles = xzalloc(max_guests * sizeof(*consoles));
489 	cpus = xzalloc(max_cpus * sizeof(*cpus));
490 	pcie_busses = xzalloc(max_devices * sizeof(*pcie_busses));
491 	network_devices = xzalloc(max_devices * sizeof(*network_devices));
492 	mblocks = xzalloc(max_mblocks * sizeof(*mblocks));
493 	ldc_endpoints = xzalloc(max_guest_ldcs * sizeof(*ldc_endpoints));
494 
495 	node = md_find_node(md, "cpus");
496 	TAILQ_FOREACH(prop, &node->prop_list, link) {
497 		if (prop->tag == MD_PROP_ARC &&
498 		    strcmp(prop->name->str, "fwd") == 0)
499 			pri_add_cpu(md, prop->d.arc.node);
500 	}
501 
502 	node = md_find_node(md, "memory");
503 	TAILQ_FOREACH(prop, &node->prop_list, link) {
504 		if (prop->tag == MD_PROP_ARC &&
505 		    strcmp(prop->name->str, "fwd") == 0)
506 			pri_add_mblock(md, prop->d.arc.node);
507 	}
508 
509 	pri_init_cores(md);
510 	pri_init_components(md);
511 	pri_init_phys_io(md);
512 }
513 
514 void
515 hvmd_fixup_guest(struct md *md, struct md_node *guest, struct md_node *node)
516 {
517 	struct md_prop *prop;
518 
519 	TAILQ_FOREACH(prop, &guest->prop_list, link) {
520 		if (prop->tag == MD_PROP_ARC &&
521 		    strcmp(prop->name->str, "fwd") == 0) {
522 			if (prop->d.arc.node == node)
523 				return;
524 		}
525 	}
526 
527 	md_add_prop_arc(md, guest, "fwd", node);
528 }
529 
/* Size of one fragment slot; set from "frag_space" in hvmd_init(). */
uint64_t fragsize;
/* Memory regions backing the fragment space (see hvmd_init_frag()). */
TAILQ_HEAD(, mblock) frag_mblocks;
/* Region described by the "hvmd_mblock" node, if the hypervisor MD has one. */
struct mblock *hvmd_mblock;
533 
534 void
535 hvmd_init_frag(struct md *md, struct md_node *node)
536 {
537 	struct frag *frag;
538 	struct mblock *mblock;
539 	uint64_t base, size;
540 
541 	md_get_prop_val(md, node, "base", &base);
542 	md_get_prop_val(md, node, "size", &size);
543 
544 	pri_alloc_memory(base, size);
545 
546 	mblock = xzalloc(sizeof(*mblock));
547 	mblock->membase = base;
548 	mblock->memsize = size;
549 	TAILQ_INSERT_TAIL(&frag_mblocks, mblock, link);
550 
551 	while (size > fragsize) {
552 		frag = xmalloc(sizeof(*frag));
553 		frag->base = base;
554 		TAILQ_INSERT_TAIL(&free_frags, frag, link);
555 		base += fragsize;
556 		size -= fragsize;
557 	}
558 }
559 
560 uint64_t
561 hvmd_alloc_frag(uint64_t base)
562 {
563 	struct frag *frag = TAILQ_FIRST(&free_frags);
564 
565 	if (base != -1) {
566 		TAILQ_FOREACH(frag, &free_frags, link) {
567 			if (frag->base == base)
568 				break;
569 		}
570 	}
571 
572 	if (frag == NULL)
573 		return -1;
574 
575 	TAILQ_REMOVE(&free_frags, frag, link);
576 	base = frag->base;
577 	free(frag);
578 
579 	return base;
580 }
581 
582 void
583 hvmd_free_frag(uint64_t base)
584 {
585 	struct frag *frag;
586 
587 	frag = xmalloc(sizeof(*frag));
588 	frag->base = base;
589 	TAILQ_INSERT_TAIL(&free_frags, frag, link);
590 }
591 
592 void
593 hvmd_init_mblock(struct md *md, struct md_node *node)
594 {
595 	struct mblock *mblock;
596 	uint64_t resource_id;
597 	struct md_node *node2;
598 	struct md_prop *prop;
599 
600 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
601 		errx(1, "missing resource_id property in mblock node");
602 
603 	if (resource_id >= max_mblocks)
604 		errx(1, "resource_id larger than max_mblocks");
605 
606 	mblock = xzalloc(sizeof(*mblock));
607 	md_get_prop_val(md, node, "membase", &mblock->membase);
608 	md_get_prop_val(md, node, "memsize", &mblock->memsize);
609 	md_get_prop_val(md, node, "realbase", &mblock->realbase);
610 	mblock->resource_id = resource_id;
611 	mblocks[resource_id] = mblock;
612 	mblock->hv_node = node;
613 
614 	/* Fixup missing links. */
615 	TAILQ_FOREACH(prop, &node->prop_list, link) {
616 		if (prop->tag == MD_PROP_ARC &&
617 		    strcmp(prop->name->str, "back") == 0) {
618 			node2 = prop->d.arc.node;
619 			if (strcmp(node2->name->str, "guest") == 0)
620 				hvmd_fixup_guest(md, node2, node);
621 		}
622 	}
623 }
624 
625 void
626 hvmd_init_console(struct md *md, struct md_node *node)
627 {
628 	struct console *console;
629 	uint64_t resource_id;
630 
631 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
632 		errx(1, "missing resource_id property in console node");
633 
634 	if (resource_id >= max_guests)
635 		errx(1, "resource_id larger than max_guests");
636 
637 	console = xzalloc(sizeof(*console));
638 	md_get_prop_val(md, node, "ino", &console->ino);
639 	md_get_prop_val(md, node, "uartbase", &console->uartbase);
640 	console->resource_id = resource_id;
641 	consoles[resource_id] = console;
642 	console->hv_node = node;
643 }
644 
645 void
646 hvmd_init_cpu(struct md *md, struct md_node *node)
647 {
648 	struct cpu *cpu;
649 	uint64_t pid;
650 	uint64_t resource_id;
651 	struct md_node *node2;
652 	struct md_prop *prop;
653 
654 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
655 		errx(1, "missing resource_id property in cpu node");
656 
657 	if (resource_id >= max_cpus)
658 		errx(1, "resource_id larger than max-cpus");
659 
660 	if (!md_get_prop_val(md, node, "pid", &pid))
661 		errx(1, "missing pid property in cpu node");
662 
663 	cpu = pri_alloc_cpu(pid);
664 	md_get_prop_val(md, node, "vid", &cpu->vid);
665 	if (!md_get_prop_val(md, node, "gid", &cpu->gid))
666 		cpu->gid = 0;
667 	md_get_prop_val(md, node, "partid", &cpu->partid);
668 	cpu->resource_id = resource_id;
669 	cpus[resource_id] = cpu;
670 	cpu->hv_node = node;
671 
672 	/* Fixup missing links. */
673 	TAILQ_FOREACH(prop, &node->prop_list, link) {
674 		if (prop->tag == MD_PROP_ARC &&
675 		    strcmp(prop->name->str, "back") == 0) {
676 			node2 = prop->d.arc.node;
677 			if (strcmp(node2->name->str, "guest") == 0)
678 				hvmd_fixup_guest(md, node2, node);
679 		}
680 	}
681 }
682 
683 void
684 hvmd_init_device(struct md *md, struct md_node *node)
685 {
686 	struct hostbridge *hostbridge;
687 	struct device *device;
688 	uint64_t resource_id;
689 	struct md_node *node2;
690 	struct md_prop *prop;
691 	char *path;
692 
693 	if (strcmp(node->name->str, "pcie_bus") != 0 &&
694 	    strcmp(node->name->str, "network_device") != 0)
695 		return;
696 
697 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
698 		errx(1, "missing resource_id property in ldc_endpoint node");
699 
700 	if (resource_id >= max_devices)
701 		errx(1, "resource_id larger than max_devices");
702 
703 	device = xzalloc(sizeof(*device));
704 	md_get_prop_val(md, node, "gid", &device->gid);
705 	md_get_prop_val(md, node, "cfghandle", &device->cfghandle);
706 	md_get_prop_val(md, node, "rcid", &device->rcid);
707 	device->resource_id = resource_id;
708 	if (strcmp(node->name->str, "pcie_bus") == 0)
709 		pcie_busses[resource_id] = device;
710 	else
711 		network_devices[resource_id] = device;
712 	device->hv_node = node;
713 
714 	/* Fixup missing links. */
715 	TAILQ_FOREACH(prop, &node->prop_list, link) {
716 		if (prop->tag == MD_PROP_ARC &&
717 		    strcmp(prop->name->str, "back") == 0) {
718 			node2 = prop->d.arc.node;
719 			if (strcmp(node2->name->str, "guest") == 0)
720 				hvmd_fixup_guest(md, node2, node);
721 		}
722 	}
723 
724 	xasprintf(&path, "/@%llx", device->cfghandle);
725 	TAILQ_FOREACH(hostbridge, &hostbridges, link) {
726 		if (strcmp(hostbridge->path, path) == 0)
727 			break;
728 	}
729 	free(path);
730 	if (hostbridge == NULL)
731 		return;
732 
733 	device->msi_eqs_per_vpci =
734 	    hostbridge->num_msi_eqs / hostbridge->max_vpcis;
735 	device->msis_per_vpci =
736 	    hostbridge->num_msis / hostbridge->max_vpcis;
737 	device->msi_base = hostbridge->num_msis;
738 
739 	device->num_msi_eqs = device->msi_eqs_per_vpci +
740 	    hostbridge->num_msi_eqs % hostbridge->max_vpcis;
741 	device->num_msis = device->msis_per_vpci +
742 	    hostbridge->num_msis % hostbridge->max_vpcis;
743 	device->msi_ranges[0] = 0;
744 	device->msi_ranges[1] = device->num_msis;
745 }
746 
747 void
748 hvmd_init_endpoint(struct md *md, struct md_node *node)
749 {
750 	struct ldc_endpoint *endpoint;
751 	uint64_t resource_id;
752 
753 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
754 		errx(1, "missing resource_id property in ldc_endpoint node");
755 
756 	if (resource_id >= max_guest_ldcs)
757 		errx(1, "resource_id larger than max_guest_ldcs");
758 
759 	if (ldc_endpoints[resource_id]) {
760 		/*
761 		 * Some machine descriptions seem to have duplicate
762 		 * arcs.  Fortunately, these can be easily detected
763 		 * and ignored.
764 		 */
765 		if (ldc_endpoints[resource_id]->hv_node == node)
766 			return;
767 		errx(1, "duplicate resource_id");
768 	}
769 
770 	endpoint = xzalloc(sizeof(*endpoint));
771 	endpoint->target_guest = -1;
772 	endpoint->tx_ino = -1;
773 	endpoint->rx_ino = -1;
774 	endpoint->private_svc = -1;
775 	endpoint->svc_id = -1;
776 	md_get_prop_val(md, node, "target_type", &endpoint->target_type);
777 	md_get_prop_val(md, node, "target_guest", &endpoint->target_guest);
778 	md_get_prop_val(md, node, "channel", &endpoint->channel);
779 	md_get_prop_val(md, node, "target_channel", &endpoint->target_channel);
780 	md_get_prop_val(md, node, "tx-ino", &endpoint->tx_ino);
781 	md_get_prop_val(md, node, "rx-ino", &endpoint->rx_ino);
782 	md_get_prop_val(md, node, "private_svc", &endpoint->private_svc);
783 	md_get_prop_val(md, node, "svc_id", &endpoint->svc_id);
784 	endpoint->resource_id = resource_id;
785 	ldc_endpoints[resource_id] = endpoint;
786 	endpoint->hv_node = node;
787 }
788 
789 void
790 hvmd_init_guest(struct md *md, struct md_node *node)
791 {
792 	struct guest *guest;
793 	struct md_node *node2;
794 	struct md_prop *prop;
795 	uint64_t resource_id;
796 	struct ldc_endpoint *endpoint;
797 	char *path;
798 
799 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
800 		errx(1, "missing resource_id property in guest node");
801 
802 	if (resource_id >= max_guests)
803 		errx(1, "resource_id larger than max_guests");
804 
805 	guest = xzalloc(sizeof(*guest));
806 	TAILQ_INIT(&guest->cpu_list);
807 	TAILQ_INIT(&guest->device_list);
808 	TAILQ_INIT(&guest->subdevice_list);
809 	TAILQ_INIT(&guest->mblock_list);
810 	TAILQ_INIT(&guest->endpoint_list);
811 	md_get_prop_str(md, node, "name", &guest->name);
812 	md_get_prop_val(md, node, "gid", &guest->gid);
813 	md_get_prop_val(md, node, "pid", &guest->pid);
814 	md_get_prop_val(md, node, "tod-offset", &guest->tod_offset);
815 	md_get_prop_val(md, node, "perfctraccess", &guest->perfctraccess);
816 	md_get_prop_val(md, node, "perfctrhtaccess", &guest->perfctrhtaccess);
817 	md_get_prop_val(md, node, "rngctlaccessible", &guest->rngctlaccessible);
818 	md_get_prop_val(md, node, "mdpa", &guest->mdpa);
819 	guest->resource_id = resource_id;
820 	guests[resource_id] = guest;
821 	guest->hv_node = node;
822 
823 	if (strcmp(guest->name, "primary") == 0 && guest->gid != 0)
824 		errx(1, "gid of primary domain isn't 0");
825 
826 	hvmd_alloc_frag(guest->mdpa);
827 
828 	TAILQ_FOREACH(prop, &node->prop_list, link) {
829 		if (prop->tag == MD_PROP_ARC &&
830 		    strcmp(prop->name->str, "fwd") == 0) {
831 			node2 = prop->d.arc.node;
832 			if (strcmp(node2->name->str, "console") == 0) {
833 				md_get_prop_val(md, node2, "resource_id",
834 				    &resource_id);
835 				guest->console = consoles[resource_id];
836 				consoles[resource_id]->guest = guest;
837 			}
838 			if (strcmp(node2->name->str, "cpu") == 0) {
839 				md_get_prop_val(md, node2, "resource_id",
840 				    &resource_id);
841 				TAILQ_INSERT_TAIL(&guest->cpu_list,
842 				    cpus[resource_id], link);
843 				cpus[resource_id]->guest = guest;
844 			}
845 			if (strcmp(node2->name->str, "pcie_bus") == 0) {
846 				md_get_prop_val(md, node2, "resource_id",
847 				    &resource_id);
848 				TAILQ_INSERT_TAIL(&guest->device_list,
849 				    pcie_busses[resource_id], link);
850 				pcie_busses[resource_id]->guest = guest;
851 			}
852 			if (strcmp(node2->name->str, "network_device") == 0) {
853 				md_get_prop_val(md, node2, "resource_id",
854 				    &resource_id);
855 				TAILQ_INSERT_TAIL(&guest->device_list,
856 				    network_devices[resource_id], link);
857 				network_devices[resource_id]->guest = guest;
858 			}
859 			if (strcmp(node2->name->str, "mblock") == 0) {
860 				md_get_prop_val(md, node2, "resource_id",
861 				    &resource_id);
862 				TAILQ_INSERT_TAIL(&guest->mblock_list,
863 				    mblocks[resource_id], link);
864 				mblocks[resource_id]->guest = guest;
865 			}
866 			if (strcmp(node2->name->str, "ldc_endpoint") == 0) {
867 				md_get_prop_val(md, node2, "resource_id",
868 				    &resource_id);
869 				TAILQ_INSERT_TAIL(&guest->endpoint_list,
870 				    ldc_endpoints[resource_id], link);
871 				ldc_endpoints[resource_id]->guest = guest;
872 			}
873 		}
874 	}
875 
876 	TAILQ_FOREACH(endpoint, &guest->endpoint_list, link) {
877 		if (endpoint->channel >= guest->endpoint_id)
878 			guest->endpoint_id = endpoint->channel + 1;
879 	}
880 
881 	xasprintf(&path, "%s.md", guest->name);
882 	guest->md = md_read(path);
883 
884 	if (guest->md == NULL)
885 		err(1, "unable to get guest MD");
886 
887 	free(path);
888 }
889 
890 void
891 hvmd_init(struct md *md)
892 {
893 	struct md_node *node;
894 	struct md_prop *prop;
895 
896 	node = md_find_node(md, "root");
897 	md_get_prop_val(md, node, "content-version", &content_version);
898 	md_get_prop_val(md, node, "stick-frequency", &stick_frequency);
899 	md_get_prop_val(md, node, "tod-frequency", &tod_frequency);
900 	md_get_prop_val(md, node, "tod", &tod);
901 	md_get_prop_val(md, node, "erpt-pa", &erpt_pa);
902 	md_get_prop_val(md, node, "erpt-size", &erpt_size);
903 	md_get_prop_val(md, node, "uartbase", &uartbase);
904 
905 	node = md_find_node(md, "platform");
906 	if (node)
907 		md_get_prop_val(md, node, "stick-frequency", &stick_frequency);
908 
909 	node = md_find_node(md, "hvmd_mblock");
910 	if (node) {
911 		hvmd_mblock = xzalloc(sizeof(*hvmd_mblock));
912 		md_get_prop_val(md, node, "base", &hvmd_mblock->membase);
913 		md_get_prop_val(md, node, "size", &hvmd_mblock->memsize);
914 		md_get_prop_val(md, node, "md_maxsize", &md_maxsize);
915 		pri_alloc_memory(hvmd_mblock->membase, hvmd_mblock->memsize);
916 	}
917 
918 	node = md_find_node(md, "frag_space");
919 	md_get_prop_val(md, node, "fragsize", &fragsize);
920 	if (fragsize == 0)
921 		fragsize = md_maxsize;
922 	TAILQ_INIT(&frag_mblocks);
923 	TAILQ_FOREACH(prop, &node->prop_list, link) {
924 		if (prop->tag == MD_PROP_ARC &&
925 		    strcmp(prop->name->str, "fwd") == 0)
926 			hvmd_init_frag(md, prop->d.arc.node);
927 	}
928 	pri_alloc_memory(0, fragsize);
929 
930 	node = md_find_node(md, "consoles");
931 	TAILQ_FOREACH(prop, &node->prop_list, link) {
932 		if (prop->tag == MD_PROP_ARC &&
933 		    strcmp(prop->name->str, "fwd") == 0)
934 			hvmd_init_console(md, prop->d.arc.node);
935 	}
936 
937 	node = md_find_node(md, "cpus");
938 	TAILQ_FOREACH(prop, &node->prop_list, link) {
939 		if (prop->tag == MD_PROP_ARC &&
940 		    strcmp(prop->name->str, "fwd") == 0)
941 			hvmd_init_cpu(md, prop->d.arc.node);
942 	}
943 
944 	have_cwqs = (md_find_node(md, "cwqs") != NULL);
945 	have_rngs = (md_find_node(md, "rngs") != NULL);
946 
947 	node = md_find_node(md, "devices");
948 	TAILQ_FOREACH(prop, &node->prop_list, link) {
949 		if (prop->tag == MD_PROP_ARC &&
950 		    strcmp(prop->name->str, "fwd") == 0)
951 			hvmd_init_device(md, prop->d.arc.node);
952 	}
953 
954 	node = md_find_node(md, "memory");
955 	TAILQ_FOREACH(prop, &node->prop_list, link) {
956 		if (prop->tag == MD_PROP_ARC &&
957 		    strcmp(prop->name->str, "fwd") == 0)
958 			hvmd_init_mblock(md, prop->d.arc.node);
959 	}
960 
961 	node = md_find_node(md, "ldc_endpoints");
962 	TAILQ_FOREACH(prop, &node->prop_list, link) {
963 		if (prop->tag == MD_PROP_ARC &&
964 		    strcmp(prop->name->str, "fwd") == 0)
965 			hvmd_init_endpoint(md, prop->d.arc.node);
966 	}
967 
968 	node = md_find_node(md, "guests");
969 	TAILQ_FOREACH(prop, &node->prop_list, link) {
970 		if (prop->tag == MD_PROP_ARC &&
971 		    strcmp(prop->name->str, "fwd") == 0)
972 			hvmd_init_guest(md, prop->d.arc.node);
973 	}
974 
975 	hvmd_alloc_frag(-1);
976 }
977 
978 void
979 hvmd_finalize_cpu(struct md *md, struct cpu *cpu)
980 {
981 	struct md_node *parent;
982 	struct md_node *node;
983 	int i;
984 
985 	for (i = 0; i < MAX_STRANDS_PER_CORE; i++) {
986 		if (cpu->core->guests[i] == cpu->guest) {
987 			cpu->partid = i + 1;
988 			break;
989 		}
990 		if (cpu->core->guests[i] == NULL) {
991 			cpu->core->guests[i] = cpu->guest;
992 			cpu->partid = i + 1;
993 			break;
994 		}
995 	}
996 
997 	parent = md_find_node(md, "cpus");
998 	assert(parent);
999 
1000 	node = md_add_node(md, "cpu");
1001 	md_link_node(md, parent, node);
1002 	md_add_prop_val(md, node, "pid", cpu->pid);
1003 	md_add_prop_val(md, node, "vid", cpu->vid);
1004 	md_add_prop_val(md, node, "gid", cpu->gid);
1005 	md_add_prop_val(md, node, "partid", cpu->partid);
1006 	md_add_prop_val(md, node, "resource_id", cpu->resource_id);
1007 	cpu->hv_node = node;
1008 }
1009 
1010 void
1011 hvmd_finalize_cpus(struct md *md)
1012 {
1013 	struct md_node *parent;
1014 	struct md_node *node;
1015 	uint64_t resource_id;
1016 
1017 	parent = md_find_node(md, "root");
1018 	assert(parent);
1019 
1020 	node = md_add_node(md, "cpus");
1021 	md_link_node(md, parent, node);
1022 
1023 	for (resource_id = 0; resource_id < max_cpus; resource_id++) {
1024 		if (cpus[resource_id])
1025 			hvmd_finalize_cpu(md, cpus[resource_id]);
1026 	}
1027 }
1028 
1029 void
1030 hvmd_finalize_maus(struct md *md)
1031 {
1032 	struct md_node *parent;
1033 	struct md_node *node;
1034 	struct md_node *child;
1035 	int i;
1036 
1037 	parent = md_find_node(md, "root");
1038 	assert(parent);
1039 
1040 	node = md_add_node(md, "maus");
1041 	md_link_node(md, parent, node);
1042 
1043 	if (have_cwqs) {
1044 		node = md_add_node(md, "cwqs");
1045 		md_link_node(md, parent, node);
1046 	}
1047 
1048 	if (have_rngs) {
1049 		node = md_add_node(md, "rngs");
1050 		md_link_node(md, parent, node);
1051 		child = md_add_node(md, "rng");
1052 		md_link_node(md, node, child);
1053 		for (i = 0; i < max_cpus; i++) {
1054 			if (cpus[i])
1055 				md_link_node(md, cpus[i]->hv_node, child);
1056 		}
1057 	}
1058 }
1059 
1060 void
1061 hvmd_finalize_device(struct md *md, struct device *device, const char *name)
1062 {
1063 	struct md_node *parent;
1064 	struct md_node *node;
1065 
1066 	parent = md_find_node(md, "devices");
1067 	assert(parent);
1068 
1069 	node = md_add_node(md, name);
1070 	md_link_node(md, parent, node);
1071 	md_add_prop_val(md, node, "resource_id", device->resource_id);
1072 	md_add_prop_val(md, node, "cfghandle", device->cfghandle);
1073 	md_add_prop_val(md, node, "gid", device->gid);
1074 	md_add_prop_val(md, node, "rcid", device->rcid);
1075 	device->hv_node = node;
1076 }
1077 
1078 void
1079 hvmd_finalize_pcie_device(struct md *md, struct device *device)
1080 {
1081 	struct rootcomplex *rootcomplex;
1082 	struct md_node *node, *child, *parent;
1083 	struct component *component;
1084 	struct subdevice *subdevice;
1085 	uint64_t resource_id = 0;
1086 	char *path;
1087 
1088 	hvmd_finalize_device(md, device,
1089 	    device->virtual ? "virtual_pcie_bus" : "pcie_bus");
1090 	node = device->hv_node;
1091 
1092 	if (!directio_capability)
1093 		return;
1094 
1095 	TAILQ_FOREACH(rootcomplex, &rootcomplexes, link) {
1096 		if (rootcomplex->cfghandle == device->cfghandle)
1097 			break;
1098 	}
1099 	if (rootcomplex == NULL)
1100 		return;
1101 
1102 	md_add_prop_val(md, node, "allow_bypass", 0);
1103 
1104 	md_add_prop_val(md, node, "#msi-eqs", device->num_msi_eqs);
1105 	md_add_prop_val(md, node, "#msi", device->num_msis);
1106 	md_add_prop_data(md, node, "msi-ranges", (void *)device->msi_ranges,
1107 	    sizeof(device->msi_ranges));
1108 	md_add_prop_data(md, node, "virtual-dma", rootcomplex->vdma_ranges,
1109 	    rootcomplex->num_vdma_ranges * 2 * sizeof(uint64_t));
1110 
1111 	xasprintf(&path, "/@%llx", device->cfghandle);
1112 
1113 	if (!device->virtual) {
1114 		parent = md_add_node(md, "pcie_assignable_devices");
1115 		md_link_node(md, node, parent);
1116 
1117 		TAILQ_FOREACH(component, &components, link) {
1118 			const char *path2 = component->path;
1119 
1120 			if (strncmp(path, path2, strlen(path)) != 0)
1121 				continue;
1122 
1123 			path2 = strchr(path2, '/');
1124 			if (path2 == NULL || *path2++ == 0)
1125 				continue;
1126 			path2 = strchr(path2, '/');
1127 			if (path2 == NULL || *path2++ == 0)
1128 				continue;
1129 
1130 			child = md_add_node(md, "pcie_device");
1131 			md_link_node(md, parent, child);
1132 
1133 			md_add_prop_str(md, child, "path", path2);
1134 			md_add_prop_val(md, child, "resource_id", resource_id);
1135 			resource_id++;
1136 
1137 			component->hv_node = child;
1138 		}
1139 	}
1140 
1141 	parent = md_add_node(md, "pcie_assigned_devices");
1142 	md_link_node(md, node, parent);
1143 
1144 	TAILQ_FOREACH(subdevice, &device->guest->subdevice_list, link) {
1145 		if (strncmp(path, subdevice->path, strlen(path)) != 0)
1146 			continue;
1147 		TAILQ_FOREACH(component, &components, link) {
1148 			if (strcmp(subdevice->path, component->path) == 0)
1149 				md_link_node(md, parent, component->hv_node);
1150 		}
1151 	}
1152 
1153 	free(path);
1154 }
1155 
1156 void
1157 hvmd_finalize_devices(struct md *md)
1158 {
1159 	struct md_node *parent;
1160 	struct md_node *node;
1161 	uint64_t resource_id;
1162 
1163 	parent = md_find_node(md, "root");
1164 	assert(parent);
1165 
1166 	node = md_add_node(md, "devices");
1167 	md_link_node(md, parent, node);
1168 
1169 	for (resource_id = 0; resource_id < max_devices; resource_id++) {
1170 		if (pcie_busses[resource_id])
1171 			hvmd_finalize_pcie_device(md, pcie_busses[resource_id]);
1172 	}
1173 	for (resource_id = 0; resource_id < max_devices; resource_id++) {
1174 		if (network_devices[resource_id])
1175 			hvmd_finalize_device(md, network_devices[resource_id],
1176 			    "network_device");
1177 	}
1178 }
1179 
1180 void
1181 hvmd_finalize_mblock(struct md *md, struct mblock *mblock)
1182 {
1183 	struct md_node *parent;
1184 	struct md_node *node;
1185 
1186 	parent = md_find_node(md, "memory");
1187 	assert(parent);
1188 
1189 	node = md_add_node(md, "mblock");
1190 	md_link_node(md, parent, node);
1191 	md_add_prop_val(md, node, "membase", mblock->membase);
1192 	md_add_prop_val(md, node, "memsize", mblock->memsize);
1193 	md_add_prop_val(md, node, "realbase", mblock->realbase);
1194 	md_add_prop_val(md, node, "resource_id", mblock->resource_id);
1195 	mblock->hv_node = node;
1196 }
1197 
1198 void
1199 hvmd_finalize_memory(struct md *md)
1200 {
1201 	struct md_node *parent;
1202 	struct md_node *node;
1203 	uint64_t resource_id;
1204 
1205 	parent = md_find_node(md, "root");
1206 	assert(parent);
1207 
1208 	node = md_add_node(md, "memory");
1209 	md_link_node(md, parent, node);
1210 
1211 	for (resource_id = 0; resource_id < max_mblocks; resource_id++) {
1212 		if (mblocks[resource_id])
1213 			hvmd_finalize_mblock(md, mblocks[resource_id]);
1214 	}
1215 }
1216 
1217 void
1218 hvmd_finalize_endpoint(struct md *md, struct ldc_endpoint *endpoint)
1219 {
1220 	struct md_node *parent;
1221 	struct md_node *node;
1222 
1223 	parent = md_find_node(md, "ldc_endpoints");
1224 	assert(parent);
1225 
1226 	node = md_add_node(md, "ldc_endpoint");
1227 	md_link_node(md, parent, node);
1228 	md_add_prop_val(md, node, "resource_id", endpoint->resource_id);
1229 	md_add_prop_val(md, node, "target_type", endpoint->target_type);
1230 	md_add_prop_val(md, node, "channel", endpoint->channel);
1231 	if (endpoint->target_guest != -1)
1232 		md_add_prop_val(md, node, "target_guest",
1233 		    endpoint->target_guest);
1234 	md_add_prop_val(md, node, "target_channel", endpoint->target_channel);
1235 	if (endpoint->tx_ino != -1)
1236 		md_add_prop_val(md, node, "tx-ino", endpoint->tx_ino);
1237 	if (endpoint->rx_ino != -1)
1238 		md_add_prop_val(md, node, "rx-ino", endpoint->rx_ino);
1239 	if (endpoint->private_svc != -1)
1240 		md_add_prop_val(md, node, "private_svc",
1241 		    endpoint->private_svc);
1242 	if (endpoint->svc_id != -1)
1243 		md_add_prop_val(md, node, "svc_id", endpoint->svc_id);
1244 	endpoint->hv_node = node;
1245 }
1246 
1247 void
1248 hvmd_finalize_endpoints(struct md *md)
1249 {
1250 	struct md_node *parent;
1251 	struct md_node *node;
1252 	uint64_t resource_id;
1253 
1254 	parent = md_find_node(md, "root");
1255 	assert(parent);
1256 
1257 	node = md_add_node(md, "ldc_endpoints");
1258 	md_link_node(md, parent, node);
1259 
1260 	for (resource_id = 0; resource_id < max_guest_ldcs; resource_id++) {
1261 		if (ldc_endpoints[resource_id])
1262 			hvmd_finalize_endpoint(md, ldc_endpoints[resource_id]);
1263 	}
1264 }
1265 
1266 void
1267 hvmd_finalize_console(struct md *md, struct console *console)
1268 {
1269 	struct md_node *parent;
1270 	struct md_node *node;
1271 	struct ldc_endpoint *endpoint;
1272 
1273 	parent = md_find_node(md, "consoles");
1274 	assert(parent);
1275 
1276 	node = md_add_node(md, "console");
1277 	md_link_node(md, parent, node);
1278 	md_add_prop_val(md, node, "resource_id", console->resource_id);
1279 	md_add_prop_val(md, node, "ino", console->ino);
1280 	console->hv_node = node;
1281 
1282 	if (console->uartbase) {
1283 		md_add_prop_val(md, node, "uartbase", console->uartbase);
1284 		return;
1285 	}
1286 
1287 	TAILQ_FOREACH(endpoint, &console->guest->endpoint_list, link) {
1288 		if (endpoint->rx_ino == console->ino) {
1289 			md_link_node(md, node, endpoint->hv_node);
1290 			break;
1291 		}
1292 	}
1293 }
1294 
1295 void
1296 hvmd_finalize_consoles(struct md *md)
1297 {
1298 	struct md_node *parent;
1299 	struct md_node *node;
1300 	uint64_t resource_id;
1301 
1302 	parent = md_find_node(md, "root");
1303 	assert(parent);
1304 
1305 	node = md_add_node(md, "consoles");
1306 	md_link_node(md, parent, node);
1307 
1308 	for (resource_id = 0; resource_id < max_guests; resource_id++) {
1309 		if (consoles[resource_id])
1310 			hvmd_finalize_console(md, consoles[resource_id]);
1311 	}
1312 }
1313 
1314 void
1315 hvmd_finalize_guest(struct md *md, struct guest *guest)
1316 {
1317 	struct md_node *node;
1318 	struct md_node *parent;
1319 	struct cpu *cpu;
1320 	struct device *device;
1321 	struct mblock *mblock;
1322 	struct ldc_endpoint *endpoint;
1323 
1324 	parent = md_find_node(md, "guests");
1325 	assert(parent);
1326 
1327 	node = md_add_node(md, "guest");
1328 	md_link_node(md, parent, node);
1329 	md_add_prop_str(md, node, "name", guest->name);
1330 	md_add_prop_val(md, node, "gid", guest->gid);
1331 	md_add_prop_val(md, node, "pid", guest->pid);
1332 	md_add_prop_val(md, node, "resource_id", guest->resource_id);
1333 	md_add_prop_val(md, node, "tod-offset", guest->tod_offset);
1334 	md_add_prop_val(md, node, "reset-reason", 0);
1335 	md_add_prop_val(md, node, "perfctraccess", guest->perfctraccess);
1336 	md_add_prop_val(md, node, "perfctrhtaccess", guest->perfctrhtaccess);
1337 	md_add_prop_val(md, node, "rngctlaccessible", guest->rngctlaccessible);
1338 	md_add_prop_val(md, node, "diagpriv", 0);
1339 	md_add_prop_val(md, node, "mdpa", guest->mdpa);
1340 	md_add_prop_val(md, node, "rombase", rombase);
1341 	md_add_prop_val(md, node, "romsize", romsize);
1342 	md_add_prop_val(md, node, "uartbase", uartbase);
1343 	guest->hv_node = node;
1344 
1345 	node = md_add_node(md, "virtual_devices");
1346 	md_link_node(md, guest->hv_node, node);
1347 	md_add_prop_val(md, node, "cfghandle", 0x100);
1348 
1349 	node = md_add_node(md, "channel_devices");
1350 	md_link_node(md, guest->hv_node, node);
1351 	md_add_prop_val(md, node, "cfghandle", 0x200);
1352 
1353 	if (guest->console)
1354 		md_link_node(md, guest->hv_node, guest->console->hv_node);
1355 	TAILQ_FOREACH(cpu, &guest->cpu_list, link)
1356 		md_link_node(md, guest->hv_node, cpu->hv_node);
1357 	TAILQ_FOREACH(device, &guest->device_list, link)
1358 		md_link_node(md, guest->hv_node, device->hv_node);
1359 	TAILQ_FOREACH(mblock, &guest->mblock_list, link)
1360 		md_link_node(md, guest->hv_node, mblock->hv_node);
1361 	TAILQ_FOREACH(endpoint, &guest->endpoint_list, link)
1362 		md_link_node(md, guest->hv_node, endpoint->hv_node);
1363 }
1364 
1365 void
1366 hvmd_finalize_guests(struct md *md)
1367 {
1368 	struct md_node *parent;
1369 	struct md_node *node;
1370 	uint64_t resource_id;
1371 
1372 	parent = md_find_node(md, "root");
1373 	assert(parent);
1374 
1375 	node = md_add_node(md, "guests");
1376 	md_link_node(md, parent, node);
1377 
1378 	for (resource_id = 0; resource_id < max_guests; resource_id++) {
1379 		if (guests[resource_id])
1380 			hvmd_finalize_guest(md, guests[resource_id]);
1381 	}
1382 }
1383 
1384 void
1385 hvmd_finalize(void)
1386 {
1387 	struct md *md;
1388 	struct md_node *node;
1389 	struct md_node *parent;
1390 	struct mblock *mblock;
1391 
1392 	md = md_alloc();
1393 	node = md_add_node(md, "root");
1394 	md_add_prop_val(md, node, "content-version", content_version);
1395 	if (content_version <= 0x100000000) {
1396 		md_add_prop_val(md, node, "stick-frequency", stick_frequency);
1397 		if (tod_frequency != 0)
1398 			md_add_prop_val(md, node, "tod-frequency",
1399 			    tod_frequency);
1400 		if (tod != 0)
1401 			md_add_prop_val(md, node, "tod", tod);
1402 		if (erpt_pa != 0)
1403 			md_add_prop_val(md, node, "erpt-pa", erpt_pa);
1404 		if (erpt_size != 0)
1405 			md_add_prop_val(md, node, "erpt-size", erpt_size);
1406 
1407 		parent = node;
1408 		node = md_add_node(md, "platform");
1409 		md_link_node(md, parent, node);
1410 		md_add_prop_val(md, node, "stick-frequency", stick_frequency);
1411 	}
1412 
1413 	parent = md_find_node(md, "root");
1414 	assert(parent);
1415 
1416 	node = md_add_node(md, "frag_space");
1417 	md_link_node(md, parent, node);
1418 	md_add_prop_val(md, node, "fragsize", fragsize);
1419 
1420 	parent = md_find_node(md, "frag_space");
1421 	TAILQ_FOREACH(mblock, &frag_mblocks, link) {
1422 		node = md_add_node(md, "frag_mblock");
1423 		md_link_node(md, parent, node);
1424 		md_add_prop_val(md, node, "base", mblock->membase);
1425 		md_add_prop_val(md, node, "size", mblock->memsize);
1426 	}
1427 
1428 	if (hvmd_mblock) {
1429 		parent = md_find_node(md, "root");
1430 		assert(parent);
1431 
1432 		node = md_add_node(md, "hvmd_mblock");
1433 		md_link_node(md, parent, node);
1434 		md_add_prop_val(md, node, "base", hvmd_mblock->membase);
1435 		md_add_prop_val(md, node, "size", hvmd_mblock->memsize);
1436 		md_add_prop_val(md, node, "md_maxsize", md_maxsize);
1437 	}
1438 
1439 	hvmd_finalize_cpus(md);
1440 	hvmd_finalize_maus(md);
1441 	hvmd_finalize_devices(md);
1442 	hvmd_finalize_memory(md);
1443 	hvmd_finalize_endpoints(md);
1444 	hvmd_finalize_consoles(md);
1445 	hvmd_finalize_guests(md);
1446 
1447 	md_write(md, "hv.md");
1448 }
1449 
1450 struct ldc_endpoint *
1451 hvmd_add_endpoint(struct guest *guest)
1452 {
1453 	struct ldc_endpoint *endpoint;
1454 	uint64_t resource_id;
1455 
1456 	for (resource_id = 0; resource_id < max_guest_ldcs; resource_id++)
1457 		if (ldc_endpoints[resource_id] == NULL)
1458 			break;
1459 	assert(resource_id < max_guest_ldcs);
1460 
1461 	endpoint = xzalloc(sizeof(*endpoint));
1462 	endpoint->target_guest = -1;
1463 	endpoint->tx_ino = -1;
1464 	endpoint->rx_ino = -1;
1465 	endpoint->private_svc = -1;
1466 	endpoint->svc_id = -1;
1467 	endpoint->resource_id = resource_id;
1468 	ldc_endpoints[resource_id] = endpoint;
1469 
1470 	TAILQ_INSERT_TAIL(&guest->endpoint_list, endpoint, link);
1471 	endpoint->guest = guest;
1472 
1473 	return endpoint;
1474 }
1475 
1476 struct console *
1477 hvmd_add_console(struct guest *guest)
1478 {
1479 	struct guest *primary;
1480 	struct console *console;
1481 	uint64_t resource_id;
1482 	uint64_t client_channel, server_channel;
1483 
1484 	primary = guest_lookup("primary");
1485 	client_channel = guest->endpoint_id++;
1486 	server_channel = primary->endpoint_id++;
1487 
1488 	for (resource_id = 0; resource_id < max_guests; resource_id++)
1489 		if (consoles[resource_id] == NULL)
1490 			break;
1491 	assert(resource_id < max_guests);
1492 
1493 	console = xzalloc(sizeof(*console));
1494 	console->ino = 0x11;
1495 	console->resource_id = resource_id;
1496 	consoles[resource_id] = console;
1497 
1498 	console->client_endpoint = hvmd_add_endpoint(guest);
1499 	console->client_endpoint->tx_ino = 0x11;
1500 	console->client_endpoint->rx_ino = 0x11;
1501 	console->client_endpoint->target_type = LDC_GUEST;
1502 	console->client_endpoint->target_guest = primary->gid;
1503 	console->client_endpoint->target_channel = server_channel;
1504 	console->client_endpoint->channel = client_channel;
1505 	console->client_endpoint->private_svc = LDC_CONSOLE_SVC;
1506 
1507 	console->server_endpoint = hvmd_add_endpoint(primary);
1508 	console->server_endpoint->tx_ino = 2 * server_channel;
1509 	console->server_endpoint->rx_ino = 2 * server_channel + 1;
1510 	console->server_endpoint->target_type = LDC_GUEST;
1511 	console->server_endpoint->target_guest = guest->gid;
1512 	console->server_endpoint->channel = server_channel;
1513 	console->server_endpoint->target_channel = client_channel;
1514 
1515 	guest->console = console;
1516 	console->guest = guest;
1517 
1518 	return console;
1519 }
1520 
1521 void
1522 hvmd_add_domain_services(struct guest *guest)
1523 {
1524 	struct guest *primary;
1525 	struct ldc_channel *ds = &guest->domain_services;
1526 	uint64_t client_channel, server_channel;
1527 
1528 	primary = guest_lookup("primary");
1529 	client_channel = guest->endpoint_id++;
1530 	server_channel = primary->endpoint_id++;
1531 
1532 	ds->client_endpoint = hvmd_add_endpoint(guest);
1533 	ds->client_endpoint->tx_ino = 2 * client_channel;
1534 	ds->client_endpoint->rx_ino = 2 * client_channel + 1;
1535 	ds->client_endpoint->target_type = LDC_GUEST;
1536 	ds->client_endpoint->target_guest = primary->gid;
1537 	ds->client_endpoint->target_channel = server_channel;
1538 	ds->client_endpoint->channel = client_channel;
1539 
1540 	ds->server_endpoint = hvmd_add_endpoint(primary);
1541 	ds->server_endpoint->tx_ino = 2 * server_channel;
1542 	ds->server_endpoint->rx_ino = 2 * server_channel + 1;
1543 	ds->server_endpoint->target_type = LDC_GUEST;
1544 	ds->server_endpoint->target_guest = guest->gid;
1545 	ds->server_endpoint->channel = server_channel;
1546 	ds->server_endpoint->target_channel = client_channel;
1547 }
1548 
1549 struct ldc_channel *
1550 hvmd_add_vio(struct guest *guest)
1551 {
1552 	struct guest *primary;
1553 	struct ldc_channel *lc = &guest->vio[guest->num_vios++];
1554 	uint64_t client_channel, server_channel;
1555 
1556 	primary = guest_lookup("primary");
1557 	client_channel = guest->endpoint_id++;
1558 	server_channel = primary->endpoint_id++;
1559 
1560 	lc->client_endpoint = hvmd_add_endpoint(guest);
1561 	lc->client_endpoint->tx_ino = 2 * client_channel;
1562 	lc->client_endpoint->rx_ino = 2 * client_channel + 1;
1563 	lc->client_endpoint->target_type = LDC_GUEST;
1564 	lc->client_endpoint->target_guest = primary->gid;
1565 	lc->client_endpoint->target_channel = server_channel;
1566 	lc->client_endpoint->channel = client_channel;
1567 
1568 	lc->server_endpoint = hvmd_add_endpoint(primary);
1569 	lc->server_endpoint->tx_ino = 2 * server_channel;
1570 	lc->server_endpoint->rx_ino = 2 * server_channel + 1;
1571 	lc->server_endpoint->target_type = LDC_GUEST;
1572 	lc->server_endpoint->target_guest = guest->gid;
1573 	lc->server_endpoint->channel = server_channel;
1574 	lc->server_endpoint->target_channel = client_channel;
1575 
1576 	return lc;
1577 }
1578 
1579 struct guest *
1580 hvmd_add_guest(const char *name)
1581 {
1582 	struct guest *guest;
1583 	uint64_t resource_id;
1584 
1585 	for (resource_id = 0; resource_id < max_guests; resource_id++)
1586 		if (guests[resource_id] == NULL)
1587 			break;
1588 	assert(resource_id < max_guests);
1589 
1590 	guest = xzalloc(sizeof(*guest));
1591 	TAILQ_INIT(&guest->cpu_list);
1592 	TAILQ_INIT(&guest->device_list);
1593 	TAILQ_INIT(&guest->subdevice_list);
1594 	TAILQ_INIT(&guest->mblock_list);
1595 	TAILQ_INIT(&guest->endpoint_list);
1596 	guests[resource_id] = guest;
1597 	guest->name = name;
1598 	guest->gid = resource_id;
1599 	guest->pid = resource_id + 1;
1600 	guest->resource_id = resource_id;
1601 	guest->mdpa = hvmd_alloc_frag(-1);
1602 
1603 	hvmd_add_console(guest);
1604 	hvmd_add_domain_services(guest);
1605 
1606 	return guest;
1607 }
1608 
1609 struct md_node *
1610 guest_add_channel_endpoints(struct guest *guest)
1611 {
1612 	struct md *md = guest->md;
1613 	struct md_node *parent;
1614 	struct md_node *node;
1615 
1616 	parent = md_find_node(md, "root");
1617 	assert(parent);
1618 
1619 	node = md_add_node(md, "channel-endpoints");
1620 	md_link_node(md, parent, node);
1621 
1622 	return node;
1623 }
1624 
1625 struct md_node *
1626 guest_add_endpoint(struct guest *guest, uint64_t id)
1627 {
1628 	struct md *md = guest->md;
1629 	struct md_node *parent;
1630 	struct md_node *node;
1631 
1632 	parent = md_find_node(md, "channel-endpoints");
1633 	if (parent == NULL)
1634 		parent = guest_add_channel_endpoints(guest);
1635 
1636 	node = md_add_node(md, "channel-endpoint");
1637 	md_link_node(md, parent, node);
1638 	md_add_prop_val(md, node, "id", id);
1639 	md_add_prop_val(md, node, "tx-ino", 2 * id);
1640 	md_add_prop_val(md, node, "rx-ino", 2 * id + 1);
1641 
1642 	return node;
1643 }
1644 
1645 struct md_node *
1646 guest_add_vcc(struct guest *guest)
1647 {
1648 	const char compatible[] = "SUNW,sun4v-virtual-console-concentrator";
1649 	struct md *md = guest->md;
1650 	struct md_node *parent;
1651 	struct md_node *node;
1652 
1653 	parent = md_find_node(md, "channel-devices");
1654 	assert(parent != NULL);
1655 
1656 	node = md_add_node(md, "virtual-device");
1657 	md_link_node(md, parent, node);
1658 	md_add_prop_str(md, node, "name", "virtual-console-concentrator");
1659 	md_add_prop_data(md, node, "compatible", compatible,
1660 	    sizeof(compatible));
1661 	md_add_prop_str(md, node, "device_type", "vcc");
1662 	md_add_prop_val(md, node, "cfg-handle", 0x0);
1663 	md_add_prop_str(md, node, "svc-name", "primary-vcc0");
1664 
1665 	return node;
1666 }
1667 
1668 struct md_node *
1669 guest_find_vcc(struct guest *guest)
1670 {
1671 	struct md *md = guest->md;
1672 	struct md_node *node, *node2;
1673 	struct md_prop *prop;
1674 	const char *name;
1675 
1676 	node = md_find_node(md, "channel-devices");
1677 	assert(node != NULL);
1678 
1679 	TAILQ_FOREACH(prop, &node->prop_list, link) {
1680 		if (prop->tag == MD_PROP_ARC &&
1681 		    strcmp(prop->name->str, "fwd") == 0) {
1682 			node2 = prop->d.arc.node;
1683 			if (!md_get_prop_str(md, node2, "name", &name))
1684 				continue;
1685 			if (strcmp(name, "virtual-console-concentrator") == 0)
1686 				return node2;
1687 		}
1688 	}
1689 
1690 	return NULL;
1691 }
1692 
1693 struct md_node *
1694 guest_add_vcc_port(struct guest *guest, struct md_node *vcc,
1695     const char *domain, uint64_t id, uint64_t channel)
1696 {
1697 	struct md *md = guest->md;
1698 	struct md_node *node;
1699 	struct md_node *child;
1700 
1701 	if (vcc == NULL)
1702 		vcc = guest_find_vcc(guest);
1703 	if (vcc == NULL)
1704 		vcc = guest_add_vcc(guest);
1705 
1706 	node = md_add_node(md, "virtual-device-port");
1707 	md_link_node(md, vcc, node);
1708 	md_add_prop_str(md, node, "name", "vcc-port");
1709 	md_add_prop_val(md, node, "id", id);
1710 	md_add_prop_str(md, node, "vcc-domain-name", domain);
1711 	md_add_prop_str(md, node, "vcc-group-name", domain);
1712 	/* OpenBSD doesn't care about this, but Solaris might. */
1713 	md_add_prop_val(md, node, "vcc-tcp-port", 5000 + id);
1714 
1715 	child = guest_add_endpoint(guest, channel);
1716 	md_link_node(md, node, child);
1717 
1718 	return node;
1719 }
1720 
1721 struct md_node *
1722 guest_add_vds(struct guest *guest)
1723 {
1724 	const char compatible[] = "SUNW,sun4v-disk-server";
1725 	struct md *md = guest->md;
1726 	struct md_node *parent;
1727 	struct md_node *node;
1728 
1729 	parent = md_find_node(md, "channel-devices");
1730 	assert(parent != NULL);
1731 
1732 	node = md_add_node(md, "virtual-device");
1733 	md_link_node(md, parent, node);
1734 	md_add_prop_str(md, node, "name", "virtual-disk-server");
1735 	md_add_prop_data(md, node, "compatible", compatible,
1736 	    sizeof(compatible));
1737 	md_add_prop_str(md, node, "device_type", "vds");
1738 	md_add_prop_val(md, node, "cfg-handle", 0x0);
1739 	md_add_prop_str(md, node, "svc-name", "primary-vds0");
1740 
1741 	return node;
1742 }
1743 
1744 struct md_node *
1745 guest_find_vds(struct guest *guest)
1746 {
1747 	struct md *md = guest->md;
1748 	struct md_node *node, *node2;
1749 	struct md_prop *prop;
1750 	const char *name;
1751 
1752 	node = md_find_node(md, "channel-devices");
1753 	assert(node != NULL);
1754 
1755 	TAILQ_FOREACH(prop, &node->prop_list, link) {
1756 		if (prop->tag == MD_PROP_ARC &&
1757 		    strcmp(prop->name->str, "fwd") == 0) {
1758 			node2 = prop->d.arc.node;
1759 			if (!md_get_prop_str(md, node2, "name", &name))
1760 				continue;
1761 			if (strcmp(name, "virtual-disk-server") == 0)
1762 				return node2;
1763 		}
1764 	}
1765 
1766 	return NULL;
1767 }
1768 
1769 struct md_node *
1770 guest_add_vds_port(struct guest *guest, struct md_node *vds,
1771     const char *path, uint64_t id, uint64_t channel)
1772 {
1773 	struct md *md = guest->md;
1774 	struct md_node *node;
1775 	struct md_node *child;
1776 
1777 	if (vds == NULL)
1778 		vds = guest_find_vds(guest);
1779 	if (vds == NULL)
1780 		vds = guest_add_vds(guest);
1781 
1782 	node = md_add_node(md, "virtual-device-port");
1783 	md_link_node(md, vds, node);
1784 	md_add_prop_str(md, node, "name", "vds-port");
1785 	md_add_prop_val(md, node, "id", id);
1786 	md_add_prop_str(md, node, "vds-block-device", path);
1787 
1788 	child = guest_add_endpoint(guest, channel);
1789 	md_link_node(md, node, child);
1790 
1791 	return node;
1792 }
1793 
1794 struct md_node *
1795 guest_add_vsw(struct guest *guest)
1796 {
1797 	const char compatible[] = "SUNW,sun4v-network-switch";
1798 	struct md *md = guest->md;
1799 	struct md_node *parent;
1800 	struct md_node *node;
1801 
1802 	parent = md_find_node(md, "channel-devices");
1803 	assert(parent != NULL);
1804 
1805 	node = md_add_node(md, "virtual-device");
1806 	md_link_node(md, parent, node);
1807 	md_add_prop_str(md, node, "name", "virtual-network-switch");
1808 	md_add_prop_data(md, node, "compatible", compatible,
1809 	    sizeof(compatible));
1810 	md_add_prop_str(md, node, "device_type", "vsw");
1811 	md_add_prop_val(md, node, "cfg-handle", 0x0);
1812 	md_add_prop_str(md, node, "svc-name", "primary-vsw0");
1813 
1814 	return node;
1815 }
1816 
1817 struct md_node *
1818 guest_find_vsw(struct guest *guest)
1819 {
1820 	struct md *md = guest->md;
1821 	struct md_node *node, *node2;
1822 	struct md_prop *prop;
1823 	const char *name;
1824 
1825 	node = md_find_node(md, "channel-devices");
1826 	assert(node != NULL);
1827 
1828 	TAILQ_FOREACH(prop, &node->prop_list, link) {
1829 		if (prop->tag == MD_PROP_ARC &&
1830 		    strcmp(prop->name->str, "fwd") == 0) {
1831 			node2 = prop->d.arc.node;
1832 			if (!md_get_prop_str(md, node2, "name", &name))
1833 				continue;
1834 			if (strcmp(name, "virtual-network-switch") == 0)
1835 				return node2;
1836 		}
1837 	}
1838 
1839 	return NULL;
1840 }
1841 
1842 struct md_node *
1843 guest_add_vsw_port(struct guest *guest, struct md_node *vds,
1844     uint64_t id, uint64_t channel)
1845 {
1846 	struct md *md = guest->md;
1847 	struct md_node *node;
1848 	struct md_node *child;
1849 	uint64_t mac_addr;
1850 
1851 	if (vds == NULL)
1852 		vds = guest_find_vsw(guest);
1853 	if (vds == NULL)
1854 		vds = guest_add_vsw(guest);
1855 	if (!md_get_prop_val(md, vds, "local-mac-address", &mac_addr)) {
1856 		mac_addr = 0x00144ff80000 + (arc4random() & 0x3ffff);
1857 		md_add_prop_val(md, vds, "local-mac-address", mac_addr);
1858 	}
1859 
1860 	node = md_add_node(md, "virtual-device-port");
1861 	md_link_node(md, vds, node);
1862 	md_add_prop_str(md, node, "name", "vsw-port");
1863 	md_add_prop_val(md, node, "id", id);
1864 
1865 	child = guest_add_endpoint(guest, channel);
1866 	md_link_node(md, node, child);
1867 
1868 	return node;
1869 }
1870 
1871 struct md_node *
1872 guest_add_console_device(struct guest *guest)
1873 {
1874 	const char compatible[] = "SUNW,sun4v-console";
1875 	struct md *md = guest->md;
1876 	struct md_node *parent;
1877 	struct md_node *node;
1878 
1879 	parent = md_find_node(md, "virtual-devices");
1880 	assert(parent);
1881 
1882 	node = md_add_node(md, "virtual-device");
1883 	md_link_node(md, parent, node);
1884 	md_add_prop_str(md, node, "name", "console");
1885 	md_add_prop_str(md, node, "device-type", "serial");
1886 	md_add_prop_val(md, node, "intr", 0x1);
1887 	md_add_prop_val(md, node, "ino", 0x11);
1888 	md_add_prop_val(md, node, "channel#", 0);
1889 	md_add_prop_val(md, node, "cfg-handle", 0x1);
1890 	md_add_prop_data(md, node, "compatible", compatible,
1891 	    sizeof(compatible));
1892 
1893 	return node;
1894 }
1895 
1896 struct md_node *
1897 guest_add_vdc(struct guest *guest, uint64_t cfghandle)
1898 {
1899 	const char compatible[] = "SUNW,sun4v-disk";
1900 	struct md *md = guest->md;
1901 	struct md_node *parent;
1902 	struct md_node *node;
1903 
1904 	parent = md_find_node(md, "channel-devices");
1905 	assert(parent);
1906 
1907 	node = md_add_node(md, "virtual-device");
1908 	md_link_node(md, parent, node);
1909 	md_add_prop_str(md, node, "name", "disk");
1910 	md_add_prop_str(md, node, "device-type", "block");
1911 	md_add_prop_val(md, node, "cfg-handle", cfghandle);
1912 	md_add_prop_data(md, node, "compatible", compatible,
1913 	    sizeof(compatible));
1914 
1915 	return node;
1916 }
1917 
1918 struct md_node *
1919 guest_add_vdc_port(struct guest *guest, struct md_node *vdc,
1920     uint64_t cfghandle, uint64_t id, uint64_t channel)
1921 {
1922 	struct md *md = guest->md;
1923 	struct md_node *node;
1924 	struct md_node *child;
1925 
1926 	if (vdc == NULL)
1927 		vdc = guest_add_vdc(guest, cfghandle);
1928 
1929 	node = md_add_node(md, "virtual-device-port");
1930 	md_link_node(md, vdc, node);
1931 	md_add_prop_str(md, node, "name", "vdc-port");
1932 	md_add_prop_val(md, node, "id", id);
1933 
1934 	child = guest_add_endpoint(guest, channel);
1935 	md_link_node(md, node, child);
1936 
1937 	return node;
1938 }
1939 
1940 struct md_node *
1941 guest_add_vnet(struct guest *guest, uint64_t mac_addr, uint64_t mtu,
1942     uint64_t cfghandle)
1943 {
1944 	const char compatible[] = "SUNW,sun4v-network";
1945 	struct md *md = guest->md;
1946 	struct md_node *parent;
1947 	struct md_node *node;
1948 
1949 	parent = md_find_node(md, "channel-devices");
1950 	assert(parent);
1951 
1952 	node = md_add_node(md, "virtual-device");
1953 	md_link_node(md, parent, node);
1954 	md_add_prop_str(md, node, "name", "network");
1955 	md_add_prop_str(md, node, "device-type", "network");
1956 	md_add_prop_val(md, node, "cfg-handle", cfghandle);
1957 	md_add_prop_data(md, node, "compatible", compatible,
1958 	    sizeof(compatible));
1959 	if (mac_addr == -1)
1960 		mac_addr = 0x00144ff80000 + (arc4random() & 0x3ffff);
1961 	md_add_prop_val(md, node, "local-mac-address", mac_addr);
1962 	md_add_prop_val(md, node, "mtu", mtu);
1963 
1964 	return node;
1965 }
1966 
1967 struct md_node *
1968 guest_add_vnet_port(struct guest *guest, struct md_node *vdc,
1969     uint64_t mac_addr, uint64_t remote_mac_addr, uint64_t mtu, uint64_t cfghandle,
1970     uint64_t id, uint64_t channel)
1971 {
1972 	struct md *md = guest->md;
1973 	struct md_node *node;
1974 	struct md_node *child;
1975 
1976 	if (vdc == NULL)
1977 		vdc = guest_add_vnet(guest, mac_addr, mtu, cfghandle);
1978 
1979 	node = md_add_node(md, "virtual-device-port");
1980 	md_link_node(md, vdc, node);
1981 	md_add_prop_str(md, node, "name", "vnet-port");
1982 	md_add_prop_val(md, node, "id", id);
1983 	md_add_prop_val(md, node, "switch-port", 0);
1984 	md_add_prop_data(md, node, "remote-mac-address",
1985 	    (uint8_t *)&remote_mac_addr, sizeof(remote_mac_addr));
1986 
1987 	child = guest_add_endpoint(guest, channel);
1988 	md_link_node(md, node, child);
1989 
1990 	return node;
1991 }
1992 
1993 struct md_node *
1994 guest_add_channel_devices(struct guest *guest)
1995 {
1996 	const char compatible[] = "SUNW,sun4v-channel-devices";
1997 	struct md *md = guest->md;
1998 	struct md_node *parent;
1999 	struct md_node *node;
2000 
2001 	parent = md_find_node(md, "virtual-devices");
2002 	assert(parent);
2003 
2004 	node = md_add_node(md, "channel-devices");
2005 	md_link_node(md, parent, node);
2006 	md_add_prop_str(md, node, "name", "channel-devices");
2007 	md_add_prop_str(md, node, "device-type", "channel-devices");
2008 	md_add_prop_data(md, node, "compatible", compatible,
2009 	    sizeof(compatible));
2010 	md_add_prop_val(md, node, "cfg-handle", 0x200);
2011 
2012 	return node;
2013 }
2014 
2015 struct md_node *
2016 guest_add_domain_services(struct guest *guest)
2017 {
2018 	struct md *md = guest->md;
2019 	struct md_node *parent;
2020 	struct md_node *node;
2021 
2022 	parent = md_find_node(md, "root");
2023 	assert(parent);
2024 
2025 	node = md_add_node(md, "domain-services");
2026 	md_link_node(md, parent, node);
2027 
2028 	return node;
2029 }
2030 
2031 struct md_node *
2032 guest_add_domain_services_port(struct guest *guest, uint64_t id)
2033 {
2034 	struct md *md = guest->md;
2035 	struct md_node *parent;
2036 	struct md_node *node;
2037 	struct md_node *child;
2038 
2039 	parent = md_find_node(md, "domain-services");
2040 	if (parent == NULL)
2041 		parent = guest_add_domain_services(guest);
2042 
2043 	node = md_add_node(md, "domain-services-port");
2044 	md_link_node(md, parent, node);
2045 	md_add_prop_val(md, node, "id", id);
2046 
2047 	child = guest_add_endpoint(guest,
2048 	    guest->domain_services.client_endpoint->channel);
2049 	md_link_node(md, node, child);
2050 
2051 	return node;
2052 }
2053 
2054 void
2055 guest_add_devalias(struct guest *guest, const char *name, const char *path)
2056 {
2057 	struct md *md = guest->md;
2058 	struct md_node *parent;
2059 	struct md_node *node;
2060 
2061 	parent = md_find_node(md, "openboot");
2062 	assert(parent);
2063 
2064 	node = md_find_subnode(md, parent, "devalias");
2065 	if (node == NULL) {
2066 		node = md_add_node(md, "devalias");
2067 		md_link_node(md, parent, node);
2068 	}
2069 
2070 	md_add_prop_str(md, node, name, path);
2071 }
2072 
2073 void
2074 guest_set_domaining_enabled(struct guest *guest)
2075 {
2076 	struct md *md = guest->md;
2077 	struct md_node *node;
2078 
2079 	node = md_find_node(md, "platform");
2080 	assert(node);
2081 
2082 	md_set_prop_val(md, node, "domaining-enabled", 0x1);
2083 }
2084 
2085 void
2086 guest_set_mac_address(struct guest *guest)
2087 {
2088 	struct md *md = guest->md;
2089 	struct md_node *node;
2090 	uint64_t mac_address;
2091 	uint64_t hostid;
2092 
2093 	node = md_find_node(md, "platform");
2094 	assert(node);
2095 
2096 	mac_address = 0x00144ff80000 + (arc4random() & 0x3ffff);
2097 	md_set_prop_val(md, node, "mac-address", mac_address);
2098 
2099 	hostid = 0x84000000 | (mac_address & 0x00ffffff);
2100 	md_set_prop_val(md, node, "hostid", hostid);
2101 }
2102 
2103 struct md_node *
2104 guest_find_vc(struct guest *guest)
2105 {
2106 	struct md *md = guest->md;
2107 	struct md_node *node, *node2;
2108 	struct md_node *vc = NULL;
2109 	struct md_prop *prop;
2110 	const char *name;
2111 
2112 	node = md_find_node(md, "channel-devices");
2113 	assert(node != NULL);
2114 
2115 	TAILQ_FOREACH(prop, &node->prop_list, link) {
2116 		if (prop->tag == MD_PROP_ARC &&
2117 		    strcmp(prop->name->str, "fwd") == 0) {
2118 			node2 = prop->d.arc.node;
2119 			if (!md_get_prop_str(md, node2, "name", &name))
2120 				continue;
2121 			if (strcmp(name, "virtual-channel") == 0)
2122 				vc = node2;
2123 		}
2124 	}
2125 
2126 	return vc;
2127 }
2128 
2129 struct md_node *
2130 guest_add_vc_port(struct guest *guest, struct md_node *vc,
2131     const char *domain, uint64_t id, uint64_t channel)
2132 {
2133 	struct md *md = guest->md;
2134 	struct md_node *node;
2135 	struct md_node *child;
2136 	char *str;
2137 
2138 	if (vc == NULL)
2139 		vc = guest_find_vc(guest);
2140 	assert(vc);
2141 
2142 	node = md_add_node(md, "virtual-device-port");
2143 	md_link_node(md, vc, node);
2144 	md_add_prop_str(md, node, "name", "vldc-port");
2145 	md_add_prop_val(md, node, "id", id);
2146 	xasprintf(&str, "ldom-%s", domain);
2147 	md_add_prop_str(md, node, "vldc-svc-name", str);
2148 	free(str);
2149 
2150 	child = guest_add_endpoint(guest, channel);
2151 	md_link_node(md, node, child);
2152 
2153 	return node;
2154 }
2155 
2156 struct guest *
2157 guest_create(const char *name)
2158 {
2159 	struct guest *guest;
2160 	struct guest *primary;
2161 	struct md_node *node;
2162 
2163 	primary = guest_lookup("primary");
2164 
2165 	guest = hvmd_add_guest(name);
2166 	guest->md = md_copy(protomd);
2167 
2168 	md_find_delete_node(guest->md, "dimm_configuration");
2169 	md_find_delete_node(guest->md, "platform_services");
2170 	md_collect_garbage(guest->md);
2171 
2172 	guest_set_domaining_enabled(guest);
2173 	guest_set_mac_address(guest);
2174 	guest_add_channel_devices(guest);
2175 	guest_add_domain_services_port(guest, 0);
2176 	guest_add_console_device(guest);
2177 	guest_add_devalias(guest, "virtual-console",
2178 	    "/virtual-devices/console@1");
2179 
2180 	guest_add_vcc_port(primary, NULL, guest->name, guest->gid - 1,
2181 	    guest->console->server_endpoint->channel);
2182 
2183 	guest_add_vc_port(primary, NULL, guest->name, guest->gid + 2,
2184 	    guest->domain_services.server_endpoint->channel);
2185 
2186 	node = md_find_node(guest->md, "root");
2187 	md_add_prop_val(guest->md, node, "reset-reason", 0);
2188 
2189 	return guest;
2190 }
2191 
2192 int
2193 guest_match_path(struct guest *guest, const char *path)
2194 {
2195 	struct subdevice *subdevice;
2196 	size_t len = strlen(path);
2197 
2198 	TAILQ_FOREACH(subdevice, &guest->subdevice_list, link) {
2199 		const char *path2 = subdevice->path;
2200 		size_t len2 = strlen(path2);
2201 
2202 		if (strncmp(path, path2, len < len2 ? len : len2) == 0)
2203 			return 1;
2204 	}
2205 
2206 	return 0;
2207 }
2208 
2209 void
2210 guest_prune_phys_io(struct guest *guest)
2211 {
2212 	const char compatible[] = "SUNW,sun4v-vpci";
2213 	struct md *md = guest->md;
2214 	struct md_node *node, *node2;
2215 	struct md_prop *prop, *prop2;
2216 	const char *device_type;
2217 	uint64_t cfg_handle;
2218 	char *path;
2219 
2220 	node = md_find_node(guest->md, "phys_io");
2221 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2222 		if (prop->tag == MD_PROP_ARC &&
2223 		    strcmp(prop->name->str, "fwd") == 0) {
2224 			node2 = prop->d.arc.node;
2225 			if (!md_get_prop_str(md, node2, "device-type",
2226 			    &device_type))
2227 				device_type = "unknown";
2228 			if (strcmp(device_type, "pciex") != 0) {
2229 				md_delete_node(md, node2);
2230 				continue;
2231 			}
2232 
2233 			if (!md_get_prop_val(md, node2, "cfg-handle",
2234 			    &cfg_handle)) {
2235 				md_delete_node(md, node2);
2236 				continue;
2237 			}
2238 
2239 			xasprintf(&path, "/@%llx", cfg_handle);
2240 			if (!guest_match_path(guest, path)) {
2241 				md_delete_node(md, node2);
2242 				continue;
2243 			}
2244 
2245 			md_set_prop_data(md, node2, "compatible",
2246 			    compatible, sizeof(compatible));
2247 			md_add_prop_val(md, node2, "virtual-root-complex", 1);
2248 			guest_prune_pcie(guest, node2, path);
2249 			free(path);
2250 
2251 			guest_add_vpcie(guest, cfg_handle);
2252 		}
2253 	}
2254 }
2255 
2256 void
2257 guest_prune_pcie(struct guest *guest, struct md_node *node, const char *path)
2258 {
2259 	struct md *md = guest->md;
2260 	struct md_node *node2;
2261 	struct md_prop *prop, *prop2;
2262 	uint64_t device_number;
2263 	char *path2;
2264 
2265 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2266 		if (prop->tag == MD_PROP_ARC &&
2267 		    strcmp(prop->name->str, "fwd") == 0) {
2268 			node2 = prop->d.arc.node;
2269 			if (strcmp(node2->name->str, "wart") == 0) {
2270 				md_delete_node(md, node2);
2271 				continue;
2272 			}
2273 			if (!md_get_prop_val(md, node2, "device-number",
2274 			    &device_number))
2275 				continue;
2276 			xasprintf(&path2, "%s/@%llx", path, device_number);
2277 			if (guest_match_path(guest, path2))
2278 				guest_prune_pcie(guest, node2, path2);
2279 			else
2280 				md_delete_node(md, node2);
2281 			free(path2);
2282 		}
2283 	}
2284 }
2285 
2286 void
2287 guest_add_vpcie(struct guest *guest, uint64_t cfghandle)
2288 {
2289 	struct device *device, *phys_device = NULL;
2290 	uint64_t resource_id;
2291 
2292 	for (resource_id = 0; resource_id < max_devices; resource_id++) {
2293 		if (pcie_busses[resource_id] &&
2294 		    pcie_busses[resource_id]->cfghandle == cfghandle) {
2295 			phys_device = pcie_busses[resource_id];
2296 			break;
2297 		}
2298 	}
2299 	if (phys_device == NULL)
2300 		errx(1, "no matching physical device");
2301 
2302 	for (resource_id = 0; resource_id < max_devices; resource_id++) {
2303 		if (pcie_busses[resource_id] == NULL)
2304 			break;
2305 	}
2306 	if (resource_id >= max_devices)
2307 		errx(1, "no available resource_id");
2308 
2309 	device = xzalloc(sizeof(*device));
2310 	device->gid = guest->gid;
2311 	device->cfghandle = cfghandle;
2312 	device->resource_id = resource_id;
2313 	device->rcid = phys_device->rcid;
2314 	device->virtual = 1;
2315 	device->guest = guest;
2316 
2317 	device->num_msi_eqs = phys_device->msi_eqs_per_vpci;
2318 	device->num_msis = phys_device->msis_per_vpci;
2319 	phys_device->msi_base -= phys_device->msis_per_vpci;
2320 	device->msi_ranges[0] = phys_device->msi_base;
2321 	device->msi_ranges[1] = device->num_msis;
2322 
2323 	pcie_busses[resource_id] = device;
2324 	TAILQ_INSERT_TAIL(&guest->device_list, device, link);
2325 }
2326 
2327 void
2328 guest_fixup_phys_io(struct guest *guest)
2329 {
2330 	struct md *md = guest->md;
2331 	struct md_node *node, *node2;
2332 	struct md_prop *prop, *prop2;
2333 	struct device *device;
2334 	uint64_t cfg_handle;
2335 	uint64_t mapping[3];
2336 	const void *buf;
2337 	size_t len;
2338 
2339 	if (!directio_capability)
2340 		return;
2341 
2342 	node = md_find_node(guest->md, "phys_io");
2343 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2344 		if (prop->tag == MD_PROP_ARC &&
2345 		    strcmp(prop->name->str, "fwd") == 0) {
2346 			node2 = prop->d.arc.node;
2347 
2348 			if (!md_get_prop_val(md, node2, "cfg-handle",
2349 			    &cfg_handle))
2350 				continue;
2351 
2352 			TAILQ_FOREACH(device, &guest->device_list, link) {
2353 				if (device->cfghandle == cfg_handle)
2354 					break;
2355 			}
2356 			if (device == NULL)
2357 				continue;
2358 
2359 			md_set_prop_val(md, node2, "#msi-eqs",
2360 			    device->num_msi_eqs);
2361 			md_set_prop_val(md, node2, "#msi",
2362 			    device->num_msis);
2363 			md_set_prop_data(md, node2, "msi-ranges",
2364 			    (void *)device->msi_ranges,
2365 			    sizeof(device->msi_ranges));
2366 
2367 			md_get_prop_data(md, node2, "msi-eq-to-devino",
2368 			    &buf, &len);
2369 			memcpy(mapping, buf, sizeof(mapping));
2370 			mapping[1] = device->num_msi_eqs;
2371 			md_set_prop_data(md, node2, "msi-eq-to-devino",
2372 			    (void *)mapping, sizeof(mapping));
2373 		}
2374 	}
2375 }
2376 
2377 struct guest *
2378 guest_lookup(const char *name)
2379 {
2380 	uint64_t resource_id;
2381 
2382 	for (resource_id = 0; resource_id < max_guests; resource_id++) {
2383 		if (guests[resource_id] &&
2384 		    strcmp(guests[resource_id]->name, name) == 0)
2385 			return guests[resource_id];
2386 	}
2387 
2388 	return NULL;
2389 }
2390 
2391 void
2392 guest_delete_virtual_device_port(struct guest *guest, struct md_node *port)
2393 {
2394 	struct md *md = guest->md;
2395 	struct md_node *node;
2396 	struct md_prop *prop;
2397 
2398 	TAILQ_FOREACH(node, &md->node_list, link) {
2399 		if (strcmp(node->name->str, "virtual-device-port") != 0)
2400 			continue;
2401 		TAILQ_FOREACH(prop, &node->prop_list, link) {
2402 			if (prop->tag == MD_PROP_ARC &&
2403 			    prop->d.arc.node == port) {
2404 				md_delete_node(md, node);
2405 				return;
2406 			}
2407 		}
2408 	}
2409 }
2410 
2411 void
2412 guest_delete_endpoint(struct guest *guest, struct ldc_endpoint *endpoint)
2413 {
2414 	struct md *md = guest->md;
2415 	struct md_node *node, *node2;
2416 	struct md_prop *prop;
2417 	uint64_t id, resource_id;
2418 
2419 	node = md_find_node(md, "channel-endpoints");
2420 	TAILQ_FOREACH(prop, &node->prop_list, link) {
2421 		if (prop->tag == MD_PROP_ARC &&
2422 		    strcmp(prop->name->str, "fwd") == 0) {
2423 			node2 = prop->d.arc.node;
2424 			if (!md_get_prop_val(hvmd, node2, "id", &id))
2425 				continue;
2426 			if (id == endpoint->channel) {
2427 				guest_delete_virtual_device_port(guest, node2);
2428 				md_delete_node(md, node2);
2429 				break;
2430 			}
2431 		}
2432 	}
2433 
2434 	TAILQ_REMOVE(&guest->endpoint_list, endpoint, link);
2435 	ldc_endpoints[endpoint->resource_id] = NULL;
2436 
2437 	/* Delete peer as well. */
2438 	for (resource_id = 0; resource_id < max_guest_ldcs; resource_id++) {
2439 		struct ldc_endpoint *peer = ldc_endpoints[resource_id];
2440 
2441 		if (peer && peer->target_type == LDC_GUEST &&
2442 		    peer->target_channel == endpoint->channel &&
2443 		    peer->channel == endpoint->target_channel &&
2444 		    peer->target_guest == guest->gid)
2445 			guest_delete_endpoint(peer->guest, peer);
2446 	}
2447 
2448 	free(endpoint);
2449 }
2450 
2451 void
2452 guest_delete(struct guest *guest)
2453 {
2454 	struct cpu *cpu, *cpu2;
2455 	struct mblock *mblock, *mblock2;
2456 	struct ldc_endpoint *endpoint, *endpoint2;
2457 
2458 	consoles[guest->console->resource_id] = NULL;
2459 	free(guest->console);
2460 
2461 	TAILQ_FOREACH_SAFE(cpu, &guest->cpu_list, link, cpu2) {
2462 		TAILQ_REMOVE(&guest->cpu_list, cpu, link);
2463 		cpus[cpu->resource_id] = NULL;
2464 		pri_free_cpu(cpu);
2465 	}
2466 
2467 	TAILQ_FOREACH_SAFE(mblock, &guest->mblock_list, link, mblock2) {
2468 		TAILQ_REMOVE(&guest->mblock_list, mblock, link);
2469 		mblocks[mblock->resource_id] = NULL;
2470 		free(mblock);
2471 	}
2472 
2473 	TAILQ_FOREACH_SAFE(endpoint, &guest->endpoint_list, link, endpoint2)
2474 		guest_delete_endpoint(guest, endpoint);
2475 
2476 	hvmd_free_frag(guest->mdpa);
2477 
2478 	guests[guest->resource_id] = NULL;
2479 	free(guest);
2480 }
2481 
2482 void
2483 guest_delete_cpu(struct guest *guest, uint64_t vid)
2484 {
2485 	struct cpu *cpu;
2486 
2487 	TAILQ_FOREACH(cpu, &guest->cpu_list, link) {
2488 		if (cpu->vid == vid) {
2489 			TAILQ_REMOVE(&guest->cpu_list, cpu, link);
2490 			cpus[cpu->resource_id] = NULL;
2491 			pri_free_cpu(cpu);
2492 			return;
2493 		}
2494 	}
2495 }
2496 
2497 void
2498 guest_add_cpu(struct guest *guest, uint64_t stride)
2499 {
2500 	struct cpu *cpu;
2501 
2502 	cpu = pri_alloc_cpu(-1);
2503 
2504 	/*
2505 	 * Allocate (but don't assign) additional virtual CPUs if the
2506 	 * specified stride is bigger than one.
2507 	 */
2508 	while (stride-- > 1)
2509 		pri_alloc_cpu(-1);
2510 
2511 	if (cpu->resource_id == -1) {
2512 		uint64_t resource_id;
2513 
2514 		for (resource_id = 0; resource_id < max_cpus; resource_id++)
2515 			if (cpus[resource_id] == NULL)
2516 				break;
2517 		assert(resource_id < max_cpus);
2518 		cpu->resource_id = resource_id;
2519 	}
2520 	cpus[cpu->resource_id] = cpu;
2521 
2522 	cpu->vid = guest->cpu_vid++;
2523 	cpu->gid = guest->gid;
2524 	cpu->partid = 1;
2525 
2526 	TAILQ_INSERT_TAIL(&guest->cpu_list, cpu, link);
2527 	cpu->guest = guest;
2528 }
2529 
2530 void
2531 guest_delete_memory(struct guest *guest)
2532 {
2533 	struct mblock *mblock, *tmp;
2534 
2535 	TAILQ_FOREACH_SAFE(mblock, &guest->mblock_list, link, tmp) {
2536 		if (mblock->resource_id != -1)
2537 			mblocks[mblock->resource_id] = NULL;
2538 		TAILQ_REMOVE(&guest->mblock_list, mblock, link);
2539 		free(mblock);
2540 	}
2541 }
2542 
2543 void
2544 guest_add_memory(struct guest *guest, uint64_t base, uint64_t size)
2545 {
2546 	struct mblock *mblock;
2547 	uint64_t resource_id;
2548 
2549 	mblock = pri_alloc_memory(base, size);
2550 	if (mblock == NULL)
2551 		errx(1, "unable to allocate guest memory");
2552 	for (resource_id = 0; resource_id < max_cpus; resource_id++)
2553 		if (mblocks[resource_id] == NULL)
2554 			break;
2555 	assert(resource_id < max_mblocks);
2556 	mblock->resource_id = resource_id;
2557 	mblocks[resource_id] = mblock;
2558 
2559 	mblock->realbase = mblock->membase & (max_page_size - 1);
2560 	if (mblock->realbase == 0)
2561 		mblock->realbase = max_page_size;
2562 
2563 	TAILQ_INSERT_TAIL(&guest->mblock_list, mblock, link);
2564 	mblock->guest = guest;
2565 }
2566 
2567 void
2568 guest_add_vdisk(struct guest *guest, uint64_t id, const char *path,
2569     const char *user_devalias)
2570 {
2571 	struct guest *primary;
2572 	struct ldc_channel *lc;
2573 	char *devalias;
2574 	char *devpath;
2575 
2576 	primary = guest_lookup("primary");
2577 
2578 	lc = hvmd_add_vio(guest);
2579 	guest_add_vds_port(primary, NULL, path, id,
2580 	    lc->server_endpoint->channel);
2581 	guest_add_vdc_port(guest, NULL, id, 0, lc->client_endpoint->channel);
2582 
2583 	xasprintf(&devalias, "disk%d", id);
2584 	xasprintf(&devpath,
2585 	    "/virtual-devices@100/channel-devices@200/disk@%d", id);
2586 	if (id == 0)
2587 		guest_add_devalias(guest, "disk", devpath);
2588 	guest_add_devalias(guest, devalias, devpath);
2589 	if (user_devalias != NULL)
2590 		guest_add_devalias(guest, user_devalias, devpath);
2591 	free(devalias);
2592 	free(devpath);
2593 }
2594 
2595 void
2596 guest_add_vnetwork(struct guest *guest, uint64_t id, uint64_t mac_addr,
2597     uint64_t mtu, const char *user_devalias)
2598 {
2599 	struct guest *primary;
2600 	struct ldc_channel *lc;
2601 	char *devalias;
2602 	char *devpath;
2603 	struct md_node *node;
2604 	uint64_t remote_mac_addr = -1;
2605 
2606 	primary = guest_lookup("primary");
2607 
2608 	lc = hvmd_add_vio(guest);
2609 	guest_add_vsw_port(primary, NULL, id, lc->server_endpoint->channel);
2610 	node = guest_find_vsw(primary);
2611 	md_get_prop_val(primary->md, node, "local-mac-address", &remote_mac_addr);
2612 	guest_add_vnet_port(guest, NULL, mac_addr, remote_mac_addr, mtu, id, 0,
2613 	    lc->client_endpoint->channel);
2614 
2615 	xasprintf(&devalias, "net%d", id);
2616 	xasprintf(&devpath,
2617 	    "/virtual-devices@100/channel-devices@200/network@%d", id);
2618 	if (id == 0)
2619 		guest_add_devalias(guest, "net", devpath);
2620 	guest_add_devalias(guest, devalias, devpath);
2621 	if (user_devalias != NULL)
2622 		guest_add_devalias(guest, user_devalias, devpath);
2623 	free(devalias);
2624 	free(devpath);
2625 }
2626 
2627 void
2628 guest_add_variable(struct guest *guest, const char *name, const char *str)
2629 {
2630 	struct md *md = guest->md;
2631 	struct md_node *parent;
2632 	struct md_node *node;
2633 
2634 	node = md_find_node(md, "variables");
2635 	if (node == NULL) {
2636 		parent = md_find_node(md, "root");
2637 		assert(parent);
2638 
2639 		node = md_add_node(md, "variables");
2640 		md_link_node(md, parent, node);
2641 	}
2642 
2643 	md_add_prop_str(md, node, name, str);
2644 }
2645 
2646 void
2647 guest_add_iodev(struct guest *guest, const char *path)
2648 {
2649 	struct component *component;
2650 	struct subdevice *subdevice;
2651 
2652 	if (!directio_capability)
2653 		errx(1, "direct I/O not supported by hypervisor");
2654 
2655 	TAILQ_FOREACH(component, &components, link) {
2656 		if (strcmp(component->path, path) == 0)
2657 			break;
2658 	}
2659 
2660 	if (component == NULL)
2661 		errx(1, "incorrect device path %s", path);
2662 	if (component->assigned)
2663 		errx(1, "device path %s already assigned", path);
2664 
2665 	subdevice = xzalloc(sizeof(*subdevice));
2666 	subdevice->path = path;
2667 	TAILQ_INSERT_TAIL(&guest->subdevice_list, subdevice, link);
2668 	component->assigned = 1;
2669 }
2670 
2671 struct cpu *
2672 guest_find_cpu(struct guest *guest, uint64_t pid)
2673 {
2674 	struct cpu *cpu;
2675 
2676 	TAILQ_FOREACH(cpu, &guest->cpu_list, link)
2677 		if (cpu->pid == pid)
2678 			return cpu;
2679 
2680 	return NULL;
2681 }
2682 
2683 void
2684 guest_finalize(struct guest *guest)
2685 {
2686 	struct md *md = guest->md;
2687 	struct md_node *node, *node2;
2688 	struct md_prop *prop, *prop2;
2689 	struct mblock *mblock;
2690 	struct md_node *parent;
2691 	struct md_node *child;
2692 	struct cpu *cpu;
2693 	uint64_t pid;
2694 	const char *name;
2695 	char *path;
2696 
2697 	node = md_find_node(md, "cpus");
2698 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2699 		if (prop->tag == MD_PROP_ARC &&
2700 		    strcmp(prop->name->str, "fwd") == 0) {
2701 			node2 = prop->d.arc.node;
2702 			if (!md_get_prop_val(md, node2, "pid", &pid))
2703 				if (!md_get_prop_val(md, node2, "id", &pid))
2704 					continue;
2705 			cpu = guest_find_cpu(guest, pid);
2706 			if (cpu == NULL) {
2707 				md_delete_node(md, node2);
2708 				continue;
2709 			}
2710 			md_set_prop_val(md, node2, "id", cpu->vid);
2711 		}
2712 	}
2713 
2714 	/*
2715 	 * We don't support crypto units yet, so delete any "ncp" and
2716 	 * "n2cp" nodes.  If we don't, Solaris whines about not being
2717 	 * able to configure crypto work queues.
2718 	 */
2719 	node = md_find_node(md, "virtual-devices");
2720 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2721 		if (prop->tag == MD_PROP_ARC &&
2722 		    strcmp(prop->name->str, "fwd") == 0) {
2723 			node2 = prop->d.arc.node;
2724 			if (!md_get_prop_str(md, node2, "name", &name))
2725 				continue;
2726 			if (strcmp(name, "ncp") == 0)
2727 				md_delete_node(md, node2);
2728 			if (strcmp(name, "n2cp") == 0)
2729 				md_delete_node(md, node2);
2730 		}
2731 	}
2732 
2733 	node = md_find_node(md, "memory");
2734 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2735 		if (prop->tag == MD_PROP_ARC &&
2736 		    strcmp(prop->name->str, "fwd") == 0) {
2737 			node2 = prop->d.arc.node;
2738 			md_delete_node(md, node2);
2739 		}
2740 	}
2741 
2742 	if (strcmp(guest->name, "primary") != 0)
2743 		guest_prune_phys_io(guest);
2744 	guest_fixup_phys_io(guest);
2745 
2746 	md_collect_garbage(md);
2747 
2748 	parent = md_find_node(md, "memory");
2749 	TAILQ_FOREACH(mblock, &guest->mblock_list, link) {
2750 		child = md_add_node(md, "mblock");
2751 		md_add_prop_val(md, child, "base", mblock->realbase);
2752 		md_add_prop_val(md, child, "size", mblock->memsize);
2753 		md_link_node(md, parent, child);
2754 	}
2755 
2756 	xasprintf(&path, "%s.md", guest->name);
2757 	md_write(guest->md, path);
2758 	free(path);
2759 }
2760 
/*
 * Look up the "primary" guest, which must exist, enable domaining on it
 * and return it.
 */
struct guest *
primary_init(void)
{
	struct guest *primary = guest_lookup("primary");

	assert(primary);
	guest_set_domaining_enabled(primary);

	return primary;
}
2773 
2774 void
2775 build_config(const char *filename, int noaction)
2776 {
2777 	struct guest *primary;
2778 	struct guest *guest;
2779 	struct ldc_endpoint *endpoint;
2780 	struct component *component;
2781 	uint64_t resource_id;
2782 	int i;
2783 
2784 	struct ldom_config conf;
2785 	struct domain *domain;
2786 	struct vdisk *vdisk;
2787 	struct vnet *vnet;
2788 	struct var *var;
2789 	struct iodev *iodev;
2790 	uint64_t num_cpus = 0, primary_num_cpus = 0;
2791 	uint64_t primary_stride = 1;
2792 	uint64_t memory = 0, primary_memory = 0;
2793 
2794 	SIMPLEQ_INIT(&conf.domain_list);
2795 	if (parse_config(filename, &conf) < 0)
2796 		exit(1);
2797 
2798 	pri = md_read("pri");
2799 	if (pri == NULL)
2800 		err(1, "unable to get PRI");
2801 	hvmd = md_read("hv.md");
2802 	if (hvmd == NULL)
2803 		err(1, "unable to get Hypervisor MD");
2804 
2805 	pri_init(pri);
2806 	pri_alloc_memory(hv_membase, hv_memsize);
2807 
2808 	SIMPLEQ_FOREACH(domain, &conf.domain_list, entry) {
2809 		if (strcmp(domain->name, "primary") == 0) {
2810 			primary_num_cpus = domain->vcpu;
2811 			primary_stride = domain->vcpu_stride;
2812 			primary_memory = domain->memory;
2813 		}
2814 		num_cpus += (domain->vcpu * domain->vcpu_stride);
2815 		memory += domain->memory;
2816 	}
2817 	if (primary_num_cpus == 0 && total_cpus > num_cpus)
2818 		primary_num_cpus = total_cpus - num_cpus;
2819 	if (primary_memory == 0 && total_memory > memory)
2820 		primary_memory = total_memory - memory;
2821 	if (num_cpus > total_cpus || primary_num_cpus == 0)
2822 		errx(1, "not enough VCPU resources available");
2823 	if (memory > total_memory || primary_memory == 0)
2824 		errx(1, "not enough memory available");
2825 
2826 	if (noaction)
2827 		exit(0);
2828 
2829 	hvmd_init(hvmd);
2830 	primary = primary_init();
2831 
2832 	for (resource_id = 0; resource_id <max_guests; resource_id++)
2833 		if (guests[resource_id] &&
2834 		    strcmp(guests[resource_id]->name, "primary") != 0)
2835 			guest_delete(guests[resource_id]);
2836 
2837 	primary->endpoint_id = 0;
2838 	TAILQ_FOREACH(endpoint, &primary->endpoint_list, link) {
2839 		if (endpoint->channel >= primary->endpoint_id)
2840 			primary->endpoint_id = endpoint->channel + 1;
2841 	}
2842 
2843 	for (i = 0; i < max_cpus; i++)
2844 		guest_delete_cpu(primary, i);
2845 	for (i = 0; i < primary_num_cpus; i++)
2846 		guest_add_cpu(primary, primary_stride);
2847 	guest_delete_memory(primary);
2848 	guest_add_memory(primary, -1, primary_memory);
2849 
2850 	SIMPLEQ_FOREACH(domain, &conf.domain_list, entry) {
2851 		if (strcmp(domain->name, "primary") != 0)
2852 			continue;
2853 		SIMPLEQ_FOREACH(var, &domain->var_list, entry)
2854 			guest_add_variable(primary, var->name, var->str);
2855 	}
2856 
2857 	SIMPLEQ_FOREACH(domain, &conf.domain_list, entry) {
2858 		if (strcmp(domain->name, "primary") == 0)
2859 			continue;
2860 		guest = guest_create(domain->name);
2861 		for (i = 0; i < domain->vcpu; i++)
2862 			guest_add_cpu(guest, domain->vcpu_stride);
2863 		guest_add_memory(guest, -1, domain->memory);
2864 		i = 0;
2865 		SIMPLEQ_FOREACH(vdisk, &domain->vdisk_list, entry)
2866 			guest_add_vdisk(guest, i++, vdisk->path,
2867 			    vdisk->devalias);
2868 		i = 0;
2869 		SIMPLEQ_FOREACH(vnet, &domain->vnet_list, entry)
2870 			guest_add_vnetwork(guest, i++, vnet->mac_addr,
2871 			    vnet->mtu, vnet->devalias);
2872 		SIMPLEQ_FOREACH(var, &domain->var_list, entry)
2873 			guest_add_variable(guest, var->name, var->str);
2874 		SIMPLEQ_FOREACH(iodev, &domain->iodev_list, entry)
2875 			guest_add_iodev(guest, iodev->path);
2876 
2877 		guest_finalize(guest);
2878 	}
2879 
2880 	TAILQ_FOREACH(component, &components, link) {
2881 		if (component->assigned)
2882 			continue;
2883 		guest_add_iodev(primary, component->path);
2884 	}
2885 
2886 	guest_finalize(primary);
2887 	hvmd_finalize();
2888 }
2889 
2890 void
2891 list_components(void)
2892 {
2893 	struct component *component;
2894 
2895 	pri = md_read("pri");
2896 	if (pri == NULL)
2897 		err(1, "unable to get PRI");
2898 
2899 	pri_init_components(pri);
2900 
2901 	printf("%-16s %s\n", "PATH", "NAME");
2902 	TAILQ_FOREACH(component, &components, link) {
2903 		printf("%-16s %s\n", component->path, component->nac);
2904 	}
2905 }
2906