/*	$OpenBSD: xen.c,v 1.82 2017/06/02 20:25:50 mikeb Exp $	*/

/*
 * Copyright (c) 2015, 2016, 2017 Mike Belopuhov
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

/*
 * Xen requires locked atomic operations even on uniprocessor kernels:
 * the hypervisor and other domains touch the shared info page and the
 * grant tables concurrently with this CPU.  <sys/atomic.h> only emits
 * the "lock" prefix when MULTIPROCESSOR is defined, so define it
 * temporarily around the include on UP kernels.
 */
#ifndef MULTIPROCESSOR
#define _XENMPATOMICS
#define MULTIPROCESSOR
#endif
#include <sys/atomic.h>
#ifdef _XENMPATOMICS
#undef MULTIPROCESSOR
#undef _XENMPATOMICS
#endif

#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/refcnt.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/stdint.h>
#include <sys/device.h>
#include <sys/task.h>
#include <sys/syslog.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>

#include <uvm/uvm_extern.h>

#include <machine/i82489var.h>

#include <dev/rndvar.h>

#include <dev/pv/pvvar.h>
#include <dev/pv/pvreg.h>
#include <dev/pv/xenreg.h>
#include <dev/pv/xenvar.h>

/* #define XEN_DEBUG */

#ifdef XEN_DEBUG
#define DPRINTF(x...)		printf(x)
#else
#define DPRINTF(x...)
#endif

struct xen_softc *xen_sc;

int	xen_init_hypercall(struct xen_softc *);
int	xen_getfeatures(struct xen_softc *);
int	xen_init_info_page(struct xen_softc *);
int	xen_init_cbvec(struct xen_softc *);
int	xen_init_interrupts(struct xen_softc *);
int	xen_init_grant_tables(struct xen_softc *);
struct xen_gntent *
	xen_grant_table_grow(struct xen_softc *);
int	xen_grant_table_alloc(struct xen_softc *, grant_ref_t *);
void	xen_grant_table_free(struct xen_softc *, grant_ref_t);
void	xen_grant_table_enter(struct xen_softc *, grant_ref_t, paddr_t,
	    int, int);
void	xen_grant_table_remove(struct xen_softc *, grant_ref_t);
void	xen_disable_emulated_devices(struct xen_softc *);

int 	xen_match(struct device *, void *, void *);
void	xen_attach(struct device *, struct device *, void *);
void	xen_deferred(struct device *);
void	xen_control(void *);
void	xen_hotplug(void *);
void	xen_resume(struct device *);
int	xen_activate(struct device *, int);
int	xen_attach_device(struct xen_softc *, struct xen_devlist *,
	    const char *, const char *);
int	xen_probe_devices(struct xen_softc *);

int	xen_bus_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
	    bus_size_t, int, bus_dmamap_t *);
void	xen_bus_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
int	xen_bus_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
	    struct proc *, int);
int	xen_bus_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
	    int);
void	xen_bus_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
void	xen_bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
	    bus_size_t, int);

int	xs_attach(struct xen_softc *);

struct cfdriver xen_cd = {
	NULL, "xen", DV_DULL
};

const struct cfattach xen_ca = {
	sizeof(struct xen_softc), xen_match, xen_attach, NULL, xen_activate
};

struct bus_dma_tag xen_bus_dma_tag = {
	NULL,
	xen_bus_dmamap_create,
	xen_bus_dmamap_destroy,
	xen_bus_dmamap_load,
	xen_bus_dmamap_load_mbuf,
	NULL,
	NULL,
	xen_bus_dmamap_unload,
	xen_bus_dmamap_sync,
	_bus_dmamem_alloc,
	NULL,
	_bus_dmamem_free,
	_bus_dmamem_map,
	_bus_dmamem_unmap,
	NULL,
};

int
xen_match(struct device *parent, void *match, void *aux)
{
	struct pv_attach_args *pva = aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];

	if (hv->hv_base == 0)
		return (0);

	return (1);
}

void
xen_attach(struct device *parent, struct device *self, void *aux)
{
	struct pv_attach_args *pva = (struct pv_attach_args *)aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];
	struct xen_softc *sc = (struct xen_softc *)self;

	sc->sc_base = hv->hv_base;

	if (xen_init_hypercall(sc))
		return;

	/* Wire it up to the global */
	xen_sc = sc;

	if (xen_getfeatures(sc))
		return;

	if (xen_init_info_page(sc))
		return;

	xen_init_cbvec(sc);

	if (xen_init_interrupts(sc))
		return;

	if (xen_init_grant_tables(sc))
		return;

	if (xs_attach(sc))
		return;

	xen_probe_devices(sc);

	/* pvbus(4) key/value interface */
	hv->hv_kvop = xs_kvop;
	hv->hv_arg = sc;

	xen_disable_emulated_devices(sc);

	config_mountroot(self, xen_deferred);
}

void
xen_deferred(struct device *self)
{
	struct xen_softc *sc = (struct xen_softc *)self;

	if (!(sc->sc_flags & XSF_CBVEC)) {
		DPRINTF("%s: callback vector hasn't been established\n",
		    sc->sc_dev.dv_xname);
		return;
	}

	xen_intr_enable();

	if (xs_watch(sc, "control", "shutdown", &sc->sc_ctltsk,
	    xen_control, sc))
		printf("%s: failed to setup shutdown control watch\n",
		    sc->sc_dev.dv_xname);
}

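/*
 * Handle a "control/shutdown" event: read the requested action from
 * xenstore, acknowledge it by clearing the node and perform the
 * corresponding state transition.  An empty read is expected here
 * since clearing the node retriggers the watch that got us called.
 */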
void
xen_control(void *arg)
{
	struct xen_softc *sc = arg;
	struct xs_transaction xst;
	char action[128];
	int error;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	error = xs_getprop(sc, "control", "shutdown", action, sizeof(action));
	if (error) {
		if (error != ENOENT)
			printf("%s: failed to process control event\n",
			    sc->sc_dev.dv_xname);
		return;
	}

	if (strlen(action) == 0)
		return;

	/* Acknowledge the event */
	xs_setprop(sc, "control", "shutdown", "", 0);

	if (strcmp(action, "halt") == 0 || strcmp(action, "poweroff") == 0) {
		pvbus_shutdown(&sc->sc_dev);
	} else if (strcmp(action, "reboot") == 0) {
		pvbus_reboot(&sc->sc_dev);
	} else if (strcmp(action, "crash") == 0) {
		panic("xen told us to do this");
	} else if (strcmp(action, "suspend") == 0) {
		/* Not implemented yet */
	} else {
		printf("%s: unknown shutdown event \"%s\"\n",
		    sc->sc_dev.dv_xname, action);
	}
}

void
xen_resume(struct device *self)
{
}

int
xen_activate(struct device *self, int act)
{
	int rv = 0;

	switch (act) {
	case DVACT_RESUME:
		xen_resume(self);
		break;
	}
	return (rv);
}

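/*
 * Discover the hypercall page configuration via the Xen CPUID leaves:
 * EAX (regs[0]) returns the number of hypercall pages and EBX (regs[1])
 * the MSR used to program them.  Writing the page's physical address
 * to that MSR makes the hypervisor fill it with call stubs.  Only the
 * single-page layout is supported here.
 */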
int
xen_init_hypercall(struct xen_softc *sc)
{
	extern void *xen_hypercall_page;
	uint32_t regs[4];
	paddr_t pa;

	/* Get hypercall page configuration MSR */
	CPUID(sc->sc_base + CPUID_OFFSET_XEN_HYPERCALL,
	    regs[0], regs[1], regs[2], regs[3]);

	/* We don't support more than one hypercall page */
	if (regs[0] != 1) {
		printf(": requested %u hypercall pages\n", regs[0]);
		return (-1);
	}

	sc->sc_hc = &xen_hypercall_page;

	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_hc, &pa)) {
		printf(": hypercall page PA extraction failed\n");
		return (-1);
	}
	wrmsr(regs[1], pa);

	return (0);
}

int
xen_hypercall(struct xen_softc *sc, int op, int argc, ...)
{
	va_list ap;
	ulong argv[5];
	int i;

	if (argc < 0 || argc > 5)
		return (-1);
	va_start(ap, argc);
	for (i = 0; i < argc; i++)
		argv[i] = (ulong)va_arg(ap, ulong);
	va_end(ap);
	return (xen_hypercallv(sc, op, argc, argv));
}

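/*
 * Each hypercall has a 32-byte stub in the hypercall page, so the
 * entry point for operation "op" is sc_hc + op * 32.  The HYPERCALL_*
 * macros from xenreg.h place the arguments in the registers the ABI
 * expects and declare the proper clobbers for the inline assembly.
 */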
int
xen_hypercallv(struct xen_softc *sc, int op, int argc, ulong *argv)
{
	ulong hcall;
	int rv = 0;

	hcall = (ulong)sc->sc_hc + op * 32;

#if defined(XEN_DEBUG) && disabled
	{
		int i;

		printf("hypercall %d", op);
		if (argc > 0) {
			printf(", args {");
			for (i = 0; i < argc; i++)
				printf(" %#lx", argv[i]);
			printf(" }\n");
		} else
			printf("\n");
	}
#endif

	switch (argc) {
	case 0: {
		HYPERCALL_RES1;
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1		\
			: HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 1: {
		HYPERCALL_RES1; HYPERCALL_RES2;
		HYPERCALL_ARG1(argv[0]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			: HYPERCALL_IN1			\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 2: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3		\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 3: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_RES4;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		HYPERCALL_ARG3(argv[2]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			  HYPERCALL_IN3			\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 4: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_RES4; HYPERCALL_RES5;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		HYPERCALL_ARG3(argv[2]); HYPERCALL_ARG4(argv[3]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
			  HYPERCALL_OUT5		\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			  HYPERCALL_IN3	HYPERCALL_IN4	\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 5: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_RES4; HYPERCALL_RES5; HYPERCALL_RES6;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		HYPERCALL_ARG3(argv[2]); HYPERCALL_ARG4(argv[3]);
		HYPERCALL_ARG5(argv[4]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
			  HYPERCALL_OUT5 HYPERCALL_OUT6	\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			  HYPERCALL_IN3	HYPERCALL_IN4	\
			  HYPERCALL_IN5			\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	default:
		DPRINTF("%s: wrong number of arguments: %d\n", __func__, argc);
		rv = -1;
		break;
	}
	return (rv);
}

int
xen_getfeatures(struct xen_softc *sc)
{
	struct xen_feature_info xfi;

	memset(&xfi, 0, sizeof(xfi));
	if (xen_hypercall(sc, XC_VERSION, 2, XENVER_get_features, &xfi) < 0) {
		printf(": failed to fetch features\n");
		return (-1);
	}
	sc->sc_features = xfi.submap;
#ifdef XEN_DEBUG
	printf(": features %b", sc->sc_features,
	    "\20\014DOM0\013PIRQ\012PVCLOCK\011CBVEC\010GNTFLAGS\007HMA"
	    "\006PTUPD\005PAE4G\004SUPERVISOR\003AUTOPMAP\002WDT\001WPT");
#else
	printf(": features %#x", sc->sc_features);
#endif
	return (0);
}

#ifdef XEN_DEBUG
void
xen_print_info_page(void)
{
	struct xen_softc *sc = xen_sc;
	struct shared_info *s = sc->sc_ipg;
	struct vcpu_info *v;
	int i;

	virtio_membar_sync();
	for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
		v = &s->vcpu_info[i];
		if (!v->evtchn_upcall_pending && !v->evtchn_upcall_mask &&
		    !v->evtchn_pending_sel && !v->time.version &&
		    !v->time.tsc_timestamp && !v->time.system_time &&
		    !v->time.tsc_to_system_mul && !v->time.tsc_shift)
			continue;
		printf("vcpu%d:\n"
		    "   upcall_pending=%02x upcall_mask=%02x pending_sel=%#lx\n"
		    "   time version=%u tsc=%llu system=%llu\n"
		    "   time mul=%u shift=%d\n",
		    i, v->evtchn_upcall_pending, v->evtchn_upcall_mask,
		    v->evtchn_pending_sel, v->time.version,
		    v->time.tsc_timestamp, v->time.system_time,
		    v->time.tsc_to_system_mul, v->time.tsc_shift);
	}
	printf("pending events: ");
	for (i = 0; i < nitems(s->evtchn_pending); i++) {
		if (s->evtchn_pending[i] == 0)
			continue;
		printf(" %d:%#lx", i, s->evtchn_pending[i]);
	}
	printf("\nmasked events: ");
	for (i = 0; i < nitems(s->evtchn_mask); i++) {
		if (s->evtchn_mask[i] == 0xffffffffffffffffULL)
			continue;
		printf(" %d:%#lx", i, s->evtchn_mask[i]);
	}
	printf("\nwc ver=%u sec=%u nsec=%u\n", s->wc_version, s->wc_sec,
	    s->wc_nsec);
	printf("arch maxpfn=%lu framelist=%lu nmi=%lu\n", s->arch.max_pfn,
	    s->arch.pfn_to_mfn_frame_list, s->arch.nmi_reason);
}
#endif	/* XEN_DEBUG */

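/*
 * The shared info page is where the hypervisor publishes per-vcpu
 * upcall state, the two-level event channel bitmaps and wall-clock
 * time.  It is mapped into the guest by handing the frame number of
 * a freshly allocated page to XENMEM_add_to_physmap.
 */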
int
xen_init_info_page(struct xen_softc *sc)
{
	struct xen_add_to_physmap xatp;
	paddr_t pa;

	sc->sc_ipg = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->sc_ipg == NULL) {
		printf(": failed to allocate shared info page\n");
		return (-1);
	}
	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_ipg, &pa)) {
		printf(": shared info page PA extraction failed\n");
		free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
		return (-1);
	}
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = atop(pa);
	if (xen_hypercall(sc, XC_MEMORY, 2, XENMEM_add_to_physmap, &xatp)) {
		printf(": failed to register shared info page\n");
		free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
		return (-1);
	}
	return (0);
}

int
xen_init_cbvec(struct xen_softc *sc)
{
	struct xen_hvm_param xhp;

	if ((sc->sc_features & XENFEAT_CBVEC) == 0)
		return (ENOENT);

	xhp.domid = DOMID_SELF;
	xhp.index = HVM_PARAM_CALLBACK_IRQ;
	xhp.value = HVM_CALLBACK_VECTOR(LAPIC_XEN_VECTOR);
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_set_param, &xhp)) {
		/* Will retry with the xspd(4) PCI interrupt */
		return (ENOENT);
	}
	DPRINTF(", idtvec %d", LAPIC_XEN_VECTOR);

	sc->sc_flags |= XSF_CBVEC;

	return (0);
}

int
xen_init_interrupts(struct xen_softc *sc)
{
	int i;

	sc->sc_irq = LAPIC_XEN_VECTOR;

	/*
	 * Clear all pending events and mask all interrupts
	 */
	for (i = 0; i < nitems(sc->sc_ipg->evtchn_pending); i++) {
		sc->sc_ipg->evtchn_pending[i] = 0;
		sc->sc_ipg->evtchn_mask[i] = ~0UL;
	}

	SLIST_INIT(&sc->sc_intrs);

	mtx_init(&sc->sc_islck, IPL_NET);

	return (0);
}

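/*
 * Newer hypervisors take the event channel command as a separate
 * hypercall argument; older ones expect a struct evtchn_op with the
 * command embedded.  Fall back to the legacy interface when the new
 * one returns ENOXENSYS.
 */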
static int
xen_evtchn_hypercall(struct xen_softc *sc, int cmd, void *arg, size_t len)
{
	struct evtchn_op compat;
	int error;

	error = xen_hypercall(sc, XC_EVTCHN, 2, cmd, arg);
	if (error == -ENOXENSYS) {
		memset(&compat, 0, sizeof(compat));
		compat.cmd = cmd;
		memcpy(&compat.u, arg, len);
		error = xen_hypercall(sc, XC_OEVTCHN, 1, &compat);
	}
	return (error);
}

static inline void
xen_intsrc_add(struct xen_softc *sc, struct xen_intsrc *xi)
{
	refcnt_init(&xi->xi_refcnt);
	mtx_enter(&sc->sc_islck);
	SLIST_INSERT_HEAD(&sc->sc_intrs, xi, xi_entry);
	mtx_leave(&sc->sc_islck);
}

static inline struct xen_intsrc *
xen_intsrc_acquire(struct xen_softc *sc, evtchn_port_t port)
{
	struct xen_intsrc *xi;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (xi->xi_port == port) {
			refcnt_take(&xi->xi_refcnt);
			break;
		}
	}
	mtx_leave(&sc->sc_islck);
	return (xi);
}

static inline void
xen_intsrc_release(struct xen_softc *sc, struct xen_intsrc *xi)
{
	refcnt_rele_wake(&xi->xi_refcnt);
}

static inline struct xen_intsrc *
xen_intsrc_remove(struct xen_softc *sc, evtchn_port_t port)
{
	struct xen_intsrc *xi;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (xi->xi_port == port) {
			SLIST_REMOVE(&sc->sc_intrs, xi, xen_intsrc, xi_entry);
			break;
		}
	}
	mtx_leave(&sc->sc_islck);
	if (xi != NULL)
		refcnt_finalize(&xi->xi_refcnt, "xenisrm");
	return (xi);
}

void
xen_intr_ack(void)
{
	struct xen_softc *sc = xen_sc;
	struct shared_info *s = sc->sc_ipg;
	struct cpu_info *ci = curcpu();
	struct vcpu_info *v = &s->vcpu_info[CPU_INFO_UNIT(ci)];

	v->evtchn_upcall_pending = 0;
	virtio_membar_sync();
}

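/*
 * Event channel upcall: scan the two-level pending bitmap.  Each set
 * bit in the per-vcpu selector word identifies a "row" of the
 * evtchn_pending array with work in it; each pending and unmasked bit
 * within a row maps to an event port whose interrupt task is then
 * scheduled.  Both levels are cleared atomically as they are read.
 */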
void
xen_intr(void)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct shared_info *s = sc->sc_ipg;
	struct cpu_info *ci = curcpu();
	struct vcpu_info *v = &s->vcpu_info[CPU_INFO_UNIT(ci)];
	ulong pending, selector;
	int port, bit, row;

	v->evtchn_upcall_pending = 0;
	selector = atomic_swap_ulong(&v->evtchn_pending_sel, 0);

	for (row = 0; selector > 0; selector >>= 1, row++) {
		if ((selector & 1) == 0)
			continue;
		if ((sc->sc_ipg->evtchn_pending[row] &
		    ~(sc->sc_ipg->evtchn_mask[row])) == 0)
			continue;
		pending = atomic_swap_ulong(&sc->sc_ipg->evtchn_pending[row],
		    0) & ~(sc->sc_ipg->evtchn_mask[row]);
		for (bit = 0; pending > 0; pending >>= 1, bit++) {
			if ((pending & 1) == 0)
				continue;
			port = (row * LONG_BIT) + bit;
			if ((xi = xen_intsrc_acquire(sc, port)) == NULL) {
				printf("%s: unhandled interrupt on port %d\n",
				    sc->sc_dev.dv_xname, port);
				continue;
			}
			xi->xi_evcnt.ec_count++;
			task_add(xi->xi_taskq, &xi->xi_task);
			xen_intsrc_release(sc, xi);
		}
	}
}

void
xen_intr_schedule(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		task_add(xi->xi_taskq, &xi->xi_task);
		xen_intsrc_release(sc, xi);
	}
}

static void
xen_barrier_task(void *arg)
{
	int *notdone = arg;

	*notdone = 0;
	wakeup_one(notdone);
}

/*
 * This code achieves two goals: 1) makes sure that *after* masking
 * the interrupt source we're not getting more task_adds: intr_barrier
 * will take care of that, and 2) makes sure that the interrupt task
 * has finished executing the current task and won't be called again:
 * it sets up a barrier task to await completion of the current task
 * and relies on the interrupt masking to prevent submission of new
 * tasks in the future.
 */
void
xen_intr_barrier(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct sleep_state sls;
	int notdone = 1;
	struct task t = TASK_INITIALIZER(xen_barrier_task, &notdone);

	/*
	 * XXX This will need to be revised once intr_barrier starts
	 * using its argument.
	 */
	intr_barrier(NULL);

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		task_add(xi->xi_taskq, &t);
		while (notdone) {
			sleep_setup(&sls, &notdone, PWAIT, "xenbar");
			sleep_finish(&sls, notdone);
		}
		xen_intsrc_release(sc, xi);
	}
}

void
xen_intr_signal(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_send es;

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		es.port = xi->xi_port;
		xen_intsrc_release(sc, xi);
		xen_evtchn_hypercall(sc, EVTCHNOP_send, &es, sizeof(es));
	}
}

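/*
 * Establish an interrupt handler for an event port.  With port 0 a
 * fresh inter-domain port is allocated from the hypervisor; otherwise
 * the caller's existing port is adopted and flagged so that it won't
 * be closed on disestablish.  Every source gets its own
 * single-threaded taskq so handlers run in process context.
 */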
int
xen_intr_establish(evtchn_port_t port, xen_intr_handle_t *xih, int domain,
    void (*handler)(void *), void *arg, char *name)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_alloc_unbound eau;
#ifdef notyet
	struct evtchn_bind_vcpu ebv;
#endif
#if defined(XEN_DEBUG) && disabled
	struct evtchn_status es;
#endif

	if (port && (xi = xen_intsrc_acquire(sc, port)) != NULL) {
		xen_intsrc_release(sc, xi);
		DPRINTF("%s: interrupt handler has already been established "
		    "for port %u\n", sc->sc_dev.dv_xname, port);
		return (-1);
	}

	xi = malloc(sizeof(*xi), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (xi == NULL)
		return (-1);

	xi->xi_port = (evtchn_port_t)*xih;

	xi->xi_taskq = taskq_create(name, 1, IPL_NET, TASKQ_MPSAFE);
	if (!xi->xi_taskq) {
		printf("%s: failed to create interrupt task for %s\n",
		    sc->sc_dev.dv_xname, name);
		free(xi, M_DEVBUF, sizeof(*xi));
		return (-1);
	}
	task_set(&xi->xi_task, handler, arg);

	if (port == 0) {
		/* We're being asked to allocate a new event port */
		memset(&eau, 0, sizeof(eau));
		eau.dom = DOMID_SELF;
		eau.remote_dom = domain;
		if (xen_evtchn_hypercall(sc, EVTCHNOP_alloc_unbound, &eau,
		    sizeof(eau)) != 0) {
			DPRINTF("%s: failed to allocate new event port\n",
			    sc->sc_dev.dv_xname);
			free(xi, M_DEVBUF, sizeof(*xi));
			return (-1);
		}
		*xih = xi->xi_port = eau.port;
	} else {
		*xih = xi->xi_port = port;
		/*
		 * The Event Channel API didn't open this port, so it is not
		 * responsible for closing it automatically on unbind.
		 */
		xi->xi_noclose = 1;
	}

#ifdef notyet
	/* Bind interrupt to VCPU#0 */
	memset(&ebv, 0, sizeof(ebv));
	ebv.port = xi->xi_port;
	ebv.vcpu = 0;
	if (xen_evtchn_hypercall(sc, EVTCHNOP_bind_vcpu, &ebv, sizeof(ebv))) {
		printf("%s: failed to bind interrupt on port %u to vcpu%d\n",
		    sc->sc_dev.dv_xname, ebv.port, ebv.vcpu);
	}
#endif

	evcount_attach(&xi->xi_evcnt, name, &sc->sc_irq);

	xen_intsrc_add(sc, xi);

	/* Mask the event port */
	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);

#if defined(XEN_DEBUG) && disabled
	memset(&es, 0, sizeof(es));
	es.dom = DOMID_SELF;
	es.port = xi->xi_port;
	if (xen_evtchn_hypercall(sc, EVTCHNOP_status, &es, sizeof(es))) {
		printf("%s: failed to obtain status for port %d\n",
		    sc->sc_dev.dv_xname, es.port);
	}
	printf("%s: port %u bound to vcpu%u", sc->sc_dev.dv_xname,
	    es.port, es.vcpu);
	if (es.status == EVTCHNSTAT_interdomain)
		printf(": domain %d port %u\n", es.u.interdomain.dom,
		    es.u.interdomain.port);
	else if (es.status == EVTCHNSTAT_unbound)
		printf(": domain %d\n", es.u.unbound.dom);
	else if (es.status == EVTCHNSTAT_pirq)
		printf(": pirq %u\n", es.u.pirq);
	else if (es.status == EVTCHNSTAT_virq)
		printf(": virq %u\n", es.u.virq);
	else
		printf("\n");
#endif

	return (0);
}

int
xen_intr_disestablish(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct evtchn_close ec;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_remove(sc, port)) == NULL)
		return (-1);

	evcount_detach(&xi->xi_evcnt);

	taskq_destroy(xi->xi_taskq);

	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);
	clear_bit(xi->xi_port, &sc->sc_ipg->evtchn_pending[0]);

	if (!xi->xi_noclose) {
		ec.port = xi->xi_port;
		if (xen_evtchn_hypercall(sc, EVTCHNOP_close, &ec, sizeof(ec))) {
			DPRINTF("%s: failed to close event port %u\n",
			    sc->sc_dev.dv_xname, xi->xi_port);
		}
	}

	free(xi, M_DEVBUF, sizeof(*xi));
	return (0);
}

void
xen_intr_enable(void)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_unmask eu;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (!xi->xi_masked) {
			eu.port = xi->xi_port;
			if (xen_evtchn_hypercall(sc, EVTCHNOP_unmask, &eu,
			    sizeof(eu)))
				printf("%s: unmasking port %u failed\n",
				    sc->sc_dev.dv_xname, xi->xi_port);
			virtio_membar_sync();
			if (test_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]))
				printf("%s: port %u is still masked\n",
				    sc->sc_dev.dv_xname, xi->xi_port);
		}
	}
	mtx_leave(&sc->sc_islck);
}

void
xen_intr_mask(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, port)) != NULL) {
		xi->xi_masked = 1;
		set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);
		xen_intsrc_release(sc, xi);
	}
}

int
xen_intr_unmask(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct xen_intsrc *xi;
	struct evtchn_unmask eu;

	if ((xi = xen_intsrc_acquire(sc, port)) != NULL) {
		xi->xi_masked = 0;
		if (!test_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0])) {
			/* Drop the reference taken above before bailing out */
			xen_intsrc_release(sc, xi);
			return (0);
		}
		eu.port = xi->xi_port;
		xen_intsrc_release(sc, xi);
		return (xen_evtchn_hypercall(sc, EVTCHNOP_unmask, &eu,
		    sizeof(eu)));
	}
	return (0);
}

int
xen_init_grant_tables(struct xen_softc *sc)
{
	struct gnttab_query_size gqs;

	gqs.dom = DOMID_SELF;
	if (xen_hypercall(sc, XC_GNTTAB, 3, GNTTABOP_query_size, &gqs, 1)) {
		printf(": failed the query for grant table pages\n");
		return (-1);
	}
	if (gqs.nr_frames == 0 || gqs.nr_frames > gqs.max_nr_frames) {
		printf(": invalid number of grant table pages: %u/%u\n",
		    gqs.nr_frames, gqs.max_nr_frames);
		return (-1);
	}

	sc->sc_gntmax = gqs.max_nr_frames;

	sc->sc_gnt = mallocarray(sc->sc_gntmax + 1, sizeof(struct xen_gntent),
	    M_DEVBUF, M_ZERO | M_NOWAIT);
	if (sc->sc_gnt == NULL) {
		printf(": failed to allocate grant table lookup table\n");
		return (-1);
	}

	mtx_init(&sc->sc_gntlck, IPL_NET);

	if (xen_grant_table_grow(sc) == NULL) {
		free(sc->sc_gnt, M_DEVBUF, (sc->sc_gntmax + 1) *
		    sizeof(struct xen_gntent));
		return (-1);
	}

	printf(", %d grant table frames", sc->sc_gntmax);

	xen_bus_dma_tag._cookie = sc;

	return (0);
}

struct xen_gntent *
xen_grant_table_grow(struct xen_softc *sc)
{
	struct xen_add_to_physmap xatp;
	struct xen_gntent *ge;
	void *va;
	paddr_t pa;

	if (sc->sc_gntcnt == sc->sc_gntmax) {
		printf("%s: grant table frame allotment limit reached\n",
		    sc->sc_dev.dv_xname);
		return (NULL);
	}

	va = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
	if (va == NULL)
		return (NULL);
	if (!pmap_extract(pmap_kernel(), (vaddr_t)va, &pa)) {
		printf("%s: grant table page PA extraction failed\n",
		    sc->sc_dev.dv_xname);
		km_free(va, PAGE_SIZE, &kv_any, &kp_zero);
		return (NULL);
	}

	mtx_enter(&sc->sc_gntlck);

	ge = &sc->sc_gnt[sc->sc_gntcnt];
	ge->ge_table = va;

	xatp.domid = DOMID_SELF;
	xatp.idx = sc->sc_gntcnt;
	xatp.space = XENMAPSPACE_grant_table;
	xatp.gpfn = atop(pa);
	if (xen_hypercall(sc, XC_MEMORY, 2, XENMEM_add_to_physmap, &xatp)) {
		printf("%s: failed to add a grant table page\n",
		    sc->sc_dev.dv_xname);
		km_free(ge->ge_table, PAGE_SIZE, &kv_any, &kp_zero);
		mtx_leave(&sc->sc_gntlck);
		return (NULL);
	}
	ge->ge_start = sc->sc_gntcnt * GNTTAB_NEPG;
	/* First page has 8 reserved entries */
	ge->ge_reserved = ge->ge_start == 0 ? GNTTAB_NR_RESERVED_ENTRIES : 0;
	ge->ge_free = GNTTAB_NEPG - ge->ge_reserved;
	ge->ge_next = ge->ge_reserved;
	mtx_init(&ge->ge_lock, IPL_NET);

	sc->sc_gntcnt++;
	mtx_leave(&sc->sc_gntlck);

	return (ge);
}

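/*
 * Allocate a grant table reference.  The most recently grown page is
 * tried first, then the older pages, and the table is grown by
 * another frame if all of them are full.  Within a page a rotating
 * ge_next cursor scans for a free entry; a nonzero frame marks an
 * entry as taken.
 */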
int
xen_grant_table_alloc(struct xen_softc *sc, grant_ref_t *ref)
{
	struct xen_gntent *ge;
	int i;

	/* Start with a previously allocated table page */
	ge = &sc->sc_gnt[sc->sc_gntcnt - 1];
	if (ge->ge_free > 0) {
		mtx_enter(&ge->ge_lock);
		if (ge->ge_free > 0)
			goto search;
		mtx_leave(&ge->ge_lock);
	}

	/* Try other existing table pages */
	for (i = 0; i < sc->sc_gntcnt; i++) {
		ge = &sc->sc_gnt[i];
		if (ge->ge_free == 0)
			continue;
		mtx_enter(&ge->ge_lock);
		if (ge->ge_free > 0)
			goto search;
		mtx_leave(&ge->ge_lock);
	}

 alloc:
	/* Allocate a new table page */
	if ((ge = xen_grant_table_grow(sc)) == NULL)
		return (-1);

	mtx_enter(&ge->ge_lock);
	if (ge->ge_free == 0) {
		/* We were not fast enough... */
		mtx_leave(&ge->ge_lock);
		goto alloc;
	}

 search:
	for (i = ge->ge_next;
	     /* Math works here because GNTTAB_NEPG is a power of 2 */
	     i != ((ge->ge_next + GNTTAB_NEPG - 1) & (GNTTAB_NEPG - 1));
	     i++) {
		if (i == GNTTAB_NEPG)
			i = 0;
		if (ge->ge_reserved && i < ge->ge_reserved)
			continue;
		if (ge->ge_table[i].frame != 0)
			continue;
		*ref = ge->ge_start + i;
		ge->ge_table[i].flags = GTF_invalid;
		ge->ge_table[i].frame = 0xffffffff; /* Mark as taken */
		if ((ge->ge_next = i + 1) == GNTTAB_NEPG)
			ge->ge_next = ge->ge_reserved;
		ge->ge_free--;
		mtx_leave(&ge->ge_lock);
		return (0);
	}
	mtx_leave(&ge->ge_lock);

	panic("page full, sc %p gnt %p (%d) ge %p", sc, sc->sc_gnt,
	    sc->sc_gntcnt, ge);
	return (-1);
}

void
xen_grant_table_free(struct xen_softc *sc, grant_ref_t ref)
{
	struct xen_gntent *ge;

#ifdef XEN_DEBUG
	if (ref > sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
	mtx_enter(&ge->ge_lock);
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref > ge->ge_start + GNTTAB_NEPG) {
		mtx_leave(&ge->ge_lock);
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	if (ge->ge_table[ref].flags != GTF_invalid) {
		mtx_leave(&ge->ge_lock);
		panic("reference %u is still in use, flags %#x frame %#x",
		    ref + ge->ge_start, ge->ge_table[ref].flags,
		    ge->ge_table[ref].frame);
	}
	ge->ge_table[ref].frame = 0;
	ge->ge_next = ref;
	ge->ge_free++;
	mtx_leave(&ge->ge_lock);
}

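/*
 * Publish a grant table entry.  The frame and domain id must be
 * visible to the hypervisor before the flags word validates the
 * entry, hence the memory barrier between the two stores.
 */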
void
xen_grant_table_enter(struct xen_softc *sc, grant_ref_t ref, paddr_t pa,
    int domain, int flags)
{
	struct xen_gntent *ge;

#ifdef XEN_DEBUG
	if (ref > sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref > ge->ge_start + GNTTAB_NEPG) {
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	ge->ge_table[ref].frame = atop(pa);
	ge->ge_table[ref].domid = domain;
	virtio_membar_sync();
	ge->ge_table[ref].flags = GTF_permit_access | flags;
	virtio_membar_sync();
}

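/*
 * Revoke a grant table entry.  The flags word is cleared with a
 * compare-and-swap that only succeeds while the remote domain isn't
 * holding the entry (GTF_reading/GTF_writing clear), so spin until
 * it lets go or give up after a while.
 */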
void
xen_grant_table_remove(struct xen_softc *sc, grant_ref_t ref)
{
	struct xen_gntent *ge;
	uint32_t flags, *ptr;
	int loop;

#ifdef XEN_DEBUG
	if (ref > sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref > ge->ge_start + GNTTAB_NEPG) {
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	/* Invalidate the grant reference */
	virtio_membar_sync();
	ptr = (uint32_t *)&ge->ge_table[ref];
	flags = (ge->ge_table[ref].flags & ~(GTF_reading|GTF_writing)) |
	    (ge->ge_table[ref].domid << 16);
	loop = 0;
	while (atomic_cas_uint(ptr, flags, GTF_invalid) != flags) {
		if (loop++ > 100000000) {
			printf("%s: grant table reference %u is held "
			    "by domain %d\n", sc->sc_dev.dv_xname, ref +
			    ge->ge_start, ge->ge_table[ref].domid);
			return;
		}
#if (defined(__amd64__) || defined(__i386__))
		__asm volatile("pause": : : "memory");
#endif
	}
	ge->ge_table[ref].frame = 0xffffffff;
}

int
xen_bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm;
	int i, error;

	if (maxsegsz < PAGE_SIZE)
		return (EINVAL);

	/* Allocate a dma map structure */
	error = _bus_dmamap_create(t, size, nsegments, maxsegsz, boundary,
	    flags, dmamp);
	if (error)
		return (error);
	/* Allocate an array of grant table pa<->ref maps */
	gm = mallocarray(nsegments, sizeof(struct xen_gntmap), M_DEVBUF,
	    M_ZERO | ((flags & BUS_DMA_NOWAIT) ? M_NOWAIT : M_WAITOK));
	if (gm == NULL) {
		_bus_dmamap_destroy(t, *dmamp);
		*dmamp = NULL;
		return (ENOMEM);
	}
	/* Wire it to the dma map */
	(*dmamp)->_dm_cookie = gm;
	/* Claim references from the grant table */
	for (i = 0; i < (*dmamp)->_dm_segcnt; i++) {
		if (xen_grant_table_alloc(sc, &gm[i].gm_ref)) {
			xen_bus_dmamap_destroy(t, *dmamp);
			*dmamp = NULL;
			return (ENOBUFS);
		}
	}
	return (0);
}

void
xen_bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm;
	int i;

	gm = map->_dm_cookie;
	for (i = 0; i < map->_dm_segcnt; i++) {
		if (gm[i].gm_ref == 0)
			continue;
		xen_grant_table_free(sc, gm[i].gm_ref);
	}
	free(gm, M_DEVBUF, map->_dm_segcnt * sizeof(struct xen_gntmap));
	_bus_dmamap_destroy(t, map);
}

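/*
 * Load a buffer and swap the physical address of every DMA segment
 * for its grant table reference, which is what the backend expects
 * to find in the ring descriptors.  The upper 16 bits of the flags
 * carry the backend's domain id; BUS_DMA_WRITE (memory-to-device)
 * grants the remote end read-only access.
 */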
int
xen_bus_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    bus_size_t buflen, struct proc *p, int flags)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm = map->_dm_cookie;
	int i, domain, error;

	domain = flags >> 16;
	flags &= 0xffff;
	error = _bus_dmamap_load(t, map, buf, buflen, p, flags);
	if (error)
		return (error);
	for (i = 0; i < map->dm_nsegs; i++) {
		xen_grant_table_enter(sc, gm[i].gm_ref, map->dm_segs[i].ds_addr,
		    domain, flags & BUS_DMA_WRITE ? GTF_readonly : 0);
		gm[i].gm_paddr = map->dm_segs[i].ds_addr;
		map->dm_segs[i].ds_addr = gm[i].gm_ref;
	}
	return (0);
}

int
xen_bus_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0,
    int flags)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm = map->_dm_cookie;
	int i, domain, error;

	domain = flags >> 16;
	flags &= 0xffff;
	error = _bus_dmamap_load_mbuf(t, map, m0, flags);
	if (error)
		return (error);
	for (i = 0; i < map->dm_nsegs; i++) {
		xen_grant_table_enter(sc, gm[i].gm_ref, map->dm_segs[i].ds_addr,
		    domain, flags & BUS_DMA_WRITE ? GTF_readonly : 0);
		gm[i].gm_paddr = map->dm_segs[i].ds_addr;
		map->dm_segs[i].ds_addr = gm[i].gm_ref;
	}
	return (0);
}

void
xen_bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm = map->_dm_cookie;
	int i;

	for (i = 0; i < map->dm_nsegs; i++) {
		if (gm[i].gm_paddr == 0)
			continue;
		xen_grant_table_remove(sc, gm[i].gm_ref);
		map->dm_segs[i].ds_addr = gm[i].gm_paddr;
		gm[i].gm_paddr = 0;
	}
	_bus_dmamap_unload(t, map);
}

void
xen_bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
    bus_size_t size, int op)
{
	if ((op == (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) ||
	    (op == (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)))
		virtio_membar_sync();
}

static int
xen_attach_print(void *aux, const char *name)
{
	struct xen_attach_args *xa = aux;

	if (name)
		printf("\"%s\" at %s: %s", xa->xa_name, name, xa->xa_node);

	return (UNCONF);
}

int
xen_attach_device(struct xen_softc *sc, struct xen_devlist *xdl,
    const char *name, const char *unit)
{
	struct xen_attach_args xa;
	struct xen_device *xdv;
	unsigned long long res;

	xa.xa_dmat = &xen_bus_dma_tag;

	strlcpy(xa.xa_name, name, sizeof(xa.xa_name));
	snprintf(xa.xa_node, sizeof(xa.xa_node), "device/%s/%s", name, unit);

	if (xs_getprop(sc, xa.xa_node, "backend", xa.xa_backend,
	    sizeof(xa.xa_backend))) {
		DPRINTF("%s: failed to identify \"backend\" for "
		    "\"%s\"\n", sc->sc_dev.dv_xname, xa.xa_node);
		return (EIO);
	}

	if (xs_getnum(sc, xa.xa_node, "backend-id", &res) || res > UINT16_MAX) {
		DPRINTF("%s: invalid \"backend-id\" for \"%s\"\n",
		    sc->sc_dev.dv_xname, xa.xa_node);
		return (EIO);
	}
	xa.xa_domid = (uint16_t)res;

	xdv = malloc(sizeof(struct xen_device), M_DEVBUF, M_ZERO | M_NOWAIT);
	if (xdv == NULL)
		return (ENOMEM);

	strlcpy(xdv->dv_unit, unit, sizeof(xdv->dv_unit));
	LIST_INSERT_HEAD(&xdl->dl_devs, xdv, dv_entry);

	xdv->dv_dev = config_found((struct device *)sc, &xa, xen_attach_print);

	return (0);
}

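/*
 * Enumerate the "device" subtree of xenstore: every child node is a
 * device class (vif, vbd, ...) and every grandchild an instance to
 * attach.  A watch is set on each class subtree so that hotplug
 * events can be processed later.
 */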
int
xen_probe_devices(struct xen_softc *sc)
{
	struct xen_devlist *xdl;
	struct xs_transaction xst;
	struct iovec *iovp1 = NULL, *iovp2 = NULL;
	int i, j, error, iov1_cnt = 0, iov2_cnt = 0;
	char path[256];

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if ((error = xs_cmd(&xst, XS_LIST, "device", &iovp1, &iov1_cnt)) != 0)
		return (error);

	for (i = 0; i < iov1_cnt; i++) {
		if (strcmp("suspend", (char *)iovp1[i].iov_base) == 0)
			continue;
		snprintf(path, sizeof(path), "device/%s",
		    (char *)iovp1[i].iov_base);
		if ((error = xs_cmd(&xst, XS_LIST, path, &iovp2,
		    &iov2_cnt)) != 0)
			goto out;
		if ((xdl = malloc(sizeof(struct xen_devlist), M_DEVBUF,
		    M_ZERO | M_NOWAIT)) == NULL) {
			error = ENOMEM;
			goto out;
		}
		xdl->dl_xen = sc;
		strlcpy(xdl->dl_node, (const char *)iovp1[i].iov_base,
		    XEN_MAX_NODE_LEN);
		for (j = 0; j < iov2_cnt; j++) {
			error = xen_attach_device(sc, xdl,
			    (const char *)iovp1[i].iov_base,
			    (const char *)iovp2[j].iov_base);
			if (error) {
				printf("%s: failed to attach \"%s/%s\"\n",
				    sc->sc_dev.dv_xname, path,
				    (const char *)iovp2[j].iov_base);
				goto out;
			}
		}
		/* Setup a watch for every device subtree */
		if (xs_watch(sc, "device", (char *)iovp1[i].iov_base,
		    &xdl->dl_task, xen_hotplug, xdl))
			printf("%s: failed to setup hotplug watch for \"%s\"\n",
			    sc->sc_dev.dv_xname, (char *)iovp1[i].iov_base);
		SLIST_INSERT_HEAD(&sc->sc_devlists, xdl, dl_entry);
		xs_resfree(&xst, iovp2, iov2_cnt);
		iovp2 = NULL;
		iov2_cnt = 0;
	}

 out:
	if (iovp2)
		xs_resfree(&xst, iovp2, iov2_cnt);
	xs_resfree(&xst, iovp1, iov1_cnt);
	return (error);
}

void
xen_hotplug(void *arg)
{
	struct xen_devlist *xdl = arg;
	struct xen_softc *sc = xdl->dl_xen;
	struct xen_device *xdv, *xvdn;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	int error, i, keep, iov_cnt = 0;
	char path[256];
	int8_t *seen;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	snprintf(path, sizeof(path), "device/%s", xdl->dl_node);
	if ((error = xs_cmd(&xst, XS_LIST, path, &iovp, &iov_cnt)) != 0)
		return;

	seen = malloc(iov_cnt, M_TEMP, M_ZERO | M_WAITOK);

	/* Detect all removed and kept devices */
	LIST_FOREACH_SAFE(xdv, &xdl->dl_devs, dv_entry, xvdn) {
		for (i = 0, keep = 0; i < iov_cnt; i++) {
			if (!seen[i] &&
			    !strcmp(xdv->dv_unit, (char *)iovp[i].iov_base)) {
				seen[i]++;
				keep++;
				break;
			}
		}
		if (!keep) {
			DPRINTF("%s: removing \"%s/%s\"\n", sc->sc_dev.dv_xname,
			    xdl->dl_node, xdv->dv_unit);
			LIST_REMOVE(xdv, dv_entry);
			config_detach(xdv->dv_dev, 0);
			free(xdv, M_DEVBUF, sizeof(struct xen_device));
		}
	}

	/* Attach all new devices */
	for (i = 0; i < iov_cnt; i++) {
		if (seen[i])
			continue;
		DPRINTF("%s: attaching \"%s/%s\"\n", sc->sc_dev.dv_xname,
		    xdl->dl_node, (const char *)iovp[i].iov_base);
		error = xen_attach_device(sc, xdl, xdl->dl_node,
		    (const char *)iovp[i].iov_base);
		if (error) {
			printf("%s: failed to attach \"%s/%s\"\n",
			    sc->sc_dev.dv_xname, path,
			    (const char *)iovp[i].iov_base);
			continue;
		}
	}

	free(seen, M_TEMP, iov_cnt);

	xs_resfree(&xst, iovp, iov_cnt);
}

#include <machine/pio.h>

#define	XMI_PORT		0x10
#define XMI_MAGIC		0x49d2
#define XMI_UNPLUG_IDE		0x01
#define XMI_UNPLUG_NIC		0x02
#define XMI_UNPLUG_IDESEC	0x04

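/*
 * Ask the QEMU device model to unplug its emulated disks and NICs so
 * that they don't attach alongside the paravirtualized counterparts.
 * Reading the magic value from I/O port 0x10 confirms that the device
 * model supports the unplug protocol; writing the unplug mask to the
 * same port performs the detach.
 */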
void
xen_disable_emulated_devices(struct xen_softc *sc)
{
#if defined(__i386__) || defined(__amd64__)
	ushort unplug = 0;

	if (inw(XMI_PORT) != XMI_MAGIC) {
		printf("%s: failed to disable emulated devices\n",
		    sc->sc_dev.dv_xname);
		return;
	}
	if (sc->sc_unplug & XEN_UNPLUG_IDE)
		unplug |= XMI_UNPLUG_IDE;
	if (sc->sc_unplug & XEN_UNPLUG_IDESEC)
		unplug |= XMI_UNPLUG_IDESEC;
	if (sc->sc_unplug & XEN_UNPLUG_NIC)
		unplug |= XMI_UNPLUG_NIC;
	if (unplug)
		outw(XMI_PORT, unplug);
#endif	/* __i386__ || __amd64__ */
}

void
xen_unplug_emulated(void *xsc, int what)
{
	struct xen_softc *sc = xsc;

	sc->sc_unplug |= what;
}
1536