/*	$OpenBSD: xen.c,v 1.98 2024/05/24 10:05:55 jsg Exp $	*/

/*
 * Copyright (c) 2015, 2016, 2017 Mike Belopuhov
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

/* Xen requires locked atomic operations */
#ifndef MULTIPROCESSOR
#define _XENMPATOMICS
#define MULTIPROCESSOR
#endif
#include <sys/atomic.h>
#ifdef _XENMPATOMICS
#undef MULTIPROCESSOR
#undef _XENMPATOMICS
#endif
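
/*
 * Note on the MULTIPROCESSOR dance above: the shared info page and the
 * grant table entries are updated concurrently by the hypervisor, so
 * plain, unlocked read-modify-write sequences are never safe here, even
 * on uniprocessor kernels.  Temporarily defining MULTIPROCESSOR makes
 * <sys/atomic.h> provide the lock-prefixed variants.
 */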

#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/refcnt.h>
#include <sys/malloc.h>
#include <sys/stdint.h>
#include <sys/device.h>
#include <sys/task.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>

#include <uvm/uvm_extern.h>

#include <machine/i82489var.h>

#include <dev/pv/pvvar.h>
#include <dev/pv/pvreg.h>
#include <dev/pv/xenreg.h>
#include <dev/pv/xenvar.h>

/* #define XEN_DEBUG */

#ifdef XEN_DEBUG
#define DPRINTF(x...)		printf(x)
#else
#define DPRINTF(x...)
#endif

struct xen_softc *xen_sc;

int	xen_init_hypercall(struct xen_softc *);
int	xen_getfeatures(struct xen_softc *);
int	xen_init_info_page(struct xen_softc *);
int	xen_init_cbvec(struct xen_softc *);
int	xen_init_interrupts(struct xen_softc *);
void	xen_intr_dispatch(void *);
int	xen_init_grant_tables(struct xen_softc *);
struct xen_gntent *
	xen_grant_table_grow(struct xen_softc *);
int	xen_grant_table_alloc(struct xen_softc *, grant_ref_t *);
void	xen_grant_table_free(struct xen_softc *, grant_ref_t);
void	xen_grant_table_enter(struct xen_softc *, grant_ref_t, paddr_t,
	    int, int);
void	xen_grant_table_remove(struct xen_softc *, grant_ref_t);
void	xen_disable_emulated_devices(struct xen_softc *);

int 	xen_match(struct device *, void *, void *);
void	xen_attach(struct device *, struct device *, void *);
void	xen_deferred(struct device *);
void	xen_control(void *);
void	xen_hotplug(void *);
void	xen_resume(struct device *);
int	xen_activate(struct device *, int);
int	xen_attach_device(struct xen_softc *, struct xen_devlist *,
	    const char *, const char *);
int	xen_probe_devices(struct xen_softc *);

int	xen_bus_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
	    bus_size_t, int, bus_dmamap_t *);
void	xen_bus_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
int	xen_bus_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
	    struct proc *, int);
int	xen_bus_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
	    int);
void	xen_bus_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
void	xen_bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
	    bus_size_t, int);

int	xs_attach(struct xen_softc *);

struct cfdriver xen_cd = {
	NULL, "xen", DV_DULL
};

const struct cfattach xen_ca = {
	sizeof(struct xen_softc), xen_match, xen_attach, NULL, xen_activate
};

struct bus_dma_tag xen_bus_dma_tag = {
	NULL,
	xen_bus_dmamap_create,
	xen_bus_dmamap_destroy,
	xen_bus_dmamap_load,
	xen_bus_dmamap_load_mbuf,
	NULL,
	NULL,
	xen_bus_dmamap_unload,
	xen_bus_dmamap_sync,
	_bus_dmamem_alloc,
	NULL,
	_bus_dmamem_free,
	_bus_dmamem_map,
	_bus_dmamem_unmap,
	NULL,
};
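
/*
 * Only map create/destroy, load, load_mbuf, unload and sync are
 * overridden above; plain memory allocation, map and unmap fall through
 * to the machine-dependent _bus_dmamem_* helpers.  The NULL slots
 * (which, per <machine/bus.h> on amd64, should correspond to load_uio,
 * load_raw, dmamem_alloc_range and dmamem_mmap) are left unimplemented
 * for this tag.
 */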

int
xen_match(struct device *parent, void *match, void *aux)
{
	struct pv_attach_args *pva = aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];

	if (hv->hv_base == 0)
		return (0);

	return (1);
}

void
xen_attach(struct device *parent, struct device *self, void *aux)
{
	struct pv_attach_args *pva = (struct pv_attach_args *)aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];
	struct xen_softc *sc = (struct xen_softc *)self;

	sc->sc_base = hv->hv_base;
	sc->sc_dmat = pva->pva_dmat;

	if (xen_init_hypercall(sc))
		return;

	/* Wire it up to the global */
	xen_sc = sc;

	if (xen_getfeatures(sc))
		return;

	if (xen_init_info_page(sc))
		return;

	xen_init_cbvec(sc);

	if (xen_init_interrupts(sc))
		return;

	if (xen_init_grant_tables(sc))
		return;

	if (xs_attach(sc))
		return;

	xen_probe_devices(sc);

	/* pvbus(4) key/value interface */
	hv->hv_kvop = xs_kvop;
	hv->hv_arg = sc;

	xen_disable_emulated_devices(sc);

	config_mountroot(self, xen_deferred);
}

void
xen_deferred(struct device *self)
{
	struct xen_softc *sc = (struct xen_softc *)self;

	if (!(sc->sc_flags & XSF_CBVEC)) {
		DPRINTF("%s: callback vector hasn't been established\n",
		    sc->sc_dev.dv_xname);
		return;
	}

	xen_intr_enable();

	if (xs_watch(sc, "control", "shutdown", &sc->sc_ctltsk,
	    xen_control, sc))
		printf("%s: failed to setup shutdown control watch\n",
		    sc->sc_dev.dv_xname);
}

void
xen_control(void *arg)
{
	struct xen_softc *sc = arg;
	struct xs_transaction xst;
	char action[128];
	int error;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	error = xs_getprop(sc, "control", "shutdown", action, sizeof(action));
	if (error) {
		if (error != ENOENT)
			printf("%s: failed to process control event\n",
			    sc->sc_dev.dv_xname);
		return;
	}

	if (strlen(action) == 0)
		return;

	/* Acknowledge the event */
	xs_setprop(sc, "control", "shutdown", "", 0);

	if (strcmp(action, "halt") == 0 || strcmp(action, "poweroff") == 0) {
		pvbus_shutdown(&sc->sc_dev);
	} else if (strcmp(action, "reboot") == 0) {
		pvbus_reboot(&sc->sc_dev);
	} else if (strcmp(action, "crash") == 0) {
		panic("xen told us to do this");
	} else if (strcmp(action, "suspend") == 0) {
		/* Not implemented yet */
	} else {
		printf("%s: unknown shutdown event \"%s\"\n",
		    sc->sc_dev.dv_xname, action);
	}
}

void
xen_resume(struct device *self)
{
}

int
xen_activate(struct device *self, int act)
{
	int rv = 0;

	switch (act) {
	case DVACT_RESUME:
		xen_resume(self);
		break;
	}
	return (rv);
}

int
xen_init_hypercall(struct xen_softc *sc)
{
	extern void *xen_hypercall_page;
	uint32_t regs[4];
	paddr_t pa;

	/* Get hypercall page configuration MSR */
	CPUID(sc->sc_base + CPUID_OFFSET_XEN_HYPERCALL,
	    regs[0], regs[1], regs[2], regs[3]);

	/* We don't support more than one hypercall page */
	if (regs[0] != 1) {
		printf(": requested %u hypercall pages\n", regs[0]);
		return (-1);
	}

	sc->sc_hc = &xen_hypercall_page;

	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_hc, &pa)) {
		printf(": hypercall page PA extraction failed\n");
		return (-1);
	}
	wrmsr(regs[1], pa);

	return (0);
}
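
/*
 * How the hypercall page works: the CPUID leaf queried above returns
 * the number of hypercall pages in regs[0] (%eax) and the index of the
 * MSR used to program them in regs[1] (%ebx).  Writing the physical
 * address of xen_hypercall_page to that MSR asks the hypervisor to
 * fill the page with one 32-byte call stub per hypercall, so that,
 * e.g., hypercall "op" lives at sc->sc_hc + op * 32.  xen_hypercallv()
 * below relies on exactly that layout when computing "hcall".
 */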

int
xen_hypercall(struct xen_softc *sc, int op, int argc, ...)
{
	va_list ap;
	ulong argv[5];
	int i;

	if (argc < 0 || argc > 5)
		return (-1);
	va_start(ap, argc);
	for (i = 0; i < argc; i++)
		argv[i] = (ulong)va_arg(ap, ulong);
	va_end(ap);
	return (xen_hypercallv(sc, op, argc, argv));
}

int
xen_hypercallv(struct xen_softc *sc, int op, int argc, ulong *argv)
{
	ulong hcall;
	int rv = 0;

	hcall = (ulong)sc->sc_hc + op * 32;

#if defined(XEN_DEBUG) && disabled
	{
		int i;

		printf("hypercall %d", op);
		if (argc > 0) {
			printf(", args {");
			for (i = 0; i < argc; i++)
				printf(" %#lx", argv[i]);
			printf(" }\n");
		} else
			printf("\n");
	}
#endif

	switch (argc) {
	case 0: {
		HYPERCALL_RES1;
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1		\
			: HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 1: {
		HYPERCALL_RES1; HYPERCALL_RES2;
		HYPERCALL_ARG1(argv[0]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			: HYPERCALL_IN1			\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 2: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3		\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 3: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_RES4;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		HYPERCALL_ARG3(argv[2]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			  HYPERCALL_IN3			\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 4: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_RES4; HYPERCALL_RES5;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		HYPERCALL_ARG3(argv[2]); HYPERCALL_ARG4(argv[3]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
			  HYPERCALL_OUT5		\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			  HYPERCALL_IN3	HYPERCALL_IN4	\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 5: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_RES4; HYPERCALL_RES5; HYPERCALL_RES6;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		HYPERCALL_ARG3(argv[2]); HYPERCALL_ARG4(argv[3]);
		HYPERCALL_ARG5(argv[4]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
			  HYPERCALL_OUT5 HYPERCALL_OUT6	\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			  HYPERCALL_IN3	HYPERCALL_IN4	\
			  HYPERCALL_IN5			\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	default:
		DPRINTF("%s: wrong number of arguments: %d\n", __func__, argc);
		rv = -1;
		break;
	}
	return (rv);
}

int
xen_getfeatures(struct xen_softc *sc)
{
	struct xen_feature_info xfi;

	memset(&xfi, 0, sizeof(xfi));
	if (xen_hypercall(sc, XC_VERSION, 2, XENVER_get_features, &xfi) < 0) {
		printf(": failed to fetch features\n");
		return (-1);
	}
	sc->sc_features = xfi.submap;
#ifdef XEN_DEBUG
	printf(": features %b", sc->sc_features,
	    "\20\014DOM0\013PIRQ\012PVCLOCK\011CBVEC\010GNTFLAGS\007HMA"
	    "\006PTUPD\005PAE4G\004SUPERVISOR\003AUTOPMAP\002WDT\001WPT");
#else
	printf(": features %#x", sc->sc_features);
#endif
	return (0);
}

#ifdef XEN_DEBUG
void
xen_print_info_page(void)
{
	struct xen_softc *sc = xen_sc;
	struct shared_info *s = sc->sc_ipg;
	struct vcpu_info *v;
	int i;

	virtio_membar_sync();
	for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
		v = &s->vcpu_info[i];
		if (!v->evtchn_upcall_pending && !v->evtchn_upcall_mask &&
		    !v->evtchn_pending_sel && !v->time.version &&
		    !v->time.tsc_timestamp && !v->time.system_time &&
		    !v->time.tsc_to_system_mul && !v->time.tsc_shift)
			continue;
		printf("vcpu%d:\n"
		    "   upcall_pending=%02x upcall_mask=%02x pending_sel=%#lx\n"
		    "   time version=%u tsc=%llu system=%llu\n"
		    "   time mul=%u shift=%d\n",
		    i, v->evtchn_upcall_pending, v->evtchn_upcall_mask,
		    v->evtchn_pending_sel, v->time.version,
		    v->time.tsc_timestamp, v->time.system_time,
		    v->time.tsc_to_system_mul, v->time.tsc_shift);
	}
	printf("pending events: ");
	for (i = 0; i < nitems(s->evtchn_pending); i++) {
		if (s->evtchn_pending[i] == 0)
			continue;
		printf(" %d:%#lx", i, s->evtchn_pending[i]);
	}
	printf("\nmasked events: ");
	for (i = 0; i < nitems(s->evtchn_mask); i++) {
		if (s->evtchn_mask[i] == 0xffffffffffffffffULL)
			continue;
		printf(" %d:%#lx", i, s->evtchn_mask[i]);
	}
	printf("\nwc ver=%u sec=%u nsec=%u\n", s->wc_version, s->wc_sec,
	    s->wc_nsec);
	printf("arch maxpfn=%lu framelist=%lu nmi=%lu\n", s->arch.max_pfn,
	    s->arch.pfn_to_mfn_frame_list, s->arch.nmi_reason);
}
#endif	/* XEN_DEBUG */

int
xen_init_info_page(struct xen_softc *sc)
{
	struct xen_add_to_physmap xatp;
	paddr_t pa;

	sc->sc_ipg = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->sc_ipg == NULL) {
		printf(": failed to allocate shared info page\n");
		return (-1);
	}
	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_ipg, &pa)) {
		printf(": shared info page PA extraction failed\n");
		free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
		return (-1);
	}
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = atop(pa);
	if (xen_hypercall(sc, XC_MEMORY, 2, XENMEM_add_to_physmap, &xatp)) {
		printf(": failed to register shared info page\n");
		free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
		return (-1);
	}
	return (0);
}

int
xen_init_cbvec(struct xen_softc *sc)
{
	struct xen_hvm_param xhp;

	if ((sc->sc_features & XENFEAT_CBVEC) == 0)
		return (ENOENT);

	xhp.domid = DOMID_SELF;
	xhp.index = HVM_PARAM_CALLBACK_IRQ;
	xhp.value = HVM_CALLBACK_VECTOR(LAPIC_XEN_VECTOR);
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_set_param, &xhp)) {
		/* Will retry with the xspd(4) PCI interrupt */
		return (ENOENT);
	}
	DPRINTF(", idtvec %d", LAPIC_XEN_VECTOR);

	sc->sc_flags |= XSF_CBVEC;

	return (0);
}

int
xen_init_interrupts(struct xen_softc *sc)
{
	int i;

	sc->sc_irq = LAPIC_XEN_VECTOR;

	/*
	 * Clear all pending events and mask all interrupts
	 */
	for (i = 0; i < nitems(sc->sc_ipg->evtchn_pending); i++) {
		sc->sc_ipg->evtchn_pending[i] = 0;
		sc->sc_ipg->evtchn_mask[i] = ~0UL;
	}

	SLIST_INIT(&sc->sc_intrs);

	mtx_init(&sc->sc_islck, IPL_NET);

	return (0);
}

static int
xen_evtchn_hypercall(struct xen_softc *sc, int cmd, void *arg, size_t len)
{
	struct evtchn_op compat;
	int error;

	error = xen_hypercall(sc, XC_EVTCHN, 2, cmd, arg);
	if (error == -ENOXENSYS) {
		memset(&compat, 0, sizeof(compat));
		compat.cmd = cmd;
		memcpy(&compat.u, arg, len);
		error = xen_hypercall(sc, XC_OEVTCHN, 1, &compat);
	}
	return (error);
}

static inline void
xen_intsrc_add(struct xen_softc *sc, struct xen_intsrc *xi)
{
	refcnt_init(&xi->xi_refcnt);
	mtx_enter(&sc->sc_islck);
	SLIST_INSERT_HEAD(&sc->sc_intrs, xi, xi_entry);
	mtx_leave(&sc->sc_islck);
}

static inline struct xen_intsrc *
xen_intsrc_acquire(struct xen_softc *sc, evtchn_port_t port)
{
	struct xen_intsrc *xi = NULL;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (xi->xi_port == port) {
			refcnt_take(&xi->xi_refcnt);
			break;
		}
	}
	mtx_leave(&sc->sc_islck);
	return (xi);
}

static inline void
xen_intsrc_release(struct xen_softc *sc, struct xen_intsrc *xi)
{
	refcnt_rele_wake(&xi->xi_refcnt);
}

static inline struct xen_intsrc *
xen_intsrc_remove(struct xen_softc *sc, evtchn_port_t port)
{
	struct xen_intsrc *xi;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (xi->xi_port == port) {
			SLIST_REMOVE(&sc->sc_intrs, xi, xen_intsrc, xi_entry);
			break;
		}
	}
	mtx_leave(&sc->sc_islck);
	if (xi != NULL)
		refcnt_finalize(&xi->xi_refcnt, "xenisrm");
	return (xi);
}

static inline void
xen_intr_mask_acquired(struct xen_softc *sc, struct xen_intsrc *xi)
{
	xi->xi_masked = 1;
	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);
}

static inline int
xen_intr_unmask_release(struct xen_softc *sc, struct xen_intsrc *xi)
{
	struct evtchn_unmask eu;

	xi->xi_masked = 0;
	if (!test_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0])) {
		xen_intsrc_release(sc, xi);
		return (0);
	}
	eu.port = xi->xi_port;
	xen_intsrc_release(sc, xi);
	return (xen_evtchn_hypercall(sc, EVTCHNOP_unmask, &eu, sizeof(eu)));
}

void
xen_intr_ack(void)
{
	struct xen_softc *sc = xen_sc;
	struct shared_info *s = sc->sc_ipg;
	struct cpu_info *ci = curcpu();
	struct vcpu_info *v = &s->vcpu_info[CPU_INFO_UNIT(ci)];

	v->evtchn_upcall_pending = 0;
	virtio_membar_sync();
}

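/*
 * Pending events are tracked in a two-level bitmap: evtchn_pending_sel
 * in the vcpu info selects which "rows" (longs) of evtchn_pending[] in
 * the shared info page may have bits set, and every pending, unmasked
 * bit maps to an event channel port as port = row * LONG_BIT + bit.
 * For example, on amd64 (LONG_BIT == 64) port 67 shows up as bit 1 of
 * the selector and bit 3 of evtchn_pending[1], since 67 == 1 * 64 + 3.
 */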
void
xen_intr(void)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct shared_info *s = sc->sc_ipg;
	struct cpu_info *ci = curcpu();
	struct vcpu_info *v = &s->vcpu_info[CPU_INFO_UNIT(ci)];
	ulong pending, selector;
	int port, bit, row;

	v->evtchn_upcall_pending = 0;
	selector = atomic_swap_ulong(&v->evtchn_pending_sel, 0);

	for (row = 0; selector > 0; selector >>= 1, row++) {
		if ((selector & 1) == 0)
			continue;
		if ((sc->sc_ipg->evtchn_pending[row] &
		    ~(sc->sc_ipg->evtchn_mask[row])) == 0)
			continue;
		pending = atomic_swap_ulong(&sc->sc_ipg->evtchn_pending[row],
		    0) & ~(sc->sc_ipg->evtchn_mask[row]);
		for (bit = 0; pending > 0; pending >>= 1, bit++) {
			if ((pending & 1) == 0)
				continue;
			port = (row * LONG_BIT) + bit;
			if ((xi = xen_intsrc_acquire(sc, port)) == NULL) {
				printf("%s: unhandled interrupt on port %d\n",
				    sc->sc_dev.dv_xname, port);
				continue;
			}
			xi->xi_evcnt.ec_count++;
			xen_intr_mask_acquired(sc, xi);
			if (!task_add(xi->xi_taskq, &xi->xi_task))
				xen_intsrc_release(sc, xi);
		}
	}
}

void
xen_intr_schedule(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		xen_intr_mask_acquired(sc, xi);
		if (!task_add(xi->xi_taskq, &xi->xi_task))
			xen_intsrc_release(sc, xi);
	}
}

/*
 * This code achieves two goals: 1) makes sure that *after* masking
 * the interrupt source we're not getting more task_adds: sched_barrier
 * will take care of that, and 2) makes sure that the interrupt task
 * has finished executing the current task and won't be called again:
 * it sets up a barrier task to await completion of the current task
 * and relies on the interrupt masking to prevent submission of new
 * tasks in the future.
 */
void
xen_intr_barrier(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;

	sched_barrier(NULL);

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		taskq_barrier(xi->xi_taskq);
		xen_intsrc_release(sc, xi);
	}
}
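
/*
 * A sketch of how these primitives are meant to compose at detach
 * time, assuming "xih" is the handle filled in by xen_intr_establish():
 *
 *	xen_intr_mask(xih);		// no new events get delivered
 *	xen_intr_barrier(xih);		// pending task has run to completion
 *	xen_intr_disestablish(xih);	// close the port, free resources
 */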

void
xen_intr_signal(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_send es;

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		es.port = xi->xi_port;
		xen_intsrc_release(sc, xi);
		xen_evtchn_hypercall(sc, EVTCHNOP_send, &es, sizeof(es));
	}
}

int
xen_intr_establish(evtchn_port_t port, xen_intr_handle_t *xih, int domain,
    void (*handler)(void *), void *arg, char *name)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_alloc_unbound eau;
#ifdef notyet
	struct evtchn_bind_vcpu ebv;
#endif
#if defined(XEN_DEBUG) && disabled
	struct evtchn_status es;
#endif

	if (port && (xi = xen_intsrc_acquire(sc, port)) != NULL) {
		xen_intsrc_release(sc, xi);
		DPRINTF("%s: interrupt handler has already been established "
		    "for port %u\n", sc->sc_dev.dv_xname, port);
		return (-1);
	}

	xi = malloc(sizeof(*xi), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (xi == NULL)
		return (-1);

	xi->xi_port = (evtchn_port_t)*xih;

	xi->xi_handler = handler;
	xi->xi_ctx = arg;

	xi->xi_taskq = taskq_create(name, 1, IPL_NET, TASKQ_MPSAFE);
	if (!xi->xi_taskq) {
		printf("%s: failed to create interrupt task for %s\n",
		    sc->sc_dev.dv_xname, name);
		free(xi, M_DEVBUF, sizeof(*xi));
		return (-1);
	}
	task_set(&xi->xi_task, xen_intr_dispatch, xi);

	if (port == 0) {
		/* We're being asked to allocate a new event port */
		memset(&eau, 0, sizeof(eau));
		eau.dom = DOMID_SELF;
		eau.remote_dom = domain;
		if (xen_evtchn_hypercall(sc, EVTCHNOP_alloc_unbound, &eau,
		    sizeof(eau)) != 0) {
			DPRINTF("%s: failed to allocate new event port\n",
			    sc->sc_dev.dv_xname);
			free(xi, M_DEVBUF, sizeof(*xi));
			return (-1);
		}
		*xih = xi->xi_port = eau.port;
	} else {
		*xih = xi->xi_port = port;
		/*
		 * The Event Channel API didn't open this port, so it is not
		 * responsible for closing it automatically on unbind.
		 */
		xi->xi_noclose = 1;
	}

#ifdef notyet
	/* Bind interrupt to VCPU#0 */
	memset(&ebv, 0, sizeof(ebv));
	ebv.port = xi->xi_port;
	ebv.vcpu = 0;
	if (xen_evtchn_hypercall(sc, EVTCHNOP_bind_vcpu, &ebv, sizeof(ebv))) {
		printf("%s: failed to bind interrupt on port %u to vcpu%d\n",
		    sc->sc_dev.dv_xname, ebv.port, ebv.vcpu);
	}
#endif

	evcount_attach(&xi->xi_evcnt, name, &sc->sc_irq);

	xen_intsrc_add(sc, xi);

	/* Mask the event port */
	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);

#if defined(XEN_DEBUG) && disabled
	memset(&es, 0, sizeof(es));
	es.dom = DOMID_SELF;
	es.port = xi->xi_port;
	if (xen_evtchn_hypercall(sc, EVTCHNOP_status, &es, sizeof(es))) {
		printf("%s: failed to obtain status for port %d\n",
		    sc->sc_dev.dv_xname, es.port);
	}
	printf("%s: port %u bound to vcpu%u", sc->sc_dev.dv_xname,
	    es.port, es.vcpu);
	if (es.status == EVTCHNSTAT_interdomain)
		printf(": domain %d port %u\n", es.u.interdomain.dom,
		    es.u.interdomain.port);
	else if (es.status == EVTCHNSTAT_unbound)
		printf(": domain %d\n", es.u.unbound.dom);
	else if (es.status == EVTCHNSTAT_pirq)
		printf(": pirq %u\n", es.u.pirq);
	else if (es.status == EVTCHNSTAT_virq)
		printf(": virq %u\n", es.u.virq);
	else
		printf("\n");
#endif

	return (0);
}

int
xen_intr_disestablish(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct evtchn_close ec;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_remove(sc, port)) == NULL)
		return (-1);

	evcount_detach(&xi->xi_evcnt);

	taskq_destroy(xi->xi_taskq);

	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);
	clear_bit(xi->xi_port, &sc->sc_ipg->evtchn_pending[0]);

	if (!xi->xi_noclose) {
		ec.port = xi->xi_port;
		if (xen_evtchn_hypercall(sc, EVTCHNOP_close, &ec, sizeof(ec))) {
			DPRINTF("%s: failed to close event port %u\n",
			    sc->sc_dev.dv_xname, xi->xi_port);
		}
	}

	free(xi, M_DEVBUF, sizeof(*xi));
	return (0);
}

void
xen_intr_dispatch(void *arg)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi = arg;

	if (xi->xi_handler)
		xi->xi_handler(xi->xi_ctx);

	xen_intr_unmask_release(sc, xi);
}

void
xen_intr_enable(void)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_unmask eu;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (!xi->xi_masked) {
			eu.port = xi->xi_port;
			if (xen_evtchn_hypercall(sc, EVTCHNOP_unmask, &eu,
			    sizeof(eu)))
				printf("%s: unmasking port %u failed\n",
				    sc->sc_dev.dv_xname, xi->xi_port);
			virtio_membar_sync();
			if (test_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]))
				printf("%s: port %u is still masked\n",
				    sc->sc_dev.dv_xname, xi->xi_port);
		}
	}
	mtx_leave(&sc->sc_islck);
}

void
xen_intr_mask(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, port)) != NULL) {
		xen_intr_mask_acquired(sc, xi);
		xen_intsrc_release(sc, xi);
	}
}

int
xen_intr_unmask(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, port)) != NULL)
		return (xen_intr_unmask_release(sc, xi));

	return (0);
}

int
xen_init_grant_tables(struct xen_softc *sc)
{
	struct gnttab_query_size gqs;

	gqs.dom = DOMID_SELF;
	if (xen_hypercall(sc, XC_GNTTAB, 3, GNTTABOP_query_size, &gqs, 1)) {
		printf(": failed the query for grant table pages\n");
		return (-1);
	}
	if (gqs.nr_frames == 0 || gqs.nr_frames > gqs.max_nr_frames) {
		printf(": invalid number of grant table pages: %u/%u\n",
		    gqs.nr_frames, gqs.max_nr_frames);
		return (-1);
	}

	sc->sc_gntmax = gqs.max_nr_frames;

	sc->sc_gnt = mallocarray(sc->sc_gntmax + 1, sizeof(struct xen_gntent),
	    M_DEVBUF, M_ZERO | M_NOWAIT);
	if (sc->sc_gnt == NULL) {
		printf(": failed to allocate grant table lookup table\n");
		return (-1);
	}

	mtx_init(&sc->sc_gntlck, IPL_NET);

	if (xen_grant_table_grow(sc) == NULL) {
		free(sc->sc_gnt, M_DEVBUF, sc->sc_gntmax *
		    sizeof(struct xen_gntent));
		return (-1);
	}

	printf(", %d grant table frames", sc->sc_gntmax);

	xen_bus_dma_tag._cookie = sc;

	return (0);
}

struct xen_gntent *
xen_grant_table_grow(struct xen_softc *sc)
{
	struct xen_add_to_physmap xatp;
	struct xen_gntent *ge;
	void *va;
	paddr_t pa;

	if (sc->sc_gntcnt == sc->sc_gntmax) {
		printf("%s: grant table frame allotment limit reached\n",
		    sc->sc_dev.dv_xname);
		return (NULL);
	}

	va = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
	if (va == NULL)
		return (NULL);
	if (!pmap_extract(pmap_kernel(), (vaddr_t)va, &pa)) {
		printf("%s: grant table page PA extraction failed\n",
		    sc->sc_dev.dv_xname);
		km_free(va, PAGE_SIZE, &kv_any, &kp_zero);
		return (NULL);
	}

	mtx_enter(&sc->sc_gntlck);

	ge = &sc->sc_gnt[sc->sc_gntcnt];
	ge->ge_table = va;

	xatp.domid = DOMID_SELF;
	xatp.idx = sc->sc_gntcnt;
	xatp.space = XENMAPSPACE_grant_table;
	xatp.gpfn = atop(pa);
	if (xen_hypercall(sc, XC_MEMORY, 2, XENMEM_add_to_physmap, &xatp)) {
		printf("%s: failed to add a grant table page\n",
		    sc->sc_dev.dv_xname);
		km_free(ge->ge_table, PAGE_SIZE, &kv_any, &kp_zero);
		mtx_leave(&sc->sc_gntlck);
		return (NULL);
	}
	ge->ge_start = sc->sc_gntcnt * GNTTAB_NEPG;
	/* First page has 8 reserved entries */
	ge->ge_reserved = ge->ge_start == 0 ? GNTTAB_NR_RESERVED_ENTRIES : 0;
	ge->ge_free = GNTTAB_NEPG - ge->ge_reserved;
	ge->ge_next = ge->ge_reserved;
	mtx_init(&ge->ge_lock, IPL_NET);

	sc->sc_gntcnt++;
	mtx_leave(&sc->sc_gntlck);

	return (ge);
}

int
xen_grant_table_alloc(struct xen_softc *sc, grant_ref_t *ref)
{
	struct xen_gntent *ge;
	int i;

	/* Start with a previously allocated table page */
	ge = &sc->sc_gnt[sc->sc_gntcnt - 1];
	if (ge->ge_free > 0) {
		mtx_enter(&ge->ge_lock);
		if (ge->ge_free > 0)
			goto search;
		mtx_leave(&ge->ge_lock);
	}

	/* Try other existing table pages */
	for (i = 0; i < sc->sc_gntcnt; i++) {
		ge = &sc->sc_gnt[i];
		if (ge->ge_free == 0)
			continue;
		mtx_enter(&ge->ge_lock);
		if (ge->ge_free > 0)
			goto search;
		mtx_leave(&ge->ge_lock);
	}

 alloc:
	/* Allocate a new table page */
	if ((ge = xen_grant_table_grow(sc)) == NULL)
		return (-1);

	mtx_enter(&ge->ge_lock);
	if (ge->ge_free == 0) {
		/* We were not fast enough... */
		mtx_leave(&ge->ge_lock);
		goto alloc;
	}

 search:
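	/*
	 * Scan at most one full page worth of entries.  GNTTAB_NEPG is a
	 * power of two (e.g. 4096 / 8 == 512 entries per page), so the
	 * terminating index (ge_next + GNTTAB_NEPG - 1) & (GNTTAB_NEPG - 1)
	 * is ge_next - 1 modulo GNTTAB_NEPG: starting at ge_next and
	 * wrapping at GNTTAB_NEPG visits every entry exactly once.
	 */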
	for (i = ge->ge_next;
	     /* Math works here because GNTTAB_NEPG is a power of 2 */
	     i != ((ge->ge_next + GNTTAB_NEPG - 1) & (GNTTAB_NEPG - 1));
	     i++) {
		if (i == GNTTAB_NEPG)
			i = 0;
		if (ge->ge_reserved && i < ge->ge_reserved)
			continue;
		if (ge->ge_table[i].frame != 0)
			continue;
		*ref = ge->ge_start + i;
		ge->ge_table[i].flags = GTF_invalid;
		ge->ge_table[i].frame = 0xffffffff; /* Mark as taken */
		if ((ge->ge_next = i + 1) == GNTTAB_NEPG)
			ge->ge_next = ge->ge_reserved;
		ge->ge_free--;
		mtx_leave(&ge->ge_lock);
		return (0);
	}
	mtx_leave(&ge->ge_lock);

	panic("page full, sc %p gnt %p (%d) ge %p", sc, sc->sc_gnt,
	    sc->sc_gntcnt, ge);
	return (-1);
}

void
xen_grant_table_free(struct xen_softc *sc, grant_ref_t ref)
{
	struct xen_gntent *ge;

#ifdef XEN_DEBUG
	if (ref > sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
	mtx_enter(&ge->ge_lock);
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref > ge->ge_start + GNTTAB_NEPG) {
		mtx_leave(&ge->ge_lock);
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	if (ge->ge_table[ref].flags != GTF_invalid) {
		mtx_leave(&ge->ge_lock);
		panic("reference %u is still in use, flags %#x frame %#x",
		    ref + ge->ge_start, ge->ge_table[ref].flags,
		    ge->ge_table[ref].frame);
	}
	ge->ge_table[ref].frame = 0;
	ge->ge_next = ref;
	ge->ge_free++;
	mtx_leave(&ge->ge_lock);
}

void
xen_grant_table_enter(struct xen_softc *sc, grant_ref_t ref, paddr_t pa,
    int domain, int flags)
{
	struct xen_gntent *ge;

#ifdef XEN_DEBUG
	if (ref > sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref > ge->ge_start + GNTTAB_NEPG) {
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	if (ge->ge_table[ref].flags != GTF_invalid) {
		panic("reference %u is still in use, flags %#x frame %#x",
		    ref + ge->ge_start, ge->ge_table[ref].flags,
		    ge->ge_table[ref].frame);
	}
	ge->ge_table[ref].frame = atop(pa);
	ge->ge_table[ref].domid = domain;
	virtio_membar_sync();
	ge->ge_table[ref].flags = GTF_permit_access | flags;
	virtio_membar_sync();
}

void
xen_grant_table_remove(struct xen_softc *sc, grant_ref_t ref)
{
	struct xen_gntent *ge;
	uint32_t flags, *ptr;
	int loop;

#ifdef XEN_DEBUG
	if (ref > sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref > ge->ge_start + GNTTAB_NEPG) {
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	/* Invalidate the grant reference */
	virtio_membar_sync();
	ptr = (uint32_t *)&ge->ge_table[ref];
	flags = (ge->ge_table[ref].flags & ~(GTF_reading|GTF_writing)) |
	    (ge->ge_table[ref].domid << 16);
	loop = 0;
	while (atomic_cas_uint(ptr, flags, GTF_invalid) != flags) {
		if (loop++ > 10) {
			panic("grant table reference %u is held "
			    "by domain %d: frame %#x flags %#x",
			    ref + ge->ge_start, ge->ge_table[ref].domid,
			    ge->ge_table[ref].frame, ge->ge_table[ref].flags);
		}
#if (defined(__amd64__) || defined(__i386__))
		__asm volatile("pause": : : "memory");
#endif
	}
	ge->ge_table[ref].frame = 0xffffffff;
}

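/*
 * The bus_dma(9) wrappers below map DMA segments onto the grant table:
 * xen_bus_dmamap_create() claims one grant reference per segment up
 * front, the load methods point each claimed entry at the segment's
 * physical pages and substitute the grant reference for the physical
 * address in ds_addr, and unload reverses the substitution before the
 * entries are released.  Backends thus only ever see grant references.
 */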
int
xen_bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm;
	int i, error;

	if (maxsegsz < PAGE_SIZE)
		return (EINVAL);

	/* Allocate a dma map structure */
	error = bus_dmamap_create(sc->sc_dmat, size, nsegments, maxsegsz,
	    boundary, flags, dmamp);
	if (error)
		return (error);
	/* Allocate an array of grant table pa<->ref maps */
	gm = mallocarray(nsegments, sizeof(struct xen_gntmap), M_DEVBUF,
	    M_ZERO | ((flags & BUS_DMA_NOWAIT) ? M_NOWAIT : M_WAITOK));
	if (gm == NULL) {
		bus_dmamap_destroy(sc->sc_dmat, *dmamp);
		*dmamp = NULL;
		return (ENOMEM);
	}
	/* Wire it to the dma map */
	(*dmamp)->_dm_cookie = gm;
	/* Claim references from the grant table */
	for (i = 0; i < (*dmamp)->_dm_segcnt; i++) {
		if (xen_grant_table_alloc(sc, &gm[i].gm_ref)) {
			xen_bus_dmamap_destroy(t, *dmamp);
			*dmamp = NULL;
			return (ENOBUFS);
		}
	}
	return (0);
}

void
xen_bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm;
	int i;

	gm = map->_dm_cookie;
	for (i = 0; i < map->_dm_segcnt; i++) {
		if (gm[i].gm_ref == 0)
			continue;
		xen_grant_table_free(sc, gm[i].gm_ref);
	}
	free(gm, M_DEVBUF, map->_dm_segcnt * sizeof(struct xen_gntmap));
	bus_dmamap_destroy(sc->sc_dmat, map);
}

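/*
 * Both load methods borrow the upper 16 bits of the bus_dma flags to
 * carry the backend domain id for the grant table entry; e.g. a caller
 * could pass (domid << 16) | BUS_DMA_WRITE | BUS_DMA_NOWAIT.  Only the
 * low 16 bits are handed on to the parent tag.
 */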
int
xen_bus_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    bus_size_t buflen, struct proc *p, int flags)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm = map->_dm_cookie;
	int i, domain, error;

	domain = flags >> 16;
	flags &= 0xffff;
	error = bus_dmamap_load(sc->sc_dmat, map, buf, buflen, p, flags);
	if (error)
		return (error);
	for (i = 0; i < map->dm_nsegs; i++) {
		xen_grant_table_enter(sc, gm[i].gm_ref, map->dm_segs[i].ds_addr,
		    domain, flags & BUS_DMA_WRITE ? GTF_readonly : 0);
		gm[i].gm_paddr = map->dm_segs[i].ds_addr;
		map->dm_segs[i].ds_addr = gm[i].gm_ref;
	}
	return (0);
}

int
xen_bus_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0,
    int flags)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm = map->_dm_cookie;
	int i, domain, error;

	domain = flags >> 16;
	flags &= 0xffff;
	error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m0, flags);
	if (error)
		return (error);
	for (i = 0; i < map->dm_nsegs; i++) {
		xen_grant_table_enter(sc, gm[i].gm_ref, map->dm_segs[i].ds_addr,
		    domain, flags & BUS_DMA_WRITE ? GTF_readonly : 0);
		gm[i].gm_paddr = map->dm_segs[i].ds_addr;
		map->dm_segs[i].ds_addr = gm[i].gm_ref;
	}
	return (0);
}

void
xen_bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm = map->_dm_cookie;
	int i;

	for (i = 0; i < map->dm_nsegs; i++) {
		if (gm[i].gm_paddr == 0)
			continue;
		xen_grant_table_remove(sc, gm[i].gm_ref);
		map->dm_segs[i].ds_addr = gm[i].gm_paddr;
		gm[i].gm_paddr = 0;
	}
	bus_dmamap_unload(sc->sc_dmat, map);
}

void
xen_bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
    bus_size_t size, int op)
{
	if ((op == (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) ||
	    (op == (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)))
		virtio_membar_sync();
}

static int
xen_attach_print(void *aux, const char *name)
{
	struct xen_attach_args *xa = aux;

	if (name)
		printf("\"%s\" at %s: %s", xa->xa_name, name, xa->xa_node);

	return (UNCONF);
}

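/*
 * Frontend devices show up in the XenStore under device/<type>/<unit>,
 * e.g. "device/vif/0" for the first network interface.  Each node is
 * expected to carry at least a "backend" property (the backend's
 * XenStore path) and a "backend-id" property (the backend domain).
 */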
int
xen_attach_device(struct xen_softc *sc, struct xen_devlist *xdl,
    const char *name, const char *unit)
{
	struct xen_attach_args xa;
	struct xen_device *xdv;
	unsigned long long res;

	xa.xa_dmat = &xen_bus_dma_tag;

	strlcpy(xa.xa_name, name, sizeof(xa.xa_name));
	snprintf(xa.xa_node, sizeof(xa.xa_node), "device/%s/%s", name, unit);

	if (xs_getprop(sc, xa.xa_node, "backend", xa.xa_backend,
	    sizeof(xa.xa_backend))) {
		DPRINTF("%s: failed to identify \"backend\" for "
		    "\"%s\"\n", sc->sc_dev.dv_xname, xa.xa_node);
		return (EIO);
	}

	if (xs_getnum(sc, xa.xa_node, "backend-id", &res) || res > UINT16_MAX) {
		DPRINTF("%s: invalid \"backend-id\" for \"%s\"\n",
		    sc->sc_dev.dv_xname, xa.xa_node);
		return (EIO);
	}
	xa.xa_domid = (uint16_t)res;

	xdv = malloc(sizeof(struct xen_device), M_DEVBUF, M_ZERO | M_NOWAIT);
	if (xdv == NULL)
		return (ENOMEM);

	strlcpy(xdv->dv_unit, unit, sizeof(xdv->dv_unit));
	LIST_INSERT_HEAD(&xdl->dl_devs, xdv, dv_entry);

	xdv->dv_dev = config_found((struct device *)sc, &xa, xen_attach_print);

	return (0);
}

int
xen_probe_devices(struct xen_softc *sc)
{
	struct xen_devlist *xdl;
	struct xs_transaction xst;
	struct iovec *iovp1 = NULL, *iovp2 = NULL;
	int i, j, error, iov1_cnt = 0, iov2_cnt = 0;
	char path[256];

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if ((error = xs_cmd(&xst, XS_LIST, "device", &iovp1, &iov1_cnt)) != 0)
		return (error);

	for (i = 0; i < iov1_cnt; i++) {
		if (strcmp("suspend", (char *)iovp1[i].iov_base) == 0)
			continue;
		snprintf(path, sizeof(path), "device/%s",
		    (char *)iovp1[i].iov_base);
		if ((error = xs_cmd(&xst, XS_LIST, path, &iovp2,
		    &iov2_cnt)) != 0)
			goto out;
		if ((xdl = malloc(sizeof(struct xen_devlist), M_DEVBUF,
		    M_ZERO | M_NOWAIT)) == NULL) {
			error = ENOMEM;
			goto out;
		}
		xdl->dl_xen = sc;
		strlcpy(xdl->dl_node, (const char *)iovp1[i].iov_base,
		    XEN_MAX_NODE_LEN);
		for (j = 0; j < iov2_cnt; j++) {
			error = xen_attach_device(sc, xdl,
			    (const char *)iovp1[i].iov_base,
			    (const char *)iovp2[j].iov_base);
			if (error) {
				printf("%s: failed to attach \"%s/%s\"\n",
				    sc->sc_dev.dv_xname, path,
				    (const char *)iovp2[j].iov_base);
				goto out;
			}
		}
		/* Setup a watch for every device subtree */
		if (xs_watch(sc, "device", (char *)iovp1[i].iov_base,
		    &xdl->dl_task, xen_hotplug, xdl))
			printf("%s: failed to setup hotplug watch for \"%s\"\n",
			    sc->sc_dev.dv_xname, (char *)iovp1[i].iov_base);
		SLIST_INSERT_HEAD(&sc->sc_devlists, xdl, dl_entry);
		xs_resfree(&xst, iovp2, iov2_cnt);
		iovp2 = NULL;
		iov2_cnt = 0;
	}

 out:
	if (iovp2)
		xs_resfree(&xst, iovp2, iov2_cnt);
	xs_resfree(&xst, iovp1, iov1_cnt);
	return (error);
}

void
xen_hotplug(void *arg)
{
	struct xen_devlist *xdl = arg;
	struct xen_softc *sc = xdl->dl_xen;
	struct xen_device *xdv, *xvdn;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	int error, i, keep, iov_cnt = 0;
	char path[256];
	int8_t *seen;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	snprintf(path, sizeof(path), "device/%s", xdl->dl_node);
	if ((error = xs_cmd(&xst, XS_LIST, path, &iovp, &iov_cnt)) != 0)
		return;

	seen = malloc(iov_cnt, M_TEMP, M_ZERO | M_WAITOK);

	/* Detect all removed and kept devices */
	LIST_FOREACH_SAFE(xdv, &xdl->dl_devs, dv_entry, xvdn) {
		for (i = 0, keep = 0; i < iov_cnt; i++) {
			if (!seen[i] &&
			    !strcmp(xdv->dv_unit, (char *)iovp[i].iov_base)) {
				seen[i]++;
				keep++;
				break;
			}
		}
		if (!keep) {
			DPRINTF("%s: removing \"%s/%s\"\n", sc->sc_dev.dv_xname,
			    xdl->dl_node, xdv->dv_unit);
			LIST_REMOVE(xdv, dv_entry);
			config_detach(xdv->dv_dev, 0);
			free(xdv, M_DEVBUF, sizeof(struct xen_device));
		}
	}

	/* Attach all new devices */
	for (i = 0; i < iov_cnt; i++) {
		if (seen[i])
			continue;
		DPRINTF("%s: attaching \"%s/%s\"\n", sc->sc_dev.dv_xname,
			    xdl->dl_node, (const char *)iovp[i].iov_base);
		error = xen_attach_device(sc, xdl, xdl->dl_node,
		    (const char *)iovp[i].iov_base);
		if (error) {
			printf("%s: failed to attach \"%s/%s\"\n",
			    sc->sc_dev.dv_xname, path,
			    (const char *)iovp[i].iov_base);
			continue;
		}
	}

	free(seen, M_TEMP, iov_cnt);

	xs_resfree(&xst, iovp, iov_cnt);
}

#include <machine/pio.h>

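/*
 * Emulated device unplug protocol (see docs/misc/hvm-emulated-unplug
 * in the Xen source tree): reading the magic value 0x49d2 from I/O
 * port 0x10 confirms that the device model implements the protocol;
 * writing a mask of XMI_UNPLUG_* bits to the same port then makes it
 * detach the matching emulated devices so they don't shadow their
 * paravirtualized counterparts.
 */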
#define	XMI_PORT		0x10
#define XMI_MAGIC		0x49d2
#define XMI_UNPLUG_IDE		0x01
#define XMI_UNPLUG_NIC		0x02
#define XMI_UNPLUG_IDESEC	0x04

void
xen_disable_emulated_devices(struct xen_softc *sc)
{
#if defined(__i386__) || defined(__amd64__)
	ushort unplug = 0;

	if (inw(XMI_PORT) != XMI_MAGIC) {
		printf("%s: failed to disable emulated devices\n",
		    sc->sc_dev.dv_xname);
		return;
	}
	if (sc->sc_unplug & XEN_UNPLUG_IDE)
		unplug |= XMI_UNPLUG_IDE;
	if (sc->sc_unplug & XEN_UNPLUG_IDESEC)
		unplug |= XMI_UNPLUG_IDESEC;
	if (sc->sc_unplug & XEN_UNPLUG_NIC)
		unplug |= XMI_UNPLUG_NIC;
	if (unplug)
		outw(XMI_PORT, unplug);
#endif	/* __i386__ || __amd64__ */
}

void
xen_unplug_emulated(void *xsc, int what)
{
	struct xen_softc *sc = xsc;

	sc->sc_unplug |= what;
}
1549