xref: /freebsd/sys/amd64/vmm/intel/vtd.c (revision 366f6083)
1366f6083SPeter Grehan /*-
2366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
3366f6083SPeter Grehan  * All rights reserved.
4366f6083SPeter Grehan  *
5366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
6366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
7366f6083SPeter Grehan  * are met:
8366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
9366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
10366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
11366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
12366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
13366f6083SPeter Grehan  *
14366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24366f6083SPeter Grehan  * SUCH DAMAGE.
25366f6083SPeter Grehan  *
26366f6083SPeter Grehan  * $FreeBSD$
27366f6083SPeter Grehan  */
28366f6083SPeter Grehan 
29366f6083SPeter Grehan #include <sys/cdefs.h>
30366f6083SPeter Grehan __FBSDID("$FreeBSD$");
31366f6083SPeter Grehan 
32366f6083SPeter Grehan #include <sys/param.h>
33366f6083SPeter Grehan #include <sys/kernel.h>
34366f6083SPeter Grehan #include <sys/systm.h>
35366f6083SPeter Grehan #include <sys/malloc.h>
36366f6083SPeter Grehan 
37366f6083SPeter Grehan #include <vm/vm.h>
38366f6083SPeter Grehan #include <vm/pmap.h>
39366f6083SPeter Grehan 
40366f6083SPeter Grehan #include <dev/pci/pcireg.h>
41366f6083SPeter Grehan 
42366f6083SPeter Grehan #include <machine/pmap.h>
43366f6083SPeter Grehan #include <machine/vmparam.h>
44366f6083SPeter Grehan #include <machine/pci_cfgreg.h>
45366f6083SPeter Grehan 
46366f6083SPeter Grehan #include "io/iommu.h"
47366f6083SPeter Grehan 
48366f6083SPeter Grehan /*
49366f6083SPeter Grehan  * Documented in the "Intel Virtualization Technology for Directed I/O",
50366f6083SPeter Grehan  * Architecture Spec, September 2008.
51366f6083SPeter Grehan  */
52366f6083SPeter Grehan 
53366f6083SPeter Grehan /* Section 10.4 "Register Descriptions" */
/*
 * Layout of the leading part of a VT-d unit's memory-mapped register block.
 * Every field is volatile because it is accessed directly through a pointer
 * to the device registers.  Byte offsets follow from the field sizes.
 */
struct vtdmap {
	volatile uint32_t	version;	/* 0x00 */
	volatile uint32_t	res0;		/* 0x04: padding up to 'cap' */
	volatile uint64_t	cap;		/* 0x08: see VTD_CAP_xxx() */
	volatile uint64_t	ext_cap;	/* 0x10: see VTD_ECAP_xxx() */
	volatile uint32_t	gcr;		/* 0x18: see VTD_GCR_xxx */
	volatile uint32_t	gsr;		/* 0x1c: see VTD_GSR_xxx */
	volatile uint64_t	rta;		/* 0x20: root table address */
	volatile uint64_t	ccr;		/* 0x28: see VTD_CCR_xxx */
};
64366f6083SPeter Grehan 
/* Field extractors for the capability register ('cap'). */
#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

/* Field extractors for the extended capability register ('ext_cap'). */
#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

/*
 * Bits of the 32-bit 'gcr' and 'gsr' registers.
 *
 * These are unsigned constants: '1 << 31' would overflow a signed int, which
 * is undefined behaviour and in practice yields a negative value that
 * sign-extends when widened to 64 bits.
 */
#define	VTD_GCR_WBF		(1U << 27)
#define	VTD_GCR_SRTP		(1U << 30)
#define	VTD_GCR_TE		(1U << 31)

#define	VTD_GSR_WBFS		(1U << 27)
#define	VTD_GSR_RTPS		(1U << 30)
#define	VTD_GSR_TES		(1U << 31)

#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

#define	VTD_IIR_IVT		(1UL << 63)	/* invalidation IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

/* Bits of the root, context and page table entries. */
#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)
102366f6083SPeter Grehan 
103366f6083SPeter Grehan struct domain {
104366f6083SPeter Grehan 	uint64_t	*ptp;		/* first level page table page */
105366f6083SPeter Grehan 	int		pt_levels;	/* number of page table levels */
106366f6083SPeter Grehan 	int		addrwidth;	/* 'AW' field in context entry */
107366f6083SPeter Grehan 	int		spsmask;	/* supported super page sizes */
108366f6083SPeter Grehan 	u_int		id;		/* domain id */
109366f6083SPeter Grehan 	vm_paddr_t	maxaddr;	/* highest address to be mapped */
110366f6083SPeter Grehan 	SLIST_ENTRY(domain) next;
111366f6083SPeter Grehan };
112366f6083SPeter Grehan 
113366f6083SPeter Grehan static SLIST_HEAD(, domain) domhead;
114366f6083SPeter Grehan 
115366f6083SPeter Grehan #define	DRHD_MAX_UNITS	8
116366f6083SPeter Grehan static int		drhd_num;
117366f6083SPeter Grehan static struct vtdmap	*vtdmaps[DRHD_MAX_UNITS];
118366f6083SPeter Grehan static int		max_domains;
119366f6083SPeter Grehan typedef int		(*drhd_ident_func_t)(void);
120366f6083SPeter Grehan 
121366f6083SPeter Grehan static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
122366f6083SPeter Grehan static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
123366f6083SPeter Grehan 
124366f6083SPeter Grehan static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
125366f6083SPeter Grehan 
126366f6083SPeter Grehan /*
127366f6083SPeter Grehan  * Config space register definitions from the "Intel 5520 and 5500" datasheet.
128366f6083SPeter Grehan  */
129366f6083SPeter Grehan static int
130366f6083SPeter Grehan tylersburg_vtd_ident(void)
131366f6083SPeter Grehan {
132366f6083SPeter Grehan 	int units, nlbus;
133366f6083SPeter Grehan 	uint16_t did, vid;
134366f6083SPeter Grehan 	uint32_t miscsts, vtbar;
135366f6083SPeter Grehan 
136366f6083SPeter Grehan 	const int bus = 0;
137366f6083SPeter Grehan 	const int slot = 20;
138366f6083SPeter Grehan 	const int func = 0;
139366f6083SPeter Grehan 
140366f6083SPeter Grehan 	units = 0;
141366f6083SPeter Grehan 
142366f6083SPeter Grehan 	vid = pci_cfgregread(bus, slot, func, PCIR_VENDOR, 2);
143366f6083SPeter Grehan 	did = pci_cfgregread(bus, slot, func, PCIR_DEVICE, 2);
144366f6083SPeter Grehan 	if (vid != 0x8086 || did != 0x342E)
145366f6083SPeter Grehan 		goto done;
146366f6083SPeter Grehan 
147366f6083SPeter Grehan 	/*
148366f6083SPeter Grehan 	 * Check if this is a dual IOH configuration.
149366f6083SPeter Grehan 	 */
150366f6083SPeter Grehan 	miscsts = pci_cfgregread(bus, slot, func, 0x9C, 4);
151366f6083SPeter Grehan 	if (miscsts & (1 << 25))
152366f6083SPeter Grehan 		nlbus = pci_cfgregread(bus, slot, func, 0x160, 1);
153366f6083SPeter Grehan 	else
154366f6083SPeter Grehan 		nlbus = -1;
155366f6083SPeter Grehan 
156366f6083SPeter Grehan 	vtbar = pci_cfgregread(bus, slot, func, 0x180, 4);
157366f6083SPeter Grehan 	if (vtbar & 0x1) {
158366f6083SPeter Grehan 		vtdmaps[units++] = (struct vtdmap *)
159366f6083SPeter Grehan 					PHYS_TO_DMAP(vtbar & 0xffffe000);
160366f6083SPeter Grehan 	} else if (bootverbose)
161366f6083SPeter Grehan 		printf("VT-d unit in legacy IOH is disabled!\n");
162366f6083SPeter Grehan 
163366f6083SPeter Grehan 	if (nlbus != -1) {
164366f6083SPeter Grehan 		vtbar = pci_cfgregread(nlbus, slot, func, 0x180, 4);
165366f6083SPeter Grehan 		if (vtbar & 0x1) {
166366f6083SPeter Grehan 			vtdmaps[units++] = (struct vtdmap *)
167366f6083SPeter Grehan 					   PHYS_TO_DMAP(vtbar & 0xffffe000);
168366f6083SPeter Grehan 		} else if (bootverbose)
169366f6083SPeter Grehan 			printf("VT-d unit in non-legacy IOH is disabled!\n");
170366f6083SPeter Grehan 	}
171366f6083SPeter Grehan done:
172366f6083SPeter Grehan 	return (units);
173366f6083SPeter Grehan }
174366f6083SPeter Grehan 
175366f6083SPeter Grehan static drhd_ident_func_t drhd_ident_funcs[] = {
176366f6083SPeter Grehan 	tylersburg_vtd_ident,
177366f6083SPeter Grehan 	NULL
178366f6083SPeter Grehan };
179366f6083SPeter Grehan 
180366f6083SPeter Grehan static int
181366f6083SPeter Grehan vtd_max_domains(struct vtdmap *vtdmap)
182366f6083SPeter Grehan {
183366f6083SPeter Grehan 	int nd;
184366f6083SPeter Grehan 
185366f6083SPeter Grehan 	nd = VTD_CAP_ND(vtdmap->cap);
186366f6083SPeter Grehan 
187366f6083SPeter Grehan 	switch (nd) {
188366f6083SPeter Grehan 	case 0:
189366f6083SPeter Grehan 		return (16);
190366f6083SPeter Grehan 	case 1:
191366f6083SPeter Grehan 		return (64);
192366f6083SPeter Grehan 	case 2:
193366f6083SPeter Grehan 		return (256);
194366f6083SPeter Grehan 	case 3:
195366f6083SPeter Grehan 		return (1024);
196366f6083SPeter Grehan 	case 4:
197366f6083SPeter Grehan 		return (4 * 1024);
198366f6083SPeter Grehan 	case 5:
199366f6083SPeter Grehan 		return (16 * 1024);
200366f6083SPeter Grehan 	case 6:
201366f6083SPeter Grehan 		return (64 * 1024);
202366f6083SPeter Grehan 	default:
203366f6083SPeter Grehan 		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
204366f6083SPeter Grehan 	}
205366f6083SPeter Grehan }
206366f6083SPeter Grehan 
207366f6083SPeter Grehan static u_int
208366f6083SPeter Grehan domain_id(void)
209366f6083SPeter Grehan {
210366f6083SPeter Grehan 	u_int id;
211366f6083SPeter Grehan 	struct domain *dom;
212366f6083SPeter Grehan 
213366f6083SPeter Grehan 	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
214366f6083SPeter Grehan 	for (id = 1; id < max_domains; id++) {
215366f6083SPeter Grehan 		SLIST_FOREACH(dom, &domhead, next) {
216366f6083SPeter Grehan 			if (dom->id == id)
217366f6083SPeter Grehan 				break;
218366f6083SPeter Grehan 		}
219366f6083SPeter Grehan 		if (dom == NULL)
220366f6083SPeter Grehan 			break;		/* found it */
221366f6083SPeter Grehan 	}
222366f6083SPeter Grehan 
223366f6083SPeter Grehan 	if (id >= max_domains)
224366f6083SPeter Grehan 		panic("domain ids exhausted");
225366f6083SPeter Grehan 
226366f6083SPeter Grehan 	return (id);
227366f6083SPeter Grehan }
228366f6083SPeter Grehan 
229366f6083SPeter Grehan static void
230366f6083SPeter Grehan vtd_wbflush(struct vtdmap *vtdmap)
231366f6083SPeter Grehan {
232366f6083SPeter Grehan 
233366f6083SPeter Grehan 	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
234366f6083SPeter Grehan 		pmap_invalidate_cache();
235366f6083SPeter Grehan 
236366f6083SPeter Grehan 	if (VTD_CAP_RWBF(vtdmap->cap)) {
237366f6083SPeter Grehan 		vtdmap->gcr = VTD_GCR_WBF;
238366f6083SPeter Grehan 		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
239366f6083SPeter Grehan 			;
240366f6083SPeter Grehan 	}
241366f6083SPeter Grehan }
242366f6083SPeter Grehan 
243366f6083SPeter Grehan static void
244366f6083SPeter Grehan vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
245366f6083SPeter Grehan {
246366f6083SPeter Grehan 
247366f6083SPeter Grehan 	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
248366f6083SPeter Grehan 	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
249366f6083SPeter Grehan 		;
250366f6083SPeter Grehan }
251366f6083SPeter Grehan 
252366f6083SPeter Grehan static void
253366f6083SPeter Grehan vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
254366f6083SPeter Grehan {
255366f6083SPeter Grehan 	int offset;
256366f6083SPeter Grehan 	volatile uint64_t *iotlb_reg, val;
257366f6083SPeter Grehan 
258366f6083SPeter Grehan 	vtd_wbflush(vtdmap);
259366f6083SPeter Grehan 
260366f6083SPeter Grehan 	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
261366f6083SPeter Grehan 	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
262366f6083SPeter Grehan 
263366f6083SPeter Grehan 	*iotlb_reg =  VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
264366f6083SPeter Grehan 		      VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
265366f6083SPeter Grehan 
266366f6083SPeter Grehan 	while (1) {
267366f6083SPeter Grehan 		val = *iotlb_reg;
268366f6083SPeter Grehan 		if ((val & VTD_IIR_IVT) == 0)
269366f6083SPeter Grehan 			break;
270366f6083SPeter Grehan 	}
271366f6083SPeter Grehan }
272366f6083SPeter Grehan 
273366f6083SPeter Grehan static void
274366f6083SPeter Grehan vtd_translation_enable(struct vtdmap *vtdmap)
275366f6083SPeter Grehan {
276366f6083SPeter Grehan 
277366f6083SPeter Grehan 	vtdmap->gcr = VTD_GCR_TE;
278366f6083SPeter Grehan 	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
279366f6083SPeter Grehan 		;
280366f6083SPeter Grehan }
281366f6083SPeter Grehan 
282366f6083SPeter Grehan static void
283366f6083SPeter Grehan vtd_translation_disable(struct vtdmap *vtdmap)
284366f6083SPeter Grehan {
285366f6083SPeter Grehan 
286366f6083SPeter Grehan 	vtdmap->gcr = 0;
287366f6083SPeter Grehan 	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
288366f6083SPeter Grehan 		;
289366f6083SPeter Grehan }
290366f6083SPeter Grehan 
291366f6083SPeter Grehan static int
292366f6083SPeter Grehan vtd_init(void)
293366f6083SPeter Grehan {
294366f6083SPeter Grehan 	int i, units;
295366f6083SPeter Grehan 	struct vtdmap *vtdmap;
296366f6083SPeter Grehan 	vm_paddr_t ctx_paddr;
297366f6083SPeter Grehan 
298366f6083SPeter Grehan 	for (i = 0; drhd_ident_funcs[i] != NULL; i++) {
299366f6083SPeter Grehan 		units = (*drhd_ident_funcs[i])();
300366f6083SPeter Grehan 		if (units > 0)
301366f6083SPeter Grehan 			break;
302366f6083SPeter Grehan 	}
303366f6083SPeter Grehan 
304366f6083SPeter Grehan 	if (units <= 0)
305366f6083SPeter Grehan 		return (ENXIO);
306366f6083SPeter Grehan 
307366f6083SPeter Grehan 	drhd_num = units;
308366f6083SPeter Grehan 	vtdmap = vtdmaps[0];
309366f6083SPeter Grehan 
310366f6083SPeter Grehan 	if (VTD_CAP_CM(vtdmap->cap) != 0)
311366f6083SPeter Grehan 		panic("vtd_init: invalid caching mode");
312366f6083SPeter Grehan 
313366f6083SPeter Grehan 	max_domains = vtd_max_domains(vtdmap);
314366f6083SPeter Grehan 
315366f6083SPeter Grehan 	/*
316366f6083SPeter Grehan 	 * Set up the root-table to point to the context-entry tables
317366f6083SPeter Grehan 	 */
318366f6083SPeter Grehan 	for (i = 0; i < 256; i++) {
319366f6083SPeter Grehan 		ctx_paddr = vtophys(ctx_tables[i]);
320366f6083SPeter Grehan 		if (ctx_paddr & PAGE_MASK)
321366f6083SPeter Grehan 			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
322366f6083SPeter Grehan 
323366f6083SPeter Grehan 		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
324366f6083SPeter Grehan 	}
325366f6083SPeter Grehan 
326366f6083SPeter Grehan 	return (0);
327366f6083SPeter Grehan }
328366f6083SPeter Grehan 
/*
 * Counterpart of vtd_init().  There is nothing to undo: the root and context
 * tables are statically allocated.
 */
static void
vtd_cleanup(void)
{

}
333366f6083SPeter Grehan 
334366f6083SPeter Grehan static void
335366f6083SPeter Grehan vtd_enable(void)
336366f6083SPeter Grehan {
337366f6083SPeter Grehan 	int i;
338366f6083SPeter Grehan 	struct vtdmap *vtdmap;
339366f6083SPeter Grehan 
340366f6083SPeter Grehan 	for (i = 0; i < drhd_num; i++) {
341366f6083SPeter Grehan 		vtdmap = vtdmaps[i];
342366f6083SPeter Grehan 		vtd_wbflush(vtdmap);
343366f6083SPeter Grehan 
344366f6083SPeter Grehan 		/* Update the root table address */
345366f6083SPeter Grehan 		vtdmap->rta = vtophys(root_table);
346366f6083SPeter Grehan 		vtdmap->gcr = VTD_GCR_SRTP;
347366f6083SPeter Grehan 		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
348366f6083SPeter Grehan 			;
349366f6083SPeter Grehan 
350366f6083SPeter Grehan 		vtd_ctx_global_invalidate(vtdmap);
351366f6083SPeter Grehan 		vtd_iotlb_global_invalidate(vtdmap);
352366f6083SPeter Grehan 
353366f6083SPeter Grehan 		vtd_translation_enable(vtdmap);
354366f6083SPeter Grehan 	}
355366f6083SPeter Grehan }
356366f6083SPeter Grehan 
357366f6083SPeter Grehan static void
358366f6083SPeter Grehan vtd_disable(void)
359366f6083SPeter Grehan {
360366f6083SPeter Grehan 	int i;
361366f6083SPeter Grehan 	struct vtdmap *vtdmap;
362366f6083SPeter Grehan 
363366f6083SPeter Grehan 	for (i = 0; i < drhd_num; i++) {
364366f6083SPeter Grehan 		vtdmap = vtdmaps[i];
365366f6083SPeter Grehan 		vtd_translation_disable(vtdmap);
366366f6083SPeter Grehan 	}
367366f6083SPeter Grehan }
368366f6083SPeter Grehan 
369366f6083SPeter Grehan static void
370366f6083SPeter Grehan vtd_add_device(void *arg, int bus, int slot, int func)
371366f6083SPeter Grehan {
372366f6083SPeter Grehan 	int idx;
373366f6083SPeter Grehan 	uint64_t *ctxp;
374366f6083SPeter Grehan 	struct domain *dom = arg;
375366f6083SPeter Grehan 	vm_paddr_t pt_paddr;
376366f6083SPeter Grehan 	struct vtdmap *vtdmap;
377366f6083SPeter Grehan 
378366f6083SPeter Grehan 	if (bus < 0 || bus > PCI_BUSMAX ||
379366f6083SPeter Grehan 	    slot < 0 || slot > PCI_SLOTMAX ||
380366f6083SPeter Grehan 	    func < 0 || func > PCI_FUNCMAX)
381366f6083SPeter Grehan 		panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
382366f6083SPeter Grehan 
383366f6083SPeter Grehan 	vtdmap = vtdmaps[0];
384366f6083SPeter Grehan 	ctxp = ctx_tables[bus];
385366f6083SPeter Grehan 	pt_paddr = vtophys(dom->ptp);
386366f6083SPeter Grehan 	idx = (slot << 3 | func) * 2;
387366f6083SPeter Grehan 
388366f6083SPeter Grehan 	if (ctxp[idx] & VTD_CTX_PRESENT) {
389366f6083SPeter Grehan 		panic("vtd_add_device: device %d/%d/%d is already owned by "
390366f6083SPeter Grehan 		      "domain %d", bus, slot, func,
391366f6083SPeter Grehan 		      (uint16_t)(ctxp[idx + 1] >> 8));
392366f6083SPeter Grehan 	}
393366f6083SPeter Grehan 
394366f6083SPeter Grehan 	/*
395366f6083SPeter Grehan 	 * Order is important. The 'present' bit is set only after all fields
396366f6083SPeter Grehan 	 * of the context pointer are initialized.
397366f6083SPeter Grehan 	 */
398366f6083SPeter Grehan 	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
399366f6083SPeter Grehan 
400366f6083SPeter Grehan 	if (VTD_ECAP_DI(vtdmap->ext_cap))
401366f6083SPeter Grehan 		ctxp[idx] = VTD_CTX_TT_ALL;
402366f6083SPeter Grehan 	else
403366f6083SPeter Grehan 		ctxp[idx] = 0;
404366f6083SPeter Grehan 
405366f6083SPeter Grehan 	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
406366f6083SPeter Grehan 
407366f6083SPeter Grehan 	/*
408366f6083SPeter Grehan 	 * 'Not Present' entries are not cached in either the Context Cache
409366f6083SPeter Grehan 	 * or in the IOTLB, so there is no need to invalidate either of them.
410366f6083SPeter Grehan 	 */
411366f6083SPeter Grehan }
412366f6083SPeter Grehan 
413366f6083SPeter Grehan static void
414366f6083SPeter Grehan vtd_remove_device(void *arg, int bus, int slot, int func)
415366f6083SPeter Grehan {
416366f6083SPeter Grehan 	int i, idx;
417366f6083SPeter Grehan 	uint64_t *ctxp;
418366f6083SPeter Grehan 	struct vtdmap *vtdmap;
419366f6083SPeter Grehan 
420366f6083SPeter Grehan 	if (bus < 0 || bus > PCI_BUSMAX ||
421366f6083SPeter Grehan 	    slot < 0 || slot > PCI_SLOTMAX ||
422366f6083SPeter Grehan 	    func < 0 || func > PCI_FUNCMAX)
423366f6083SPeter Grehan 		panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
424366f6083SPeter Grehan 
425366f6083SPeter Grehan 	ctxp = ctx_tables[bus];
426366f6083SPeter Grehan 	idx = (slot << 3 | func) * 2;
427366f6083SPeter Grehan 
428366f6083SPeter Grehan 	/*
429366f6083SPeter Grehan 	 * Order is important. The 'present' bit is must be cleared first.
430366f6083SPeter Grehan 	 */
431366f6083SPeter Grehan 	ctxp[idx] = 0;
432366f6083SPeter Grehan 	ctxp[idx + 1] = 0;
433366f6083SPeter Grehan 
434366f6083SPeter Grehan 	/*
435366f6083SPeter Grehan 	 * Invalidate the Context Cache and the IOTLB.
436366f6083SPeter Grehan 	 *
437366f6083SPeter Grehan 	 * XXX use device-selective invalidation for Context Cache
438366f6083SPeter Grehan 	 * XXX use domain-selective invalidation for IOTLB
439366f6083SPeter Grehan 	 */
440366f6083SPeter Grehan 	for (i = 0; i < drhd_num; i++) {
441366f6083SPeter Grehan 		vtdmap = vtdmaps[i];
442366f6083SPeter Grehan 		vtd_ctx_global_invalidate(vtdmap);
443366f6083SPeter Grehan 		vtd_iotlb_global_invalidate(vtdmap);
444366f6083SPeter Grehan 	}
445366f6083SPeter Grehan }
446366f6083SPeter Grehan 
447366f6083SPeter Grehan static uint64_t
448366f6083SPeter Grehan vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
449366f6083SPeter Grehan {
450366f6083SPeter Grehan 	struct domain *dom;
451366f6083SPeter Grehan 	int i, spshift, ptpshift, ptpindex, nlevels;
452366f6083SPeter Grehan 	uint64_t spsize, *ptp;
453366f6083SPeter Grehan 
454366f6083SPeter Grehan 	dom = arg;
455366f6083SPeter Grehan 	ptpindex = 0;
456366f6083SPeter Grehan 	ptpshift = 0;
457366f6083SPeter Grehan 
458366f6083SPeter Grehan 	if (gpa & PAGE_MASK)
459366f6083SPeter Grehan 		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
460366f6083SPeter Grehan 
461366f6083SPeter Grehan 	if (hpa & PAGE_MASK)
462366f6083SPeter Grehan 		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
463366f6083SPeter Grehan 
464366f6083SPeter Grehan 	if (len & PAGE_MASK)
465366f6083SPeter Grehan 		panic("vtd_create_mapping: unaligned len 0x%0lx", len);
466366f6083SPeter Grehan 
467366f6083SPeter Grehan 	/*
468366f6083SPeter Grehan 	 * Compute the size of the mapping that we can accomodate.
469366f6083SPeter Grehan 	 *
470366f6083SPeter Grehan 	 * This is based on three factors:
471366f6083SPeter Grehan 	 * - supported super page size
472366f6083SPeter Grehan 	 * - alignment of the region starting at 'gpa' and 'hpa'
473366f6083SPeter Grehan 	 * - length of the region 'len'
474366f6083SPeter Grehan 	 */
475366f6083SPeter Grehan 	spshift = 48;
476366f6083SPeter Grehan 	for (i = 3; i >= 0; i--) {
477366f6083SPeter Grehan 		spsize = 1UL << spshift;
478366f6083SPeter Grehan 		if ((dom->spsmask & (1 << i)) != 0 &&
479366f6083SPeter Grehan 		    (gpa & (spsize - 1)) == 0 &&
480366f6083SPeter Grehan 		    (hpa & (spsize - 1)) == 0 &&
481366f6083SPeter Grehan 		    (len >= spsize)) {
482366f6083SPeter Grehan 			break;
483366f6083SPeter Grehan 		}
484366f6083SPeter Grehan 		spshift -= 9;
485366f6083SPeter Grehan 	}
486366f6083SPeter Grehan 
487366f6083SPeter Grehan 	ptp = dom->ptp;
488366f6083SPeter Grehan 	nlevels = dom->pt_levels;
489366f6083SPeter Grehan 	while (--nlevels >= 0) {
490366f6083SPeter Grehan 		ptpshift = 12 + nlevels * 9;
491366f6083SPeter Grehan 		ptpindex = (gpa >> ptpshift) & 0x1FF;
492366f6083SPeter Grehan 
493366f6083SPeter Grehan 		/* We have reached the leaf mapping */
494366f6083SPeter Grehan 		if (spshift >= ptpshift) {
495366f6083SPeter Grehan 			break;
496366f6083SPeter Grehan 		}
497366f6083SPeter Grehan 
498366f6083SPeter Grehan 		/*
499366f6083SPeter Grehan 		 * We are working on a non-leaf page table page.
500366f6083SPeter Grehan 		 *
501366f6083SPeter Grehan 		 * Create a downstream page table page if necessary and point
502366f6083SPeter Grehan 		 * to it from the current page table.
503366f6083SPeter Grehan 		 */
504366f6083SPeter Grehan 		if (ptp[ptpindex] == 0) {
505366f6083SPeter Grehan 			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
506366f6083SPeter Grehan 			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
507366f6083SPeter Grehan 		}
508366f6083SPeter Grehan 
509366f6083SPeter Grehan 		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
510366f6083SPeter Grehan 	}
511366f6083SPeter Grehan 
512366f6083SPeter Grehan 	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
513366f6083SPeter Grehan 		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
514366f6083SPeter Grehan 
515366f6083SPeter Grehan 	/*
516366f6083SPeter Grehan 	 * Create a 'gpa' -> 'hpa' mapping
517366f6083SPeter Grehan 	 */
518366f6083SPeter Grehan 	ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
519366f6083SPeter Grehan 
520366f6083SPeter Grehan 	if (nlevels > 0)
521366f6083SPeter Grehan 		ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
522366f6083SPeter Grehan 
523366f6083SPeter Grehan 	return (1UL << ptpshift);
524366f6083SPeter Grehan }
525366f6083SPeter Grehan 
526366f6083SPeter Grehan static void *
527366f6083SPeter Grehan vtd_create_domain(vm_paddr_t maxaddr)
528366f6083SPeter Grehan {
529366f6083SPeter Grehan 	struct domain *dom;
530366f6083SPeter Grehan 	vm_paddr_t addr;
531366f6083SPeter Grehan 	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
532366f6083SPeter Grehan 	struct vtdmap *vtdmap;
533366f6083SPeter Grehan 
534366f6083SPeter Grehan 	if (drhd_num <= 0)
535366f6083SPeter Grehan 		panic("vtd_create_domain: no dma remapping hardware available");
536366f6083SPeter Grehan 
537366f6083SPeter Grehan 	vtdmap = vtdmaps[0];
538366f6083SPeter Grehan 
539366f6083SPeter Grehan 	/*
540366f6083SPeter Grehan 	 * Calculate AGAW.
541366f6083SPeter Grehan 	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
542366f6083SPeter Grehan 	 */
543366f6083SPeter Grehan 	addr = 0;
544366f6083SPeter Grehan 	for (gaw = 0; addr < maxaddr; gaw++)
545366f6083SPeter Grehan 		addr = 1ULL << gaw;
546366f6083SPeter Grehan 
547366f6083SPeter Grehan 	res = (gaw - 12) % 9;
548366f6083SPeter Grehan 	if (res == 0)
549366f6083SPeter Grehan 		agaw = gaw;
550366f6083SPeter Grehan 	else
551366f6083SPeter Grehan 		agaw = gaw + 9 - res;
552366f6083SPeter Grehan 
553366f6083SPeter Grehan 	if (agaw > 64)
554366f6083SPeter Grehan 		agaw = 64;
555366f6083SPeter Grehan 
556366f6083SPeter Grehan 	/*
557366f6083SPeter Grehan 	 * Select the smallest Supported AGAW and the corresponding number
558366f6083SPeter Grehan 	 * of page table levels.
559366f6083SPeter Grehan 	 */
560366f6083SPeter Grehan 	pt_levels = 2;
561366f6083SPeter Grehan 	sagaw = 30;
562366f6083SPeter Grehan 	addrwidth = 0;
563366f6083SPeter Grehan 	tmp = VTD_CAP_SAGAW(vtdmap->cap);
564366f6083SPeter Grehan 	for (i = 0; i < 5; i++) {
565366f6083SPeter Grehan 		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
566366f6083SPeter Grehan 			break;
567366f6083SPeter Grehan 		pt_levels++;
568366f6083SPeter Grehan 		addrwidth++;
569366f6083SPeter Grehan 		sagaw += 9;
570366f6083SPeter Grehan 		if (sagaw > 64)
571366f6083SPeter Grehan 			sagaw = 64;
572366f6083SPeter Grehan 	}
573366f6083SPeter Grehan 
574366f6083SPeter Grehan 	if (i >= 5) {
575366f6083SPeter Grehan 		panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
576366f6083SPeter Grehan 		      VTD_CAP_SAGAW(vtdmap->cap), agaw);
577366f6083SPeter Grehan 	}
578366f6083SPeter Grehan 
579366f6083SPeter Grehan 	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
580366f6083SPeter Grehan 	dom->pt_levels = pt_levels;
581366f6083SPeter Grehan 	dom->addrwidth = addrwidth;
582366f6083SPeter Grehan 	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
583366f6083SPeter Grehan 	dom->id = domain_id();
584366f6083SPeter Grehan 	dom->maxaddr = maxaddr;
585366f6083SPeter Grehan 	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
586366f6083SPeter Grehan 	if ((uintptr_t)dom->ptp & PAGE_MASK)
587366f6083SPeter Grehan 		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
588366f6083SPeter Grehan 
589366f6083SPeter Grehan 	SLIST_INSERT_HEAD(&domhead, dom, next);
590366f6083SPeter Grehan 
591366f6083SPeter Grehan 	return (dom);
592366f6083SPeter Grehan }
593366f6083SPeter Grehan 
594366f6083SPeter Grehan static void
595366f6083SPeter Grehan vtd_free_ptp(uint64_t *ptp, int level)
596366f6083SPeter Grehan {
597366f6083SPeter Grehan 	int i;
598366f6083SPeter Grehan 	uint64_t *nlp;
599366f6083SPeter Grehan 
600366f6083SPeter Grehan 	if (level > 1) {
601366f6083SPeter Grehan 		for (i = 0; i < 512; i++) {
602366f6083SPeter Grehan 			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
603366f6083SPeter Grehan 				continue;
604366f6083SPeter Grehan 			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
605366f6083SPeter Grehan 				continue;
606366f6083SPeter Grehan 			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
607366f6083SPeter Grehan 			vtd_free_ptp(nlp, level - 1);
608366f6083SPeter Grehan 		}
609366f6083SPeter Grehan 	}
610366f6083SPeter Grehan 
611366f6083SPeter Grehan 	bzero(ptp, PAGE_SIZE);
612366f6083SPeter Grehan 	free(ptp, M_VTD);
613366f6083SPeter Grehan }
614366f6083SPeter Grehan 
615366f6083SPeter Grehan static void
616366f6083SPeter Grehan vtd_destroy_domain(void *arg)
617366f6083SPeter Grehan {
618366f6083SPeter Grehan 	struct domain *dom;
619366f6083SPeter Grehan 
620366f6083SPeter Grehan 	dom = arg;
621366f6083SPeter Grehan 
622366f6083SPeter Grehan 	SLIST_REMOVE(&domhead, dom, domain, next);
623366f6083SPeter Grehan 	vtd_free_ptp(dom->ptp, dom->pt_levels);
624366f6083SPeter Grehan 	free(dom, M_VTD);
625366f6083SPeter Grehan }
626366f6083SPeter Grehan 
627366f6083SPeter Grehan struct iommu_ops iommu_ops_intel = {
628366f6083SPeter Grehan 	vtd_init,
629366f6083SPeter Grehan 	vtd_cleanup,
630366f6083SPeter Grehan 	vtd_enable,
631366f6083SPeter Grehan 	vtd_disable,
632366f6083SPeter Grehan 	vtd_create_domain,
633366f6083SPeter Grehan 	vtd_destroy_domain,
634366f6083SPeter Grehan 	vtd_create_mapping,
635366f6083SPeter Grehan 	vtd_add_device,
636366f6083SPeter Grehan 	vtd_remove_device,
637366f6083SPeter Grehan };
638