xref: /freebsd/sys/x86/iommu/intel_ctx.c (revision 4b9d6057)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 The FreeBSD Foundation
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/malloc.h>
34 #include <sys/bus.h>
35 #include <sys/interrupt.h>
36 #include <sys/kernel.h>
37 #include <sys/ktr.h>
38 #include <sys/limits.h>
39 #include <sys/lock.h>
40 #include <sys/memdesc.h>
41 #include <sys/mutex.h>
42 #include <sys/proc.h>
43 #include <sys/rwlock.h>
44 #include <sys/rman.h>
45 #include <sys/sysctl.h>
46 #include <sys/taskqueue.h>
47 #include <sys/tree.h>
48 #include <sys/uio.h>
49 #include <sys/vmem.h>
50 #include <vm/vm.h>
51 #include <vm/vm_extern.h>
52 #include <vm/vm_kern.h>
53 #include <vm/vm_object.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_pager.h>
56 #include <vm/vm_map.h>
57 #include <contrib/dev/acpica/include/acpi.h>
58 #include <contrib/dev/acpica/include/accommon.h>
59 #include <dev/pci/pcireg.h>
60 #include <dev/pci/pcivar.h>
61 #include <machine/atomic.h>
62 #include <machine/bus.h>
63 #include <machine/md_var.h>
64 #include <machine/specialreg.h>
65 #include <x86/include/busdma_impl.h>
66 #include <dev/iommu/busdma_iommu.h>
67 #include <x86/iommu/intel_reg.h>
68 #include <x86/iommu/intel_dmar.h>
69 
/*
 * Malloc types for the allocations made in this file: M_DMAR_CTX is used
 * for context structures and their busdma tags, M_DMAR_DOMAIN for domain
 * structures.
 */
70 static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
71 static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");
72 
/*
 * Forward declarations for helpers that are used before their definitions
 * further down in this file.
 */
73 static void dmar_unref_domain_locked(struct dmar_unit *dmar,
74     struct dmar_domain *domain);
75 static void dmar_domain_destroy(struct dmar_domain *domain);
76 
77 static void
78 dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
79 {
80 	struct sf_buf *sf;
81 	dmar_root_entry_t *re;
82 	vm_page_t ctxm;
83 
84 	/*
85 	 * Allocated context page must be linked.
86 	 */
87 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
88 	if (ctxm != NULL)
89 		return;
90 
91 	/*
92 	 * Page not present, allocate and link.  Note that other
93 	 * thread might execute this sequence in parallel.  This
94 	 * should be safe, because the context entries written by both
95 	 * threads are equal.
96 	 */
97 	TD_PREP_PINNED_ASSERT;
98 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
99 	    IOMMU_PGF_WAITOK);
100 	re = dmar_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
101 	re += bus;
102 	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
103 	    VM_PAGE_TO_PHYS(ctxm)));
104 	dmar_flush_root_to_ram(dmar, re);
105 	dmar_unmap_pgtbl(sf);
106 	TD_PINNED_ASSERT;
107 }
108 
109 static dmar_ctx_entry_t *
110 dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
111 {
112 	struct dmar_unit *dmar;
113 	dmar_ctx_entry_t *ctxp;
114 
115 	dmar = CTX2DMAR(ctx);
116 
117 	ctxp = dmar_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
118 	    IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
119 	ctxp += ctx->context.rid & 0xff;
120 	return (ctxp);
121 }
122 
123 static void
124 device_tag_init(struct dmar_ctx *ctx, device_t dev)
125 {
126 	struct dmar_domain *domain;
127 	bus_addr_t maxaddr;
128 
129 	domain = CTX2DOM(ctx);
130 	maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
131 	ctx->context.tag->common.impl = &bus_dma_iommu_impl;
132 	ctx->context.tag->common.boundary = 0;
133 	ctx->context.tag->common.lowaddr = maxaddr;
134 	ctx->context.tag->common.highaddr = maxaddr;
135 	ctx->context.tag->common.maxsize = maxaddr;
136 	ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
137 	ctx->context.tag->common.maxsegsz = maxaddr;
138 	ctx->context.tag->ctx = CTX2IOCTX(ctx);
139 	ctx->context.tag->owner = dev;
140 }
141 
142 static void
143 ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
144     vm_page_t ctx_root)
145 {
146 	/*
147 	 * For update due to move, the store is not atomic.  It is
148 	 * possible that DMAR read upper doubleword, while low
149 	 * doubleword is not yet updated.  The domain id is stored in
150 	 * the upper doubleword, while the table pointer in the lower.
151 	 *
152 	 * There is no good solution, for the same reason it is wrong
153 	 * to clear P bit in the ctx entry for update.
154 	 */
155 	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
156 	    domain->awlvl);
157 	if (ctx_root == NULL) {
158 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
159 	} else {
160 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
161 		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
162 		    DMAR_CTX1_P);
163 	}
164 }
165 
166 static void
167 ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
168     int busno)
169 {
170 	struct dmar_unit *unit;
171 	struct dmar_domain *domain;
172 	vm_page_t ctx_root;
173 	int i;
174 
175 	domain = CTX2DOM(ctx);
176 	unit = DOM2DMAR(domain);
177 	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
178 	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
179 	    unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
180 	    pci_get_function(ctx->context.tag->owner),
181 	    ctxp->ctx1, ctxp->ctx2));
182 
183 	if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
184 	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
185 		KASSERT(domain->pgtbl_obj == NULL,
186 		    ("ctx %p non-null pgtbl_obj", ctx));
187 		ctx_root = NULL;
188 	} else {
189 		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0,
190 		    IOMMU_PGF_NOALLOC);
191 	}
192 
193 	if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
194 		MPASS(!move);
195 		for (i = 0; i <= PCI_BUSMAX; i++) {
196 			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
197 		}
198 	} else {
199 		ctx_id_entry_init_one(ctxp, domain, ctx_root);
200 	}
201 	dmar_flush_ctx_to_ram(unit, ctxp);
202 }
203 
204 static int
205 dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
206 {
207 	int error;
208 
209 	/*
210 	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
211 	 * Mode Consideration" and do the (global) invalidation of the
212 	 * negative TLB entries.
213 	 */
214 	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
215 		return (0);
216 	if (dmar->qi_enabled) {
217 		dmar_qi_invalidate_ctx_glob_locked(dmar);
218 		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
219 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
220 		return (0);
221 	}
222 	error = dmar_inv_ctx_glob(dmar);
223 	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
224 		error = dmar_inv_iotlb_glob(dmar);
225 	return (error);
226 }
227 
228 static int
229 domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
230     int slot, int func, int dev_domain, int dev_busno,
231     const void *dev_path, int dev_path_len)
232 {
233 	struct iommu_map_entries_tailq rmrr_entries;
234 	struct iommu_map_entry *entry, *entry1;
235 	vm_page_t *ma;
236 	iommu_gaddr_t start, end;
237 	vm_pindex_t size, i;
238 	int error, error1;
239 
240 	error = 0;
241 	TAILQ_INIT(&rmrr_entries);
242 	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
243 	    dev_path_len, &rmrr_entries);
244 	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, dmamap_link, entry1) {
245 		/*
246 		 * VT-d specification requires that the start of an
247 		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
248 		 * anything into the start and end fields.  Truncate
249 		 * and round as neccesary.
250 		 *
251 		 * We also allow the overlapping RMRR entries, see
252 		 * iommu_gas_alloc_region().
253 		 */
254 		start = entry->start;
255 		end = entry->end;
256 		if (bootverbose)
257 			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
258 			    domain->iodom.iommu->unit, bus, slot, func,
259 			    (uintmax_t)start, (uintmax_t)end);
260 		entry->start = trunc_page(start);
261 		entry->end = round_page(end);
262 		if (entry->start == entry->end) {
263 			/* Workaround for some AMI (?) BIOSes */
264 			if (bootverbose) {
265 				if (dev != NULL)
266 					device_printf(dev, "");
267 				printf("pci%d:%d:%d ", bus, slot, func);
268 				printf("BIOS bug: dmar%d RMRR "
269 				    "region (%jx, %jx) corrected\n",
270 				    domain->iodom.iommu->unit, start, end);
271 			}
272 			entry->end += DMAR_PAGE_SIZE * 0x20;
273 		}
274 		size = OFF_TO_IDX(entry->end - entry->start);
275 		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
276 		for (i = 0; i < size; i++) {
277 			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
278 			    VM_MEMATTR_DEFAULT);
279 		}
280 		error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
281 		    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
282 		    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
283 		/*
284 		 * Non-failed RMRR entries are owned by context rb
285 		 * tree.  Get rid of the failed entry, but do not stop
286 		 * the loop.  Rest of the parsed RMRR entries are
287 		 * loaded and removed on the context destruction.
288 		 */
289 		if (error1 == 0 && entry->end != entry->start) {
290 			IOMMU_LOCK(domain->iodom.iommu);
291 			domain->refs++; /* XXXKIB prevent free */
292 			domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
293 			IOMMU_UNLOCK(domain->iodom.iommu);
294 		} else {
295 			if (error1 != 0) {
296 				if (dev != NULL)
297 					device_printf(dev, "");
298 				printf("pci%d:%d:%d ", bus, slot, func);
299 				printf(
300 			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
301 				    domain->iodom.iommu->unit, start, end,
302 				    error1);
303 				error = error1;
304 			}
305 			TAILQ_REMOVE(&rmrr_entries, entry, dmamap_link);
306 			iommu_gas_free_entry(entry);
307 		}
308 		for (i = 0; i < size; i++)
309 			vm_page_putfake(ma[i]);
310 		free(ma, M_TEMP);
311 	}
312 	return (error);
313 }
314 
315 /*
316  * PCI memory address space is shared between memory-mapped devices (MMIO) and
317  * host memory (which may be remapped by an IOMMU).  Device accesses to an
318  * address within a memory aperture in a PCIe root port will be treated as
319  * peer-to-peer and not forwarded to an IOMMU.  To avoid this, reserve the
320  * address space of the root port's memory apertures in the address space used
321  * by the IOMMU for remapping.
322  */
323 static int
324 dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
325 {
326 	struct iommu_domain *iodom;
327 	device_t root;
328 	uint32_t val;
329 	uint64_t base, limit;
330 	int error;
331 
332 	iodom = DOM2IODOM(domain);
333 
334 	root = pci_find_pcie_root_port(dev);
335 	if (root == NULL)
336 		return (0);
337 
338 	/* Disable downstream memory */
339 	base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
340 	limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
341 	error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
342 	if (bootverbose || error != 0)
343 		device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
344 		    base, limit + 1, error);
345 	if (error != 0)
346 		return (error);
347 
348 	/* Disable downstream prefetchable memory */
349 	val = pci_read_config(root, PCIR_PMBASEL_1, 2);
350 	if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
351 		if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
352 			base = PCI_PPBMEMBASE(
353 			    pci_read_config(root, PCIR_PMBASEH_1, 4),
354 			    val);
355 			limit = PCI_PPBMEMLIMIT(
356 			    pci_read_config(root, PCIR_PMLIMITH_1, 4),
357 			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
358 		} else {
359 			base = PCI_PPBMEMBASE(0, val);
360 			limit = PCI_PPBMEMLIMIT(0,
361 			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
362 		}
363 		error = iommu_gas_reserve_region_extend(iodom, base,
364 		    limit + 1);
365 		if (bootverbose || error != 0)
366 			device_printf(dev, "DMAR reserve [%#jx-%#jx] "
367 			    "(error %d)\n", base, limit + 1, error);
368 		if (error != 0)
369 			return (error);
370 	}
371 
372 	return (error);
373 }
374 
375 static struct dmar_domain *
376 dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
377 {
378 	struct iommu_domain *iodom;
379 	struct iommu_unit *unit;
380 	struct dmar_domain *domain;
381 	int error, id, mgaw;
382 
383 	id = alloc_unr(dmar->domids);
384 	if (id == -1)
385 		return (NULL);
386 	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
387 	iodom = DOM2IODOM(domain);
388 	unit = DMAR2IOMMU(dmar);
389 	domain->domain = id;
390 	LIST_INIT(&domain->contexts);
391 	iommu_domain_init(unit, iodom, &dmar_domain_map_ops);
392 
393 	domain->dmar = dmar;
394 
395 	/*
396 	 * For now, use the maximal usable physical address of the
397 	 * installed memory to calculate the mgaw on id_mapped domain.
398 	 * It is useful for the identity mapping, and less so for the
399 	 * virtualized bus address space.
400 	 */
401 	domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
402 	mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
403 	error = domain_set_agaw(domain, mgaw);
404 	if (error != 0)
405 		goto fail;
406 	if (!id_mapped)
407 		/* Use all supported address space for remapping. */
408 		domain->iodom.end = 1ULL << (domain->agaw - 1);
409 
410 	iommu_gas_init_domain(DOM2IODOM(domain));
411 
412 	if (id_mapped) {
413 		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
414 			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
415 			    domain->iodom.end);
416 		}
417 		domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
418 	} else {
419 		error = domain_alloc_pgtbl(domain);
420 		if (error != 0)
421 			goto fail;
422 		/* Disable local apic region access */
423 		error = iommu_gas_reserve_region(iodom, 0xfee00000,
424 		    0xfeefffff + 1, &iodom->msi_entry);
425 		if (error != 0)
426 			goto fail;
427 	}
428 	return (domain);
429 
430 fail:
431 	dmar_domain_destroy(domain);
432 	return (NULL);
433 }
434 
435 static struct dmar_ctx *
436 dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
437 {
438 	struct dmar_ctx *ctx;
439 
440 	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
441 	ctx->context.domain = DOM2IODOM(domain);
442 	ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
443 	    M_DMAR_CTX, M_WAITOK | M_ZERO);
444 	ctx->context.rid = rid;
445 	ctx->refs = 1;
446 	return (ctx);
447 }
448 
449 static void
450 dmar_ctx_link(struct dmar_ctx *ctx)
451 {
452 	struct dmar_domain *domain;
453 
454 	domain = CTX2DOM(ctx);
455 	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
456 	KASSERT(domain->refs >= domain->ctx_cnt,
457 	    ("dom %p ref underflow %d %d", domain, domain->refs,
458 	    domain->ctx_cnt));
459 	domain->refs++;
460 	domain->ctx_cnt++;
461 	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
462 }
463 
464 static void
465 dmar_ctx_unlink(struct dmar_ctx *ctx)
466 {
467 	struct dmar_domain *domain;
468 
469 	domain = CTX2DOM(ctx);
470 	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
471 	KASSERT(domain->refs > 0,
472 	    ("domain %p ctx dtr refs %d", domain, domain->refs));
473 	KASSERT(domain->ctx_cnt >= domain->refs,
474 	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
475 	    domain->refs, domain->ctx_cnt));
476 	domain->refs--;
477 	domain->ctx_cnt--;
478 	LIST_REMOVE(ctx, link);
479 }
480 
481 static void
482 dmar_domain_destroy(struct dmar_domain *domain)
483 {
484 	struct iommu_domain *iodom;
485 	struct dmar_unit *dmar;
486 
487 	iodom = DOM2IODOM(domain);
488 
489 	KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
490 	    ("unfinished unloads %p", domain));
491 	KASSERT(LIST_EMPTY(&domain->contexts),
492 	    ("destroying dom %p with contexts", domain));
493 	KASSERT(domain->ctx_cnt == 0,
494 	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
495 	KASSERT(domain->refs == 0,
496 	    ("destroying dom %p with refs %d", domain, domain->refs));
497 	if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
498 		DMAR_DOMAIN_LOCK(domain);
499 		iommu_gas_fini_domain(iodom);
500 		DMAR_DOMAIN_UNLOCK(domain);
501 	}
502 	if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
503 		if (domain->pgtbl_obj != NULL)
504 			DMAR_DOMAIN_PGLOCK(domain);
505 		domain_free_pgtbl(domain);
506 	}
507 	iommu_domain_fini(iodom);
508 	dmar = DOM2DMAR(domain);
509 	free_unr(dmar->domids, domain->domain);
510 	free(domain, M_DMAR_DOMAIN);
511 }
512 
513 static struct dmar_ctx *
514 dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
515     int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
516     bool id_mapped, bool rmrr_init)
517 {
518 	struct dmar_domain *domain, *domain1;
519 	struct dmar_ctx *ctx, *ctx1;
520 	struct iommu_unit *unit __diagused;
521 	dmar_ctx_entry_t *ctxp;
522 	struct sf_buf *sf;
523 	int bus, slot, func, error;
524 	bool enable;
525 
526 	if (dev != NULL) {
527 		bus = pci_get_bus(dev);
528 		slot = pci_get_slot(dev);
529 		func = pci_get_function(dev);
530 	} else {
531 		bus = PCI_RID2BUS(rid);
532 		slot = PCI_RID2SLOT(rid);
533 		func = PCI_RID2FUNC(rid);
534 	}
535 	enable = false;
536 	TD_PREP_PINNED_ASSERT;
537 	unit = DMAR2IOMMU(dmar);
538 	DMAR_LOCK(dmar);
539 	KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
540 	    ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
541 	    slot, func));
542 	ctx = dmar_find_ctx_locked(dmar, rid);
543 	error = 0;
544 	if (ctx == NULL) {
545 		/*
546 		 * Perform the allocations which require sleep or have
547 		 * higher chance to succeed if the sleep is allowed.
548 		 */
549 		DMAR_UNLOCK(dmar);
550 		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
551 		domain1 = dmar_domain_alloc(dmar, id_mapped);
552 		if (domain1 == NULL) {
553 			TD_PINNED_ASSERT;
554 			return (NULL);
555 		}
556 		if (!id_mapped) {
557 			error = domain_init_rmrr(domain1, dev, bus,
558 			    slot, func, dev_domain, dev_busno, dev_path,
559 			    dev_path_len);
560 			if (error == 0 && dev != NULL)
561 				error = dmar_reserve_pci_regions(domain1, dev);
562 			if (error != 0) {
563 				dmar_domain_destroy(domain1);
564 				TD_PINNED_ASSERT;
565 				return (NULL);
566 			}
567 		}
568 		ctx1 = dmar_ctx_alloc(domain1, rid);
569 		ctxp = dmar_map_ctx_entry(ctx1, &sf);
570 		DMAR_LOCK(dmar);
571 
572 		/*
573 		 * Recheck the contexts, other thread might have
574 		 * already allocated needed one.
575 		 */
576 		ctx = dmar_find_ctx_locked(dmar, rid);
577 		if (ctx == NULL) {
578 			domain = domain1;
579 			ctx = ctx1;
580 			dmar_ctx_link(ctx);
581 			ctx->context.tag->owner = dev;
582 			device_tag_init(ctx, dev);
583 
584 			/*
585 			 * This is the first activated context for the
586 			 * DMAR unit.  Enable the translation after
587 			 * everything is set up.
588 			 */
589 			if (LIST_EMPTY(&dmar->domains))
590 				enable = true;
591 			LIST_INSERT_HEAD(&dmar->domains, domain, link);
592 			ctx_id_entry_init(ctx, ctxp, false, bus);
593 			if (dev != NULL) {
594 				device_printf(dev,
595 			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
596 				    "agaw %d %s-mapped\n",
597 				    dmar->iommu.unit, dmar->segment, bus, slot,
598 				    func, rid, domain->domain, domain->mgaw,
599 				    domain->agaw, id_mapped ? "id" : "re");
600 			}
601 			dmar_unmap_pgtbl(sf);
602 		} else {
603 			dmar_unmap_pgtbl(sf);
604 			dmar_domain_destroy(domain1);
605 			/* Nothing needs to be done to destroy ctx1. */
606 			free(ctx1, M_DMAR_CTX);
607 			domain = CTX2DOM(ctx);
608 			ctx->refs++; /* tag referenced us */
609 		}
610 	} else {
611 		domain = CTX2DOM(ctx);
612 		if (ctx->context.tag->owner == NULL)
613 			ctx->context.tag->owner = dev;
614 		ctx->refs++; /* tag referenced us */
615 	}
616 
617 	error = dmar_flush_for_ctx_entry(dmar, enable);
618 	if (error != 0) {
619 		dmar_free_ctx_locked(dmar, ctx);
620 		TD_PINNED_ASSERT;
621 		return (NULL);
622 	}
623 
624 	/*
625 	 * The dmar lock was potentially dropped between check for the
626 	 * empty context list and now.  Recheck the state of GCMD_TE
627 	 * to avoid unneeded command.
628 	 */
629 	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
630 		error = dmar_disable_protected_regions(dmar);
631 		if (error != 0)
632 			printf("dmar%d: Failed to disable protected regions\n",
633 			    dmar->iommu.unit);
634 		error = dmar_enable_translation(dmar);
635 		if (error == 0) {
636 			if (bootverbose) {
637 				printf("dmar%d: enabled translation\n",
638 				    dmar->iommu.unit);
639 			}
640 		} else {
641 			printf("dmar%d: enabling translation failed, "
642 			    "error %d\n", dmar->iommu.unit, error);
643 			dmar_free_ctx_locked(dmar, ctx);
644 			TD_PINNED_ASSERT;
645 			return (NULL);
646 		}
647 	}
648 	DMAR_UNLOCK(dmar);
649 	TD_PINNED_ASSERT;
650 	return (ctx);
651 }
652 
653 struct dmar_ctx *
654 dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
655     bool id_mapped, bool rmrr_init)
656 {
657 	int dev_domain, dev_path_len, dev_busno;
658 
659 	dev_domain = pci_get_domain(dev);
660 	dev_path_len = dmar_dev_depth(dev);
661 	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
662 	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
663 	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
664 	    dev_path, dev_path_len, id_mapped, rmrr_init));
665 }
666 
667 struct dmar_ctx *
668 dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
669     int dev_domain, int dev_busno,
670     const void *dev_path, int dev_path_len,
671     bool id_mapped, bool rmrr_init)
672 {
673 
674 	return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
675 	    dev_path, dev_path_len, id_mapped, rmrr_init));
676 }
677 
678 int
679 dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
680 {
681 	struct dmar_unit *dmar;
682 	struct dmar_domain *old_domain;
683 	dmar_ctx_entry_t *ctxp;
684 	struct sf_buf *sf;
685 	int error;
686 
687 	dmar = domain->dmar;
688 	old_domain = CTX2DOM(ctx);
689 	if (domain == old_domain)
690 		return (0);
691 	KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
692 	    ("domain %p %u moving between dmars %u %u", domain,
693 	    domain->domain, old_domain->iodom.iommu->unit,
694 	    domain->iodom.iommu->unit));
695 	TD_PREP_PINNED_ASSERT;
696 
697 	ctxp = dmar_map_ctx_entry(ctx, &sf);
698 	DMAR_LOCK(dmar);
699 	dmar_ctx_unlink(ctx);
700 	ctx->context.domain = &domain->iodom;
701 	dmar_ctx_link(ctx);
702 	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
703 	dmar_unmap_pgtbl(sf);
704 	error = dmar_flush_for_ctx_entry(dmar, true);
705 	/* If flush failed, rolling back would not work as well. */
706 	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
707 	    dmar->iommu.unit, ctx->context.rid, old_domain->domain,
708 	    domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
709 	    "id" : "re");
710 	dmar_unref_domain_locked(dmar, old_domain);
711 	TD_PINNED_ASSERT;
712 	return (error);
713 }
714 
715 static void
716 dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
717 {
718 
719 	DMAR_ASSERT_LOCKED(dmar);
720 	KASSERT(domain->refs >= 1,
721 	    ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
722 	    domain->refs));
723 	KASSERT(domain->refs > domain->ctx_cnt,
724 	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
725 	    domain->refs, domain->ctx_cnt));
726 
727 	if (domain->refs > 1) {
728 		domain->refs--;
729 		DMAR_UNLOCK(dmar);
730 		return;
731 	}
732 
733 	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
734 	    ("lost ref on RMRR domain %p", domain));
735 
736 	LIST_REMOVE(domain, link);
737 	DMAR_UNLOCK(dmar);
738 
739 	taskqueue_drain(dmar->iommu.delayed_taskqueue,
740 	    &domain->iodom.unload_task);
741 	dmar_domain_destroy(domain);
742 }
743 
744 void
745 dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
746 {
747 	struct sf_buf *sf;
748 	dmar_ctx_entry_t *ctxp;
749 	struct dmar_domain *domain;
750 
751 	DMAR_ASSERT_LOCKED(dmar);
752 	KASSERT(ctx->refs >= 1,
753 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
754 
755 	/*
756 	 * If our reference is not last, only the dereference should
757 	 * be performed.
758 	 */
759 	if (ctx->refs > 1) {
760 		ctx->refs--;
761 		DMAR_UNLOCK(dmar);
762 		return;
763 	}
764 
765 	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
766 	    ("lost ref on disabled ctx %p", ctx));
767 
768 	/*
769 	 * Otherwise, the context entry must be cleared before the
770 	 * page table is destroyed.  The mapping of the context
771 	 * entries page could require sleep, unlock the dmar.
772 	 */
773 	DMAR_UNLOCK(dmar);
774 	TD_PREP_PINNED_ASSERT;
775 	ctxp = dmar_map_ctx_entry(ctx, &sf);
776 	DMAR_LOCK(dmar);
777 	KASSERT(ctx->refs >= 1,
778 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
779 
780 	/*
781 	 * Other thread might have referenced the context, in which
782 	 * case again only the dereference should be performed.
783 	 */
784 	if (ctx->refs > 1) {
785 		ctx->refs--;
786 		DMAR_UNLOCK(dmar);
787 		dmar_unmap_pgtbl(sf);
788 		TD_PINNED_ASSERT;
789 		return;
790 	}
791 
792 	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
793 	    ("lost ref on disabled ctx %p", ctx));
794 
795 	/*
796 	 * Clear the context pointer and flush the caches.
797 	 * XXXKIB: cannot do this if any RMRR entries are still present.
798 	 */
799 	dmar_pte_clear(&ctxp->ctx1);
800 	ctxp->ctx2 = 0;
801 	dmar_flush_ctx_to_ram(dmar, ctxp);
802 	dmar_inv_ctx_glob(dmar);
803 	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
804 		if (dmar->qi_enabled)
805 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
806 		else
807 			dmar_inv_iotlb_glob(dmar);
808 	}
809 	dmar_unmap_pgtbl(sf);
810 	domain = CTX2DOM(ctx);
811 	dmar_ctx_unlink(ctx);
812 	free(ctx->context.tag, M_DMAR_CTX);
813 	free(ctx, M_DMAR_CTX);
814 	dmar_unref_domain_locked(dmar, domain);
815 	TD_PINNED_ASSERT;
816 }
817 
/*
 * Drop one reference on ctx.  Takes the dmar lock of the unit that owns
 * the context; dmar_free_ctx_locked() releases it.
 */
void
dmar_free_ctx(struct dmar_ctx *ctx)
{
	struct dmar_unit *owner;

	owner = CTX2DMAR(ctx);
	DMAR_LOCK(owner);
	dmar_free_ctx_locked(owner, ctx);
}
827 
828 /*
829  * Returns with the domain locked.
830  */
831 struct dmar_ctx *
832 dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
833 {
834 	struct dmar_domain *domain;
835 	struct dmar_ctx *ctx;
836 
837 	DMAR_ASSERT_LOCKED(dmar);
838 
839 	LIST_FOREACH(domain, &dmar->domains, link) {
840 		LIST_FOREACH(ctx, &domain->contexts, link) {
841 			if (ctx->context.rid == rid)
842 				return (ctx);
843 		}
844 	}
845 	return (NULL);
846 }
847 
848 void
849 dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
850 {
851 	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
852 		iommu_gas_free_region(entry);
853 	else
854 		iommu_gas_free_space(entry);
855 	if (free)
856 		iommu_gas_free_entry(entry);
857 	else
858 		entry->flags = 0;
859 }
860 
861 /*
862  * If the given value for "free" is true, then the caller must not be using
863  * the entry's dmamap_link field.
864  */
865 void
866 iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
867     bool cansleep)
868 {
869 	struct dmar_domain *domain;
870 	struct dmar_unit *unit;
871 
872 	domain = IODOM2DOM(entry->domain);
873 	unit = DOM2DMAR(domain);
874 
875 	/*
876 	 * If "free" is false, then the IOTLB invalidation must be performed
877 	 * synchronously.  Otherwise, the caller might free the entry before
878 	 * dmar_qi_task() is finished processing it.
879 	 */
880 	if (unit->qi_enabled) {
881 		if (free) {
882 			DMAR_LOCK(unit);
883 			dmar_qi_invalidate_locked(domain, entry, true);
884 			DMAR_UNLOCK(unit);
885 		} else {
886 			dmar_qi_invalidate_sync(domain, entry->start,
887 			    entry->end - entry->start, cansleep);
888 			dmar_domain_free_entry(entry, false);
889 		}
890 	} else {
891 		domain_flush_iotlb_sync(domain, entry->start, entry->end -
892 		    entry->start);
893 		dmar_domain_free_entry(entry, free);
894 	}
895 }
896 
897 static bool
898 dmar_domain_unload_emit_wait(struct dmar_domain *domain,
899     struct iommu_map_entry *entry)
900 {
901 
902 	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
903 		return (true);
904 	return (domain->batch_no++ % dmar_batch_coalesce == 0);
905 }
906 
907 void
908 iommu_domain_unload(struct iommu_domain *iodom,
909     struct iommu_map_entries_tailq *entries, bool cansleep)
910 {
911 	struct dmar_domain *domain;
912 	struct dmar_unit *unit;
913 	struct iommu_map_entry *entry, *entry1;
914 	int error __diagused;
915 
916 	domain = IODOM2DOM(iodom);
917 	unit = DOM2DMAR(domain);
918 
919 	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
920 		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
921 		    ("not mapped entry %p %p", domain, entry));
922 		error = iodom->ops->unmap(iodom, entry->start, entry->end -
923 		    entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
924 		KASSERT(error == 0, ("unmap %p error %d", domain, error));
925 		if (!unit->qi_enabled) {
926 			domain_flush_iotlb_sync(domain, entry->start,
927 			    entry->end - entry->start);
928 			TAILQ_REMOVE(entries, entry, dmamap_link);
929 			dmar_domain_free_entry(entry, true);
930 		}
931 	}
932 	if (TAILQ_EMPTY(entries))
933 		return;
934 
935 	KASSERT(unit->qi_enabled, ("loaded entry left"));
936 	DMAR_LOCK(unit);
937 	while ((entry = TAILQ_FIRST(entries)) != NULL) {
938 		TAILQ_REMOVE(entries, entry, dmamap_link);
939 		dmar_qi_invalidate_locked(domain, entry,
940 		    dmar_domain_unload_emit_wait(domain, entry));
941 	}
942 	DMAR_UNLOCK(unit);
943 }
944 
945 struct iommu_ctx *
946 iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
947     bool id_mapped, bool rmrr_init)
948 {
949 	struct dmar_unit *dmar;
950 	struct dmar_ctx *ret;
951 
952 	dmar = IOMMU2DMAR(iommu);
953 
954 	ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);
955 
956 	return (CTX2IOCTX(ret));
957 }
958 
/*
 * Generic iommu entry point: convert the unit and the context to their
 * DMAR counterparts and hand them to dmar_free_ctx_locked().
 */
void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{

	dmar_free_ctx_locked(IOMMU2DMAR(iommu), IOCTX2CTX(context));
}
970 
/*
 * Generic iommu entry point: convert the context to its DMAR counterpart
 * and hand it to dmar_free_ctx().
 */
void
iommu_free_ctx(struct iommu_ctx *context)
{

	dmar_free_ctx(IOCTX2CTX(context));
}
980