/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/intel_dmar.h>

static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");

static void dmar_unref_domain_locked(struct dmar_unit *dmar,
    struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);

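/*
 * Ensure that the root entry for the given bus has a context table
 * page linked: if the page is not yet present, allocate a zeroed page
 * and store its physical address into the root entry.
 */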
static void
dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
{
	struct sf_buf *sf;
	dmar_root_entry_t *re;
	vm_page_t ctxm;

	/*
	 * Allocated context page must be linked.
	 */
	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
	if (ctxm != NULL)
		return;

	/*
	 * Page not present, allocate and link.  Note that another
	 * thread might execute this sequence in parallel.  This
	 * should be safe, because the context entries written by both
	 * threads are equal.
	 */
	TD_PREP_PINNED_ASSERT;
	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
	    IOMMU_PGF_WAITOK);
	re = dmar_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
	re += bus;
	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
	    VM_PAGE_TO_PHYS(ctxm)));
	dmar_flush_root_to_ram(dmar, re);
	dmar_unmap_pgtbl(sf);
	TD_PINNED_ASSERT;
}

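/*
 * Map the context table page for the bus encoded in the context RID
 * and return a pointer to the entry indexed by the low byte of the
 * RID (slot and function).  The caller unmaps the page via
 * dmar_unmap_pgtbl() using the sf_buf returned in *sfp.
 */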
static dmar_ctx_entry_t *
dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
{
	struct dmar_unit *dmar;
	dmar_ctx_entry_t *ctxp;

	dmar = CTX2DMAR(ctx);

	ctxp = dmar_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
	    IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
	ctxp += ctx->context.rid & 0xff;
	return (ctxp);
}

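/*
 * Initialize the busdma tag of the context, limiting the addressable
 * range to the end of the domain's address space.
 */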
static void
device_tag_init(struct dmar_ctx *ctx, device_t dev)
{
	struct dmar_domain *domain;
	bus_addr_t maxaddr;

	domain = CTX2DOM(ctx);
	maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
	ctx->context.tag->common.impl = &bus_dma_iommu_impl;
	ctx->context.tag->common.boundary = 0;
	ctx->context.tag->common.lowaddr = maxaddr;
	ctx->context.tag->common.highaddr = maxaddr;
	ctx->context.tag->common.maxsize = maxaddr;
	ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
	ctx->context.tag->common.maxsegsz = maxaddr;
	ctx->context.tag->ctx = CTX2IOCTX(ctx);
	ctx->context.tag->owner = dev;
}

static void
ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
    vm_page_t ctx_root)
{
	/*
	 * For an update due to a move, the store is not atomic.  It is
	 * possible that the DMAR reads the upper doubleword while the
	 * lower doubleword is not yet updated.  The domain id is stored
	 * in the upper doubleword, while the table pointer is in the
	 * lower.
	 *
	 * There is no good solution, for the same reason it is wrong
	 * to clear the P bit in the ctx entry for an update.
	 */
	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
	    domain->awlvl);
	if (ctx_root == NULL) {
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
	} else {
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
		    DMAR_CTX1_P);
	}
}

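/*
 * Initialize the context entry for the given context (or all entries
 * of the bus for a bus-wide context): store the domain id, address
 * width and page table root, or select pass-through translation for
 * identity-mapped domains, then flush the entries to RAM.
 */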
static void
ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
    int busno)
{
	struct dmar_unit *unit;
	struct dmar_domain *domain;
	vm_page_t ctx_root;
	int i;

	domain = CTX2DOM(ctx);
	unit = DOM2DMAR(domain);
	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
	    unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
	    pci_get_function(ctx->context.tag->owner),
	    ctxp->ctx1, ctxp->ctx2));

	if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
		KASSERT(domain->pgtbl_obj == NULL,
		    ("ctx %p non-null pgtbl_obj", ctx));
		ctx_root = NULL;
	} else {
		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0,
		    IOMMU_PGF_NOALLOC);
	}

	if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
		MPASS(!move);
		for (i = 0; i <= PCI_BUSMAX; i++) {
			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
		}
	} else {
		ctx_id_entry_init_one(ctxp, domain, ctx_root);
	}
	dmar_flush_ctx_to_ram(unit, ctxp);
}

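/*
 * Globally invalidate the context cache, and the IOTLB if required,
 * after a context entry update.  The invalidation is only needed when
 * the unit reports Caching Mode, unless the caller forces it.
 */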
static int
dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
{
	int error;

	/*
	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
	 * Mode Consideration" and do the (global) invalidation of the
	 * negative TLB entries.
	 */
	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
		return (0);
	if (dmar->qi_enabled) {
		dmar_qi_invalidate_ctx_glob_locked(dmar);
		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		return (0);
	}
	error = dmar_inv_ctx_glob(dmar);
	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
		error = dmar_inv_iotlb_glob(dmar);
	return (error);
}

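/*
 * Map the RMRR regions reported for the device into the domain, so
 * that DMA to these regions keeps working once translation is
 * enabled.
 */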
static int
domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
    int slot, int func, int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len)
{
	struct iommu_map_entries_tailq rmrr_entries;
	struct iommu_map_entry *entry, *entry1;
	vm_page_t *ma;
	iommu_gaddr_t start, end;
	vm_pindex_t size, i;
	int error, error1;

	if (!dmar_rmrr_enable)
		return (0);

	error = 0;
	TAILQ_INIT(&rmrr_entries);
	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
	    dev_path_len, &rmrr_entries);
	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, dmamap_link, entry1) {
		/*
		 * VT-d specification requires that the start of an
		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
		 * anything into the start and end fields.  Truncate
		 * and round as necessary.
		 *
		 * We also allow overlapping RMRR entries, see
		 * iommu_gas_alloc_region().
		 */
		start = entry->start;
		end = entry->end;
		if (bootverbose)
			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
			    domain->iodom.iommu->unit, bus, slot, func,
			    (uintmax_t)start, (uintmax_t)end);
		entry->start = trunc_page(start);
		entry->end = round_page(end);
		if (entry->start == entry->end) {
			/* Workaround for some AMI (?) BIOSes */
			if (bootverbose) {
				if (dev != NULL)
					device_printf(dev, "");
				printf("pci%d:%d:%d ", bus, slot, func);
				printf("BIOS bug: dmar%d RMRR "
				    "region (%jx, %jx) corrected\n",
				    domain->iodom.iommu->unit, start, end);
			}
			entry->end += DMAR_PAGE_SIZE * 0x20;
		}
		size = OFF_TO_IDX(entry->end - entry->start);
		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
		for (i = 0; i < size; i++) {
			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
			    VM_MEMATTR_DEFAULT);
		}
		error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
		    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
		    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
		/*
		 * Non-failed RMRR entries are owned by the context rb
		 * tree.  Get rid of the failed entry, but do not stop
		 * the loop.  The rest of the parsed RMRR entries are
		 * loaded and removed on the context destruction.
		 */
		if (error1 == 0 && entry->end != entry->start) {
			IOMMU_LOCK(domain->iodom.iommu);
			domain->refs++; /* XXXKIB prevent free */
			domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
			IOMMU_UNLOCK(domain->iodom.iommu);
		} else {
			if (error1 != 0) {
				if (dev != NULL)
					device_printf(dev, "");
				printf("pci%d:%d:%d ", bus, slot, func);
				printf(
			"dmar%d failed to map RMRR region (%jx, %jx) %d\n",
				    domain->iodom.iommu->unit, start, end,
				    error1);
				error = error1;
			}
			TAILQ_REMOVE(&rmrr_entries, entry, dmamap_link);
			iommu_gas_free_entry(entry);
		}
		for (i = 0; i < size; i++)
			vm_page_putfake(ma[i]);
		free(ma, M_TEMP);
	}
	return (error);
}

/*
 * PCI memory address space is shared between memory-mapped devices (MMIO) and
 * host memory (which may be remapped by an IOMMU).  Device accesses to an
 * address within a memory aperture in a PCIe root port will be treated as
 * peer-to-peer and not forwarded to an IOMMU.  To avoid this, reserve the
 * address space of the root port's memory apertures in the address space used
 * by the IOMMU for remapping.
 */
static int
dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
{
	struct iommu_domain *iodom;
	device_t root;
	uint32_t val;
	uint64_t base, limit;
	int error;

	iodom = DOM2IODOM(domain);

	root = pci_find_pcie_root_port(dev);
	if (root == NULL)
		return (0);

	/* Disable downstream memory */
	base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
	limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
	error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
	if (bootverbose || error != 0)
		device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
		    base, limit + 1, error);
	if (error != 0)
		return (error);

	/* Disable downstream prefetchable memory */
	val = pci_read_config(root, PCIR_PMBASEL_1, 2);
	if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
		if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
			base = PCI_PPBMEMBASE(
			    pci_read_config(root, PCIR_PMBASEH_1, 4),
			    val);
			limit = PCI_PPBMEMLIMIT(
			    pci_read_config(root, PCIR_PMLIMITH_1, 4),
			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
		} else {
			base = PCI_PPBMEMBASE(0, val);
			limit = PCI_PPBMEMLIMIT(0,
			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
		}
		error = iommu_gas_reserve_region_extend(iodom, base,
		    limit + 1);
		if (bootverbose || error != 0)
			device_printf(dev, "DMAR reserve [%#jx-%#jx] "
			    "(error %d)\n", base, limit + 1, error);
		if (error != 0)
			return (error);
	}

	return (error);
}

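/*
 * Allocate and initialize a domain: reserve a domain id, set up the
 * guest address space, and create either the identity-mapped page
 * table or an empty one for remapping.
 */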
static struct dmar_domain *
dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
{
	struct iommu_domain *iodom;
	struct iommu_unit *unit;
	struct dmar_domain *domain;
	int error, id, mgaw;

	id = alloc_unr(dmar->domids);
	if (id == -1)
		return (NULL);
	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
	iodom = DOM2IODOM(domain);
	unit = DMAR2IOMMU(dmar);
	domain->domain = id;
	LIST_INIT(&domain->contexts);
	iommu_domain_init(unit, iodom, &dmar_domain_map_ops);

	domain->dmar = dmar;

	/*
	 * For now, use the maximal usable physical address of the
	 * installed memory to calculate the mgaw on id_mapped domain.
	 * It is useful for the identity mapping, and less so for the
	 * virtualized bus address space.
	 */
	domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
	mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
	error = domain_set_agaw(domain, mgaw);
	if (error != 0)
		goto fail;
	if (!id_mapped)
		/* Use all supported address space for remapping. */
		domain->iodom.end = 1ULL << (domain->agaw - 1);

	iommu_gas_init_domain(DOM2IODOM(domain));

	if (id_mapped) {
		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
			    domain->iodom.end);
		}
		domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
	} else {
		error = domain_alloc_pgtbl(domain);
		if (error != 0)
			goto fail;
		/* Disable local apic region access */
		error = iommu_gas_reserve_region(iodom, 0xfee00000,
		    0xfeefffff + 1, &iodom->msi_entry);
		if (error != 0)
			goto fail;
	}
	return (domain);

fail:
	dmar_domain_destroy(domain);
	return (NULL);
}

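/*
 * Allocate a context and its busdma tag.  The context is linked into
 * the domain by the caller.
 */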
static struct dmar_ctx *
dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
{
	struct dmar_ctx *ctx;

	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
	ctx->context.domain = DOM2IODOM(domain);
	ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
	    M_DMAR_CTX, M_WAITOK | M_ZERO);
	ctx->context.rid = rid;
	ctx->refs = 1;
	return (ctx);
}

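/*
 * Link the context into its domain's list, taking a domain reference
 * on behalf of the context.  The DMAR lock must be held.
 */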
static void
dmar_ctx_link(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = CTX2DOM(ctx);
	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
	KASSERT(domain->refs >= domain->ctx_cnt,
	    ("dom %p ref underflow %d %d", domain, domain->refs,
	    domain->ctx_cnt));
	domain->refs++;
	domain->ctx_cnt++;
	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
}

static void
dmar_ctx_unlink(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = CTX2DOM(ctx);
	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
	KASSERT(domain->refs > 0,
	    ("domain %p ctx dtr refs %d", domain, domain->refs));
	KASSERT(domain->ctx_cnt >= domain->refs,
	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
	    domain->refs, domain->ctx_cnt));
	domain->refs--;
	domain->ctx_cnt--;
	LIST_REMOVE(ctx, link);
}

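/*
 * Release all resources of an unreferenced domain: the address space,
 * the page table, the domain id and the domain structure itself.
 */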
static void
dmar_domain_destroy(struct dmar_domain *domain)
{
	struct iommu_domain *iodom;
	struct dmar_unit *dmar;

	iodom = DOM2IODOM(domain);

	KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
	    ("unfinished unloads %p", domain));
	KASSERT(LIST_EMPTY(&domain->contexts),
	    ("destroying dom %p with contexts", domain));
	KASSERT(domain->ctx_cnt == 0,
	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
	KASSERT(domain->refs == 0,
	    ("destroying dom %p with refs %d", domain, domain->refs));
	if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
		DMAR_DOMAIN_LOCK(domain);
		iommu_gas_fini_domain(iodom);
		DMAR_DOMAIN_UNLOCK(domain);
	}
	if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
		if (domain->pgtbl_obj != NULL)
			DMAR_DOMAIN_PGLOCK(domain);
		domain_free_pgtbl(domain);
	}
	iommu_domain_fini(iodom);
	dmar = DOM2DMAR(domain);
	free_unr(dmar->domids, domain->domain);
	free(domain, M_DMAR_DOMAIN);
}

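/*
 * Find or create the context for the given RID.  The sleepable
 * allocations (context table page, domain, page table, RMRR mappings)
 * are performed with the DMAR lock dropped, after which the lookup is
 * redone under the lock to handle a racing creator.  Translation is
 * enabled when the first context on the unit is activated.
 */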
static struct dmar_ctx *
dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_domain *domain, *domain1;
	struct dmar_ctx *ctx, *ctx1;
	struct iommu_unit *unit __diagused;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int bus, slot, func, error;
	bool enable;

	if (dev != NULL) {
		bus = pci_get_bus(dev);
		slot = pci_get_slot(dev);
		func = pci_get_function(dev);
	} else {
		bus = PCI_RID2BUS(rid);
		slot = PCI_RID2SLOT(rid);
		func = PCI_RID2FUNC(rid);
	}
	enable = false;
	TD_PREP_PINNED_ASSERT;
	unit = DMAR2IOMMU(dmar);
	DMAR_LOCK(dmar);
	KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
	    ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
	    slot, func));
	ctx = dmar_find_ctx_locked(dmar, rid);
	error = 0;
	if (ctx == NULL) {
		/*
		 * Perform the allocations which require sleep or have
		 * a higher chance to succeed if the sleep is allowed.
		 */
		DMAR_UNLOCK(dmar);
		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
		domain1 = dmar_domain_alloc(dmar, id_mapped);
		if (domain1 == NULL) {
			TD_PINNED_ASSERT;
			return (NULL);
		}
		if (!id_mapped) {
			error = domain_init_rmrr(domain1, dev, bus,
			    slot, func, dev_domain, dev_busno, dev_path,
			    dev_path_len);
			if (error == 0 && dev != NULL)
				error = dmar_reserve_pci_regions(domain1, dev);
			if (error != 0) {
				dmar_domain_destroy(domain1);
				TD_PINNED_ASSERT;
				return (NULL);
			}
		}
		ctx1 = dmar_ctx_alloc(domain1, rid);
		ctxp = dmar_map_ctx_entry(ctx1, &sf);
		DMAR_LOCK(dmar);

		/*
		 * Recheck the contexts, another thread might have
		 * already allocated the needed one.
		 */
		ctx = dmar_find_ctx_locked(dmar, rid);
		if (ctx == NULL) {
			domain = domain1;
			ctx = ctx1;
			dmar_ctx_link(ctx);
			ctx->context.tag->owner = dev;
			device_tag_init(ctx, dev);

			/*
			 * This is the first activated context for the
			 * DMAR unit.  Enable the translation after
			 * everything is set up.
			 */
			if (LIST_EMPTY(&dmar->domains))
				enable = true;
			LIST_INSERT_HEAD(&dmar->domains, domain, link);
			ctx_id_entry_init(ctx, ctxp, false, bus);
			if (dev != NULL) {
				device_printf(dev,
			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
				    "agaw %d %s-mapped\n",
				    dmar->iommu.unit, dmar->segment, bus, slot,
				    func, rid, domain->domain, domain->mgaw,
				    domain->agaw, id_mapped ? "id" : "re");
			}
			dmar_unmap_pgtbl(sf);
		} else {
			dmar_unmap_pgtbl(sf);
			dmar_domain_destroy(domain1);
			/* Nothing needs to be done to destroy ctx1. */
			free(ctx1, M_DMAR_CTX);
			domain = CTX2DOM(ctx);
			ctx->refs++; /* tag referenced us */
		}
	} else {
		domain = CTX2DOM(ctx);
		if (ctx->context.tag->owner == NULL)
			ctx->context.tag->owner = dev;
		ctx->refs++; /* tag referenced us */
	}

	error = dmar_flush_for_ctx_entry(dmar, enable);
	if (error != 0) {
		dmar_free_ctx_locked(dmar, ctx);
		TD_PINNED_ASSERT;
		return (NULL);
	}

	/*
	 * The dmar lock was potentially dropped between the check for
	 * the empty context list and now.  Recheck the state of GCMD_TE
	 * to avoid an unneeded command.
	 */
	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
		error = dmar_disable_protected_regions(dmar);
		if (error != 0)
			printf("dmar%d: Failed to disable protected regions\n",
			    dmar->iommu.unit);
		error = dmar_enable_translation(dmar);
		if (error == 0) {
			if (bootverbose) {
				printf("dmar%d: enabled translation\n",
				    dmar->iommu.unit);
			}
		} else {
			printf("dmar%d: enabling translation failed, "
			    "error %d\n", dmar->iommu.unit, error);
			dmar_free_ctx_locked(dmar, ctx);
			TD_PINNED_ASSERT;
			return (NULL);
		}
	}
	DMAR_UNLOCK(dmar);
	TD_PINNED_ASSERT;
	return (ctx);
}

struct dmar_ctx *
dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	int dev_domain, dev_path_len, dev_busno;

	dev_domain = pci_get_domain(dev);
	dev_path_len = dmar_dev_depth(dev);
	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
	    dev_path, dev_path_len, id_mapped, rmrr_init));
}

struct dmar_ctx *
dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
    int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{

	return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
	    dev_path, dev_path_len, id_mapped, rmrr_init));
}

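/*
 * Move an existing context to another domain on the same DMAR unit:
 * rewrite the context entry, flush the caches and drop the reference
 * on the old domain.
 */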
int
dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;
	struct dmar_domain *old_domain;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int error;

	dmar = domain->dmar;
	old_domain = CTX2DOM(ctx);
	if (domain == old_domain)
		return (0);
	KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
	    ("domain %p %u moving between dmars %u %u", domain,
	    domain->domain, old_domain->iodom.iommu->unit,
	    domain->iodom.iommu->unit));
	TD_PREP_PINNED_ASSERT;

	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	dmar_ctx_unlink(ctx);
	ctx->context.domain = &domain->iodom;
	dmar_ctx_link(ctx);
	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
	dmar_unmap_pgtbl(sf);
	error = dmar_flush_for_ctx_entry(dmar, true);
	/* If flush failed, rolling back would not work as well. */
	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
	    dmar->iommu.unit, ctx->context.rid, old_domain->domain,
	    domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
	    "id" : "re");
	dmar_unref_domain_locked(dmar, old_domain);
	TD_PINNED_ASSERT;
	return (error);
}

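/*
 * Drop a reference on the domain; the last reference destroys it.
 * Called with the DMAR lock held, the lock is dropped before return.
 */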
static void
dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
{

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(domain->refs >= 1,
	    ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
	    domain->refs));
	KASSERT(domain->refs > domain->ctx_cnt,
	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
	    domain->refs, domain->ctx_cnt));

	if (domain->refs > 1) {
		domain->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
	    ("lost ref on RMRR domain %p", domain));

	LIST_REMOVE(domain, link);
	DMAR_UNLOCK(dmar);

	taskqueue_drain(dmar->iommu.delayed_taskqueue,
	    &domain->iodom.unload_task);
	dmar_domain_destroy(domain);
}

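/*
 * Drop a reference on the context.  The last reference clears the
 * context entry, invalidates the caches, frees the context and drops
 * its domain reference.  Called with the DMAR lock held, the lock is
 * dropped before return.
 */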
void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
	struct sf_buf *sf;
	dmar_ctx_entry_t *ctxp;
	struct dmar_domain *domain;

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * If our reference is not the last one, only the dereference
	 * should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Otherwise, the context entry must be cleared before the
	 * page table is destroyed.  The mapping of the context
	 * entries page could require sleep, unlock the dmar.
	 */
	DMAR_UNLOCK(dmar);
	TD_PREP_PINNED_ASSERT;
	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * Another thread might have referenced the context, in which
	 * case again only the dereference should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		dmar_unmap_pgtbl(sf);
		TD_PINNED_ASSERT;
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Clear the context pointer and flush the caches.
	 * XXXKIB: cannot do this if any RMRR entries are still present.
	 */
	dmar_pte_clear(&ctxp->ctx1);
	ctxp->ctx2 = 0;
	dmar_flush_ctx_to_ram(dmar, ctxp);
	dmar_inv_ctx_glob(dmar);
	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
		if (dmar->qi_enabled)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		else
			dmar_inv_iotlb_glob(dmar);
	}
	dmar_unmap_pgtbl(sf);
	domain = CTX2DOM(ctx);
	dmar_ctx_unlink(ctx);
	free(ctx->context.tag, M_DMAR_CTX);
	free(ctx, M_DMAR_CTX);
	dmar_unref_domain_locked(dmar, domain);
	TD_PINNED_ASSERT;
}

void
dmar_free_ctx(struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;

	dmar = CTX2DMAR(ctx);
	DMAR_LOCK(dmar);
	dmar_free_ctx_locked(dmar, ctx);
}

/*
 * Returns with the domain locked.
 */
struct dmar_ctx *
dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
{
	struct dmar_domain *domain;
	struct dmar_ctx *ctx;

	DMAR_ASSERT_LOCKED(dmar);

	LIST_FOREACH(domain, &dmar->domains, link) {
		LIST_FOREACH(ctx, &domain->contexts, link) {
			if (ctx->context.rid == rid)
				return (ctx);
		}
	}
	return (NULL);
}

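/*
 * Return the guest address range of the map entry to the domain and
 * optionally free the entry itself.
 */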
void
dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		iommu_gas_free_region(entry);
	else
		iommu_gas_free_space(entry);
	if (free)
		iommu_gas_free_entry(entry);
	else
		entry->flags = 0;
}

/*
 * If the given value for "free" is true, then the caller must not be using
 * the entry's dmamap_link field.
 */
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;

	domain = IODOM2DOM(entry->domain);
	unit = DOM2DMAR(domain);

	/*
	 * If "free" is false, then the IOTLB invalidation must be performed
	 * synchronously.  Otherwise, the caller might free the entry before
	 * dmar_qi_task() is finished processing it.
	 */
	if (unit->qi_enabled) {
		if (free) {
			DMAR_LOCK(unit);
			dmar_qi_invalidate_locked(domain, entry, true);
			DMAR_UNLOCK(unit);
		} else {
			dmar_qi_invalidate_sync(domain, entry->start,
			    entry->end - entry->start, cansleep);
			dmar_domain_free_entry(entry, false);
		}
	} else {
		domain_flush_iotlb_sync(domain, entry->start, entry->end -
		    entry->start);
		dmar_domain_free_entry(entry, free);
	}
}

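/*
 * Decide whether the queued invalidation for the entry should emit a
 * wait descriptor: always for the last entry of the batch, and for
 * every dmar_batch_coalesce-th entry otherwise.
 */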
static bool
dmar_domain_unload_emit_wait(struct dmar_domain *domain,
    struct iommu_map_entry *entry)
{

	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
		return (true);
	return (domain->batch_no++ % dmar_batch_coalesce == 0);
}

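/*
 * Unmap and free a queue of map entries.  With queued invalidation
 * the IOTLB flushes are batched and the entries are freed by the QI
 * completion processing; otherwise each entry is flushed synchronously
 * and freed here.
 */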
void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;
	struct iommu_map_entry *entry, *entry1;
	int error __diagused;

	domain = IODOM2DOM(iodom);
	unit = DOM2DMAR(domain);

	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("not mapped entry %p %p", domain, entry));
		error = iodom->ops->unmap(iodom, entry->start, entry->end -
		    entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
		KASSERT(error == 0, ("unmap %p error %d", domain, error));
		if (!unit->qi_enabled) {
			domain_flush_iotlb_sync(domain, entry->start,
			    entry->end - entry->start);
			TAILQ_REMOVE(entries, entry, dmamap_link);
			dmar_domain_free_entry(entry, true);
		}
	}
	if (TAILQ_EMPTY(entries))
		return;

	KASSERT(unit->qi_enabled, ("loaded entry left"));
	DMAR_LOCK(unit);
	while ((entry = TAILQ_FIRST(entries)) != NULL) {
		TAILQ_REMOVE(entries, entry, dmamap_link);
		dmar_qi_invalidate_locked(domain, entry,
		    dmar_domain_unload_emit_wait(domain, entry));
	}
	DMAR_UNLOCK(unit);
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ret;

	dmar = IOMMU2DMAR(iommu);

	ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);

	return (CTX2IOCTX(ret));
}

void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;

	dmar = IOMMU2DMAR(iommu);
	ctx = IOCTX2CTX(context);

	dmar_free_ctx_locked(dmar, ctx);
}

void
iommu_free_ctx(struct iommu_ctx *context)
{
	struct dmar_ctx *ctx;

	ctx = IOCTX2CTX(context);

	dmar_free_ctx(ctx);
}