1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2013 The FreeBSD Foundation
5 *
6 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7 * under sponsorship from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #define RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/bus.h>
37 #include <sys/interrupt.h>
38 #include <sys/kernel.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/proc.h>
42 #include <sys/rwlock.h>
43 #include <sys/memdesc.h>
44 #include <sys/mutex.h>
45 #include <sys/sysctl.h>
46 #include <sys/rman.h>
47 #include <sys/taskqueue.h>
48 #include <sys/tree.h>
49 #include <sys/uio.h>
50 #include <sys/vmem.h>
51 #include <vm/vm.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_map.h>
57 #include <vm/uma.h>
58 #include <dev/pci/pcireg.h>
59 #include <dev/pci/pcivar.h>
60 #include <dev/iommu/iommu.h>
61 #include <dev/iommu/iommu_gas.h>
62 #include <dev/iommu/iommu_msi.h>
63 #include <machine/atomic.h>
64 #include <machine/bus.h>
65 #include <machine/md_var.h>
66 #include <machine/iommu.h>
67 #include <dev/iommu/busdma_iommu.h>
68
69 /*
70 * Guest Address Space management.
71 */
72
73 static uma_zone_t iommu_map_entry_zone;
74
75 #ifdef INVARIANTS
76 static int iommu_check_free;
77 #endif
78
79 static void
intel_gas_init(void)80 intel_gas_init(void)
81 {
82
83 iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
84 sizeof(struct iommu_map_entry), NULL, NULL,
85 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
86 }
87 SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);
88
89 struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain * domain,u_int flags)90 iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
91 {
92 struct iommu_map_entry *res;
93
94 KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
95 ("unsupported flags %x", flags));
96
97 res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
98 0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
99 if (res != NULL && domain != NULL) {
100 res->domain = domain;
101 atomic_add_int(&domain->entries_cnt, 1);
102 }
103 return (res);
104 }
105
106 void
iommu_gas_free_entry(struct iommu_map_entry * entry)107 iommu_gas_free_entry(struct iommu_map_entry *entry)
108 {
109 struct iommu_domain *domain;
110
111 domain = entry->domain;
112 if (domain != NULL)
113 atomic_subtract_int(&domain->entries_cnt, 1);
114 uma_zfree(iommu_map_entry_zone, entry);
115 }
116
117 static int
iommu_gas_cmp_entries(struct iommu_map_entry * a,struct iommu_map_entry * b)118 iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
119 {
120
121 /* First and last entries have zero size, so <= */
122 KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
123 a, (uintmax_t)a->start, (uintmax_t)a->end));
124 KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
125 b, (uintmax_t)b->start, (uintmax_t)b->end));
126 KASSERT(((a->flags | b->flags) & IOMMU_MAP_ENTRY_FAKE) != 0 ||
127 a->end <= b->start || b->end <= a->start ||
128 a->end == a->start || b->end == b->start,
129 ("overlapping entries %p (%jx, %jx) f %#x %p (%jx, %jx) f %#x"
130 " domain %p %p",
131 a, (uintmax_t)a->start, (uintmax_t)a->end, a->flags,
132 b, (uintmax_t)b->start, (uintmax_t)b->end, b->flags,
133 a->domain, b->domain));
134
135 if (a->end < b->end)
136 return (-1);
137 else if (b->end < a->end)
138 return (1);
139 return (0);
140 }
141
142 /*
143 * Update augmentation data based on data from children.
144 * Return true if and only if the update changes the augmentation data.
145 */
146 static bool
iommu_gas_augment_entry(struct iommu_map_entry * entry)147 iommu_gas_augment_entry(struct iommu_map_entry *entry)
148 {
149 struct iommu_map_entry *child;
150 iommu_gaddr_t bound, delta, free_down;
151
152 free_down = 0;
153 bound = entry->start;
154 if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
155 free_down = MAX(child->free_down, bound - child->last);
156 bound = child->first;
157 }
158 delta = bound - entry->first;
159 entry->first = bound;
160 bound = entry->end;
161 if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
162 free_down = MAX(free_down, child->free_down);
163 free_down = MAX(free_down, child->first - bound);
164 bound = child->last;
165 }
166 delta += entry->last - bound;
167 if (delta == 0)
168 delta = entry->free_down - free_down;
169 entry->last = bound;
170 entry->free_down = free_down;
171
172 /*
173 * Return true either if the value of last-first changed,
174 * or if free_down changed.
175 */
176 return (delta != 0);
177 }
178
179 RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
180 iommu_gas_cmp_entries);
181
#ifdef INVARIANTS
/*
 * Debugging aid: walk every entry of 'domain' and assert that it belongs to
 * the domain and that its cached free_down equals the value recomputed from
 * its children.
 */
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *left, *right;
	iommu_gaddr_t v;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));

		v = 0;
		left = RB_LEFT(entry, rb_entry);
		if (left != NULL) {
			v = MAX(v, left->free_down);
			v = MAX(v, entry->start - left->last);
		}
		right = RB_RIGHT(entry, rb_entry);
		if (right != NULL) {
			v = MAX(v, right->free_down);
			v = MAX(v, right->first - entry->end);
		}
		MPASS(entry->free_down == v);
	}
}
#endif
208
209 static void
iommu_gas_rb_remove(struct iommu_domain * domain,struct iommu_map_entry * entry)210 iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
211 {
212 struct iommu_map_entry *nbr;
213
214 /* Removing entry may open a new free gap before domain->start_gap. */
215 if (entry->end <= domain->start_gap->end) {
216 if (RB_RIGHT(entry, rb_entry) != NULL)
217 nbr = iommu_gas_entries_tree_RB_NEXT(entry);
218 else if (RB_LEFT(entry, rb_entry) != NULL)
219 nbr = RB_LEFT(entry, rb_entry);
220 else
221 nbr = RB_PARENT(entry, rb_entry);
222 domain->start_gap = nbr;
223 }
224 RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
225 }
226
227 struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx * ctx)228 iommu_get_ctx_domain(struct iommu_ctx *ctx)
229 {
230
231 return (ctx->domain);
232 }
233
234 void
iommu_gas_init_domain(struct iommu_domain * domain)235 iommu_gas_init_domain(struct iommu_domain *domain)
236 {
237 struct iommu_map_entry *begin, *end;
238
239 begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
240 end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
241
242 IOMMU_DOMAIN_LOCK(domain);
243 KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
244 KASSERT(RB_EMPTY(&domain->rb_root),
245 ("non-empty entries %p", domain));
246
247 end->start = domain->end;
248 end->end = domain->end;
249 end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
250 RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);
251
252 begin->start = 0;
253 begin->end = 0;
254 begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
255 RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);
256 iommu_gas_augment_entry(end);
257 iommu_gas_augment_entry(begin);
258
259 domain->start_gap = begin;
260 domain->first_place = begin;
261 domain->last_place = end;
262 domain->flags |= IOMMU_DOMAIN_GAS_INITED;
263 IOMMU_DOMAIN_UNLOCK(domain);
264 }
265
266 void
iommu_gas_fini_domain(struct iommu_domain * domain)267 iommu_gas_fini_domain(struct iommu_domain *domain)
268 {
269 struct iommu_map_entry *entry;
270
271 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
272 KASSERT(domain->entries_cnt == 2,
273 ("domain still in use %p", domain));
274
275 entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
276 KASSERT(entry->start == 0, ("start entry start %p", domain));
277 KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
278 KASSERT(entry->flags ==
279 (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
280 ("start entry flags %p", domain));
281 iommu_gas_rb_remove(domain, entry);
282 iommu_gas_free_entry(entry);
283
284 entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
285 KASSERT(entry->start == domain->end, ("end entry start %p", domain));
286 KASSERT(entry->end == domain->end, ("end entry end %p", domain));
287 KASSERT(entry->flags ==
288 (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
289 ("end entry flags %p", domain));
290 iommu_gas_rb_remove(domain, entry);
291 iommu_gas_free_entry(entry);
292 }
293
294 struct iommu_gas_match_args {
295 iommu_gaddr_t size;
296 int offset;
297 const struct bus_dma_tag_common *common;
298 u_int gas_flags;
299 struct iommu_map_entry *entry;
300 };
301
302 /*
303 * The interval [beg, end) is a free interval between two iommu_map_entries.
304 * Addresses can be allocated only in the range [lbound, ubound]. Try to
305 * allocate space in the free interval, subject to the conditions expressed by
306 * a, and return 'true' if and only if the allocation attempt succeeds.
307 */
308 static bool
iommu_gas_match_one(struct iommu_gas_match_args * a,iommu_gaddr_t beg,iommu_gaddr_t end,iommu_gaddr_t lbound,iommu_gaddr_t ubound)309 iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
310 iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
311 {
312 struct iommu_map_entry *entry;
313 iommu_gaddr_t first, size, start;
314 int offset;
315
316 /*
317 * The prev->end is always aligned on the page size, which
318 * causes page alignment for the entry->start too.
319 *
320 * Create IOMMU_PAGE_SIZE gaps before, after new entry
321 * to ensure that out-of-bounds accesses fault.
322 */
323 beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
324 start = roundup2(beg, a->common->alignment);
325 if (start < beg)
326 return (false);
327 if (end < IOMMU_PAGE_SIZE + 1)
328 return (false);
329 end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound);
330 offset = a->offset;
331 size = a->size;
332 if (start + offset + size - 1 > end)
333 return (false);
334
335 /* Check for and try to skip past boundary crossing. */
336 if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
337 /*
338 * The start + offset to start + offset + size region crosses
339 * the boundary. Check if there is enough space after the next
340 * boundary after the beg.
341 */
342 first = start;
343 beg = roundup2(start + offset + 1, a->common->boundary);
344 start = roundup2(beg, a->common->alignment);
345
346 if (start + offset + size - 1 > end ||
347 !vm_addr_bound_ok(start + offset, size,
348 a->common->boundary)) {
349 /*
350 * Not enough space to align at the requested boundary,
351 * or boundary is smaller than the size, but allowed to
352 * split. We already checked that start + size does not
353 * overlap ubound.
354 *
355 * XXXKIB. It is possible that beg is exactly at the
356 * start of the next entry, then we do not have gap.
357 * Ignore for now.
358 */
359 if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
360 return (false);
361 size = beg - first - offset;
362 start = first;
363 }
364 }
365 entry = a->entry;
366 entry->start = start;
367 entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
368 entry->flags = IOMMU_MAP_ENTRY_MAP;
369 return (true);
370 }
371
372 /* Find the next entry that might abut a big-enough range. */
373 static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry * curr,iommu_gaddr_t min_free)374 iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
375 {
376 struct iommu_map_entry *next;
377
378 if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
379 next->free_down >= min_free) {
380 /* Find next entry in right subtree. */
381 do
382 curr = next;
383 while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
384 next->free_down >= min_free);
385 } else {
386 /* Find next entry in a left-parent ancestor. */
387 while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
388 curr == RB_RIGHT(next, rb_entry))
389 curr = next;
390 curr = next;
391 }
392 return (curr);
393 }
394
395 /*
396 * Address-ordered first-fit search of 'domain' for free space satisfying the
397 * conditions of 'a'. The space allocated is at least one page big, and is
398 * bounded by guard pages to the left and right. The allocated space for
399 * 'domain' is described by an rb-tree of map entries at domain->rb_root, and
400 * domain->start_gap points to a map entry less than or adjacent to the first
401 * free-space of size at least 3 pages.
402 */
403 static int
iommu_gas_find_space(struct iommu_domain * domain,struct iommu_gas_match_args * a)404 iommu_gas_find_space(struct iommu_domain *domain,
405 struct iommu_gas_match_args *a)
406 {
407 struct iommu_map_entry *curr, *first;
408 iommu_gaddr_t addr, min_free;
409
410 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
411 KASSERT(a->entry->flags == 0,
412 ("dirty entry %p %p", domain, a->entry));
413
414 /*
415 * start_gap may point to an entry adjacent to gaps too small for any
416 * new allocation. In that case, advance start_gap to the first free
417 * space big enough for a minimum allocation plus two guard pages.
418 */
419 min_free = 3 * IOMMU_PAGE_SIZE;
420 first = domain->start_gap;
421 while (first != NULL && first->free_down < min_free)
422 first = RB_PARENT(first, rb_entry);
423 for (curr = first; curr != NULL;
424 curr = iommu_gas_next(curr, min_free)) {
425 if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
426 first->last + min_free <= curr->start)
427 break;
428 if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
429 curr->end + min_free <= first->first)
430 break;
431 }
432 domain->start_gap = curr;
433
434 /*
435 * If the subtree doesn't have free space for the requested allocation
436 * plus two guard pages, skip it.
437 */
438 min_free = 2 * IOMMU_PAGE_SIZE +
439 roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);
440
441 /* Climb to find a node in the subtree of big-enough ranges. */
442 first = curr;
443 while (first != NULL && first->free_down < min_free)
444 first = RB_PARENT(first, rb_entry);
445
446 /*
447 * Walk the big-enough ranges tree until one satisfies alignment
448 * requirements, or violates lowaddr address requirement.
449 */
450 addr = a->common->lowaddr;
451 for (curr = first; curr != NULL;
452 curr = iommu_gas_next(curr, min_free)) {
453 if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
454 iommu_gas_match_one(a, first->last, curr->start,
455 0, addr)) {
456 RB_INSERT_PREV(iommu_gas_entries_tree,
457 &domain->rb_root, curr, a->entry);
458 return (0);
459 }
460 if (curr->end >= addr) {
461 /* All remaining ranges > addr */
462 break;
463 }
464 if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
465 iommu_gas_match_one(a, curr->end, first->first,
466 0, addr)) {
467 RB_INSERT_NEXT(iommu_gas_entries_tree,
468 &domain->rb_root, curr, a->entry);
469 return (0);
470 }
471 }
472
473 /*
474 * To resume the search at the start of the upper region, first climb to
475 * the nearest ancestor that spans highaddr. Then find the last entry
476 * before highaddr that could abut a big-enough range.
477 */
478 addr = a->common->highaddr;
479 while (curr != NULL && curr->last < addr)
480 curr = RB_PARENT(curr, rb_entry);
481 first = NULL;
482 while (curr != NULL && curr->free_down >= min_free) {
483 if (addr < curr->end)
484 curr = RB_LEFT(curr, rb_entry);
485 else {
486 first = curr;
487 curr = RB_RIGHT(curr, rb_entry);
488 }
489 }
490
491 /*
492 * Walk the remaining big-enough ranges until one satisfies alignment
493 * requirements.
494 */
495 for (curr = first; curr != NULL;
496 curr = iommu_gas_next(curr, min_free)) {
497 if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
498 iommu_gas_match_one(a, first->last, curr->start,
499 addr + 1, domain->end - 1)) {
500 RB_INSERT_PREV(iommu_gas_entries_tree,
501 &domain->rb_root, curr, a->entry);
502 return (0);
503 }
504 if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
505 iommu_gas_match_one(a, curr->end, first->first,
506 addr + 1, domain->end - 1)) {
507 RB_INSERT_NEXT(iommu_gas_entries_tree,
508 &domain->rb_root, curr, a->entry);
509 return (0);
510 }
511 }
512
513 return (ENOMEM);
514 }
515
516 static int
iommu_gas_alloc_region(struct iommu_domain * domain,struct iommu_map_entry * entry,u_int flags)517 iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
518 u_int flags)
519 {
520 struct iommu_map_entry *next, *prev;
521
522 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
523
524 if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
525 (entry->end & IOMMU_PAGE_MASK) != 0)
526 return (EINVAL);
527 if (entry->start >= entry->end)
528 return (EINVAL);
529 if (entry->end >= domain->end)
530 return (EINVAL);
531
532 entry->flags |= IOMMU_MAP_ENTRY_FAKE;
533 next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
534 KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
535 (uintmax_t)entry->start));
536 prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
537 /* prev could be NULL */
538 entry->flags &= ~IOMMU_MAP_ENTRY_FAKE;
539
540 /*
541 * Adapt to broken BIOSes which specify overlapping RMRR
542 * entries.
543 *
544 * XXXKIB: this does not handle a case when prev or next
545 * entries are completely covered by the current one, which
546 * extends both ways.
547 */
548 if (prev != NULL && prev->end > entry->start &&
549 (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
550 if ((flags & IOMMU_MF_RMRR) == 0 ||
551 (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
552 return (EBUSY);
553 entry->start = prev->end;
554 }
555 if (next->start < entry->end &&
556 (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
557 if ((flags & IOMMU_MF_RMRR) == 0 ||
558 (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
559 return (EBUSY);
560 entry->end = next->start;
561 }
562 if (entry->end == entry->start)
563 return (0);
564
565 if (prev != NULL && prev->end > entry->start) {
566 /* This assumes that prev is the placeholder entry. */
567 iommu_gas_rb_remove(domain, prev);
568 prev = NULL;
569 }
570 RB_INSERT_PREV(iommu_gas_entries_tree,
571 &domain->rb_root, next, entry);
572 if (next->start < entry->end) {
573 iommu_gas_rb_remove(domain, next);
574 next = NULL;
575 }
576
577 if ((flags & IOMMU_MF_RMRR) != 0)
578 entry->flags = IOMMU_MAP_ENTRY_RMRR;
579
580 #ifdef INVARIANTS
581 struct iommu_map_entry *ip, *in;
582 ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
583 in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
584 KASSERT(prev == NULL || ip == prev,
585 ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
586 entry, entry->start, entry->end, prev,
587 prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
588 ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
589 KASSERT(next == NULL || in == next,
590 ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
591 entry, entry->start, entry->end, next,
592 next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
593 in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
594 #endif
595
596 return (0);
597 }
598
599 void
iommu_gas_free_space(struct iommu_map_entry * entry)600 iommu_gas_free_space(struct iommu_map_entry *entry)
601 {
602 struct iommu_domain *domain;
603
604 domain = entry->domain;
605 KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
606 IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
607 ("permanent entry %p %p", domain, entry));
608
609 IOMMU_DOMAIN_LOCK(domain);
610 iommu_gas_rb_remove(domain, entry);
611 entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
612 #ifdef INVARIANTS
613 if (iommu_check_free)
614 iommu_gas_check_free(domain);
615 #endif
616 IOMMU_DOMAIN_UNLOCK(domain);
617 }
618
619 void
iommu_gas_free_region(struct iommu_map_entry * entry)620 iommu_gas_free_region(struct iommu_map_entry *entry)
621 {
622 struct iommu_domain *domain;
623
624 domain = entry->domain;
625 KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
626 IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
627 ("non-RMRR entry %p %p", domain, entry));
628
629 IOMMU_DOMAIN_LOCK(domain);
630 if (entry != domain->first_place &&
631 entry != domain->last_place)
632 iommu_gas_rb_remove(domain, entry);
633 entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
634 IOMMU_DOMAIN_UNLOCK(domain);
635 }
636
637 static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain * domain,iommu_gaddr_t start,iommu_gaddr_t end,struct iommu_map_entry ** r)638 iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
639 iommu_gaddr_t end, struct iommu_map_entry **r)
640 {
641 struct iommu_map_entry *entry, *res, fentry;
642
643 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
644 MPASS(start <= end);
645 MPASS(end <= domain->end);
646
647 /*
648 * Find an entry which contains the supplied guest's address
649 * start, or the first entry after the start. Since we
650 * asserted that start is below domain end, entry should
651 * exist. Then clip it if needed.
652 */
653 bzero(&fentry, sizeof(fentry));
654 fentry.start = start + 1;
655 fentry.end = start + 1;
656 fentry.flags = IOMMU_MAP_ENTRY_FAKE;
657 entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);
658
659 if (entry->start >= start ||
660 (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
661 return (entry);
662
663 res = *r;
664 *r = NULL;
665 *res = *entry;
666 res->start = entry->end = start;
667 RB_UPDATE_AUGMENT(entry, rb_entry);
668 RB_INSERT_NEXT(iommu_gas_entries_tree,
669 &domain->rb_root, entry, res);
670 return (res);
671 }
672
673 static bool
iommu_gas_remove_clip_right(struct iommu_domain * domain,iommu_gaddr_t end,struct iommu_map_entry * entry,struct iommu_map_entry * r)674 iommu_gas_remove_clip_right(struct iommu_domain *domain,
675 iommu_gaddr_t end, struct iommu_map_entry *entry,
676 struct iommu_map_entry *r)
677 {
678 if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
679 return (false);
680
681 *r = *entry;
682 r->end = entry->start = end;
683 RB_UPDATE_AUGMENT(entry, rb_entry);
684 RB_INSERT_PREV(iommu_gas_entries_tree,
685 &domain->rb_root, entry, r);
686 return (true);
687 }
688
689 static void
iommu_gas_remove_unmap(struct iommu_domain * domain,struct iommu_map_entry * entry,struct iommu_map_entries_tailq * gcp)690 iommu_gas_remove_unmap(struct iommu_domain *domain,
691 struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
692 {
693 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
694
695 if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
696 IOMMU_MAP_ENTRY_RMRR |
697 IOMMU_MAP_ENTRY_REMOVING)) != 0)
698 return;
699 MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
700 entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
701 TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
702 }
703
704 static void
iommu_gas_remove_locked(struct iommu_domain * domain,iommu_gaddr_t start,iommu_gaddr_t size,struct iommu_map_entries_tailq * gc,struct iommu_map_entry ** r1,struct iommu_map_entry ** r2)705 iommu_gas_remove_locked(struct iommu_domain *domain,
706 iommu_gaddr_t start, iommu_gaddr_t size,
707 struct iommu_map_entries_tailq *gc,
708 struct iommu_map_entry **r1, struct iommu_map_entry **r2)
709 {
710 struct iommu_map_entry *entry, *nentry;
711 iommu_gaddr_t end;
712
713 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
714
715 end = start + size;
716
717 nentry = iommu_gas_remove_clip_left(domain, start, end, r1);
718 RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
719 if (entry->start >= end)
720 break;
721 KASSERT(start <= entry->start,
722 ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
723 entry->start, entry->end, start));
724 iommu_gas_remove_unmap(domain, entry, gc);
725 }
726 if (iommu_gas_remove_clip_right(domain, end, entry, *r2)) {
727 iommu_gas_remove_unmap(domain, *r2, gc);
728 *r2 = NULL;
729 }
730
731 #ifdef INVARIANTS
732 RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
733 if ((entry->flags & (IOMMU_MAP_ENTRY_RMRR |
734 IOMMU_MAP_ENTRY_PLACE)) != 0)
735 continue;
736 KASSERT(entry->end <= start || entry->start >= end,
737 ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
738 "(%#jx, %#jx)",
739 entry->start, entry->end, start, end));
740 }
741 #endif
742 }
743
744 static void
iommu_gas_remove_init(struct iommu_domain * domain,struct iommu_map_entries_tailq * gc,struct iommu_map_entry ** r1,struct iommu_map_entry ** r2)745 iommu_gas_remove_init(struct iommu_domain *domain,
746 struct iommu_map_entries_tailq *gc, struct iommu_map_entry **r1,
747 struct iommu_map_entry **r2)
748 {
749 TAILQ_INIT(gc);
750 *r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
751 *r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
752 }
753
754 static void
iommu_gas_remove_cleanup(struct iommu_domain * domain,struct iommu_map_entries_tailq * gc,struct iommu_map_entry ** r1,struct iommu_map_entry ** r2)755 iommu_gas_remove_cleanup(struct iommu_domain *domain,
756 struct iommu_map_entries_tailq *gc, struct iommu_map_entry **r1,
757 struct iommu_map_entry **r2)
758 {
759 if (*r1 != NULL) {
760 iommu_gas_free_entry(*r1);
761 *r1 = NULL;
762 }
763 if (*r2 != NULL) {
764 iommu_gas_free_entry(*r2);
765 *r2 = NULL;
766 }
767 iommu_domain_unload(domain, gc, true);
768 }
769
770 /*
771 * Remove specified range from the GAS of the domain. Note that the
772 * removal is not guaranteed to occur upon the function return, it
773 * might be finalized some time after, when hardware reports that
774 * (queued) IOTLB invalidation was performed.
775 */
776 void
iommu_gas_remove(struct iommu_domain * domain,iommu_gaddr_t start,iommu_gaddr_t size)777 iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
778 iommu_gaddr_t size)
779 {
780 struct iommu_map_entry *r1, *r2;
781 struct iommu_map_entries_tailq gc;
782
783 iommu_gas_remove_init(domain, &gc, &r1, &r2);
784 IOMMU_DOMAIN_LOCK(domain);
785 iommu_gas_remove_locked(domain, start, size, &gc, &r1, &r2);
786 IOMMU_DOMAIN_UNLOCK(domain);
787 iommu_gas_remove_cleanup(domain, &gc, &r1, &r2);
788 }
789
790 int
iommu_gas_map(struct iommu_domain * domain,const struct bus_dma_tag_common * common,iommu_gaddr_t size,int offset,u_int eflags,u_int flags,vm_page_t * ma,struct iommu_map_entry ** res)791 iommu_gas_map(struct iommu_domain *domain,
792 const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
793 u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
794 {
795 struct iommu_gas_match_args a;
796 struct iommu_map_entry *entry;
797 int error;
798
799 KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
800 ("invalid flags 0x%x", flags));
801
802 a.size = size;
803 a.offset = offset;
804 a.common = common;
805 a.gas_flags = flags;
806 entry = iommu_gas_alloc_entry(domain,
807 (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
808 if (entry == NULL)
809 return (ENOMEM);
810 a.entry = entry;
811 IOMMU_DOMAIN_LOCK(domain);
812 error = iommu_gas_find_space(domain, &a);
813 if (error == ENOMEM) {
814 IOMMU_DOMAIN_UNLOCK(domain);
815 iommu_gas_free_entry(entry);
816 return (error);
817 }
818 #ifdef INVARIANTS
819 if (iommu_check_free)
820 iommu_gas_check_free(domain);
821 #endif
822 KASSERT(error == 0,
823 ("unexpected error %d from iommu_gas_find_entry", error));
824 KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
825 (uintmax_t)entry->end, (uintmax_t)domain->end));
826 entry->flags |= eflags;
827 IOMMU_DOMAIN_UNLOCK(domain);
828
829 error = domain->ops->map(domain, entry->start,
830 entry->end - entry->start, ma, eflags,
831 ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
832 if (error == ENOMEM) {
833 iommu_domain_unload_entry(entry, true,
834 (flags & IOMMU_MF_CANWAIT) != 0);
835 return (error);
836 }
837 KASSERT(error == 0,
838 ("unexpected error %d from domain_map_buf", error));
839
840 *res = entry;
841 return (0);
842 }
843
844 int
iommu_gas_map_region(struct iommu_domain * domain,struct iommu_map_entry * entry,u_int eflags,u_int flags,vm_page_t * ma)845 iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
846 u_int eflags, u_int flags, vm_page_t *ma)
847 {
848 iommu_gaddr_t start;
849 int error;
850
851 KASSERT(entry->domain == domain,
852 ("mismatched domain %p entry %p entry->domain %p", domain,
853 entry, entry->domain));
854 KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
855 entry, entry->flags));
856 KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
857 ("invalid flags 0x%x", flags));
858
859 start = entry->start;
860 IOMMU_DOMAIN_LOCK(domain);
861 error = iommu_gas_alloc_region(domain, entry, flags);
862 if (error != 0) {
863 IOMMU_DOMAIN_UNLOCK(domain);
864 return (error);
865 }
866 entry->flags |= eflags;
867 IOMMU_DOMAIN_UNLOCK(domain);
868 if (entry->end == entry->start)
869 return (0);
870
871 error = domain->ops->map(domain, entry->start,
872 entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
873 eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
874 if (error == ENOMEM) {
875 iommu_domain_unload_entry(entry, false,
876 (flags & IOMMU_MF_CANWAIT) != 0);
877 return (error);
878 }
879 KASSERT(error == 0,
880 ("unexpected error %d from domain_map_buf", error));
881
882 return (0);
883 }
884
885 static int
iommu_gas_reserve_region_locked(struct iommu_domain * domain,iommu_gaddr_t start,iommu_gaddr_t end,struct iommu_map_entry * entry)886 iommu_gas_reserve_region_locked(struct iommu_domain *domain,
887 iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
888 {
889 int error;
890
891 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
892
893 entry->start = start;
894 entry->end = end;
895 error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
896 if (error == 0)
897 entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
898 return (error);
899 }
900
901 int
iommu_gas_reserve_region(struct iommu_domain * domain,iommu_gaddr_t start,iommu_gaddr_t end,struct iommu_map_entry ** entry0)902 iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
903 iommu_gaddr_t end, struct iommu_map_entry **entry0)
904 {
905 struct iommu_map_entry *entry;
906 int error;
907
908 entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
909 IOMMU_DOMAIN_LOCK(domain);
910 error = iommu_gas_reserve_region_locked(domain, start, end, entry);
911 IOMMU_DOMAIN_UNLOCK(domain);
912 if (error != 0)
913 iommu_gas_free_entry(entry);
914 else if (entry0 != NULL)
915 *entry0 = entry;
916 return (error);
917 }
918
919 /*
920 * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing
921 * entries.
922 */
923 int
iommu_gas_reserve_region_extend(struct iommu_domain * domain,iommu_gaddr_t start,iommu_gaddr_t end)924 iommu_gas_reserve_region_extend(struct iommu_domain *domain,
925 iommu_gaddr_t start, iommu_gaddr_t end)
926 {
927 struct iommu_map_entry *entry, *next, *prev, key = {};
928 iommu_gaddr_t entry_start, entry_end;
929 int error;
930
931 error = 0;
932 entry = NULL;
933 end = ummin(end, domain->end);
934 while (start < end) {
935 /* Preallocate an entry. */
936 if (entry == NULL)
937 entry = iommu_gas_alloc_entry(domain,
938 IOMMU_PGF_WAITOK);
939 /* Calculate the free region from here to the next entry. */
940 key.start = key.end = start;
941 IOMMU_DOMAIN_LOCK(domain);
942 next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
943 KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
944 "after %#jx", domain, (uintmax_t)domain->end,
945 (uintmax_t)start));
946 entry_end = ummin(end, next->start);
947 prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
948 if (prev != NULL)
949 entry_start = ummax(start, prev->end);
950 else
951 entry_start = start;
952 start = next->end;
953 /* Reserve the region if non-empty. */
954 if (entry_start != entry_end) {
955 error = iommu_gas_reserve_region_locked(domain,
956 entry_start, entry_end, entry);
957 if (error != 0) {
958 IOMMU_DOMAIN_UNLOCK(domain);
959 break;
960 }
961 entry = NULL;
962 }
963 IOMMU_DOMAIN_UNLOCK(domain);
964 }
965 /* Release a preallocated entry if it was not used. */
966 if (entry != NULL)
967 iommu_gas_free_entry(entry);
968 return (error);
969 }
970
971 void
iommu_unmap_msi(struct iommu_ctx * ctx)972 iommu_unmap_msi(struct iommu_ctx *ctx)
973 {
974 struct iommu_map_entry *entry;
975 struct iommu_domain *domain;
976
977 domain = ctx->domain;
978 entry = domain->msi_entry;
979 if (entry == NULL)
980 return;
981
982 domain->ops->unmap(domain, entry->start, entry->end -
983 entry->start, IOMMU_PGF_WAITOK);
984
985 iommu_gas_free_space(entry);
986
987 iommu_gas_free_entry(entry);
988
989 domain->msi_entry = NULL;
990 domain->msi_base = 0;
991 domain->msi_phys = 0;
992 }
993
994 int
iommu_map_msi(struct iommu_ctx * ctx,iommu_gaddr_t size,int offset,u_int eflags,u_int flags,vm_page_t * ma)995 iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
996 u_int eflags, u_int flags, vm_page_t *ma)
997 {
998 struct iommu_domain *domain;
999 struct iommu_map_entry *entry;
1000 int error;
1001
1002 error = 0;
1003 domain = ctx->domain;
1004
1005 /* Check if there is already an MSI page allocated */
1006 IOMMU_DOMAIN_LOCK(domain);
1007 entry = domain->msi_entry;
1008 IOMMU_DOMAIN_UNLOCK(domain);
1009
1010 if (entry == NULL) {
1011 error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
1012 eflags, flags, ma, &entry);
1013 IOMMU_DOMAIN_LOCK(domain);
1014 if (error == 0) {
1015 if (domain->msi_entry == NULL) {
1016 MPASS(domain->msi_base == 0);
1017 MPASS(domain->msi_phys == 0);
1018
1019 domain->msi_entry = entry;
1020 domain->msi_base = entry->start;
1021 domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
1022 } else {
1023 /*
1024 * We lost the race and already have an
1025 * MSI page allocated. Free the unneeded entry.
1026 */
1027 iommu_gas_free_entry(entry);
1028 }
1029 } else if (domain->msi_entry != NULL) {
1030 /*
1031 * The allocation failed, but another succeeded.
1032 * Return success as there is a valid MSI page.
1033 */
1034 error = 0;
1035 }
1036 IOMMU_DOMAIN_UNLOCK(domain);
1037 }
1038
1039 return (error);
1040 }
1041
1042 void
iommu_translate_msi(struct iommu_domain * domain,uint64_t * addr)1043 iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
1044 {
1045
1046 *addr = (*addr - domain->msi_phys) + domain->msi_base;
1047
1048 KASSERT(*addr >= domain->msi_entry->start,
1049 ("%s: Address is below the MSI entry start address (%jx < %jx)",
1050 __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));
1051
1052 KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
1053 ("%s: Address is above the MSI entry end address (%jx < %jx)",
1054 __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
1055 }
1056
/* Declare the "iommu" sysctl node under _hw; it has no handler and an
 * empty description. */
SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
/*
 * Only built with INVARIANTS: expose the iommu_check_free integer as
 * "check_free" under the node above, with flags CTLFLAG_RWTUN.
 */
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif
1064
1065 #include "opt_ddb.h"
1066 #ifdef DDB
1067
1068 #include <ddb/ddb.h>
1069
1070 static void
iommu_debug_dump_gas(struct iommu_domain * domain)1071 iommu_debug_dump_gas(struct iommu_domain *domain)
1072 {
1073 struct iommu_map_entry *entry;
1074
1075 db_printf("iommu_domain %p tree %p iommu %p fl %#x\n", domain,
1076 &domain->rb_root, domain->iommu, domain->flags);
1077 db_printf("iommu_domain %p tree %p\n", domain, &domain->rb_root);
1078 RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
1079 db_printf(
1080 " e %p [%#jx %#jx] fl %#x first %#jx last %#jx free_down %#jx",
1081 entry, (uintmax_t)entry->start, (uintmax_t)entry->end,
1082 entry->flags,
1083 (uintmax_t)entry->first, (uintmax_t)entry->last,
1084 (uintmax_t)entry->free_down);
1085 if (entry == domain->start_gap)
1086 db_printf(" start_gap");
1087 if (entry == domain->first_place)
1088 db_printf(" first_place");
1089 if (entry == domain->last_place)
1090 db_printf(" last_place");
1091 db_printf("\n");
1092 }
1093 }
1094
DB_SHOW_COMMAND(iommu_domain,iommu_domain_show)1095 DB_SHOW_COMMAND(iommu_domain, iommu_domain_show)
1096 {
1097 struct iommu_domain *domain;
1098
1099 if (!have_addr) {
1100 db_printf("show iommu_domain addr\n");
1101 return;
1102 }
1103
1104 domain = (void *)addr;
1105 iommu_debug_dump_gas(domain);
1106 }
1107
1108 #endif
1109