xref: /openbsd/sys/arch/sparc64/dev/iommu.c (revision 17df1aa7)
1 /*	$OpenBSD: iommu.c,v 1.62 2010/04/20 23:26:59 deraadt Exp $	*/
2 /*	$NetBSD: iommu.c,v 1.47 2002/02/08 20:03:45 eeh Exp $	*/
3 
4 /*
5  * Copyright (c) 2003 Henric Jungheim
6  * Copyright (c) 2001, 2002 Eduardo Horvath
7  * Copyright (c) 1999, 2000 Matthew R. Green
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /*
35  * UltraSPARC IOMMU support; used by both the sbus and pci code.
36  */
37 #include <sys/param.h>
38 #include <sys/extent.h>
39 #include <sys/malloc.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/device.h>
43 #include <sys/mbuf.h>
44 
45 #include <uvm/uvm_extern.h>
46 
47 #include <machine/bus.h>
48 #include <sparc64/sparc64/cache.h>
49 #include <sparc64/dev/iommureg.h>
50 #include <sparc64/dev/iommuvar.h>
51 
52 #include <machine/autoconf.h>
53 #include <machine/cpu.h>
54 
55 #ifdef DDB
56 #include <machine/db_machdep.h>
57 #include <ddb/db_sym.h>
58 #include <ddb/db_extern.h>
59 #endif
60 
61 #ifdef DEBUG
62 #define IDB_BUSDMA	0x1
63 #define IDB_IOMMU	0x2
64 #define IDB_INFO	0x4
65 #define IDB_SYNC	0x8
66 #define IDB_XXX		0x10
67 #define IDB_PRINT_MAP	0x20
68 #define IDB_BREAK	0x40
69 int iommudebug = IDB_INFO;
70 #define DPRINTF(l, s)   do { if (iommudebug & l) printf s; } while (0)
71 #else
72 #define DPRINTF(l, s)
73 #endif
74 
75 void iommu_enter(struct iommu_state *, struct strbuf_ctl *, bus_addr_t,
76     paddr_t, int);
77 void iommu_remove(struct iommu_state *, struct strbuf_ctl *, bus_addr_t);
78 int iommu_dvmamap_sync_range(struct strbuf_ctl*, bus_addr_t, bus_size_t);
79 int iommu_strbuf_flush_done(struct iommu_map_state *);
80 int iommu_dvmamap_load_seg(bus_dma_tag_t, struct iommu_state *,
81     bus_dmamap_t, bus_dma_segment_t *, int, int, bus_size_t, bus_size_t);
82 int iommu_dvmamap_load_mlist(bus_dma_tag_t, struct iommu_state *,
83     bus_dmamap_t, struct pglist *, int, bus_size_t, bus_size_t);
84 int iommu_dvmamap_validate_map(bus_dma_tag_t, struct iommu_state *,
85     bus_dmamap_t);
86 void iommu_dvmamap_print_map(bus_dma_tag_t, struct iommu_state *,
87     bus_dmamap_t);
88 int iommu_dvmamap_append_range(bus_dma_tag_t, bus_dmamap_t, paddr_t,
89     bus_size_t, int, bus_size_t);
90 int64_t iommu_tsb_entry(struct iommu_state *, bus_addr_t);
91 void strbuf_reset(struct strbuf_ctl *);
92 int iommu_iomap_insert_page(struct iommu_map_state *, paddr_t);
93 bus_addr_t iommu_iomap_translate(struct iommu_map_state *, paddr_t);
94 void iommu_iomap_load_map(struct iommu_state *, struct iommu_map_state *,
95     bus_addr_t, int);
96 void iommu_iomap_unload_map(struct iommu_state *, struct iommu_map_state *);
97 struct iommu_map_state *iommu_iomap_create(int);
98 void iommu_iomap_destroy(struct iommu_map_state *);
99 void iommu_iomap_clear_pages(struct iommu_map_state *);
100 void _iommu_dvmamap_sync(bus_dma_tag_t, bus_dma_tag_t, bus_dmamap_t,
101     bus_addr_t, bus_size_t, int);
102 
103 /*
104  * Initiate an STC entry flush.
105  */
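/*
 * (Only the flush is initiated here; the wait for it to complete is
 * done separately in iommu_strbuf_flush_done().)
 */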
106 static inline void
107 iommu_strbuf_flush(struct strbuf_ctl *sb, bus_addr_t va)
108 {
109 #ifdef DEBUG
110 	if (sb->sb_flush == NULL) {
111 		printf("iommu_strbuf_flush: attempting to flush w/o STC\n");
112 		return;
113 	}
114 #endif
115 
116 	bus_space_write_8(sb->sb_bustag, sb->sb_sb,
117 	    STRBUFREG(strbuf_pgflush), va);
118 }
119 
120 /*
121  * initialise the UltraSPARC IOMMU (SBus or PCI):
122  *	- allocate and setup the iotsb.
123  *	- enable the IOMMU
124  *	- initialise the streaming buffers (if they exist)
125  *	- create a private DVMA map.
126  */
127 void
128 iommu_init(char *name, struct iommu_state *is, int tsbsize, u_int32_t iovabase)
129 {
130 	psize_t size;
131 	vaddr_t va;
132 	paddr_t pa;
133 	struct vm_page *m;
134 	struct pglist mlist;
135 
136 	/*
137 	 * Setup the iommu.
138 	 *
139 	 * The sun4u iommu is part of the SBus or PCI controller so we will
140  * deal with it here.
141 	 *
142 	 * For sysio and psycho/psycho+ the IOMMU address space always ends at
143 	 * 0xffffe000, but the starting address depends on the size of the
144 	 * map.  The map size is 1024 * 2 ^ is->is_tsbsize entries, where each
145 	 * entry is 8 bytes.  The start of the map can be calculated by
146 	 * (0xffffe000 << (8 + is->is_tsbsize)).
147 	 *
148 	 * But sabre and hummingbird use a different scheme that seems to
149 	 * be hard-wired, so we read the start and size from the PROM and
150 	 * just use those values.
151 	 */
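	/*
	 * A quick worked example of the sizes involved: with is_tsbsize == 3
	 * the TSB holds 1024 * 2^3 = 8192 eight-byte entries (64KB, i.e.
	 * PAGE_SIZE << 3 below), and since each entry maps one 8KB page the
	 * resulting DVMA window spans 8192 * 8KB = 64MB.
	 */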
152 	is->is_cr = IOMMUCR_EN;
153 	is->is_tsbsize = tsbsize;
154 	if (iovabase == (u_int32_t)-1) {
155 		is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
156 		is->is_dvmaend = IOTSB_VEND;
157 	} else {
158 		is->is_dvmabase = iovabase;
159 		is->is_dvmaend = iovabase + IOTSB_VSIZE(tsbsize) - 1;
160 	}
161 
162 	/*
163 	 * Allocate memory for I/O pagetables.  They need to be physically
164 	 * contiguous.
165 	 */
166 
167 	size = PAGE_SIZE << is->is_tsbsize;
168 	TAILQ_INIT(&mlist);
169 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
170 	    (paddr_t)PAGE_SIZE, (paddr_t)0, &mlist, 1, UVM_PLA_NOWAIT) != 0)
171 		panic("iommu_init: no memory");
172 
173 	va = uvm_km_valloc(kernel_map, size);
174 	if (va == 0)
175 		panic("iommu_init: no memory");
176 	is->is_tsb = (int64_t *)va;
177 
178 	m = TAILQ_FIRST(&mlist);
179 	is->is_ptsb = VM_PAGE_TO_PHYS(m);
180 
181 	/* Map the pages */
182 	for (; m != NULL; m = TAILQ_NEXT(m,pageq)) {
183 		pa = VM_PAGE_TO_PHYS(m);
184 		pmap_enter(pmap_kernel(), va, pa | PMAP_NVC,
185 			VM_PROT_READ|VM_PROT_WRITE,
186 			VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED);
187 		va += PAGE_SIZE;
188 	}
189 	pmap_update(pmap_kernel());
190 	memset(is->is_tsb, 0, size);
191 
192 #ifdef DEBUG
193 	if (iommudebug & IDB_INFO) {
194 		/* Probe the iommu */
195 		/* The address or contents of the regs...? */
196 		printf("iommu regs at: cr=%lx tsb=%lx flush=%lx\n",
197 		    (u_long)bus_space_vaddr(is->is_bustag, is->is_iommu) +
198 			IOMMUREG(iommu_cr),
199 		    (u_long)bus_space_vaddr(is->is_bustag, is->is_iommu) +
200 			IOMMUREG(iommu_tsb),
201 		    (u_long)bus_space_vaddr(is->is_bustag, is->is_iommu) +
202 			IOMMUREG(iommu_flush));
203 		printf("iommu cr=%llx tsb=%llx\n",
204 		    IOMMUREG_READ(is, iommu_cr),
205 		    IOMMUREG_READ(is, iommu_tsb));
206 		printf("TSB base %p phys %llx\n",
207 		    (void *)is->is_tsb, (unsigned long long)is->is_ptsb);
208 		delay(1000000); /* 1 s */
209 	}
210 #endif
211 
212 	/*
213  * Now that all the hardware is working we need to allocate a DVMA map.
214 	 */
215 	printf("dvma map %x-%x", is->is_dvmabase, is->is_dvmaend);
216 #ifdef DEBUG
217 	printf(", iotdb %llx-%llx",
218 	    (unsigned long long)is->is_ptsb,
219 	    (unsigned long long)(is->is_ptsb + size));
220 #endif
221 	is->is_dvmamap = extent_create(name,
222 	    is->is_dvmabase, (u_long)is->is_dvmaend + 1,
223 	    M_DEVBUF, 0, 0, EX_NOWAIT);
224 	mtx_init(&is->is_mtx, IPL_HIGH);
225 
226 	/*
227 	 * Set the TSB size.  The relevant bits were moved to the TSB
228 	 * base register in the PCIe host bridges.
229 	 */
230 	if (strncmp(name, "pyro", 4) == 0)
231 		is->is_ptsb |= is->is_tsbsize;
232 	else
233 		is->is_cr |= (is->is_tsbsize << 16);
234 
235 	/*
236 	 * Now actually start up the IOMMU.
237 	 */
238 	iommu_reset(is);
239 	printf("\n");
240 }
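
/*
 * Sketch of how a bus attachment is expected to drive this code
 * (illustrative only; the softc fields shown are hypothetical, not the
 * actual sbus/psycho layout):
 *
 *	struct iommu_state *is = &sc->sc_is;
 *
 *	is->is_bustag = sc->sc_bustag;
 *	is->is_iommu = sc->sc_iommu_regh;
 *	is->is_sb[0] = &sc->sc_sb;		(NULL when there is no STC)
 *	iommu_init(sc->sc_dev.dv_xname, is, tsbsize, (u_int32_t)-1);
 *
 * Passing (u_int32_t)-1 as iovabase selects the default window computed
 * from IOTSB_VSTART()/IOTSB_VEND above.
 */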
241 
242 /*
243  * Streaming buffers don't exist on the UltraSPARC IIi/e; we should have
244  * detected that already and disabled them.  If not, we will notice that
245  * they aren't there when the STRBUF_EN bit does not stay set.
246  */
247 void
248 iommu_reset(struct iommu_state *is)
249 {
250 	int i;
251 
252 	IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);
253 
254 	/* Enable IOMMU */
255 	IOMMUREG_WRITE(is, iommu_cr, is->is_cr);
256 
257 	for (i = 0; i < 2; ++i) {
258 		struct strbuf_ctl *sb = is->is_sb[i];
259 
260 		if (sb == NULL)
261 			continue;
262 
263 		sb->sb_iommu = is;
264 		strbuf_reset(sb);
265 
266 		if (sb->sb_flush)
267 			printf(", STC%d enabled", i);
268 	}
269 
270 	if (is->is_flags & IOMMU_FLUSH_CACHE)
271 		IOMMUREG_WRITE(is, iommu_cache_invalidate, -1ULL);
272 }
273 
274 /*
275  * Initialize one STC.
276  */
277 void
278 strbuf_reset(struct strbuf_ctl *sb)
279 {
280 	if (sb->sb_flush == NULL)
281 		return;
282 
283 	bus_space_write_8(sb->sb_bustag, sb->sb_sb,
284 	    STRBUFREG(strbuf_ctl), STRBUF_EN);
285 
286 	membar(Lookaside);
287 
288 	/* No streaming buffers? Disable them */
289 	if (bus_space_read_8(sb->sb_bustag, sb->sb_sb,
290 	    STRBUFREG(strbuf_ctl)) == 0) {
291 		sb->sb_flush = NULL;
292 	} else {
293 		/*
294 		 * locate the pa of the flush buffer
295 		 */
296 		if (pmap_extract(pmap_kernel(),
297 		    (vaddr_t)sb->sb_flush, &sb->sb_flushpa) == FALSE)
298 			sb->sb_flush = NULL;
299 		mtx_init(&sb->sb_mtx, IPL_HIGH);
300 	}
301 }
302 
303 /*
304  * Add an entry to the IOMMU table.
305  *
306  * The entry is marked streaming if an STC was detected and
307  * the BUS_DMA_STREAMING flag is set.
308  */
309 void
310 iommu_enter(struct iommu_state *is, struct strbuf_ctl *sb, bus_addr_t va,
311     paddr_t pa, int flags)
312 {
313 	int64_t tte;
314 	volatile int64_t *tte_ptr = &is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)];
315 
316 #ifdef DIAGNOSTIC
317 	if (va < is->is_dvmabase || (va + PAGE_MASK) > is->is_dvmaend)
318 		panic("iommu_enter: va %#lx not in DVMA space", va);
319 
320 	tte = *tte_ptr;
321 
322 	if (tte & IOTTE_V) {
323 		printf("Overwriting valid tte entry (dva %lx pa %lx "
324 		    "&tte %p tte %llx)\n", va, pa, tte_ptr, tte);
325 		extent_print(is->is_dvmamap);
326 		panic("IOMMU overwrite");
327 	}
328 #endif
329 
330 	tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
331 	    !(flags & BUS_DMA_NOCACHE), (flags & BUS_DMA_STREAMING));
332 
333 	DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n",
334 	    (int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va));
335 
336 	*tte_ptr = tte;
337 
338 	/*
339 	 * Why bother to flush this va?  It should only be relevant for
340 	 * V ==> V or V ==> non-V transitions.  The former is illegal and
341 	 * the latter is never done here.  It is true that this provides
342 	 * some protection against a misbehaving master using an address
343  * after it should.  The IOMMU documentation specifically warns
344 	 * that the consequences of a simultaneous IOMMU flush and DVMA
345 	 * access to the same address are undefined.  (By that argument,
346 	 * the STC should probably be flushed as well.)   Note that if
347 	 * a bus master keeps using a memory region after it has been
348 	 * unmapped, the specific behavior of the IOMMU is likely to
349 	 * be the least of our worries.
350 	 */
351 	IOMMUREG_WRITE(is, iommu_flush, va);
352 
353 	DPRINTF(IDB_IOMMU, ("iommu_enter: va %lx pa %lx TSB[%lx]@%p=%lx\n",
354 	    va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
355 	    (void *)(u_long)&is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)],
356 	    (u_long)tte));
357 }
358 
359 /*
360  * Remove an entry from the IOMMU table.
361  *
362  * The entry is flushed from the STC if an STC is detected and the TSB
363  * entry has the IOTTE_STREAM flag set.  It should be impossible for
364  * the TSB entry to have this flag set without the BUS_DMA_STREAMING
365  * flag, but better to be safe.  (The IOMMU will be ignored as long
366  * as an STC entry exists.)
367  */
368 void
369 iommu_remove(struct iommu_state *is, struct strbuf_ctl *sb, bus_addr_t va)
370 {
371 	int64_t *tte_ptr = &is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)];
372 	int64_t tte;
373 
374 #ifdef DIAGNOSTIC
375 	if (va < is->is_dvmabase || (va + PAGE_MASK) > is->is_dvmaend)
376 		panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
377 	if (va != trunc_page(va)) {
378 		printf("iommu_remove: unaligned va: %lx\n", va);
379 		va = trunc_page(va);
380 	}
381 #endif
382 	tte = *tte_ptr;
383 
384 	DPRINTF(IDB_IOMMU, ("iommu_remove: va %lx TSB[%llx]@%p\n",
385 	    va, tte, tte_ptr));
386 
387 #ifdef DIAGNOSTIC
388 	if ((tte & IOTTE_V) == 0) {
389 		printf("Removing invalid tte entry (dva %lx &tte %p "
390 		    "tte %llx)\n", va, tte_ptr, tte);
391 		extent_print(is->is_dvmamap);
392 		panic("IOMMU remove overwrite");
393 	}
394 #endif
395 
396 	*tte_ptr = tte & ~IOTTE_V;
397 
398 	/*
399 	 * IO operations are strongly ordered WRT each other.  It is
400 	 * unclear how they relate to normal memory accesses.
401 	 */
402 	membar(StoreStore);
403 
404 	IOMMUREG_WRITE(is, iommu_flush, va);
405 
406 	if (sb && (tte & IOTTE_STREAM))
407 		iommu_strbuf_flush(sb, va);
408 
409 	/* Should we sync the iommu and stc here? */
410 }
411 
412 /*
413  * Find the physical address of a DVMA address (debug routine).
414  */
415 paddr_t
416 iommu_extract(struct iommu_state *is, bus_addr_t dva)
417 {
418 	int64_t tte = 0;
419 
420 	if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
421 		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
422 
423 	return (tte & IOTTE_PAMASK);
424 }
425 
426 /*
427  * Lookup a TSB entry for a given DVMA (debug routine).
428  */
429 int64_t
430 iommu_lookup_tte(struct iommu_state *is, bus_addr_t dva)
431 {
432 	int64_t tte = 0;
433 
434 	if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
435 		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
436 
437 	return (tte);
438 }
439 
440 /*
441  * Lookup a TSB entry at a given physical address (debug routine).
442  */
443 int64_t
444 iommu_fetch_tte(struct iommu_state *is, paddr_t pa)
445 {
446 	int64_t tte = 0;
447 
448 	if (pa >= is->is_ptsb && pa < is->is_ptsb +
449 	    (PAGE_SIZE << is->is_tsbsize))
450 		tte = ldxa(pa, ASI_PHYS_CACHED);
451 
452 	return (tte);
453 }
454 
455 /*
456  * Fetch a TSB entry with some sanity checking.
457  */
458 int64_t
459 iommu_tsb_entry(struct iommu_state *is, bus_addr_t dva)
460 {
461 	int64_t tte;
462 
463 	if (dva < is->is_dvmabase || dva > is->is_dvmaend)
464 		panic("invalid dva: %llx", (long long)dva);
465 
466 	tte = is->is_tsb[IOTSBSLOT(dva,is->is_tsbsize)];
467 
468 	if ((tte & IOTTE_V) == 0)
469 		panic("iommu_tsb_entry: invalid entry %lx", dva);
470 
471 	return (tte);
472 }
473 
474 /*
475  * Initiate and then block until an STC flush synchronization has completed.
476  */
477 int
478 iommu_strbuf_flush_done(struct iommu_map_state *ims)
479 {
480 	struct strbuf_ctl *sb = ims->ims_sb;
481 	struct strbuf_flush *sf = &ims->ims_flush;
482 	struct timeval cur, flushtimeout;
483 	struct timeval to = { 0, 500000 };
484 	u_int64_t flush;
485 	int timeout_started = 0;
486 
487 #ifdef DIAGNOSTIC
488 	if (sb == NULL) {
489 		panic("iommu_strbuf_flush_done: invalid flush buffer");
490 	}
491 #endif
492 
493 	mtx_enter(&sb->sb_mtx);
494 
495 	/*
496 	 * Streaming buffer flushes:
497 	 *
498 	 *   1 Tell strbuf to flush by storing va to strbuf_pgflush.
499 	 *   1 Tell the strbuf to flush by storing the va to strbuf_pgflush.
500 	 *   2 Store 0 in the flush flag word.
501 	 *   3 Store the physical address of the flag word in flushsync.
502 	 *   4 Wait until the flag word becomes non-zero.
503 	 * If it takes more than .5 sec, something went very, very wrong.
504 	 */
505 
506 	/*
507 	 * If we're reading from ASI_PHYS_CACHED, then we'll write to
508 	 * it too.  No need to tempt fate or learn about Si bugs or such.
509 	 * FreeBSD just uses normal "volatile" reads/writes...
510 	 */
511 
512 	stxa(sf->sbf_flushpa, ASI_PHYS_CACHED, 0);
513 
514 	/*
515 	 * Ensure any previous strbuf operations are complete and that
516 	 * memory is initialized before the IOMMU uses it.
517 	 * Is this needed?  How are IO and memory operations ordered?
518 	 */
519 	membar(StoreStore);
520 
521 	bus_space_write_8(sb->sb_bustag, sb->sb_sb,
522 		    STRBUFREG(strbuf_flushsync), sf->sbf_flushpa);
523 
524 	DPRINTF(IDB_IOMMU,
525 	    ("iommu_strbuf_flush_done: flush = %llx pa = %lx\n",
526 		ldxa(sf->sbf_flushpa, ASI_PHYS_CACHED), sf->sbf_flushpa));
527 
528 	membar(StoreLoad | Lookaside);
529 
530 	for (;;) {
531 		int i;
532 
533 		/*
534 		 * Try to shave a few instruction cycles off the average
535 		 * latency by only checking the elapsed time every few
536 		 * fetches.
537 		 */
538 		for (i = 0; i < 1000; ++i) {
539 			membar(LoadLoad);
540 			/* Bypass non-coherent D$ */
541 			/* non-coherent...?   Huh? */
542 			flush = ldxa(sf->sbf_flushpa, ASI_PHYS_CACHED);
543 
544 			if (flush) {
545 				DPRINTF(IDB_IOMMU,
546 				    ("iommu_strbuf_flush_done: flushed\n"));
547 				mtx_leave(&sb->sb_mtx);
548 				return (0);
549 			}
550 		}
551 
552 		microtime(&cur);
553 
554 		if (timeout_started) {
555 			if (timercmp(&cur, &flushtimeout, >))
556 				panic("STC timeout at %lx (%lld)",
557 				    sf->sbf_flushpa, flush);
558 		} else {
559 			timeradd(&cur, &to, &flushtimeout);
560 
561 			timeout_started = 1;
562 
563 			DPRINTF(IDB_IOMMU,
564 			    ("iommu_strbuf_flush_done: flush = %llx pa = %lx "
565 				"now=%lx:%lx until = %lx:%lx\n",
566 				ldxa(sf->sbf_flushpa, ASI_PHYS_CACHED),
567 				sf->sbf_flushpa, cur.tv_sec, cur.tv_usec,
568 				flushtimeout.tv_sec, flushtimeout.tv_usec));
569 		}
570 	}
571 }
572 
573 /*
574  * IOMMU DVMA operations, common to SBus and PCI.
575  */
576 
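/*
 * Walk a bus_dma tag chain up to the first parent tag that implements
 * "fn"; the DVMA code below uses this to hand the generic pieces of an
 * operation (map creation/destruction, memory allocation) back to the
 * bus-specific parent tag.
 */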
577 #define BUS_DMA_FIND_PARENT(t, fn)                                      \
578         if (t->_parent == NULL)                                         \
579                 panic("null bus_dma parent (" #fn ")");                 \
580         for (t = t->_parent; t->fn == NULL; t = t->_parent)             \
581                 if (t->_parent == NULL)                                 \
582                         panic("no bus_dma " #fn " located");
583 
584 int
585 iommu_dvmamap_create(bus_dma_tag_t t, bus_dma_tag_t t0, struct strbuf_ctl *sb,
586     bus_size_t size, int nsegments, bus_size_t maxsegsz, bus_size_t boundary,
587     int flags, bus_dmamap_t *dmamap)
588 {
589 	int ret;
590 	bus_dmamap_t map;
591 	struct iommu_map_state *ims;
592 
593 	BUS_DMA_FIND_PARENT(t, _dmamap_create);
594 	ret = (*t->_dmamap_create)(t, t0, size, nsegments, maxsegsz, boundary,
595 	    flags, &map);
596 
597 	if (ret)
598 		return (ret);
599 
600 	ims = iommu_iomap_create(atop(round_page(size)));
601 
602 	if (ims == NULL) {
603 		bus_dmamap_destroy(t0, map);
604 		return (ENOMEM);
605 	}
606 
607 	ims->ims_sb = sb;
608 	map->_dm_cookie = ims;
609 
610 #ifdef DIAGNOSTIC
611 	if (ims->ims_sb == NULL)
612 		panic("iommu_dvmamap_create: null sb");
613 	if (ims->ims_sb->sb_iommu == NULL)
614 		panic("iommu_dvmamap_create: null iommu");
615 #endif
616 	*dmamap = map;
617 
618 	return (0);
619 }
620 
621 void
622 iommu_dvmamap_destroy(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
623 {
624 	/*
625 	 * The specification (man page) requires a loaded
626 	 * map to be unloaded before it is destroyed.
627 	 */
628 	if (map->dm_nsegs)
629 		bus_dmamap_unload(t0, map);
630 
631         if (map->_dm_cookie)
632                 iommu_iomap_destroy(map->_dm_cookie);
633 	map->_dm_cookie = NULL;
634 
635 	BUS_DMA_FIND_PARENT(t, _dmamap_destroy);
636 	(*t->_dmamap_destroy)(t, t0, map);
637 }
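
/*
 * The usual life cycle, as seen from a driver going through the generic
 * bus_dma(9) wrappers that end up in the routines above and below
 * (a sketch; "dmat", "map", "buf" and "size" are assumed to be the
 * driver's own tag, map, buffer and length):
 *
 *	bus_dmamap_create(dmat, size, 1, size, 0, BUS_DMA_NOWAIT, &map);
 *	bus_dmamap_load(dmat, map, buf, size, NULL, BUS_DMA_NOWAIT);
 *	bus_dmamap_sync(dmat, map, 0, size, BUS_DMASYNC_PREWRITE);
 *	... start the DMA and wait for it to finish ...
 *	bus_dmamap_sync(dmat, map, 0, size, BUS_DMASYNC_POSTWRITE);
 *	bus_dmamap_unload(dmat, map);
 *	bus_dmamap_destroy(dmat, map);
 */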
638 
639 /*
640  * Load a contiguous kva buffer into a dmamap.  The physical pages are
641  * not assumed to be contiguous.  Two passes are made through the buffer
642  * and both call pmap_extract() for the same va->pa translations.  It
643  * is possible to run out of pa->dvma mappings; the code should be smart
644  * enough to resize the iomap (when the "flags" permit allocation).  It
645  * is trivial to compute the number of entries required (round the length
646  * up to the page size and then divide by the page size)...
647  */
648 int
649 iommu_dvmamap_load(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
650     void *buf, bus_size_t buflen, struct proc *p, int flags)
651 {
652 	int err = 0;
653 	bus_size_t sgsize;
654 	u_long dvmaddr, sgstart, sgend;
655 	bus_size_t align, boundary;
656 	struct iommu_state *is;
657 	struct iommu_map_state *ims = map->_dm_cookie;
658 	pmap_t pmap;
659 
660 #ifdef DIAGNOSTIC
661 	if (ims == NULL)
662 		panic("iommu_dvmamap_load: null map state");
663 #endif
664 #ifdef DEBUG
665 	if (ims->ims_sb == NULL)
666 		panic("iommu_dvmamap_load: null sb");
667 	if (ims->ims_sb->sb_iommu == NULL)
668 		panic("iommu_dvmamap_load: null iommu");
669 #endif /* DEBUG */
670 	is = ims->ims_sb->sb_iommu;
671 
672 	if (map->dm_nsegs) {
673 		/*
674 		 * Is it still in use? _bus_dmamap_load should have taken care
675 		 * of this.
676 		 */
677 #ifdef DIAGNOSTIC
678 		panic("iommu_dvmamap_load: map still in use");
679 #endif
680 		bus_dmamap_unload(t0, map);
681 	}
682 
683 	/*
684 	 * Make sure that on error condition we return "no valid mappings".
685 	 */
686 	map->dm_nsegs = 0;
687 
688 	if (buflen < 1 || buflen > map->_dm_size) {
689 		DPRINTF(IDB_BUSDMA,
690 		    ("iommu_dvmamap_load(): error %d > %d -- "
691 		     "map size exceeded!\n", (int)buflen, (int)map->_dm_size));
692 		return (EINVAL);
693 	}
694 
695 	/*
696 	 * A boundary presented to bus_dmamem_alloc() takes precedence
697 	 * over boundary in the map.
698 	 */
699 	if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
700 		boundary = map->_dm_boundary;
701 	align = MAX(map->dm_segs[0]._ds_align, PAGE_SIZE);
702 
703 	pmap = p ? p->p_vmspace->vm_map.pmap : pmap_kernel();
704 
705 	/* Count up the total number of pages we need */
706 	iommu_iomap_clear_pages(ims);
707 	{ /* Scope */
708 		bus_addr_t a, aend;
709 		bus_addr_t addr = (bus_addr_t)buf;
710 		int seg_len = buflen;
711 
712 		aend = round_page(addr + seg_len);
713 		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
714 			paddr_t pa;
715 
716 			if (pmap_extract(pmap, a, &pa) == FALSE) {
717 				printf("iomap pmap error addr 0x%llx\n", a);
718 				iommu_iomap_clear_pages(ims);
719 				return (EFBIG);
720 			}
721 
722 			err = iommu_iomap_insert_page(ims, pa);
723 			if (err) {
724 				printf("iomap insert error: %d for "
725 				    "va 0x%llx pa 0x%lx "
726 				    "(buf %p len %lld/%llx)\n",
727 				    err, a, pa, buf, buflen, buflen);
728 				iommu_dvmamap_print_map(t, is, map);
729 				iommu_iomap_clear_pages(ims);
730 				return (EFBIG);
731 			}
732 		}
733 	}
734 	sgsize = ims->ims_map.ipm_pagecnt * PAGE_SIZE;
735 
736 	mtx_enter(&is->is_mtx);
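	/*
	 * BUS_DMA_24BIT devices drive only 24 DVMA address bits and so can
	 * reach only the top 16MB of the 32-bit DVMA space; clamp the
	 * allocation range for them.
	 */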
737 	if (flags & BUS_DMA_24BIT) {
738 		sgstart = MAX(is->is_dvmamap->ex_start, 0xff000000);
739 		sgend = MIN(is->is_dvmamap->ex_end, 0xffffffff);
740 	} else {
741 		sgstart = is->is_dvmamap->ex_start;
742 		sgend = is->is_dvmamap->ex_end;
743 	}
744 
745 	/*
746 	 * If our segment size is larger than the boundary we need to
747 	 * split the transfer up into little pieces ourselves.
748 	 */
749 	err = extent_alloc_subregion(is->is_dvmamap, sgstart, sgend,
750 	    sgsize, align, 0, (sgsize > boundary) ? 0 : boundary,
751 	    EX_NOWAIT | EX_BOUNDZERO, (u_long *)&dvmaddr);
752 	mtx_leave(&is->is_mtx);
753 
754 #ifdef DEBUG
755 	if (err || (dvmaddr == (bus_addr_t)-1))	{
756 		printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
757 		    (int)sgsize, flags);
758 #ifdef DDB
759 		if (iommudebug & IDB_BREAK)
760 			Debugger();
761 #endif
762 	}
763 #endif
764 	if (err != 0) {
765 		iommu_iomap_clear_pages(ims);
766 		return (err);
767 	}
768 
769 	/* Set the active DVMA map */
770 	map->_dm_dvmastart = dvmaddr;
771 	map->_dm_dvmasize = sgsize;
772 
773 	map->dm_mapsize = buflen;
774 
775 #ifdef DEBUG
776 	iommu_dvmamap_validate_map(t, is, map);
777 #endif
778 
779 	iommu_iomap_load_map(is, ims, dvmaddr, flags);
780 
781 	{ /* Scope */
782 		bus_addr_t a, aend;
783 		bus_addr_t addr = (bus_addr_t)buf;
784 		int seg_len = buflen;
785 
786 		aend = round_page(addr + seg_len);
787 		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
788 			bus_addr_t pgstart;
789 			bus_addr_t pgend;
790 			paddr_t pa;
791 			int pglen;
792 
793 			/* Yuck... Redoing the same pmap_extract... */
794 			if (pmap_extract(pmap, a, &pa) == FALSE) {
795 				printf("iomap pmap error addr 0x%llx\n", a);
796 				err =  EFBIG;
797 				break;
798 			}
799 
800 			pgstart = pa | (MAX(a, addr) & PAGE_MASK);
801 			pgend = pa | (MIN(a + PAGE_SIZE - 1,
802 			    addr + seg_len - 1) & PAGE_MASK);
803 			pglen = pgend - pgstart + 1;
804 
805 			if (pglen < 1)
806 				continue;
807 
808 			err = iommu_dvmamap_append_range(t, map, pgstart,
809 			    pglen, flags, boundary);
810 			if (err == EFBIG)
811 				break;
812 			else if (err) {
813 				printf("iomap load seg page: %d for "
814 				    "va 0x%llx pa %lx (%llx - %llx) "
815 				    "for %d/0x%x\n",
816 				    err, a, pa, pgstart, pgend, pglen, pglen);
817 				break;
818 			}
819 		}
820 	}
821 #ifdef DEBUG
822 	iommu_dvmamap_validate_map(t, is, map);
823 
824 	if (err)
825 		printf("**** iommu_dvmamap_load failed with error %d\n",
826 		    err);
827 
828 	if (err || (iommudebug & IDB_PRINT_MAP)) {
829 		iommu_dvmamap_print_map(t, is, map);
830 #ifdef DDB
831 		if (iommudebug & IDB_BREAK)
832 			Debugger();
833 #endif
834 	}
835 #endif
836 	if (err)
837 		iommu_dvmamap_unload(t, t0, map);
838 
839 	return (err);
840 }
841 
842 /*
843  * Load a dvmamap from an array of segs or an mlist (if the first
844  * "segs" entry's mlist is non-null).  It calls iommu_dvmamap_load_seg()
845  * or iommu_dvmamap_load_mlist() for part of the 2nd pass through the
846  * mapping.  This is ugly.  A better solution would probably be to have
847  * function pointers for implementing the traversal.  That way, there
848  * could be one core load routine for each of the three required algorithms
849  * (buffer, seg, and mlist).  That would also mean that the traversal
850  * algorithm would then only need one implementation for each algorithm
851  * instead of two (one for populating the iomap and one for populating
852  * the dvma map).
853  */
854 int
855 iommu_dvmamap_load_raw(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
856     bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
857 {
858 	int i;
859 	int left;
860 	int err = 0;
861 	bus_size_t sgsize;
862 	bus_size_t boundary, align;
863 	u_long dvmaddr, sgstart, sgend;
864 	struct iommu_state *is;
865 	struct iommu_map_state *ims = map->_dm_cookie;
866 
867 #ifdef DIAGNOSTIC
868 	if (ims == NULL)
869 		panic("iommu_dvmamap_load_raw: null map state");
870 #endif
871 #ifdef DEBUG
872 	if (ims->ims_sb == NULL)
873 		panic("iommu_dvmamap_load_raw: null sb");
874 	if (ims->ims_sb->sb_iommu == NULL)
875 		panic("iommu_dvmamap_load_raw: null iommu");
876 #endif /* DEBUG */
877 	is = ims->ims_sb->sb_iommu;
878 
879 	if (map->dm_nsegs) {
880 		/* Already in use?? */
881 #ifdef DIAGNOSTIC
882 		panic("iommu_dvmamap_load_raw: map still in use");
883 #endif
884 		bus_dmamap_unload(t0, map);
885 	}
886 
887 	/*
888 	 * A boundary presented to bus_dmamem_alloc() takes precedence
889 	 * over boundary in the map.
890 	 */
891 	if ((boundary = segs[0]._ds_boundary) == 0)
892 		boundary = map->_dm_boundary;
893 
894 	align = MAX(segs[0]._ds_align, PAGE_SIZE);
895 
896 	/*
897 	 * Make sure that on error condition we return "no valid mappings".
898 	 */
899 	map->dm_nsegs = 0;
900 
901 	iommu_iomap_clear_pages(ims);
902 	if (segs[0]._ds_mlist) {
903 		struct pglist *mlist = segs[0]._ds_mlist;
904 		struct vm_page *m;
905 		for (m = TAILQ_FIRST(mlist); m != NULL;
906 		    m = TAILQ_NEXT(m,pageq)) {
907 			err = iommu_iomap_insert_page(ims, VM_PAGE_TO_PHYS(m));
908 
909 			if (err) {
910 				printf("iomap insert error: %d for "
911 				    "pa 0x%lx\n", err, VM_PAGE_TO_PHYS(m));
912 				iommu_dvmamap_print_map(t, is, map);
913 				iommu_iomap_clear_pages(ims);
914 				return (EFBIG);
915 			}
916 		}
917 	} else {
918 		/* Count up the total number of pages we need */
919 		for (i = 0, left = size; left > 0 && i < nsegs; i++) {
920 			bus_addr_t a, aend;
921 			bus_size_t len = segs[i].ds_len;
922 			bus_addr_t addr = segs[i].ds_addr;
923 			int seg_len = MIN(left, len);
924 
925 			if (len < 1)
926 				continue;
927 
928 			aend = round_page(addr + seg_len);
929 			for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
930 
931 				err = iommu_iomap_insert_page(ims, a);
932 				if (err) {
933 					printf("iomap insert error: %d for "
934 					    "pa 0x%llx\n", err, a);
935 					iommu_dvmamap_print_map(t, is, map);
936 					iommu_iomap_clear_pages(ims);
937 					return (EFBIG);
938 				}
939 			}
940 
941 			left -= seg_len;
942 		}
943 	}
944 	sgsize = ims->ims_map.ipm_pagecnt * PAGE_SIZE;
945 
946 	mtx_enter(&is->is_mtx);
947 	if (flags & BUS_DMA_24BIT) {
948 		sgstart = MAX(is->is_dvmamap->ex_start, 0xff000000);
949 		sgend = MIN(is->is_dvmamap->ex_end, 0xffffffff);
950 	} else {
951 		sgstart = is->is_dvmamap->ex_start;
952 		sgend = is->is_dvmamap->ex_end;
953 	}
954 
955 	/*
956 	 * If our segment size is larger than the boundary we need to
957 	 * split the transfer up into little pieces ourselves.
958 	 */
959 	err = extent_alloc_subregion(is->is_dvmamap, sgstart, sgend,
960 	    sgsize, align, 0, (sgsize > boundary) ? 0 : boundary,
961 	    EX_NOWAIT | EX_BOUNDZERO, (u_long *)&dvmaddr);
962 	mtx_leave(&is->is_mtx);
963 
964 	if (err != 0) {
965 		iommu_iomap_clear_pages(ims);
966 		return (err);
967 	}
968 
969 #ifdef DEBUG
970 	if (dvmaddr == (bus_addr_t)-1)	{
971 		printf("iommu_dvmamap_load_raw(): extent_alloc(%d, %x) "
972 		    "failed!\n", (int)sgsize, flags);
973 #ifdef DDB
974 		if (iommudebug & IDB_BREAK)
975 			Debugger();
976 #else
977 		panic("");
978 #endif
979 	}
980 #endif
981 
982 	/* Set the active DVMA map */
983 	map->_dm_dvmastart = dvmaddr;
984 	map->_dm_dvmasize = sgsize;
985 
986 	map->dm_mapsize = size;
987 
988 #ifdef DEBUG
989 	iommu_dvmamap_validate_map(t, is, map);
990 #endif
991 
992 	iommu_iomap_load_map(is, ims, dvmaddr, flags);
993 
994 	if (segs[0]._ds_mlist)
995 		err = iommu_dvmamap_load_mlist(t, is, map, segs[0]._ds_mlist,
996 		    flags, size, boundary);
997 	else
998 		err = iommu_dvmamap_load_seg(t, is, map, segs, nsegs,
999 		    flags, size, boundary);
1000 
1001 #ifdef DEBUG
1002 	/* The map should be valid even if the load failed */
1003 	if (iommu_dvmamap_validate_map(t, is, map)) {
1004 		printf("load size %lld/0x%llx\n", size, size);
1005 		if (segs[0]._ds_mlist)
1006 			printf("mlist %p\n", segs[0]._ds_mlist);
1007 		else  {
1008 			long tot_len = 0;
1009 			long clip_len = 0;
1010 			printf("segs %p nsegs %d\n", segs, nsegs);
1011 
1012 			left = size;
1013 			for(i = 0; i < nsegs; i++) {
1014 				bus_size_t len = segs[i].ds_len;
1015 				bus_addr_t addr = segs[i].ds_addr;
1016 				int seg_len = MIN(left, len);
1017 
1018 				printf("addr %llx len %lld/0x%llx seg_len "
1019 				    "%d/0x%x left %d/0x%x\n", addr, len, len,
1020 				    seg_len, seg_len, left, left);
1021 
1022 				left -= seg_len;
1023 
1024 				clip_len += seg_len;
1025 				tot_len += segs[i].ds_len;
1026 			}
1027 			printf("total length %ld/0x%lx total seg. "
1028 			    "length %ld/0x%lx\n", tot_len, tot_len, clip_len,
1029 			    clip_len);
1030 		}
1031 
1032 		if (err == 0)
1033 			err = 1;
1034 	}
1035 
1036 	if (err)
1037 		printf("**** iommu_dvmamap_load_raw failed with error %d\n",
1038 		    err);
1039 
1040 	if (err || (iommudebug & IDB_PRINT_MAP)) {
1041 		iommu_dvmamap_print_map(t, is, map);
1042 #ifdef DDB
1043 		if (iommudebug & IDB_BREAK)
1044 			Debugger();
1045 #endif
1046 	}
1047 #endif
1048 	if (err)
1049 		iommu_dvmamap_unload(t, t0, map);
1050 
1051 	return (err);
1052 }
1053 
1054 /*
1055  * Insert a range of addresses into a loaded map respecting the specified
1056  * boundary and alignment restrictions.  The range is specified by its
1057  * physical address and length.  The range cannot cross a page boundary.
1058  * This code (along with most of the rest of the function in this file)
1059  * assumes that the IOMMU page size is equal to PAGE_SIZE.
1060  */
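/*
 * For example, appending two physically and DVMA-contiguous 8KB pages
 * yields a single 16KB dm_segs[] entry; if a non-zero "boundary" (say
 * 16MB) falls inside such a merged run, the run is instead split at the
 * boundary-aligned DVMA address into two consecutive segments.
 */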
1061 int
1062 iommu_dvmamap_append_range(bus_dma_tag_t t, bus_dmamap_t map, paddr_t pa,
1063     bus_size_t length, int flags, bus_size_t boundary)
1064 {
1065 	struct iommu_map_state *ims = map->_dm_cookie;
1066 	bus_addr_t sgstart, sgend, bd_mask;
1067 	bus_dma_segment_t *seg = NULL;
1068 	int i = map->dm_nsegs;
1069 
1070 #ifdef DEBUG
1071 	if (ims == NULL)
1072 		panic("iommu_dvmamap_append_range: null map state");
1073 #endif
1074 
1075 	sgstart = iommu_iomap_translate(ims, pa);
1076 	sgend = sgstart + length - 1;
1077 
1078 #ifdef DIAGNOSTIC
1079 	if (sgstart == NULL || sgstart > sgend) {
1080 		printf("append range invalid mapping for %lx "
1081 		    "(0x%llx - 0x%llx)\n", pa, sgstart, sgend);
1082 		map->dm_nsegs = 0;
1083 		return (EINVAL);
1084 	}
1085 #endif
1086 
1087 #ifdef DEBUG
1088 	if (trunc_page(sgstart) != trunc_page(sgend)) {
1089 		printf("append range crossing page boundary! "
1090 		    "pa %lx length %lld/0x%llx sgstart %llx sgend %llx\n",
1091 		    pa, length, length, sgstart, sgend);
1092 	}
1093 #endif
1094 
1095 	/*
1096 	 * We will attempt to merge this range with the previous entry
1097 	 * (if there is one).
1098 	 */
1099 	if (i > 0) {
1100 		seg = &map->dm_segs[i - 1];
1101 		if (sgstart == seg->ds_addr + seg->ds_len) {
1102 			length += seg->ds_len;
1103 			sgstart = seg->ds_addr;
1104 			sgend = sgstart + length - 1;
1105 		} else
1106 			seg = NULL;
1107 	}
1108 
1109 	if (seg == NULL) {
1110 		seg = &map->dm_segs[i];
1111 		if (++i > map->_dm_segcnt) {
1112 			map->dm_nsegs = 0;
1113 			return (EFBIG);
1114 		}
1115 	}
1116 
1117 	/*
1118 	 * At this point, "i" is the index of the *next* bus_dma_segment_t
1119 	 * (the segment count, aka map->dm_nsegs) and "seg" points to the
1120 	 * *current* entry.  "length", "sgstart", and "sgend" reflect what
1121 	 * we intend to put in "*seg".  No assumptions should be made about
1122 	 * the contents of "*seg".  Only the "boundary" handling can change
1123 	 * this, and "boundary" is often zero, so explicitly test for that case
1124 	 * (the test is strictly an optimization).
1125 	 */
1126 	if (boundary != 0) {
1127 		bd_mask = ~(boundary - 1);
1128 
1129 		while ((sgstart & bd_mask) != (sgend & bd_mask)) {
1130 			/*
1131 			 * We are crossing a boundary so fill in the current
1132 			 * segment with as much as possible, then grab a new
1133 			 * one.
1134 			 */
1135 
1136 			seg->ds_addr = sgstart;
1137 			seg->ds_len = boundary - (sgstart & bd_mask);
1138 
1139 			sgstart += seg->ds_len; /* sgend stays the same */
1140 			length -= seg->ds_len;
1141 
1142 			seg = &map->dm_segs[i];
1143 			if (++i > map->_dm_segcnt) {
1144 				map->dm_nsegs = 0;
1145 				return (EFBIG);
1146 			}
1147 		}
1148 	}
1149 
1150 	seg->ds_addr = sgstart;
1151 	seg->ds_len = length;
1152 	map->dm_nsegs = i;
1153 
1154 	return (0);
1155 }
1156 
1157 /*
1158  * Populate the iomap from a bus_dma_segment_t array.  See note for
1159  * iommu_dvmamap_load() regarding page entry exhaustion of the iomap.
1160  * This is less of a problem for load_seg, as the number of pages
1161  * is usually similar to the number of segments (nsegs).
1162  */
1163 int
1164 iommu_dvmamap_load_seg(bus_dma_tag_t t, struct iommu_state *is,
1165     bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int flags,
1166     bus_size_t size, bus_size_t boundary)
1167 {
1168 	int i;
1169 	int left;
1170 	int seg;
1171 
1172 	/*
1173 	 * This segs is made up of individual physical
1174 	 * This "segs" array is made up of individual physical
1175 	 * _bus_dmamap_load_mbuf().  Ignore the mlist and
1176 	 * load each one individually.
1177 	 */
1178 
1179 	/*
1180 	 * Keep in mind that each segment could span
1181 	 * multiple pages and that these are not always
1182 	 * adjacent. The code is no longer adding dvma
1183 	 * aliases to the IOMMU.  The STC will not cross
1184 	 * page boundaries anyway and an IOMMU table walk
1185 	 * vs. what may be a streamed PCI DMA to a ring
1186 	 * descriptor is probably a wash.  It eases TLB
1187 	 * pressure and in the worst possible case, it is
1188 	 * only as bad as a non-IOMMUed architecture.  More
1189 	 * importantly, the code is not quite as hairy.
1190 	 * (It's bad enough as it is.)
1191 	 */
1192 	left = size;
1193 	seg = 0;
1194 	for (i = 0; left > 0 && i < nsegs; i++) {
1195 		bus_addr_t a, aend;
1196 		bus_size_t len = segs[i].ds_len;
1197 		bus_addr_t addr = segs[i].ds_addr;
1198 		int seg_len = MIN(left, len);
1199 
1200 		if (len < 1)
1201 			continue;
1202 
1203 		aend = round_page(addr + seg_len);
1204 		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
1205 			bus_addr_t pgstart;
1206 			bus_addr_t pgend;
1207 			int pglen;
1208 			int err;
1209 
1210 			pgstart = MAX(a, addr);
1211 			pgend = MIN(a + PAGE_SIZE - 1, addr + seg_len - 1);
1212 			pglen = pgend - pgstart + 1;
1213 
1214 			if (pglen < 1)
1215 				continue;
1216 
1217 			err = iommu_dvmamap_append_range(t, map, pgstart,
1218 			    pglen, flags, boundary);
1219 			if (err == EFBIG)
1220 				return (err);
1221 			if (err) {
1222 				printf("iomap load seg page: %d for "
1223 				    "pa 0x%llx (%llx - %llx) for %d/%x\n",
1224 				    err, a, pgstart, pgend, pglen, pglen);
1225 				return (err);
1226 			}
1227 
1228 		}
1229 
1230 		left -= seg_len;
1231 	}
1232 	return (0);
1233 }
1234 
1235 /*
1236  * Populate the iomap from an mlist.  See note for iommu_dvmamap_load()
1237  * regarding page entry exhaustion of the iomap.
1238  */
1239 int
1240 iommu_dvmamap_load_mlist(bus_dma_tag_t t, struct iommu_state *is,
1241     bus_dmamap_t map, struct pglist *mlist, int flags,
1242     bus_size_t size, bus_size_t boundary)
1243 {
1244 	struct vm_page *m;
1245 	paddr_t pa;
1246 	int err;
1247 
1248 	/*
1249 	 * This was allocated with bus_dmamem_alloc.
1250 	 * The pages are on an `mlist'.
1251 	 */
1252 	for (m = TAILQ_FIRST(mlist); m != NULL; m = TAILQ_NEXT(m,pageq)) {
1253 		pa = VM_PAGE_TO_PHYS(m);
1254 
1255 		err = iommu_dvmamap_append_range(t, map, pa, PAGE_SIZE,
1256 		    flags, boundary);
1257 		if (err == EFBIG)
1258 			return (err);
1259 		if (err) {
1260 			printf("iomap load seg page: %d for pa 0x%lx "
1261 			    "(%lx - %lx) for %d/%x\n", err, pa, pa,
1262 			    pa + PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
1263 			return (err);
1264 		}
1265 	}
1266 
1267 	return (0);
1268 }
1269 
1270 /*
1271  * Unload a dvmamap.
1272  */
1273 void
1274 iommu_dvmamap_unload(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
1275 {
1276 	struct iommu_state *is;
1277 	struct iommu_map_state *ims = map->_dm_cookie;
1278 	bus_addr_t dvmaddr = map->_dm_dvmastart;
1279 	bus_size_t sgsize = map->_dm_dvmasize;
1280 	int error;
1281 
1282 #ifdef DEBUG
1283 	if (ims == NULL)
1284 		panic("iommu_dvmamap_unload: null map state");
1285 	if (ims->ims_sb == NULL)
1286 		panic("iommu_dvmamap_unload: null sb");
1287 	if (ims->ims_sb->sb_iommu == NULL)
1288 		panic("iommu_dvmamap_unload: null iommu");
1289 #endif /* DEBUG */
1290 
1291 	is = ims->ims_sb->sb_iommu;
1292 
1293 	/* Flush the iommu */
1294 #ifdef DEBUG
1295 	if (dvmaddr == 0) {
1296 		printf("iommu_dvmamap_unload: No dvmastart\n");
1297 #ifdef DDB
1298 		if (iommudebug & IDB_BREAK)
1299 			Debugger();
1300 #endif
1301 		return;
1302 	}
1303 
1304 	iommu_dvmamap_validate_map(t, is, map);
1305 
1306 	if (iommudebug & IDB_PRINT_MAP)
1307 		iommu_dvmamap_print_map(t, is, map);
1308 #endif /* DEBUG */
1309 
1310 	/* Remove the IOMMU entries */
1311 	iommu_iomap_unload_map(is, ims);
1312 
1313 	/* Clear the iomap */
1314 	iommu_iomap_clear_pages(ims);
1315 
1316 	bus_dmamap_unload(t->_parent, map);
1317 
1318 	/* Mark the mappings as invalid. */
1319 	map->dm_mapsize = 0;
1320 	map->dm_nsegs = 0;
1321 
1322 	mtx_enter(&is->is_mtx);
1323 	error = extent_free(is->is_dvmamap, dvmaddr, sgsize, EX_NOWAIT);
1324 	map->_dm_dvmastart = 0;
1325 	map->_dm_dvmasize = 0;
1326 	mtx_leave(&is->is_mtx);
1327 	if (error != 0)
1328 		printf("warning: %qd of DVMA space lost\n", sgsize);
1329 }
1330 
1331 #ifdef DEBUG
1332 /*
1333  * Perform internal consistency checking on a dvmamap.
1334  */
1335 int
1336 iommu_dvmamap_validate_map(bus_dma_tag_t t, struct iommu_state *is,
1337     bus_dmamap_t map)
1338 {
1339 	int err = 0;
1340 	int seg;
1341 
1342 	if (trunc_page(map->_dm_dvmastart) != map->_dm_dvmastart) {
1343 		printf("**** dvmastart address not page aligned: %llx",
1344 			map->_dm_dvmastart);
1345 		err = 1;
1346 	}
1347 	if (trunc_page(map->_dm_dvmasize) != map->_dm_dvmasize) {
1348 		printf("**** dvmasize not a multiple of page size: %llx",
1349 			map->_dm_dvmasize);
1350 		err = 1;
1351 	}
1352 	if (map->_dm_dvmastart < is->is_dvmabase ||
1353 	    (round_page(map->_dm_dvmastart + map->_dm_dvmasize) - 1) >
1354 	    is->is_dvmaend) {
1355 		printf("dvmaddr %llx len %llx out of range %x - %x\n",
1356 			    map->_dm_dvmastart, map->_dm_dvmasize,
1357 			    is->is_dvmabase, is->is_dvmaend);
1358 		err = 1;
1359 	}
1360 	for (seg = 0; seg < map->dm_nsegs; seg++) {
1361 		if (map->dm_segs[seg].ds_addr == 0 ||
1362 		    map->dm_segs[seg].ds_len == 0) {
1363 			printf("seg %d null segment dvmaddr %llx len %llx for "
1364 			    "range %llx len %llx\n",
1365 			    seg,
1366 			    map->dm_segs[seg].ds_addr,
1367 			    map->dm_segs[seg].ds_len,
1368 			    map->_dm_dvmastart, map->_dm_dvmasize);
1369 			err = 1;
1370 		} else if (map->dm_segs[seg].ds_addr < map->_dm_dvmastart ||
1371 		    round_page(map->dm_segs[seg].ds_addr +
1372 			map->dm_segs[seg].ds_len) >
1373 		    map->_dm_dvmastart + map->_dm_dvmasize) {
1374 			printf("seg %d dvmaddr %llx len %llx out of "
1375 			    "range %llx len %llx\n",
1376 			    seg,
1377 			    map->dm_segs[seg].ds_addr,
1378 			    map->dm_segs[seg].ds_len,
1379 			    map->_dm_dvmastart, map->_dm_dvmasize);
1380 			err = 1;
1381 		}
1382 	}
1383 
1384 	if (err) {
1385 		iommu_dvmamap_print_map(t, is, map);
1386 #if defined(DDB) && defined(DEBUG)
1387 		if (iommudebug & IDB_BREAK)
1388 			Debugger();
1389 #endif
1390 	}
1391 
1392 	return (err);
1393 }
1394 #endif /* DEBUG */
1395 
1396 void
1397 iommu_dvmamap_print_map(bus_dma_tag_t t, struct iommu_state *is,
1398     bus_dmamap_t map)
1399 {
1400 	int seg, i;
1401 	long full_len, source_len;
1402 	struct mbuf *m;
1403 
1404 	printf("DVMA %x for %x, mapping %p: dvstart %llx dvsize %llx "
1405 	    "size %lld/%llx maxsegsz %llx boundary %llx segcnt %d "
1406 	    "flags %x type %d source %p "
1407 	    "cookie %p mapsize %llx nsegs %d\n",
1408 	    is ? is->is_dvmabase : 0, is ? is->is_dvmaend : 0, map,
1409 	    map->_dm_dvmastart, map->_dm_dvmasize,
1410 	    map->_dm_size, map->_dm_size, map->_dm_maxsegsz, map->_dm_boundary,
1411 	    map->_dm_segcnt, map->_dm_flags, map->_dm_type,
1412 	    map->_dm_source, map->_dm_cookie, map->dm_mapsize,
1413 	    map->dm_nsegs);
1414 
1415 	full_len = 0;
1416 	for (seg = 0; seg < map->dm_nsegs; seg++) {
1417 		printf("seg %d dvmaddr %llx pa %lx len %llx (tte %llx)\n",
1418 		    seg, map->dm_segs[seg].ds_addr,
1419 		    is ? iommu_extract(is, map->dm_segs[seg].ds_addr) : 0,
1420 		    map->dm_segs[seg].ds_len,
1421 		    is ? iommu_lookup_tte(is, map->dm_segs[seg].ds_addr) : 0);
1422 		full_len += map->dm_segs[seg].ds_len;
1423 	}
1424 	printf("total length = %ld/0x%lx\n", full_len, full_len);
1425 
1426 	if (map->_dm_source) switch (map->_dm_type) {
1427 	case _DM_TYPE_MBUF:
1428 		m = map->_dm_source;
1429 		if (m->m_flags & M_PKTHDR)
1430 			printf("source PKTHDR mbuf (%p) hdr len = %d/0x%x:\n",
1431 			    m, m->m_pkthdr.len, m->m_pkthdr.len);
1432 		else
1433 			printf("source mbuf (%p):\n", m);
1434 
1435 		source_len = 0;
1436 		for ( ; m; m = m->m_next) {
1437 			vaddr_t vaddr = mtod(m, vaddr_t);
1438 			long len = m->m_len;
1439 			paddr_t pa;
1440 
1441 			if (pmap_extract(pmap_kernel(), vaddr, &pa))
1442 				printf("kva %lx pa %lx len %ld/0x%lx\n",
1443 				    vaddr, pa, len, len);
1444 			else
1445 				printf("kva %lx pa <invalid> len %ld/0x%lx\n",
1446 				    vaddr, len, len);
1447 
1448 			source_len += len;
1449 		}
1450 
1451 		if (full_len != source_len)
1452 			printf("mbuf length %ld/0x%lx is %s than mapping "
1453 			    "length %ld/0x%lx\n", source_len, source_len,
1454 			    (source_len > full_len) ? "greater" : "less",
1455 			    full_len, full_len);
1456 		else
1457 			printf("mbuf length %ld/0x%lx\n", source_len,
1458 			    source_len);
1459 		break;
1460 	case _DM_TYPE_LOAD:
1461 	case _DM_TYPE_SEGS:
1462 	case _DM_TYPE_UIO:
1463 	default:
1464 		break;
1465 	}
1466 
1467 	if (map->_dm_cookie) {
1468 		struct iommu_map_state *ims = map->_dm_cookie;
1469 		struct iommu_page_map *ipm = &ims->ims_map;
1470 
1471 		printf("page map (%p) of size %d with %d entries\n",
1472 		    ipm, ipm->ipm_maxpage, ipm->ipm_pagecnt);
1473 		for (i = 0; i < ipm->ipm_pagecnt; ++i) {
1474 			struct iommu_page_entry *e = &ipm->ipm_map[i];
1475 			printf("%d: vmaddr 0x%lx pa 0x%lx\n", i,
1476 			    e->ipe_va, e->ipe_pa);
1477 		}
1478 	} else
1479 		printf("iommu map state (cookie) is NULL\n");
1480 }
1481 
1482 void
1483 _iommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
1484 	bus_addr_t offset, bus_size_t len, int ops)
1485 {
1486 	struct iommu_state *is;
1487 	struct iommu_map_state *ims = map->_dm_cookie;
1488 	struct strbuf_ctl *sb;
1489 	bus_size_t count;
1490 	int i, needsflush = 0;
1491 
1492 	sb = ims->ims_sb;
1493 	is = sb->sb_iommu;
1494 
1495 	for (i = 0; i < map->dm_nsegs; i++) {
1496 		if (offset < map->dm_segs[i].ds_len)
1497 			break;
1498 		offset -= map->dm_segs[i].ds_len;
1499 	}
1500 
1501 	if (i == map->dm_nsegs)
1502 		panic("iommu_dvmamap_sync: too short %llu", offset);
1503 
1504 	for (; len > 0 && i < map->dm_nsegs; i++) {
1505 		count = MIN(map->dm_segs[i].ds_len - offset, len);
1506 		if (count > 0 && iommu_dvmamap_sync_range(sb,
1507 		    map->dm_segs[i].ds_addr + offset, count))
1508 			needsflush = 1;
1509 		len -= count;
1510 	}
1511 
1512 #ifdef DIAGNOSTIC
1513 	if (i == map->dm_nsegs && len > 0)
1514 		panic("iommu_dvmamap_sync: leftover %llu", len);
1515 #endif
1516 
1517 	if (needsflush)
1518 		iommu_strbuf_flush_done(ims);
1519 }
1520 
1521 void
1522 iommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
1523     bus_addr_t offset, bus_size_t len, int ops)
1524 {
1525 	struct iommu_map_state *ims = map->_dm_cookie;
1526 
1527 #ifdef DIAGNOSTIC
1528 	if (ims == NULL)
1529 		panic("iommu_dvmamap_sync: null map state");
1530 	if (ims->ims_sb == NULL)
1531 		panic("iommu_dvmamap_sync: null sb");
1532 	if (ims->ims_sb->sb_iommu == NULL)
1533 		panic("iommu_dvmamap_sync: null iommu");
1534 #endif
1535 	if (len == 0)
1536 		return;
1537 
1538 	if (ops & BUS_DMASYNC_PREWRITE)
1539 		membar(MemIssue);
1540 
1541 	if ((ims->ims_flags & IOMMU_MAP_STREAM) &&
1542 	    (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE)))
1543 		_iommu_dvmamap_sync(t, t0, map, offset, len, ops);
1544 
1545 	if (ops & BUS_DMASYNC_POSTREAD)
1546 		membar(MemIssue);
1547 }
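
/*
 * From a driver this is reached through bus_dmamap_sync(9); a typical
 * streaming-read sequence looks like (sketch, driver names assumed):
 *
 *	bus_dmamap_sync(dmat, map, 0, len, BUS_DMASYNC_PREREAD);
 *	... the device DMAs into the buffer ...
 *	bus_dmamap_sync(dmat, map, 0, len, BUS_DMASYNC_POSTREAD);
 *
 * The POSTREAD (or PREWRITE) sync is what triggers the STC flush above
 * when the map was entered with BUS_DMA_STREAMING.
 */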
1548 
1549 /*
1550  * Flush an individual dma segment, returns non-zero if the streaming buffers
1551  * need flushing afterwards.
1552  */
1553 int
1554 iommu_dvmamap_sync_range(struct strbuf_ctl *sb, bus_addr_t va, bus_size_t len)
1555 {
1556 	bus_addr_t vaend;
1557 #ifdef DIAGNOSTIC
1558 	struct iommu_state *is = sb->sb_iommu;
1559 
1560 	if (va < is->is_dvmabase || va > is->is_dvmaend)
1561 		panic("invalid va: %llx", (long long)va);
1562 
1563 	if ((is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)] & IOTTE_STREAM) == 0) {
1564 		printf("iommu_dvmamap_sync_range: attempting to flush "
1565 		    "non-streaming entry\n");
1566 		return (0);
1567 	}
1568 #endif
1569 
1570 	vaend = (va + len + PAGE_MASK) & ~PAGE_MASK;
1571 	va &= ~PAGE_MASK;
1572 
1573 #ifdef DIAGNOSTIC
1574 	if (va < is->is_dvmabase || (vaend - 1) > is->is_dvmaend)
1575 		panic("invalid va range: %llx to %llx (%x to %x)",
1576 		    (long long)va, (long long)vaend,
1577 		    is->is_dvmabase,
1578 		    is->is_dvmaend);
1579 #endif
1580 
1581 	for ( ; va <= vaend; va += PAGE_SIZE) {
1582 		DPRINTF(IDB_BUSDMA,
1583 		    ("iommu_dvmamap_sync_range: flushing va %p\n",
1584 		    (void *)(u_long)va));
1585 		iommu_strbuf_flush(sb, va);
1586 	}
1587 
1588 	return (1);
1589 }
1590 
1591 int
1592 iommu_dvmamem_alloc(bus_dma_tag_t t, bus_dma_tag_t t0, bus_size_t size,
1593     bus_size_t alignment, bus_size_t boundary, bus_dma_segment_t *segs,
1594     int nsegs, int *rsegs, int flags)
1595 {
1596 
1597 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_alloc: sz %llx align %llx "
1598 	    "bound %llx segp %p flags %d\n", (unsigned long long)size,
1599 	    (unsigned long long)alignment, (unsigned long long)boundary,
1600 	    segs, flags));
1601 	BUS_DMA_FIND_PARENT(t, _dmamem_alloc);
1602 	return ((*t->_dmamem_alloc)(t, t0, size, alignment, boundary,
1603 	    segs, nsegs, rsegs, flags | BUS_DMA_DVMA));
1604 }
1605 
1606 void
1607 iommu_dvmamem_free(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dma_segment_t *segs,
1608     int nsegs)
1609 {
1610 
1611 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_free: segp %p nsegs %d\n",
1612 	    segs, nsegs));
1613 	BUS_DMA_FIND_PARENT(t, _dmamem_free);
1614 	(*t->_dmamem_free)(t, t0, segs, nsegs);
1615 }
1616 
1617 /*
1618  * Create a new iomap.
1619  */
1620 struct iommu_map_state *
1621 iommu_iomap_create(int n)
1622 {
1623 	struct iommu_map_state *ims;
1624 	struct strbuf_flush *sbf;
1625 	vaddr_t va;
1626 
1627 	/* Safety for heavily fragmented data, such as mbufs */
1628 	n += 4;
1629 	if (n < 16)
1630 		n = 16;
1631 
1632 	ims = malloc(sizeof(*ims) + (n - 1) * sizeof(ims->ims_map.ipm_map[0]),
1633 		M_DEVBUF, M_NOWAIT | M_ZERO);
1634 	if (ims == NULL)
1635 		return (NULL);
1636 
1637 	/* Initialize the map. */
1638 	ims->ims_map.ipm_maxpage = n;
1639 	SPLAY_INIT(&ims->ims_map.ipm_tree);
1640 
1641 	/* Initialize the flush area. */
1642 	sbf = &ims->ims_flush;
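	/*
	 * Carve a 64-byte-aligned flush flag out of sbf_area and record
	 * both its kernel va and its pa; the STC writes its flush
	 * completion flag to that pa (see iommu_strbuf_flush_done()).
	 */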
1643 	va = (vaddr_t)&sbf->sbf_area[0x40];
1644 	va &= ~0x3f;
1645 	pmap_extract(pmap_kernel(), va, &sbf->sbf_flushpa);
1646 	sbf->sbf_flush = (void *)va;
1647 
1648 	return (ims);
1649 }
1650 
1651 /*
1652  * Destroy an iomap.
1653  */
1654 void
1655 iommu_iomap_destroy(struct iommu_map_state *ims)
1656 {
1657 #ifdef DIAGNOSTIC
1658 	if (ims->ims_map.ipm_pagecnt > 0)
1659 		printf("iommu_iomap_destroy: %d page entries in use\n",
1660 		    ims->ims_map.ipm_pagecnt);
1661 #endif
1662 
1663 	free(ims, M_DEVBUF);
1664 }
1665 
1666 /*
1667  * Utility function used by the splay tree to order page entries by pa.
1668  */
1669 static inline int
1670 iomap_compare(struct iommu_page_entry *a, struct iommu_page_entry *b)
1671 {
1672 	return ((a->ipe_pa > b->ipe_pa) ? 1 :
1673 		(a->ipe_pa < b->ipe_pa) ? -1 : 0);
1674 }
1675 
1676 SPLAY_PROTOTYPE(iommu_page_tree, iommu_page_entry, ipe_node, iomap_compare);
1677 
1678 SPLAY_GENERATE(iommu_page_tree, iommu_page_entry, ipe_node, iomap_compare);
1679 
1680 /*
1681  * Insert a pa entry in the iomap.
1682  */
1683 int
1684 iommu_iomap_insert_page(struct iommu_map_state *ims, paddr_t pa)
1685 {
1686 	struct iommu_page_map *ipm = &ims->ims_map;
1687 	struct iommu_page_entry *e;
1688 
1689 	if (ipm->ipm_pagecnt >= ipm->ipm_maxpage) {
1690 		struct iommu_page_entry ipe;
1691 
1692 		ipe.ipe_pa = pa;
1693 		if (SPLAY_FIND(iommu_page_tree, &ipm->ipm_tree, &ipe))
1694 			return (0);
1695 
1696 		return (ENOMEM);
1697 	}
1698 
1699 	e = &ipm->ipm_map[ipm->ipm_pagecnt];
1700 
1701 	e->ipe_pa = pa;
1702 	e->ipe_va = NULL;
1703 
1704 	e = SPLAY_INSERT(iommu_page_tree, &ipm->ipm_tree, e);
1705 
1706 	/* Duplicates are okay, but only count them once. */
1707 	if (e)
1708 		return (0);
1709 
1710 	++ipm->ipm_pagecnt;
1711 
1712 	return (0);
1713 }
1714 
1715 /*
1716  * Load the iomap into the IOMMU by filling in each pa->va mapping and
1717  * inserting the entries into the IOMMU table.
1718  */
1719 void
1720 iommu_iomap_load_map(struct iommu_state *is, struct iommu_map_state *ims,
1721     bus_addr_t vmaddr, int flags)
1722 {
1723 	struct iommu_page_map *ipm = &ims->ims_map;
1724 	struct iommu_page_entry *e;
1725 	struct strbuf_ctl *sb = ims->ims_sb;
1726 	int i, slot;
1727 
1728 	if (sb->sb_flush == NULL)
1729 		flags &= ~BUS_DMA_STREAMING;
1730 
1731 	if (flags & BUS_DMA_STREAMING)
1732 		ims->ims_flags |= IOMMU_MAP_STREAM;
1733 	else
1734 		ims->ims_flags &= ~IOMMU_MAP_STREAM;
1735 
1736 	for (i = 0, e = ipm->ipm_map; i < ipm->ipm_pagecnt; ++i, ++e) {
1737 		e->ipe_va = vmaddr;
1738 		iommu_enter(is, sb, e->ipe_va, e->ipe_pa, flags);
1739 
1740 		/* Flush cache if necessary. */
1741 		slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
1742 		if (is->is_flags & IOMMU_FLUSH_CACHE &&
1743 		    (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
1744 			IOMMUREG_WRITE(is, iommu_cache_flush,
1745 			    is->is_ptsb + slot * 8);
1746 
1747 		vmaddr += PAGE_SIZE;
1748 	}
1749 }
1750 
1751 /*
1752  * Remove the iomap from the IOMMU.
1753  */
1754 void
1755 iommu_iomap_unload_map(struct iommu_state *is, struct iommu_map_state *ims)
1756 {
1757 	struct iommu_page_map *ipm = &ims->ims_map;
1758 	struct iommu_page_entry *e;
1759 	struct strbuf_ctl *sb = ims->ims_sb;
1760 	int i, slot;
1761 
1762 	for (i = 0, e = ipm->ipm_map; i < ipm->ipm_pagecnt; ++i, ++e) {
1763 		iommu_remove(is, sb, e->ipe_va);
1764 
1765 		/* Flush cache if necessary. */
1766 		slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
1767 		if (is->is_flags & IOMMU_FLUSH_CACHE &&
1768 		    (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
1769 			IOMMUREG_WRITE(is, iommu_cache_flush,
1770 			    is->is_ptsb + slot * 8);
1771 	}
1772 }
1773 
1774 /*
1775  * Translate a physical address (pa) into a DVMA address.
1776  */
1777 bus_addr_t
1778 iommu_iomap_translate(struct iommu_map_state *ims, paddr_t pa)
1779 {
1780 	struct iommu_page_map *ipm = &ims->ims_map;
1781 	struct iommu_page_entry *e;
1782 	struct iommu_page_entry pe;
1783 	paddr_t offset = pa & PAGE_MASK;
1784 
1785 	pe.ipe_pa = trunc_page(pa);
1786 
1787 	e = SPLAY_FIND(iommu_page_tree, &ipm->ipm_tree, &pe);
1788 
1789 	if (e == NULL)
1790 		return (NULL);
1791 
1792 	return (e->ipe_va | offset);
1793 }
1794 
1795 /*
1796  * Clear the iomap table and tree.
1797  */
1798 void
1799 iommu_iomap_clear_pages(struct iommu_map_state *ims)
1800 {
1801 	ims->ims_map.ipm_pagecnt = 0;
1802 	SPLAY_INIT(&ims->ims_map.ipm_tree);
1803 }
1804 
1805