xref: /netbsd/sys/arch/sparc64/dev/iommu.c (revision bf9ec67e)
1 /*	$NetBSD: iommu.c,v 1.51 2002/05/13 21:01:15 eeh Exp $	*/
2 
3 /*
4  * Copyright (c) 2001, 2002 Eduardo Horvath
5  * Copyright (c) 1999, 2000 Matthew R. Green
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * UltraSPARC IOMMU support; used by both the sbus and pci code.
34  */
35 #include "opt_ddb.h"
36 
37 #include <sys/param.h>
38 #include <sys/extent.h>
39 #include <sys/malloc.h>
40 #include <sys/systm.h>
41 #include <sys/device.h>
42 #include <sys/proc.h>
43 
44 #include <uvm/uvm_extern.h>
45 
46 #include <machine/bus.h>
47 #include <sparc64/sparc64/cache.h>
48 #include <sparc64/dev/iommureg.h>
49 #include <sparc64/dev/iommuvar.h>
50 
51 #include <machine/autoconf.h>
52 #include <machine/cpu.h>
53 
/*
 * Debug tracing.
 *
 * With DEBUG defined, `iommudebug' is a bit mask of IDB_* categories and
 * DPRINTF(level, (fmt, args...)) prints when any bit of `level' is set
 * in it.  The printf arguments are passed as one parenthesized group.
 * The level argument is parenthesized in the expansion so that an
 * expression such as (IDB_INFO | IDB_SYNC) is masked as a whole instead
 * of being split by operator precedence.
 *
 * Without DEBUG, DPRINTF() expands to an empty statement.
 */
#ifdef DEBUG
#define IDB_BUSDMA	0x1
#define IDB_IOMMU	0x2
#define IDB_INFO	0x4
#define	IDB_SYNC	0x8
int iommudebug = 0x0;
#define DPRINTF(l, s)   do { if (iommudebug & (l)) printf s; } while (0)
#else
#define DPRINTF(l, s)   do { } while (0)
#endif
64 
/*
 * Write `v' to the strbuf_pgflush register of each of the (up to two)
 * streaming buffers whose is_sbvalid[] entry in `i' is non-zero.
 * Both arguments are evaluated more than once.
 */
#define iommu_strbuf_flush(i,v) do {					\
	int sbno_;							\
									\
	for (sbno_ = 0; sbno_ < 2; sbno_++) {				\
		if ((i)->is_sbvalid[sbno_]) {				\
			bus_space_write_8((i)->is_bustag,		\
				(i)->is_sb[sbno_],			\
				STRBUFREG(strbuf_pgflush), (v));	\
		}							\
	}								\
} while (0)
73 
74 static	int iommu_strbuf_flush_done __P((struct iommu_state *));
75 
76 /*
77  * initialise the UltraSPARC IOMMU (SBUS or PCI):
78  *	- allocate and setup the iotsb.
79  *	- enable the IOMMU
80  *	- initialise the streaming buffers (if they exist)
81  *	- create a private DVMA map.
82  */
83 void
84 iommu_init(name, is, tsbsize, iovabase)
85 	char *name;
86 	struct iommu_state *is;
87 	int tsbsize;
88 	u_int32_t iovabase;
89 {
90 	psize_t size;
91 	vaddr_t va;
92 	paddr_t pa;
93 	struct vm_page *m;
94 	struct pglist mlist;
95 
96 	/*
97 	 * Setup the iommu.
98 	 *
99 	 * The sun4u iommu is part of the SBUS or PCI controller so we will
100 	 * deal with it here..
101 	 *
102 	 * For sysio and psycho/psycho+ the IOMMU address space always ends at
103 	 * 0xffffe000, but the starting address depends on the size of the
104 	 * map.  The map size is 1024 * 2 ^ is->is_tsbsize entries, where each
105 	 * entry is 8 bytes.  The start of the map can be calculated by
106 	 * (0xffffe000 << (8 + is->is_tsbsize)).
107 	 *
108 	 * But sabre and hummingbird use a different scheme that seems to
109 	 * be hard-wired, so we read the start and size from the PROM and
110 	 * just use those values.
111 	 */
112 	is->is_cr = (tsbsize << 16) | IOMMUCR_EN;
113 	is->is_tsbsize = tsbsize;
114 	if (iovabase == -1) {
115 		is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
116 		is->is_dvmaend = IOTSB_VEND;
117 	} else {
118 		is->is_dvmabase = iovabase;
119 		is->is_dvmaend = iovabase + IOTSB_VSIZE(tsbsize);
120 	}
121 
122 	/*
123 	 * Allocate memory for I/O pagetables.  They need to be physically
124 	 * contiguous.
125 	 */
126 
127 	size = NBPG<<(is->is_tsbsize);
128 	TAILQ_INIT(&mlist);
129 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
130 		(paddr_t)NBPG, (paddr_t)0, &mlist, 1, 0) != 0)
131 		panic("iommu_init: no memory");
132 
133 	va = uvm_km_valloc(kernel_map, size);
134 	if (va == 0)
135 		panic("iommu_init: no memory");
136 	is->is_tsb = (int64_t *)va;
137 
138 	m = TAILQ_FIRST(&mlist);
139 	is->is_ptsb = VM_PAGE_TO_PHYS(m);
140 
141 	/* Map the pages */
142 	for (; m != NULL; m = TAILQ_NEXT(m,pageq)) {
143 		pa = VM_PAGE_TO_PHYS(m);
144 		pmap_enter(pmap_kernel(), va, pa | PMAP_NVC,
145 			VM_PROT_READ|VM_PROT_WRITE,
146 			VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED);
147 		va += NBPG;
148 	}
149 	pmap_update(pmap_kernel());
150 	bzero(is->is_tsb, size);
151 
152 #ifdef DEBUG
153 	if (iommudebug & IDB_INFO)
154 	{
155 		/* Probe the iommu */
156 
157 		printf("iommu regs at: cr=%lx tsb=%lx flush=%lx\n",
158 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
159 				offsetof (struct iommureg, iommu_cr)),
160 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
161 				offsetof (struct iommureg, iommu_tsb)),
162 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
163 				offsetof (struct iommureg, iommu_flush)));
164 		printf("iommu cr=%llx tsb=%llx\n",
165 			(unsigned long long)bus_space_read_8(is->is_bustag,
166 				is->is_iommu,
167 				offsetof (struct iommureg, iommu_cr)),
168 			(unsigned long long)bus_space_read_8(is->is_bustag,
169 				is->is_iommu,
170 				offsetof (struct iommureg, iommu_tsb)));
171 		printf("TSB base %p phys %llx\n", (void *)is->is_tsb,
172 			(unsigned long long)is->is_ptsb);
173 		delay(1000000); /* 1 s */
174 	}
175 #endif
176 
177 	/*
178 	 * Initialize streaming buffer, if it is there.
179 	 */
180 	if (is->is_sbvalid[0] || is->is_sbvalid[1])
181 		(void)pmap_extract(pmap_kernel(), (vaddr_t)&is->is_flush[0],
182 		    (paddr_t *)&is->is_flushpa);
183 
184 	/*
185 	 * now actually start up the IOMMU
186 	 */
187 	iommu_reset(is);
188 
189 	/*
190 	 * Now all the hardware's working we need to allocate a dvma map.
191 	 */
192 	printf("DVMA map: %x to %x\n",
193 		(unsigned int)is->is_dvmabase,
194 		(unsigned int)is->is_dvmaend);
195 	printf("IOTSB: %llx to %llx\n",
196 		(unsigned long long)is->is_ptsb,
197 		(unsigned long long)(is->is_ptsb + size));
198 	is->is_dvmamap = extent_create(name,
199 				       is->is_dvmabase, is->is_dvmaend - NBPG,
200 				       M_DEVBUF, 0, 0, EX_NOWAIT);
201 }
202 
203 /*
204  * Streaming buffers don't exist on the UltraSPARC IIi; we should have
205  * detected that already and disabled them.  If not, we will notice that
206  * they aren't there when the STRBUF_EN bit does not remain.
207  */
208 void
209 iommu_reset(is)
210 	struct iommu_state *is;
211 {
212 	int i;
213 
214 	/* Need to do 64-bit stores */
215 	bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_tsb),
216 		is->is_ptsb);
217 
218 	/* Enable IOMMU in diagnostic mode */
219 	bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_cr),
220 		is->is_cr|IOMMUCR_DE);
221 
222 	for (i=0; i<2; i++) {
223 		if (is->is_sbvalid[i]) {
224 
225 			/* Enable diagnostics mode? */
226 			bus_space_write_8(is->is_bustag, is->is_sb[i],
227 				STRBUFREG(strbuf_ctl), STRBUF_EN);
228 
229 			/* No streaming buffers? Disable them */
230 			if (bus_space_read_8(is->is_bustag, is->is_sb[i],
231 				STRBUFREG(strbuf_ctl)) == 0)
232 				is->is_sbvalid[i] = 0;
233 		}
234 	}
235 }
236 
237 /*
238  * Here are the iommu control routines.
239  */
240 void
241 iommu_enter(is, va, pa, flags)
242 	struct iommu_state *is;
243 	vaddr_t va;
244 	int64_t pa;
245 	int flags;
246 {
247 	int64_t tte;
248 
249 #ifdef DIAGNOSTIC
250 	if (va < is->is_dvmabase || va > is->is_dvmaend)
251 		panic("iommu_enter: va %#lx not in DVMA space", va);
252 #endif
253 
254 	tte = MAKEIOTTE(pa, !(flags&BUS_DMA_NOWRITE), !(flags&BUS_DMA_NOCACHE),
255 			(flags&BUS_DMA_STREAMING));
256 #ifdef DEBUG
257 	tte |= (flags & 0xff000LL)<<(4*8);
258 #endif
259 
260 	/* Is the streamcache flush really needed? */
261 	if (is->is_sbvalid[0] || is->is_sbvalid[1]) {
262 		iommu_strbuf_flush(is, va);
263 		iommu_strbuf_flush_done(is);
264 	}
265 	DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n",
266 		       (int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va));
267 	is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] = tte;
268 	bus_space_write_8(is->is_bustag, is->is_iommu,
269 		IOMMUREG(iommu_flush), va);
270 	DPRINTF(IDB_IOMMU, ("iommu_enter: va %lx pa %lx TSB[%lx]@%p=%lx\n",
271 		va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
272 		(void *)(u_long)&is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)],
273 		(u_long)tte));
274 }
275 
276 
277 /*
278  * Find the value of a DVMA address (debug routine).
279  */
280 paddr_t
281 iommu_extract(is, dva)
282 	struct iommu_state *is;
283 	vaddr_t dva;
284 {
285 	int64_t tte = 0;
286 
287 	if (dva >= is->is_dvmabase && dva < is->is_dvmaend)
288 		tte = is->is_tsb[IOTSBSLOT(dva,is->is_tsbsize)];
289 
290 	if ((tte&IOTTE_V) == 0)
291 		return ((paddr_t)-1L);
292 	return (tte&IOTTE_PAMASK);
293 }
294 
295 /*
296  * iommu_remove: removes mappings created by iommu_enter
297  *
298  * Only demap from IOMMU if flag is set.
299  *
300  * XXX: this function needs better internal error checking.
301  */
302 void
303 iommu_remove(is, va, len)
304 	struct iommu_state *is;
305 	vaddr_t va;
306 	size_t len;
307 {
308 
309 #ifdef DIAGNOSTIC
310 	if (va < is->is_dvmabase || va > is->is_dvmaend)
311 		panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
312 	if ((long)(va + len) < (long)va)
313 		panic("iommu_remove: va 0x%lx + len 0x%lx wraps",
314 		      (long) va, (long) len);
315 	if (len & ~0xfffffff)
316 		panic("iommu_remove: rediculous len 0x%lx", (u_long)len);
317 #endif
318 
319 	va = trunc_page(va);
320 	DPRINTF(IDB_IOMMU, ("iommu_remove: va %lx TSB[%lx]@%p\n",
321 		va, (u_long)IOTSBSLOT(va, is->is_tsbsize),
322 		&is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)]));
323 	while (len > 0) {
324 		DPRINTF(IDB_IOMMU, ("iommu_remove: clearing TSB slot %d "
325 			"for va %p size %lx\n",
326 			(int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va,
327 			(u_long)len));
328 		if (is->is_sbvalid[0] || is->is_sbvalid[1]) {
329 			DPRINTF(IDB_IOMMU, ("iommu_remove: flushing va %p "
330 				"TSB[%lx]@%p=%lx, %lu bytes left\n",
331 				(void *)(u_long)va,
332 				(long)IOTSBSLOT(va,is->is_tsbsize),
333 				(void *)(u_long)&is->is_tsb[IOTSBSLOT(va,
334 					is->is_tsbsize)],
335 				(long)(is->is_tsb[IOTSBSLOT(va,
336 					is->is_tsbsize)]),
337 				(u_long)len));
338 			iommu_strbuf_flush(is, va);
339 			if (len <= NBPG)
340 				iommu_strbuf_flush_done(is);
341 			DPRINTF(IDB_IOMMU, ("iommu_remove: flushed va %p TSB[%lx]@%p=%lx, %lu bytes left\n",
342 			       (void *)(u_long)va, (long)IOTSBSLOT(va,is->is_tsbsize),
343 			       (void *)(u_long)&is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)],
344 			       (long)(is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)]),
345 			       (u_long)len));
346 		}
347 
348 		if (len <= NBPG)
349 			len = 0;
350 		else
351 			len -= NBPG;
352 
353 		/* XXX Zero-ing the entry would not require RMW */
354 		is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] &= ~IOTTE_V;
355 		bus_space_write_8(is->is_bustag, is->is_iommu,
356 			IOMMUREG(iommu_flush), va);
357 		va += NBPG;
358 	}
359 }
360 
361 static int
362 iommu_strbuf_flush_done(is)
363 	struct iommu_state *is;
364 {
365 	struct timeval cur, flushtimeout;
366 
367 #define BUMPTIME(t, usec) { \
368 	register volatile struct timeval *tp = (t); \
369 	register long us; \
370  \
371 	tp->tv_usec = us = tp->tv_usec + (usec); \
372 	if (us >= 1000000) { \
373 		tp->tv_usec = us - 1000000; \
374 		tp->tv_sec++; \
375 	} \
376 }
377 
378 	if (!is->is_sbvalid[0] && !is->is_sbvalid[1])
379 		return (0);
380 
381 	/*
382 	 * Streaming buffer flushes:
383 	 *
384 	 *   1 Tell strbuf to flush by storing va to strbuf_pgflush.  If
385 	 *     we're not on a cache line boundary (64-bits):
386 	 *   2 Store 0 in flag
387 	 *   3 Store pointer to flag in flushsync
388 	 *   4 wait till flushsync becomes 0x1
389 	 *
390 	 * If it takes more than .5 sec, something
391 	 * went wrong.
392 	 */
393 
394 	is->is_flush[0] = 1;
395 	is->is_flush[1] = 1;
396 	if (is->is_sbvalid[0]) {
397 		is->is_flush[0] = 0;
398 		bus_space_write_8(is->is_bustag, is->is_sb[0],
399 			STRBUFREG(strbuf_flushsync), is->is_flushpa);
400 	}
401 	if (is->is_sbvalid[1]) {
402 		is->is_flush[0] = 1;
403 		bus_space_write_8(is->is_bustag, is->is_sb[1],
404 			STRBUFREG(strbuf_flushsync), is->is_flushpa + 8);
405 	}
406 
407 	microtime(&flushtimeout);
408 	cur = flushtimeout;
409 	BUMPTIME(&flushtimeout, 500000); /* 1/2 sec */
410 
411 	DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flush = %lx,%lx "
412 		"at va = %lx pa = %lx now=%lx:%lx until = %lx:%lx\n",
413 		(long)is->is_flush[0], (long)is->is_flush[1],
414 		(long)&is->is_flush[0], (long)is->is_flushpa,
415 		cur.tv_sec, cur.tv_usec,
416 		flushtimeout.tv_sec, flushtimeout.tv_usec));
417 
418 	/* Bypass non-coherent D$ */
419 	while ((!ldxa(is->is_flushpa, ASI_PHYS_CACHED) ||
420 		!ldxa(is->is_flushpa + 8, ASI_PHYS_CACHED)) &&
421 		((cur.tv_sec <= flushtimeout.tv_sec) &&
422 			(cur.tv_usec <= flushtimeout.tv_usec)))
423 		microtime(&cur);
424 
425 #ifdef DIAGNOSTIC
426 	if (!ldxa(is->is_flushpa, ASI_PHYS_CACHED) ||
427 	    !ldxa(is->is_flushpa + 8, ASI_PHYS_CACHED)) {
428 		printf("iommu_strbuf_flush_done: flush timeout %p,%p at %p\n",
429 			(void *)(u_long)is->is_flush[0],
430 			(void *)(u_long)is->is_flush[1],
431 			(void *)(u_long)is->is_flushpa); /* panic? */
432 #ifdef DDB
433 		Debugger();
434 #endif
435 	}
436 #endif
437 	DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flushed\n"));
438 	return (is->is_flush[0] && is->is_flush[1]);
439 }
440 
441 /*
442  * IOMMU DVMA operations, common to SBUS and PCI.
443  */
444 int
445 iommu_dvmamap_load(t, is, map, buf, buflen, p, flags)
446 	bus_dma_tag_t t;
447 	struct iommu_state *is;
448 	bus_dmamap_t map;
449 	void *buf;
450 	bus_size_t buflen;
451 	struct proc *p;
452 	int flags;
453 {
454 	int s;
455 	int err;
456 	bus_size_t sgsize;
457 	paddr_t curaddr;
458 	u_long dvmaddr, sgstart, sgend;
459 	bus_size_t align, boundary;
460 	vaddr_t vaddr = (vaddr_t)buf;
461 	int seg;
462 	pmap_t pmap;
463 
464 	if (map->dm_nsegs) {
465 		/* Already in use?? */
466 #ifdef DIAGNOSTIC
467 		printf("iommu_dvmamap_load: map still in use\n");
468 #endif
469 		bus_dmamap_unload(t, map);
470 	}
471 	/*
472 	 * Make sure that on error condition we return "no valid mappings".
473 	 */
474 	map->dm_nsegs = 0;
475 
476 	if (buflen > map->_dm_size) {
477 		DPRINTF(IDB_BUSDMA,
478 		    ("iommu_dvmamap_load(): error %d > %d -- "
479 		     "map size exceeded!\n", (int)buflen, (int)map->_dm_size));
480 		return (EINVAL);
481 	}
482 
483 	sgsize = round_page(buflen + ((int)vaddr & PGOFSET));
484 
485 	/*
486 	 * A boundary presented to bus_dmamem_alloc() takes precedence
487 	 * over boundary in the map.
488 	 */
489 	if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
490 		boundary = map->_dm_boundary;
491 	align = max(map->dm_segs[0]._ds_align, NBPG);
492 	s = splhigh();
493 	/*
494 	 * If our segment size is larger than the boundary we need to
495 	 * split the transfer up int little pieces ourselves.
496 	 */
497 	err = extent_alloc(is->is_dvmamap, sgsize, align,
498 		(sgsize > boundary) ? 0 : boundary,
499 		EX_NOWAIT|EX_BOUNDZERO, (u_long *)&dvmaddr);
500 	splx(s);
501 
502 #ifdef DEBUG
503 	if (err || (dvmaddr == (bus_addr_t)-1))
504 	{
505 		printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
506 		    (int)sgsize, flags);
507 #ifdef DDB
508 		Debugger();
509 #endif
510 	}
511 #endif
512 	if (err != 0)
513 		return (err);
514 
515 	if (dvmaddr == (bus_addr_t)-1)
516 		return (ENOMEM);
517 
518 	/* Set the active DVMA map */
519 	map->_dm_dvmastart = dvmaddr;
520 	map->_dm_dvmasize = sgsize;
521 
522 	/*
523 	 * Now split the DVMA range into segments, not crossing
524 	 * the boundary.
525 	 */
526 	seg = 0;
527 	sgstart = dvmaddr + (vaddr & PGOFSET);
528 	sgend = sgstart + buflen - 1;
529 	map->dm_segs[seg].ds_addr = sgstart;
530 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: boundary %lx boundary-1 %lx "
531 		"~(boundary-1) %lx\n", boundary, (boundary-1), ~(boundary-1)));
532 	while ((sgstart & ~(boundary - 1)) != (sgend & ~(boundary - 1))) {
533 		/* Oops.  We crossed a boundary.  Split the xfer. */
534 		DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
535 			"seg %d start %lx size %lx\n", seg,
536 			(long)map->dm_segs[seg].ds_addr,
537 			map->dm_segs[seg].ds_len));
538 		map->dm_segs[seg].ds_len =
539 		    boundary - (sgstart & (boundary - 1));
540 		if (++seg > map->_dm_segcnt) {
541 			/* Too many segments.  Fail the operation. */
542 			DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
543 				"too many segments %d\n", seg));
544 			s = splhigh();
545 			/* How can this fail?  And if it does what can we do? */
546 			err = extent_free(is->is_dvmamap,
547 				dvmaddr, sgsize, EX_NOWAIT);
548 			map->_dm_dvmastart = 0;
549 			map->_dm_dvmasize = 0;
550 			splx(s);
551 			return (E2BIG);
552 		}
553 		sgstart = roundup(sgstart, boundary);
554 		map->dm_segs[seg].ds_addr = sgstart;
555 	}
556 	map->dm_segs[seg].ds_len = sgend - sgstart + 1;
557 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
558 		"seg %d start %lx size %lx\n", seg,
559 		(long)map->dm_segs[seg].ds_addr, map->dm_segs[seg].ds_len));
560 	map->dm_nsegs = seg+1;
561 	map->dm_mapsize = buflen;
562 
563 	if (p != NULL)
564 		pmap = p->p_vmspace->vm_map.pmap;
565 	else
566 		pmap = pmap_kernel();
567 
568 	for (; buflen > 0; ) {
569 		/*
570 		 * Get the physical address for this page.
571 		 */
572 		if (pmap_extract(pmap, (vaddr_t)vaddr, &curaddr) == FALSE) {
573 			bus_dmamap_unload(t, map);
574 			return (-1);
575 		}
576 
577 		/*
578 		 * Compute the segment size, and adjust counts.
579 		 */
580 		sgsize = NBPG - ((u_long)vaddr & PGOFSET);
581 		if (buflen < sgsize)
582 			sgsize = buflen;
583 
584 		DPRINTF(IDB_BUSDMA,
585 		    ("iommu_dvmamap_load: map %p loading va %p "
586 			    "dva %lx at pa %lx\n",
587 			    map, (void *)vaddr, (long)dvmaddr,
588 			    (long)(curaddr&~(NBPG-1))));
589 		iommu_enter(is, trunc_page(dvmaddr), trunc_page(curaddr),
590 		    flags|0x4000);
591 
592 		dvmaddr += PAGE_SIZE;
593 		vaddr += sgsize;
594 		buflen -= sgsize;
595 	}
596 #ifdef DIAGNOSTIC
597 	for (seg = 0; seg < map->dm_nsegs; seg++) {
598 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
599 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
600 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
601 				seg, (long)map->dm_segs[seg].ds_addr,
602 				is->is_dvmabase, is->is_dvmaend);
603 			Debugger();
604 		}
605 	}
606 #endif
607 	return (0);
608 }
609 
610 
611 void
612 iommu_dvmamap_unload(t, is, map)
613 	bus_dma_tag_t t;
614 	struct iommu_state *is;
615 	bus_dmamap_t map;
616 {
617 	int error, s;
618 	bus_size_t sgsize;
619 
620 	/* Flush the iommu */
621 #ifdef DEBUG
622 	if (!map->_dm_dvmastart) {
623 		printf("iommu_dvmamap_unload: No dvmastart is zero\n");
624 #ifdef DDB
625 		Debugger();
626 #endif
627 	}
628 #endif
629 	iommu_remove(is, map->_dm_dvmastart, map->_dm_dvmasize);
630 
631 	/* Flush the caches */
632 	bus_dmamap_unload(t->_parent, map);
633 
634 	/* Mark the mappings as invalid. */
635 	map->dm_mapsize = 0;
636 	map->dm_nsegs = 0;
637 
638 	s = splhigh();
639 	error = extent_free(is->is_dvmamap, map->_dm_dvmastart,
640 		map->_dm_dvmasize, EX_NOWAIT);
641 	map->_dm_dvmastart = 0;
642 	map->_dm_dvmasize = 0;
643 	splx(s);
644 	if (error != 0)
645 		printf("warning: %qd of DVMA space lost\n", (long long)sgsize);
646 
647 	/* Clear the map */
648 }
649 
650 
651 int
652 iommu_dvmamap_load_raw(t, is, map, segs, nsegs, flags, size)
653 	bus_dma_tag_t t;
654 	struct iommu_state *is;
655 	bus_dmamap_t map;
656 	bus_dma_segment_t *segs;
657 	int nsegs;
658 	int flags;
659 	bus_size_t size;
660 {
661 	struct vm_page *m;
662 	int i, j, s;
663 	int left;
664 	int err;
665 	bus_size_t sgsize;
666 	paddr_t pa;
667 	bus_size_t boundary, align;
668 	u_long dvmaddr, sgstart, sgend;
669 	struct pglist *mlist;
670 	int pagesz = PAGE_SIZE;
671 	int npg = 0; /* DEBUG */
672 
673 	if (map->dm_nsegs) {
674 		/* Already in use?? */
675 #ifdef DIAGNOSTIC
676 		printf("iommu_dvmamap_load_raw: map still in use\n");
677 #endif
678 		bus_dmamap_unload(t, map);
679 	}
680 
681 	/*
682 	 * A boundary presented to bus_dmamem_alloc() takes precedence
683 	 * over boundary in the map.
684 	 */
685 	if ((boundary = segs[0]._ds_boundary) == 0)
686 		boundary = map->_dm_boundary;
687 
688 	align = max(segs[0]._ds_align, pagesz);
689 
690 	/*
691 	 * Make sure that on error condition we return "no valid mappings".
692 	 */
693 	map->dm_nsegs = 0;
694 	/* Count up the total number of pages we need */
695 	pa = segs[0].ds_addr;
696 	sgsize = 0;
697 	left = size;
698 	for (i=0; left && i<nsegs; i++) {
699 		if (round_page(pa) != round_page(segs[i].ds_addr))
700 			sgsize = round_page(sgsize);
701 		sgsize += min(left, segs[i].ds_len);
702 		left -= segs[i].ds_len;
703 		pa = segs[i].ds_addr + segs[i].ds_len;
704 	}
705 	sgsize = round_page(sgsize);
706 
707 	s = splhigh();
708 	/*
709 	 * If our segment size is larger than the boundary we need to
710 	 * split the transfer up into little pieces ourselves.
711 	 */
712 	err = extent_alloc(is->is_dvmamap, sgsize, align,
713 		(sgsize > boundary) ? 0 : boundary,
714 		((flags & BUS_DMA_NOWAIT) == 0 ? EX_WAITOK : EX_NOWAIT) |
715 		EX_BOUNDZERO, (u_long *)&dvmaddr);
716 	splx(s);
717 
718 	if (err != 0)
719 		return (err);
720 
721 #ifdef DEBUG
722 	if (dvmaddr == (bus_addr_t)-1)
723 	{
724 		printf("iommu_dvmamap_load_raw(): extent_alloc(%d, %x) failed!\n",
725 		    (int)sgsize, flags);
726 		Debugger();
727 	}
728 #endif
729 	if (dvmaddr == (bus_addr_t)-1)
730 		return (ENOMEM);
731 
732 	/* Set the active DVMA map */
733 	map->_dm_dvmastart = dvmaddr;
734 	map->_dm_dvmasize = sgsize;
735 
736 	if ((mlist = segs[0]._ds_mlist) == NULL) {
737 		u_long prev_va = NULL;
738 		paddr_t prev_pa = 0;
739 		int end = 0, offset;
740 
741 		/*
742 		 * This segs is made up of individual physical
743 		 *  segments, probably by _bus_dmamap_load_uio() or
744 		 * _bus_dmamap_load_mbuf().  Ignore the mlist and
745 		 * load each one individually.
746 		 */
747 		map->dm_mapsize = size;
748 
749 		j = 0;
750 		for (i = 0; i < nsegs ; i++) {
751 
752 			pa = segs[i].ds_addr;
753 			offset = (pa & PGOFSET);
754 			pa = trunc_page(pa);
755 			dvmaddr = trunc_page(dvmaddr);
756 			left = min(size, segs[i].ds_len);
757 
758 			DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: converting "
759 				"physseg %d start %lx size %lx\n", i,
760 				(long)segs[i].ds_addr, segs[i].ds_len));
761 
762 			if ((pa == prev_pa) &&
763 				((offset != 0) || (end != offset))) {
764 				/* We can re-use this mapping */
765 #ifdef DEBUG
766 if (iommudebug & 0x10) printf("reusing dva %lx prev %lx pa %lx prev %lx\n",
767 	dvmaddr, prev_va, pa, prev_pa);
768 #endif
769 				dvmaddr = prev_va;
770 			}
771 
772 			sgstart = dvmaddr + offset;
773 			sgend = sgstart + left - 1;
774 
775 			/* Are the segments virtually adjacent? */
776 			if ((j > 0) && (end == offset) &&
777 				((offset == 0) || (pa == prev_pa))) {
778 				/* Just append to the previous segment. */
779 #ifdef DEBUG
780 if (iommudebug & 0x10) {
781 printf("appending: offset %x pa %lx prev %lx dva %lx prev %lx\n",
782 	offset, pa, prev_pa, dvmaddr, prev_va);
783 }
784 #endif
785 
786 				map->dm_segs[--j].ds_len += left;
787 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
788 					"appending seg %d start %lx size %lx\n", j,
789 					(long)map->dm_segs[j].ds_addr,
790 					map->dm_segs[j].ds_len));
791 			} else {
792 				map->dm_segs[j].ds_addr = sgstart;
793 				map->dm_segs[j].ds_len = left;
794 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
795 					"seg %d start %lx size %lx\n", j,
796 					(long)map->dm_segs[j].ds_addr,
797 					map->dm_segs[j].ds_len));
798 			}
799 			end = (offset + left) & PGOFSET;
800 
801 			/* Check for boundary issues */
802 			while ((sgstart & ~(boundary - 1)) !=
803 				(sgend & ~(boundary - 1))) {
804 				/* Need a new segment. */
805 				map->dm_segs[j].ds_len =
806 					sgstart & (boundary - 1);
807 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
808 					"seg %d start %lx size %lx\n", j,
809 					(long)map->dm_segs[j].ds_addr,
810 					map->dm_segs[j].ds_len));
811 				if (++j > map->_dm_segcnt) {
812 					iommu_dvmamap_unload(t, is, map);
813 					return (E2BIG);
814 				}
815 				sgstart = roundup(sgstart, boundary);
816 				map->dm_segs[j].ds_addr = sgstart;
817 				map->dm_segs[j].ds_len = sgend - sgstart + 1;
818 			}
819 
820 			if (sgsize == 0)
821 				panic("iommu_dmamap_load_raw: size botch");
822 
823 			/* Now map a series of pages. */
824 			while (dvmaddr <= sgend) {
825 				DPRINTF(IDB_BUSDMA,
826 					("iommu_dvmamap_load_raw: map %p "
827 						"loading va %lx at pa %lx\n",
828 						map, (long)dvmaddr,
829 						(long)(pa)));
830 				/* Enter it if we haven't before. */
831 				if (prev_va != dvmaddr)
832 #ifdef DEBUG
833 { if (iommudebug & 0x10) printf("seg %d:%d entering dvma %lx, prev %lx pa %lx\n", i,j, dvmaddr, prev_va, pa);
834 #endif
835 					iommu_enter(is, prev_va = dvmaddr,
836 						prev_pa = pa,
837 						flags|(++npg<<12));
838 #ifdef DEBUG
839 } else if (iommudebug & 0x10) printf("seg %d:%d skipping dvma %lx, prev %lx\n", i,j, dvmaddr, prev_va);
840 #endif
841 
842 				dvmaddr += pagesz;
843 				pa += pagesz;
844 			}
845 
846 			size -= left;
847 			++j;
848 		}
849 
850 		map->dm_nsegs = j;
851 #ifdef DIAGNOSTIC
852 		{ int seg;
853 	for (seg = 0; seg < map->dm_nsegs; seg++) {
854 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
855 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
856 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
857 				seg, (long)map->dm_segs[seg].ds_addr,
858 				is->is_dvmabase, is->is_dvmaend);
859 			Debugger();
860 		}
861 	}
862 		}
863 #endif
864 		return (0);
865 	}
866 	/*
867 	 * This was allocated with bus_dmamem_alloc.
868 	 * The pages are on an `mlist'.
869 	 */
870 	map->dm_mapsize = size;
871 	i = 0;
872 	sgstart = dvmaddr;
873 	sgend = sgstart + size - 1;
874 	map->dm_segs[i].ds_addr = sgstart;
875 	while ((sgstart & ~(boundary - 1)) != (sgend & ~(boundary - 1))) {
876 		/* Oops.  We crossed a boundary.  Split the xfer. */
877 		map->dm_segs[i].ds_len = sgstart & (boundary - 1);
878 		DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
879 			"seg %d start %lx size %lx\n", i,
880 			(long)map->dm_segs[i].ds_addr,
881 			map->dm_segs[i].ds_len));
882 		if (++i > map->_dm_segcnt) {
883 			/* Too many segments.  Fail the operation. */
884 			s = splhigh();
885 			/* How can this fail?  And if it does what can we do? */
886 			err = extent_free(is->is_dvmamap,
887 				dvmaddr, sgsize, EX_NOWAIT);
888 			map->_dm_dvmastart = 0;
889 			map->_dm_dvmasize = 0;
890 			splx(s);
891 			return (E2BIG);
892 		}
893 		sgstart = roundup(sgstart, boundary);
894 		map->dm_segs[i].ds_addr = sgstart;
895 	}
896 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
897 			"seg %d start %lx size %lx\n", i,
898 			(long)map->dm_segs[i].ds_addr, map->dm_segs[i].ds_len));
899 	map->dm_segs[i].ds_len = sgend - sgstart + 1;
900 
901 	for (m = TAILQ_FIRST(mlist); m != NULL; m = TAILQ_NEXT(m,pageq)) {
902 		if (sgsize == 0)
903 			panic("iommu_dmamap_load_raw: size botch");
904 		pa = VM_PAGE_TO_PHYS(m);
905 
906 		DPRINTF(IDB_BUSDMA,
907 		    ("iommu_dvmamap_load_raw: map %p loading va %lx at pa %lx\n",
908 		    map, (long)dvmaddr, (long)(pa)));
909 		iommu_enter(is, dvmaddr, pa, flags|0x8000);
910 
911 		dvmaddr += pagesz;
912 		sgsize -= pagesz;
913 	}
914 	map->dm_mapsize = size;
915 	map->dm_nsegs = i+1;
916 #ifdef DIAGNOSTIC
917 	{ int seg;
918 	for (seg = 0; seg < map->dm_nsegs; seg++) {
919 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
920 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
921 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
922 				seg, (long)map->dm_segs[seg].ds_addr,
923 				is->is_dvmabase, is->is_dvmaend);
924 			Debugger();
925 		}
926 	}
927 	}
928 #endif
929 	return (0);
930 }
931 
932 void
933 iommu_dvmamap_sync(t, is, map, offset, len, ops)
934 	bus_dma_tag_t t;
935 	struct iommu_state *is;
936 	bus_dmamap_t map;
937 	bus_addr_t offset;
938 	bus_size_t len;
939 	int ops;
940 {
941 	vaddr_t va = map->dm_segs[0].ds_addr + offset;
942 
943 	/*
944 	 * We only support one DMA segment; supporting more makes this code
945          * too unweildy.
946 	 */
947 
948 	if (ops & BUS_DMASYNC_PREREAD) {
949 		DPRINTF(IDB_SYNC,
950 		    ("iommu_dvmamap_sync: syncing va %p len %lu "
951 		     "BUS_DMASYNC_PREREAD\n", (void *)(u_long)va, (u_long)len));
952 
953 		/* Nothing to do */;
954 	}
955 	if (ops & BUS_DMASYNC_POSTREAD) {
956 		DPRINTF(IDB_SYNC,
957 		    ("iommu_dvmamap_sync: syncing va %p len %lu "
958 		     "BUS_DMASYNC_POSTREAD\n", (void *)(u_long)va, (u_long)len));
959 		/* if we have a streaming buffer, flush it here first */
960 		if (is->is_sbvalid[0] || is->is_sbvalid[1])
961 			while (len > 0) {
962 				DPRINTF(IDB_BUSDMA,
963 					("iommu_dvmamap_sync: flushing va %p, "
964 					 "%lu bytes left\n", (void *)(u_long)va,
965 						(u_long)len));
966 				iommu_strbuf_flush(is, va);
967 				if (len <= NBPG) {
968 					iommu_strbuf_flush_done(is);
969 					len = 0;
970 				} else
971 					len -= NBPG;
972 				va += NBPG;
973 			}
974 	}
975 	if (ops & BUS_DMASYNC_PREWRITE) {
976 		DPRINTF(IDB_SYNC,
977 		    ("iommu_dvmamap_sync: syncing va %p len %lu "
978 		     "BUS_DMASYNC_PREWRITE\n", (void *)(u_long)va, (u_long)len));
979 		/* if we have a streaming buffer, flush it here first */
980 		if (is->is_sbvalid[0] || is->is_sbvalid[1])
981 			while (len > 0) {
982 				DPRINTF(IDB_BUSDMA,
983 				    ("iommu_dvmamap_sync: flushing va %p, %lu "
984 				     "bytes left\n", (void *)(u_long)va,
985 					    (u_long)len));
986 				iommu_strbuf_flush(is, va);
987 				if (len <= NBPG) {
988 					iommu_strbuf_flush_done(is);
989 					len = 0;
990 				} else
991 					len -= NBPG;
992 				va += NBPG;
993 			}
994 	}
995 	if (ops & BUS_DMASYNC_POSTWRITE) {
996 		DPRINTF(IDB_SYNC,
997 		    ("iommu_dvmamap_sync: syncing va %p len %lu "
998 		     "BUS_DMASYNC_POSTWRITE\n", (void *)(u_long)va, (u_long)len));
999 		/* Nothing to do */;
1000 	}
1001 }
1002 
1003 int
1004 iommu_dvmamem_alloc(t, is, size, alignment, boundary, segs, nsegs, rsegs, flags)
1005 	bus_dma_tag_t t;
1006 	struct iommu_state *is;
1007 	bus_size_t size, alignment, boundary;
1008 	bus_dma_segment_t *segs;
1009 	int nsegs;
1010 	int *rsegs;
1011 	int flags;
1012 {
1013 
1014 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_alloc: sz %llx align %llx bound %llx "
1015 	   "segp %p flags %d\n", (unsigned long long)size,
1016 	   (unsigned long long)alignment, (unsigned long long)boundary,
1017 	   segs, flags));
1018 	return (bus_dmamem_alloc(t->_parent, size, alignment, boundary,
1019 	    segs, nsegs, rsegs, flags|BUS_DMA_DVMA));
1020 }
1021 
1022 void
1023 iommu_dvmamem_free(t, is, segs, nsegs)
1024 	bus_dma_tag_t t;
1025 	struct iommu_state *is;
1026 	bus_dma_segment_t *segs;
1027 	int nsegs;
1028 {
1029 
1030 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_free: segp %p nsegs %d\n",
1031 	    segs, nsegs));
1032 	bus_dmamem_free(t->_parent, segs, nsegs);
1033 }
1034 
1035 /*
1036  * Map the DVMA mappings into the kernel pmap.
1037  * Check the flags to see whether we're streaming or coherent.
1038  */
1039 int
1040 iommu_dvmamem_map(t, is, segs, nsegs, size, kvap, flags)
1041 	bus_dma_tag_t t;
1042 	struct iommu_state *is;
1043 	bus_dma_segment_t *segs;
1044 	int nsegs;
1045 	size_t size;
1046 	caddr_t *kvap;
1047 	int flags;
1048 {
1049 	struct vm_page *m;
1050 	vaddr_t va;
1051 	bus_addr_t addr;
1052 	struct pglist *mlist;
1053 	int cbit;
1054 
1055 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: segp %p nsegs %d size %lx\n",
1056 	    segs, nsegs, size));
1057 
1058 	/*
1059 	 * Allocate some space in the kernel map, and then map these pages
1060 	 * into this space.
1061 	 */
1062 	size = round_page(size);
1063 	va = uvm_km_valloc(kernel_map, size);
1064 	if (va == 0)
1065 		return (ENOMEM);
1066 
1067 	*kvap = (caddr_t)va;
1068 
1069 	/*
1070 	 * digest flags:
1071 	 */
1072 	cbit = 0;
1073 	if (flags & BUS_DMA_COHERENT)	/* Disable vcache */
1074 		cbit |= PMAP_NVC;
1075 	if (flags & BUS_DMA_NOCACHE)	/* sideffects */
1076 		cbit |= PMAP_NC;
1077 
1078 	/*
1079 	 * Now take this and map it into the CPU.
1080 	 */
1081 	mlist = segs[0]._ds_mlist;
1082 	for (m = mlist->tqh_first; m != NULL; m = m->pageq.tqe_next) {
1083 #ifdef DIAGNOSTIC
1084 		if (size == 0)
1085 			panic("iommu_dvmamem_map: size botch");
1086 #endif
1087 		addr = VM_PAGE_TO_PHYS(m);
1088 		DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: "
1089 		    "mapping va %lx at %llx\n", va, (unsigned long long)addr | cbit));
1090 		pmap_enter(pmap_kernel(), va, addr | cbit,
1091 		    VM_PROT_READ | VM_PROT_WRITE,
1092 		    VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED);
1093 		va += PAGE_SIZE;
1094 		size -= PAGE_SIZE;
1095 	}
1096 	pmap_update(pmap_kernel());
1097 
1098 	return (0);
1099 }
1100 
1101 /*
1102  * Unmap DVMA mappings from kernel
1103  */
1104 void
1105 iommu_dvmamem_unmap(t, is, kva, size)
1106 	bus_dma_tag_t t;
1107 	struct iommu_state *is;
1108 	caddr_t kva;
1109 	size_t size;
1110 {
1111 
1112 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_unmap: kvm %p size %lx\n",
1113 	    kva, size));
1114 
1115 #ifdef DIAGNOSTIC
1116 	if ((u_long)kva & PGOFSET)
1117 		panic("iommu_dvmamem_unmap");
1118 #endif
1119 
1120 	size = round_page(size);
1121 	pmap_remove(pmap_kernel(), (vaddr_t)kva, size);
1122 	pmap_update(pmap_kernel());
1123 #if 0
1124 	/*
1125 	 * XXX ? is this necessary? i think so and i think other
1126 	 * implementations are missing it.
1127 	 */
1128 	uvm_km_free(kernel_map, (vaddr_t)kva, size);
1129 #endif
1130 }
1131