/* $OpenBSD: smmu.c,v 1.21 2022/09/11 10:28:56 patrick Exp $ */
/*
 * Copyright (c) 2008-2009,2014-2016 Dale Rahn <drahn@dalerahn.com>
 * Copyright (c) 2021 Patrick Wildt <patrick@blueri.se>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/pool.h>
#include <sys/atomic.h>

#include <machine/bus.h>
#include <machine/cpufunc.h>

#include <uvm/uvm_extern.h>
#include <arm64/vmparam.h>
#include <arm64/pmap.h>

#include <dev/pci/pcivar.h>
#include <arm64/dev/smmuvar.h>
#include <arm64/dev/smmureg.h>

struct smmu_map_state {
	struct extent_region	sms_er;
	bus_addr_t		sms_dva;
	bus_size_t		sms_len;
	bus_size_t		sms_loaded;
};

struct smmuvp0 {
	uint64_t l0[VP_IDX0_CNT];
	struct smmuvp1 *vp[VP_IDX0_CNT];
};

struct smmuvp1 {
	uint64_t l1[VP_IDX1_CNT];
	struct smmuvp2 *vp[VP_IDX1_CNT];
};

struct smmuvp2 {
	uint64_t l2[VP_IDX2_CNT];
	struct smmuvp3 *vp[VP_IDX2_CNT];
};

struct smmuvp3 {
	uint64_t l3[VP_IDX3_CNT];
};

CTASSERT(sizeof(struct smmuvp0) == sizeof(struct smmuvp1));
CTASSERT(sizeof(struct smmuvp0) == sizeof(struct smmuvp2));
CTASSERT(sizeof(struct smmuvp0) != sizeof(struct smmuvp3));

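/*
 * Software shadow of the IOMMU page tables: each smmuvpN level pairs the
 * hardware descriptors (l0..l3) with kernel-virtual pointers to the next
 * level (vp[]), so software table walks never have to map physical pages.
 * Illustrative split, assuming the usual 4 KB-granule VP_IDX* shifts from
 * smmuvar.h: bits [47:39], [38:30], [29:21] and [20:12] index the four
 * levels and bits [11:0] are the page offset, e.g.
 *
 *	dva 0x40001000 -> VP_IDX0 0, VP_IDX1 1, VP_IDX2 0, VP_IDX3 1,
 *	page offset 0x000
 */
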
uint32_t smmu_gr0_read_4(struct smmu_softc *, bus_size_t);
void smmu_gr0_write_4(struct smmu_softc *, bus_size_t, uint32_t);
uint32_t smmu_gr1_read_4(struct smmu_softc *, bus_size_t);
void smmu_gr1_write_4(struct smmu_softc *, bus_size_t, uint32_t);
uint32_t smmu_cb_read_4(struct smmu_softc *, int, bus_size_t);
void smmu_cb_write_4(struct smmu_softc *, int, bus_size_t, uint32_t);
uint64_t smmu_cb_read_8(struct smmu_softc *, int, bus_size_t);
void smmu_cb_write_8(struct smmu_softc *, int, bus_size_t, uint64_t);

void smmu_tlb_sync_global(struct smmu_softc *);
void smmu_tlb_sync_context(struct smmu_domain *);

struct smmu_domain *smmu_domain_lookup(struct smmu_softc *, uint32_t);
struct smmu_domain *smmu_domain_create(struct smmu_softc *, uint32_t);

void smmu_set_l1(struct smmu_domain *, uint64_t, struct smmuvp1 *);
void smmu_set_l2(struct smmu_domain *, uint64_t, struct smmuvp1 *,
    struct smmuvp2 *);
void smmu_set_l3(struct smmu_domain *, uint64_t, struct smmuvp2 *,
    struct smmuvp3 *);

int smmu_vp_lookup(struct smmu_domain *, vaddr_t, uint64_t **);
int smmu_vp_enter(struct smmu_domain *, vaddr_t, uint64_t **, int);

uint64_t smmu_fill_pte(struct smmu_domain *, vaddr_t, paddr_t,
    vm_prot_t, int, int);
void smmu_pte_update(struct smmu_domain *, uint64_t, uint64_t *);
void smmu_pte_remove(struct smmu_domain *, vaddr_t);

int smmu_enter(struct smmu_domain *, vaddr_t, paddr_t, vm_prot_t, int, int);
void smmu_map(struct smmu_domain *, vaddr_t, paddr_t, vm_prot_t, int, int);
void smmu_unmap(struct smmu_domain *, vaddr_t);
void smmu_remove(struct smmu_domain *, vaddr_t);

int smmu_load_map(struct smmu_domain *, bus_dmamap_t);
void smmu_unload_map(struct smmu_domain *, bus_dmamap_t);

int smmu_dmamap_create(bus_dma_tag_t , bus_size_t, int,
     bus_size_t, bus_size_t, int, bus_dmamap_t *);
void smmu_dmamap_destroy(bus_dma_tag_t , bus_dmamap_t);
int smmu_dmamap_load(bus_dma_tag_t , bus_dmamap_t, void *,
     bus_size_t, struct proc *, int);
int smmu_dmamap_load_mbuf(bus_dma_tag_t , bus_dmamap_t,
     struct mbuf *, int);
int smmu_dmamap_load_uio(bus_dma_tag_t , bus_dmamap_t,
     struct uio *, int);
int smmu_dmamap_load_raw(bus_dma_tag_t , bus_dmamap_t,
     bus_dma_segment_t *, int, bus_size_t, int);
void smmu_dmamap_unload(bus_dma_tag_t , bus_dmamap_t);

struct cfdriver smmu_cd = {
	NULL, "smmu", DV_DULL
};

int
smmu_attach(struct smmu_softc *sc)
{
	uint32_t reg;
	int i;

	SIMPLEQ_INIT(&sc->sc_domains);

	pool_init(&sc->sc_vp_pool, sizeof(struct smmuvp0), PAGE_SIZE, IPL_VM, 0,
	    "smmu_vp", NULL);
	pool_setlowat(&sc->sc_vp_pool, 20);
	pool_init(&sc->sc_vp3_pool, sizeof(struct smmuvp3), PAGE_SIZE, IPL_VM, 0,
	    "smmu_vp3", NULL);
	pool_setlowat(&sc->sc_vp3_pool, 20);

	reg = smmu_gr0_read_4(sc, SMMU_IDR0);
	if (reg & SMMU_IDR0_S1TS)
		sc->sc_has_s1 = 1;
	/*
	 * Marvell's 8040 does not support 64-bit writes, hence it
	 * is not possible to invalidate stage-2, because the ASID
	 * is part of the upper 32-bits and they'd be ignored.
	 */
	if (sc->sc_is_ap806)
		sc->sc_has_s1 = 0;
	if (reg & SMMU_IDR0_S2TS)
		sc->sc_has_s2 = 1;
	if (!sc->sc_has_s1 && !sc->sc_has_s2)
		return 1;
	if (reg & SMMU_IDR0_EXIDS)
		sc->sc_has_exids = 1;

	sc->sc_num_streams = 1 << SMMU_IDR0_NUMSIDB(reg);
	if (sc->sc_has_exids)
		sc->sc_num_streams = 1 << 16;
	sc->sc_stream_mask = sc->sc_num_streams - 1;
	if (reg & SMMU_IDR0_SMS) {
		sc->sc_num_streams = SMMU_IDR0_NUMSMRG(reg);
		if (sc->sc_num_streams == 0)
			return 1;
		sc->sc_smr = mallocarray(sc->sc_num_streams,
		    sizeof(*sc->sc_smr), M_DEVBUF, M_WAITOK | M_ZERO);
	}

	reg = smmu_gr0_read_4(sc, SMMU_IDR1);
	sc->sc_pagesize = 4 * 1024;
	if (reg & SMMU_IDR1_PAGESIZE_64K)
		sc->sc_pagesize = 64 * 1024;
	sc->sc_numpage = 1 << (SMMU_IDR1_NUMPAGENDXB(reg) + 1);

	/* 0 to NUMS2CB == stage-2, NUMS2CB to NUMCB == stage-1 */
	sc->sc_num_context_banks = SMMU_IDR1_NUMCB(reg);
	sc->sc_num_s2_context_banks = SMMU_IDR1_NUMS2CB(reg);
	if (sc->sc_num_s2_context_banks > sc->sc_num_context_banks)
		return 1;
	sc->sc_cb = mallocarray(sc->sc_num_context_banks,
	    sizeof(*sc->sc_cb), M_DEVBUF, M_WAITOK | M_ZERO);

	reg = smmu_gr0_read_4(sc, SMMU_IDR2);
	if (reg & SMMU_IDR2_VMID16S)
		sc->sc_has_vmid16s = 1;

	switch (SMMU_IDR2_IAS(reg)) {
	case SMMU_IDR2_IAS_32BIT:
		sc->sc_ipa_bits = 32;
		break;
	case SMMU_IDR2_IAS_36BIT:
		sc->sc_ipa_bits = 36;
		break;
	case SMMU_IDR2_IAS_40BIT:
		sc->sc_ipa_bits = 40;
		break;
	case SMMU_IDR2_IAS_42BIT:
		sc->sc_ipa_bits = 42;
		break;
	case SMMU_IDR2_IAS_44BIT:
		sc->sc_ipa_bits = 44;
		break;
	case SMMU_IDR2_IAS_48BIT:
	default:
		sc->sc_ipa_bits = 48;
		break;
	}
	switch (SMMU_IDR2_OAS(reg)) {
	case SMMU_IDR2_OAS_32BIT:
		sc->sc_pa_bits = 32;
		break;
	case SMMU_IDR2_OAS_36BIT:
		sc->sc_pa_bits = 36;
		break;
	case SMMU_IDR2_OAS_40BIT:
		sc->sc_pa_bits = 40;
		break;
	case SMMU_IDR2_OAS_42BIT:
		sc->sc_pa_bits = 42;
		break;
	case SMMU_IDR2_OAS_44BIT:
		sc->sc_pa_bits = 44;
		break;
	case SMMU_IDR2_OAS_48BIT:
	default:
		sc->sc_pa_bits = 48;
		break;
	}
	switch (SMMU_IDR2_UBS(reg)) {
	case SMMU_IDR2_UBS_32BIT:
		sc->sc_va_bits = 32;
		break;
	case SMMU_IDR2_UBS_36BIT:
		sc->sc_va_bits = 36;
		break;
	case SMMU_IDR2_UBS_40BIT:
		sc->sc_va_bits = 40;
		break;
	case SMMU_IDR2_UBS_42BIT:
		sc->sc_va_bits = 42;
		break;
	case SMMU_IDR2_UBS_44BIT:
		sc->sc_va_bits = 44;
		break;
	case SMMU_IDR2_UBS_49BIT:
	default:
		sc->sc_va_bits = 48;
		break;
	}

	printf(": %u CBs (%u S2-only)",
	    sc->sc_num_context_banks, sc->sc_num_s2_context_banks);
	if (sc->sc_is_qcom) {
		/*
		 * In theory we should check if bypass quirk is needed by
		 * modifying S2CR and re-checking if the value is different.
		 * This does not work on the last S2CR, but on the first,
		 * which is in use.  Revisit this once we have other QCOM HW.
		 */
		sc->sc_bypass_quirk = 1;
		printf(", bypass quirk");
		/*
		 * Create special context that is turned off.  This allows us
		 * to map a stream to a context bank where translation is not
		 * happening, and hence bypassed.
		 */
		sc->sc_cb[sc->sc_num_context_banks - 1] =
		    malloc(sizeof(struct smmu_cb), M_DEVBUF, M_WAITOK | M_ZERO);
		smmu_cb_write_4(sc, sc->sc_num_context_banks - 1,
		    SMMU_CB_SCTLR, 0);
		smmu_gr1_write_4(sc, SMMU_CBAR(sc->sc_num_context_banks - 1),
		    SMMU_CBAR_TYPE_S1_TRANS_S2_BYPASS);
	}
	printf("\n");

	/* Clear Global Fault Status Register */
	smmu_gr0_write_4(sc, SMMU_SGFSR, smmu_gr0_read_4(sc, SMMU_SGFSR));

	for (i = 0; i < sc->sc_num_streams; i++) {
		/* On QCOM HW we need to keep current streams running. */
		if (sc->sc_is_qcom && sc->sc_smr &&
		    smmu_gr0_read_4(sc, SMMU_SMR(i)) & SMMU_SMR_VALID) {
			reg = smmu_gr0_read_4(sc, SMMU_SMR(i));
			sc->sc_smr[i] = malloc(sizeof(struct smmu_smr),
			    M_DEVBUF, M_WAITOK | M_ZERO);
			sc->sc_smr[i]->ss_id = (reg >> SMMU_SMR_ID_SHIFT) &
			    SMMU_SMR_ID_MASK;
			sc->sc_smr[i]->ss_mask = (reg >> SMMU_SMR_MASK_SHIFT) &
			    SMMU_SMR_MASK_MASK;
			if (sc->sc_bypass_quirk) {
				smmu_gr0_write_4(sc, SMMU_S2CR(i),
				    SMMU_S2CR_TYPE_TRANS |
				    sc->sc_num_context_banks - 1);
			} else {
				smmu_gr0_write_4(sc, SMMU_S2CR(i),
				    SMMU_S2CR_TYPE_BYPASS | 0xff);
			}
			continue;
		}
#if 1
		/* Setup all streams to fault by default */
		smmu_gr0_write_4(sc, SMMU_S2CR(i), SMMU_S2CR_TYPE_FAULT);
#else
		/* For stream indexing, USFCFG bypass isn't enough! */
		smmu_gr0_write_4(sc, SMMU_S2CR(i), SMMU_S2CR_TYPE_BYPASS);
#endif
		/*  Disable all stream map registers */
		if (sc->sc_smr)
			smmu_gr0_write_4(sc, SMMU_SMR(i), 0);
	}

	for (i = 0; i < sc->sc_num_context_banks; i++) {
		/* Disable Context Bank */
		smmu_cb_write_4(sc, i, SMMU_CB_SCTLR, 0);
		/* Clear Context Bank Fault Status Register */
		smmu_cb_write_4(sc, i, SMMU_CB_FSR, SMMU_CB_FSR_MASK);
	}

	/* Invalidate TLB */
	smmu_gr0_write_4(sc, SMMU_TLBIALLH, ~0);
	smmu_gr0_write_4(sc, SMMU_TLBIALLNSNH, ~0);

	if (sc->sc_is_mmu500) {
		reg = smmu_gr0_read_4(sc, SMMU_SACR);
		if (SMMU_IDR7_MAJOR(smmu_gr0_read_4(sc, SMMU_IDR7)) >= 2)
			reg &= ~SMMU_SACR_MMU500_CACHE_LOCK;
		reg |= SMMU_SACR_MMU500_SMTNMB_TLBEN |
		    SMMU_SACR_MMU500_S2CRB_TLBEN;
		smmu_gr0_write_4(sc, SMMU_SACR, reg);
		for (i = 0; i < sc->sc_num_context_banks; i++) {
			reg = smmu_cb_read_4(sc, i, SMMU_CB_ACTLR);
			reg &= ~SMMU_CB_ACTLR_CPRE;
			smmu_cb_write_4(sc, i, SMMU_CB_ACTLR, reg);
		}
	}

	/* Enable SMMU */
	reg = smmu_gr0_read_4(sc, SMMU_SCR0);
	reg &= ~(SMMU_SCR0_CLIENTPD |
	    SMMU_SCR0_FB | SMMU_SCR0_BSU_MASK);
#if 1
	/* Disable bypass for unknown streams */
	reg |= SMMU_SCR0_USFCFG;
#else
	/* Enable bypass for unknown streams */
	reg &= ~SMMU_SCR0_USFCFG;
#endif
	reg |= SMMU_SCR0_GFRE | SMMU_SCR0_GFIE |
	    SMMU_SCR0_GCFGFRE | SMMU_SCR0_GCFGFIE |
	    SMMU_SCR0_VMIDPNE | SMMU_SCR0_PTM;
	if (sc->sc_has_exids)
		reg |= SMMU_SCR0_EXIDENABLE;
	if (sc->sc_has_vmid16s)
		reg |= SMMU_SCR0_VMID16EN;

	smmu_tlb_sync_global(sc);
	smmu_gr0_write_4(sc, SMMU_SCR0, reg);

	return 0;
}
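
/*
 * smmu_attach() only probes and resets the translation hardware; a bus
 * front-end (e.g. the FDT or ACPI glue) is expected to have mapped the
 * register window into sc_iot/sc_ioh, filled in sc_dmat and the quirk
 * flags (sc_is_mmu500, sc_is_ap806, sc_is_qcom, sc_coherent) beforehand,
 * and to hook smmu_global_irq()/smmu_context_irq() afterwards.  A rough,
 * hypothetical sketch of such glue:
 *
 *	sc->sc_iot = faa->fa_iot;
 *	bus_space_map(sc->sc_iot, addr, size, 0, &sc->sc_ioh);
 *	sc->sc_dmat = faa->fa_dmat;
 *	if (smmu_attach(sc) != 0)
 *		return;
 */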

int
smmu_global_irq(void *cookie)
{
	struct smmu_softc *sc = cookie;
	uint32_t reg;

	reg = smmu_gr0_read_4(sc, SMMU_SGFSR);
	if (reg == 0)
		return 0;

	printf("%s: SGFSR 0x%08x SGFSYNR0 0x%08x SGFSYNR1 0x%08x "
	    "SGFSYNR2 0x%08x\n", sc->sc_dev.dv_xname, reg,
	    smmu_gr0_read_4(sc, SMMU_SGFSYNR0),
	    smmu_gr0_read_4(sc, SMMU_SGFSYNR1),
	    smmu_gr0_read_4(sc, SMMU_SGFSYNR2));

	smmu_gr0_write_4(sc, SMMU_SGFSR, reg);

	return 1;
}

int
smmu_context_irq(void *cookie)
{
	struct smmu_cb_irq *cbi = cookie;
	struct smmu_softc *sc = cbi->cbi_sc;
	uint32_t reg;

	reg = smmu_cb_read_4(sc, cbi->cbi_idx, SMMU_CB_FSR);
	if ((reg & SMMU_CB_FSR_MASK) == 0)
		return 0;

	printf("%s: FSR 0x%08x FSYNR0 0x%08x FAR 0x%llx "
	    "CBFRSYNRA 0x%08x\n", sc->sc_dev.dv_xname, reg,
	    smmu_cb_read_4(sc, cbi->cbi_idx, SMMU_CB_FSYNR0),
	    smmu_cb_read_8(sc, cbi->cbi_idx, SMMU_CB_FAR),
	    smmu_gr1_read_4(sc, SMMU_CBFRSYNRA(cbi->cbi_idx)));

	smmu_cb_write_4(sc, cbi->cbi_idx, SMMU_CB_FSR, reg);

	return 1;
}

void
smmu_tlb_sync_global(struct smmu_softc *sc)
{
	int i;

	smmu_gr0_write_4(sc, SMMU_STLBGSYNC, ~0);
	for (i = 1000; i > 0; i--) {
		if ((smmu_gr0_read_4(sc, SMMU_STLBGSTATUS) &
		    SMMU_STLBGSTATUS_GSACTIVE) == 0)
			return;
	}

	printf("%s: global TLB sync timeout\n",
	    sc->sc_dev.dv_xname);
}

void
smmu_tlb_sync_context(struct smmu_domain *dom)
{
	struct smmu_softc *sc = dom->sd_sc;
	int i;

	smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_TLBSYNC, ~0);
	for (i = 1000; i > 0; i--) {
		if ((smmu_cb_read_4(sc, dom->sd_cb_idx, SMMU_CB_TLBSTATUS) &
		    SMMU_CB_TLBSTATUS_SACTIVE) == 0)
			return;
	}

	printf("%s: context TLB sync timeout\n",
	    sc->sc_dev.dv_xname);
}

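/*
 * Register layout helpers.  The register file is organised as a series of
 * sc_pagesize-sized pages: page 0 holds global register space 0 (GR0),
 * page 1 holds global register space 1 (GR1), and the translation context
 * banks start at page sc_numpage, one page per bank.  For example, with a
 * 4 KB register page and sc_numpage == 64, context bank n lives at offset
 * (64 + n) * 4096.
 */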
uint32_t
smmu_gr0_read_4(struct smmu_softc *sc, bus_size_t off)
{
	uint32_t base = 0 * sc->sc_pagesize;

	return bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off);
}

void
smmu_gr0_write_4(struct smmu_softc *sc, bus_size_t off, uint32_t val)
{
	uint32_t base = 0 * sc->sc_pagesize;

	bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off, val);
}

uint32_t
smmu_gr1_read_4(struct smmu_softc *sc, bus_size_t off)
{
	uint32_t base = 1 * sc->sc_pagesize;

	return bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off);
}

void
smmu_gr1_write_4(struct smmu_softc *sc, bus_size_t off, uint32_t val)
{
	uint32_t base = 1 * sc->sc_pagesize;

	bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off, val);
}

uint32_t
smmu_cb_read_4(struct smmu_softc *sc, int idx, bus_size_t off)
{
	uint32_t base;

	base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
	base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */

	return bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off);
}

void
smmu_cb_write_4(struct smmu_softc *sc, int idx, bus_size_t off, uint32_t val)
{
	uint32_t base;

	base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
	base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */

	bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off, val);
}

uint64_t
smmu_cb_read_8(struct smmu_softc *sc, int idx, bus_size_t off)
{
	uint64_t reg;
	uint32_t base;

	base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
	base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */

	if (sc->sc_is_ap806) {
		reg = bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off + 4);
		reg <<= 32;
		reg |= bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off + 0);
		return reg;
	}

	return bus_space_read_8(sc->sc_iot, sc->sc_ioh, base + off);
}

void
smmu_cb_write_8(struct smmu_softc *sc, int idx, bus_size_t off, uint64_t val)
{
	uint32_t base;

	base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
	base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */

	if (sc->sc_is_ap806) {
		bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off + 4,
		    val >> 32);
		bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off + 0,
		    val & 0xffffffff);
		return;
	}

	bus_space_write_8(sc->sc_iot, sc->sc_ioh, base + off, val);
}

bus_dma_tag_t
smmu_device_map(void *cookie, uint32_t sid, bus_dma_tag_t dmat)
{
	struct smmu_softc *sc = cookie;
	struct smmu_domain *dom;

	dom = smmu_domain_lookup(sc, sid);
	if (dom == NULL)
		return dmat;

	if (dom->sd_dmat == NULL) {
		dom->sd_dmat = malloc(sizeof(*dom->sd_dmat),
		    M_DEVBUF, M_WAITOK);
		memcpy(dom->sd_dmat, sc->sc_dmat,
		    sizeof(*dom->sd_dmat));
		dom->sd_dmat->_cookie = dom;
		dom->sd_dmat->_dmamap_create = smmu_dmamap_create;
		dom->sd_dmat->_dmamap_destroy = smmu_dmamap_destroy;
		dom->sd_dmat->_dmamap_load = smmu_dmamap_load;
		dom->sd_dmat->_dmamap_load_mbuf = smmu_dmamap_load_mbuf;
		dom->sd_dmat->_dmamap_load_uio = smmu_dmamap_load_uio;
		dom->sd_dmat->_dmamap_load_raw = smmu_dmamap_load_raw;
		dom->sd_dmat->_dmamap_unload = smmu_dmamap_unload;
		dom->sd_dmat->_flags |= BUS_DMA_COHERENT;
	}

	return dom->sd_dmat;
}
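
/*
 * smmu_device_map() hands out a bus_dma tag whose map operations are routed
 * through this SMMU domain, so existing drivers transparently get their DMA
 * addresses translated.  A caller that has resolved a device's stream ID
 * (e.g. from an "iommus" FDT property or an IORT mapping) would do something
 * like the following hypothetical sketch:
 *
 *	dmat = smmu_device_map(sc, sid, parent_dmat);
 *
 * From then on, bus_dmamap_create()/bus_dmamap_load() on that tag allocate
 * IOVA and program the IOMMU page tables automatically.
 */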

struct smmu_domain *
smmu_domain_lookup(struct smmu_softc *sc, uint32_t sid)
{
	struct smmu_domain *dom;

	SIMPLEQ_FOREACH(dom, &sc->sc_domains, sd_list) {
		if (dom->sd_sid == sid)
			return dom;
	}

	return smmu_domain_create(sc, sid);
}

struct smmu_domain *
smmu_domain_create(struct smmu_softc *sc, uint32_t sid)
{
	struct smmu_domain *dom;
	uint32_t iovabits, reg;
	paddr_t pa;
	vaddr_t l0va;
	int i, start, end;

	dom = malloc(sizeof(*dom), M_DEVBUF, M_WAITOK | M_ZERO);
	mtx_init(&dom->sd_iova_mtx, IPL_VM);
	mtx_init(&dom->sd_pmap_mtx, IPL_VM);
	dom->sd_sc = sc;
	dom->sd_sid = sid;

	/* Prefer stage 1 if possible! */
	if (sc->sc_has_s1) {
		start = sc->sc_num_s2_context_banks;
		end = sc->sc_num_context_banks;
		dom->sd_stage = 1;
	} else {
		start = 0;
		end = sc->sc_num_context_banks;
		dom->sd_stage = 2;
	}

	for (i = start; i < end; i++) {
		if (sc->sc_cb[i] != NULL)
			continue;
		sc->sc_cb[i] = malloc(sizeof(struct smmu_cb),
		    M_DEVBUF, M_WAITOK | M_ZERO);
		dom->sd_cb_idx = i;
		break;
	}
	if (i >= end) {
		printf("%s: out of context blocks, I/O device will fail\n",
		    sc->sc_dev.dv_xname);
		free(dom, M_DEVBUF, sizeof(*dom));
		return NULL;
	}

	/* Stream indexing is easy */
	dom->sd_smr_idx = sid;

	/* Stream mapping is a bit more effort */
	if (sc->sc_smr) {
		for (i = 0; i < sc->sc_num_streams; i++) {
			/* Take over QCOM SMRs */
			if (sc->sc_is_qcom && sc->sc_smr[i] != NULL &&
			    sc->sc_smr[i]->ss_dom == NULL &&
			    sc->sc_smr[i]->ss_id == sid &&
			    sc->sc_smr[i]->ss_mask == 0) {
				free(sc->sc_smr[i], M_DEVBUF,
				    sizeof(struct smmu_smr));
				sc->sc_smr[i] = NULL;
			}
			if (sc->sc_smr[i] != NULL)
				continue;
			sc->sc_smr[i] = malloc(sizeof(struct smmu_smr),
			    M_DEVBUF, M_WAITOK | M_ZERO);
			sc->sc_smr[i]->ss_dom = dom;
			sc->sc_smr[i]->ss_id = sid;
			sc->sc_smr[i]->ss_mask = 0;
			dom->sd_smr_idx = i;
			break;
		}

		if (i >= sc->sc_num_streams) {
			free(sc->sc_cb[dom->sd_cb_idx], M_DEVBUF,
			    sizeof(struct smmu_cb));
			sc->sc_cb[dom->sd_cb_idx] = NULL;
			free(dom, M_DEVBUF, sizeof(*dom));
			printf("%s: out of streams, I/O device will fail\n",
			    sc->sc_dev.dv_xname);
			return NULL;
		}
	}

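	/*
	 * Program the context bank for this domain.  The order below is
	 * CBA2R (64-bit VA format, VMID), CBAR (translation type), then
	 * TCR2/TCR (address sizes and walk attributes), TTBR0/1 (page
	 * table base), MAIR0/1 (memory attribute indirection, stage 1
	 * only) and finally SCTLR to switch translation on.
	 */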
	reg = SMMU_CBA2R_VA64;
	if (sc->sc_has_vmid16s)
		reg |= (dom->sd_cb_idx + 1) << SMMU_CBA2R_VMID16_SHIFT;
	smmu_gr1_write_4(sc, SMMU_CBA2R(dom->sd_cb_idx), reg);

	if (dom->sd_stage == 1) {
		reg = SMMU_CBAR_TYPE_S1_TRANS_S2_BYPASS |
		    SMMU_CBAR_BPSHCFG_NSH | SMMU_CBAR_MEMATTR_WB;
	} else {
		reg = SMMU_CBAR_TYPE_S2_TRANS;
		if (!sc->sc_has_vmid16s)
			reg |= (dom->sd_cb_idx + 1) << SMMU_CBAR_VMID_SHIFT;
	}
	smmu_gr1_write_4(sc, SMMU_CBAR(dom->sd_cb_idx), reg);

	if (dom->sd_stage == 1) {
		reg = SMMU_CB_TCR2_AS | SMMU_CB_TCR2_SEP_UPSTREAM;
		switch (sc->sc_ipa_bits) {
		case 32:
			reg |= SMMU_CB_TCR2_PASIZE_32BIT;
			break;
		case 36:
			reg |= SMMU_CB_TCR2_PASIZE_36BIT;
			break;
		case 40:
			reg |= SMMU_CB_TCR2_PASIZE_40BIT;
			break;
		case 42:
			reg |= SMMU_CB_TCR2_PASIZE_42BIT;
			break;
		case 44:
			reg |= SMMU_CB_TCR2_PASIZE_44BIT;
			break;
		case 48:
			reg |= SMMU_CB_TCR2_PASIZE_48BIT;
			break;
		}
		smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_TCR2, reg);
	}

	if (dom->sd_stage == 1)
		iovabits = sc->sc_va_bits;
	else
		iovabits = sc->sc_ipa_bits;
	/*
	 * Marvell's 8040 does not support 64-bit writes, hence we
	 * can only address 44-bits of VA space for TLB invalidation.
	 */
	if (sc->sc_is_ap806)
		iovabits = min(44, iovabits);
	if (iovabits >= 40)
		dom->sd_4level = 1;

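	/*
	 * Illustrative arithmetic: T0SZ is the number of unused top bits,
	 * so a 48-bit IOVA space yields T0SZ = 64 - 48 = 16 and needs all
	 * four lookup levels, while e.g. a 32-bit space yields T0SZ = 32
	 * and the walk can start at the level-1 table (sd_4level == 0).
	 */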
	reg = SMMU_CB_TCR_TG0_4KB | SMMU_CB_TCR_T0SZ(64 - iovabits);
	if (dom->sd_stage == 1) {
		reg |= SMMU_CB_TCR_EPD1;
	} else {
		if (dom->sd_4level)
			reg |= SMMU_CB_TCR_S2_SL0_4KB_L0;
		else
			reg |= SMMU_CB_TCR_S2_SL0_4KB_L1;
		switch (sc->sc_pa_bits) {
		case 32:
			reg |= SMMU_CB_TCR_S2_PASIZE_32BIT;
			break;
		case 36:
			reg |= SMMU_CB_TCR_S2_PASIZE_36BIT;
			break;
		case 40:
			reg |= SMMU_CB_TCR_S2_PASIZE_40BIT;
			break;
		case 42:
			reg |= SMMU_CB_TCR_S2_PASIZE_42BIT;
			break;
		case 44:
			reg |= SMMU_CB_TCR_S2_PASIZE_44BIT;
			break;
		case 48:
			reg |= SMMU_CB_TCR_S2_PASIZE_48BIT;
			break;
		}
	}
	if (sc->sc_coherent)
		reg |= SMMU_CB_TCR_IRGN0_WBWA | SMMU_CB_TCR_ORGN0_WBWA |
		    SMMU_CB_TCR_SH0_ISH;
	else
		reg |= SMMU_CB_TCR_IRGN0_NC | SMMU_CB_TCR_ORGN0_NC |
		    SMMU_CB_TCR_SH0_OSH;
	smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_TCR, reg);

	if (dom->sd_4level) {
		while (dom->sd_vp.l0 == NULL) {
			dom->sd_vp.l0 = pool_get(&sc->sc_vp_pool,
			    PR_WAITOK | PR_ZERO);
		}
		l0va = (vaddr_t)dom->sd_vp.l0->l0; /* top level is l0 */
	} else {
		while (dom->sd_vp.l1 == NULL) {
			dom->sd_vp.l1 = pool_get(&sc->sc_vp_pool,
			    PR_WAITOK | PR_ZERO);
		}
		l0va = (vaddr_t)dom->sd_vp.l1->l1; /* top level is l1 */
	}
	pmap_extract(pmap_kernel(), l0va, &pa);

	if (dom->sd_stage == 1) {
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TTBR0,
		    (uint64_t)dom->sd_cb_idx << SMMU_CB_TTBR_ASID_SHIFT | pa);
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TTBR1,
		    (uint64_t)dom->sd_cb_idx << SMMU_CB_TTBR_ASID_SHIFT);
	} else
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TTBR0, pa);

	if (dom->sd_stage == 1) {
		smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_MAIR0,
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_nGnRnE, 0) |
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_nGnRE, 1) |
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_NC, 2) |
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_WB, 3));
		smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_MAIR1,
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_WT, 0));
	}

	reg = SMMU_CB_SCTLR_M | SMMU_CB_SCTLR_TRE | SMMU_CB_SCTLR_AFE |
	    SMMU_CB_SCTLR_CFRE | SMMU_CB_SCTLR_CFIE;
	if (dom->sd_stage == 1)
		reg |= SMMU_CB_SCTLR_ASIDPNE;
	smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_SCTLR, reg);

	/* Point stream to context block */
	reg = SMMU_S2CR_TYPE_TRANS | dom->sd_cb_idx;
	if (sc->sc_has_exids && sc->sc_smr)
		reg |= SMMU_S2CR_EXIDVALID;
	smmu_gr0_write_4(sc, SMMU_S2CR(dom->sd_smr_idx), reg);

	/* Map stream idx to S2CR idx */
	if (sc->sc_smr) {
		reg = sid;
		if (!sc->sc_has_exids)
			reg |= SMMU_SMR_VALID;
		smmu_gr0_write_4(sc, SMMU_SMR(dom->sd_smr_idx), reg);
	}

	snprintf(dom->sd_exname, sizeof(dom->sd_exname), "%s:%x",
	    sc->sc_dev.dv_xname, sid);
	dom->sd_iovamap = extent_create(dom->sd_exname, 0,
	    (1LL << iovabits) - 1, M_DEVBUF, NULL, 0, EX_WAITOK |
	    EX_NOCOALESCE);

	/* Reserve first page (to catch NULL access) */
	extent_alloc_region(dom->sd_iovamap, 0, PAGE_SIZE, EX_WAITOK);

	SIMPLEQ_INSERT_TAIL(&sc->sc_domains, dom, sd_list);
	return dom;
}

void
smmu_reserve_region(void *cookie, uint32_t sid, bus_addr_t addr,
    bus_size_t size)
{
	struct smmu_softc *sc = cookie;
	struct smmu_domain *dom;

	dom = smmu_domain_lookup(sc, sid);
	if (dom == NULL)
		return;

	extent_alloc_region(dom->sd_iovamap, addr, size,
	    EX_WAITOK | EX_CONFLICTOK);
}
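
/*
 * Callers use smmu_reserve_region() to carve firmware-described windows
 * (for example an MSI doorbell or a framebuffer range handed over by the
 * boot firmware) out of the IOVA extent, so the allocator never hands those
 * addresses to bus_dmamap_create().  EX_CONFLICTOK makes repeated or
 * overlapping reservations harmless.
 */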

/* basically pmap follows */

/* virtual to physical helpers */
static inline int
VP_IDX0(vaddr_t va)
{
	return (va >> VP_IDX0_POS) & VP_IDX0_MASK;
}

static inline int
VP_IDX1(vaddr_t va)
{
	return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
}

static inline int
VP_IDX2(vaddr_t va)
{
	return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
}

static inline int
VP_IDX3(vaddr_t va)
{
	return (va >> VP_IDX3_POS) & VP_IDX3_MASK;
}

static inline uint64_t
VP_Lx(paddr_t pa)
{
	/*
	 * This function takes the pa address given and manipulates it
	 * into the form that should be inserted into the VM table.
	 */
	return pa | Lx_TYPE_PT;
}

void
smmu_set_l1(struct smmu_domain *dom, uint64_t va, struct smmuvp1 *l1_va)
{
	struct smmu_softc *sc = dom->sd_sc;
	uint64_t pg_entry;
	paddr_t l1_pa;
	int idx0;

	if (pmap_extract(pmap_kernel(), (vaddr_t)l1_va, &l1_pa) == 0)
		panic("%s: unable to find vp pa mapping %p", __func__, l1_va);

	if (l1_pa & (Lx_TABLE_ALIGN-1))
		panic("%s: misaligned L1 table", __func__);

	pg_entry = VP_Lx(l1_pa);

	idx0 = VP_IDX0(va);
	dom->sd_vp.l0->vp[idx0] = l1_va;
	dom->sd_vp.l0->l0[idx0] = pg_entry;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)&dom->sd_vp.l0->l0[idx0],
		    sizeof(dom->sd_vp.l0->l0[idx0]));
}

void
smmu_set_l2(struct smmu_domain *dom, uint64_t va, struct smmuvp1 *vp1,
    struct smmuvp2 *l2_va)
{
	struct smmu_softc *sc = dom->sd_sc;
	uint64_t pg_entry;
	paddr_t l2_pa;
	int idx1;

	if (pmap_extract(pmap_kernel(), (vaddr_t)l2_va, &l2_pa) == 0)
		panic("%s: unable to find vp pa mapping %p", __func__, l2_va);

	if (l2_pa & (Lx_TABLE_ALIGN-1))
		panic("%s: misaligned L2 table", __func__);

	pg_entry = VP_Lx(l2_pa);

	idx1 = VP_IDX1(va);
	vp1->vp[idx1] = l2_va;
	vp1->l1[idx1] = pg_entry;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)&vp1->l1[idx1],
		    sizeof(vp1->l1[idx1]));
}

void
smmu_set_l3(struct smmu_domain *dom, uint64_t va, struct smmuvp2 *vp2,
    struct smmuvp3 *l3_va)
{
	struct smmu_softc *sc = dom->sd_sc;
	uint64_t pg_entry;
	paddr_t l3_pa;
	int idx2;

	if (pmap_extract(pmap_kernel(), (vaddr_t)l3_va, &l3_pa) == 0)
		panic("%s: unable to find vp pa mapping %p", __func__, l3_va);

	if (l3_pa & (Lx_TABLE_ALIGN-1))
		panic("%s: misaligned L3 table", __func__);

	pg_entry = VP_Lx(l3_pa);

	idx2 = VP_IDX2(va);
	vp2->vp[idx2] = l3_va;
	vp2->l2[idx2] = pg_entry;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)&vp2->l2[idx2],
		    sizeof(vp2->l2[idx2]));
}

int
smmu_vp_lookup(struct smmu_domain *dom, vaddr_t va, uint64_t **pl3entry)
{
	struct smmuvp1 *vp1;
	struct smmuvp2 *vp2;
	struct smmuvp3 *vp3;

	if (dom->sd_4level) {
		if (dom->sd_vp.l0 == NULL) {
			return ENXIO;
		}
		vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
	} else {
		vp1 = dom->sd_vp.l1;
	}
	if (vp1 == NULL) {
		return ENXIO;
	}

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		return ENXIO;
	}

	vp3 = vp2->vp[VP_IDX2(va)];
	if (vp3 == NULL) {
		return ENXIO;
	}

	if (pl3entry != NULL)
		*pl3entry = &(vp3->l3[VP_IDX3(va)]);

	return 0;
}

int
smmu_vp_enter(struct smmu_domain *dom, vaddr_t va, uint64_t **pl3entry,
    int flags)
{
	struct smmu_softc *sc = dom->sd_sc;
	struct smmuvp1 *vp1;
	struct smmuvp2 *vp2;
	struct smmuvp3 *vp3;

	if (dom->sd_4level) {
		vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
		if (vp1 == NULL) {
			mtx_enter(&dom->sd_pmap_mtx);
			vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
			if (vp1 == NULL) {
				vp1 = pool_get(&sc->sc_vp_pool,
				    PR_NOWAIT | PR_ZERO);
				if (vp1 == NULL) {
					mtx_leave(&dom->sd_pmap_mtx);
					return ENOMEM;
				}
				smmu_set_l1(dom, va, vp1);
			}
			mtx_leave(&dom->sd_pmap_mtx);
		}
	} else {
		vp1 = dom->sd_vp.l1;
	}

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		mtx_enter(&dom->sd_pmap_mtx);
		vp2 = vp1->vp[VP_IDX1(va)];
		if (vp2 == NULL) {
			vp2 = pool_get(&sc->sc_vp_pool, PR_NOWAIT | PR_ZERO);
			if (vp2 == NULL) {
				mtx_leave(&dom->sd_pmap_mtx);
				return ENOMEM;
			}
			smmu_set_l2(dom, va, vp1, vp2);
		}
		mtx_leave(&dom->sd_pmap_mtx);
	}

	vp3 = vp2->vp[VP_IDX2(va)];
	if (vp3 == NULL) {
		mtx_enter(&dom->sd_pmap_mtx);
		vp3 = vp2->vp[VP_IDX2(va)];
		if (vp3 == NULL) {
			vp3 = pool_get(&sc->sc_vp3_pool, PR_NOWAIT | PR_ZERO);
			if (vp3 == NULL) {
				mtx_leave(&dom->sd_pmap_mtx);
				return ENOMEM;
			}
			smmu_set_l3(dom, va, vp2, vp3);
		}
		mtx_leave(&dom->sd_pmap_mtx);
	}

	if (pl3entry != NULL)
		*pl3entry = &(vp3->l3[VP_IDX3(va)]);

	return 0;
}

uint64_t
smmu_fill_pte(struct smmu_domain *dom, vaddr_t va, paddr_t pa,
    vm_prot_t prot, int flags, int cache)
{
	uint64_t pted;

	pted = pa & PTE_RPGN;

	switch (cache) {
	case PMAP_CACHE_WB:
		break;
	case PMAP_CACHE_WT:
		break;
	case PMAP_CACHE_CI:
		break;
	case PMAP_CACHE_DEV_NGNRNE:
		break;
	case PMAP_CACHE_DEV_NGNRE:
		break;
	default:
		panic("%s: invalid cache mode", __func__);
	}

	pted |= cache;
	pted |= flags & (PROT_READ|PROT_WRITE|PROT_EXEC);
	return pted;
}

void
smmu_pte_update(struct smmu_domain *dom, uint64_t pted, uint64_t *pl3)
{
	struct smmu_softc *sc = dom->sd_sc;
	uint64_t pte, access_bits;
	uint64_t attr = 0;

	/* see mair in locore.S */
	switch (pted & PMAP_CACHE_BITS) {
	case PMAP_CACHE_WB:
		/* inner and outer writeback */
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_WB);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_WB);
		attr |= ATTR_SH(SH_INNER);
		break;
	case PMAP_CACHE_WT:
		/* inner and outer writethrough */
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_WT);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_WT);
		attr |= ATTR_SH(SH_INNER);
		break;
	case PMAP_CACHE_CI:
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_CI);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_CI);
		attr |= ATTR_SH(SH_INNER);
		break;
	case PMAP_CACHE_DEV_NGNRNE:
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRNE);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_DEV_NGNRNE);
		attr |= ATTR_SH(SH_INNER);
		break;
	case PMAP_CACHE_DEV_NGNRE:
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRE);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_DEV_NGNRE);
		attr |= ATTR_SH(SH_INNER);
		break;
	default:
		panic("%s: invalid cache mode", __func__);
	}

	access_bits = ATTR_PXN | ATTR_AF;
	if (dom->sd_stage == 1) {
		attr |= ATTR_nG;
		access_bits |= ATTR_AP(1);
		if ((pted & PROT_READ) &&
		    !(pted & PROT_WRITE))
			access_bits |= ATTR_AP(2);
	} else {
		if (pted & PROT_READ)
			access_bits |= ATTR_AP(1);
		if (pted & PROT_WRITE)
			access_bits |= ATTR_AP(2);
	}

	pte = (pted & PTE_RPGN) | attr | access_bits | L3_P;
	*pl3 = pte;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)pl3, sizeof(*pl3));
}

void
smmu_pte_remove(struct smmu_domain *dom, vaddr_t va)
{
	/* put entry into table */
	/* need to deal with ref/change here */
	struct smmu_softc *sc = dom->sd_sc;
	struct smmuvp1 *vp1;
	struct smmuvp2 *vp2;
	struct smmuvp3 *vp3;

	if (dom->sd_4level)
		vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
	else
		vp1 = dom->sd_vp.l1;
	if (vp1 == NULL) {
		panic("%s: missing the l1 for va %lx domain %p", __func__,
		    va, dom);
	}
	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		panic("%s: missing the l2 for va %lx domain %p", __func__,
		    va, dom);
	}
	vp3 = vp2->vp[VP_IDX2(va)];
	if (vp3 == NULL) {
		panic("%s: missing the l3 for va %lx domain %p", __func__,
		    va, dom);
	}
	vp3->l3[VP_IDX3(va)] = 0;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)&vp3->l3[VP_IDX3(va)],
		    sizeof(vp3->l3[VP_IDX3(va)]));
}

int
smmu_enter(struct smmu_domain *dom, vaddr_t va, paddr_t pa, vm_prot_t prot,
    int flags, int cache)
{
	uint64_t *pl3;

	if (smmu_vp_lookup(dom, va, &pl3) != 0) {
		if (smmu_vp_enter(dom, va, &pl3, flags))
			return ENOMEM;
	}

	if (flags & (PROT_READ|PROT_WRITE|PROT_EXEC))
		smmu_map(dom, va, pa, prot, flags, cache);

	return 0;
}

void
smmu_map(struct smmu_domain *dom, vaddr_t va, paddr_t pa, vm_prot_t prot,
    int flags, int cache)
{
	uint64_t *pl3;
	uint64_t pted;
	int ret;

	/* IOVA must already be allocated */
	ret = smmu_vp_lookup(dom, va, &pl3);
	KASSERT(ret == 0);

	/* Update PTED information for physical address */
	pted = smmu_fill_pte(dom, va, pa, prot, flags, cache);

	/* Insert updated information */
	smmu_pte_update(dom, pted, pl3);
}

void
smmu_unmap(struct smmu_domain *dom, vaddr_t va)
{
	struct smmu_softc *sc = dom->sd_sc;
	int ret;

	/* IOVA must already be allocated */
	ret = smmu_vp_lookup(dom, va, NULL);
	KASSERT(ret == 0);

	/* Remove mapping from pagetable */
	smmu_pte_remove(dom, va);

	/* Invalidate IOTLB */
	if (dom->sd_stage == 1)
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TLBIVAL,
		    (uint64_t)dom->sd_cb_idx << 48 | va >> PAGE_SHIFT);
	else
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TLBIIPAS2L,
		    va >> PAGE_SHIFT);
}
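
/*
 * The invalidation above is per page and "by VA, last level": stage 1
 * issues TLBIVAL with the ASID (the context bank index) in the top bits,
 * stage 2 issues TLBIIPAS2L on the IPA.  Completion is not waited for
 * here; callers batch pages and rely on smmu_tlb_sync_context() (see
 * smmu_unload_map()) before the IOVA is reused.
 */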

void
smmu_remove(struct smmu_domain *dom, vaddr_t va)
{
	/* TODO: garbage collect page tables? */
}

int
smmu_load_map(struct smmu_domain *dom, bus_dmamap_t map)
{
	struct smmu_map_state *sms = map->_dm_cookie;
	u_long dva, maplen;
	int seg;

	maplen = 0;
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		paddr_t pa = map->dm_segs[seg]._ds_paddr;
		psize_t off = pa - trunc_page(pa);
		maplen += round_page(map->dm_segs[seg].ds_len + off);
	}
	KASSERT(maplen <= sms->sms_len);

	dva = sms->sms_dva;
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		paddr_t pa = map->dm_segs[seg]._ds_paddr;
		psize_t off = pa - trunc_page(pa);
		u_long len = round_page(map->dm_segs[seg].ds_len + off);

		map->dm_segs[seg].ds_addr = dva + off;

		pa = trunc_page(pa);
		while (len > 0) {
			smmu_map(dom, dva, pa,
			    PROT_READ | PROT_WRITE,
			    PROT_READ | PROT_WRITE, PMAP_CACHE_WB);

			dva += PAGE_SIZE;
			pa += PAGE_SIZE;
			len -= PAGE_SIZE;
			sms->sms_loaded += PAGE_SIZE;
		}
	}

	return 0;
}
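
/*
 * Worked example (hypothetical numbers): a single segment with _ds_paddr
 * 0x40001234 and ds_len 0x2000 has an in-page offset of 0x234, so three
 * 4 KB pages starting at physical 0x40001000 are mapped and the segment's
 * ds_addr becomes sms_dva + 0x234.  The device then uses that IOVA while
 * the SMMU translates back to the physical pages.
 */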

void
smmu_unload_map(struct smmu_domain *dom, bus_dmamap_t map)
{
	struct smmu_map_state *sms = map->_dm_cookie;
	u_long len, dva;

	if (sms->sms_loaded == 0)
		return;

	dva = sms->sms_dva;
	len = sms->sms_loaded;

	while (len > 0) {
		smmu_unmap(dom, dva);

		dva += PAGE_SIZE;
		len -= PAGE_SIZE;
	}

	sms->sms_loaded = 0;

	smmu_tlb_sync_context(dom);
}

int
smmu_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamap)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	struct smmu_map_state *sms;
	bus_dmamap_t map;
	u_long dva, len;
	int error;

	error = sc->sc_dmat->_dmamap_create(sc->sc_dmat, size,
	    nsegments, maxsegsz, boundary, flags, &map);
	if (error)
		return error;

	sms = malloc(sizeof(*sms), M_DEVBUF, (flags & BUS_DMA_NOWAIT) ?
	     (M_NOWAIT|M_ZERO) : (M_WAITOK|M_ZERO));
	if (sms == NULL) {
		sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
		return ENOMEM;
	}

	/* Approximation of maximum pages needed. */
	len = round_page(size) + nsegments * PAGE_SIZE;

	/* Allocate IOVA, and a guard page at the end. */
	mtx_enter(&dom->sd_iova_mtx);
	error = extent_alloc_with_descr(dom->sd_iovamap, len + PAGE_SIZE,
	    PAGE_SIZE, 0, 0, EX_NOWAIT, &sms->sms_er, &dva);
	mtx_leave(&dom->sd_iova_mtx);
	if (error) {
		sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
		free(sms, M_DEVBUF, sizeof(*sms));
		return error;
	}

	sms->sms_dva = dva;
	sms->sms_len = len;

	while (len > 0) {
		error = smmu_enter(dom, dva, dva, PROT_READ | PROT_WRITE,
		    PROT_NONE, PMAP_CACHE_WB);
		KASSERT(error == 0); /* FIXME: rollback smmu_enter() */
		dva += PAGE_SIZE;
		len -= PAGE_SIZE;
	}

	map->_dm_cookie = sms;
	*dmamap = map;
	return 0;
}
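
/*
 * Design note: the IOVA range (plus one unmapped guard page at the end)
 * and all intermediate page-table levels are reserved here, at map
 * creation time.  smmu_enter() with PROT_NONE only populates the table
 * hierarchy without installing leaf mappings, so the dmamap_load paths
 * later only fill leaf PTEs and never allocate memory, and a device that
 * runs past its buffer faults on the guard page instead of hitting some
 * other mapping.
 */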

void
smmu_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	struct smmu_map_state *sms = map->_dm_cookie;
	u_long dva, len;
	int error;

	if (sms->sms_loaded)
		smmu_dmamap_unload(t, map);

	dva = sms->sms_dva;
	len = sms->sms_len;

	while (len > 0) {
		smmu_remove(dom, dva);
		dva += PAGE_SIZE;
		len -= PAGE_SIZE;
	}

	mtx_enter(&dom->sd_iova_mtx);
	error = extent_free(dom->sd_iovamap, sms->sms_dva,
	    sms->sms_len + PAGE_SIZE, EX_NOWAIT);
	mtx_leave(&dom->sd_iova_mtx);
	KASSERT(error == 0);

	free(sms, M_DEVBUF, sizeof(*sms));
	sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
}

int
smmu_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    bus_size_t buflen, struct proc *p, int flags)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	int error;

	error = sc->sc_dmat->_dmamap_load(sc->sc_dmat, map,
	    buf, buflen, p, flags);
	if (error)
		return error;

	error = smmu_load_map(dom, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

int
smmu_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0,
    int flags)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	int error;

	error = sc->sc_dmat->_dmamap_load_mbuf(sc->sc_dmat, map,
	    m0, flags);
	if (error)
		return error;

	error = smmu_load_map(dom, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

int
smmu_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map, struct uio *uio,
    int flags)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	int error;

	error = sc->sc_dmat->_dmamap_load_uio(sc->sc_dmat, map,
	    uio, flags);
	if (error)
		return error;

	error = smmu_load_map(dom, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

int
smmu_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map, bus_dma_segment_t *segs,
    int nsegs, bus_size_t size, int flags)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	int error;

	error = sc->sc_dmat->_dmamap_load_raw(sc->sc_dmat, map,
	    segs, nsegs, size, flags);
	if (error)
		return error;

	error = smmu_load_map(dom, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

void
smmu_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;

	smmu_unload_map(dom, map);
	sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
}