/* $OpenBSD: smmu.c,v 1.21 2022/09/11 10:28:56 patrick Exp $ */
/*
 * Copyright (c) 2008-2009,2014-2016 Dale Rahn <drahn@dalerahn.com>
 * Copyright (c) 2021 Patrick Wildt <patrick@blueri.se>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/pool.h>
#include <sys/atomic.h>

#include <machine/bus.h>
#include <machine/cpufunc.h>

#include <uvm/uvm_extern.h>
#include <arm64/vmparam.h>
#include <arm64/pmap.h>

#include <dev/pci/pcivar.h>
#include <arm64/dev/smmuvar.h>
#include <arm64/dev/smmureg.h>

struct smmu_map_state {
	struct extent_region sms_er;
	bus_addr_t sms_dva;
	bus_size_t sms_len;
	bus_size_t sms_loaded;
};

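/*
 * Software mirror of the 4-level, 4KB-granule translation table.  Each
 * level keeps the hardware descriptors (l0..l3) next to an array of
 * kernel-virtual pointers (vp) to the next-level tables, so the software
 * walk never has to map physical table addresses back to virtual ones.
 * The leaf level (smmuvp3) holds descriptors only, hence the CTASSERTs
 * below.
 */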
struct smmuvp0 {
	uint64_t l0[VP_IDX0_CNT];
	struct smmuvp1 *vp[VP_IDX0_CNT];
};

struct smmuvp1 {
	uint64_t l1[VP_IDX1_CNT];
	struct smmuvp2 *vp[VP_IDX1_CNT];
};

struct smmuvp2 {
	uint64_t l2[VP_IDX2_CNT];
	struct smmuvp3 *vp[VP_IDX2_CNT];
};

struct smmuvp3 {
	uint64_t l3[VP_IDX3_CNT];
};

CTASSERT(sizeof(struct smmuvp0) == sizeof(struct smmuvp1));
CTASSERT(sizeof(struct smmuvp0) == sizeof(struct smmuvp2));
CTASSERT(sizeof(struct smmuvp0) != sizeof(struct smmuvp3));

uint32_t smmu_gr0_read_4(struct smmu_softc *, bus_size_t);
void smmu_gr0_write_4(struct smmu_softc *, bus_size_t, uint32_t);
uint32_t smmu_gr1_read_4(struct smmu_softc *, bus_size_t);
void smmu_gr1_write_4(struct smmu_softc *, bus_size_t, uint32_t);
uint32_t smmu_cb_read_4(struct smmu_softc *, int, bus_size_t);
void smmu_cb_write_4(struct smmu_softc *, int, bus_size_t, uint32_t);
uint64_t smmu_cb_read_8(struct smmu_softc *, int, bus_size_t);
void smmu_cb_write_8(struct smmu_softc *, int, bus_size_t, uint64_t);

void smmu_tlb_sync_global(struct smmu_softc *);
void smmu_tlb_sync_context(struct smmu_domain *);

struct smmu_domain *smmu_domain_lookup(struct smmu_softc *, uint32_t);
struct smmu_domain *smmu_domain_create(struct smmu_softc *, uint32_t);

void smmu_set_l1(struct smmu_domain *, uint64_t, struct smmuvp1 *);
void smmu_set_l2(struct smmu_domain *, uint64_t, struct smmuvp1 *,
    struct smmuvp2 *);
void smmu_set_l3(struct smmu_domain *, uint64_t, struct smmuvp2 *,
    struct smmuvp3 *);

int smmu_vp_lookup(struct smmu_domain *, vaddr_t, uint64_t **);
int smmu_vp_enter(struct smmu_domain *, vaddr_t, uint64_t **, int);

uint64_t smmu_fill_pte(struct smmu_domain *, vaddr_t, paddr_t,
    vm_prot_t, int, int);
void smmu_pte_update(struct smmu_domain *, uint64_t, uint64_t *);
void smmu_pte_remove(struct smmu_domain *, vaddr_t);

int smmu_enter(struct smmu_domain *, vaddr_t, paddr_t, vm_prot_t, int, int);
void smmu_map(struct smmu_domain *, vaddr_t, paddr_t, vm_prot_t, int, int);
void smmu_unmap(struct smmu_domain *, vaddr_t);
void smmu_remove(struct smmu_domain *, vaddr_t);

int smmu_load_map(struct smmu_domain *, bus_dmamap_t);
void smmu_unload_map(struct smmu_domain *, bus_dmamap_t);

int smmu_dmamap_create(bus_dma_tag_t, bus_size_t, int,
    bus_size_t, bus_size_t, int, bus_dmamap_t *);
void smmu_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
int smmu_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *,
    bus_size_t, struct proc *, int);
int smmu_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t,
    struct mbuf *, int);
int smmu_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t,
    struct uio *, int);
int smmu_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
    bus_dma_segment_t *, int, bus_size_t, int);
void smmu_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);

struct cfdriver smmu_cd = {
	NULL, "smmu", DV_DULL
};

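/*
 * Common attach path: read the IDR registers to size streams and context
 * banks, set unknown streams to fault (keeping streams that QCOM firmware
 * left running), clear stale fault state and finally enable the SMMU with
 * bypass for unknown streams disabled.
 */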
int
smmu_attach(struct smmu_softc *sc)
{
	uint32_t reg;
	int i;

	SIMPLEQ_INIT(&sc->sc_domains);

	pool_init(&sc->sc_vp_pool, sizeof(struct smmuvp0), PAGE_SIZE, IPL_VM, 0,
	    "smmu_vp", NULL);
	pool_setlowat(&sc->sc_vp_pool, 20);
	pool_init(&sc->sc_vp3_pool, sizeof(struct smmuvp3), PAGE_SIZE, IPL_VM, 0,
	    "smmu_vp3", NULL);
	pool_setlowat(&sc->sc_vp3_pool, 20);

	reg = smmu_gr0_read_4(sc, SMMU_IDR0);
	if (reg & SMMU_IDR0_S1TS)
		sc->sc_has_s1 = 1;
	/*
	 * Marvell's 8040 does not support 64-bit writes, hence it
	 * is not possible to invalidate stage-2, because the ASID
	 * is part of the upper 32-bits and they'd be ignored.
	 */
	if (sc->sc_is_ap806)
		sc->sc_has_s1 = 0;
	if (reg & SMMU_IDR0_S2TS)
		sc->sc_has_s2 = 1;
	if (!sc->sc_has_s1 && !sc->sc_has_s2)
		return 1;
	if (reg & SMMU_IDR0_EXIDS)
		sc->sc_has_exids = 1;

	sc->sc_num_streams = 1 << SMMU_IDR0_NUMSIDB(reg);
	if (sc->sc_has_exids)
		sc->sc_num_streams = 1 << 16;
	sc->sc_stream_mask = sc->sc_num_streams - 1;
	if (reg & SMMU_IDR0_SMS) {
		sc->sc_num_streams = SMMU_IDR0_NUMSMRG(reg);
		if (sc->sc_num_streams == 0)
			return 1;
		sc->sc_smr = mallocarray(sc->sc_num_streams,
		    sizeof(*sc->sc_smr), M_DEVBUF, M_WAITOK | M_ZERO);
	}

	reg = smmu_gr0_read_4(sc, SMMU_IDR1);
	sc->sc_pagesize = 4 * 1024;
	if (reg & SMMU_IDR1_PAGESIZE_64K)
		sc->sc_pagesize = 64 * 1024;
	sc->sc_numpage = 1 << (SMMU_IDR1_NUMPAGENDXB(reg) + 1);

	/* 0 to NUMS2CB == stage-2, NUMS2CB to NUMCB == stage-1 */
	sc->sc_num_context_banks = SMMU_IDR1_NUMCB(reg);
	sc->sc_num_s2_context_banks = SMMU_IDR1_NUMS2CB(reg);
	if (sc->sc_num_s2_context_banks > sc->sc_num_context_banks)
		return 1;
	sc->sc_cb = mallocarray(sc->sc_num_context_banks,
	    sizeof(*sc->sc_cb), M_DEVBUF, M_WAITOK | M_ZERO);

	reg = smmu_gr0_read_4(sc, SMMU_IDR2);
	if (reg & SMMU_IDR2_VMID16S)
		sc->sc_has_vmid16s = 1;

	switch (SMMU_IDR2_IAS(reg)) {
	case SMMU_IDR2_IAS_32BIT:
		sc->sc_ipa_bits = 32;
		break;
	case SMMU_IDR2_IAS_36BIT:
		sc->sc_ipa_bits = 36;
		break;
	case SMMU_IDR2_IAS_40BIT:
		sc->sc_ipa_bits = 40;
		break;
	case SMMU_IDR2_IAS_42BIT:
		sc->sc_ipa_bits = 42;
		break;
	case SMMU_IDR2_IAS_44BIT:
		sc->sc_ipa_bits = 44;
		break;
	case SMMU_IDR2_IAS_48BIT:
	default:
		sc->sc_ipa_bits = 48;
		break;
	}
	switch (SMMU_IDR2_OAS(reg)) {
	case SMMU_IDR2_OAS_32BIT:
		sc->sc_pa_bits = 32;
		break;
	case SMMU_IDR2_OAS_36BIT:
		sc->sc_pa_bits = 36;
		break;
	case SMMU_IDR2_OAS_40BIT:
		sc->sc_pa_bits = 40;
		break;
	case SMMU_IDR2_OAS_42BIT:
		sc->sc_pa_bits = 42;
		break;
	case SMMU_IDR2_OAS_44BIT:
		sc->sc_pa_bits = 44;
		break;
	case SMMU_IDR2_OAS_48BIT:
	default:
		sc->sc_pa_bits = 48;
		break;
	}
	switch (SMMU_IDR2_UBS(reg)) {
	case SMMU_IDR2_UBS_32BIT:
		sc->sc_va_bits = 32;
		break;
	case SMMU_IDR2_UBS_36BIT:
		sc->sc_va_bits = 36;
		break;
	case SMMU_IDR2_UBS_40BIT:
		sc->sc_va_bits = 40;
		break;
	case SMMU_IDR2_UBS_42BIT:
		sc->sc_va_bits = 42;
		break;
	case SMMU_IDR2_UBS_44BIT:
		sc->sc_va_bits = 44;
		break;
	case SMMU_IDR2_UBS_49BIT:
	default:
		sc->sc_va_bits = 48;
		break;
	}

	printf(": %u CBs (%u S2-only)",
	    sc->sc_num_context_banks, sc->sc_num_s2_context_banks);
	if (sc->sc_is_qcom) {
		/*
		 * In theory we should check if bypass quirk is needed by
		 * modifying S2CR and re-checking if the value is different.
		 * This does not work on the last S2CR, but on the first,
		 * which is in use. Revisit this once we have other QCOM HW.
		 */
		sc->sc_bypass_quirk = 1;
		printf(", bypass quirk");
		/*
		 * Create special context that is turned off. This allows us
		 * to map a stream to a context bank where translation is not
		 * happening, and hence bypassed.
		 */
		sc->sc_cb[sc->sc_num_context_banks - 1] =
		    malloc(sizeof(struct smmu_cb), M_DEVBUF, M_WAITOK | M_ZERO);
		smmu_cb_write_4(sc, sc->sc_num_context_banks - 1,
		    SMMU_CB_SCTLR, 0);
		smmu_gr1_write_4(sc, SMMU_CBAR(sc->sc_num_context_banks - 1),
		    SMMU_CBAR_TYPE_S1_TRANS_S2_BYPASS);
	}
	printf("\n");

	/* Clear Global Fault Status Register */
	smmu_gr0_write_4(sc, SMMU_SGFSR, smmu_gr0_read_4(sc, SMMU_SGFSR));

	for (i = 0; i < sc->sc_num_streams; i++) {
		/* On QCOM HW we need to keep current streams running. */
		if (sc->sc_is_qcom && sc->sc_smr &&
		    smmu_gr0_read_4(sc, SMMU_SMR(i)) & SMMU_SMR_VALID) {
			reg = smmu_gr0_read_4(sc, SMMU_SMR(i));
			sc->sc_smr[i] = malloc(sizeof(struct smmu_smr),
			    M_DEVBUF, M_WAITOK | M_ZERO);
			sc->sc_smr[i]->ss_id = (reg >> SMMU_SMR_ID_SHIFT) &
			    SMMU_SMR_ID_MASK;
			sc->sc_smr[i]->ss_mask = (reg >> SMMU_SMR_MASK_SHIFT) &
			    SMMU_SMR_MASK_MASK;
			if (sc->sc_bypass_quirk) {
				smmu_gr0_write_4(sc, SMMU_S2CR(i),
				    SMMU_S2CR_TYPE_TRANS |
				    sc->sc_num_context_banks - 1);
			} else {
				smmu_gr0_write_4(sc, SMMU_S2CR(i),
				    SMMU_S2CR_TYPE_BYPASS | 0xff);
			}
			continue;
		}
#if 1
		/* Setup all streams to fault by default */
		smmu_gr0_write_4(sc, SMMU_S2CR(i), SMMU_S2CR_TYPE_FAULT);
#else
		/* For stream indexing, USFCFG bypass isn't enough! */
		smmu_gr0_write_4(sc, SMMU_S2CR(i), SMMU_S2CR_TYPE_BYPASS);
#endif
		/* Disable all stream map registers */
		if (sc->sc_smr)
			smmu_gr0_write_4(sc, SMMU_SMR(i), 0);
	}

	for (i = 0; i < sc->sc_num_context_banks; i++) {
		/* Disable Context Bank */
		smmu_cb_write_4(sc, i, SMMU_CB_SCTLR, 0);
		/* Clear Context Bank Fault Status Register */
		smmu_cb_write_4(sc, i, SMMU_CB_FSR, SMMU_CB_FSR_MASK);
	}

	/* Invalidate TLB */
	smmu_gr0_write_4(sc, SMMU_TLBIALLH, ~0);
	smmu_gr0_write_4(sc, SMMU_TLBIALLNSNH, ~0);

	if (sc->sc_is_mmu500) {
		reg = smmu_gr0_read_4(sc, SMMU_SACR);
		if (SMMU_IDR7_MAJOR(smmu_gr0_read_4(sc, SMMU_IDR7)) >= 2)
			reg &= ~SMMU_SACR_MMU500_CACHE_LOCK;
		reg |= SMMU_SACR_MMU500_SMTNMB_TLBEN |
		    SMMU_SACR_MMU500_S2CRB_TLBEN;
		smmu_gr0_write_4(sc, SMMU_SACR, reg);
		for (i = 0; i < sc->sc_num_context_banks; i++) {
			reg = smmu_cb_read_4(sc, i, SMMU_CB_ACTLR);
			reg &= ~SMMU_CB_ACTLR_CPRE;
			smmu_cb_write_4(sc, i, SMMU_CB_ACTLR, reg);
		}
	}

	/* Enable SMMU */
	reg = smmu_gr0_read_4(sc, SMMU_SCR0);
	reg &= ~(SMMU_SCR0_CLIENTPD |
	    SMMU_SCR0_FB | SMMU_SCR0_BSU_MASK);
#if 1
	/* Disable bypass for unknown streams */
	reg |= SMMU_SCR0_USFCFG;
#else
	/* Enable bypass for unknown streams */
	reg &= ~SMMU_SCR0_USFCFG;
#endif
	reg |= SMMU_SCR0_GFRE | SMMU_SCR0_GFIE |
	    SMMU_SCR0_GCFGFRE | SMMU_SCR0_GCFGFIE |
	    SMMU_SCR0_VMIDPNE | SMMU_SCR0_PTM;
	if (sc->sc_has_exids)
		reg |= SMMU_SCR0_EXIDENABLE;
	if (sc->sc_has_vmid16s)
		reg |= SMMU_SCR0_VMID16EN;

	smmu_tlb_sync_global(sc);
	smmu_gr0_write_4(sc, SMMU_SCR0, reg);

	return 0;
}

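/*
 * Global fault interrupt: report and acknowledge whatever the global
 * fault status and syndrome registers currently hold.
 */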
int
smmu_global_irq(void *cookie)
{
	struct smmu_softc *sc = cookie;
	uint32_t reg;

	reg = smmu_gr0_read_4(sc, SMMU_SGFSR);
	if (reg == 0)
		return 0;

	printf("%s: SGFSR 0x%08x SGFSYNR0 0x%08x SGFSYNR1 0x%08x "
	    "SGFSYNR2 0x%08x\n", sc->sc_dev.dv_xname, reg,
	    smmu_gr0_read_4(sc, SMMU_SGFSYNR0),
	    smmu_gr0_read_4(sc, SMMU_SGFSYNR1),
	    smmu_gr0_read_4(sc, SMMU_SGFSYNR2));

	smmu_gr0_write_4(sc, SMMU_SGFSR, reg);

	return 1;
}

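/*
 * Per-context-bank fault interrupt: dump the fault status, syndrome and
 * faulting address for the offending context bank, then clear it.
 */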
int
smmu_context_irq(void *cookie)
{
	struct smmu_cb_irq *cbi = cookie;
	struct smmu_softc *sc = cbi->cbi_sc;
	uint32_t reg;

	reg = smmu_cb_read_4(sc, cbi->cbi_idx, SMMU_CB_FSR);
	if ((reg & SMMU_CB_FSR_MASK) == 0)
		return 0;

	printf("%s: FSR 0x%08x FSYNR0 0x%08x FAR 0x%llx "
	    "CBFRSYNRA 0x%08x\n", sc->sc_dev.dv_xname, reg,
	    smmu_cb_read_4(sc, cbi->cbi_idx, SMMU_CB_FSYNR0),
	    smmu_cb_read_8(sc, cbi->cbi_idx, SMMU_CB_FAR),
	    smmu_gr1_read_4(sc, SMMU_CBFRSYNRA(cbi->cbi_idx)));

	smmu_cb_write_4(sc, cbi->cbi_idx, SMMU_CB_FSR, reg);

	return 1;
}

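/*
 * TLB maintenance completion: write the global or per-context sync
 * register and poll the matching status register (bounded to 1000
 * reads) until the queued invalidations have drained.
 */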
void
smmu_tlb_sync_global(struct smmu_softc *sc)
{
	int i;

	smmu_gr0_write_4(sc, SMMU_STLBGSYNC, ~0);
	for (i = 1000; i > 0; i--) {
		if ((smmu_gr0_read_4(sc, SMMU_STLBGSTATUS) &
		    SMMU_STLBGSTATUS_GSACTIVE) == 0)
			return;
	}

	printf("%s: global TLB sync timeout\n",
	    sc->sc_dev.dv_xname);
}

void
smmu_tlb_sync_context(struct smmu_domain *dom)
{
	struct smmu_softc *sc = dom->sd_sc;
	int i;

	smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_TLBSYNC, ~0);
	for (i = 1000; i > 0; i--) {
		if ((smmu_cb_read_4(sc, dom->sd_cb_idx, SMMU_CB_TLBSTATUS) &
		    SMMU_CB_TLBSTATUS_SACTIVE) == 0)
			return;
	}

	printf("%s: context TLB sync timeout\n",
	    sc->sc_dev.dv_xname);
}

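/*
 * Register space helpers.  The register file is organised in pages of
 * sc_pagesize bytes: page 0 is global register space 0, page 1 is
 * global register space 1, and the translation context banks start at
 * page sc_numpage (SMMU_CB_BASE), one page per bank.  On the AP806 the
 * 64-bit registers have to be accessed as two 32-bit halves.
 */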
uint32_t
smmu_gr0_read_4(struct smmu_softc *sc, bus_size_t off)
{
	uint32_t base = 0 * sc->sc_pagesize;

	return bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off);
}

void
smmu_gr0_write_4(struct smmu_softc *sc, bus_size_t off, uint32_t val)
{
	uint32_t base = 0 * sc->sc_pagesize;

	bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off, val);
}

uint32_t
smmu_gr1_read_4(struct smmu_softc *sc, bus_size_t off)
{
	uint32_t base = 1 * sc->sc_pagesize;

	return bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off);
}

void
smmu_gr1_write_4(struct smmu_softc *sc, bus_size_t off, uint32_t val)
{
	uint32_t base = 1 * sc->sc_pagesize;

	bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off, val);
}

uint32_t
smmu_cb_read_4(struct smmu_softc *sc, int idx, bus_size_t off)
{
	uint32_t base;

	base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
	base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */

	return bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off);
}

void
smmu_cb_write_4(struct smmu_softc *sc, int idx, bus_size_t off, uint32_t val)
{
	uint32_t base;

	base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
	base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */

	bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off, val);
}

uint64_t
smmu_cb_read_8(struct smmu_softc *sc, int idx, bus_size_t off)
{
	uint64_t reg;
	uint32_t base;

	base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
	base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */

	if (sc->sc_is_ap806) {
		reg = bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off + 4);
		reg <<= 32;
		reg |= bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off + 0);
		return reg;
	}

	return bus_space_read_8(sc->sc_iot, sc->sc_ioh, base + off);
}

void
smmu_cb_write_8(struct smmu_softc *sc, int idx, bus_size_t off, uint64_t val)
{
	uint32_t base;

	base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
	base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */

	if (sc->sc_is_ap806) {
		bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off + 4,
		    val >> 32);
		bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off + 0,
		    val & 0xffffffff);
		return;
	}

	bus_space_write_8(sc->sc_iot, sc->sc_ioh, base + off, val);
}

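/*
 * Hook a device up to its translation domain: look up (or create) the
 * domain for this stream ID and hand back a bus_dma tag whose map
 * operations are routed through the SMMU wrappers below.
 */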
bus_dma_tag_t
smmu_device_map(void *cookie, uint32_t sid, bus_dma_tag_t dmat)
{
	struct smmu_softc *sc = cookie;
	struct smmu_domain *dom;

	dom = smmu_domain_lookup(sc, sid);
	if (dom == NULL)
		return dmat;

	if (dom->sd_dmat == NULL) {
		dom->sd_dmat = malloc(sizeof(*dom->sd_dmat),
		    M_DEVBUF, M_WAITOK);
		memcpy(dom->sd_dmat, sc->sc_dmat,
		    sizeof(*dom->sd_dmat));
		dom->sd_dmat->_cookie = dom;
		dom->sd_dmat->_dmamap_create = smmu_dmamap_create;
		dom->sd_dmat->_dmamap_destroy = smmu_dmamap_destroy;
		dom->sd_dmat->_dmamap_load = smmu_dmamap_load;
		dom->sd_dmat->_dmamap_load_mbuf = smmu_dmamap_load_mbuf;
		dom->sd_dmat->_dmamap_load_uio = smmu_dmamap_load_uio;
		dom->sd_dmat->_dmamap_load_raw = smmu_dmamap_load_raw;
		dom->sd_dmat->_dmamap_unload = smmu_dmamap_unload;
		dom->sd_dmat->_flags |= BUS_DMA_COHERENT;
	}

	return dom->sd_dmat;
}

struct smmu_domain *
smmu_domain_lookup(struct smmu_softc *sc, uint32_t sid)
{
	struct smmu_domain *dom;

	SIMPLEQ_FOREACH(dom, &sc->sc_domains, sd_list) {
		if (dom->sd_sid == sid)
			return dom;
	}

	return smmu_domain_create(sc, sid);
}

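/*
 * Create a translation domain for a stream ID: claim a free context
 * bank (stage 1 preferred) and, if the SMMU does stream matching, a
 * free SMR entry, then program CBA2R/CBAR/TCR/TTBR/MAIR/SCTLR for
 * 4KB-granule translation and set up the IOVA extent allocator.
 */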
struct smmu_domain *
smmu_domain_create(struct smmu_softc *sc, uint32_t sid)
{
	struct smmu_domain *dom;
	uint32_t iovabits, reg;
	paddr_t pa;
	vaddr_t l0va;
	int i, start, end;

	dom = malloc(sizeof(*dom), M_DEVBUF, M_WAITOK | M_ZERO);
	mtx_init(&dom->sd_iova_mtx, IPL_VM);
	mtx_init(&dom->sd_pmap_mtx, IPL_VM);
	dom->sd_sc = sc;
	dom->sd_sid = sid;

	/* Prefer stage 1 if possible! */
	if (sc->sc_has_s1) {
		start = sc->sc_num_s2_context_banks;
		end = sc->sc_num_context_banks;
		dom->sd_stage = 1;
	} else {
		start = 0;
		end = sc->sc_num_context_banks;
		dom->sd_stage = 2;
	}

	for (i = start; i < end; i++) {
		if (sc->sc_cb[i] != NULL)
			continue;
		sc->sc_cb[i] = malloc(sizeof(struct smmu_cb),
		    M_DEVBUF, M_WAITOK | M_ZERO);
		dom->sd_cb_idx = i;
		break;
	}
	if (i >= end) {
		printf("%s: out of context banks, I/O device will fail\n",
		    sc->sc_dev.dv_xname);
		free(dom, M_DEVBUF, sizeof(*dom));
		return NULL;
	}

	/* Stream indexing is easy */
	dom->sd_smr_idx = sid;

	/* Stream mapping is a bit more effort */
	if (sc->sc_smr) {
		for (i = 0; i < sc->sc_num_streams; i++) {
			/* Take over QCOM SMRs */
			if (sc->sc_is_qcom && sc->sc_smr[i] != NULL &&
			    sc->sc_smr[i]->ss_dom == NULL &&
			    sc->sc_smr[i]->ss_id == sid &&
			    sc->sc_smr[i]->ss_mask == 0) {
				free(sc->sc_smr[i], M_DEVBUF,
				    sizeof(struct smmu_smr));
				sc->sc_smr[i] = NULL;
			}
			if (sc->sc_smr[i] != NULL)
				continue;
			sc->sc_smr[i] = malloc(sizeof(struct smmu_smr),
			    M_DEVBUF, M_WAITOK | M_ZERO);
			sc->sc_smr[i]->ss_dom = dom;
			sc->sc_smr[i]->ss_id = sid;
			sc->sc_smr[i]->ss_mask = 0;
			dom->sd_smr_idx = i;
			break;
		}

		if (i >= sc->sc_num_streams) {
			free(sc->sc_cb[dom->sd_cb_idx], M_DEVBUF,
			    sizeof(struct smmu_cb));
			sc->sc_cb[dom->sd_cb_idx] = NULL;
			free(dom, M_DEVBUF, sizeof(*dom));
			printf("%s: out of streams, I/O device will fail\n",
			    sc->sc_dev.dv_xname);
			return NULL;
		}
	}

	reg = SMMU_CBA2R_VA64;
	if (sc->sc_has_vmid16s)
		reg |= (dom->sd_cb_idx + 1) << SMMU_CBA2R_VMID16_SHIFT;
	smmu_gr1_write_4(sc, SMMU_CBA2R(dom->sd_cb_idx), reg);

	if (dom->sd_stage == 1) {
		reg = SMMU_CBAR_TYPE_S1_TRANS_S2_BYPASS |
		    SMMU_CBAR_BPSHCFG_NSH | SMMU_CBAR_MEMATTR_WB;
	} else {
		reg = SMMU_CBAR_TYPE_S2_TRANS;
		if (!sc->sc_has_vmid16s)
			reg |= (dom->sd_cb_idx + 1) << SMMU_CBAR_VMID_SHIFT;
	}
	smmu_gr1_write_4(sc, SMMU_CBAR(dom->sd_cb_idx), reg);

	if (dom->sd_stage == 1) {
		reg = SMMU_CB_TCR2_AS | SMMU_CB_TCR2_SEP_UPSTREAM;
		switch (sc->sc_ipa_bits) {
		case 32:
			reg |= SMMU_CB_TCR2_PASIZE_32BIT;
			break;
		case 36:
			reg |= SMMU_CB_TCR2_PASIZE_36BIT;
			break;
		case 40:
			reg |= SMMU_CB_TCR2_PASIZE_40BIT;
			break;
		case 42:
			reg |= SMMU_CB_TCR2_PASIZE_42BIT;
			break;
		case 44:
			reg |= SMMU_CB_TCR2_PASIZE_44BIT;
			break;
		case 48:
			reg |= SMMU_CB_TCR2_PASIZE_48BIT;
			break;
		}
		smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_TCR2, reg);
	}

	if (dom->sd_stage == 1)
		iovabits = sc->sc_va_bits;
	else
		iovabits = sc->sc_ipa_bits;
	/*
	 * Marvell's 8040 does not support 64-bit writes, hence we
	 * can only address 44-bits of VA space for TLB invalidation.
	 */
	if (sc->sc_is_ap806)
		iovabits = min(44, iovabits);
	if (iovabits >= 40)
		dom->sd_4level = 1;

	reg = SMMU_CB_TCR_TG0_4KB | SMMU_CB_TCR_T0SZ(64 - iovabits);
	if (dom->sd_stage == 1) {
		reg |= SMMU_CB_TCR_EPD1;
	} else {
		if (dom->sd_4level)
			reg |= SMMU_CB_TCR_S2_SL0_4KB_L0;
		else
			reg |= SMMU_CB_TCR_S2_SL0_4KB_L1;
		switch (sc->sc_pa_bits) {
		case 32:
			reg |= SMMU_CB_TCR_S2_PASIZE_32BIT;
			break;
		case 36:
			reg |= SMMU_CB_TCR_S2_PASIZE_36BIT;
			break;
		case 40:
			reg |= SMMU_CB_TCR_S2_PASIZE_40BIT;
			break;
		case 42:
			reg |= SMMU_CB_TCR_S2_PASIZE_42BIT;
			break;
		case 44:
			reg |= SMMU_CB_TCR_S2_PASIZE_44BIT;
			break;
		case 48:
			reg |= SMMU_CB_TCR_S2_PASIZE_48BIT;
			break;
		}
	}
	if (sc->sc_coherent)
		reg |= SMMU_CB_TCR_IRGN0_WBWA | SMMU_CB_TCR_ORGN0_WBWA |
		    SMMU_CB_TCR_SH0_ISH;
	else
		reg |= SMMU_CB_TCR_IRGN0_NC | SMMU_CB_TCR_ORGN0_NC |
		    SMMU_CB_TCR_SH0_OSH;
	smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_TCR, reg);

	if (dom->sd_4level) {
		while (dom->sd_vp.l0 == NULL) {
			dom->sd_vp.l0 = pool_get(&sc->sc_vp_pool,
			    PR_WAITOK | PR_ZERO);
		}
		l0va = (vaddr_t)dom->sd_vp.l0->l0; /* top level is l0 */
	} else {
		while (dom->sd_vp.l1 == NULL) {
			dom->sd_vp.l1 = pool_get(&sc->sc_vp_pool,
			    PR_WAITOK | PR_ZERO);
		}
		l0va = (vaddr_t)dom->sd_vp.l1->l1; /* top level is l1 */
	}
	pmap_extract(pmap_kernel(), l0va, &pa);

	if (dom->sd_stage == 1) {
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TTBR0,
		    (uint64_t)dom->sd_cb_idx << SMMU_CB_TTBR_ASID_SHIFT | pa);
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TTBR1,
		    (uint64_t)dom->sd_cb_idx << SMMU_CB_TTBR_ASID_SHIFT);
	} else
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TTBR0, pa);

	if (dom->sd_stage == 1) {
		smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_MAIR0,
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_nGnRnE, 0) |
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_nGnRE, 1) |
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_NC, 2) |
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_WB, 3));
		smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_MAIR1,
		    SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_WT, 0));
	}

	reg = SMMU_CB_SCTLR_M | SMMU_CB_SCTLR_TRE | SMMU_CB_SCTLR_AFE |
	    SMMU_CB_SCTLR_CFRE | SMMU_CB_SCTLR_CFIE;
	if (dom->sd_stage == 1)
		reg |= SMMU_CB_SCTLR_ASIDPNE;
	smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_SCTLR, reg);

	/* Point stream to context bank */
	reg = SMMU_S2CR_TYPE_TRANS | dom->sd_cb_idx;
	if (sc->sc_has_exids && sc->sc_smr)
		reg |= SMMU_S2CR_EXIDVALID;
	smmu_gr0_write_4(sc, SMMU_S2CR(dom->sd_smr_idx), reg);

	/* Map stream idx to S2CR idx */
	if (sc->sc_smr) {
		reg = sid;
		if (!sc->sc_has_exids)
			reg |= SMMU_SMR_VALID;
		smmu_gr0_write_4(sc, SMMU_SMR(dom->sd_smr_idx), reg);
	}

	snprintf(dom->sd_exname, sizeof(dom->sd_exname), "%s:%x",
	    sc->sc_dev.dv_xname, sid);
	dom->sd_iovamap = extent_create(dom->sd_exname, 0,
	    (1LL << iovabits) - 1, M_DEVBUF, NULL, 0, EX_WAITOK |
	    EX_NOCOALESCE);

	/* Reserve first page (to catch NULL access) */
	extent_alloc_region(dom->sd_iovamap, 0, PAGE_SIZE, EX_WAITOK);

	SIMPLEQ_INSERT_TAIL(&sc->sc_domains, dom, sd_list);
	return dom;
}

void
smmu_reserve_region(void *cookie, uint32_t sid, bus_addr_t addr,
    bus_size_t size)
{
	struct smmu_softc *sc = cookie;
	struct smmu_domain *dom;

	dom = smmu_domain_lookup(sc, sid);
	if (dom == NULL)
		return;

	extent_alloc_region(dom->sd_iovamap, addr, size,
	    EX_WAITOK | EX_CONFLICTOK);
}

/* basically pmap follows */

/* virtual to physical helpers */
static inline int
VP_IDX0(vaddr_t va)
{
	return (va >> VP_IDX0_POS) & VP_IDX0_MASK;
}

static inline int
VP_IDX1(vaddr_t va)
{
	return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
}

static inline int
VP_IDX2(vaddr_t va)
{
	return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
}

static inline int
VP_IDX3(vaddr_t va)
{
	return (va >> VP_IDX3_POS) & VP_IDX3_MASK;
}

static inline uint64_t
VP_Lx(paddr_t pa)
{
	/*
	 * This function takes the pa address given and manipulates it
	 * into the form that should be inserted into the VM table.
	 */
	return pa | Lx_TYPE_PT;
}

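/*
 * smmu_set_l1/l2/l3 install a freshly allocated next-level table into
 * its parent: record the kernel-virtual pointer in the shadow array,
 * write the table descriptor, and write the descriptor back from the
 * cache when the SMMU is not I/O coherent.
 */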
void
smmu_set_l1(struct smmu_domain *dom, uint64_t va, struct smmuvp1 *l1_va)
{
	struct smmu_softc *sc = dom->sd_sc;
	uint64_t pg_entry;
	paddr_t l1_pa;
	int idx0;

	if (pmap_extract(pmap_kernel(), (vaddr_t)l1_va, &l1_pa) == 0)
		panic("%s: unable to find vp pa mapping %p", __func__, l1_va);

	if (l1_pa & (Lx_TABLE_ALIGN-1))
		panic("%s: misaligned L1 table", __func__);

	pg_entry = VP_Lx(l1_pa);

	idx0 = VP_IDX0(va);
	dom->sd_vp.l0->vp[idx0] = l1_va;
	dom->sd_vp.l0->l0[idx0] = pg_entry;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)&dom->sd_vp.l0->l0[idx0],
		    sizeof(dom->sd_vp.l0->l0[idx0]));
}

void
smmu_set_l2(struct smmu_domain *dom, uint64_t va, struct smmuvp1 *vp1,
    struct smmuvp2 *l2_va)
{
	struct smmu_softc *sc = dom->sd_sc;
	uint64_t pg_entry;
	paddr_t l2_pa;
	int idx1;

	if (pmap_extract(pmap_kernel(), (vaddr_t)l2_va, &l2_pa) == 0)
		panic("%s: unable to find vp pa mapping %p", __func__, l2_va);

	if (l2_pa & (Lx_TABLE_ALIGN-1))
		panic("%s: misaligned L2 table", __func__);

	pg_entry = VP_Lx(l2_pa);

	idx1 = VP_IDX1(va);
	vp1->vp[idx1] = l2_va;
	vp1->l1[idx1] = pg_entry;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)&vp1->l1[idx1],
		    sizeof(vp1->l1[idx1]));
}

void
smmu_set_l3(struct smmu_domain *dom, uint64_t va, struct smmuvp2 *vp2,
    struct smmuvp3 *l3_va)
{
	struct smmu_softc *sc = dom->sd_sc;
	uint64_t pg_entry;
	paddr_t l3_pa;
	int idx2;

	if (pmap_extract(pmap_kernel(), (vaddr_t)l3_va, &l3_pa) == 0)
		panic("%s: unable to find vp pa mapping %p", __func__, l3_va);

	if (l3_pa & (Lx_TABLE_ALIGN-1))
		panic("%s: misaligned L3 table", __func__);

	pg_entry = VP_Lx(l3_pa);

	idx2 = VP_IDX2(va);
	vp2->vp[idx2] = l3_va;
	vp2->l2[idx2] = pg_entry;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)&vp2->l2[idx2],
		    sizeof(vp2->l2[idx2]));
}

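/*
 * Walk the shadow page table for a DVA and return a pointer to its L3
 * entry, or ENXIO if an intermediate level is missing.
 */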
int
smmu_vp_lookup(struct smmu_domain *dom, vaddr_t va, uint64_t **pl3entry)
{
	struct smmuvp1 *vp1;
	struct smmuvp2 *vp2;
	struct smmuvp3 *vp3;

	if (dom->sd_4level) {
		if (dom->sd_vp.l0 == NULL) {
			return ENXIO;
		}
		vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
	} else {
		vp1 = dom->sd_vp.l1;
	}
	if (vp1 == NULL) {
		return ENXIO;
	}

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		return ENXIO;
	}

	vp3 = vp2->vp[VP_IDX2(va)];
	if (vp3 == NULL) {
		return ENXIO;
	}

	if (pl3entry != NULL)
		*pl3entry = &(vp3->l3[VP_IDX3(va)]);

	return 0;
}

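/*
 * Like smmu_vp_lookup(), but allocate missing intermediate tables.  The
 * check is redone under sd_pmap_mtx so concurrent callers cannot install
 * the same level twice; allocations use PR_NOWAIT and ENOMEM is passed
 * back to the caller.
 */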
int
smmu_vp_enter(struct smmu_domain *dom, vaddr_t va, uint64_t **pl3entry,
    int flags)
{
	struct smmu_softc *sc = dom->sd_sc;
	struct smmuvp1 *vp1;
	struct smmuvp2 *vp2;
	struct smmuvp3 *vp3;

	if (dom->sd_4level) {
		vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
		if (vp1 == NULL) {
			mtx_enter(&dom->sd_pmap_mtx);
			vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
			if (vp1 == NULL) {
				vp1 = pool_get(&sc->sc_vp_pool,
				    PR_NOWAIT | PR_ZERO);
				if (vp1 == NULL) {
					mtx_leave(&dom->sd_pmap_mtx);
					return ENOMEM;
				}
				smmu_set_l1(dom, va, vp1);
			}
			mtx_leave(&dom->sd_pmap_mtx);
		}
	} else {
		vp1 = dom->sd_vp.l1;
	}

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		mtx_enter(&dom->sd_pmap_mtx);
		vp2 = vp1->vp[VP_IDX1(va)];
		if (vp2 == NULL) {
			vp2 = pool_get(&sc->sc_vp_pool, PR_NOWAIT | PR_ZERO);
			if (vp2 == NULL) {
				mtx_leave(&dom->sd_pmap_mtx);
				return ENOMEM;
			}
			smmu_set_l2(dom, va, vp1, vp2);
		}
		mtx_leave(&dom->sd_pmap_mtx);
	}

	vp3 = vp2->vp[VP_IDX2(va)];
	if (vp3 == NULL) {
		mtx_enter(&dom->sd_pmap_mtx);
		vp3 = vp2->vp[VP_IDX2(va)];
		if (vp3 == NULL) {
			vp3 = pool_get(&sc->sc_vp3_pool, PR_NOWAIT | PR_ZERO);
			if (vp3 == NULL) {
				mtx_leave(&dom->sd_pmap_mtx);
				return ENOMEM;
			}
			smmu_set_l3(dom, va, vp2, vp3);
		}
		mtx_leave(&dom->sd_pmap_mtx);
	}

	if (pl3entry != NULL)
		*pl3entry = &(vp3->l3[VP_IDX3(va)]);

	return 0;
}

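/*
 * Build the driver's software PTE representation: the physical page
 * number plus the requested cache mode and protection bits, which
 * smmu_pte_update() later turns into a hardware descriptor.
 */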
uint64_t
smmu_fill_pte(struct smmu_domain *dom, vaddr_t va, paddr_t pa,
    vm_prot_t prot, int flags, int cache)
{
	uint64_t pted;

	pted = pa & PTE_RPGN;

	switch (cache) {
	case PMAP_CACHE_WB:
		break;
	case PMAP_CACHE_WT:
		break;
	case PMAP_CACHE_CI:
		break;
	case PMAP_CACHE_DEV_NGNRNE:
		break;
	case PMAP_CACHE_DEV_NGNRE:
		break;
	default:
		panic("%s: invalid cache mode", __func__);
	}

	pted |= cache;
	pted |= flags & (PROT_READ|PROT_WRITE|PROT_EXEC);
	return pted;
}

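/*
 * Translate a software PTE into a hardware L3 descriptor: pick the
 * memory attributes for the configured stage, apply the access
 * permission bits, store the entry and write it back to memory if the
 * SMMU is not I/O coherent.
 */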
void
smmu_pte_update(struct smmu_domain *dom, uint64_t pted, uint64_t *pl3)
{
	struct smmu_softc *sc = dom->sd_sc;
	uint64_t pte, access_bits;
	uint64_t attr = 0;

	/* see mair in locore.S */
	switch (pted & PMAP_CACHE_BITS) {
	case PMAP_CACHE_WB:
		/* inner and outer writeback */
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_WB);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_WB);
		attr |= ATTR_SH(SH_INNER);
		break;
	case PMAP_CACHE_WT:
		/* inner and outer writethrough */
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_WT);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_WT);
		attr |= ATTR_SH(SH_INNER);
		break;
	case PMAP_CACHE_CI:
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_CI);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_CI);
		attr |= ATTR_SH(SH_INNER);
		break;
	case PMAP_CACHE_DEV_NGNRNE:
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRNE);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_DEV_NGNRNE);
		attr |= ATTR_SH(SH_INNER);
		break;
	case PMAP_CACHE_DEV_NGNRE:
		if (dom->sd_stage == 1)
			attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRE);
		else
			attr |= ATTR_IDX(PTE_MEMATTR_DEV_NGNRE);
		attr |= ATTR_SH(SH_INNER);
		break;
	default:
		panic("%s: invalid cache mode", __func__);
	}

	access_bits = ATTR_PXN | ATTR_AF;
	if (dom->sd_stage == 1) {
		attr |= ATTR_nG;
		access_bits |= ATTR_AP(1);
		if ((pted & PROT_READ) &&
		    !(pted & PROT_WRITE))
			access_bits |= ATTR_AP(2);
	} else {
		if (pted & PROT_READ)
			access_bits |= ATTR_AP(1);
		if (pted & PROT_WRITE)
			access_bits |= ATTR_AP(2);
	}

	pte = (pted & PTE_RPGN) | attr | access_bits | L3_P;
	*pl3 = pte;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)pl3, sizeof(*pl3));
}

void
smmu_pte_remove(struct smmu_domain *dom, vaddr_t va)
{
	/* clear the entry in the page table */
	/* need to deal with ref/change here */
	struct smmu_softc *sc = dom->sd_sc;
	struct smmuvp1 *vp1;
	struct smmuvp2 *vp2;
	struct smmuvp3 *vp3;

	if (dom->sd_4level)
		vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
	else
		vp1 = dom->sd_vp.l1;
	if (vp1 == NULL) {
		panic("%s: missing the l1 for va %lx domain %p", __func__,
		    va, dom);
	}
	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		panic("%s: missing the l2 for va %lx domain %p", __func__,
		    va, dom);
	}
	vp3 = vp2->vp[VP_IDX2(va)];
	if (vp3 == NULL) {
		panic("%s: missing the l3 for va %lx domain %p", __func__,
		    va, dom);
	}
	vp3->l3[VP_IDX3(va)] = 0;
	membar_producer(); /* XXX bus dma sync? */
	if (!sc->sc_coherent)
		cpu_dcache_wb_range((vaddr_t)&vp3->l3[VP_IDX3(va)],
		    sizeof(vp3->l3[VP_IDX3(va)]));
}

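/*
 * smmu_enter() makes sure the page-table path for a DVA exists (and
 * optionally maps it), smmu_map() fills in the L3 entry for an
 * already-allocated DVA, and smmu_unmap() clears it again and issues
 * the per-page IOTLB invalidation.
 */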
int
smmu_enter(struct smmu_domain *dom, vaddr_t va, paddr_t pa, vm_prot_t prot,
    int flags, int cache)
{
	uint64_t *pl3;

	if (smmu_vp_lookup(dom, va, &pl3) != 0) {
		if (smmu_vp_enter(dom, va, &pl3, flags))
			return ENOMEM;
	}

	if (flags & (PROT_READ|PROT_WRITE|PROT_EXEC))
		smmu_map(dom, va, pa, prot, flags, cache);

	return 0;
}

void
smmu_map(struct smmu_domain *dom, vaddr_t va, paddr_t pa, vm_prot_t prot,
    int flags, int cache)
{
	uint64_t *pl3;
	uint64_t pted;
	int ret;

	/* IOVA must already be allocated */
	ret = smmu_vp_lookup(dom, va, &pl3);
	KASSERT(ret == 0);

	/* Update PTED information for physical address */
	pted = smmu_fill_pte(dom, va, pa, prot, flags, cache);

	/* Insert updated information */
	smmu_pte_update(dom, pted, pl3);
}

void
smmu_unmap(struct smmu_domain *dom, vaddr_t va)
{
	struct smmu_softc *sc = dom->sd_sc;
	int ret;

	/* IOVA must already be allocated */
	ret = smmu_vp_lookup(dom, va, NULL);
	KASSERT(ret == 0);

	/* Remove mapping from pagetable */
	smmu_pte_remove(dom, va);

	/* Invalidate IOTLB */
	if (dom->sd_stage == 1)
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TLBIVAL,
		    (uint64_t)dom->sd_cb_idx << 48 | va >> PAGE_SHIFT);
	else
		smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TLBIIPAS2L,
		    va >> PAGE_SHIFT);
}

void
smmu_remove(struct smmu_domain *dom, vaddr_t va)
{
	/* TODO: garbage collect page tables? */
}

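/*
 * Glue between bus_dma and the page tables: smmu_load_map() maps every
 * page backing the freshly loaded segments into the map's preallocated
 * DVA window and rewrites ds_addr to the device-visible address;
 * smmu_unload_map() tears those mappings down and syncs the IOTLB.
 */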
int
smmu_load_map(struct smmu_domain *dom, bus_dmamap_t map)
{
	struct smmu_map_state *sms = map->_dm_cookie;
	u_long dva, maplen;
	int seg;

	maplen = 0;
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		paddr_t pa = map->dm_segs[seg]._ds_paddr;
		psize_t off = pa - trunc_page(pa);
		maplen += round_page(map->dm_segs[seg].ds_len + off);
	}
	KASSERT(maplen <= sms->sms_len);

	dva = sms->sms_dva;
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		paddr_t pa = map->dm_segs[seg]._ds_paddr;
		psize_t off = pa - trunc_page(pa);
		u_long len = round_page(map->dm_segs[seg].ds_len + off);

		map->dm_segs[seg].ds_addr = dva + off;

		pa = trunc_page(pa);
		while (len > 0) {
			smmu_map(dom, dva, pa,
			    PROT_READ | PROT_WRITE,
			    PROT_READ | PROT_WRITE, PMAP_CACHE_WB);

			dva += PAGE_SIZE;
			pa += PAGE_SIZE;
			len -= PAGE_SIZE;
			sms->sms_loaded += PAGE_SIZE;
		}
	}

	return 0;
}

void
smmu_unload_map(struct smmu_domain *dom, bus_dmamap_t map)
{
	struct smmu_map_state *sms = map->_dm_cookie;
	u_long len, dva;

	if (sms->sms_loaded == 0)
		return;

	dva = sms->sms_dva;
	len = sms->sms_loaded;

	while (len > 0) {
		smmu_unmap(dom, dva);

		dva += PAGE_SIZE;
		len -= PAGE_SIZE;
	}

	sms->sms_loaded = 0;

	smmu_tlb_sync_context(dom);
}

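/*
 * bus_dma wrappers.  smmu_dmamap_create() allocates, next to the real
 * map, an IOVA window large enough for a worst-case load (round_page(size)
 * plus one page per segment, plus a guard page) and pre-populates the
 * page-table levels for it, so later loads only need to fill L3 entries.
 * The load/unload wrappers defer to the parent tag and then add or
 * remove the SMMU mappings on top.
 */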
int
smmu_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamap)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	struct smmu_map_state *sms;
	bus_dmamap_t map;
	u_long dva, len;
	int error;

	error = sc->sc_dmat->_dmamap_create(sc->sc_dmat, size,
	    nsegments, maxsegsz, boundary, flags, &map);
	if (error)
		return error;

	sms = malloc(sizeof(*sms), M_DEVBUF, (flags & BUS_DMA_NOWAIT) ?
	    (M_NOWAIT|M_ZERO) : (M_WAITOK|M_ZERO));
	if (sms == NULL) {
		sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
		return ENOMEM;
	}

	/* Approximation of maximum pages needed. */
	len = round_page(size) + nsegments * PAGE_SIZE;

	/* Allocate IOVA, and a guard page at the end. */
	mtx_enter(&dom->sd_iova_mtx);
	error = extent_alloc_with_descr(dom->sd_iovamap, len + PAGE_SIZE,
	    PAGE_SIZE, 0, 0, EX_NOWAIT, &sms->sms_er, &dva);
	mtx_leave(&dom->sd_iova_mtx);
	if (error) {
		sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
		free(sms, M_DEVBUF, sizeof(*sms));
		return error;
	}

	sms->sms_dva = dva;
	sms->sms_len = len;

	while (len > 0) {
		error = smmu_enter(dom, dva, dva, PROT_READ | PROT_WRITE,
		    PROT_NONE, PMAP_CACHE_WB);
		KASSERT(error == 0); /* FIXME: rollback smmu_enter() */
		dva += PAGE_SIZE;
		len -= PAGE_SIZE;
	}

	map->_dm_cookie = sms;
	*dmamap = map;
	return 0;
}

void
smmu_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	struct smmu_map_state *sms = map->_dm_cookie;
	u_long dva, len;
	int error;

	if (sms->sms_loaded)
		smmu_dmamap_unload(t, map);

	dva = sms->sms_dva;
	len = sms->sms_len;

	while (len > 0) {
		smmu_remove(dom, dva);
		dva += PAGE_SIZE;
		len -= PAGE_SIZE;
	}

	mtx_enter(&dom->sd_iova_mtx);
	error = extent_free(dom->sd_iovamap, sms->sms_dva,
	    sms->sms_len + PAGE_SIZE, EX_NOWAIT);
	mtx_leave(&dom->sd_iova_mtx);
	KASSERT(error == 0);

	free(sms, M_DEVBUF, sizeof(*sms));
	sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
}

int
smmu_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    bus_size_t buflen, struct proc *p, int flags)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	int error;

	error = sc->sc_dmat->_dmamap_load(sc->sc_dmat, map,
	    buf, buflen, p, flags);
	if (error)
		return error;

	error = smmu_load_map(dom, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

int
smmu_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0,
    int flags)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	int error;

	error = sc->sc_dmat->_dmamap_load_mbuf(sc->sc_dmat, map,
	    m0, flags);
	if (error)
		return error;

	error = smmu_load_map(dom, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

int
smmu_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map, struct uio *uio,
    int flags)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	int error;

	error = sc->sc_dmat->_dmamap_load_uio(sc->sc_dmat, map,
	    uio, flags);
	if (error)
		return error;

	error = smmu_load_map(dom, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

int
smmu_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map, bus_dma_segment_t *segs,
    int nsegs, bus_size_t size, int flags)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;
	int error;

	error = sc->sc_dmat->_dmamap_load_raw(sc->sc_dmat, map,
	    segs, nsegs, size, flags);
	if (error)
		return error;

	error = smmu_load_map(dom, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

void
smmu_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct smmu_domain *dom = t->_cookie;
	struct smmu_softc *sc = dom->sd_sc;

	smmu_unload_map(dom, map);
	sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
}