1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2012 Oleksandr Tymoshenko <gonzo@freebsd.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/mutex.h>
40 #include <sys/rman.h>
41 #include <sys/sysctl.h>
42 #include <sys/taskqueue.h>
43 
44 #include <machine/bus.h>
45 
46 #include <dev/ofw/ofw_bus.h>
47 #include <dev/ofw/ofw_bus_subr.h>
48 
49 #include <dev/mmc/bridge.h>
50 #include <dev/mmc/mmcreg.h>
51 
52 #include <dev/sdhci/sdhci.h>
53 
54 #include "mmcbr_if.h"
55 #include "sdhci_if.h"
56 
57 #include "opt_mmccam.h"
58 
59 #include "bcm2835_dma.h"
60 #include <arm/broadcom/bcm2835/bcm2835_mbox_prop.h>
61 #ifdef NOTYET
62 #include <arm/broadcom/bcm2835/bcm2835_clkman.h>
63 #endif
64 #include <arm/broadcom/bcm2835/bcm2835_vcbus.h>
65 
/*
 * Fallback base clock frequencies, in MHz, used by attach when neither the
 * firmware mailbox nor the device tree yields a usable value.
 */
#define	BCM2835_DEFAULT_SDHCI_FREQ	50
#define	BCM2838_DEFAULT_SDHCI_FREQ	100

/*
 * Granularity, in bytes, of DMA-driven transfers: lengths handed to the DMA
 * path are rounded down to a multiple of this, and it is also the maximum
 * segment size given to the busdma tag.
 */
#define	BCM_SDHCI_BUFFER_SIZE		512
/*
 * NUM_DMA_SEGS is the number of DMA segments we want to accommodate on average.
 * We add in a number of segments based on how much we may need to spill into
 * another segment due to crossing page boundaries.  e.g. up to PAGE_SIZE, an
 * extra page is needed as we can cross a page boundary exactly once.
 */
#define	NUM_DMA_SEGS			1
#define	NUM_DMA_SPILL_SEGS		\
	((((NUM_DMA_SEGS * BCM_SDHCI_BUFFER_SIZE) - 1) / PAGE_SIZE) + 1)
/* Total segment slots reserved in the softc and requested from busdma. */
#define	ALLOCATED_DMA_SEGS		(NUM_DMA_SEGS +	NUM_DMA_SPILL_SEGS)
/* Largest number of bytes covered by a single busdma load. */
#define	BCM_DMA_MAXSIZE			(NUM_DMA_SEGS * BCM_SDHCI_BUFFER_SIZE)

/* Bytes of the current command's data beyond the slot's running offset. */
#define	BCM_SDHCI_SLOT_LEFT(slot)	\
	((slot)->curcmd->data->len - (slot)->offset)

/*
 * Size of the next DMA load: the remaining data rounded down to a whole
 * number of buffers, capped at BCM_DMA_MAXSIZE.  Evaluates to 0 when less
 * than one buffer remains.
 */
#define	BCM_SDHCI_SEGSZ_LEFT(slot)	\
	min(BCM_DMA_MAXSIZE,		\
	    rounddown(BCM_SDHCI_SLOT_LEFT(slot), BCM_SDHCI_BUFFER_SIZE))

/*
 * Interrupt bits handled by the DMA path: the "data/space pending" bits, and
 * those plus the data-end bit.  The latter set is masked from the SDHCI
 * signal enable register while a DMA transfer is in progress.
 */
#define	DATA_PENDING_MASK	(SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL)
#define	DATA_XFER_MASK		(DATA_PENDING_MASK | SDHCI_INT_DATA_END)
91 
/*
 * Debug output.  With DEBUG defined, dprintf() prints its arguments prefixed
 * with the calling function's name whenever bcm2835_sdhci_debug is non-zero;
 * the level is exposed both as a loader tunable and as a sysctl.  Without
 * DEBUG, dprintf() expands to nothing.
 */
#ifdef DEBUG
static int bcm2835_sdhci_debug = 0;

TUNABLE_INT("hw.bcm2835.sdhci.debug", &bcm2835_sdhci_debug);
SYSCTL_INT(_hw_sdhci, OID_AUTO, bcm2835_sdhci_debug, CTLFLAG_RWTUN,
    &bcm2835_sdhci_debug, 0, "bcm2835 SDHCI debug level");

#define	dprintf(fmt, args...)					\
	do {							\
		if (bcm2835_sdhci_debug)			\
			printf("%s: " fmt, __func__, ##args);	\
	}  while (0)
#else
#define dprintf(fmt, args...)
#endif
107 
/* When non-zero, attach advertises SDHCI_CAN_DO_HISPD in the slot caps. */
static int bcm2835_sdhci_hs = 1;
/* When non-zero, attach does not enable SDHCI_PLATFORM_TRANSFER (DMA). */
static int bcm2835_sdhci_pio_mode = 0;
110 
/* Per-controller-variant configuration, selected via the compatible string. */
struct bcm_mmc_conf {
	int	clock_id;	/* mailbox clock id queried for the rate */
	int	clock_src;	/* clock manager source; only used if NOTYET */
	int	default_freq;	/* fallback base frequency, in MHz */
	int	quirks;		/* SDHCI_QUIRK_* flags copied to the slot */
	int	emmc_dreq;	/* DREQ used for the DMA destination on writes */
};
118 
/* Configuration used for the bcm2835 SDHCI/MMC compatible strings. */
struct bcm_mmc_conf bcm2835_sdhci_conf = {
	.clock_id	= BCM2835_MBOX_CLOCK_ID_EMMC,
	.clock_src	= -1,
	.default_freq	= BCM2835_DEFAULT_SDHCI_FREQ,
	.quirks		= SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
	    SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | SDHCI_QUIRK_DONT_SET_HISPD_BIT |
	    SDHCI_QUIRK_MISSING_CAPS,
	.emmc_dreq	= BCM_DMA_DREQ_EMMC,
};
128 
/*
 * Configuration used for the bcm2711/bcm2838 emmc2 compatible strings: no
 * quirks and no DREQ pacing for DMA writes.
 */
struct bcm_mmc_conf bcm2838_emmc2_conf = {
	.clock_id	= BCM2838_MBOX_CLOCK_ID_EMMC2,
	.clock_src	= -1,
	.default_freq	= BCM2838_DEFAULT_SDHCI_FREQ,
	.quirks		= 0,
	.emmc_dreq	= BCM_DMA_DREQ_NONE,
};
136 
/*
 * Device tree compatible strings handled by this driver.  The data word of
 * each entry points at the bcm_mmc_conf to use; the terminating entry's zero
 * data is what probe and attach treat as "no match".
 */
static struct ofw_compat_data compat_data[] = {
	{"broadcom,bcm2835-sdhci",	(uintptr_t)&bcm2835_sdhci_conf},
	{"brcm,bcm2835-sdhci",		(uintptr_t)&bcm2835_sdhci_conf},
	{"brcm,bcm2835-mmc",		(uintptr_t)&bcm2835_sdhci_conf},
	{"brcm,bcm2711-emmc2",		(uintptr_t)&bcm2838_emmc2_conf},
	{"brcm,bcm2838-emmc2",		(uintptr_t)&bcm2838_emmc2_conf},
	{NULL,				0}
};
145 
/* Loader tunables backing the high-speed and PIO-mode switches. */
TUNABLE_INT("hw.bcm2835.sdhci.hs", &bcm2835_sdhci_hs);
TUNABLE_INT("hw.bcm2835.sdhci.pio_mode", &bcm2835_sdhci_pio_mode);
148 
/* Per-device driver state. */
struct bcm_sdhci_softc {
	device_t		sc_dev;		/* our device */
	struct resource *	sc_mem_res;	/* register window */
	struct resource *	sc_irq_res;	/* controller interrupt */
	bus_space_tag_t		sc_bst;		/* bus tag of sc_mem_res */
	bus_space_handle_t	sc_bsh;		/* bus handle of sc_mem_res */
	void *			sc_intrhand;	/* cookie from bus_setup_intr() */
	struct mmc_request *	sc_req;		/* cleared in attach */
	struct sdhci_slot	sc_slot;	/* the single SDHCI slot */
	int			sc_dma_ch;	/* channel from bcm_dma_allocate() */
	bus_dma_tag_t		sc_dma_tag;	/* tag for data buffer mappings */
	bus_dmamap_t		sc_dma_map;	/* the one map loaded at a time */
	/* Address of the SDHCI_BUFFER register, as used by the DMA engine. */
	vm_paddr_t		sc_sdhci_buffer_phys;
	/* Segments of the current mapping, filled in by bcm_sdhci_dmacb(). */
	bus_addr_t		dmamap_seg_addrs[ALLOCATED_DMA_SEGS];
	bus_size_t		dmamap_seg_sizes[ALLOCATED_DMA_SEGS];
	int			dmamap_seg_count; /* 0 when nothing is loaded */
	int			dmamap_seg_index; /* next segment to start */
	int			dmamap_status;	/* error from the load callback */
	/* Software copy of the 32-bit word at SDHCI_BLOCK_SIZE. */
	uint32_t		blksz_and_count;
	/* Software copy of the 32-bit word at SDHCI_TRANSFER_MODE. */
	uint32_t		cmd_and_mode;
	/* True while blksz_and_count holds a value not yet written out. */
	bool			need_update_blk;
#ifdef NOTYET
	device_t		clkman;		/* clock manager device */
#endif
	struct bcm_mmc_conf *	conf;		/* variant configuration */
};
175 
/* Device interface and interrupt handler. */
static int bcm_sdhci_probe(device_t);
static int bcm_sdhci_attach(device_t);
static int bcm_sdhci_detach(device_t);
static void bcm_sdhci_intr(void *);

/* MMC bridge read-only query and the DMA-driven transfer machinery. */
static int bcm_sdhci_get_ro(device_t, device_t);
static void bcm_sdhci_dma_intr(int ch, void *arg);
static void bcm_sdhci_start_dma(struct sdhci_slot *slot);
184 
185 static void
186 bcm_sdhci_dmacb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
187 {
188 	struct bcm_sdhci_softc *sc = arg;
189 	int i;
190 
191 	/* Sanity check: we can only ever have one mapping at a time. */
192 	KASSERT(sc->dmamap_seg_count == 0, ("leaked DMA segment"));
193 	sc->dmamap_status = err;
194 	sc->dmamap_seg_count = nseg;
195 
196 	/* Note nseg is guaranteed to be zero if err is non-zero. */
197 	for (i = 0; i < nseg; i++) {
198 		sc->dmamap_seg_addrs[i] = segs[i].ds_addr;
199 		sc->dmamap_seg_sizes[i] = segs[i].ds_len;
200 	}
201 }
202 
203 static int
204 bcm_sdhci_probe(device_t dev)
205 {
206 
207 	if (!ofw_bus_status_okay(dev))
208 		return (ENXIO);
209 
210 	if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0)
211 		return (ENXIO);
212 
213 	device_set_desc(dev, "Broadcom 2708 SDHCI controller");
214 
215 	return (BUS_PROBE_DEFAULT);
216 }
217 
218 static int
219 bcm_sdhci_attach(device_t dev)
220 {
221 	struct bcm_sdhci_softc *sc = device_get_softc(dev);
222 	int rid, err;
223 	phandle_t node;
224 	pcell_t cell;
225 	u_int default_freq;
226 
227 	sc->sc_dev = dev;
228 	sc->sc_req = NULL;
229 
230 	sc->conf = (struct bcm_mmc_conf *)ofw_bus_search_compatible(dev,
231 	    compat_data)->ocd_data;
232 	if (sc->conf == 0)
233 	    return (ENXIO);
234 
235 	err = bcm2835_mbox_set_power_state(BCM2835_MBOX_POWER_ID_EMMC, TRUE);
236 	if (err != 0) {
237 		if (bootverbose)
238 			device_printf(dev, "Unable to enable the power\n");
239 		return (err);
240 	}
241 
242 	default_freq = 0;
243 	err = bcm2835_mbox_get_clock_rate(sc->conf->clock_id, &default_freq);
244 	if (err == 0) {
245 		/* Convert to MHz */
246 		default_freq /= 1000000;
247 	}
248 	if (default_freq == 0) {
249 		node = ofw_bus_get_node(sc->sc_dev);
250 		if ((OF_getencprop(node, "clock-frequency", &cell,
251 		    sizeof(cell))) > 0)
252 			default_freq = cell / 1000000;
253 	}
254 	if (default_freq == 0)
255 		default_freq = sc->conf->default_freq;
256 
257 	if (bootverbose)
258 		device_printf(dev, "SDHCI frequency: %dMHz\n", default_freq);
259 #ifdef NOTYET
260 	if (sc->conf->clock_src > 0) {
261 		uint32_t f;
262 		sc->clkman = devclass_get_device(
263 		    devclass_find("bcm2835_clkman"), 0);
264 		if (sc->clkman == NULL) {
265 			device_printf(dev, "cannot find Clock Manager\n");
266 			return (ENXIO);
267 		}
268 
269 		f = bcm2835_clkman_set_frequency(sc->clkman,
270 		    sc->conf->clock_src, default_freq);
271 		if (f == 0)
272 			return (EINVAL);
273 
274 		if (bootverbose)
275 			device_printf(dev, "Clock source frequency: %dMHz\n",
276 			    f);
277 	}
278 #endif
279 
280 	rid = 0;
281 	sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
282 	    RF_ACTIVE);
283 	if (!sc->sc_mem_res) {
284 		device_printf(dev, "cannot allocate memory window\n");
285 		err = ENXIO;
286 		goto fail;
287 	}
288 
289 	sc->sc_bst = rman_get_bustag(sc->sc_mem_res);
290 	sc->sc_bsh = rman_get_bushandle(sc->sc_mem_res);
291 
292 	rid = 0;
293 	sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
294 	    RF_ACTIVE | RF_SHAREABLE);
295 	if (!sc->sc_irq_res) {
296 		device_printf(dev, "cannot allocate interrupt\n");
297 		err = ENXIO;
298 		goto fail;
299 	}
300 
301 	if (bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_BIO | INTR_MPSAFE,
302 	    NULL, bcm_sdhci_intr, sc, &sc->sc_intrhand)) {
303 		device_printf(dev, "cannot setup interrupt handler\n");
304 		err = ENXIO;
305 		goto fail;
306 	}
307 
308 	if (!bcm2835_sdhci_pio_mode)
309 		sc->sc_slot.opt = SDHCI_PLATFORM_TRANSFER;
310 
311 	sc->sc_slot.caps = SDHCI_CAN_VDD_330 | SDHCI_CAN_VDD_180;
312 	if (bcm2835_sdhci_hs)
313 		sc->sc_slot.caps |= SDHCI_CAN_DO_HISPD;
314 	sc->sc_slot.caps |= (default_freq << SDHCI_CLOCK_BASE_SHIFT);
315 	sc->sc_slot.quirks = sc->conf->quirks;
316 
317 	sdhci_init_slot(dev, &sc->sc_slot, 0);
318 
319 	sc->sc_dma_ch = bcm_dma_allocate(BCM_DMA_CH_ANY);
320 	if (sc->sc_dma_ch == BCM_DMA_CH_INVALID)
321 		goto fail;
322 
323 	err = bcm_dma_setup_intr(sc->sc_dma_ch, bcm_sdhci_dma_intr, sc);
324 	if (err != 0) {
325 		device_printf(dev,
326 		    "cannot setup dma interrupt handler\n");
327 		err = ENXIO;
328 		goto fail;
329 	}
330 
331 	/* Allocate bus_dma resources. */
332 	err = bus_dma_tag_create(bus_get_dma_tag(dev),
333 	    1, 0, bcm283x_dmabus_peripheral_lowaddr(),
334 	    BUS_SPACE_MAXADDR, NULL, NULL,
335 	    BCM_DMA_MAXSIZE, ALLOCATED_DMA_SEGS, BCM_SDHCI_BUFFER_SIZE,
336 	    BUS_DMA_ALLOCNOW, NULL, NULL,
337 	    &sc->sc_dma_tag);
338 
339 	if (err) {
340 		device_printf(dev, "failed allocate DMA tag");
341 		goto fail;
342 	}
343 
344 	err = bus_dmamap_create(sc->sc_dma_tag, 0, &sc->sc_dma_map);
345 	if (err) {
346 		device_printf(dev, "bus_dmamap_create failed\n");
347 		goto fail;
348 	}
349 
350 	/* FIXME: Fix along with other BUS_SPACE_PHYSADDR instances */
351 	sc->sc_sdhci_buffer_phys = rman_get_start(sc->sc_mem_res) +
352 	    SDHCI_BUFFER;
353 
354 	bus_generic_probe(dev);
355 	bus_generic_attach(dev);
356 
357 	sdhci_start_slot(&sc->sc_slot);
358 
359 	/* Seed our copies. */
360 	sc->blksz_and_count = SDHCI_READ_4(dev, &sc->sc_slot, SDHCI_BLOCK_SIZE);
361 	sc->cmd_and_mode = SDHCI_READ_4(dev, &sc->sc_slot, SDHCI_TRANSFER_MODE);
362 
363 	return (0);
364 
365 fail:
366 	if (sc->sc_intrhand)
367 		bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_intrhand);
368 	if (sc->sc_irq_res)
369 		bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq_res);
370 	if (sc->sc_mem_res)
371 		bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res);
372 
373 	return (err);
374 }
375 
376 static int
377 bcm_sdhci_detach(device_t dev)
378 {
379 
380 	return (EBUSY);
381 }
382 
383 static void
384 bcm_sdhci_intr(void *arg)
385 {
386 	struct bcm_sdhci_softc *sc = arg;
387 
388 	sdhci_generic_intr(&sc->sc_slot);
389 }
390 
391 static int
392 bcm_sdhci_get_ro(device_t bus, device_t child)
393 {
394 
395 	return (0);
396 }
397 
398 static inline uint32_t
399 RD4(struct bcm_sdhci_softc *sc, bus_size_t off)
400 {
401 	uint32_t val = bus_space_read_4(sc->sc_bst, sc->sc_bsh, off);
402 	return val;
403 }
404 
405 static inline void
406 WR4(struct bcm_sdhci_softc *sc, bus_size_t off, uint32_t val)
407 {
408 
409 	bus_space_write_4(sc->sc_bst, sc->sc_bsh, off, val);
410 	/*
411 	 * The Arasan HC has a bug where it may lose the content of
412 	 * consecutive writes to registers that are within two SD-card
413 	 * clock cycles of each other (a clock domain crossing problem).
414 	 */
415 	if (sc->sc_slot.clock > 0)
416 		DELAY(((2 * 1000000) / sc->sc_slot.clock) + 1);
417 }
418 
419 static uint8_t
420 bcm_sdhci_read_1(device_t dev, struct sdhci_slot *slot, bus_size_t off)
421 {
422 	struct bcm_sdhci_softc *sc = device_get_softc(dev);
423 	uint32_t val = RD4(sc, off & ~3);
424 
425 	return ((val >> (off & 3)*8) & 0xff);
426 }
427 
428 static uint16_t
429 bcm_sdhci_read_2(device_t dev, struct sdhci_slot *slot, bus_size_t off)
430 {
431 	struct bcm_sdhci_softc *sc = device_get_softc(dev);
432 	uint32_t val32;
433 
434 	/*
435 	 * Standard 32-bit handling of command and transfer mode, as
436 	 * well as block size and count.
437 	 */
438 	if ((off == SDHCI_BLOCK_SIZE || off == SDHCI_BLOCK_COUNT) &&
439 	    sc->need_update_blk)
440 		val32 = sc->blksz_and_count;
441 	else if (off == SDHCI_TRANSFER_MODE || off == SDHCI_COMMAND_FLAGS)
442 		val32 = sc->cmd_and_mode;
443 	else
444 		val32 = RD4(sc, off & ~3);
445 
446 	return ((val32 >> (off & 3)*8) & 0xffff);
447 }
448 
449 static uint32_t
450 bcm_sdhci_read_4(device_t dev, struct sdhci_slot *slot, bus_size_t off)
451 {
452 	struct bcm_sdhci_softc *sc = device_get_softc(dev);
453 
454 	return RD4(sc, off);
455 }
456 
457 static void
458 bcm_sdhci_read_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off,
459     uint32_t *data, bus_size_t count)
460 {
461 	struct bcm_sdhci_softc *sc = device_get_softc(dev);
462 
463 	bus_space_read_multi_4(sc->sc_bst, sc->sc_bsh, off, data, count);
464 }
465 
466 static void
467 bcm_sdhci_write_1(device_t dev, struct sdhci_slot *slot, bus_size_t off,
468     uint8_t val)
469 {
470 	struct bcm_sdhci_softc *sc = device_get_softc(dev);
471 	uint32_t val32 = RD4(sc, off & ~3);
472 	val32 &= ~(0xff << (off & 3)*8);
473 	val32 |= (val << (off & 3)*8);
474 	WR4(sc, off & ~3, val32);
475 }
476 
477 static void
478 bcm_sdhci_write_2(device_t dev, struct sdhci_slot *slot, bus_size_t off,
479     uint16_t val)
480 {
481 	struct bcm_sdhci_softc *sc = device_get_softc(dev);
482 	uint32_t val32;
483 
484 	/*
485 	 * If we have a queued up 16bit value for blk size or count, use and
486 	 * update the saved value rather than doing any real register access.
487 	 * If we did not touch either since the last write, then read from
488 	 * register as at least block count can change.
489 	 * Similarly, if we are about to issue a command, always use the saved
490 	 * value for transfer mode as we can never write that without issuing
491 	 * a command.
492 	 */
493 	if ((off == SDHCI_BLOCK_SIZE || off == SDHCI_BLOCK_COUNT) &&
494 	    sc->need_update_blk)
495 		val32 = sc->blksz_and_count;
496 	else if (off == SDHCI_COMMAND_FLAGS)
497 		val32 = sc->cmd_and_mode;
498 	else
499 		val32 = RD4(sc, off & ~3);
500 
501 	val32 &= ~(0xffff << (off & 3)*8);
502 	val32 |= (val << (off & 3)*8);
503 
504 	if (off == SDHCI_TRANSFER_MODE)
505 		sc->cmd_and_mode = val32;
506 	else if (off == SDHCI_BLOCK_SIZE || off == SDHCI_BLOCK_COUNT) {
507 		sc->blksz_and_count = val32;
508 		sc->need_update_blk = true;
509 	} else {
510 		if (off == SDHCI_COMMAND_FLAGS) {
511 			/* If we saved blk writes, do them now before cmd. */
512 			if (sc->need_update_blk) {
513 				WR4(sc, SDHCI_BLOCK_SIZE, sc->blksz_and_count);
514 				sc->need_update_blk = false;
515 			}
516 			/* Always save cmd and mode registers. */
517 			sc->cmd_and_mode = val32;
518 		}
519 		WR4(sc, off & ~3, val32);
520 	}
521 }
522 
523 static void
524 bcm_sdhci_write_4(device_t dev, struct sdhci_slot *slot, bus_size_t off,
525     uint32_t val)
526 {
527 	struct bcm_sdhci_softc *sc = device_get_softc(dev);
528 	WR4(sc, off, val);
529 }
530 
531 static void
532 bcm_sdhci_write_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off,
533     uint32_t *data, bus_size_t count)
534 {
535 	struct bcm_sdhci_softc *sc = device_get_softc(dev);
536 
537 	bus_space_write_multi_4(sc->sc_bst, sc->sc_bsh, off, data, count);
538 }
539 
540 static void
541 bcm_sdhci_start_dma_seg(struct bcm_sdhci_softc *sc)
542 {
543 	struct sdhci_slot *slot;
544 	vm_paddr_t pdst, psrc;
545 	int err, idx, len, sync_op, width;
546 
547 	slot = &sc->sc_slot;
548 	mtx_assert(&slot->mtx, MA_OWNED);
549 	idx = sc->dmamap_seg_index++;
550 	len = sc->dmamap_seg_sizes[idx];
551 	slot->offset += len;
552 	width = (len & 0xf ? BCM_DMA_32BIT : BCM_DMA_128BIT);
553 
554 	if (slot->curcmd->data->flags & MMC_DATA_READ) {
555 		/*
556 		 * Peripherals on the AXI bus do not need DREQ pacing for reads
557 		 * from the ARM core, so we can safely set this to NONE.
558 		 */
559 		bcm_dma_setup_src(sc->sc_dma_ch, BCM_DMA_DREQ_NONE,
560 		    BCM_DMA_SAME_ADDR, BCM_DMA_32BIT);
561 		bcm_dma_setup_dst(sc->sc_dma_ch, BCM_DMA_DREQ_NONE,
562 		    BCM_DMA_INC_ADDR, width);
563 		psrc = sc->sc_sdhci_buffer_phys;
564 		pdst = sc->dmamap_seg_addrs[idx];
565 		sync_op = BUS_DMASYNC_PREREAD;
566 	} else {
567 		/*
568 		 * The ordering here is important, because the last write to
569 		 * dst/src in the dma control block writes the real dreq value.
570 		 */
571 		bcm_dma_setup_src(sc->sc_dma_ch, BCM_DMA_DREQ_NONE,
572 		    BCM_DMA_INC_ADDR, width);
573 		bcm_dma_setup_dst(sc->sc_dma_ch, sc->conf->emmc_dreq,
574 		    BCM_DMA_SAME_ADDR, BCM_DMA_32BIT);
575 		psrc = sc->dmamap_seg_addrs[idx];
576 		pdst = sc->sc_sdhci_buffer_phys;
577 		sync_op = BUS_DMASYNC_PREWRITE;
578 	}
579 
580 	/*
581 	 * When starting a new DMA operation do the busdma sync operation, and
582 	 * disable SDCHI data interrrupts because we'll be driven by DMA
583 	 * interrupts (or SDHCI error interrupts) until the IO is done.
584 	 */
585 	if (idx == 0) {
586 		bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map, sync_op);
587 
588 		slot->intmask &= ~DATA_XFER_MASK;
589 		bcm_sdhci_write_4(sc->sc_dev, slot, SDHCI_SIGNAL_ENABLE,
590 		    slot->intmask);
591 	}
592 
593 	/*
594 	 * Start the DMA transfer.  Only programming errors (like failing to
595 	 * allocate a channel) cause a non-zero return from bcm_dma_start().
596 	 */
597 	err = bcm_dma_start(sc->sc_dma_ch, psrc, pdst, len);
598 	KASSERT((err == 0), ("bcm2835_sdhci: failed DMA start"));
599 }
600 
601 static void
602 bcm_sdhci_dma_exit(struct bcm_sdhci_softc *sc)
603 {
604 	struct sdhci_slot *slot = &sc->sc_slot;
605 
606 	mtx_assert(&slot->mtx, MA_OWNED);
607 
608 	/* Re-enable interrupts */
609 	slot->intmask |= DATA_XFER_MASK;
610 	bcm_sdhci_write_4(slot->bus, slot, SDHCI_SIGNAL_ENABLE,
611 	    slot->intmask);
612 }
613 
614 static void
615 bcm_sdhci_dma_unload(struct bcm_sdhci_softc *sc)
616 {
617 	struct sdhci_slot *slot = &sc->sc_slot;
618 
619 	if (sc->dmamap_seg_count == 0)
620 		return;
621 	if ((slot->curcmd->data->flags & MMC_DATA_READ) != 0)
622 		bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map,
623 		    BUS_DMASYNC_POSTREAD);
624 	else
625 		bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map,
626 		    BUS_DMASYNC_POSTWRITE);
627 	bus_dmamap_unload(sc->sc_dma_tag, sc->sc_dma_map);
628 
629 	sc->dmamap_seg_count = 0;
630 	sc->dmamap_seg_index = 0;
631 }
632 
633 static void
634 bcm_sdhci_dma_intr(int ch, void *arg)
635 {
636 	struct bcm_sdhci_softc *sc = (struct bcm_sdhci_softc *)arg;
637 	struct sdhci_slot *slot = &sc->sc_slot;
638 	uint32_t reg;
639 
640 	mtx_lock(&slot->mtx);
641 	if (slot->curcmd == NULL)
642 		goto out;
643 	/*
644 	 * If there are more segments for the current dma, start the next one.
645 	 * Otherwise unload the dma map and decide what to do next based on the
646 	 * status of the sdhci controller and whether there's more data left.
647 	 */
648 	if (sc->dmamap_seg_index < sc->dmamap_seg_count) {
649 		bcm_sdhci_start_dma_seg(sc);
650 		goto out;
651 	}
652 
653 	bcm_sdhci_dma_unload(sc);
654 
655 	/*
656 	 * If we had no further segments pending, we need to determine how to
657 	 * proceed next.  If the 'data/space pending' bit is already set and we
658 	 * can continue via DMA, do so.  Otherwise, re-enable interrupts and
659 	 * return.
660 	 */
661 	reg = bcm_sdhci_read_4(slot->bus, slot, SDHCI_INT_STATUS) &
662 	    DATA_XFER_MASK;
663 	if ((reg & DATA_PENDING_MASK) != 0 &&
664 	    BCM_SDHCI_SEGSZ_LEFT(slot) >= BCM_SDHCI_BUFFER_SIZE) {
665 		/* ACK any pending interrupts */
666 		bcm_sdhci_write_4(slot->bus, slot, SDHCI_INT_STATUS,
667 		    DATA_PENDING_MASK);
668 
669 		bcm_sdhci_start_dma(slot);
670 		if (slot->curcmd->error != 0) {
671 			/* We won't recover from this error for this command. */
672 			bcm_sdhci_dma_unload(sc);
673 			bcm_sdhci_dma_exit(sc);
674 			sdhci_finish_data(slot);
675 		}
676 	} else if ((reg & SDHCI_INT_DATA_END) != 0) {
677 		bcm_sdhci_dma_exit(sc);
678 		bcm_sdhci_write_4(slot->bus, slot, SDHCI_INT_STATUS,
679 		    reg);
680 		slot->flags &= ~PLATFORM_DATA_STARTED;
681 		sdhci_finish_data(slot);
682 	} else {
683 		bcm_sdhci_dma_exit(sc);
684 	}
685 out:
686 	mtx_unlock(&slot->mtx);
687 }
688 
689 static void
690 bcm_sdhci_start_dma(struct sdhci_slot *slot)
691 {
692 	struct bcm_sdhci_softc *sc = device_get_softc(slot->bus);
693 	uint8_t *buf;
694 	size_t left;
695 
696 	mtx_assert(&slot->mtx, MA_OWNED);
697 
698 	left = BCM_SDHCI_SEGSZ_LEFT(slot);
699 	buf = (uint8_t *)slot->curcmd->data->data + slot->offset;
700 	KASSERT(left != 0,
701 	    ("%s: DMA handling incorrectly indicated", __func__));
702 
703 	/*
704 	 * No need to check segment count here; if we've not yet unloaded
705 	 * previous segments, we'll catch that in bcm_sdhci_dmacb.
706 	 */
707 	if (bus_dmamap_load(sc->sc_dma_tag, sc->sc_dma_map, buf, left,
708 	    bcm_sdhci_dmacb, sc, BUS_DMA_NOWAIT) != 0 ||
709 	    sc->dmamap_status != 0) {
710 		slot->curcmd->error = MMC_ERR_NO_MEMORY;
711 		return;
712 	}
713 
714 	/* DMA start */
715 	bcm_sdhci_start_dma_seg(sc);
716 }
717 
718 static int
719 bcm_sdhci_will_handle_transfer(device_t dev, struct sdhci_slot *slot)
720 {
721 #ifdef INVARIANTS
722 	struct bcm_sdhci_softc *sc = device_get_softc(slot->bus);
723 #endif
724 
725 	/*
726 	 * This indicates that we somehow let a data interrupt slip by into the
727 	 * SDHCI framework, when it should not have.  This really needs to be
728 	 * caught and fixed ASAP, as it really shouldn't happen.
729 	 */
730 	KASSERT(sc->dmamap_seg_count == 0,
731 	    ("data pending interrupt pushed through SDHCI framework"));
732 
733 	/*
734 	 * Do not use DMA for transfers less than our block size.  Checking
735 	 * alignment serves little benefit, as we round transfer sizes down to
736 	 * a multiple of the block size and push the transfer back to
737 	 * SDHCI-driven PIO once we're below the block size.
738 	 */
739 	if (BCM_SDHCI_SEGSZ_LEFT(slot) < BCM_DMA_BLOCK_SIZE)
740 		return (0);
741 
742 	return (1);
743 }
744 
745 static void
746 bcm_sdhci_start_transfer(device_t dev, struct sdhci_slot *slot,
747     uint32_t *intmask)
748 {
749 
750 	/* DMA transfer FIFO 1KB */
751 	bcm_sdhci_start_dma(slot);
752 }
753 
754 static void
755 bcm_sdhci_finish_transfer(device_t dev, struct sdhci_slot *slot)
756 {
757 	struct bcm_sdhci_softc *sc = device_get_softc(slot->bus);
758 
759 	/*
760 	 * Clean up.  Interrupts are clearly enabled, because we received an
761 	 * SDHCI_INT_DATA_END to get this far -- just make sure we don't leave
762 	 * anything laying around.
763 	 */
764 	if (sc->dmamap_seg_count != 0) {
765 		/*
766 		 * Our segment math should have worked out such that we would
767 		 * never finish the transfer without having used up all of the
768 		 * segments.  If we haven't, that means we must have erroneously
769 		 * regressed to SDHCI-driven PIO to finish the operation and
770 		 * this is certainly caused by developer-error.
771 		 */
772 		bcm_sdhci_dma_unload(sc);
773 	}
774 
775 	sdhci_finish_data(slot);
776 }
777 
/*
 * Method table.  Generic SDHCI implementations are used for the bus ivars
 * and most of the MMC bridge interface; the driver supplies its own
 * read-only query, platform transfer hooks, and register accessors.
 */
static device_method_t bcm_sdhci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		bcm_sdhci_probe),
	DEVMETHOD(device_attach,	bcm_sdhci_attach),
	DEVMETHOD(device_detach,	bcm_sdhci_detach),

	/* Bus interface */
	DEVMETHOD(bus_read_ivar,	sdhci_generic_read_ivar),
	DEVMETHOD(bus_write_ivar,	sdhci_generic_write_ivar),
	DEVMETHOD(bus_add_child,	bus_generic_add_child),

	/* MMC bridge interface */
	DEVMETHOD(mmcbr_update_ios,	sdhci_generic_update_ios),
	DEVMETHOD(mmcbr_request,	sdhci_generic_request),
	DEVMETHOD(mmcbr_get_ro,		bcm_sdhci_get_ro),
	DEVMETHOD(mmcbr_acquire_host,	sdhci_generic_acquire_host),
	DEVMETHOD(mmcbr_release_host,	sdhci_generic_release_host),

	/* Platform transfer methods */
	DEVMETHOD(sdhci_platform_will_handle,		bcm_sdhci_will_handle_transfer),
	DEVMETHOD(sdhci_platform_start_transfer,	bcm_sdhci_start_transfer),
	DEVMETHOD(sdhci_platform_finish_transfer,	bcm_sdhci_finish_transfer),
	/* SDHCI registers accessors */
	DEVMETHOD(sdhci_read_1,		bcm_sdhci_read_1),
	DEVMETHOD(sdhci_read_2,		bcm_sdhci_read_2),
	DEVMETHOD(sdhci_read_4,		bcm_sdhci_read_4),
	DEVMETHOD(sdhci_read_multi_4,	bcm_sdhci_read_multi_4),
	DEVMETHOD(sdhci_write_1,	bcm_sdhci_write_1),
	DEVMETHOD(sdhci_write_2,	bcm_sdhci_write_2),
	DEVMETHOD(sdhci_write_4,	bcm_sdhci_write_4),
	DEVMETHOD(sdhci_write_multi_4,	bcm_sdhci_write_multi_4),

	DEVMETHOD_END
};
812 
static devclass_t bcm_sdhci_devclass;

/* Driver declaration: name, method table and softc size. */
static driver_t bcm_sdhci_driver = {
	"sdhci_bcm",
	bcm_sdhci_methods,
	sizeof(struct bcm_sdhci_softc),
};

/* Register on simplebus and declare the module dependencies. */
DRIVER_MODULE(sdhci_bcm, simplebus, bcm_sdhci_driver, bcm_sdhci_devclass,
    NULL, NULL);
#ifdef NOTYET
MODULE_DEPEND(sdhci_bcm, bcm2835_clkman, 1, 1, 1);
#endif
SDHCI_DEPEND(sdhci_bcm);
/* The MMC bridge declaration is omitted when built with MMCCAM. */
#ifndef MMCCAM
MMC_DECLARE_BRIDGE(sdhci_bcm);
#endif
830