xref: /freebsd/sys/dev/qcom_qup/qcom_spi_hw.c (revision c7046f76)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2021, Adrian Chadd <adrian@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice unmodified, this list of conditions, and the following
11  *    disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 
35 #include <sys/bus.h>
36 #include <sys/interrupt.h>
37 #include <sys/malloc.h>
38 #include <sys/lock.h>
39 #include <sys/mutex.h>
40 #include <sys/kernel.h>
41 #include <sys/module.h>
42 #include <sys/rman.h>
43 
44 #include <vm/vm.h>
45 #include <vm/pmap.h>
46 #include <vm/vm_extern.h>
47 
48 #include <machine/bus.h>
49 #include <machine/cpu.h>
50 
51 #include <dev/gpio/gpiobusvar.h>
52 #include <dev/ofw/ofw_bus.h>
53 #include <dev/ofw/ofw_bus_subr.h>
54 
55 #include <dev/extres/clk/clk.h>
56 #include <dev/extres/hwreset/hwreset.h>
57 
58 #include <dev/spibus/spi.h>
59 #include <dev/spibus/spibusvar.h>
60 #include "spibus_if.h"
61 
62 #include <dev/qcom_qup/qcom_spi_var.h>
63 #include <dev/qcom_qup/qcom_spi_reg.h>
64 #include <dev/qcom_qup/qcom_qup_reg.h>
65 #include <dev/qcom_qup/qcom_spi_debug.h>
66 
67 int
68 qcom_spi_hw_read_controller_transfer_sizes(struct qcom_spi_softc *sc)
69 {
70 	uint32_t reg, val;
71 
72 	reg = QCOM_SPI_READ_4(sc, QUP_IO_M_MODES);
73 
74 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
75 	    "%s: QUP_IO_M_MODES=0x%08x\n", __func__, reg);
76 
77 	/* Input block size */
78 	val = (reg >> QUP_IO_M_INPUT_BLOCK_SIZE_SHIFT)
79 	    & QUP_IO_M_INPUT_BLOCK_SIZE_MASK;
80 	if (val == 0)
81 		sc->config.input_block_size = 4;
82 	else
83 		sc->config.input_block_size = val * 16;
84 
85 	/* Output block size */
86 	val = (reg >> QUP_IO_M_OUTPUT_BLOCK_SIZE_SHIFT)
87 	    & QUP_IO_M_OUTPUT_BLOCK_SIZE_MASK;
88 	if (val == 0)
89 		sc->config.output_block_size = 4;
90 	else
91 		sc->config.output_block_size = val * 16;
92 
93 	/* Input FIFO size */
94 	val = (reg >> QUP_IO_M_INPUT_FIFO_SIZE_SHIFT)
95 	    & QUP_IO_M_INPUT_FIFO_SIZE_MASK;
96 	sc->config.input_fifo_size =
97 	    sc->config.input_block_size * (2 << val);
98 
99 	/* Output FIFO size */
100 	val = (reg >> QUP_IO_M_OUTPUT_FIFO_SIZE_SHIFT)
101 	    & QUP_IO_M_OUTPUT_FIFO_SIZE_MASK;
102 	sc->config.output_fifo_size =
103 	    sc->config.output_block_size * (2 << val);
104 
105 	return (0);
106 }
107 
108 static bool
109 qcom_spi_hw_qup_is_state_valid_locked(struct qcom_spi_softc *sc)
110 {
111 	uint32_t reg;
112 
113 	QCOM_SPI_ASSERT_LOCKED(sc);
114 
115 	reg = QCOM_SPI_READ_4(sc, QUP_STATE);
116 	QCOM_SPI_BARRIER_READ(sc);
117 
118 	return !! (reg & QUP_STATE_VALID);
119 }
120 
121 static int
122 qcom_spi_hw_qup_wait_state_valid_locked(struct qcom_spi_softc *sc)
123 {
124 	int i;
125 
126 	for (i = 0; i < 10; i++) {
127 		if (qcom_spi_hw_qup_is_state_valid_locked(sc))
128 			break;
129 	}
130 	if (i >= 10) {
131 		device_printf(sc->sc_dev,
132 		    "ERROR: timeout waiting for valid state\n");
133 		return (ENXIO);
134 	}
135 	return (0);
136 }
137 
138 static bool
139 qcom_spi_hw_is_opmode_dma_locked(struct qcom_spi_softc *sc)
140 {
141 
142 	QCOM_SPI_ASSERT_LOCKED(sc);
143 
144 	if (sc->state.transfer_mode == QUP_IO_M_MODE_DMOV)
145 		return (true);
146 	if (sc->state.transfer_mode == QUP_IO_M_MODE_BAM)
147 		return (true);
148 	return (false);
149 }
150 
151 int
152 qcom_spi_hw_qup_set_state_locked(struct qcom_spi_softc *sc, uint32_t state)
153 {
154 	uint32_t cur_state;
155 	int ret;
156 
157 	QCOM_SPI_ASSERT_LOCKED(sc);
158 
159 	/* Wait until the state becomes valid */
160 	ret = qcom_spi_hw_qup_wait_state_valid_locked(sc);
161 	if (ret != 0) {
162 		return (ret);
163 	}
164 
165 	cur_state = QCOM_SPI_READ_4(sc, QUP_STATE);
166 
167 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_STATE_CHANGE,
168 	    "%s: target state=%d, cur_state=0x%08x\n",
169 	    __func__, state, cur_state);
170 
171 	/*
172 	 * According to the QUP specification, when going
173 	 * from PAUSE to RESET, two writes are required.
174 	 */
175 	if ((state == QUP_STATE_RESET)
176 	    && ((cur_state & QUP_STATE_MASK) == QUP_STATE_PAUSE)) {
177 		QCOM_SPI_WRITE_4(sc, QUP_STATE, QUP_STATE_CLEAR);
178 		QCOM_SPI_BARRIER_WRITE(sc);
179 		QCOM_SPI_WRITE_4(sc, QUP_STATE, QUP_STATE_CLEAR);
180 		QCOM_SPI_BARRIER_WRITE(sc);
181 	} else {
182 		cur_state &= ~QUP_STATE_MASK;
183 		cur_state |= state;
184 		QCOM_SPI_WRITE_4(sc, QUP_STATE, cur_state);
185 		QCOM_SPI_BARRIER_WRITE(sc);
186 	}
187 
188 	/* Wait until the state becomes valid */
189 	ret = qcom_spi_hw_qup_wait_state_valid_locked(sc);
190 	if (ret != 0) {
191 		return (ret);
192 	}
193 
194 	cur_state = QCOM_SPI_READ_4(sc, QUP_STATE);
195 
196 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_STATE_CHANGE,
197 	    "%s: FINISH: target state=%d, cur_state=0x%08x\n",
198 	    __func__, state, cur_state);
199 
200 	return (0);
201 }
202 
203 /*
204  * Do initial QUP setup.
205  *
206  * This is initially for the SPI driver; it would be interesting to see how
207  * much of this is the same with the I2C/HSUART paths.
208  */
209 int
210 qcom_spi_hw_qup_init_locked(struct qcom_spi_softc *sc)
211 {
212 	int ret;
213 
214 	QCOM_SPI_ASSERT_LOCKED(sc);
215 
216 	/* Full hardware reset */
217 	(void) qcom_spi_hw_do_full_reset(sc);
218 
219 	ret = qcom_spi_hw_qup_set_state_locked(sc, QUP_STATE_RESET);
220 	if (ret != 0) {
221 		device_printf(sc->sc_dev, "ERROR: %s: couldn't reset\n",
222 		    __func__);
223 		goto error;
224 	}
225 
226 	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, 0);
227 	QCOM_SPI_WRITE_4(sc, QUP_IO_M_MODES, 0);
228 	/* Note: no QUP_OPERATIONAL_MASK in QUP v1 */
229 	if (! QCOM_SPI_QUP_VERSION_V1(sc))
230 		QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL_MASK, 0);
231 
232 	/* Explicitly disable input overrun in QUP v1 */
233 	if (QCOM_SPI_QUP_VERSION_V1(sc))
234 		QCOM_SPI_WRITE_4(sc, QUP_ERROR_FLAGS_EN,
235 		    QUP_ERROR_OUTPUT_OVER_RUN
236 		    | QUP_ERROR_INPUT_UNDER_RUN
237 		    | QUP_ERROR_OUTPUT_UNDER_RUN);
238 	QCOM_SPI_BARRIER_WRITE(sc);
239 
240 	return (0);
241 error:
242 	return (ret);
243 }
244 
245 /*
246  * Do initial SPI setup.
247  */
248 int
249 qcom_spi_hw_spi_init_locked(struct qcom_spi_softc *sc)
250 {
251 
252 	QCOM_SPI_ASSERT_LOCKED(sc);
253 
254 	/* Initial SPI error flags */
255 	QCOM_SPI_WRITE_4(sc, SPI_ERROR_FLAGS_EN,
256 	    QUP_ERROR_INPUT_UNDER_RUN
257 	    | QUP_ERROR_OUTPUT_UNDER_RUN);
258 	QCOM_SPI_BARRIER_WRITE(sc);
259 
260 	/* Initial SPI config */
261 	QCOM_SPI_WRITE_4(sc, SPI_CONFIG, 0);
262 	QCOM_SPI_BARRIER_WRITE(sc);
263 
264 	/* Initial CS/tri-state io control config */
265 	QCOM_SPI_WRITE_4(sc, SPI_IO_CONTROL,
266 	    SPI_IO_C_NO_TRI_STATE
267 	    | SPI_IO_C_CS_SELECT(sc->config.cs_select));
268 	QCOM_SPI_BARRIER_WRITE(sc);
269 
270 	return (0);
271 }
272 
273 /*
274  * Force the currently selected device CS line to be active
275  * or inactive.
276  *
277  * This forces it to be active or inactive rather than letting
278  * the SPI transfer machine do its thing.  If you want to be able
279  * break up a big transaction into a handful of smaller ones,
280  * without toggling /CS_n for that device, then you need it forced.
281  * (If you toggle the /CS_n to the device to inactive then active,
282  * NOR/NAND devices tend to stop a block transfer.)
283  */
284 int
285 qcom_spi_hw_spi_cs_force(struct qcom_spi_softc *sc, int cs, bool enable)
286 {
287 	uint32_t reg;
288 
289 	QCOM_SPI_ASSERT_LOCKED(sc);
290 
291 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_CHIPSELECT,
292 	    "%s: called, enable=%u\n",
293 	    __func__, enable);
294 
295 	reg = QCOM_SPI_READ_4(sc, SPI_IO_CONTROL);
296 	if (enable)
297 		reg |= SPI_IO_C_FORCE_CS;
298 	else
299 		reg &= ~SPI_IO_C_FORCE_CS;
300 	reg &= ~SPI_IO_C_CS_SELECT_MASK;
301 	reg |= SPI_IO_C_CS_SELECT(cs);
302 	QCOM_SPI_WRITE_4(sc, SPI_IO_CONTROL, reg);
303 	QCOM_SPI_BARRIER_WRITE(sc);
304 
305 	return (0);
306 }
307 
308 /*
309  * ACK/store current interrupt flag state.
310  */
311 int
312 qcom_spi_hw_interrupt_handle(struct qcom_spi_softc *sc)
313 {
314 	uint32_t qup_error, spi_error, op_flags;
315 
316 	QCOM_SPI_ASSERT_LOCKED(sc);
317 
318 	/* Get QUP/SPI state */
319 	qup_error = QCOM_SPI_READ_4(sc, QUP_ERROR_FLAGS);
320 	spi_error = QCOM_SPI_READ_4(sc, SPI_ERROR_FLAGS);
321 	op_flags = QCOM_SPI_READ_4(sc, QUP_OPERATIONAL);
322 
323 	/* ACK state */
324 	QCOM_SPI_WRITE_4(sc, QUP_ERROR_FLAGS, qup_error);
325 	QCOM_SPI_WRITE_4(sc, SPI_ERROR_FLAGS, spi_error);
326 
327 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_INTR,
328 	    "%s: called; qup=0x%08x, spi=0x%08x, op=0x%08x\n",
329 	    __func__,
330 	    qup_error,
331 	    spi_error,
332 	    op_flags);
333 
334 	/* handle error flags */
335 	if (qup_error != 0) {
336 		device_printf(sc->sc_dev, "ERROR: (QUP) mask=0x%08x\n",
337 		    qup_error);
338 		sc->intr.error = true;
339 	}
340 	if (spi_error != 0) {
341 		device_printf(sc->sc_dev, "ERROR: (SPI) mask=0x%08x\n",
342 		    spi_error);
343 		sc->intr.error = true;
344 	}
345 
346 	/* handle operational state */
347 	if (qcom_spi_hw_is_opmode_dma_locked(sc)) {
348 		/* ACK interrupts now */
349 		QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, op_flags);
350 		if ((op_flags & QUP_OP_IN_SERVICE_FLAG)
351 		    && (op_flags & QUP_OP_MAX_INPUT_DONE_FLAG))
352 			sc->intr.rx_dma_done = true;
353 		if ((op_flags & QUP_OP_OUT_SERVICE_FLAG)
354 		    && (op_flags & QUP_OP_MAX_OUTPUT_DONE_FLAG))
355 			sc->intr.tx_dma_done = true;
356 	} else {
357 		/* FIFO/Block */
358 		if (op_flags & QUP_OP_IN_SERVICE_FLAG)
359 			sc->intr.do_rx = true;
360 		if (op_flags & QUP_OP_OUT_SERVICE_FLAG)
361 			sc->intr.do_tx = true;
362 	}
363 
364 	/* Check if we've finished transfers */
365 	if (op_flags & QUP_OP_MAX_INPUT_DONE_FLAG)
366 		sc->intr.done = true;
367 	if (sc->intr.error)
368 		sc->intr.done = true;
369 
370 	return (0);
371 }
372 
373 /*
374  * Make initial transfer selections based on the transfer sizes
375  * and alignment.
376  *
377  * For now this'll just default to FIFO until that works, and then
378  * will grow to include BLOCK / DMA as appropriate.
379  */
380 int
381 qcom_spi_hw_setup_transfer_selection(struct qcom_spi_softc *sc, uint32_t len)
382 {
383 
384 	QCOM_SPI_ASSERT_LOCKED(sc);
385 
386 	/*
387 	 * For now only support doing a single FIFO transfer.
388 	 * The main PIO transfer routine loop will break it up for us.
389 	 */
390 	sc->state.transfer_mode = QUP_IO_M_MODE_FIFO;
391 	sc->transfer.tx_offset = 0;
392 	sc->transfer.rx_offset = 0;
393 	sc->transfer.tx_len = 0;
394 	sc->transfer.rx_len = 0;
395 	sc->transfer.tx_buf = NULL;
396 	sc->transfer.rx_buf = NULL;
397 
398 	/*
399 	 * If we're sending a DWORD multiple sized block (like IO buffers)
400 	 * then we can totally just use the DWORD size transfers.
401 	 *
402 	 * This is really only valid for PIO/block modes; I'm not yet
403 	 * sure what we should do for DMA modes.
404 	 */
405 	if (len > 0 && len % 4 == 0)
406 		sc->state.transfer_word_size = 4;
407 	else
408 		sc->state.transfer_word_size = 1;
409 
410 	return (0);
411 }
412 
413 /*
414  * Blank the transfer state after a full transfer is completed.
415  */
416 int
417 qcom_spi_hw_complete_transfer(struct qcom_spi_softc *sc)
418 {
419 	QCOM_SPI_ASSERT_LOCKED(sc);
420 
421 	sc->state.transfer_mode = QUP_IO_M_MODE_FIFO;
422 	sc->transfer.tx_offset = 0;
423 	sc->transfer.rx_offset = 0;
424 	sc->transfer.tx_len = 0;
425 	sc->transfer.rx_len = 0;
426 	sc->transfer.tx_buf = NULL;
427 	sc->transfer.rx_buf = NULL;
428 	sc->state.transfer_word_size = 0;
429 	return (0);
430 }
431 
432 /*
433  * Configure up the transfer selection for the current transfer.
434  *
435  * This calculates how many words we can transfer in the current
436  * transfer and what's left to transfer.
437  */
438 int
439 qcom_spi_hw_setup_current_transfer(struct qcom_spi_softc *sc)
440 {
441 	uint32_t bytes_left;
442 
443 	QCOM_SPI_ASSERT_LOCKED(sc);
444 
445 	/*
446 	 * XXX For now, base this on the TX side buffer size, not both.
447 	 * Later on we'll want to configure it based on the MAX of
448 	 * either and just eat up the dummy values in the PIO
449 	 * routines.  (For DMA it's .. more annoyingly complicated
450 	 * if the transfer sizes are not symmetrical.)
451 	 */
452 	bytes_left = sc->transfer.tx_len - sc->transfer.tx_offset;
453 
454 	if (sc->state.transfer_mode == QUP_IO_M_MODE_FIFO) {
455 		/*
456 		 * For FIFO transfers the num_words limit depends upon
457 		 * the word size, FIFO size and how many bytes are left.
458 		 * It definitely will be under SPI_MAX_XFER so don't
459 		 * worry about that here.
460 		 */
461 		sc->transfer.num_words = bytes_left / sc->state.transfer_word_size;
462 		sc->transfer.num_words = MIN(sc->transfer.num_words,
463 		    sc->config.input_fifo_size / sizeof(uint32_t));
464 	} else if (sc->state.transfer_mode == QUP_IO_M_MODE_BLOCK) {
465 		/*
466 		 * For BLOCK transfers the logic will be a little different.
467 		 * Instead of it being based on the maximum input_fifo_size,
468 		 * it'll be broken down into the 'words per block" size but
469 		 * our maximum transfer size will ACTUALLY be capped by
470 		 * SPI_MAX_XFER (65536-64 bytes.)  Each transfer
471 		 * will end up being in multiples of a block until the
472 		 * last transfer.
473 		 */
474 		sc->transfer.num_words = bytes_left / sc->state.transfer_word_size;
475 		sc->transfer.num_words = MIN(sc->transfer.num_words,
476 		    SPI_MAX_XFER);
477 	}
478 
479 
480 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
481 	"%s: transfer.tx_len=%u,"
482 	    "transfer.tx_offset=%u,"
483 	    " transfer_word_size=%u,"
484 	    " bytes_left=%u, num_words=%u, fifo_word_max=%u\n",
485 	    __func__,
486 	    sc->transfer.tx_len,
487 	    sc->transfer.tx_offset,
488 	    sc->state.transfer_word_size,
489 	    bytes_left,
490 	    sc->transfer.num_words,
491 	    sc->config.input_fifo_size / sizeof(uint32_t));
492 
493 	return (0);
494 }
495 
496 /*
497  * Setup the PIO FIFO transfer count.
498  *
499  * Note that we get a /single/ TX/RX phase up to these num_words
500  * transfers.
501  */
502 int
503 qcom_spi_hw_setup_pio_transfer_cnt(struct qcom_spi_softc *sc)
504 {
505 
506 	QCOM_SPI_ASSERT_LOCKED(sc);
507 
508 	QCOM_SPI_WRITE_4(sc, QUP_MX_READ_CNT, sc->transfer.num_words);
509 	QCOM_SPI_WRITE_4(sc, QUP_MX_WRITE_CNT, sc->transfer.num_words);
510 	QCOM_SPI_WRITE_4(sc, QUP_MX_INPUT_CNT, 0);
511 	QCOM_SPI_WRITE_4(sc, QUP_MX_OUTPUT_CNT, 0);
512 
513 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
514 	    "%s: num_words=%u\n", __func__,
515 	    sc->transfer.num_words);
516 
517 	QCOM_SPI_BARRIER_WRITE(sc);
518 
519 	return (0);
520 }
521 
522 /*
523  * Setup the PIO BLOCK transfer count.
524  *
525  * This sets up the total transfer size, in TX/RX FIFO block size
526  * chunks.  We will get multiple notifications when a block sized
527  * chunk of data is avaliable or required.
528  */
529 int
530 qcom_spi_hw_setup_block_transfer_cnt(struct qcom_spi_softc *sc)
531 {
532 
533 	QCOM_SPI_ASSERT_LOCKED(sc);
534 
535 	QCOM_SPI_WRITE_4(sc, QUP_MX_READ_CNT, 0);
536 	QCOM_SPI_WRITE_4(sc, QUP_MX_WRITE_CNT, 0);
537 	QCOM_SPI_WRITE_4(sc, QUP_MX_INPUT_CNT, sc->transfer.num_words);
538 	QCOM_SPI_WRITE_4(sc, QUP_MX_OUTPUT_CNT, sc->transfer.num_words);
539 	QCOM_SPI_BARRIER_WRITE(sc);
540 
541 	return (0);
542 }
543 
544 int
545 qcom_spi_hw_setup_io_modes(struct qcom_spi_softc *sc)
546 {
547 	uint32_t reg;
548 
549 	QCOM_SPI_ASSERT_LOCKED(sc);
550 
551 	reg = QCOM_SPI_READ_4(sc, QUP_IO_M_MODES);
552 
553 	reg &= ~((QUP_IO_M_INPUT_MODE_MASK << QUP_IO_M_INPUT_MODE_SHIFT)
554 	    | (QUP_IO_M_OUTPUT_MODE_MASK << QUP_IO_M_OUTPUT_MODE_SHIFT));
555 
556 	/*
557 	 * If it's being done using DMA then the hardware will
558 	 * need to pack and unpack the byte stream into the word/dword
559 	 * stream being expected by the SPI/QUP micro engine.
560 	 *
561 	 * For PIO modes we're doing the pack/unpack in software,
562 	 * see the pio/block transfer routines.
563 	 */
564 	if (qcom_spi_hw_is_opmode_dma_locked(sc))
565 		reg |= (QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);
566 	else
567 		reg &= ~(QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);
568 
569 	/* Transfer mode */
570 	reg |= ((sc->state.transfer_mode & QUP_IO_M_INPUT_MODE_MASK)
571 	    << QUP_IO_M_INPUT_MODE_SHIFT);
572 	reg |= ((sc->state.transfer_mode & QUP_IO_M_OUTPUT_MODE_MASK)
573 	    << QUP_IO_M_OUTPUT_MODE_SHIFT);
574 
575 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
576 	    "%s: QUP_IO_M_MODES=0x%08x\n", __func__, reg);
577 
578 	QCOM_SPI_WRITE_4(sc, QUP_IO_M_MODES, reg);
579 	QCOM_SPI_BARRIER_WRITE(sc);
580 
581 	return (0);
582 }
583 
584 int
585 qcom_spi_hw_setup_spi_io_clock_polarity(struct qcom_spi_softc *sc,
586     bool cpol)
587 {
588 	uint32_t reg;
589 
590 	QCOM_SPI_ASSERT_LOCKED(sc);
591 
592 	reg = QCOM_SPI_READ_4(sc, SPI_IO_CONTROL);
593 
594 	if (cpol)
595 		reg |= SPI_IO_C_CLK_IDLE_HIGH;
596 	else
597 		reg &= ~SPI_IO_C_CLK_IDLE_HIGH;
598 
599 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
600 	    "%s: SPI_IO_CONTROL=0x%08x\n", __func__, reg);
601 
602 	QCOM_SPI_WRITE_4(sc, SPI_IO_CONTROL, reg);
603 	QCOM_SPI_BARRIER_WRITE(sc);
604 
605 	return (0);
606 }
607 
608 int
609 qcom_spi_hw_setup_spi_config(struct qcom_spi_softc *sc, uint32_t clock_val,
610     bool cpha)
611 {
612 	uint32_t reg;
613 
614 	/*
615 	 * For now we don't have a way to configure loopback SPI for testing,
616 	 * or the clock/transfer phase.  When we do then here's where we
617 	 * would put that.
618 	 */
619 
620 	QCOM_SPI_ASSERT_LOCKED(sc);
621 
622 	reg = QCOM_SPI_READ_4(sc, SPI_CONFIG);
623 	reg &= ~SPI_CONFIG_LOOPBACK;
624 
625 	if (cpha)
626 		reg &= ~SPI_CONFIG_INPUT_FIRST;
627 	else
628 		reg |= SPI_CONFIG_INPUT_FIRST;
629 
630 	/*
631 	 * If the frequency is above SPI_HS_MIN_RATE then enable high speed.
632 	 * This apparently improves stability.
633 	 *
634 	 * Note - don't do this if SPI loopback is enabled!
635 	 */
636 	if (clock_val >= SPI_HS_MIN_RATE)
637 		reg |= SPI_CONFIG_HS_MODE;
638 	else
639 		reg &= ~SPI_CONFIG_HS_MODE;
640 
641 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
642 	    "%s: SPI_CONFIG=0x%08x\n", __func__, reg);
643 
644 	QCOM_SPI_WRITE_4(sc, SPI_CONFIG, reg);
645 	QCOM_SPI_BARRIER_WRITE(sc);
646 
647 	return (0);
648 }
649 
650 int
651 qcom_spi_hw_setup_qup_config(struct qcom_spi_softc *sc, bool is_tx, bool is_rx)
652 {
653 	uint32_t reg;
654 
655 	QCOM_SPI_ASSERT_LOCKED(sc);
656 
657 	reg = QCOM_SPI_READ_4(sc, QUP_CONFIG);
658 	reg &= ~(QUP_CONFIG_NO_INPUT | QUP_CONFIG_NO_OUTPUT | QUP_CONFIG_N);
659 
660 	/* SPI mode */
661 	reg |= QUP_CONFIG_SPI_MODE;
662 
663 	/* bitmask for number of bits per word being used in each FIFO slot */
664 	reg |= ((sc->state.transfer_word_size * 8) - 1) & QUP_CONFIG_N;
665 
666 	/*
667 	 * When doing DMA we need to configure whether we are shifting
668 	 * data in, out, and/or both.  For PIO/block modes it must stay
669 	 * unset.
670 	 */
671 	if (qcom_spi_hw_is_opmode_dma_locked(sc)) {
672 		if (is_rx == false)
673 			reg |= QUP_CONFIG_NO_INPUT;
674 		if (is_tx == false)
675 			reg |= QUP_CONFIG_NO_OUTPUT;
676 	}
677 
678 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
679 	    "%s: QUP_CONFIG=0x%08x\n", __func__, reg);
680 
681 	QCOM_SPI_WRITE_4(sc, QUP_CONFIG, reg);
682 	QCOM_SPI_BARRIER_WRITE(sc);
683 
684 	return (0);
685 }
686 
687 int
688 qcom_spi_hw_setup_operational_mask(struct qcom_spi_softc *sc)
689 {
690 
691 	QCOM_SPI_ASSERT_LOCKED(sc);
692 
693 	if (QCOM_SPI_QUP_VERSION_V1(sc)) {
694 		QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
695 		    "%s: skipping, qupv1\n", __func__);
696 		return (0);
697 	}
698 
699 	if (qcom_spi_hw_is_opmode_dma_locked(sc))
700 		QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL_MASK,
701 		    QUP_OP_IN_SERVICE_FLAG | QUP_OP_OUT_SERVICE_FLAG);
702 	else
703 		QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL_MASK, 0);
704 
705 	QCOM_SPI_BARRIER_WRITE(sc);
706 
707 	return (0);
708 }
709 
710 /*
711  * ACK that we already have serviced the output FIFO.
712  */
713 int
714 qcom_spi_hw_ack_write_pio_fifo(struct qcom_spi_softc *sc)
715 {
716 
717 	QCOM_SPI_ASSERT_LOCKED(sc);
718 	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, QUP_OP_OUT_SERVICE_FLAG);
719 	QCOM_SPI_BARRIER_WRITE(sc);
720 	return (0);
721 }
722 
723 int
724 qcom_spi_hw_ack_opmode(struct qcom_spi_softc *sc)
725 {
726 
727 	QCOM_SPI_ASSERT_LOCKED(sc);
728 
729 	QCOM_SPI_BARRIER_READ(sc);
730 	QCOM_SPI_READ_4(sc, QUP_OPERATIONAL);
731 	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, QUP_OP_OUT_SERVICE_FLAG);
732 	QCOM_SPI_BARRIER_WRITE(sc);
733 
734 	return (0);
735 
736 }
737 
738 /*
739  * Read the value from the TX buffer into the given 32 bit DWORD,
740  * pre-shifting it into the place requested.
741  *
742  * Returns true if there was a byte available, false otherwise.
743  */
744 static bool
745 qcom_spi_hw_write_from_tx_buf(struct qcom_spi_softc *sc, int shift,
746     uint32_t *val)
747 {
748 
749 	QCOM_SPI_ASSERT_LOCKED(sc);
750 
751 	if (sc->transfer.tx_buf == NULL)
752 		return false;
753 
754 	if (sc->transfer.tx_offset < sc->transfer.tx_len) {
755 		*val |= (sc->transfer.tx_buf[sc->transfer.tx_offset] & 0xff)
756 		    << shift;
757 		sc->transfer.tx_offset++;
758 		return true;
759 	}
760 
761 	return false;
762 }
763 
764 int
765 qcom_spi_hw_write_pio_fifo(struct qcom_spi_softc *sc)
766 {
767 	uint32_t i;
768 	int num_bytes = 0;
769 
770 	QCOM_SPI_ASSERT_LOCKED(sc);
771 
772 	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, QUP_OP_OUT_SERVICE_FLAG);
773 	QCOM_SPI_BARRIER_WRITE(sc);
774 
775 	/*
776 	 * Loop over the transfer num_words, do complain if we are full.
777 	 */
778 	for (i = 0; i < sc->transfer.num_words; i++) {
779 		uint32_t reg;
780 
781 		/* Break if FIFO is full */
782 		if ((QCOM_SPI_READ_4(sc, QUP_OPERATIONAL)
783 		    & QUP_OP_OUT_FIFO_FULL) != 0) {
784 			device_printf(sc->sc_dev, "%s: FIFO full\n", __func__);
785 			break;
786 		}
787 
788 		/*
789 		 * Handle 1, 2, 4 byte transfer packing rules.
790 		 *
791 		 * Unlike read, where the shifting is done towards the MSB
792 		 * for us by default, we have to do it ourselves for transmit.
793 		 * There's a bit that one can set to do the preshifting
794 		 * (and u-boot uses it!) but I'll stick with what Linux is
795 		 * doing to make it easier for future maintenance.
796 		 *
797 		 * The format is the same as 4 byte RX - 0xaabbccdd;
798 		 * the byte ordering on the wire being aa, bb, cc, dd.
799 		 */
800 		reg = 0;
801 		if (sc->state.transfer_word_size == 1) {
802 			if (qcom_spi_hw_write_from_tx_buf(sc, 24, &reg))
803 				num_bytes++;
804 		} else if (sc->state.transfer_word_size == 2) {
805 			if (qcom_spi_hw_write_from_tx_buf(sc, 24, &reg))
806 				num_bytes++;
807 			if (qcom_spi_hw_write_from_tx_buf(sc, 16, &reg))
808 				num_bytes++;
809 		} else if (sc->state.transfer_word_size == 4) {
810 			if (qcom_spi_hw_write_from_tx_buf(sc, 24, &reg))
811 				num_bytes++;
812 			if (qcom_spi_hw_write_from_tx_buf(sc, 16, &reg))
813 				num_bytes++;
814 			if (qcom_spi_hw_write_from_tx_buf(sc, 8, &reg))
815 				num_bytes++;
816 			if (qcom_spi_hw_write_from_tx_buf(sc, 0, &reg))
817 				num_bytes++;
818 		}
819 
820 		/*
821 		 * always shift out something in case we need phantom
822 		 * writes to finish things up whilst we read a reply
823 		 * payload.
824 		 */
825 		QCOM_SPI_WRITE_4(sc, QUP_OUTPUT_FIFO, reg);
826 		QCOM_SPI_BARRIER_WRITE(sc);
827 	}
828 
829 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TX_FIFO,
830 	    "%s: wrote %d bytes (%d fifo slots)\n",
831 	    __func__, num_bytes, sc->transfer.num_words);
832 
833 	return (0);
834 }
835 
836 int
837 qcom_spi_hw_write_pio_block(struct qcom_spi_softc *sc)
838 {
839 	/* Not yet implemented */
840 	return (ENXIO);
841 }
842 
843 /*
844  * Read data into the RX buffer and increment the RX offset.
845  *
846  * Return true if the byte was saved into the RX buffer, else
847  * return false.
848  */
849 static bool
850 qcom_spi_hw_read_into_rx_buf(struct qcom_spi_softc *sc, uint8_t val)
851 {
852 	QCOM_SPI_ASSERT_LOCKED(sc);
853 
854 	if (sc->transfer.rx_buf == NULL)
855 		return false;
856 
857 	/* Make sure we aren't overflowing the receive buffer */
858 	if (sc->transfer.rx_offset < sc->transfer.rx_len) {
859 		sc->transfer.rx_buf[sc->transfer.rx_offset] = val;
860 		sc->transfer.rx_offset++;
861 		return true;
862 	}
863 	return false;
864 }
865 
866 /*
867  * Read "n_words" transfers, and push those bytes into the receive buffer.
868  * Make sure we have enough space, and make sure we don't overflow the
869  * read buffer size too!
870  */
871 int
872 qcom_spi_hw_read_pio_fifo(struct qcom_spi_softc *sc)
873 {
874 	uint32_t i;
875 	uint32_t reg;
876 	int num_bytes = 0;
877 
878 	QCOM_SPI_ASSERT_LOCKED(sc);
879 
880 	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, QUP_OP_IN_SERVICE_FLAG);
881 	QCOM_SPI_BARRIER_WRITE(sc);
882 
883 	for (i = 0; i < sc->transfer.num_words; i++) {
884 		/* Break if FIFO is empty */
885 		QCOM_SPI_BARRIER_READ(sc);
886 		reg = QCOM_SPI_READ_4(sc, QUP_OPERATIONAL);
887 		if ((reg & QUP_OP_IN_FIFO_NOT_EMPTY) == 0) {
888 			device_printf(sc->sc_dev, "%s: FIFO empty\n", __func__);
889 			break;
890 		}
891 
892 		/*
893 		 * Always read num_words up to FIFO being non-empty; that way
894 		 * if we have mis-matching TX/RX buffer sizes for some reason
895 		 * we will read the needed phantom bytes.
896 		 */
897 		reg = QCOM_SPI_READ_4(sc, QUP_INPUT_FIFO);
898 
899 		/*
900 		 * Unpack the receive buffer based on whether we are
901 		 * doing 1, 2, or 4 byte transfer words.
902 		 */
903 		if (sc->state.transfer_word_size == 1) {
904 			if (qcom_spi_hw_read_into_rx_buf(sc, reg & 0xff))
905 				num_bytes++;
906 		} else if (sc->state.transfer_word_size == 2) {
907 			if (qcom_spi_hw_read_into_rx_buf(sc, (reg >> 8) & 0xff))
908 				num_bytes++;
909 			if (qcom_spi_hw_read_into_rx_buf(sc, reg & 0xff))
910 				num_bytes++;
911 		} else if (sc->state.transfer_word_size == 4) {
912 			if (qcom_spi_hw_read_into_rx_buf(sc, (reg >> 24) & 0xff))
913 				num_bytes++;
914 			if (qcom_spi_hw_read_into_rx_buf(sc, (reg >> 16) & 0xff))
915 				num_bytes++;
916 			if (qcom_spi_hw_read_into_rx_buf(sc, (reg >> 8) & 0xff))
917 				num_bytes++;
918 			if (qcom_spi_hw_read_into_rx_buf(sc, reg & 0xff))
919 				num_bytes++;
920 		}
921 	}
922 
923 	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TX_FIFO,
924 	    "%s: read %d bytes (%d transfer words)\n",
925 	    __func__, num_bytes, sc->transfer.num_words);
926 
927 #if 0
928 	/*
929 	 * This is a no-op for FIFO mode, it's only a thing for BLOCK
930 	 * transfers.
931 	 */
932 	QCOM_SPI_BARRIER_READ(sc);
933 	reg = QCOM_SPI_READ_4(sc, QUP_OPERATIONAL);
934 	if (reg & QUP_OP_MAX_INPUT_DONE_FLAG) {
935 		device_printf(sc->sc_dev, "%s: read complete (DONE)\n" ,
936 		    __func__);
937 		sc->intr.done = true;
938 	}
939 #endif
940 
941 #if 0
942 	/*
943 	 * And see if we've finished the transfer and won't be getting
944 	 * any more.  Then treat it as done as well.
945 	 *
946 	 * In FIFO only mode we don't get a completion interrupt;
947 	 * we get an interrupt when the FIFO has enough data present.
948 	 */
949 	if ((sc->state.transfer_mode == QUP_IO_M_MODE_FIFO)
950 	    && (sc->transfer.rx_offset >= sc->transfer.rx_len)) {
951 		device_printf(sc->sc_dev, "%s: read complete (rxlen)\n",
952 		    __func__);
953 		sc->intr.done = true;
954 	}
955 #endif
956 
957 	/*
958 	 * For FIFO transfers we get a /single/ result that complete
959 	 * the FIFO transfer.  We won't get any subsequent transfers;
960 	 * we'll need to schedule a new FIFO transfer.
961 	 */
962 	sc->intr.done = true;
963 
964 	return (0);
965 }
966 
967 int
968 qcom_spi_hw_read_pio_block(struct qcom_spi_softc *sc)
969 {
970 
971 	/* Not yet implemented */
972 	return (ENXIO);
973 }
974 
975 int
976 qcom_spi_hw_do_full_reset(struct qcom_spi_softc *sc)
977 {
978 	QCOM_SPI_ASSERT_LOCKED(sc);
979 
980 	QCOM_SPI_WRITE_4(sc, QUP_SW_RESET, 1);
981 	QCOM_SPI_BARRIER_WRITE(sc);
982 	DELAY(100);
983 
984 	return (0);
985 }
986