1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * skl-sst-cldma.c - Code Loader DMA handler
4  *
5  * Copyright (C) 2015, Intel Corporation.
6  * Author: Subhransu S. Prusty <subhransu.s.prusty@intel.com>
7  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8  */
9 
10 #include <linux/device.h>
11 #include <linux/io.h>
12 #include <linux/mm.h>
13 #include <linux/delay.h>
14 #include "../common/sst-dsp.h"
15 #include "../common/sst-dsp-priv.h"
16 
skl_cldma_int_enable(struct sst_dsp * ctx)17 static void skl_cldma_int_enable(struct sst_dsp *ctx)
18 {
19 	sst_dsp_shim_update_bits_unlocked(ctx, SKL_ADSP_REG_ADSPIC,
20 				SKL_ADSPIC_CL_DMA, SKL_ADSPIC_CL_DMA);
21 }
22 
skl_cldma_int_disable(struct sst_dsp * ctx)23 void skl_cldma_int_disable(struct sst_dsp *ctx)
24 {
25 	sst_dsp_shim_update_bits_unlocked(ctx,
26 			SKL_ADSP_REG_ADSPIC, SKL_ADSPIC_CL_DMA, 0);
27 }
28 
/*
 * Set or clear the Run bit of the code loader stream and poll the stream
 * control register until the hardware reports the requested state.
 *
 * @ctx:    DSP context
 * @enable: true to set the Run bit, false to clear it
 *
 * The register is read back up to 300 times with a 3us delay after each
 * unsuccessful read. If the bit never reaches the requested state an error
 * is logged; no status is returned to the caller.
 */
static void skl_cldma_stream_run(struct sst_dsp *ctx, bool enable)
{
	unsigned char val = 0;
	int retries;

	sst_dsp_shim_update_bits_unlocked(ctx, SKL_ADSP_REG_CL_SD_CTL,
			CL_SD_CTL_RUN_MASK, CL_SD_CTL_RUN(enable));

	/* short settle time before the first read-back */
	udelay(3);

	for (retries = 300; retries; retries--) {
		/* waiting for hardware to report the requested Run bit state */
		val = sst_dsp_shim_read(ctx, SKL_ADSP_REG_CL_SD_CTL) &
			CL_SD_CTL_RUN_MASK;

		if (enable ? val != 0 : val == 0)
			break;

		udelay(3);
	}

	/* retries only reaches zero when every read-back mismatched */
	if (!retries)
		dev_err(ctx->dev, "Failed to set Run bit=%d enable=%d\n", val, enable);
}
54 
skl_cldma_stream_clear(struct sst_dsp * ctx)55 static void skl_cldma_stream_clear(struct sst_dsp  *ctx)
56 {
57 	/* make sure Run bit is cleared before setting stream register */
58 	skl_cldma_stream_run(ctx, 0);
59 
60 	sst_dsp_shim_update_bits(ctx, SKL_ADSP_REG_CL_SD_CTL,
61 				CL_SD_CTL_IOCE_MASK, CL_SD_CTL_IOCE(0));
62 	sst_dsp_shim_update_bits(ctx, SKL_ADSP_REG_CL_SD_CTL,
63 				CL_SD_CTL_FEIE_MASK, CL_SD_CTL_FEIE(0));
64 	sst_dsp_shim_update_bits(ctx, SKL_ADSP_REG_CL_SD_CTL,
65 				CL_SD_CTL_DEIE_MASK, CL_SD_CTL_DEIE(0));
66 	sst_dsp_shim_update_bits(ctx, SKL_ADSP_REG_CL_SD_CTL,
67 				CL_SD_CTL_STRM_MASK, CL_SD_CTL_STRM(0));
68 
69 	sst_dsp_shim_write(ctx, SKL_ADSP_REG_CL_SD_BDLPL, CL_SD_BDLPLBA(0));
70 	sst_dsp_shim_write(ctx, SKL_ADSP_REG_CL_SD_BDLPU, 0);
71 
72 	sst_dsp_shim_write(ctx, SKL_ADSP_REG_CL_SD_CBL, 0);
73 	sst_dsp_shim_write(ctx, SKL_ADSP_REG_CL_SD_LVI, 0);
74 }
75 
76 /* Code loader helper APIs */
/*
 * Fill the buffer descriptor list (BDL) for the code loader data buffer.
 *
 * @ctx:       DSP context; cl_dev.bufsize is used as the per-entry length
 *             and cl_dev.frags is set to the number of entries written
 * @dmab_data: data buffer the entries point into
 * @bdlp:      points at the first BDL entry to write; the caller's pointer
 *             is read but not advanced
 * @size:      number of bytes to describe
 * @with_ioc:  non-zero to set flag word 0x01 on the last entry (IOC, going
 *             by the parameter name)
 *
 * Each entry is four little-endian 32-bit words: address low, address high,
 * length and flags.
 */
static void skl_cldma_setup_bdle(struct sst_dsp *ctx,
		struct snd_dma_buffer *dmab_data,
		__le32 **bdlp, int size, int with_ioc)
{
	__le32 *bdl = *bdlp;

	ctx->cl_dev.frags = 0;
	while (size > 0) {
		/* entry N covers the N-th bufsize-sized slice of the buffer */
		phys_addr_t addr = virt_to_phys(dmab_data->area +
				(ctx->cl_dev.frags * ctx->cl_dev.bufsize));

		bdl[0] = cpu_to_le32(lower_32_bits(addr));
		bdl[1] = cpu_to_le32(upper_32_bits(addr));

		bdl[2] = cpu_to_le32(ctx->cl_dev.bufsize);

		size -= ctx->cl_dev.bufsize;
		/*
		 * Flag only the final entry. The remaining size is tested with
		 * "> 0" because it goes negative when size is not a multiple
		 * of bufsize; a plain non-zero test would then treat the last
		 * entry as "more to come" and never set the flag.
		 */
		bdl[3] = (size > 0 || !with_ioc) ? 0 : cpu_to_le32(0x01);

		bdl += 4;
		ctx->cl_dev.frags++;
	}
}
100 
101 /*
102  * Setup controller
103  * Configure the registers to update the dma buffer address and
104  * enable interrupts.
105  * Note: Using the channel 1 for transfer
106  */
skl_cldma_setup_controller(struct sst_dsp * ctx,struct snd_dma_buffer * dmab_bdl,unsigned int max_size,u32 count)107 static void skl_cldma_setup_controller(struct sst_dsp  *ctx,
108 		struct snd_dma_buffer *dmab_bdl, unsigned int max_size,
109 		u32 count)
110 {
111 	skl_cldma_stream_clear(ctx);
112 	sst_dsp_shim_write(ctx, SKL_ADSP_REG_CL_SD_BDLPL,
113 			CL_SD_BDLPLBA(dmab_bdl->addr));
114 	sst_dsp_shim_write(ctx, SKL_ADSP_REG_CL_SD_BDLPU,
115 			CL_SD_BDLPUBA(dmab_bdl->addr));
116 
117 	sst_dsp_shim_write(ctx, SKL_ADSP_REG_CL_SD_CBL, max_size);
118 	sst_dsp_shim_write(ctx, SKL_ADSP_REG_CL_SD_LVI, count - 1);
119 	sst_dsp_shim_update_bits(ctx, SKL_ADSP_REG_CL_SD_CTL,
120 			CL_SD_CTL_IOCE_MASK, CL_SD_CTL_IOCE(1));
121 	sst_dsp_shim_update_bits(ctx, SKL_ADSP_REG_CL_SD_CTL,
122 			CL_SD_CTL_FEIE_MASK, CL_SD_CTL_FEIE(1));
123 	sst_dsp_shim_update_bits(ctx, SKL_ADSP_REG_CL_SD_CTL,
124 			CL_SD_CTL_DEIE_MASK, CL_SD_CTL_DEIE(1));
125 	sst_dsp_shim_update_bits(ctx, SKL_ADSP_REG_CL_SD_CTL,
126 			CL_SD_CTL_STRM_MASK, CL_SD_CTL_STRM(FW_CL_STREAM_NUMBER));
127 }
128 
/*
 * Program the code loader SPIB register with a new position.
 *
 * @ctx:    DSP context
 * @size:   value written to the SPIB register
 * @enable: when true, the SPIBE bit in SPBFCCTL is set first; when false
 *          that bit is left as it is
 */
static void skl_cldma_setup_spb(struct sst_dsp *ctx,
		unsigned int size, bool enable)
{
	if (enable) {
		sst_dsp_shim_update_bits_unlocked(ctx,
				SKL_ADSP_REG_CL_SPBFIFO_SPBFCCTL,
				CL_SPBFIFO_SPBFCCTL_SPIBE_MASK,
				CL_SPBFIFO_SPBFCCTL_SPIBE(1));
	}

	/* the position is always written, whether or not SPIBE was touched */
	sst_dsp_shim_write_unlocked(ctx, SKL_ADSP_REG_CL_SPBFIFO_SPIB, size);
}
140 
skl_cldma_cleanup_spb(struct sst_dsp * ctx)141 static void skl_cldma_cleanup_spb(struct sst_dsp  *ctx)
142 {
143 	sst_dsp_shim_update_bits_unlocked(ctx,
144 			SKL_ADSP_REG_CL_SPBFIFO_SPBFCCTL,
145 			CL_SPBFIFO_SPBFCCTL_SPIBE_MASK,
146 			CL_SPBFIFO_SPBFCCTL_SPIBE(0));
147 
148 	sst_dsp_shim_write_unlocked(ctx, SKL_ADSP_REG_CL_SPBFIFO_SPIB, 0);
149 }
150 
skl_cldma_cleanup(struct sst_dsp * ctx)151 static void skl_cldma_cleanup(struct sst_dsp  *ctx)
152 {
153 	skl_cldma_cleanup_spb(ctx);
154 	skl_cldma_stream_clear(ctx);
155 
156 	ctx->dsp_ops.free_dma_buf(ctx->dev, &ctx->cl_dev.dmab_data);
157 	ctx->dsp_ops.free_dma_buf(ctx->dev, &ctx->cl_dev.dmab_bdl);
158 }
159 
skl_cldma_wait_interruptible(struct sst_dsp * ctx)160 int skl_cldma_wait_interruptible(struct sst_dsp *ctx)
161 {
162 	int ret = 0;
163 
164 	if (!wait_event_timeout(ctx->cl_dev.wait_queue,
165 				ctx->cl_dev.wait_condition,
166 				msecs_to_jiffies(SKL_WAIT_TIMEOUT))) {
167 		dev_err(ctx->dev, "%s: Wait timeout\n", __func__);
168 		ret = -EIO;
169 		goto cleanup;
170 	}
171 
172 	dev_dbg(ctx->dev, "%s: Event wake\n", __func__);
173 	if (ctx->cl_dev.wake_status != SKL_CL_DMA_BUF_COMPLETE) {
174 		dev_err(ctx->dev, "%s: DMA Error\n", __func__);
175 		ret = -EIO;
176 	}
177 
178 cleanup:
179 	ctx->cl_dev.wake_status = SKL_CL_DMA_STATUS_NONE;
180 	return ret;
181 }
182 
skl_cldma_stop(struct sst_dsp * ctx)183 static void skl_cldma_stop(struct sst_dsp *ctx)
184 {
185 	skl_cldma_stream_run(ctx, false);
186 }
187 
/*
 * Copy one chunk of the binary into the code loader ring buffer and tell
 * the hardware how far it may read.
 *
 * @ctx:         DSP context
 * @size:        number of bytes to copy from @curr_pos
 * @curr_pos:    source position inside the binary
 * @intr_enable: when true, the code loader DMA interrupt is enabled before
 *               the SPIB position is programmed
 * @trigger:     passed to cl_setup_spb as its enable flag; when true the
 *               stream is also started through cl_trigger
 *
 * The copy starts at cl_dev.dma_buffer_offset and wraps to the start of the
 * buffer when it would run past cl_dev.bufsize. Afterwards the offset is
 * moved to cl_dev.curr_spib_pos (or 0 when that equals bufsize), which the
 * caller is expected to have updated already.
 */
static void skl_cldma_fill_buffer(struct sst_dsp *ctx, unsigned int size,
		const void *curr_pos, bool intr_enable, bool trigger)
{
	dev_dbg(ctx->dev, "Size: %x, intr_enable: %d\n", size, intr_enable);
	dev_dbg(ctx->dev, "buf_pos_index:%d, trigger:%d\n",
			ctx->cl_dev.dma_buffer_offset, trigger);
	dev_dbg(ctx->dev, "spib position: %d\n", ctx->cl_dev.curr_spib_pos);

	/*
	 * A chunk that would cross the end of the ring buffer is split:
	 * the first part fills the buffer up to its end, the rest is
	 * handled by the common copy below, starting again at offset 0.
	 */
	if (ctx->cl_dev.dma_buffer_offset + size > ctx->cl_dev.bufsize) {
		unsigned int tail_room = ctx->cl_dev.bufsize -
					ctx->cl_dev.dma_buffer_offset;

		memcpy(ctx->cl_dev.dmab_data.area + ctx->cl_dev.dma_buffer_offset,
			curr_pos, tail_room);
		curr_pos += tail_room;
		size -= tail_room;
		ctx->cl_dev.dma_buffer_offset = 0;
	}

	memcpy(ctx->cl_dev.dmab_data.area + ctx->cl_dev.dma_buffer_offset,
			curr_pos, size);

	/* next write position follows the SPIB position, wrapping at the end */
	ctx->cl_dev.dma_buffer_offset =
		(ctx->cl_dev.curr_spib_pos == ctx->cl_dev.bufsize) ?
			0 : ctx->cl_dev.curr_spib_pos;

	/* re-arm the wait condition before an interrupt can be raised */
	ctx->cl_dev.wait_condition = false;

	if (intr_enable)
		skl_cldma_int_enable(ctx);

	ctx->cl_dev.ops.cl_setup_spb(ctx, ctx->cl_dev.curr_spib_pos, trigger);

	if (trigger)
		ctx->cl_dev.ops.cl_trigger(ctx, true);
}
228 
229 /*
230  * The CL dma doesn't have any way to update the transfer status until a BDL
231  * buffer is fully transferred
232  *
233  * So Copying is divided in two parts.
234  * 1. Interrupt on buffer done where the size to be transferred is more than
235  *    ring buffer size.
236  * 2. Polling on fw register to identify if data left to transferred doesn't
237  *    fill the ring buffer. Caller takes care of polling the required status
238  *    register to identify the transfer status.
239  * 3. if wait flag is set, waits for DBL interrupt to copy the next chunk till
240  *    bytes_left is 0.
241  *    if wait flag is not set, doesn't wait for BDL interrupt. after ccopying
242  *    the first chunk return the no of bytes_left to be copied.
243  */
244 static int
skl_cldma_copy_to_buf(struct sst_dsp * ctx,const void * bin,u32 total_size,bool wait)245 skl_cldma_copy_to_buf(struct sst_dsp *ctx, const void *bin,
246 			u32 total_size, bool wait)
247 {
248 	int ret;
249 	bool start = true;
250 	unsigned int excess_bytes;
251 	u32 size;
252 	unsigned int bytes_left = total_size;
253 	const void *curr_pos = bin;
254 
255 	if (total_size <= 0)
256 		return -EINVAL;
257 
258 	dev_dbg(ctx->dev, "%s: Total binary size: %u\n", __func__, bytes_left);
259 
260 	while (bytes_left) {
261 		if (bytes_left > ctx->cl_dev.bufsize) {
262 
263 			/*
264 			 * dma transfers only till the write pointer as
265 			 * updated in spib
266 			 */
267 			if (ctx->cl_dev.curr_spib_pos == 0)
268 				ctx->cl_dev.curr_spib_pos = ctx->cl_dev.bufsize;
269 
270 			size = ctx->cl_dev.bufsize;
271 			skl_cldma_fill_buffer(ctx, size, curr_pos, true, start);
272 
273 			if (wait) {
274 				start = false;
275 				ret = skl_cldma_wait_interruptible(ctx);
276 				if (ret < 0) {
277 					skl_cldma_stop(ctx);
278 					return ret;
279 				}
280 			}
281 		} else {
282 			skl_cldma_int_disable(ctx);
283 
284 			if ((ctx->cl_dev.curr_spib_pos + bytes_left)
285 							<= ctx->cl_dev.bufsize) {
286 				ctx->cl_dev.curr_spib_pos += bytes_left;
287 			} else {
288 				excess_bytes = bytes_left -
289 					(ctx->cl_dev.bufsize -
290 					ctx->cl_dev.curr_spib_pos);
291 				ctx->cl_dev.curr_spib_pos = excess_bytes;
292 			}
293 
294 			size = bytes_left;
295 			skl_cldma_fill_buffer(ctx, size,
296 					curr_pos, false, start);
297 		}
298 		bytes_left -= size;
299 		curr_pos = curr_pos + size;
300 		if (!wait)
301 			return bytes_left;
302 	}
303 
304 	return bytes_left;
305 }
306 
skl_cldma_process_intr(struct sst_dsp * ctx)307 void skl_cldma_process_intr(struct sst_dsp *ctx)
308 {
309 	u8 cl_dma_intr_status;
310 
311 	cl_dma_intr_status =
312 		sst_dsp_shim_read_unlocked(ctx, SKL_ADSP_REG_CL_SD_STS);
313 
314 	if (!(cl_dma_intr_status & SKL_CL_DMA_SD_INT_COMPLETE))
315 		ctx->cl_dev.wake_status = SKL_CL_DMA_ERR;
316 	else
317 		ctx->cl_dev.wake_status = SKL_CL_DMA_BUF_COMPLETE;
318 
319 	ctx->cl_dev.wait_condition = true;
320 	wake_up(&ctx->cl_dev.wait_queue);
321 }
322 
skl_cldma_prepare(struct sst_dsp * ctx)323 int skl_cldma_prepare(struct sst_dsp *ctx)
324 {
325 	int ret;
326 	__le32 *bdl;
327 
328 	ctx->cl_dev.bufsize = SKL_MAX_BUFFER_SIZE;
329 
330 	/* Allocate cl ops */
331 	ctx->cl_dev.ops.cl_setup_bdle = skl_cldma_setup_bdle;
332 	ctx->cl_dev.ops.cl_setup_controller = skl_cldma_setup_controller;
333 	ctx->cl_dev.ops.cl_setup_spb = skl_cldma_setup_spb;
334 	ctx->cl_dev.ops.cl_cleanup_spb = skl_cldma_cleanup_spb;
335 	ctx->cl_dev.ops.cl_trigger = skl_cldma_stream_run;
336 	ctx->cl_dev.ops.cl_cleanup_controller = skl_cldma_cleanup;
337 	ctx->cl_dev.ops.cl_copy_to_dmabuf = skl_cldma_copy_to_buf;
338 	ctx->cl_dev.ops.cl_stop_dma = skl_cldma_stop;
339 
340 	/* Allocate buffer*/
341 	ret = ctx->dsp_ops.alloc_dma_buf(ctx->dev,
342 			&ctx->cl_dev.dmab_data, ctx->cl_dev.bufsize);
343 	if (ret < 0) {
344 		dev_err(ctx->dev, "Alloc buffer for base fw failed: %x\n", ret);
345 		return ret;
346 	}
347 	/* Setup Code loader BDL */
348 	ret = ctx->dsp_ops.alloc_dma_buf(ctx->dev,
349 			&ctx->cl_dev.dmab_bdl, PAGE_SIZE);
350 	if (ret < 0) {
351 		dev_err(ctx->dev, "Alloc buffer for blde failed: %x\n", ret);
352 		ctx->dsp_ops.free_dma_buf(ctx->dev, &ctx->cl_dev.dmab_data);
353 		return ret;
354 	}
355 	bdl = (__le32 *)ctx->cl_dev.dmab_bdl.area;
356 
357 	/* Allocate BDLs */
358 	ctx->cl_dev.ops.cl_setup_bdle(ctx, &ctx->cl_dev.dmab_data,
359 			&bdl, ctx->cl_dev.bufsize, 1);
360 	ctx->cl_dev.ops.cl_setup_controller(ctx, &ctx->cl_dev.dmab_bdl,
361 			ctx->cl_dev.bufsize, ctx->cl_dev.frags);
362 
363 	ctx->cl_dev.curr_spib_pos = 0;
364 	ctx->cl_dev.dma_buffer_offset = 0;
365 	init_waitqueue_head(&ctx->cl_dev.wait_queue);
366 
367 	return ret;
368 }
369