1 /*
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2017 Cavium, Inc.. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Cavium, Inc. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 /*$FreeBSD$*/
34 
35 #include "lio_bsd.h"
36 #include "lio_common.h"
37 #include "lio_droq.h"
38 #include "lio_iq.h"
39 #include "lio_response_manager.h"
40 #include "lio_device.h"
41 #include "lio_main.h"
42 #include "lio_network.h"
43 #include "cn23xx_pf_device.h"
44 #include "lio_rxtx.h"
45 
/*
 * Result of trying to post one command to an instruction queue.
 */
struct lio_iq_post_status {
	/* One of the LIO_IQ_SEND_* codes produced by __lio_post_command2(). */
	int	status;

	/* Ring slot the command was copied into; -1 when the post failed. */
	int	index;
};
50 
51 static void	lio_check_db_timeout(void *arg, int pending);
52 static void	__lio_check_db_timeout(struct octeon_device *oct,
53 				       uint64_t iq_no);
54 
55 /* Return 0 on success, 1 on failure */
56 int
57 lio_init_instr_queue(struct octeon_device *oct, union octeon_txpciq txpciq,
58 		     uint32_t num_descs)
59 {
60 	struct lio_instr_queue	*iq;
61 	struct lio_iq_config	*conf = NULL;
62 	struct lio_tq		*db_tq;
63 	struct lio_request_list	*request_buf;
64 	bus_size_t		max_size;
65 	uint32_t		iq_no = (uint32_t)txpciq.s.q_no;
66 	uint32_t		q_size;
67 	int			error, i;
68 
69 	if (LIO_CN23XX_PF(oct))
70 		conf = &(LIO_GET_IQ_CFG(LIO_CHIP_CONF(oct, cn23xx_pf)));
71 	if (conf == NULL) {
72 		lio_dev_err(oct, "Unsupported Chip %x\n", oct->chip_id);
73 		return (1);
74 	}
75 
76 	q_size = (uint32_t)conf->instr_type * num_descs;
77 	iq = oct->instr_queue[iq_no];
78 	iq->oct_dev = oct;
79 
80 	max_size = LIO_CN23XX_PKI_MAX_FRAME_SIZE * num_descs;
81 
82 	error = bus_dma_tag_create(bus_get_dma_tag(oct->device),	/* parent */
83 				   1, 0,				/* alignment, bounds */
84 				   BUS_SPACE_MAXADDR,			/* lowaddr */
85 				   BUS_SPACE_MAXADDR,			/* highaddr */
86 				   NULL, NULL,				/* filter, filterarg */
87 				   max_size,				/* maxsize */
88 				   LIO_MAX_SG,				/* nsegments */
89 				   PAGE_SIZE,				/* maxsegsize */
90 				   0,					/* flags */
91 				   NULL,				/* lockfunc */
92 				   NULL,				/* lockfuncarg */
93 				   &iq->txtag);
94 	if (error) {
95 		lio_dev_err(oct, "Cannot allocate memory for instr queue %d\n",
96 			    iq_no);
97 		return (1);
98 	}
99 
100 	iq->base_addr = lio_dma_alloc(q_size, (vm_paddr_t *)&iq->base_addr_dma);
101 	if (!iq->base_addr) {
102 		lio_dev_err(oct, "Cannot allocate memory for instr queue %d\n",
103 			    iq_no);
104 		return (1);
105 	}
106 
107 	iq->max_count = num_descs;
108 
109 	/*
110 	 * Initialize a list to holds requests that have been posted to
111 	 * Octeon but has yet to be fetched by octeon
112 	 */
113 	iq->request_list = malloc(sizeof(*iq->request_list) * num_descs,
114 				  M_DEVBUF, M_NOWAIT | M_ZERO);
115 	if (iq->request_list == NULL) {
116 		lio_dev_err(oct, "Alloc failed for IQ[%d] nr free list\n",
117 			    iq_no);
118 		return (1);
119 	}
120 
121 	lio_dev_dbg(oct, "IQ[%d]: base: %p basedma: %llx count: %d\n",
122 		    iq_no, iq->base_addr, LIO_CAST64(iq->base_addr_dma),
123 		    iq->max_count);
124 
125 	/* Create the descriptor buffer dma maps */
126 	request_buf = iq->request_list;
127 	for (i = 0; i < num_descs; i++, request_buf++) {
128 		error = bus_dmamap_create(iq->txtag, 0, &request_buf->map);
129 		if (error) {
130 			lio_dev_err(oct, "Unable to create TX DMA map\n");
131 			return (1);
132 		}
133 	}
134 
135 	iq->txpciq.txpciq64 = txpciq.txpciq64;
136 	iq->fill_cnt = 0;
137 	iq->host_write_index = 0;
138 	iq->octeon_read_index = 0;
139 	iq->flush_index = 0;
140 	iq->last_db_time = 0;
141 	iq->db_timeout = (uint32_t)conf->db_timeout;
142 	atomic_store_rel_int(&iq->instr_pending, 0);
143 
144 	/* Initialize the lock for this instruction queue */
145 	mtx_init(&iq->lock, "Tx_lock", NULL, MTX_DEF);
146 	mtx_init(&iq->post_lock, "iq_post_lock", NULL, MTX_DEF);
147 	mtx_init(&iq->enq_lock, "enq_lock", NULL, MTX_DEF);
148 
149 	mtx_init(&iq->iq_flush_running_lock, "iq_flush_running_lock", NULL,
150 		 MTX_DEF);
151 
152 	oct->io_qmask.iq |= BIT_ULL(iq_no);
153 
154 	/* Set the 32B/64B mode for each input queue */
155 	oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
156 	iq->iqcmd_64B = (conf->instr_type == 64);
157 
158 	oct->fn_list.setup_iq_regs(oct, iq_no);
159 
160 	db_tq = &oct->check_db_tq[iq_no];
161 	db_tq->tq = taskqueue_create("lio_check_db_timeout", M_WAITOK,
162 				     taskqueue_thread_enqueue, &db_tq->tq);
163 	if (db_tq->tq == NULL) {
164 		lio_dev_err(oct, "check db wq create failed for iq %d\n",
165 			    iq_no);
166 		return (1);
167 	}
168 
169 	TIMEOUT_TASK_INIT(db_tq->tq, &db_tq->work, 0, lio_check_db_timeout,
170 			  (void *)db_tq);
171 	db_tq->ctxul = iq_no;
172 	db_tq->ctxptr = oct;
173 
174 	taskqueue_start_threads(&db_tq->tq, 1, PI_NET,
175 				"lio%d_check_db_timeout:%d",
176 				oct->octeon_id, iq_no);
177 	taskqueue_enqueue_timeout(db_tq->tq, &db_tq->work, 1);
178 
179 	/* Allocate a buf ring */
180 	oct->instr_queue[iq_no]->br =
181 		buf_ring_alloc(LIO_BR_SIZE, M_DEVBUF, M_WAITOK,
182 			       &oct->instr_queue[iq_no]->enq_lock);
183 	if (oct->instr_queue[iq_no]->br == NULL) {
184 		lio_dev_err(oct, "Critical Failure setting up buf ring\n");
185 		return (1);
186 	}
187 
188 	return (0);
189 }
190 
191 int
192 lio_delete_instr_queue(struct octeon_device *oct, uint32_t iq_no)
193 {
194 	struct lio_instr_queue		*iq = oct->instr_queue[iq_no];
195 	struct lio_request_list		*request_buf;
196 	struct lio_mbuf_free_info	*finfo;
197 	uint64_t			desc_size = 0, q_size;
198 	int				i;
199 
200 	lio_dev_dbg(oct, "%s[%d]\n", __func__, iq_no);
201 
202 	if (oct->check_db_tq[iq_no].tq != NULL) {
203 		while (taskqueue_cancel_timeout(oct->check_db_tq[iq_no].tq,
204 						&oct->check_db_tq[iq_no].work,
205 						NULL))
206 			taskqueue_drain_timeout(oct->check_db_tq[iq_no].tq,
207 						&oct->check_db_tq[iq_no].work);
208 		taskqueue_free(oct->check_db_tq[iq_no].tq);
209 		oct->check_db_tq[iq_no].tq = NULL;
210 	}
211 
212 	if (LIO_CN23XX_PF(oct))
213 		desc_size =
214 		    LIO_GET_IQ_INSTR_TYPE_CFG(LIO_CHIP_CONF(oct, cn23xx_pf));
215 
216 	request_buf = iq->request_list;
217 	for (i = 0; i < iq->max_count; i++, request_buf++) {
218 		if ((request_buf->reqtype == LIO_REQTYPE_NORESP_NET) ||
219 		    (request_buf->reqtype == LIO_REQTYPE_NORESP_NET_SG)) {
220 			if (request_buf->buf != NULL) {
221 				finfo = request_buf->buf;
222 				bus_dmamap_sync(iq->txtag, request_buf->map,
223 						BUS_DMASYNC_POSTWRITE);
224 				bus_dmamap_unload(iq->txtag,
225 						  request_buf->map);
226 				m_freem(finfo->mb);
227 				request_buf->buf = NULL;
228 				if (request_buf->map != NULL) {
229 					bus_dmamap_destroy(iq->txtag,
230 							   request_buf->map);
231 					request_buf->map = NULL;
232 				}
233 			} else if (request_buf->map != NULL) {
234 				bus_dmamap_unload(iq->txtag, request_buf->map);
235 				bus_dmamap_destroy(iq->txtag, request_buf->map);
236 				request_buf->map = NULL;
237 			}
238 		}
239 	}
240 
241 	if (iq->br != NULL) {
242 		buf_ring_free(iq->br, M_DEVBUF);
243 		iq->br = NULL;
244 	}
245 
246 	if (iq->request_list != NULL) {
247 		free(iq->request_list, M_DEVBUF);
248 		iq->request_list = NULL;
249 	}
250 
251 	if (iq->txtag != NULL) {
252 		bus_dma_tag_destroy(iq->txtag);
253 		iq->txtag = NULL;
254 	}
255 
256 	if (iq->base_addr) {
257 		q_size = iq->max_count * desc_size;
258 		lio_dma_free((uint32_t)q_size, iq->base_addr);
259 
260 		oct->io_qmask.iq &= ~(1ULL << iq_no);
261 		bzero(oct->instr_queue[iq_no], sizeof(struct lio_instr_queue));
262 		oct->num_iqs--;
263 
264 		return (0);
265 	}
266 
267 	return (1);
268 }
269 
270 /* Return 0 on success, 1 on failure */
271 int
272 lio_setup_iq(struct octeon_device *oct, int ifidx, int q_index,
273 	     union octeon_txpciq txpciq, uint32_t num_descs)
274 {
275 	uint32_t	iq_no = (uint32_t)txpciq.s.q_no;
276 
277 	if (oct->instr_queue[iq_no]->oct_dev != NULL) {
278 		lio_dev_dbg(oct, "IQ is in use. Cannot create the IQ: %d again\n",
279 			    iq_no);
280 		oct->instr_queue[iq_no]->txpciq.txpciq64 = txpciq.txpciq64;
281 		return (0);
282 	}
283 
284 	oct->instr_queue[iq_no]->q_index = q_index;
285 	oct->instr_queue[iq_no]->ifidx = ifidx;
286 
287 	if (lio_init_instr_queue(oct, txpciq, num_descs)) {
288 		lio_delete_instr_queue(oct, iq_no);
289 		return (1);
290 	}
291 
292 	oct->num_iqs++;
293 	if (oct->fn_list.enable_io_queues(oct))
294 		return (1);
295 
296 	return (0);
297 }
298 
299 int
300 lio_wait_for_instr_fetch(struct octeon_device *oct)
301 {
302 	int	i, retry = 1000, pending, instr_cnt = 0;
303 
304 	do {
305 		instr_cnt = 0;
306 
307 		for (i = 0; i < LIO_MAX_INSTR_QUEUES(oct); i++) {
308 			if (!(oct->io_qmask.iq & BIT_ULL(i)))
309 				continue;
310 			pending = atomic_load_acq_int(
311 					&oct->instr_queue[i]->instr_pending);
312 			if (pending)
313 				__lio_check_db_timeout(oct, i);
314 			instr_cnt += pending;
315 		}
316 
317 		if (instr_cnt == 0)
318 			break;
319 
320 		lio_sleep_timeout(1);
321 
322 	} while (retry-- && instr_cnt);
323 
324 	return (instr_cnt);
325 }
326 
327 static inline void
328 lio_ring_doorbell(struct octeon_device *oct, struct lio_instr_queue *iq)
329 {
330 
331 	if (atomic_load_acq_int(&oct->status) == LIO_DEV_RUNNING) {
332 		lio_write_csr32(oct, iq->doorbell_reg, iq->fill_cnt);
333 		/* make sure doorbell write goes through */
334 		__compiler_membar();
335 		iq->fill_cnt = 0;
336 		iq->last_db_time = ticks;
337 		return;
338 	}
339 }
340 
341 static inline void
342 __lio_copy_cmd_into_iq(struct lio_instr_queue *iq, uint8_t *cmd)
343 {
344 	uint8_t	*iqptr, cmdsize;
345 
346 	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
347 	iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
348 
349 	memcpy(iqptr, cmd, cmdsize);
350 }
351 
352 static inline struct lio_iq_post_status
353 __lio_post_command2(struct lio_instr_queue *iq, uint8_t *cmd)
354 {
355 	struct lio_iq_post_status	st;
356 
357 	st.status = LIO_IQ_SEND_OK;
358 
359 	/*
360 	 * This ensures that the read index does not wrap around to the same
361 	 * position if queue gets full before Octeon could fetch any instr.
362 	 */
363 	if (atomic_load_acq_int(&iq->instr_pending) >=
364 	    (int32_t)(iq->max_count - 1)) {
365 		st.status = LIO_IQ_SEND_FAILED;
366 		st.index = -1;
367 		return (st);
368 	}
369 
370 	if (atomic_load_acq_int(&iq->instr_pending) >=
371 	    (int32_t)(iq->max_count - 2))
372 		st.status = LIO_IQ_SEND_STOP;
373 
374 	__lio_copy_cmd_into_iq(iq, cmd);
375 
376 	/* "index" is returned, host_write_index is modified. */
377 	st.index = iq->host_write_index;
378 	iq->host_write_index = lio_incr_index(iq->host_write_index, 1,
379 					      iq->max_count);
380 	iq->fill_cnt++;
381 
382 	/*
383 	 * Flush the command into memory. We need to be sure the data is in
384 	 * memory before indicating that the instruction is pending.
385 	 */
386 	wmb();
387 
388 	atomic_add_int(&iq->instr_pending, 1);
389 
390 	return (st);
391 }
392 
393 static inline void
394 __lio_add_to_request_list(struct lio_instr_queue *iq, int idx, void *buf,
395 			  int reqtype)
396 {
397 
398 	iq->request_list[idx].buf = buf;
399 	iq->request_list[idx].reqtype = reqtype;
400 }
401 
402 /* Can only run in process context */
403 int
404 lio_process_iq_request_list(struct octeon_device *oct,
405 			    struct lio_instr_queue *iq, uint32_t budget)
406 {
407 	struct lio_soft_command		*sc;
408 	struct octeon_instr_irh		*irh = NULL;
409 	struct lio_mbuf_free_info	*finfo;
410 	void				*buf;
411 	uint32_t			inst_count = 0;
412 	uint32_t			old = iq->flush_index;
413 	int				reqtype;
414 
415 	while (old != iq->octeon_read_index) {
416 		reqtype = iq->request_list[old].reqtype;
417 		buf = iq->request_list[old].buf;
418 		finfo = buf;
419 
420 		if (reqtype == LIO_REQTYPE_NONE)
421 			goto skip_this;
422 
423 		switch (reqtype) {
424 		case LIO_REQTYPE_NORESP_NET:
425 			lio_free_mbuf(iq, buf);
426 			break;
427 		case LIO_REQTYPE_NORESP_NET_SG:
428 			lio_free_sgmbuf(iq, buf);
429 			break;
430 		case LIO_REQTYPE_RESP_NET:
431 		case LIO_REQTYPE_SOFT_COMMAND:
432 			sc = buf;
433 			if (LIO_CN23XX_PF(oct))
434 				irh = (struct octeon_instr_irh *)
435 					&sc->cmd.cmd3.irh;
436 			if (irh->rflag) {
437 				/*
438 				 * We're expecting a response from Octeon.
439 				 * It's up to lio_process_ordered_list() to
440 				 * process  sc. Add sc to the ordered soft
441 				 * command response list because we expect
442 				 * a response from Octeon.
443 				 */
444 				mtx_lock(&oct->response_list
445 					 [LIO_ORDERED_SC_LIST].lock);
446 				atomic_add_int(&oct->response_list
447 					       [LIO_ORDERED_SC_LIST].
448 					       pending_req_count, 1);
449 				STAILQ_INSERT_TAIL(&oct->response_list
450 						   [LIO_ORDERED_SC_LIST].
451 						   head, &sc->node, entries);
452 				mtx_unlock(&oct->response_list
453 					   [LIO_ORDERED_SC_LIST].lock);
454 			} else {
455 				if (sc->callback != NULL) {
456 					/* This callback must not sleep */
457 					sc->callback(oct, LIO_REQUEST_DONE,
458 						     sc->callback_arg);
459 				}
460 			}
461 
462 			break;
463 		default:
464 			lio_dev_err(oct, "%s Unknown reqtype: %d buf: %p at idx %d\n",
465 				    __func__, reqtype, buf, old);
466 		}
467 
468 		iq->request_list[old].buf = NULL;
469 		iq->request_list[old].reqtype = 0;
470 
471 skip_this:
472 		inst_count++;
473 		old = lio_incr_index(old, 1, iq->max_count);
474 
475 		if ((budget) && (inst_count >= budget))
476 			break;
477 	}
478 
479 	iq->flush_index = old;
480 
481 	return (inst_count);
482 }
483 
484 /* Can only be called from process context */
485 int
486 lio_flush_iq(struct octeon_device *oct, struct lio_instr_queue *iq,
487 	     uint32_t budget)
488 {
489 	uint32_t	inst_processed = 0;
490 	uint32_t	tot_inst_processed = 0;
491 	int		tx_done = 1;
492 
493 	if (!mtx_trylock(&iq->iq_flush_running_lock))
494 		return (tx_done);
495 
496 	mtx_lock(&iq->lock);
497 
498 	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(iq);
499 
500 	do {
501 		/* Process any outstanding IQ packets. */
502 		if (iq->flush_index == iq->octeon_read_index)
503 			break;
504 
505 		if (budget)
506 			inst_processed =
507 				lio_process_iq_request_list(oct, iq,
508 							    budget -
509 							    tot_inst_processed);
510 		else
511 			inst_processed =
512 				lio_process_iq_request_list(oct, iq, 0);
513 
514 		if (inst_processed) {
515 			atomic_subtract_int(&iq->instr_pending, inst_processed);
516 			iq->stats.instr_processed += inst_processed;
517 		}
518 		tot_inst_processed += inst_processed;
519 		inst_processed = 0;
520 
521 	} while (tot_inst_processed < budget);
522 
523 	if (budget && (tot_inst_processed >= budget))
524 		tx_done = 0;
525 
526 	iq->last_db_time = ticks;
527 
528 	mtx_unlock(&iq->lock);
529 
530 	mtx_unlock(&iq->iq_flush_running_lock);
531 
532 	return (tx_done);
533 }
534 
535 /*
536  * Process instruction queue after timeout.
537  * This routine gets called from a taskqueue or when removing the module.
538  */
539 static void
540 __lio_check_db_timeout(struct octeon_device *oct, uint64_t iq_no)
541 {
542 	struct lio_instr_queue	*iq;
543 	uint64_t		next_time;
544 
545 	if (oct == NULL)
546 		return;
547 
548 	iq = oct->instr_queue[iq_no];
549 	if (iq == NULL)
550 		return;
551 
552 	if (atomic_load_acq_int(&iq->instr_pending)) {
553 		/* If ticks - last_db_time < db_timeout do nothing  */
554 		next_time = iq->last_db_time + lio_ms_to_ticks(iq->db_timeout);
555 		if (!lio_check_timeout(ticks, next_time))
556 			return;
557 
558 		iq->last_db_time = ticks;
559 
560 		/* Flush the instruction queue */
561 		lio_flush_iq(oct, iq, 0);
562 
563 		lio_enable_irq(NULL, iq);
564 	}
565 
566 	if (oct->props.ifp != NULL && iq->br != NULL) {
567 		if (mtx_trylock(&iq->enq_lock)) {
568 			if (!drbr_empty(oct->props.ifp, iq->br))
569 				lio_mq_start_locked(oct->props.ifp, iq);
570 
571 			mtx_unlock(&iq->enq_lock);
572 		}
573 	}
574 }
575 
576 /*
577  * Called by the Poll thread at regular intervals to check the instruction
578  * queue for commands to be posted and for commands that were fetched by Octeon.
579  */
580 static void
581 lio_check_db_timeout(void *arg, int pending)
582 {
583 	struct lio_tq		*db_tq = (struct lio_tq *)arg;
584 	struct octeon_device	*oct = db_tq->ctxptr;
585 	uint64_t		iq_no = db_tq->ctxul;
586 	uint32_t		delay = 10;
587 
588 	__lio_check_db_timeout(oct, iq_no);
589 	taskqueue_enqueue_timeout(db_tq->tq, &db_tq->work,
590 				  lio_ms_to_ticks(delay));
591 }
592 
593 int
594 lio_send_command(struct octeon_device *oct, uint32_t iq_no,
595 		 uint32_t force_db, void *cmd, void *buf,
596 		 uint32_t datasize, uint32_t reqtype)
597 {
598 	struct lio_iq_post_status	st;
599 	struct lio_instr_queue		*iq = oct->instr_queue[iq_no];
600 
601 	/*
602 	 * Get the lock and prevent other tasks and tx interrupt handler
603 	 * from running.
604 	 */
605 	mtx_lock(&iq->post_lock);
606 
607 	st = __lio_post_command2(iq, cmd);
608 
609 	if (st.status != LIO_IQ_SEND_FAILED) {
610 		__lio_add_to_request_list(iq, st.index, buf, reqtype);
611 		LIO_INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
612 		LIO_INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);
613 
614 		if (force_db || (st.status == LIO_IQ_SEND_STOP))
615 			lio_ring_doorbell(oct, iq);
616 	} else {
617 		LIO_INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
618 	}
619 
620 	mtx_unlock(&iq->post_lock);
621 
622 	/*
623 	 * This is only done here to expedite packets being flushed for
624 	 * cases where there are no IQ completion interrupts.
625 	 */
626 
627 	return (st.status);
628 }
629 
630 void
631 lio_prepare_soft_command(struct octeon_device *oct, struct lio_soft_command *sc,
632 			 uint8_t opcode, uint8_t subcode, uint32_t irh_ossp,
633 			 uint64_t ossp0, uint64_t ossp1)
634 {
635 	struct lio_config		*lio_cfg;
636 	struct octeon_instr_ih3		*ih3;
637 	struct octeon_instr_pki_ih3	*pki_ih3;
638 	struct octeon_instr_irh		*irh;
639 	struct octeon_instr_rdp		*rdp;
640 
641 	KASSERT(opcode <= 15, ("%s, %d, opcode > 15", __func__, __LINE__));
642 	KASSERT(subcode <= 127, ("%s, %d, opcode > 127", __func__, __LINE__));
643 
644 	lio_cfg = lio_get_conf(oct);
645 
646 	if (LIO_CN23XX_PF(oct)) {
647 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
648 
649 		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
650 
651 		pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
652 
653 		pki_ih3->w = 1;
654 		pki_ih3->raw = 1;
655 		pki_ih3->utag = 1;
656 		pki_ih3->uqpg = oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
657 		pki_ih3->utt = 1;
658 		pki_ih3->tag = LIO_CONTROL;
659 		pki_ih3->tagtype = LIO_ATOMIC_TAG;
660 		pki_ih3->qpg = oct->instr_queue[sc->iq_no]->txpciq.s.qpg;
661 		pki_ih3->pm = 0x7;
662 		pki_ih3->sl = 8;
663 
664 		if (sc->datasize)
665 			ih3->dlengsz = sc->datasize;
666 
667 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
668 		irh->opcode = opcode;
669 		irh->subcode = subcode;
670 
671 		/* opcode/subcode specific parameters (ossp) */
672 		irh->ossp = irh_ossp;
673 		sc->cmd.cmd3.ossp[0] = ossp0;
674 		sc->cmd.cmd3.ossp[1] = ossp1;
675 
676 		if (sc->rdatasize) {
677 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
678 			rdp->pcie_port = oct->pcie_port;
679 			rdp->rlen = sc->rdatasize;
680 
681 			irh->rflag = 1;
682 			/* PKI IH3 */
683 			/* pki_ih3 irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */
684 			ih3->fsz = LIO_SOFTCMDRESP_IH3;
685 		} else {
686 			irh->rflag = 0;
687 			/* PKI IH3 */
688 			/* pki_h3 + irh + ossp[0] + ossp[1] = 32 bytes */
689 			ih3->fsz = LIO_PCICMD_O3;
690 		}
691 	}
692 }
693 
694 int
695 lio_send_soft_command(struct octeon_device *oct, struct lio_soft_command *sc)
696 {
697 	struct octeon_instr_ih3	*ih3;
698 	struct octeon_instr_irh	*irh;
699 	uint32_t		len = 0;
700 
701 	if (LIO_CN23XX_PF(oct)) {
702 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
703 		if (ih3->dlengsz) {
704 			KASSERT(sc->dmadptr, ("%s, %d, sc->dmadptr is NULL",
705 					      __func__, __LINE__));
706 			sc->cmd.cmd3.dptr = sc->dmadptr;
707 		}
708 
709 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
710 		if (irh->rflag) {
711 			KASSERT(sc->dmarptr, ("%s, %d, sc->dmarptr is NULL",
712 					      __func__, __LINE__));
713 			KASSERT(sc->status_word, ("%s, %d, sc->status_word is NULL",
714 						  __func__, __LINE__));
715 			*sc->status_word = COMPLETION_WORD_INIT;
716 			sc->cmd.cmd3.rptr = sc->dmarptr;
717 		}
718 		len = (uint32_t)ih3->dlengsz;
719 	}
720 	if (sc->wait_time)
721 		sc->timeout = ticks + lio_ms_to_ticks(sc->wait_time);
722 
723 	return (lio_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
724 				 len, LIO_REQTYPE_SOFT_COMMAND));
725 }
726 
727 int
728 lio_setup_sc_buffer_pool(struct octeon_device *oct)
729 {
730 	struct lio_soft_command	*sc;
731 	uint64_t		dma_addr;
732 	int			i;
733 
734 	STAILQ_INIT(&oct->sc_buf_pool.head);
735 	mtx_init(&oct->sc_buf_pool.lock, "sc_pool_lock", NULL, MTX_DEF);
736 	atomic_store_rel_int(&oct->sc_buf_pool.alloc_buf_count, 0);
737 
738 	for (i = 0; i < LIO_MAX_SOFT_COMMAND_BUFFERS; i++) {
739 		sc = (struct lio_soft_command *)
740 			lio_dma_alloc(LIO_SOFT_COMMAND_BUFFER_SIZE, (vm_paddr_t *)&dma_addr);
741 		if (sc == NULL) {
742 			lio_free_sc_buffer_pool(oct);
743 			return (1);
744 		}
745 
746 		sc->dma_addr = dma_addr;
747 		sc->size = LIO_SOFT_COMMAND_BUFFER_SIZE;
748 
749 		STAILQ_INSERT_TAIL(&oct->sc_buf_pool.head, &sc->node, entries);
750 	}
751 
752 	return (0);
753 }
754 
755 int
756 lio_free_sc_buffer_pool(struct octeon_device *oct)
757 {
758 	struct lio_stailq_node	*tmp, *tmp2;
759 	struct lio_soft_command	*sc;
760 
761 	mtx_lock(&oct->sc_buf_pool.lock);
762 
763 	STAILQ_FOREACH_SAFE(tmp, &oct->sc_buf_pool.head, entries, tmp2) {
764 		sc = LIO_STAILQ_FIRST_ENTRY(&oct->sc_buf_pool.head,
765 					    struct lio_soft_command, node);
766 
767 		STAILQ_REMOVE_HEAD(&oct->sc_buf_pool.head, entries);
768 
769 		lio_dma_free(sc->size, sc);
770 	}
771 
772 	STAILQ_INIT(&oct->sc_buf_pool.head);
773 
774 	mtx_unlock(&oct->sc_buf_pool.lock);
775 
776 	return (0);
777 }
778 
779 struct lio_soft_command *
780 lio_alloc_soft_command(struct octeon_device *oct, uint32_t datasize,
781 		       uint32_t rdatasize, uint32_t ctxsize)
782 {
783 	struct lio_soft_command	*sc = NULL;
784 	struct lio_stailq_node	*tmp;
785 	uint64_t		dma_addr;
786 	uint32_t		size;
787 	uint32_t		offset = sizeof(struct lio_soft_command);
788 
789 	KASSERT((offset + datasize + rdatasize + ctxsize) <=
790 		LIO_SOFT_COMMAND_BUFFER_SIZE,
791 		("%s, %d, offset + datasize + rdatasize + ctxsize > LIO_SOFT_COMMAND_BUFFER_SIZE",
792 		 __func__, __LINE__));
793 
794 	mtx_lock(&oct->sc_buf_pool.lock);
795 
796 	if (STAILQ_EMPTY(&oct->sc_buf_pool.head)) {
797 		mtx_unlock(&oct->sc_buf_pool.lock);
798 		return (NULL);
799 	}
800 	tmp = STAILQ_LAST(&oct->sc_buf_pool.head, lio_stailq_node, entries);
801 
802 	STAILQ_REMOVE(&oct->sc_buf_pool.head, tmp, lio_stailq_node, entries);
803 
804 	atomic_add_int(&oct->sc_buf_pool.alloc_buf_count, 1);
805 
806 	mtx_unlock(&oct->sc_buf_pool.lock);
807 
808 	sc = (struct lio_soft_command *)tmp;
809 
810 	dma_addr = sc->dma_addr;
811 	size = sc->size;
812 
813 	bzero(sc, sc->size);
814 
815 	sc->dma_addr = dma_addr;
816 	sc->size = size;
817 
818 	if (ctxsize) {
819 		sc->ctxptr = (uint8_t *)sc + offset;
820 		sc->ctxsize = ctxsize;
821 	}
822 
823 	/* Start data at 128 byte boundary */
824 	offset = (offset + ctxsize + 127) & 0xffffff80;
825 
826 	if (datasize) {
827 		sc->virtdptr = (uint8_t *)sc + offset;
828 		sc->dmadptr = dma_addr + offset;
829 		sc->datasize = datasize;
830 	}
831 	/* Start rdata at 128 byte boundary */
832 	offset = (offset + datasize + 127) & 0xffffff80;
833 
834 	if (rdatasize) {
835 		KASSERT(rdatasize >= 16, ("%s, %d, rdatasize < 16", __func__,
836 					  __LINE__));
837 		sc->virtrptr = (uint8_t *)sc + offset;
838 		sc->dmarptr = dma_addr + offset;
839 		sc->rdatasize = rdatasize;
840 		sc->status_word = (uint64_t *)((uint8_t *)(sc->virtrptr) +
841 					       rdatasize - 8);
842 	}
843 	return (sc);
844 }
845 
846 void
847 lio_free_soft_command(struct octeon_device *oct,
848 		      struct lio_soft_command *sc)
849 {
850 
851 	mtx_lock(&oct->sc_buf_pool.lock);
852 
853 	STAILQ_INSERT_TAIL(&oct->sc_buf_pool.head, &sc->node, entries);
854 
855 	atomic_subtract_int(&oct->sc_buf_pool.alloc_buf_count, 1);
856 
857 	mtx_unlock(&oct->sc_buf_pool.lock);
858 }
859