1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2023 Tintri by DDN, Inc. All rights reserved.
14  */
15 
16 /*
 * This file contains the code necessary to send SCSI commands to the HBA.
18  */
19 #include <smartpqi.h>
20 
21 /*
22  * []------------------------------------------------------------------[]
23  * | Forward declarations for support/utility functions			|
24  * []------------------------------------------------------------------[]
25  */
26 static void aio_io_complete(pqi_io_request_t *io, void *context);
27 static void raid_io_complete(pqi_io_request_t *io, void *context);
28 static void build_aio_sg_list(pqi_state_t *s,
29 	pqi_aio_path_request_t *rqst, pqi_cmd_t *cmd, pqi_io_request_t *);
30 static void build_raid_sg_list(pqi_state_t *s,
31 	pqi_raid_path_request_t *rqst, pqi_cmd_t *cmd, pqi_io_request_t *);
32 static pqi_io_request_t *setup_aio_request(pqi_state_t *s, pqi_cmd_t *cmd);
33 static pqi_io_request_t *setup_raid_request(pqi_state_t *s, pqi_cmd_t *cmd);
34 static uint32_t read_heartbeat_counter(pqi_state_t *s);
35 static void take_ctlr_offline(pqi_state_t *s);
36 static uint32_t free_elem_count(pqi_index_t pi, pqi_index_t ci,
37 	uint32_t per_iq);
38 static void ack_event(pqi_state_t *s, pqi_event_t *e);
39 static boolean_t is_aio_enabled(pqi_device_t *d);
40 static void lun_reset_worker(void *v);
41 static void lun_reset_complete(pqi_io_request_t *io, void *ctx);
42 
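/*
 * DIV_UP(n, d) -- ceiling division, e.g. DIV_UP(100, 64) == 2. Used below
 * to convert an IU length into a count of operational queue elements.
 */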
#define	DIV_UP(n, d) (((n) + ((d) - 1)) / (d))
44 
45 /*
46  * []------------------------------------------------------------------[]
47  * | Main entry points in file.						|
48  * []------------------------------------------------------------------[]
49  */
50 
51 int pqi_do_reset_lun = -1;
52 int pqi_do_reset_ctlr = -1;
53 /*
54  * pqi_watchdog -- interrupt count and/or heartbeat must increase over time.
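 *
 * If neither has advanced since the last pass the controller is taken
 * offline; otherwise the watchdog re-arms itself via timeout().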
55  */
56 void
57 pqi_watchdog(void *v)
58 {
59 	pqi_state_t	*s = v;
60 	uint32_t	hb;
61 
62 	if (pqi_is_offline(s))
63 		return;
64 
65 	hb = read_heartbeat_counter(s);
66 	if ((s->s_last_intr_count == s->s_intr_count) &&
67 	    (s->s_last_heartbeat_count == hb)) {
68 		dev_err(s->s_dip, CE_NOTE, "No heartbeat");
69 		pqi_show_dev_state(s);
70 		take_ctlr_offline(s);
71 	} else {
72 		if (pqi_do_reset_ctlr == s->s_instance) {
73 			pqi_do_reset_ctlr = -1;
74 			take_ctlr_offline(s);
75 		} else {
76 			s->s_last_intr_count = s->s_intr_count;
77 			s->s_last_heartbeat_count = hb;
78 			s->s_watchdog = timeout(pqi_watchdog, s,
79 			    drv_usectohz(WATCHDOG));
80 		}
81 	}
82 }
83 
84 /*
85  * pqi_start_io -- queues command to HBA.
86  *
 * This function is called either from the upper layer with a non-NULL
 * io argument or from the interrupt handler to load the outgoing queue
 * with more commands.
90  */
91 void
92 pqi_start_io(pqi_state_t *s, pqi_queue_group_t *qg, pqi_path_t path,
93     pqi_io_request_t *io)
94 {
95 	pqi_iu_header_t	*rqst;
96 	size_t		iu_len;
97 	size_t		copy_to_end;
98 	pqi_index_t	iq_pi;
99 	pqi_index_t	iq_ci;
100 	uint32_t	elem_needed;
101 	uint32_t	elem_to_end;
102 	caddr_t		next_elem;
103 	int		sending		= 0;
104 
105 	mutex_enter(&qg->submit_lock[path]);
106 	if (io != NULL) {
107 		io->io_queue_group = qg;
108 		io->io_queue_path = path;
109 		list_insert_tail(&qg->request_list[path], io);
	}

113 	iq_pi = qg->iq_pi_copy[path];
114 	while ((io = list_remove_head(&qg->request_list[path])) != NULL) {
115 
116 		/* ---- Primary cause for !active is controller failure ---- */
117 		if (qg->qg_active == B_FALSE) {
118 			pqi_cmd_t	*c = io->io_cmd;
119 
120 			mutex_enter(&c->pc_device->pd_mutex);
121 			/*
122 			 * When a command is failed it will be removed from
123 			 * the queue group if pc_io_rqst is not NULL. Since
			 * we have already removed the command from the list,
			 * we shouldn't attempt to do so a second time.
126 			 */
127 			c->pc_io_rqst = NULL;
128 			(void) pqi_fail_cmd(io->io_cmd, CMD_DEV_GONE,
129 			    STAT_TERMINATED);
130 			mutex_exit(&c->pc_device->pd_mutex);
131 			continue;
132 		}
133 
134 		rqst = io->io_iu;
135 		iu_len = rqst->iu_length + PQI_REQUEST_HEADER_LENGTH;
136 		elem_needed = DIV_UP(iu_len, PQI_OPERATIONAL_IQ_ELEMENT_LENGTH);
137 		(void) ddi_dma_sync(s->s_queue_dma->handle,
138 		    (uintptr_t)qg->iq_ci[path] -
139 		    (uintptr_t)s->s_queue_dma->alloc_memory, sizeof (iq_ci),
140 		    DDI_DMA_SYNC_FORCPU);
141 		iq_ci = *qg->iq_ci[path];
142 
143 		if (elem_needed > free_elem_count(iq_pi, iq_ci,
144 		    s->s_num_elements_per_iq)) {
145 			list_insert_head(&qg->request_list[path], io);
146 			break;
147 		}
148 
149 		if (pqi_cmd_action(io->io_cmd, PQI_CMD_START) == PQI_CMD_FAIL)
150 			continue;
151 
152 		io->io_pi = iq_pi;
153 		rqst->iu_id = qg->oq_id;
154 		next_elem = qg->iq_element_array[path] +
155 		    (iq_pi * PQI_OPERATIONAL_IQ_ELEMENT_LENGTH);
156 		elem_to_end = s->s_num_elements_per_iq - iq_pi;
157 		if (elem_needed <= elem_to_end) {
158 			(void) memcpy(next_elem, rqst, iu_len);
159 			(void) ddi_dma_sync(s->s_queue_dma->handle,
160 			    (uintptr_t)next_elem -
161 			    (uintptr_t)s->s_queue_dma->alloc_memory, iu_len,
162 			    DDI_DMA_SYNC_FORDEV);
163 		} else {
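			/* ---- IU wraps past the queue end; copy in two pieces ---- */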
164 			copy_to_end = elem_to_end *
165 			    PQI_OPERATIONAL_IQ_ELEMENT_LENGTH;
166 			(void) memcpy(next_elem, rqst, copy_to_end);
167 			(void) ddi_dma_sync(s->s_queue_dma->handle,
168 			    (uintptr_t)next_elem -
169 			    (uintptr_t)s->s_queue_dma->alloc_memory,
170 			    copy_to_end, DDI_DMA_SYNC_FORDEV);
171 			(void) memcpy(qg->iq_element_array[path],
172 			    (caddr_t)rqst + copy_to_end,
173 			    iu_len - copy_to_end);
174 			(void) ddi_dma_sync(s->s_queue_dma->handle,
175 			    0, iu_len - copy_to_end, DDI_DMA_SYNC_FORDEV);
176 		}
177 		sending += elem_needed;
178 
179 		iq_pi = (iq_pi + elem_needed) % s->s_num_elements_per_iq;
180 	}
181 
182 	qg->submit_count += sending;
183 	if (iq_pi != qg->iq_pi_copy[path]) {
184 		qg->iq_pi_copy[path] = iq_pi;
185 		ddi_put32(s->s_datap, qg->iq_pi[path], iq_pi);
186 	} else {
187 		ASSERT0(sending);
188 	}
189 	mutex_exit(&qg->submit_lock[path]);
190 }
191 
192 int
193 pqi_transport_command(pqi_state_t *s, pqi_cmd_t *cmd)
194 {
195 	pqi_device_t		*devp = cmd->pc_device;
196 	int			path;
197 	pqi_io_request_t	*io;
198 
199 	if (is_aio_enabled(devp) == B_TRUE) {
200 		path = AIO_PATH;
201 		io = setup_aio_request(s, cmd);
202 	} else {
203 		path = RAID_PATH;
204 		io = setup_raid_request(s, cmd);
205 	}
206 
207 	if (io == NULL)
208 		return (TRAN_BUSY);
209 
210 	cmd->pc_io_rqst = io;
211 	(void) pqi_cmd_action(cmd, PQI_CMD_QUEUE);
212 
213 	pqi_start_io(s, &s->s_queue_groups[PQI_DEFAULT_QUEUE_GROUP],
214 	    path, io);
215 
216 	return (TRAN_ACCEPT);
217 }
218 
219 void
220 pqi_do_rescan(void *v)
221 {
222 	pqi_state_t	*s	= v;
223 
224 	ndi_devi_enter(scsi_vhci_dip);
225 	ndi_devi_enter(s->s_dip);
226 	pqi_rescan_devices(s);
227 	(void) pqi_config_all(s->s_dip, s);
228 	ndi_devi_exit(s->s_dip);
229 	ndi_devi_exit(scsi_vhci_dip);
230 }
231 
232 void
233 pqi_event_worker(void *v)
234 {
235 	pqi_state_t	*s		= v;
236 	int		i;
237 	pqi_event_t	*e;
238 	boolean_t	non_heartbeat	= B_FALSE;
239 
240 	if (pqi_is_offline(s))
241 		return;
242 
243 	e = s->s_events;
244 	for (i = 0; i < PQI_NUM_SUPPORTED_EVENTS; i++) {
245 		if (e->ev_pending == B_TRUE) {
246 			e->ev_pending = B_FALSE;
247 			ack_event(s, e);
248 			if (pqi_map_event(PQI_EVENT_TYPE_HEARTBEAT) != i)
249 				non_heartbeat = B_TRUE;
250 		}
251 		e++;
252 	}
253 
254 	if (non_heartbeat == B_TRUE)
255 		pqi_do_rescan(s);
256 }
257 
258 /*
 * pqi_fail_cmd -- fail the command with the given reason and statistics.
260  */
261 pqi_cmd_action_t
262 pqi_fail_cmd(pqi_cmd_t *cmd, uchar_t reason, uint_t stats)
263 {
264 	struct scsi_pkt		*pkt	= CMD2PKT(cmd);
265 
266 	pkt->pkt_reason = reason;
267 	pkt->pkt_statistics = stats;
268 
269 	return (pqi_cmd_action_nolock(cmd, PQI_CMD_FAIL));
270 }
271 
272 void
273 pqi_fail_drive_cmds(pqi_device_t *d, uchar_t reason)
274 {
275 	pqi_cmd_t	*c, *next_c;
276 
277 	mutex_enter(&d->pd_mutex);
278 
279 	c = list_head(&d->pd_cmd_list);
280 	while (c != NULL) {
281 		next_c = list_next(&d->pd_cmd_list, c);
282 		if (pqi_fail_cmd(c, reason, STAT_BUS_RESET) !=
283 		    PQI_CMD_START) {
284 			/*
285 			 * The command can't be terminated in the driver because
286 			 * it was already handed off to the HBA and the driver
287 			 * will have to wait for completion. The reason is
			 * will have to wait for completion. The reason is
			 * that the HBA reports completions by slot, not by
			 * a pointer to a command. If the code were to cancel
			 * an outstanding command, that slot could be reused
			 * by another command, and when the completion
			 * interrupt arrived the driver would report that a
			 * command had completed when in fact it was the
			 * prior, canceled command.
296 			 * Should the command fail to complete due to an HBA
297 			 * error the command will be forced through to
298 			 * completion during a timeout scan that occurs on
299 			 * another thread.
300 			 */
301 			d->pd_killed++;
302 		} else {
303 			d->pd_posted++;
304 		}
305 		c = next_c;
306 	}
307 
308 	mutex_exit(&d->pd_mutex);
309 }
310 
311 uint32_t
312 pqi_disable_intr(pqi_state_t *s)
313 {
314 	uint32_t	db;
315 	uint32_t	rval;
316 
317 	rval = db = G32(s, sis_host_to_ctrl_doorbell);
318 	db &= ~(SIS_ENABLE_MSIX | SIS_ENABLE_INTX);
319 	S32(s, sis_host_to_ctrl_doorbell, db);
320 	return (rval);
321 }
322 
323 void
324 pqi_enable_intr(pqi_state_t *s, uint32_t old_state)
325 {
326 	S32(s, sis_host_to_ctrl_doorbell, old_state);
327 }
328 
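/*
 * Closure passed to lun_reset_worker() through the events taskq; it
 * carries the soft state and target device pointers.
 */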
329 typedef struct reset_closure {
330 	pqi_state_t	*rc_s;
331 	pqi_device_t	*rc_d;
332 } *reset_closure_t;
333 
334 /*
335  * pqi_lun_reset -- set up callback to reset the device
336  *
 * A taskq is used here because the call tree can come from the interrupt
 * routine (pqi_process_io_intr -> aio_io_complete -> SCSA -> tran_reset ->
339  * pqi_lun_reset). If pqi_lun_reset were to actually do the reset work it would
340  * then wait for an interrupt which would never arrive since the current thread
341  * would be the interrupt thread. So, start a task to reset the device and
342  * wait for completion.
343  */
344 void
345 pqi_lun_reset(pqi_state_t *s, pqi_device_t *d)
346 {
347 	reset_closure_t	r = kmem_alloc(sizeof (struct reset_closure), KM_SLEEP);
348 
349 	r->rc_s = s;
350 	r->rc_d = d;
351 	(void) ddi_taskq_dispatch(s->s_events_taskq, lun_reset_worker, r, 0);
352 }
353 
354 /*
355  * []------------------------------------------------------------------[]
356  * | Support/utility functions for main entry points			|
357  * []------------------------------------------------------------------[]
358  */
359 
360 static uint32_t
361 count_drive_cmds(pqi_device_t *d)
362 {
363 	pqi_cmd_t	*c;
364 	uint32_t	count = 0;
365 
366 	mutex_enter(&d->pd_mutex);
367 	c = list_head(&d->pd_cmd_list);
368 	while (c != NULL) {
369 		c = list_next(&d->pd_cmd_list, c);
370 		count++;
371 	}
372 	mutex_exit(&d->pd_mutex);
373 
374 	return (count);
375 }
376 
377 static uint32_t
count_outstanding_cmds(pqi_state_t *s)
379 {
380 	uint32_t	count = 0;
381 	pqi_device_t	*d;
382 
383 	mutex_enter(&s->s_mutex);
384 	d = list_head(&s->s_devnodes);
385 	while (d != NULL) {
386 		count += count_drive_cmds(d);
387 		d = list_next(&s->s_devnodes, d);
388 	}
389 	mutex_exit(&s->s_mutex);
390 
391 	return (count);
392 }
393 
394 static void
395 lun_reset_worker(void *v)
396 {
397 	reset_closure_t			r = v;
398 	pqi_state_t			*s;
399 	pqi_device_t			*d;
400 	pqi_io_request_t		*io;
401 	ksema_t				sema;
402 	pqi_task_management_rqst_t	*rqst;
403 	struct pqi_cmd			cmd;
404 
405 	s = r->rc_s;
406 	d = r->rc_d;
407 
408 	pqi_fail_drive_cmds(d, CMD_RESET);
409 	sema_init(&sema, 0, NULL, SEMA_DRIVER, NULL);
410 
411 	bzero(&cmd, sizeof (cmd));
412 	mutex_init(&cmd.pc_mutex, NULL, MUTEX_DRIVER, NULL);
413 
414 	if ((io = pqi_alloc_io(s)) == NULL) {
415 		mutex_destroy(&cmd.pc_mutex);
416 		kmem_free(r, sizeof (*r));
417 		return;
418 	}
419 	io->io_cb = lun_reset_complete;
420 	io->io_context = &sema;
421 	io->io_cmd = &cmd;
422 	cmd.pc_io_rqst = io;
423 	cmd.pc_softc = s;
424 	cmd.pc_device = &s->s_special_device;
425 
426 	(void) pqi_cmd_action(&cmd, PQI_CMD_QUEUE);
427 
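	/* ---- Build a LUN RESET task management IU for the device ---- */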
428 	rqst = io->io_iu;
429 	(void) memset(rqst, 0, sizeof (*rqst));
430 
431 	rqst->header.iu_type = PQI_REQUEST_IU_TASK_MANAGEMENT;
432 	rqst->header.iu_length = sizeof (*rqst) - PQI_REQUEST_HEADER_LENGTH;
433 	rqst->request_id = PQI_MAKE_REQID(io->io_index, io->io_gen);
434 	(void) memcpy(rqst->lun_number, d->pd_scsi3addr,
435 	    sizeof (rqst->lun_number));
436 	rqst->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;
437 
438 	pqi_start_io(s, &s->s_queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH,
439 	    io);
440 
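	/* ---- Wait for lun_reset_complete() to post the semaphore ---- */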
441 	sema_p(&sema);
442 
443 	(void) pqi_cmd_action(&cmd, PQI_CMD_CMPLT);
444 	mutex_destroy(&cmd.pc_mutex);
445 	kmem_free(r, sizeof (*r));
446 }
447 
448 static void
449 lun_reset_complete(pqi_io_request_t *io __unused, void *ctx)
450 {
451 	sema_v((ksema_t *)ctx);
452 }
453 
454 static void
455 send_event_ack(pqi_state_t *s, pqi_event_acknowledge_request_t *rqst)
456 {
457 	pqi_queue_group_t	*qg;
458 	caddr_t			next_element;
459 	pqi_index_t		iq_ci;
460 	pqi_index_t		iq_pi;
461 	int			ms_timeo = 1000 * 10;
462 
463 	qg = &s->s_queue_groups[PQI_DEFAULT_QUEUE_GROUP];
464 	rqst->header.iu_id = qg->oq_id;
465 
466 	for (;;) {
467 		mutex_enter(&qg->submit_lock[RAID_PATH]);
468 		iq_pi = qg->iq_pi_copy[RAID_PATH];
469 		iq_ci = ddi_get32(s->s_queue_dma->acc, qg->iq_ci[RAID_PATH]);
470 
471 		if (free_elem_count(iq_pi, iq_ci, s->s_num_elements_per_iq))
472 			break;
473 
474 		mutex_exit(&qg->submit_lock[RAID_PATH]);
475 		if (pqi_is_offline(s))
476 			return;
477 	}
478 	next_element = qg->iq_element_array[RAID_PATH] +
479 	    (iq_pi * PQI_OPERATIONAL_IQ_ELEMENT_LENGTH);
480 
481 	(void) memcpy(next_element, rqst, sizeof (*rqst));
482 	(void) ddi_dma_sync(s->s_queue_dma->handle, 0, 0, DDI_DMA_SYNC_FORDEV);
483 
484 	iq_pi = (iq_pi + 1) % s->s_num_elements_per_iq;
485 	qg->iq_pi_copy[RAID_PATH] = iq_pi;
486 
487 	ddi_put32(s->s_datap, qg->iq_pi[RAID_PATH], iq_pi);
488 
489 	/*
	 * Special case processing is required for events. The driver must
491 	 * wait until the acknowledgement is processed before proceeding.
492 	 * Unfortunately, the HBA doesn't provide an interrupt which means
	 * the code must busy wait for up to 10 seconds.
495 	 */
496 	while (ms_timeo--) {
497 		drv_usecwait(1000);
498 		iq_ci = ddi_get32(s->s_queue_dma->acc, qg->iq_ci[RAID_PATH]);
499 		if (iq_pi == iq_ci)
500 			break;
501 	}
502 
503 	mutex_exit(&qg->submit_lock[RAID_PATH]);
504 }
505 
506 static void
507 ack_event(pqi_state_t *s, pqi_event_t *e)
508 {
509 	pqi_event_acknowledge_request_t	rqst;
510 
511 	(void) memset(&rqst, 0, sizeof (rqst));
512 	rqst.header.iu_type = PQI_REQUEST_IU_ACKNOWLEDGE_VENDOR_EVENT;
513 	rqst.header.iu_length = sizeof (rqst) - PQI_REQUEST_HEADER_LENGTH;
514 	rqst.event_type = e->ev_type;
515 	rqst.event_id = e->ev_id;
516 	rqst.additional_event_id = e->ev_additional;
517 
518 	send_event_ack(s, &rqst);
519 }
520 
521 static pqi_io_request_t *
522 setup_aio_request(pqi_state_t *s, pqi_cmd_t *cmd)
523 {
524 	pqi_io_request_t	*io;
525 	pqi_aio_path_request_t	*rqst;
526 	pqi_device_t		*devp = cmd->pc_device;
527 
528 	/* ---- Most likely received a signal during a cv_wait ---- */
529 	if ((io = pqi_alloc_io(s)) == NULL)
530 		return (NULL);
531 
532 	io->io_cb = aio_io_complete;
533 	io->io_cmd = cmd;
534 	io->io_raid_bypass = 0;
535 
536 	rqst = io->io_iu;
537 	(void) memset(rqst, 0, sizeof (*rqst));
538 
539 	rqst->header.iu_type = PQI_REQUEST_IU_AIO_PATH_IO;
540 	rqst->nexus_id = devp->pd_aio_handle;
541 	rqst->buffer_length = cmd->pc_dma_count;
542 	rqst->task_attribute = SOP_TASK_ATTRIBUTE_SIMPLE;
543 	rqst->request_id = PQI_MAKE_REQID(io->io_index, io->io_gen);
544 	rqst->error_index = io->io_index;
545 	rqst->cdb_length = cmd->pc_cmdlen;
546 	(void) memcpy(rqst->cdb, cmd->pc_cdb, cmd->pc_cmdlen);
547 	(void) memcpy(rqst->lun_number, devp->pd_scsi3addr,
548 	    sizeof (rqst->lun_number));
549 
550 	if (cmd->pc_flags & PQI_FLAG_DMA_VALID) {
551 		if (cmd->pc_flags & PQI_FLAG_IO_READ)
552 			rqst->data_direction = SOP_READ_FLAG;
553 		else
554 			rqst->data_direction = SOP_WRITE_FLAG;
555 	} else {
556 		rqst->data_direction = SOP_NO_DIRECTION_FLAG;
557 	}
558 
559 	build_aio_sg_list(s, rqst, cmd, io);
560 	return (io);
561 }
562 
563 static pqi_io_request_t *
564 setup_raid_request(pqi_state_t *s, pqi_cmd_t *cmd)
565 {
566 	pqi_io_request_t	*io;
567 	pqi_raid_path_request_t	*rqst;
568 	pqi_device_t		*devp = cmd->pc_device;
569 
570 	/* ---- Most likely received a signal during a cv_wait ---- */
571 	if ((io = pqi_alloc_io(s)) == NULL)
572 		return (NULL);
573 
574 	io->io_cb = raid_io_complete;
575 	io->io_cmd = cmd;
576 	io->io_raid_bypass = 0;
577 
578 	rqst = io->io_iu;
579 	(void) memset(rqst, 0, sizeof (*rqst));
580 	rqst->header.iu_type = PQI_REQUEST_IU_RAID_PATH_IO;
581 	rqst->rp_data_len = cmd->pc_dma_count;
582 	rqst->rp_task_attr = SOP_TASK_ATTRIBUTE_SIMPLE;
583 	rqst->rp_id = PQI_MAKE_REQID(io->io_index, io->io_gen);
584 	rqst->rp_error_index = io->io_index;
585 	(void) memcpy(rqst->rp_lun, devp->pd_scsi3addr, sizeof (rqst->rp_lun));
586 	(void) memcpy(rqst->rp_cdb, cmd->pc_cdb, cmd->pc_cmdlen);
587 
588 	ASSERT(cmd->pc_cmdlen <= 16);
589 	rqst->rp_additional_cdb = SOP_ADDITIONAL_CDB_BYTES_0;
590 
591 	if (cmd->pc_flags & PQI_FLAG_DMA_VALID) {
592 		if (cmd->pc_flags & PQI_FLAG_IO_READ)
593 			rqst->rp_data_dir = SOP_READ_FLAG;
594 		else
595 			rqst->rp_data_dir = SOP_WRITE_FLAG;
596 	} else {
597 		rqst->rp_data_dir = SOP_NO_DIRECTION_FLAG;
598 	}
599 
600 	build_raid_sg_list(s, rqst, cmd, io);
601 	return (io);
602 }
603 
604 pqi_cmd_t *
605 pqi_process_comp_ring(pqi_state_t *s __unused)
606 {
607 	return (NULL);
608 }
609 
610 static void
611 raid_io_complete(pqi_io_request_t *io, void *context)
612 {
613 	/*
614 	 * ---- XXX Not sure if this complete function will be the same
615 	 * or different in the end. If it's the same this will be removed
	 * and aio_io_complete will have its name changed to something
617 	 * more generic.
618 	 */
619 	aio_io_complete(io, context);
620 }
621 
622 /*
623  * special_error_check -- See if sense buffer matches "offline" status.
624  *
625  * spc3r23 section 4.5.6 -- Sense key and sense code definitions.
626  * Sense key == 5 (KEY_ILLEGAL_REQUEST) indicates one of several conditions
627  * a) Command addressed to incorrect logical unit.
628  * b) Command had an invalid task attribute.
629  * ...
 * Table 28 also shows that an ASC of 0x26 with an ASCQ of 0x00 indicates
 * INVALID FIELD IN PARAMETER LIST.
 * This combination of KEY/ASC/ASCQ has only been observed when a device or
 * cable is pulled from the system along with a Hotplug event.
634  * Without documentation it's only a guess, but it's the best that's available.
635  * So, if the conditions are true the command packet pkt_reason will be changed
636  * to CMD_DEV_GONE which causes MPxIO to switch to the other path and the
637  * Hotplug event will cause a scan to occur which removes other inactive
638  * devices in case of a cable pull.
639  */
640 boolean_t
641 special_error_check(pqi_cmd_t *cmd)
642 {
643 	struct scsi_arq_status *arq;
644 
645 	/* LINTED E_BAD_PTR_CAST_ALIGN */
646 	arq = (struct scsi_arq_status *)cmd->pc_pkt->pkt_scbp;
647 
648 	if (((*cmd->pc_pkt->pkt_scbp & STATUS_MASK) == STATUS_CHECK) &&
649 	    (arq->sts_sensedata.es_key == KEY_ILLEGAL_REQUEST) &&
650 	    (arq->sts_sensedata.es_add_code == 0x26) &&
651 	    (arq->sts_sensedata.es_qual_code == 0)) {
652 		return (B_TRUE);
653 	} else {
654 		return (B_FALSE);
655 	}
656 }
657 
658 static void
659 aio_io_complete(pqi_io_request_t *io, void *context __unused)
660 {
661 	pqi_cmd_t	*cmd = io->io_cmd;
662 	struct scsi_pkt	*pkt = CMD2PKT(cmd);
663 	boolean_t	pkt_ok = B_FALSE;
664 
665 	if (cmd->pc_flags & (PQI_FLAG_IO_READ | PQI_FLAG_IO_IOPB))
666 		(void) ddi_dma_sync(cmd->pc_dmahdl, 0, 0, DDI_DMA_SYNC_FORCPU);
667 
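	/* ---- Map PQI I/O status to SCSA pkt_reason/pkt_state ---- */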
668 	switch (io->io_status) {
669 	case PQI_DATA_IN_OUT_UNDERFLOW:
670 		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
671 		    STATE_SENT_CMD | STATE_GOT_STATUS;
672 		if (pkt->pkt_resid == cmd->pc_dma_count) {
673 			pkt->pkt_reason = CMD_INCOMPLETE;
674 		} else {
675 			pkt->pkt_state |= STATE_XFERRED_DATA;
676 			pkt->pkt_reason = CMD_CMPLT;
677 		}
678 		break;
679 
680 	case PQI_DATA_IN_OUT_GOOD:
681 		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
682 		    STATE_SENT_CMD | STATE_GOT_STATUS;
683 		if (cmd->pc_flags & PQI_FLAG_DMA_VALID)
684 			pkt->pkt_state |= STATE_XFERRED_DATA;
685 		pkt->pkt_reason = CMD_CMPLT;
686 		pkt->pkt_resid = 0;
687 		pkt->pkt_statistics = 0;
688 		pkt_ok = B_TRUE;
689 		break;
690 
691 	case PQI_DATA_IN_OUT_ERROR:
692 		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
693 		    STATE_SENT_CMD;
694 		if (pkt->pkt_resid != cmd->pc_dma_count) {
695 			pkt->pkt_state |= STATE_XFERRED_DATA;
696 			pkt->pkt_reason = CMD_CMPLT;
697 		} else {
698 			pkt->pkt_reason = CMD_CMPLT;
699 		}
700 		break;
701 
702 	case PQI_DATA_IN_OUT_PROTOCOL_ERROR:
703 		pkt->pkt_reason = CMD_TERMINATED;
704 		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET;
705 		break;
706 
707 	case PQI_DATA_IN_OUT_HARDWARE_ERROR:
708 		pkt->pkt_reason = CMD_CMPLT;
709 		pkt->pkt_state |= STATE_GOT_BUS;
710 		break;
711 
712 	default:
713 		pkt->pkt_reason = CMD_INCOMPLETE;
714 		break;
715 	}
716 
717 	if (pkt_ok == B_FALSE)
718 		atomic_inc_32(&cmd->pc_device->pd_sense_errors);
719 
720 	if (special_error_check(cmd) == B_TRUE) {
721 		pkt->pkt_reason = CMD_DEV_GONE;
722 		pkt->pkt_statistics = STAT_TERMINATED;
723 	}
724 	(void) pqi_cmd_action(cmd, PQI_CMD_CMPLT);
725 }
726 
727 static void
728 fail_outstanding_cmds(pqi_state_t *s)
729 {
730 	pqi_device_t		*devp;
731 
732 	ASSERT(MUTEX_HELD(&s->s_mutex));
733 
734 	pqi_fail_drive_cmds(&s->s_special_device, CMD_TRAN_ERR);
735 	for (devp = list_head(&s->s_devnodes); devp != NULL;
736 	    devp = list_next(&s->s_devnodes, devp)) {
737 		pqi_fail_drive_cmds(devp, CMD_TRAN_ERR);
738 	}
739 }
740 
741 static void
742 set_sg_descriptor(pqi_sg_entry_t *sg, ddi_dma_cookie_t *cookie)
743 {
744 	sg->sg_addr = cookie->dmac_laddress;
745 	sg->sg_len = cookie->dmac_size;
746 	sg->sg_flags = 0;
747 }
748 
749 static void
750 build_aio_sg_list(pqi_state_t *s, pqi_aio_path_request_t *rqst,
751     pqi_cmd_t *cmd, pqi_io_request_t *io)
752 {
753 	int			i;
754 	int			max_sg_per_iu;
755 	uint16_t		iu_length;
756 	uint8_t			chained;
757 	uint8_t			num_sg_in_iu	= 0;
758 	ddi_dma_cookie_t	*cookies;
759 	pqi_sg_entry_t		*sg;
760 
761 	iu_length = offsetof(struct pqi_aio_path_request, ap_sglist) -
762 	    PQI_REQUEST_HEADER_LENGTH;
763 
764 	if (cmd->pc_dmaccount == 0)
765 		goto out;
766 	sg = rqst->ap_sglist;
767 	cookies = cmd->pc_cached_cookies;
768 	max_sg_per_iu = s->s_max_sg_per_iu - 1;
769 	i = 0;
770 	chained = 0;
771 
772 	for (;;) {
773 		set_sg_descriptor(sg, cookies);
774 		if (!chained)
775 			num_sg_in_iu++;
776 		i++;
777 		if (i == cmd->pc_dmaccount)
778 			break;
779 		sg++;
780 		cookies++;
781 		if (i == max_sg_per_iu) {
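			/* ---- IU is full; chain remaining SG entries ---- */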
782 			sg->sg_addr = io->io_sg_chain_dma->dma_addr;
783 			sg->sg_len = (cmd->pc_dmaccount - num_sg_in_iu) *
784 			    sizeof (*sg);
785 			sg->sg_flags = CISS_SG_CHAIN;
786 			chained = 1;
787 			num_sg_in_iu++;
788 			sg = (pqi_sg_entry_t *)
789 			    io->io_sg_chain_dma->alloc_memory;
790 		}
791 	}
792 	sg->sg_flags = CISS_SG_LAST;
793 	rqst->partial = chained;
794 	if (chained) {
795 		(void) ddi_dma_sync(io->io_sg_chain_dma->handle, 0, 0,
796 		    DDI_DMA_SYNC_FORDEV);
797 	}
798 	iu_length += num_sg_in_iu * sizeof (*sg);
799 
800 out:
801 	rqst->header.iu_length = iu_length;
802 	rqst->num_sg_descriptors = num_sg_in_iu;
803 }
804 
805 static void
806 build_raid_sg_list(pqi_state_t *s, pqi_raid_path_request_t *rqst,
807     pqi_cmd_t *cmd, pqi_io_request_t *io)
808 {
809 	int			i		= 0;
810 	int			max_sg_per_iu;
811 	int			num_sg_in_iu	= 0;
812 	uint16_t		iu_length;
813 	uint8_t			chained		= 0;
814 	ddi_dma_cookie_t	*cookies;
815 	pqi_sg_entry_t		*sg;
816 
817 	iu_length = offsetof(struct pqi_raid_path_request, rp_sglist) -
818 	    PQI_REQUEST_HEADER_LENGTH;
819 
820 	if (cmd->pc_dmaccount == 0)
821 		goto out;
822 
823 	sg = rqst->rp_sglist;
824 	cookies = cmd->pc_cached_cookies;
825 	max_sg_per_iu = s->s_max_sg_per_iu - 1;
826 
827 	for (;;) {
828 		set_sg_descriptor(sg, cookies);
829 		if (!chained)
830 			num_sg_in_iu++;
831 		i++;
832 		if (i == cmd->pc_dmaccount)
833 			break;
834 		sg++;
835 		cookies++;
836 		if (i == max_sg_per_iu) {
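			/* ---- IU is full; chain remaining SG entries ---- */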
837 			ASSERT(io->io_sg_chain_dma != NULL);
838 			sg->sg_addr = io->io_sg_chain_dma->dma_addr;
839 			sg->sg_len = (cmd->pc_dmaccount - num_sg_in_iu) *
840 			    sizeof (*sg);
841 			sg->sg_flags = CISS_SG_CHAIN;
842 			chained = 1;
843 			num_sg_in_iu++;
844 			sg = (pqi_sg_entry_t *)
845 			    io->io_sg_chain_dma->alloc_memory;
846 		}
847 	}
848 	sg->sg_flags = CISS_SG_LAST;
849 	rqst->rp_partial = chained;
850 	if (chained) {
851 		(void) ddi_dma_sync(io->io_sg_chain_dma->handle, 0, 0,
852 		    DDI_DMA_SYNC_FORDEV);
853 	}
854 	iu_length += num_sg_in_iu * sizeof (*sg);
855 
856 out:
857 	rqst->header.iu_length = iu_length;
858 }
859 
860 static uint32_t
861 read_heartbeat_counter(pqi_state_t *s)
862 {
863 	return (ddi_get32(s->s_datap, s->s_heartbeat_counter));
864 }
865 
866 static void
867 take_ctlr_offline(pqi_state_t *s)
868 {
869 	int			num_passes = 5;
870 	int			i;
871 	pqi_device_t		*d;
872 	pqi_cmd_t		*c, *nc;
873 	pqi_io_request_t	*io;
874 	uint32_t		active_count;
875 
876 	/*
877 	 * 1) Why always panic here?
878 	 * Firmware resets don't work on the Microsemi HBA when the firmware
879 	 * is hung. The code as written fails outstanding commands and tries
 * to reset the HBA. Since the reset doesn't work, the HBA is left in an
881 	 * offline state and further commands sent (retries and new commands)
882 	 * are also failed. Eventually ZFS will panic with a deadman timer,
883 	 * but before that COMSTAR will see I/O requests error out and send
884 	 * I/O errors back to the client which causes corruption since these
 * errors are no different from those of a failing device. So,
886 	 * instead of trying to play nice the driver now panics which will
887 	 * allow HA to fail fast to the other node.
888 	 *
 * 2) Why not just remove this routine and call panic from the heartbeat
 * routine?
 * I'm hoping this is a temporary workaround. We have been asking
892 	 * for more documentation on the product and we've been told there isn't
 * any available.  It has been implied that some HBAs do support
 * firmware resets. Documentation would therefore enable the driver
 * to determine the model number and adjust its behavior, such as
 * panicking on a firmware hang or trying a reset.
897 	 */
898 	if (1)
899 		panic("Firmware hung");
900 
901 	d = &s->s_special_device;
902 	mutex_enter(&d->pd_mutex);
903 	while ((c = list_remove_head(&d->pd_cmd_list)) != NULL) {
904 		io = c->pc_io_rqst;
905 		io->io_status = PQI_DATA_IN_OUT_ERROR;
906 
907 		mutex_exit(&d->pd_mutex);
908 		(io->io_cb)(io, io->io_context);
909 		mutex_enter(&d->pd_mutex);
910 	}
911 	mutex_exit(&d->pd_mutex);
912 
913 	/*
	 * If pqi_reset_ctl() completes successfully the queue groups will be
	 * marked active again and the controller will be marked online.
916 	 */
917 	mutex_enter(&s->s_mutex);
918 	for (i = 0; i < s->s_num_queue_groups; i++)
919 		s->s_queue_groups[i].qg_active = B_FALSE;
920 	s->s_offline = B_TRUE;
921 	fail_outstanding_cmds(s);
922 	mutex_exit(&s->s_mutex);
923 
924 	/*
	 * All commands that could be canceled have been. It's possible there
	 * are commands currently running that are about to complete. Give
	 * them up to 5 seconds to finish. If they haven't completed by then
	 * they are most likely hung in the HBA firmware, so go ahead and
	 * reset the firmware.
930 	 */
931 	while (num_passes-- > 0) {
		active_count = count_outstanding_cmds(s);
933 		if (active_count == 0)
934 			break;
935 		drv_usecwait(MICROSEC);
936 	}
937 
938 	/*
	 * Any commands remaining are hung in the controller firmware, so
	 * go ahead and time them out so that the upper layers know what's
941 	 * happening.
942 	 */
943 	mutex_enter(&s->s_mutex);
944 	for (d = list_head(&s->s_devnodes); d != NULL;
945 	    d = list_next(&s->s_devnodes, d)) {
946 		mutex_enter(&d->pd_mutex);
947 		while ((c = list_head(&d->pd_cmd_list)) != NULL) {
948 			struct scsi_pkt *pkt = CMD2PKT(c);
949 
950 			nc = list_next(&d->pd_cmd_list, c);
951 			ASSERT(pkt);
952 			if (pkt != NULL) {
953 				pkt->pkt_reason = CMD_TIMEOUT;
954 				pkt->pkt_statistics = STAT_TIMEOUT;
955 			}
956 			(void) pqi_cmd_action_nolock(c, PQI_CMD_TIMEOUT);
957 			c = nc;
958 		}
959 		mutex_exit(&d->pd_mutex);
960 	}
961 	mutex_exit(&s->s_mutex);
962 
963 	cmn_err(CE_WARN, "Firmware Status: 0x%x", G32(s, sis_firmware_status));
964 
965 	if (pqi_reset_ctl(s) == B_FALSE) {
966 		cmn_err(CE_WARN, "Failed to reset controller");
967 		return;
968 	}
969 
970 	/*
971 	 * This will have the effect of releasing the device's dip
	 * structure from the NDI layer due to s_offline == B_TRUE.
973 	 */
974 	ndi_devi_enter(scsi_vhci_dip);
975 	ndi_devi_enter(s->s_dip);
976 	(void) pqi_config_all(s->s_dip, s);
977 	ndi_devi_exit(s->s_dip);
978 	ndi_devi_exit(scsi_vhci_dip);
979 }
980 
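/*
 * free_elem_count -- number of free elements in a circular queue.
 *
 * One element is always left unused so that a full queue can be
 * distinguished from an empty one (pi == ci means empty). For example,
 * with per_iq == 8, pi == 2 and ci == 6, used == 4 and 3 elements remain.
 */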
981 static uint32_t
982 free_elem_count(pqi_index_t pi, pqi_index_t ci, uint32_t per_iq)
983 {
984 	pqi_index_t	used;
985 	if (pi >= ci) {
986 		used = pi - ci;
987 	} else {
988 		used = per_iq - ci + pi;
989 	}
990 	return (per_iq - used - 1);
991 }
992 
993 static boolean_t
994 is_aio_enabled(pqi_device_t *d)
995 {
996 	return (d->pd_aio_enabled ? B_TRUE : B_FALSE);
997 }
998