/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2023 Tintri by DDN, Inc. All rights reserved.
 */

/*
 * This file contains the code necessary to send SCSI commands to the HBA.
 */
#include <smartpqi.h>

/*
 * []------------------------------------------------------------------[]
 * | Forward declarations for support/utility functions                  |
 * []------------------------------------------------------------------[]
 */
static void aio_io_complete(pqi_io_request_t *io, void *context);
static void raid_io_complete(pqi_io_request_t *io, void *context);
static void build_aio_sg_list(pqi_state_t *s,
    pqi_aio_path_request_t *rqst, pqi_cmd_t *cmd, pqi_io_request_t *);
static void build_raid_sg_list(pqi_state_t *s,
    pqi_raid_path_request_t *rqst, pqi_cmd_t *cmd, pqi_io_request_t *);
static pqi_io_request_t *setup_aio_request(pqi_state_t *s, pqi_cmd_t *cmd);
static pqi_io_request_t *setup_raid_request(pqi_state_t *s, pqi_cmd_t *cmd);
static uint32_t read_heartbeat_counter(pqi_state_t *s);
static void take_ctlr_offline(pqi_state_t *s);
static uint32_t free_elem_count(pqi_index_t pi, pqi_index_t ci,
    uint32_t per_iq);
static void ack_event(pqi_state_t *s, pqi_event_t *e);
static boolean_t is_aio_enabled(pqi_device_t *d);
static void lun_reset_worker(void *v);
static void lun_reset_complete(pqi_io_request_t *io, void *ctx);

#define	DIV_UP(n, d) (((n) + (d) - 1) / (d))
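
/*
 * DIV_UP() rounds an integer division up: DIV_UP(96, 64) expands to
 * (96 + 64 - 1) / 64 == 2, e.g. the number of 64-byte queue elements
 * needed to hold a 96-byte IU. Note that the expansion evaluates "d"
 * twice, so callers should avoid arguments with side effects.
 */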
/*
 * []------------------------------------------------------------------[]
 * | Main entry points in file.                                          |
 * []------------------------------------------------------------------[]
 */

int pqi_do_reset_lun = -1;
int pqi_do_reset_ctlr = -1;

/*
 * pqi_watchdog -- the interrupt count and/or heartbeat must increase
 * over time.
 */
void
pqi_watchdog(void *v)
{
	pqi_state_t *s = v;
	uint32_t hb;

	if (pqi_is_offline(s))
		return;

	hb = read_heartbeat_counter(s);
	if ((s->s_last_intr_count == s->s_intr_count) &&
	    (s->s_last_heartbeat_count == hb)) {
		dev_err(s->s_dip, CE_NOTE, "No heartbeat");
		pqi_show_dev_state(s);
		take_ctlr_offline(s);
	} else {
		if (pqi_do_reset_ctlr == s->s_instance) {
			pqi_do_reset_ctlr = -1;
			take_ctlr_offline(s);
		} else {
			s->s_last_intr_count = s->s_intr_count;
			s->s_last_heartbeat_count = hb;
			s->s_watchdog = timeout(pqi_watchdog, s,
			    drv_usectohz(WATCHDOG));
		}
	}
}

/*
 * pqi_start_io -- queue commands to the HBA.
 *
 * This function can be called either from an upper layer with a non-NULL
 * io argument or during an interrupt to load the outgoing queue with
 * more commands.
 */
void
pqi_start_io(pqi_state_t *s, pqi_queue_group_t *qg, pqi_path_t path,
    pqi_io_request_t *io)
{
	pqi_iu_header_t *rqst;
	size_t iu_len;
	size_t copy_to_end;
	pqi_index_t iq_pi;
	pqi_index_t iq_ci;
	uint32_t elem_needed;
	uint32_t elem_to_end;
	caddr_t next_elem;
	int sending = 0;

	mutex_enter(&qg->submit_lock[path]);
	if (io != NULL) {
		io->io_queue_group = qg;
		io->io_queue_path = path;
		list_insert_tail(&qg->request_list[path], io);
	}

	iq_pi = qg->iq_pi_copy[path];
	while ((io = list_remove_head(&qg->request_list[path])) != NULL) {

		/* ---- Primary cause for !active is controller failure ---- */
		if (qg->qg_active == B_FALSE) {
			pqi_cmd_t *c = io->io_cmd;

			mutex_enter(&c->pc_device->pd_mutex);
			/*
			 * When a command is failed it will be removed from
			 * the queue group if pc_io_rqst is not NULL. Since
			 * we have already removed the command from the list
			 * we shouldn't attempt to do so a second time.
			 */
			c->pc_io_rqst = NULL;
			(void) pqi_fail_cmd(io->io_cmd, CMD_DEV_GONE,
			    STAT_TERMINATED);
			mutex_exit(&c->pc_device->pd_mutex);
			continue;
		}

		rqst = io->io_iu;
		iu_len = rqst->iu_length + PQI_REQUEST_HEADER_LENGTH;
		elem_needed = DIV_UP(iu_len, PQI_OPERATIONAL_IQ_ELEMENT_LENGTH);
		(void) ddi_dma_sync(s->s_queue_dma->handle,
		    (uintptr_t)qg->iq_ci[path] -
		    (uintptr_t)s->s_queue_dma->alloc_memory, sizeof (iq_ci),
		    DDI_DMA_SYNC_FORCPU);
		iq_ci = *qg->iq_ci[path];

		if (elem_needed > free_elem_count(iq_pi, iq_ci,
		    s->s_num_elements_per_iq)) {
			list_insert_head(&qg->request_list[path], io);
			break;
		}

		if (pqi_cmd_action(io->io_cmd, PQI_CMD_START) == PQI_CMD_FAIL)
			continue;

		io->io_pi = iq_pi;
		rqst->iu_id = qg->oq_id;
		next_elem = qg->iq_element_array[path] +
		    (iq_pi * PQI_OPERATIONAL_IQ_ELEMENT_LENGTH);
		elem_to_end = s->s_num_elements_per_iq - iq_pi;
		if (elem_needed <= elem_to_end) {
			(void) memcpy(next_elem, rqst, iu_len);
			(void) ddi_dma_sync(s->s_queue_dma->handle,
			    (uintptr_t)next_elem -
			    (uintptr_t)s->s_queue_dma->alloc_memory, iu_len,
			    DDI_DMA_SYNC_FORDEV);
		} else {
			copy_to_end = elem_to_end *
			    PQI_OPERATIONAL_IQ_ELEMENT_LENGTH;
			(void) memcpy(next_elem, rqst, copy_to_end);
			(void) ddi_dma_sync(s->s_queue_dma->handle,
			    (uintptr_t)next_elem -
			    (uintptr_t)s->s_queue_dma->alloc_memory,
			    copy_to_end, DDI_DMA_SYNC_FORDEV);
			(void) memcpy(qg->iq_element_array[path],
			    (caddr_t)rqst + copy_to_end,
			    iu_len - copy_to_end);
			(void) ddi_dma_sync(s->s_queue_dma->handle,
			    0, iu_len - copy_to_end, DDI_DMA_SYNC_FORDEV);
		}
		sending += elem_needed;

		iq_pi = (iq_pi + elem_needed) % s->s_num_elements_per_iq;
	}

	qg->submit_count += sending;
	if (iq_pi != qg->iq_pi_copy[path]) {
		qg->iq_pi_copy[path] = iq_pi;
		ddi_put32(s->s_datap, qg->iq_pi[path], iq_pi);
	} else {
		ASSERT0(sending);
	}
	mutex_exit(&qg->submit_lock[path]);
}
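
/*
 * To illustrate the wraparound copy above: with s_num_elements_per_iq
 * of 8 and iq_pi of 6, a request needing 3 elements has an elem_to_end
 * of 2, so the first two elements' worth of the IU land at the tail of
 * the element array, the remainder is copied to the head, and iq_pi
 * advances to (6 + 3) % 8 == 1.
 */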
int
pqi_transport_command(pqi_state_t *s, pqi_cmd_t *cmd)
{
	pqi_device_t *devp = cmd->pc_device;
	int path;
	pqi_io_request_t *io;

	if (is_aio_enabled(devp) == B_TRUE) {
		path = AIO_PATH;
		io = setup_aio_request(s, cmd);
	} else {
		path = RAID_PATH;
		io = setup_raid_request(s, cmd);
	}

	if (io == NULL)
		return (TRAN_BUSY);

	cmd->pc_io_rqst = io;
	(void) pqi_cmd_action(cmd, PQI_CMD_QUEUE);

	pqi_start_io(s, &s->s_queue_groups[PQI_DEFAULT_QUEUE_GROUP],
	    path, io);

	return (TRAN_ACCEPT);
}

void
pqi_do_rescan(void *v)
{
	pqi_state_t *s = v;

	ndi_devi_enter(scsi_vhci_dip);
	ndi_devi_enter(s->s_dip);
	pqi_rescan_devices(s);
	(void) pqi_config_all(s->s_dip, s);
	ndi_devi_exit(s->s_dip);
	ndi_devi_exit(scsi_vhci_dip);
}

void
pqi_event_worker(void *v)
{
	pqi_state_t *s = v;
	int i;
	pqi_event_t *e;
	boolean_t non_heartbeat = B_FALSE;

	if (pqi_is_offline(s))
		return;

	e = s->s_events;
	for (i = 0; i < PQI_NUM_SUPPORTED_EVENTS; i++) {
		if (e->ev_pending == B_TRUE) {
			e->ev_pending = B_FALSE;
			ack_event(s, e);
			if (pqi_map_event(PQI_EVENT_TYPE_HEARTBEAT) != i)
				non_heartbeat = B_TRUE;
		}
		e++;
	}

	if (non_heartbeat == B_TRUE)
		pqi_do_rescan(s);
}

/*
 * pqi_fail_cmd -- fail the command with the given reason and statistics.
 */
pqi_cmd_action_t
pqi_fail_cmd(pqi_cmd_t *cmd, uchar_t reason, uint_t stats)
{
	struct scsi_pkt *pkt = CMD2PKT(cmd);

	pkt->pkt_reason = reason;
	pkt->pkt_statistics = stats;

	return (pqi_cmd_action_nolock(cmd, PQI_CMD_FAIL));
}

void
pqi_fail_drive_cmds(pqi_device_t *d, uchar_t reason)
{
	pqi_cmd_t *c, *next_c;

	mutex_enter(&d->pd_mutex);

	c = list_head(&d->pd_cmd_list);
	while (c != NULL) {
		next_c = list_next(&d->pd_cmd_list, c);
		if (pqi_fail_cmd(c, reason, STAT_BUS_RESET) !=
		    PQI_CMD_START) {
			/*
			 * The command can't be terminated in the driver
			 * because it was already handed off to the HBA and
			 * the driver will have to wait for completion. The
			 * reason is that the HBA indicates slots are
			 * complete, not a pointer to a command. If the code
			 * were to cancel an outstanding command that slot
			 * could be reused by another command and when the
			 * completion interrupt arrives the driver would
			 * signal that a command had completed when in fact
			 * it was a prior command that had been canceled.
			 *
			 * Should the command fail to complete due to an HBA
			 * error the command will be forced through to
			 * completion during a timeout scan that occurs on
			 * another thread.
			 */
			d->pd_killed++;
		} else {
			d->pd_posted++;
		}
		c = next_c;
	}

	mutex_exit(&d->pd_mutex);
}

uint32_t
pqi_disable_intr(pqi_state_t *s)
{
	uint32_t db;
	uint32_t rval;

	rval = db = G32(s, sis_host_to_ctrl_doorbell);
	db &= ~(SIS_ENABLE_MSIX | SIS_ENABLE_INTX);
	S32(s, sis_host_to_ctrl_doorbell, db);
	return (rval);
}

void
pqi_enable_intr(pqi_state_t *s, uint32_t old_state)
{
	S32(s, sis_host_to_ctrl_doorbell, old_state);
}
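
/*
 * pqi_disable_intr() and pqi_enable_intr() form a save/restore pair:
 * the disable routine returns the previous doorbell state, which is
 * later handed back to the enable routine. A hypothetical caller:
 *
 *	uint32_t old = pqi_disable_intr(s);
 *	... code that must not race against an interrupt ...
 *	pqi_enable_intr(s, old);
 */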
typedef struct reset_closure {
	pqi_state_t *rc_s;
	pqi_device_t *rc_d;
} *reset_closure_t;

/*
 * pqi_lun_reset -- set up a callback to reset the device
 *
 * A dispatch queue is used here because the call tree can come from the
 * interrupt routine (pqi_process_io_intr -> aio_io_complete -> SCSA ->
 * tran_reset -> pqi_lun_reset). If pqi_lun_reset were to actually do the
 * reset work it would then wait for an interrupt which would never arrive
 * since the current thread would be the interrupt thread. So, start a
 * task to reset the device and wait for completion.
 */
void
pqi_lun_reset(pqi_state_t *s, pqi_device_t *d)
{
	reset_closure_t r = kmem_alloc(sizeof (struct reset_closure), KM_SLEEP);

	r->rc_s = s;
	r->rc_d = d;
	(void) ddi_taskq_dispatch(s->s_events_taskq, lun_reset_worker, r, 0);
}

/*
 * []------------------------------------------------------------------[]
 * | Support/utility functions for main entry points                     |
 * []------------------------------------------------------------------[]
 */

static uint32_t
count_drive_cmds(pqi_device_t *d)
{
	pqi_cmd_t *c;
	uint32_t count = 0;

	mutex_enter(&d->pd_mutex);
	c = list_head(&d->pd_cmd_list);
	while (c != NULL) {
		c = list_next(&d->pd_cmd_list, c);
		count++;
	}
	mutex_exit(&d->pd_mutex);

	return (count);
}

static uint32_t
count_outstanding_cmds(pqi_state_t *s)
{
	uint32_t count = 0;
	pqi_device_t *d;

	mutex_enter(&s->s_mutex);
	d = list_head(&s->s_devnodes);
	while (d != NULL) {
		count += count_drive_cmds(d);
		d = list_next(&s->s_devnodes, d);
	}
	mutex_exit(&s->s_mutex);

	return (count);
}
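
/*
 * lun_reset_worker() runs on the events taskq and synchronizes with the
 * completion path through an on-stack semaphore: the worker posts the
 * task management request and blocks in sema_p() until the completion
 * callback, lun_reset_complete(), posts the semaphore from the I/O
 * completion path.
 */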
static void
lun_reset_worker(void *v)
{
	reset_closure_t r = v;
	pqi_state_t *s;
	pqi_device_t *d;
	pqi_io_request_t *io;
	ksema_t sema;
	pqi_task_management_rqst_t *rqst;
	struct pqi_cmd cmd;

	s = r->rc_s;
	d = r->rc_d;

	pqi_fail_drive_cmds(d, CMD_RESET);
	sema_init(&sema, 0, NULL, SEMA_DRIVER, NULL);

	bzero(&cmd, sizeof (cmd));
	mutex_init(&cmd.pc_mutex, NULL, MUTEX_DRIVER, NULL);

	if ((io = pqi_alloc_io(s)) == NULL) {
		mutex_destroy(&cmd.pc_mutex);
		kmem_free(r, sizeof (*r));
		return;
	}
	io->io_cb = lun_reset_complete;
	io->io_context = &sema;
	io->io_cmd = &cmd;
	cmd.pc_io_rqst = io;
	cmd.pc_softc = s;
	cmd.pc_device = &s->s_special_device;

	(void) pqi_cmd_action(&cmd, PQI_CMD_QUEUE);

	rqst = io->io_iu;
	(void) memset(rqst, 0, sizeof (*rqst));

	rqst->header.iu_type = PQI_REQUEST_IU_TASK_MANAGEMENT;
	rqst->header.iu_length = sizeof (*rqst) - PQI_REQUEST_HEADER_LENGTH;
	rqst->request_id = PQI_MAKE_REQID(io->io_index, io->io_gen);
	(void) memcpy(rqst->lun_number, d->pd_scsi3addr,
	    sizeof (rqst->lun_number));
	rqst->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;

	pqi_start_io(s, &s->s_queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH,
	    io);

	sema_p(&sema);

	(void) pqi_cmd_action(&cmd, PQI_CMD_CMPLT);
	mutex_destroy(&cmd.pc_mutex);
	kmem_free(r, sizeof (*r));
}

static void
lun_reset_complete(pqi_io_request_t *io __unused, void *ctx)
{
	sema_v((ksema_t *)ctx);
}

static void
send_event_ack(pqi_state_t *s, pqi_event_acknowledge_request_t *rqst)
{
	pqi_queue_group_t *qg;
	caddr_t next_element;
	pqi_index_t iq_ci;
	pqi_index_t iq_pi;
	int ms_timeo = 1000 * 10;

	qg = &s->s_queue_groups[PQI_DEFAULT_QUEUE_GROUP];
	rqst->header.iu_id = qg->oq_id;

	for (;;) {
		mutex_enter(&qg->submit_lock[RAID_PATH]);
		iq_pi = qg->iq_pi_copy[RAID_PATH];
		iq_ci = ddi_get32(s->s_queue_dma->acc, qg->iq_ci[RAID_PATH]);

		if (free_elem_count(iq_pi, iq_ci, s->s_num_elements_per_iq))
			break;

		mutex_exit(&qg->submit_lock[RAID_PATH]);
		if (pqi_is_offline(s))
			return;
	}
	next_element = qg->iq_element_array[RAID_PATH] +
	    (iq_pi * PQI_OPERATIONAL_IQ_ELEMENT_LENGTH);

	(void) memcpy(next_element, rqst, sizeof (*rqst));
	(void) ddi_dma_sync(s->s_queue_dma->handle, 0, 0, DDI_DMA_SYNC_FORDEV);

	iq_pi = (iq_pi + 1) % s->s_num_elements_per_iq;
	qg->iq_pi_copy[RAID_PATH] = iq_pi;

	ddi_put32(s->s_datap, qg->iq_pi[RAID_PATH], iq_pi);

	/*
	 * Events require special-case processing: the driver must wait
	 * until the acknowledgement is processed before proceeding.
	 * Unfortunately, the HBA doesn't provide an interrupt for this,
	 * which means the code must busy wait.
	 * The code will wait up to 10 seconds.
	 */
	while (ms_timeo--) {
		drv_usecwait(1000);
		iq_ci = ddi_get32(s->s_queue_dma->acc, qg->iq_ci[RAID_PATH]);
		if (iq_pi == iq_ci)
			break;
	}

	mutex_exit(&qg->submit_lock[RAID_PATH]);
}

static void
ack_event(pqi_state_t *s, pqi_event_t *e)
{
	pqi_event_acknowledge_request_t rqst;

	(void) memset(&rqst, 0, sizeof (rqst));
	rqst.header.iu_type = PQI_REQUEST_IU_ACKNOWLEDGE_VENDOR_EVENT;
	rqst.header.iu_length = sizeof (rqst) - PQI_REQUEST_HEADER_LENGTH;
	rqst.event_type = e->ev_type;
	rqst.event_id = e->ev_id;
	rqst.additional_event_id = e->ev_additional;

	send_event_ack(s, &rqst);
}
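
/*
 * Both request setup paths below stamp the IU with
 * PQI_MAKE_REQID(io->io_index, io->io_gen), combining the request's
 * slot index with a generation counter; presumably this lets the
 * completion path recognize a stale response against a recycled slot.
 */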
static pqi_io_request_t *
setup_aio_request(pqi_state_t *s, pqi_cmd_t *cmd)
{
	pqi_io_request_t *io;
	pqi_aio_path_request_t *rqst;
	pqi_device_t *devp = cmd->pc_device;

	/* ---- Most likely received a signal during a cv_wait ---- */
	if ((io = pqi_alloc_io(s)) == NULL)
		return (NULL);

	io->io_cb = aio_io_complete;
	io->io_cmd = cmd;
	io->io_raid_bypass = 0;

	rqst = io->io_iu;
	(void) memset(rqst, 0, sizeof (*rqst));

	rqst->header.iu_type = PQI_REQUEST_IU_AIO_PATH_IO;
	rqst->nexus_id = devp->pd_aio_handle;
	rqst->buffer_length = cmd->pc_dma_count;
	rqst->task_attribute = SOP_TASK_ATTRIBUTE_SIMPLE;
	rqst->request_id = PQI_MAKE_REQID(io->io_index, io->io_gen);
	rqst->error_index = io->io_index;
	rqst->cdb_length = cmd->pc_cmdlen;
	(void) memcpy(rqst->cdb, cmd->pc_cdb, cmd->pc_cmdlen);
	(void) memcpy(rqst->lun_number, devp->pd_scsi3addr,
	    sizeof (rqst->lun_number));

	if (cmd->pc_flags & PQI_FLAG_DMA_VALID) {
		if (cmd->pc_flags & PQI_FLAG_IO_READ)
			rqst->data_direction = SOP_READ_FLAG;
		else
			rqst->data_direction = SOP_WRITE_FLAG;
	} else {
		rqst->data_direction = SOP_NO_DIRECTION_FLAG;
	}

	build_aio_sg_list(s, rqst, cmd, io);
	return (io);
}

static pqi_io_request_t *
setup_raid_request(pqi_state_t *s, pqi_cmd_t *cmd)
{
	pqi_io_request_t *io;
	pqi_raid_path_request_t *rqst;
	pqi_device_t *devp = cmd->pc_device;

	/* ---- Most likely received a signal during a cv_wait ---- */
	if ((io = pqi_alloc_io(s)) == NULL)
		return (NULL);

	io->io_cb = raid_io_complete;
	io->io_cmd = cmd;
	io->io_raid_bypass = 0;

	rqst = io->io_iu;
	(void) memset(rqst, 0, sizeof (*rqst));
	rqst->header.iu_type = PQI_REQUEST_IU_RAID_PATH_IO;
	rqst->rp_data_len = cmd->pc_dma_count;
	rqst->rp_task_attr = SOP_TASK_ATTRIBUTE_SIMPLE;
	rqst->rp_id = PQI_MAKE_REQID(io->io_index, io->io_gen);
	rqst->rp_error_index = io->io_index;
	(void) memcpy(rqst->rp_lun, devp->pd_scsi3addr, sizeof (rqst->rp_lun));
	(void) memcpy(rqst->rp_cdb, cmd->pc_cdb, cmd->pc_cmdlen);

	ASSERT(cmd->pc_cmdlen <= 16);
	rqst->rp_additional_cdb = SOP_ADDITIONAL_CDB_BYTES_0;

	if (cmd->pc_flags & PQI_FLAG_DMA_VALID) {
		if (cmd->pc_flags & PQI_FLAG_IO_READ)
			rqst->rp_data_dir = SOP_READ_FLAG;
		else
			rqst->rp_data_dir = SOP_WRITE_FLAG;
	} else {
		rqst->rp_data_dir = SOP_NO_DIRECTION_FLAG;
	}

	build_raid_sg_list(s, rqst, cmd, io);
	return (io);
}

pqi_cmd_t *
pqi_process_comp_ring(pqi_state_t *s __unused)
{
	return (NULL);
}

static void
raid_io_complete(pqi_io_request_t *io, void *context)
{
	/*
	 * XXX Not sure if this completion function will be the same
	 * or different in the end. If it's the same this will be removed
	 * and aio_io_complete will have its name changed to something
	 * more generic.
	 */
	aio_io_complete(io, context);
}
/*
 * special_error_check -- see if the sense buffer matches "offline" status.
 *
 * spc3r23 section 4.5.6 -- Sense key and sense code definitions.
 * Sense key == 5 (KEY_ILLEGAL_REQUEST) indicates one of several conditions:
 * a) Command addressed to incorrect logical unit.
 * b) Command had an invalid task attribute.
 * ...
 * Table 28 also shows that an ASC of 0x26 with an ASCQ of 0x00 means
 * INVALID FIELD IN PARAMETER LIST.
 * At no other time does this combination of KEY/ASC/ASCQ occur except when
 * a device or cable is pulled from the system along with a Hotplug event.
 * Without documentation it's only a guess, but it's the best that's
 * available. So, if the conditions are true the command packet pkt_reason
 * will be changed to CMD_DEV_GONE which causes MPxIO to switch to the
 * other path, and the Hotplug event will cause a scan to occur which
 * removes other inactive devices in case of a cable pull.
 */
boolean_t
special_error_check(pqi_cmd_t *cmd)
{
	struct scsi_arq_status *arq;

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	arq = (struct scsi_arq_status *)cmd->pc_pkt->pkt_scbp;

	if (((*cmd->pc_pkt->pkt_scbp & STATUS_MASK) == STATUS_CHECK) &&
	    (arq->sts_sensedata.es_key == KEY_ILLEGAL_REQUEST) &&
	    (arq->sts_sensedata.es_add_code == 0x26) &&
	    (arq->sts_sensedata.es_qual_code == 0)) {
		return (B_TRUE);
	} else {
		return (B_FALSE);
	}
}

static void
aio_io_complete(pqi_io_request_t *io, void *context __unused)
{
	pqi_cmd_t *cmd = io->io_cmd;
	struct scsi_pkt *pkt = CMD2PKT(cmd);
	boolean_t pkt_ok = B_FALSE;

	if (cmd->pc_flags & (PQI_FLAG_IO_READ | PQI_FLAG_IO_IOPB))
		(void) ddi_dma_sync(cmd->pc_dmahdl, 0, 0, DDI_DMA_SYNC_FORCPU);

	switch (io->io_status) {
	case PQI_DATA_IN_OUT_UNDERFLOW:
		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
		    STATE_SENT_CMD | STATE_GOT_STATUS;
		if (pkt->pkt_resid == cmd->pc_dma_count) {
			pkt->pkt_reason = CMD_INCOMPLETE;
		} else {
			pkt->pkt_state |= STATE_XFERRED_DATA;
			pkt->pkt_reason = CMD_CMPLT;
		}
		break;

	case PQI_DATA_IN_OUT_GOOD:
		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
		    STATE_SENT_CMD | STATE_GOT_STATUS;
		if (cmd->pc_flags & PQI_FLAG_DMA_VALID)
			pkt->pkt_state |= STATE_XFERRED_DATA;
		pkt->pkt_reason = CMD_CMPLT;
		pkt->pkt_resid = 0;
		pkt->pkt_statistics = 0;
		pkt_ok = B_TRUE;
		break;

	case PQI_DATA_IN_OUT_ERROR:
		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
		    STATE_SENT_CMD;
		if (pkt->pkt_resid != cmd->pc_dma_count)
			pkt->pkt_state |= STATE_XFERRED_DATA;
		pkt->pkt_reason = CMD_CMPLT;
		break;

	case PQI_DATA_IN_OUT_PROTOCOL_ERROR:
		pkt->pkt_reason = CMD_TERMINATED;
		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET;
		break;

	case PQI_DATA_IN_OUT_HARDWARE_ERROR:
		pkt->pkt_reason = CMD_CMPLT;
		pkt->pkt_state |= STATE_GOT_BUS;
		break;

	default:
		pkt->pkt_reason = CMD_INCOMPLETE;
		break;
	}

	if (pkt_ok == B_FALSE)
		atomic_inc_32(&cmd->pc_device->pd_sense_errors);

	if (special_error_check(cmd) == B_TRUE) {
		pkt->pkt_reason = CMD_DEV_GONE;
		pkt->pkt_statistics = STAT_TERMINATED;
	}
	(void) pqi_cmd_action(cmd, PQI_CMD_CMPLT);
}

static void
fail_outstanding_cmds(pqi_state_t *s)
{
	pqi_device_t *devp;

	ASSERT(MUTEX_HELD(&s->s_mutex));

	pqi_fail_drive_cmds(&s->s_special_device, CMD_TRAN_ERR);
	for (devp = list_head(&s->s_devnodes); devp != NULL;
	    devp = list_next(&s->s_devnodes, devp)) {
		pqi_fail_drive_cmds(devp, CMD_TRAN_ERR);
	}
}

static void
set_sg_descriptor(pqi_sg_entry_t *sg, ddi_dma_cookie_t *cookie)
{
	sg->sg_addr = cookie->dmac_laddress;
	sg->sg_len = cookie->dmac_size;
	sg->sg_flags = 0;
}
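
/*
 * The two SG builders below reserve the last in-IU descriptor slot for
 * a possible chain entry. For example, with s_max_sg_per_iu of 4 and a
 * command mapped by 6 DMA cookies, the first 3 cookies are described
 * in-line, the 4th in-IU descriptor is turned into a CISS_SG_CHAIN
 * pointer to io_sg_chain_dma, and the remaining 3 cookies are described
 * in that chained buffer, whose last entry is marked CISS_SG_LAST.
 */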
static void
build_aio_sg_list(pqi_state_t *s, pqi_aio_path_request_t *rqst,
    pqi_cmd_t *cmd, pqi_io_request_t *io)
{
	int i;
	int max_sg_per_iu;
	uint16_t iu_length;
	uint8_t chained;
	uint8_t num_sg_in_iu = 0;
	ddi_dma_cookie_t *cookies;
	pqi_sg_entry_t *sg;

	iu_length = offsetof(struct pqi_aio_path_request, ap_sglist) -
	    PQI_REQUEST_HEADER_LENGTH;

	if (cmd->pc_dmaccount == 0)
		goto out;

	sg = rqst->ap_sglist;
	cookies = cmd->pc_cached_cookies;
	max_sg_per_iu = s->s_max_sg_per_iu - 1;
	i = 0;
	chained = 0;

	for (;;) {
		set_sg_descriptor(sg, cookies);
		if (!chained)
			num_sg_in_iu++;
		i++;
		if (i == cmd->pc_dmaccount)
			break;
		sg++;
		cookies++;
		if (i == max_sg_per_iu) {
			sg->sg_addr = io->io_sg_chain_dma->dma_addr;
			sg->sg_len = (cmd->pc_dmaccount - num_sg_in_iu) *
			    sizeof (*sg);
			sg->sg_flags = CISS_SG_CHAIN;
			chained = 1;
			num_sg_in_iu++;
			sg = (pqi_sg_entry_t *)
			    io->io_sg_chain_dma->alloc_memory;
		}
	}
	sg->sg_flags = CISS_SG_LAST;
	rqst->partial = chained;
	if (chained) {
		(void) ddi_dma_sync(io->io_sg_chain_dma->handle, 0, 0,
		    DDI_DMA_SYNC_FORDEV);
	}
	iu_length += num_sg_in_iu * sizeof (*sg);

out:
	rqst->header.iu_length = iu_length;
	rqst->num_sg_descriptors = num_sg_in_iu;
}

static void
build_raid_sg_list(pqi_state_t *s, pqi_raid_path_request_t *rqst,
    pqi_cmd_t *cmd, pqi_io_request_t *io)
{
	int i = 0;
	int max_sg_per_iu;
	int num_sg_in_iu = 0;
	uint16_t iu_length;
	uint8_t chained = 0;
	ddi_dma_cookie_t *cookies;
	pqi_sg_entry_t *sg;

	iu_length = offsetof(struct pqi_raid_path_request, rp_sglist) -
	    PQI_REQUEST_HEADER_LENGTH;

	if (cmd->pc_dmaccount == 0)
		goto out;

	sg = rqst->rp_sglist;
	cookies = cmd->pc_cached_cookies;
	max_sg_per_iu = s->s_max_sg_per_iu - 1;

	for (;;) {
		set_sg_descriptor(sg, cookies);
		if (!chained)
			num_sg_in_iu++;
		i++;
		if (i == cmd->pc_dmaccount)
			break;
		sg++;
		cookies++;
		if (i == max_sg_per_iu) {
			ASSERT(io->io_sg_chain_dma != NULL);
			sg->sg_addr = io->io_sg_chain_dma->dma_addr;
			sg->sg_len = (cmd->pc_dmaccount - num_sg_in_iu) *
			    sizeof (*sg);
			sg->sg_flags = CISS_SG_CHAIN;
			chained = 1;
			num_sg_in_iu++;
			sg = (pqi_sg_entry_t *)
			    io->io_sg_chain_dma->alloc_memory;
		}
	}
	sg->sg_flags = CISS_SG_LAST;
	rqst->rp_partial = chained;
	if (chained) {
		(void) ddi_dma_sync(io->io_sg_chain_dma->handle, 0, 0,
		    DDI_DMA_SYNC_FORDEV);
	}
	iu_length += num_sg_in_iu * sizeof (*sg);

out:
	rqst->header.iu_length = iu_length;
}

static uint32_t
read_heartbeat_counter(pqi_state_t *s)
{
	return (ddi_get32(s->s_datap, s->s_heartbeat_counter));
}

static void
take_ctlr_offline(pqi_state_t *s)
{
	int num_passes = 5;
	int i;
	pqi_device_t *d;
	pqi_cmd_t *c, *nc;
	pqi_io_request_t *io;
	uint32_t active_count;

	/*
	 * 1) Why always panic here?
	 * Firmware resets don't work on the Microsemi HBA when the firmware
	 * is hung. The code as written fails outstanding commands and tries
	 * to reset the HBA. Since the reset doesn't work the HBA is left in
	 * an offline state and further commands sent (retries and new
	 * commands) are also failed. Eventually ZFS will panic with a
	 * deadman timer, but before that COMSTAR will see I/O requests
	 * error out and send I/O errors back to the client which causes
	 * corruption since these errors are no different than a device that
	 * starts to fail. So, instead of trying to play nice the driver now
	 * panics which will allow HA to fail fast to the other node.
	 *
	 * 2) Why not just remove this routine and call panic from the
	 * heartbeat routine?
	 * I'm hoping this is a temporary workaround. We have been asking
	 * for more documentation on the product and we've been told there
	 * isn't any available. It has been implied that some HBAs do
	 * support firmware resets. Therefore documentation would enable the
	 * driver to determine the model number and adjust parameters such
	 * as panic on firmware hang or try a reset.
	 */
	if (1)
		panic("Firmware hung");

	d = &s->s_special_device;
	mutex_enter(&d->pd_mutex);
	while ((c = list_remove_head(&d->pd_cmd_list)) != NULL) {
		io = c->pc_io_rqst;
		io->io_status = PQI_DATA_IN_OUT_ERROR;

		mutex_exit(&d->pd_mutex);
		(io->io_cb)(io, io->io_context);
		mutex_enter(&d->pd_mutex);
	}
	mutex_exit(&d->pd_mutex);

	/*
	 * If pqi_reset_ctl() completes successfully the queues will be
	 * marked B_TRUE and the controller will be marked online again.
	 */
	mutex_enter(&s->s_mutex);
	for (i = 0; i < s->s_num_queue_groups; i++)
		s->s_queue_groups[i].qg_active = B_FALSE;
	s->s_offline = B_TRUE;
	fail_outstanding_cmds(s);
	mutex_exit(&s->s_mutex);

	/*
	 * The commands that can be canceled have been. It's possible there
	 * are commands currently running that are about to complete. Give
	 * them up to 5 seconds to finish. If they haven't completed by then
	 * they are most likely hung in the firmware of the HBA so go ahead
	 * and reset the firmware.
	 */
	while (num_passes-- > 0) {
		active_count = count_outstanding_cmds(s);
		if (active_count == 0)
			break;
		drv_usecwait(MICROSEC);
	}

	/*
	 * Any commands remaining are hung in the controller firmware so
	 * go ahead and time them out so that the upper layers know what's
	 * happening.
	 */
	mutex_enter(&s->s_mutex);
	for (d = list_head(&s->s_devnodes); d != NULL;
	    d = list_next(&s->s_devnodes, d)) {
		mutex_enter(&d->pd_mutex);
		while ((c = list_head(&d->pd_cmd_list)) != NULL) {
			struct scsi_pkt *pkt = CMD2PKT(c);

			nc = list_next(&d->pd_cmd_list, c);
			ASSERT(pkt);
			if (pkt != NULL) {
				pkt->pkt_reason = CMD_TIMEOUT;
				pkt->pkt_statistics = STAT_TIMEOUT;
			}
			(void) pqi_cmd_action_nolock(c, PQI_CMD_TIMEOUT);
			c = nc;
		}
		mutex_exit(&d->pd_mutex);
	}
	mutex_exit(&s->s_mutex);

	cmn_err(CE_WARN, "Firmware Status: 0x%x", G32(s, sis_firmware_status));

	if (pqi_reset_ctl(s) == B_FALSE) {
		cmn_err(CE_WARN, "Failed to reset controller");
		return;
	}

	/*
	 * This will have the effect of releasing the device's dip
	 * structure from the NDI layer due to s_offline == B_TRUE.
	 */
	ndi_devi_enter(scsi_vhci_dip);
	ndi_devi_enter(s->s_dip);
	(void) pqi_config_all(s->s_dip, s);
	ndi_devi_exit(s->s_dip);
	ndi_devi_exit(scsi_vhci_dip);
}
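
/*
 * free_elem_count() treats the queue as a ring in which one element is
 * always left unused so that pi == ci unambiguously means "empty". For
 * example, with per_iq == 8, pi == 2, and ci == 5, the ring holds
 * 8 - 5 + 2 == 5 in-flight elements, leaving 8 - 5 - 1 == 2 free.
 */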
static uint32_t
free_elem_count(pqi_index_t pi, pqi_index_t ci, uint32_t per_iq)
{
	pqi_index_t used;

	if (pi >= ci) {
		used = pi - ci;
	} else {
		used = per_iq - ci + pi;
	}

	return (per_iq - used - 1);
}

static boolean_t
is_aio_enabled(pqi_device_t *d)
{
	return (d->pd_aio_enabled ? B_TRUE : B_FALSE);
}