1145e0143Sdh142964 /*
2145e0143Sdh142964 * CDDL HEADER START
3145e0143Sdh142964 *
4145e0143Sdh142964 * The contents of this file are subject to the terms of the
5145e0143Sdh142964 * Common Development and Distribution License (the "License").
6145e0143Sdh142964 * You may not use this file except in compliance with the License.
7145e0143Sdh142964 *
8145e0143Sdh142964 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9145e0143Sdh142964 * or http://www.opensolaris.org/os/licensing.
10145e0143Sdh142964 * See the License for the specific language governing permissions
11145e0143Sdh142964 * and limitations under the License.
12145e0143Sdh142964 *
13145e0143Sdh142964 * When distributing Covered Code, include this CDDL HEADER in each
14145e0143Sdh142964 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15145e0143Sdh142964 * If applicable, add the following below this CDDL HEADER, with the
16145e0143Sdh142964 * fields enclosed by brackets "[]" replaced with your own identifying
17145e0143Sdh142964 * information: Portions Copyright [yyyy] [name of copyright owner]
18145e0143Sdh142964 *
19145e0143Sdh142964 * CDDL HEADER END
20658280b6SDavid Hollister */
21658280b6SDavid Hollister /*
22658280b6SDavid Hollister * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23145e0143Sdh142964 */
24145e0143Sdh142964
25145e0143Sdh142964 /*
26145e0143Sdh142964 * PM8001 device state recovery routines
27145e0143Sdh142964 */
28145e0143Sdh142964
29145e0143Sdh142964 #include <sys/scsi/adapters/pmcs/pmcs.h>
30145e0143Sdh142964
31145e0143Sdh142964 /*
32145e0143Sdh142964 * SAS Topology Configuration
33145e0143Sdh142964 */
346745c559SJesse Butler static void pmcs_ds_operational(pmcs_phy_t *pptr, pmcs_xscsi_t *tgt);
35145e0143Sdh142964 static void pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp,
36601c90f1SSrikanth, Ramana pmcs_xscsi_t *tgt, pmcs_hw_t *pwp, const char *func_name,
37145e0143Sdh142964 char *reason_string);
38145e0143Sdh142964
39145e0143Sdh142964 /*
40145e0143Sdh142964 * Get device state. Called with statlock and PHY lock held.
41145e0143Sdh142964 */
42145e0143Sdh142964 static int
pmcs_get_dev_state(pmcs_hw_t * pwp,pmcs_phy_t * phyp,pmcs_xscsi_t * xp,uint8_t * ds)43145e0143Sdh142964 pmcs_get_dev_state(pmcs_hw_t *pwp, pmcs_phy_t *phyp, pmcs_xscsi_t *xp,
44145e0143Sdh142964 uint8_t *ds)
45145e0143Sdh142964 {
46145e0143Sdh142964 uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
47145e0143Sdh142964 int result;
48145e0143Sdh142964 struct pmcwork *pwrk;
49145e0143Sdh142964
50145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG3, phyp, xp, "%s: tgt(0x%p)", __func__,
51145e0143Sdh142964 (void *)xp);
52145e0143Sdh142964
53145e0143Sdh142964 if (xp != NULL) {
54145e0143Sdh142964 ASSERT(mutex_owned(&xp->statlock));
55145e0143Sdh142964 }
56f96f3b56SSrikanth, Ramana
57f96f3b56SSrikanth, Ramana if (phyp == NULL) {
58f96f3b56SSrikanth, Ramana pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, xp,
59f96f3b56SSrikanth, Ramana "%s: PHY is NULL", __func__);
60f96f3b56SSrikanth, Ramana return (-1);
61f96f3b56SSrikanth, Ramana }
62145e0143Sdh142964 ASSERT(mutex_owned(&phyp->phy_lock));
63145e0143Sdh142964
64145e0143Sdh142964 pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
65145e0143Sdh142964 if (pwrk == NULL) {
66145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__);
67145e0143Sdh142964 return (-1);
68145e0143Sdh142964 }
69145e0143Sdh142964 pwrk->arg = msg;
70145e0143Sdh142964 pwrk->dtype = phyp->dtype;
71145e0143Sdh142964
72145e0143Sdh142964 if (phyp->valid_device_id == 0) {
73145e0143Sdh142964 pmcs_pwork(pwp, pwrk);
74145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp,
75145e0143Sdh142964 "%s: Invalid DeviceID", __func__);
76145e0143Sdh142964 return (-1);
77145e0143Sdh142964 }
78145e0143Sdh142964 htag = pwrk->htag;
79145e0143Sdh142964 msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
80145e0143Sdh142964 PMCIN_GET_DEVICE_STATE));
81145e0143Sdh142964 msg[1] = LE_32(pwrk->htag);
82145e0143Sdh142964 msg[2] = LE_32(phyp->device_id);
83601c90f1SSrikanth, Ramana CLEAN_MESSAGE(msg, 3);
84145e0143Sdh142964
85145e0143Sdh142964 mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
86145e0143Sdh142964 ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
87145e0143Sdh142964 if (ptr == NULL) {
88145e0143Sdh142964 mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
89145e0143Sdh142964 pmcs_pwork(pwp, pwrk);
90145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__);
91145e0143Sdh142964 return (-1);
92145e0143Sdh142964 }
93145e0143Sdh142964 COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
94145e0143Sdh142964 pwrk->state = PMCS_WORK_STATE_ONCHIP;
95145e0143Sdh142964 INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
96145e0143Sdh142964
97145e0143Sdh142964 if (xp != NULL) {
98145e0143Sdh142964 mutex_exit(&xp->statlock);
99145e0143Sdh142964 }
100145e0143Sdh142964 pmcs_unlock_phy(phyp);
101145e0143Sdh142964 WAIT_FOR(pwrk, 1000, result);
102145e0143Sdh142964 pmcs_pwork(pwp, pwrk);
1033be32c0fSJesse Butler pmcs_lock_phy(phyp);
104145e0143Sdh142964
105145e0143Sdh142964 if (xp != NULL) {
106145e0143Sdh142964 mutex_enter(&xp->statlock);
107145e0143Sdh142964 }
108145e0143Sdh142964
109145e0143Sdh142964 if (result) {
110145e0143Sdh142964 pmcs_timed_out(pwp, htag, __func__);
111145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp,
112145e0143Sdh142964 "%s: cmd timed out, returning", __func__);
113145e0143Sdh142964 return (-1);
114145e0143Sdh142964 }
115145e0143Sdh142964 if (LE_32(msg[2]) == 0) {
116145e0143Sdh142964 *ds = (uint8_t)(LE_32(msg[4]));
117145e0143Sdh142964 if (xp == NULL) {
118145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
119145e0143Sdh142964 "%s: retrieved_ds=0x%x", __func__, *ds);
120145e0143Sdh142964 } else if (*ds != xp->dev_state) {
121145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
122145e0143Sdh142964 "%s: retrieved_ds=0x%x, target_ds=0x%x", __func__,
123145e0143Sdh142964 *ds, xp->dev_state);
124145e0143Sdh142964 }
125145e0143Sdh142964 return (0);
126145e0143Sdh142964 } else {
127145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
128145e0143Sdh142964 "%s: cmd failed Status(0x%x), returning ", __func__,
129145e0143Sdh142964 LE_32(msg[2]));
130145e0143Sdh142964 return (-1);
131145e0143Sdh142964 }
132145e0143Sdh142964 }
133145e0143Sdh142964
134145e0143Sdh142964 /*
135145e0143Sdh142964 * Set device state. Called with target's statlock and PHY lock held.
136145e0143Sdh142964 */
137145e0143Sdh142964 static int
pmcs_set_dev_state(pmcs_hw_t * pwp,pmcs_phy_t * phyp,pmcs_xscsi_t * xp,uint8_t ds)138145e0143Sdh142964 pmcs_set_dev_state(pmcs_hw_t *pwp, pmcs_phy_t *phyp, pmcs_xscsi_t *xp,
139145e0143Sdh142964 uint8_t ds)
140145e0143Sdh142964 {
141145e0143Sdh142964 uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
142145e0143Sdh142964 int result;
143145e0143Sdh142964 uint8_t pds, nds;
144145e0143Sdh142964 struct pmcwork *pwrk;
145145e0143Sdh142964
146145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
147145e0143Sdh142964 "%s: ds: 0x%x tgt: 0x%p phy: 0x%p", __func__, ds, (void *)xp,
148145e0143Sdh142964 (void *)phyp);
149145e0143Sdh142964
150145e0143Sdh142964 if (phyp == NULL) {
151145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, xp,
152145e0143Sdh142964 "%s: PHY is NULL", __func__);
153145e0143Sdh142964 return (-1);
154145e0143Sdh142964 }
155145e0143Sdh142964
156145e0143Sdh142964 pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
157145e0143Sdh142964 if (pwrk == NULL) {
158145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__);
159145e0143Sdh142964 return (-1);
160145e0143Sdh142964 }
161145e0143Sdh142964 if (phyp->valid_device_id == 0) {
162145e0143Sdh142964 pmcs_pwork(pwp, pwrk);
163145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
164145e0143Sdh142964 "%s: Invalid DeviceID", __func__);
165145e0143Sdh142964 return (-1);
166145e0143Sdh142964 }
167145e0143Sdh142964 pwrk->arg = msg;
168145e0143Sdh142964 pwrk->dtype = phyp->dtype;
169145e0143Sdh142964 htag = pwrk->htag;
170145e0143Sdh142964 msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
171145e0143Sdh142964 PMCIN_SET_DEVICE_STATE));
172145e0143Sdh142964 msg[1] = LE_32(pwrk->htag);
173145e0143Sdh142964 msg[2] = LE_32(phyp->device_id);
174145e0143Sdh142964 msg[3] = LE_32(ds);
175601c90f1SSrikanth, Ramana CLEAN_MESSAGE(msg, 4);
176145e0143Sdh142964
177145e0143Sdh142964 mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
178145e0143Sdh142964 ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
179145e0143Sdh142964 if (ptr == NULL) {
180145e0143Sdh142964 mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
181145e0143Sdh142964 pmcs_pwork(pwp, pwrk);
182145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__);
183145e0143Sdh142964 return (-1);
184145e0143Sdh142964 }
185145e0143Sdh142964 COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
186145e0143Sdh142964 pwrk->state = PMCS_WORK_STATE_ONCHIP;
187145e0143Sdh142964 INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
188145e0143Sdh142964
189145e0143Sdh142964 if (xp != NULL) {
190145e0143Sdh142964 mutex_exit(&xp->statlock);
191145e0143Sdh142964 }
192145e0143Sdh142964 pmcs_unlock_phy(phyp);
193145e0143Sdh142964 WAIT_FOR(pwrk, 1000, result);
194145e0143Sdh142964 pmcs_pwork(pwp, pwrk);
1953be32c0fSJesse Butler pmcs_lock_phy(phyp);
196145e0143Sdh142964 if (xp != NULL) {
197145e0143Sdh142964 mutex_enter(&xp->statlock);
198145e0143Sdh142964 }
199145e0143Sdh142964
200145e0143Sdh142964 if (result) {
201145e0143Sdh142964 pmcs_timed_out(pwp, htag, __func__);
202145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
203145e0143Sdh142964 "%s: cmd timed out, returning", __func__);
204145e0143Sdh142964 return (-1);
205145e0143Sdh142964 }
206145e0143Sdh142964 if (LE_32(msg[2]) == 0) {
207145e0143Sdh142964 pds = (uint8_t)(LE_32(msg[4]) >> 4);
208145e0143Sdh142964 nds = (uint8_t)(LE_32(msg[4]) & 0x0000000f);
209145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
210145e0143Sdh142964 "%s: previous_ds=0x%x, new_ds=0x%x", __func__, pds, nds);
211145e0143Sdh142964 if (xp != NULL) {
212145e0143Sdh142964 xp->dev_state = nds;
213145e0143Sdh142964 }
214145e0143Sdh142964 return (0);
215145e0143Sdh142964 } else {
216145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
217145e0143Sdh142964 "%s: cmd failed Status(0x%x), returning ", __func__,
218145e0143Sdh142964 LE_32(msg[2]));
219145e0143Sdh142964 return (-1);
220145e0143Sdh142964 }
221145e0143Sdh142964 }
222145e0143Sdh142964
2236745c559SJesse Butler static void
pmcs_ds_operational(pmcs_phy_t * pptr,pmcs_xscsi_t * tgt)2246745c559SJesse Butler pmcs_ds_operational(pmcs_phy_t *pptr, pmcs_xscsi_t *tgt)
2256745c559SJesse Butler {
2266745c559SJesse Butler pmcs_hw_t *pwp;
2276745c559SJesse Butler
2286745c559SJesse Butler ASSERT(pptr);
2296745c559SJesse Butler pwp = pptr->pwp;
2306745c559SJesse Butler
2316745c559SJesse Butler if (tgt != NULL) {
2326745c559SJesse Butler tgt->recover_wait = 0;
2336745c559SJesse Butler }
2346745c559SJesse Butler pptr->ds_recovery_retries = 0;
2356745c559SJesse Butler
2366745c559SJesse Butler if ((pptr->ds_prev_good_recoveries == 0) ||
2376745c559SJesse Butler (ddi_get_lbolt() - pptr->last_good_recovery >
2386745c559SJesse Butler drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME))) {
2396745c559SJesse Butler pptr->last_good_recovery = ddi_get_lbolt();
2406745c559SJesse Butler pptr->ds_prev_good_recoveries = 1;
2416745c559SJesse Butler } else if (ddi_get_lbolt() < pptr->last_good_recovery +
2426745c559SJesse Butler drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)) {
2436745c559SJesse Butler pptr->ds_prev_good_recoveries++;
2446745c559SJesse Butler } else {
245601c90f1SSrikanth, Ramana pmcs_handle_ds_recovery_error(pptr, tgt, pwp, __func__,
246601c90f1SSrikanth, Ramana "Max recovery attempts reached. Declaring PHY dead");
2476745c559SJesse Butler }
2486745c559SJesse Butler
2496745c559SJesse Butler /* Don't bother to run the work queues if the PHY is dead */
2506745c559SJesse Butler if (!pptr->dead) {
2516745c559SJesse Butler SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
2526745c559SJesse Butler (void) ddi_taskq_dispatch(pwp->tq, pmcs_worker,
2536745c559SJesse Butler pwp, DDI_NOSLEEP);
2546745c559SJesse Butler }
2556745c559SJesse Butler }
2566745c559SJesse Butler
257145e0143Sdh142964 void
pmcs_dev_state_recovery(pmcs_hw_t * pwp,pmcs_phy_t * phyp)258145e0143Sdh142964 pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
259145e0143Sdh142964 {
260b0e5d1e5SSrikanth, Ramana boolean_t reschedule = B_FALSE;
261145e0143Sdh142964 uint8_t ds, tgt_dev_state;
262145e0143Sdh142964 int rc;
263145e0143Sdh142964 pmcs_xscsi_t *tgt;
264145e0143Sdh142964 pmcs_phy_t *pptr, *pnext, *pchild;
265145e0143Sdh142964
266145e0143Sdh142964 /*
267145e0143Sdh142964 * First time, check to see if we're already performing recovery
268145e0143Sdh142964 */
269145e0143Sdh142964 if (phyp == NULL) {
270145e0143Sdh142964 mutex_enter(&pwp->lock);
271145e0143Sdh142964 if (pwp->ds_err_recovering) {
272145e0143Sdh142964 mutex_exit(&pwp->lock);
273145e0143Sdh142964 SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
274145e0143Sdh142964 return;
275145e0143Sdh142964 }
276145e0143Sdh142964
277145e0143Sdh142964 pwp->ds_err_recovering = 1;
278145e0143Sdh142964 pptr = pwp->root_phys;
279145e0143Sdh142964 mutex_exit(&pwp->lock);
280145e0143Sdh142964 } else {
281145e0143Sdh142964 pptr = phyp;
282145e0143Sdh142964 }
283145e0143Sdh142964
284145e0143Sdh142964 while (pptr) {
285145e0143Sdh142964 /*
286145e0143Sdh142964 * Since ds_err_recovering is set, we can be assured these
287145e0143Sdh142964 * PHYs won't disappear on us while we do this.
288145e0143Sdh142964 */
289145e0143Sdh142964 pmcs_lock_phy(pptr);
290145e0143Sdh142964 pchild = pptr->children;
291145e0143Sdh142964 pnext = pptr->sibling;
292145e0143Sdh142964 pmcs_unlock_phy(pptr);
293145e0143Sdh142964
294145e0143Sdh142964 if (pchild) {
295145e0143Sdh142964 pmcs_dev_state_recovery(pwp, pchild);
296145e0143Sdh142964 }
297145e0143Sdh142964
298145e0143Sdh142964 tgt = NULL;
299145e0143Sdh142964 pmcs_lock_phy(pptr);
300145e0143Sdh142964
301601c90f1SSrikanth, Ramana if (pptr->dead || !pptr->valid_device_id) {
302601c90f1SSrikanth, Ramana goto next_phy;
303601c90f1SSrikanth, Ramana }
304601c90f1SSrikanth, Ramana
305601c90f1SSrikanth, Ramana if (pptr->iport && (pptr->iport->ua_state != UA_ACTIVE)) {
306601c90f1SSrikanth, Ramana pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, pptr->target,
307601c90f1SSrikanth, Ramana "%s: No DS recovery on PHY %s, iport not active",
308601c90f1SSrikanth, Ramana __func__, pptr->path);
309145e0143Sdh142964 goto next_phy;
310145e0143Sdh142964 }
311145e0143Sdh142964
312145e0143Sdh142964 tgt = pptr->target;
313145e0143Sdh142964
314145e0143Sdh142964 if (tgt != NULL) {
315145e0143Sdh142964 mutex_enter(&tgt->statlock);
316145e0143Sdh142964 if (tgt->recover_wait == 0) {
317145e0143Sdh142964 goto next_phy;
318145e0143Sdh142964 }
319145e0143Sdh142964 tgt_dev_state = tgt->dev_state;
320145e0143Sdh142964 } else {
321145e0143Sdh142964 tgt_dev_state = PMCS_DEVICE_STATE_NOT_AVAILABLE;
322145e0143Sdh142964 }
323145e0143Sdh142964
324145e0143Sdh142964 if (pptr->prev_recovery) {
325145e0143Sdh142964 if (ddi_get_lbolt() - pptr->prev_recovery <
326145e0143Sdh142964 drv_usectohz(PMCS_DS_RECOVERY_INTERVAL)) {
327145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG2, pptr, tgt,
328145e0143Sdh142964 "%s: DS recovery on PHY %s "
329145e0143Sdh142964 "re-invoked too soon. Skipping...",
330145e0143Sdh142964 __func__, pptr->path);
331b0e5d1e5SSrikanth, Ramana if ((tgt) && (tgt->recover_wait)) {
332b0e5d1e5SSrikanth, Ramana reschedule = B_TRUE;
333b0e5d1e5SSrikanth, Ramana }
334145e0143Sdh142964 goto next_phy;
335145e0143Sdh142964 }
336145e0143Sdh142964 }
337145e0143Sdh142964 pptr->prev_recovery = ddi_get_lbolt();
338145e0143Sdh142964
339145e0143Sdh142964 /*
340145e0143Sdh142964 * Step 1: Put the device into the IN_RECOVERY state
341145e0143Sdh142964 */
342145e0143Sdh142964 rc = pmcs_get_dev_state(pwp, pptr, tgt, &ds);
343145e0143Sdh142964 if (rc != 0) {
344145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
345145e0143Sdh142964 "%s: pmcs_get_dev_state on PHY %s "
346145e0143Sdh142964 "failed (rc=%d)",
347145e0143Sdh142964 __func__, pptr->path, rc);
348145e0143Sdh142964
349145e0143Sdh142964 pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
350601c90f1SSrikanth, Ramana __func__, "pmcs_get_dev_state");
351145e0143Sdh142964
352145e0143Sdh142964 goto next_phy;
353145e0143Sdh142964 }
354145e0143Sdh142964
3556745c559SJesse Butler /* If the chip says it's operational, we're done */
3566745c559SJesse Butler if (ds == PMCS_DEVICE_STATE_OPERATIONAL) {
3576745c559SJesse Butler pmcs_ds_operational(pptr, tgt);
3586745c559SJesse Butler goto next_phy;
3596745c559SJesse Butler }
3606745c559SJesse Butler
361145e0143Sdh142964 if ((tgt_dev_state == ds) &&
362145e0143Sdh142964 (ds == PMCS_DEVICE_STATE_IN_RECOVERY)) {
363145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
364145e0143Sdh142964 "%s: Target 0x%p already IN_RECOVERY", __func__,
365145e0143Sdh142964 (void *)tgt);
366145e0143Sdh142964 } else {
367145e0143Sdh142964 if (tgt != NULL) {
368145e0143Sdh142964 tgt->dev_state = ds;
369145e0143Sdh142964 }
370145e0143Sdh142964 tgt_dev_state = ds;
371145e0143Sdh142964 ds = PMCS_DEVICE_STATE_IN_RECOVERY;
372145e0143Sdh142964 rc = pmcs_send_err_recovery_cmd(pwp, ds, pptr, tgt);
373145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
374145e0143Sdh142964 "%s: pmcs_send_err_recovery_cmd "
375145e0143Sdh142964 "result(%d) tgt(0x%p) ds(0x%x) tgt->ds(0x%x)",
376145e0143Sdh142964 __func__, rc, (void *)tgt, ds, tgt_dev_state);
377145e0143Sdh142964
378145e0143Sdh142964 if (rc) {
379145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
380145e0143Sdh142964 "%s: pmcs_send_err_recovery_cmd to PHY %s "
381145e0143Sdh142964 "failed (rc=%d)",
382145e0143Sdh142964 __func__, pptr->path, rc);
383145e0143Sdh142964
384145e0143Sdh142964 pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
385601c90f1SSrikanth, Ramana __func__, "pmcs_send_err_recovery_cmd");
386145e0143Sdh142964
387145e0143Sdh142964 goto next_phy;
388145e0143Sdh142964 }
389145e0143Sdh142964 }
390145e0143Sdh142964
391145e0143Sdh142964 /*
3926745c559SJesse Butler * Step 2: Perform a hard reset on the PHY.
393145e0143Sdh142964 */
394145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
3956745c559SJesse Butler "%s: Issue HARD_RESET to PHY %s", __func__,
3966745c559SJesse Butler pptr->path);
397145e0143Sdh142964 /*
3986745c559SJesse Butler * Must release statlock here because pmcs_reset_phy
3996745c559SJesse Butler * will drop and reacquire the PHY lock.
400145e0143Sdh142964 */
401145e0143Sdh142964 if (tgt != NULL) {
402145e0143Sdh142964 mutex_exit(&tgt->statlock);
403145e0143Sdh142964 }
404145e0143Sdh142964 rc = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_HARD_RESET);
405145e0143Sdh142964 if (tgt != NULL) {
406145e0143Sdh142964 mutex_enter(&tgt->statlock);
407145e0143Sdh142964 }
408145e0143Sdh142964 if (rc) {
409145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
410145e0143Sdh142964 "%s: HARD_RESET to PHY %s failed (rc=%d)",
411145e0143Sdh142964 __func__, pptr->path, rc);
412145e0143Sdh142964
413145e0143Sdh142964 pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
414601c90f1SSrikanth, Ramana __func__, "HARD_RESET");
415145e0143Sdh142964
416145e0143Sdh142964 goto next_phy;
417145e0143Sdh142964 }
418145e0143Sdh142964
419145e0143Sdh142964 /*
420145e0143Sdh142964 * Step 3: Abort all I/Os to the device
421145e0143Sdh142964 */
422145e0143Sdh142964 if (pptr->abort_all_start) {
423145e0143Sdh142964 while (pptr->abort_all_start) {
424145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
425145e0143Sdh142964 "%s: Waiting for outstanding ABORT_ALL on "
426145e0143Sdh142964 "PHY 0x%p", __func__, (void *)pptr);
427145e0143Sdh142964 cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
428145e0143Sdh142964 }
429145e0143Sdh142964 } else {
430145e0143Sdh142964 if (tgt != NULL) {
431145e0143Sdh142964 mutex_exit(&tgt->statlock);
432145e0143Sdh142964 }
433145e0143Sdh142964 rc = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);
434145e0143Sdh142964 if (tgt != NULL) {
435145e0143Sdh142964 mutex_enter(&tgt->statlock);
436145e0143Sdh142964 }
437145e0143Sdh142964 if (rc != 0) {
438145e0143Sdh142964 pptr->abort_pending = 1;
439145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
440145e0143Sdh142964 "%s: pmcs_abort to PHY %s failed (rc=%d)",
441145e0143Sdh142964 __func__, pptr->path, rc);
442145e0143Sdh142964
443145e0143Sdh142964 pmcs_handle_ds_recovery_error(pptr, tgt,
444601c90f1SSrikanth, Ramana pwp, __func__, "pmcs_abort");
445145e0143Sdh142964
446145e0143Sdh142964 goto next_phy;
447145e0143Sdh142964 }
448145e0143Sdh142964 }
449145e0143Sdh142964
450145e0143Sdh142964 /*
451145e0143Sdh142964 * Step 4: Set the device back to OPERATIONAL state
452145e0143Sdh142964 */
453145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
454145e0143Sdh142964 "%s: Set PHY/tgt 0x%p/0x%p to OPERATIONAL state",
455145e0143Sdh142964 __func__, (void *)pptr, (void *)tgt);
456145e0143Sdh142964 rc = pmcs_set_dev_state(pwp, pptr, tgt,
457145e0143Sdh142964 PMCS_DEVICE_STATE_OPERATIONAL);
458145e0143Sdh142964 if (rc == 0) {
4596745c559SJesse Butler pmcs_ds_operational(pptr, tgt);
460145e0143Sdh142964 } else {
461145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt,
462145e0143Sdh142964 "%s: Failed to SET tgt 0x%p to OPERATIONAL state",
463145e0143Sdh142964 __func__, (void *)tgt);
464145e0143Sdh142964
465145e0143Sdh142964 pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
466601c90f1SSrikanth, Ramana __func__, "SET tgt to OPERATIONAL state");
467145e0143Sdh142964
468145e0143Sdh142964 goto next_phy;
469145e0143Sdh142964 }
470145e0143Sdh142964
471145e0143Sdh142964 next_phy:
472145e0143Sdh142964 if (tgt) {
473145e0143Sdh142964 mutex_exit(&tgt->statlock);
474145e0143Sdh142964 }
475145e0143Sdh142964 pmcs_unlock_phy(pptr);
476145e0143Sdh142964 pptr = pnext;
477145e0143Sdh142964 }
478145e0143Sdh142964
479145e0143Sdh142964 /*
480145e0143Sdh142964 * Only clear ds_err_recovering if we're exiting for good and not
481145e0143Sdh142964 * just unwinding from recursion
482145e0143Sdh142964 */
483145e0143Sdh142964 if (phyp == NULL) {
484145e0143Sdh142964 mutex_enter(&pwp->lock);
485145e0143Sdh142964 pwp->ds_err_recovering = 0;
486145e0143Sdh142964 mutex_exit(&pwp->lock);
487145e0143Sdh142964 }
488b0e5d1e5SSrikanth, Ramana
489b0e5d1e5SSrikanth, Ramana if (reschedule) {
490b0e5d1e5SSrikanth, Ramana SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
491b0e5d1e5SSrikanth, Ramana }
492145e0143Sdh142964 }
493145e0143Sdh142964
494145e0143Sdh142964 /*
495145e0143Sdh142964 * Called with target's statlock held (if target is non-NULL) and PHY lock held.
496145e0143Sdh142964 */
497145e0143Sdh142964 int
pmcs_send_err_recovery_cmd(pmcs_hw_t * pwp,uint8_t dev_state,pmcs_phy_t * phyp,pmcs_xscsi_t * tgt)498145e0143Sdh142964 pmcs_send_err_recovery_cmd(pmcs_hw_t *pwp, uint8_t dev_state, pmcs_phy_t *phyp,
499145e0143Sdh142964 pmcs_xscsi_t *tgt)
500145e0143Sdh142964 {
501145e0143Sdh142964 int rc = -1;
502145e0143Sdh142964 uint8_t tgt_dev_state = PMCS_DEVICE_STATE_NOT_AVAILABLE;
503145e0143Sdh142964
504145e0143Sdh142964 if (tgt != NULL) {
505145e0143Sdh142964 ASSERT(mutex_owned(&tgt->statlock));
506145e0143Sdh142964 if (tgt->recovering) {
507145e0143Sdh142964 return (0);
508145e0143Sdh142964 }
509145e0143Sdh142964
510145e0143Sdh142964 tgt->recovering = 1;
511145e0143Sdh142964 tgt_dev_state = tgt->dev_state;
512145e0143Sdh142964 }
513145e0143Sdh142964
514145e0143Sdh142964 if (phyp == NULL) {
515145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, tgt,
516145e0143Sdh142964 "%s: PHY is NULL", __func__);
517145e0143Sdh142964 return (-1);
518145e0143Sdh142964 }
519145e0143Sdh142964
520145e0143Sdh142964 ASSERT(mutex_owned(&phyp->phy_lock));
521145e0143Sdh142964
522145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
523145e0143Sdh142964 "%s: ds: 0x%x, tgt ds(0x%x)", __func__, dev_state, tgt_dev_state);
524145e0143Sdh142964
525145e0143Sdh142964 switch (dev_state) {
526145e0143Sdh142964 case PMCS_DEVICE_STATE_IN_RECOVERY:
527145e0143Sdh142964 if (tgt_dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
528145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
529145e0143Sdh142964 "%s: Target 0x%p already IN_RECOVERY", __func__,
530145e0143Sdh142964 (void *)tgt);
531145e0143Sdh142964 rc = 0; /* This is not an error */
532145e0143Sdh142964 goto no_action;
533145e0143Sdh142964 }
534145e0143Sdh142964
535145e0143Sdh142964 rc = pmcs_set_dev_state(pwp, phyp, tgt,
536145e0143Sdh142964 PMCS_DEVICE_STATE_IN_RECOVERY);
537145e0143Sdh142964 if (rc != 0) {
538145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
539145e0143Sdh142964 "%s(1): Failed to set tgt(0x%p) to IN_RECOVERY",
540145e0143Sdh142964 __func__, (void *)tgt);
541145e0143Sdh142964 }
542145e0143Sdh142964
543145e0143Sdh142964 break;
544145e0143Sdh142964
545145e0143Sdh142964 case PMCS_DEVICE_STATE_OPERATIONAL:
546145e0143Sdh142964 if (tgt_dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) {
547145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
548145e0143Sdh142964 "%s: Target 0x%p not ready to go OPERATIONAL",
549145e0143Sdh142964 __func__, (void *)tgt);
550145e0143Sdh142964 goto no_action;
551145e0143Sdh142964 }
552145e0143Sdh142964
553145e0143Sdh142964 rc = pmcs_set_dev_state(pwp, phyp, tgt,
554145e0143Sdh142964 PMCS_DEVICE_STATE_OPERATIONAL);
555145e0143Sdh142964 if (tgt != NULL) {
556145e0143Sdh142964 tgt->reset_success = 1;
557145e0143Sdh142964 }
558145e0143Sdh142964 if (rc != 0) {
559145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
560145e0143Sdh142964 "%s(2): Failed to SET tgt(0x%p) to OPERATIONAL",
561145e0143Sdh142964 __func__, (void *)tgt);
562145e0143Sdh142964 if (tgt != NULL) {
563145e0143Sdh142964 tgt->reset_success = 0;
564145e0143Sdh142964 }
565145e0143Sdh142964 }
566145e0143Sdh142964
567145e0143Sdh142964 break;
568145e0143Sdh142964
569145e0143Sdh142964 case PMCS_DEVICE_STATE_NON_OPERATIONAL:
570145e0143Sdh142964 PHY_CHANGED(pwp, phyp);
571145e0143Sdh142964 RESTART_DISCOVERY(pwp);
572145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
573145e0143Sdh142964 "%s: Device at %s is non-operational",
574145e0143Sdh142964 __func__, phyp->path);
575145e0143Sdh142964 if (tgt != NULL) {
576145e0143Sdh142964 tgt->dev_state = PMCS_DEVICE_STATE_NON_OPERATIONAL;
577145e0143Sdh142964 }
578145e0143Sdh142964 rc = 0;
579145e0143Sdh142964
580145e0143Sdh142964 break;
581145e0143Sdh142964
582145e0143Sdh142964 default:
583145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt,
584145e0143Sdh142964 "%s: Invalid state requested (%d)", __func__,
585145e0143Sdh142964 dev_state);
586145e0143Sdh142964 break;
587145e0143Sdh142964
588145e0143Sdh142964 }
589145e0143Sdh142964
590145e0143Sdh142964 no_action:
591145e0143Sdh142964 if (tgt != NULL) {
592145e0143Sdh142964 tgt->recovering = 0;
593145e0143Sdh142964 }
594145e0143Sdh142964 return (rc);
595145e0143Sdh142964 }
596145e0143Sdh142964
597145e0143Sdh142964 /*
598145e0143Sdh142964 * Start ssp event recovery. We have to schedule recovery operation because
599145e0143Sdh142964 * it involves sending multiple commands to device and we should not do it
600145e0143Sdh142964 * in the interrupt context.
601145e0143Sdh142964 * If it is failure of a recovery command, let the recovery thread deal with it.
602225bf905SJesse Butler * Called with the work lock held.
603145e0143Sdh142964 */
604145e0143Sdh142964 void
pmcs_start_ssp_event_recovery(pmcs_hw_t * pwp,pmcwork_t * pwrk,uint32_t * iomb,size_t amt)605145e0143Sdh142964 pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
606145e0143Sdh142964 size_t amt)
607145e0143Sdh142964 {
608145e0143Sdh142964 pmcs_xscsi_t *tgt = pwrk->xp;
609145e0143Sdh142964 uint32_t event = LE_32(iomb[2]);
610145e0143Sdh142964 pmcs_phy_t *pptr = pwrk->phy;
611658280b6SDavid Hollister pmcs_cb_t callback;
612145e0143Sdh142964 uint32_t tag;
613145e0143Sdh142964
614145e0143Sdh142964 if (tgt != NULL) {
615145e0143Sdh142964 mutex_enter(&tgt->statlock);
616145e0143Sdh142964 if (!tgt->assigned) {
617145e0143Sdh142964 if (pptr) {
618145e0143Sdh142964 pmcs_dec_phy_ref_count(pptr);
619145e0143Sdh142964 }
620145e0143Sdh142964 pptr = NULL;
621145e0143Sdh142964 pwrk->phy = NULL;
622145e0143Sdh142964 }
623145e0143Sdh142964 mutex_exit(&tgt->statlock);
624145e0143Sdh142964 }
62556976565SDavid Hollister
626145e0143Sdh142964 if (pptr == NULL) {
627145e0143Sdh142964 /*
628145e0143Sdh142964 * No target, need to run RE-DISCOVERY here.
629145e0143Sdh142964 */
630145e0143Sdh142964 if (pwrk->state != PMCS_WORK_STATE_TIMED_OUT) {
631145e0143Sdh142964 pwrk->state = PMCS_WORK_STATE_INTR;
632145e0143Sdh142964 }
633145e0143Sdh142964 /*
634145e0143Sdh142964 * Although we cannot mark phy to force abort nor mark phy
635145e0143Sdh142964 * as changed, killing of a target would take care of aborting
636145e0143Sdh142964 * commands for the device.
637145e0143Sdh142964 */
638145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
639145e0143Sdh142964 "%s: No valid target for event processing. Reconfigure.",
640145e0143Sdh142964 __func__);
641145e0143Sdh142964 pmcs_pwork(pwp, pwrk);
642145e0143Sdh142964 RESTART_DISCOVERY(pwp);
643145e0143Sdh142964 return;
644145e0143Sdh142964 } else {
645225bf905SJesse Butler /* We have a phy pointer, we'll need to lock it */
646225bf905SJesse Butler mutex_exit(&pwrk->lock);
647145e0143Sdh142964 pmcs_lock_phy(pptr);
648225bf905SJesse Butler mutex_enter(&pwrk->lock);
6493be32c0fSJesse Butler if (tgt != NULL) {
650145e0143Sdh142964 mutex_enter(&tgt->statlock);
65156976565SDavid Hollister }
652145e0143Sdh142964 if (event == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS) {
6533be32c0fSJesse Butler if ((tgt != NULL) && (tgt->dev_state !=
6543be32c0fSJesse Butler PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
655145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
656145e0143Sdh142964 "%s: Device at %s is non-operational",
657145e0143Sdh142964 __func__, pptr->path);
658145e0143Sdh142964 tgt->dev_state =
659145e0143Sdh142964 PMCS_DEVICE_STATE_NON_OPERATIONAL;
660145e0143Sdh142964 }
661145e0143Sdh142964 pptr->abort_pending = 1;
6623be32c0fSJesse Butler if (tgt != NULL) {
663145e0143Sdh142964 mutex_exit(&tgt->statlock);
66456976565SDavid Hollister }
665145e0143Sdh142964 mutex_exit(&pwrk->lock);
666225bf905SJesse Butler pmcs_unlock_phy(pptr);
667145e0143Sdh142964 SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
668145e0143Sdh142964 RESTART_DISCOVERY(pwp);
669145e0143Sdh142964 return;
670145e0143Sdh142964 }
671145e0143Sdh142964
672145e0143Sdh142964 /*
673145e0143Sdh142964 * If this command is run in WAIT mode, it is a failing recovery
674145e0143Sdh142964 * command. If so, just wake up recovery thread waiting for
675145e0143Sdh142964 * command completion.
676145e0143Sdh142964 */
677145e0143Sdh142964 tag = PMCS_TAG_TYPE(pwrk->htag);
678145e0143Sdh142964 if (tag == PMCS_TAG_TYPE_WAIT) {
679145e0143Sdh142964 pwrk->htag |= PMCS_TAG_DONE;
680145e0143Sdh142964 if (pwrk->arg && amt) {
681145e0143Sdh142964 (void) memcpy(pwrk->arg, iomb, amt);
682145e0143Sdh142964 }
683145e0143Sdh142964 cv_signal(&pwrk->sleep_cv);
6843be32c0fSJesse Butler if (tgt != NULL) {
685145e0143Sdh142964 mutex_exit(&tgt->statlock);
68656976565SDavid Hollister }
687225bf905SJesse Butler mutex_exit(&pwrk->lock);
688145e0143Sdh142964 pmcs_unlock_phy(pptr);
689145e0143Sdh142964 return;
690145e0143Sdh142964 }
691145e0143Sdh142964
6923be32c0fSJesse Butler if (tgt == NULL) {
69356976565SDavid Hollister pmcs_prt(pwp, PMCS_PRT_DEBUG1, pptr, NULL,
69456976565SDavid Hollister "%s: Not scheduling SSP event recovery for NULL tgt"
69556976565SDavid Hollister " pwrk(%p) tag(0x%x)", __func__, (void *)pwrk,
69656976565SDavid Hollister pwrk->htag);
697225bf905SJesse Butler mutex_exit(&pwrk->lock);
698225bf905SJesse Butler pmcs_unlock_phy(pptr);
69956976565SDavid Hollister return;
70056976565SDavid Hollister }
70156976565SDavid Hollister
702145e0143Sdh142964 /*
703658280b6SDavid Hollister * If the SSP event was an OPEN_RETRY_TIMEOUT, we don't want
704658280b6SDavid Hollister * to go through the recovery (abort/LU reset) process.
705658280b6SDavid Hollister * Simply complete the command and return it as STATUS_BUSY.
706658280b6SDavid Hollister * This will cause the target driver to simply retry.
707658280b6SDavid Hollister */
708658280b6SDavid Hollister if (event == PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT) {
709658280b6SDavid Hollister pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
710658280b6SDavid Hollister "%s: Got OPEN_RETRY_TIMEOUT event (htag 0x%08x)",
711658280b6SDavid Hollister __func__, pwrk->htag);
712658280b6SDavid Hollister
713658280b6SDavid Hollister mutex_exit(&tgt->statlock);
714225bf905SJesse Butler /* Note: work remains locked for the callback */
715658280b6SDavid Hollister pmcs_unlock_phy(pptr);
716658280b6SDavid Hollister pwrk->ssp_event = event;
717658280b6SDavid Hollister callback = (pmcs_cb_t)pwrk->ptr;
718658280b6SDavid Hollister (*callback)(pwp, pwrk, iomb);
719658280b6SDavid Hollister return;
720658280b6SDavid Hollister }
721658280b6SDavid Hollister
722658280b6SDavid Hollister /*
723145e0143Sdh142964 * To recover from primary failures,
724145e0143Sdh142964 * we need to schedule handling events recovery.
725145e0143Sdh142964 */
726145e0143Sdh142964 tgt->event_recovery = 1;
727145e0143Sdh142964 mutex_exit(&tgt->statlock);
728145e0143Sdh142964 pwrk->ssp_event = event;
729225bf905SJesse Butler mutex_exit(&pwrk->lock);
730225bf905SJesse Butler pmcs_unlock_phy(pptr);
731145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
732145e0143Sdh142964 "%s: Scheduling SSP event recovery for tgt(0x%p) "
733145e0143Sdh142964 "pwrk(%p) tag(0x%x)", __func__, (void *)tgt, (void *)pwrk,
734145e0143Sdh142964 pwrk->htag);
735145e0143Sdh142964 SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY);
736145e0143Sdh142964 }
737145e0143Sdh142964
738145e0143Sdh142964 /* Work cannot be completed until event recovery is completed. */
739145e0143Sdh142964 }
740145e0143Sdh142964
741145e0143Sdh142964 /*
742145e0143Sdh142964 * SSP target event recovery
743*219ebc8eSSrikanth Suravajhala * phy->lock should be held upon entry.
744*219ebc8eSSrikanth Suravajhala * pwrk->lock should be held upon entry and gets released by this routine.
745*219ebc8eSSrikanth Suravajhala * tgt->statlock should not be held.
746145e0143Sdh142964 */
747145e0143Sdh142964 void
pmcs_tgt_event_recovery(pmcs_hw_t * pwp,pmcwork_t * pwrk)748145e0143Sdh142964 pmcs_tgt_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk)
749145e0143Sdh142964 {
750145e0143Sdh142964 pmcs_phy_t *pptr = pwrk->phy;
751145e0143Sdh142964 pmcs_cmd_t *sp = pwrk->arg;
752145e0143Sdh142964 pmcs_lun_t *lun = sp->cmd_lun;
753145e0143Sdh142964 pmcs_xscsi_t *tgt = pwrk->xp;
754145e0143Sdh142964 uint32_t event;
755145e0143Sdh142964 uint32_t htag;
756145e0143Sdh142964 uint32_t status;
757145e0143Sdh142964 int rv;
758145e0143Sdh142964
759145e0143Sdh142964 ASSERT(pwrk->arg != NULL);
760145e0143Sdh142964 ASSERT(pwrk->xp != NULL);
761145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
762145e0143Sdh142964 "%s: event recovery for target 0x%p", __func__, (void *)pwrk->xp);
763145e0143Sdh142964 htag = pwrk->htag;
764145e0143Sdh142964 event = pwrk->ssp_event;
765145e0143Sdh142964 pwrk->ssp_event = 0xffffffff;
766658280b6SDavid Hollister
767*219ebc8eSSrikanth Suravajhala mutex_exit(&pwrk->lock);
768*219ebc8eSSrikanth Suravajhala
769145e0143Sdh142964 if (event == PMCOUT_STATUS_XFER_ERR_BREAK ||
770145e0143Sdh142964 event == PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY ||
771145e0143Sdh142964 event == PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT) {
772145e0143Sdh142964 /* Command may be still pending on device */
773145e0143Sdh142964 rv = pmcs_ssp_tmf(pwp, pptr, SAS_QUERY_TASK, htag,
774145e0143Sdh142964 lun->lun_num, &status);
775145e0143Sdh142964 if (rv != 0) {
776145e0143Sdh142964 goto out;
777145e0143Sdh142964 }
778145e0143Sdh142964 if (status == SAS_RSP_TMF_COMPLETE) {
779145e0143Sdh142964 /* Command NOT pending on a device */
780145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
781145e0143Sdh142964 "%s: No pending command for tgt 0x%p",
782145e0143Sdh142964 __func__, (void *)tgt);
783145e0143Sdh142964 /* Nothing more to do, just abort it on chip */
784145e0143Sdh142964 htag = 0;
785145e0143Sdh142964 }
786145e0143Sdh142964 }
787145e0143Sdh142964 /*
788145e0143Sdh142964 * All other events left the command pending in the host
789145e0143Sdh142964 * Send abort task and abort it on the chip
790145e0143Sdh142964 */
791145e0143Sdh142964 if (htag != 0) {
792145e0143Sdh142964 if (pmcs_ssp_tmf(pwp, pptr, SAS_ABORT_TASK, htag,
793145e0143Sdh142964 lun->lun_num, &status))
794145e0143Sdh142964 goto out;
795145e0143Sdh142964 }
796*219ebc8eSSrikanth Suravajhala (void) pmcs_abort(pwp, pptr, htag, 0, 1);
797145e0143Sdh142964 /*
798145e0143Sdh142964 * Abort either took care of work completion, or put device in
799145e0143Sdh142964 * a recovery state
800145e0143Sdh142964 */
801145e0143Sdh142964 return;
802145e0143Sdh142964 out:
803145e0143Sdh142964 /* Abort failed, do full device recovery */
804*219ebc8eSSrikanth Suravajhala mutex_enter(&pwrk->lock);
805*219ebc8eSSrikanth Suravajhala tgt = pwrk->xp;
806*219ebc8eSSrikanth Suravajhala mutex_exit(&pwrk->lock);
807*219ebc8eSSrikanth Suravajhala if (tgt != NULL) {
808145e0143Sdh142964 mutex_enter(&tgt->statlock);
809*219ebc8eSSrikanth Suravajhala pmcs_start_dev_state_recovery(tgt, pptr);
810145e0143Sdh142964 mutex_exit(&tgt->statlock);
811145e0143Sdh142964 }
812*219ebc8eSSrikanth Suravajhala }
813145e0143Sdh142964
814145e0143Sdh142964 /*
815145e0143Sdh142964 * SSP event recovery task.
816145e0143Sdh142964 */
817145e0143Sdh142964 void
pmcs_ssp_event_recovery(pmcs_hw_t * pwp)818145e0143Sdh142964 pmcs_ssp_event_recovery(pmcs_hw_t *pwp)
819145e0143Sdh142964 {
820145e0143Sdh142964 int idx;
821145e0143Sdh142964 pmcs_xscsi_t *tgt;
822145e0143Sdh142964 pmcs_cmd_t *cp;
823145e0143Sdh142964 pmcwork_t *pwrk;
824145e0143Sdh142964 pmcs_phy_t *pphy;
825145e0143Sdh142964 int er_flag;
826145e0143Sdh142964 uint32_t idxpwrk;
827145e0143Sdh142964
828145e0143Sdh142964 restart:
829145e0143Sdh142964 for (idx = 0; idx < pwp->max_dev; idx++) {
830145e0143Sdh142964 mutex_enter(&pwp->lock);
831145e0143Sdh142964 tgt = pwp->targets[idx];
832145e0143Sdh142964 mutex_exit(&pwp->lock);
833601c90f1SSrikanth, Ramana if (tgt == NULL) {
834601c90f1SSrikanth, Ramana continue;
835601c90f1SSrikanth, Ramana }
836601c90f1SSrikanth, Ramana
837145e0143Sdh142964 mutex_enter(&tgt->statlock);
838145e0143Sdh142964 if (!tgt->assigned) {
839145e0143Sdh142964 mutex_exit(&tgt->statlock);
840145e0143Sdh142964 continue;
841145e0143Sdh142964 }
842145e0143Sdh142964 pphy = tgt->phy;
843145e0143Sdh142964 er_flag = tgt->event_recovery;
844145e0143Sdh142964 mutex_exit(&tgt->statlock);
845601c90f1SSrikanth, Ramana
846601c90f1SSrikanth, Ramana if ((pphy == NULL) || (er_flag == 0)) {
847601c90f1SSrikanth, Ramana continue;
848601c90f1SSrikanth, Ramana }
849601c90f1SSrikanth, Ramana
850145e0143Sdh142964 pmcs_lock_phy(pphy);
851145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
852601c90f1SSrikanth, Ramana "%s: found target(0x%p)", __func__, (void *) tgt);
853145e0143Sdh142964
854145e0143Sdh142964 /* Check what cmd expects recovery */
855145e0143Sdh142964 mutex_enter(&tgt->aqlock);
856145e0143Sdh142964 STAILQ_FOREACH(cp, &tgt->aq, cmd_next) {
857145e0143Sdh142964 idxpwrk = PMCS_TAG_INDEX(cp->cmd_tag);
858145e0143Sdh142964 pwrk = &pwp->work[idxpwrk];
859*219ebc8eSSrikanth Suravajhala mutex_enter(&pwrk->lock);
860145e0143Sdh142964 if (pwrk->htag != cp->cmd_tag) {
861145e0143Sdh142964 /*
862601c90f1SSrikanth, Ramana * aq may contain TMF commands, so we
863601c90f1SSrikanth, Ramana * may not find work structure with htag
864145e0143Sdh142964 */
865*219ebc8eSSrikanth Suravajhala mutex_exit(&pwrk->lock);
866*219ebc8eSSrikanth Suravajhala continue;
867145e0143Sdh142964 }
868*219ebc8eSSrikanth Suravajhala if (!PMCS_COMMAND_DONE(pwrk) &&
869*219ebc8eSSrikanth Suravajhala (pwrk->ssp_event != 0) &&
870601c90f1SSrikanth, Ramana (pwrk->ssp_event != PMCS_REC_EVENT)) {
871601c90f1SSrikanth, Ramana pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
872601c90f1SSrikanth, Ramana "%s: pwrk(%p) htag(0x%x)",
873601c90f1SSrikanth, Ramana __func__, (void *) pwrk, cp->cmd_tag);
874145e0143Sdh142964 mutex_exit(&tgt->aqlock);
875145e0143Sdh142964 /*
876*219ebc8eSSrikanth Suravajhala * pwrk->lock gets dropped in
877*219ebc8eSSrikanth Suravajhala * pmcs_tgt_event_recovery()
878145e0143Sdh142964 */
879*219ebc8eSSrikanth Suravajhala pmcs_tgt_event_recovery(pwp, pwrk);
880145e0143Sdh142964 pmcs_unlock_phy(pphy);
881*219ebc8eSSrikanth Suravajhala /* All bets are off on tgt/aq now, restart */
882145e0143Sdh142964 goto restart;
883145e0143Sdh142964 }
884*219ebc8eSSrikanth Suravajhala mutex_exit(&pwrk->lock);
885145e0143Sdh142964 }
886145e0143Sdh142964 mutex_exit(&tgt->aqlock);
887*219ebc8eSSrikanth Suravajhala mutex_enter(&tgt->statlock);
888145e0143Sdh142964 tgt->event_recovery = 0;
889145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
890601c90f1SSrikanth, Ramana "%s: end of SSP event recovery for target(0x%p)",
891601c90f1SSrikanth, Ramana __func__, (void *) tgt);
892145e0143Sdh142964 mutex_exit(&tgt->statlock);
893145e0143Sdh142964 pmcs_unlock_phy(pphy);
894145e0143Sdh142964 }
895145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL,
896145e0143Sdh142964 "%s: end of SSP event recovery for pwp(0x%p)", __func__,
897145e0143Sdh142964 (void *) pwp);
898145e0143Sdh142964 }
899145e0143Sdh142964
900145e0143Sdh142964 void
pmcs_start_dev_state_recovery(pmcs_xscsi_t * xp,pmcs_phy_t * phyp)901145e0143Sdh142964 pmcs_start_dev_state_recovery(pmcs_xscsi_t *xp, pmcs_phy_t *phyp)
902145e0143Sdh142964 {
903145e0143Sdh142964 ASSERT(mutex_owned(&xp->statlock));
904145e0143Sdh142964 ASSERT(xp->pwp != NULL);
905145e0143Sdh142964
906145e0143Sdh142964 if (xp->recover_wait == 0) {
907145e0143Sdh142964 pmcs_prt(xp->pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp,
908145e0143Sdh142964 "%s: Start ds_recovery for tgt 0x%p/PHY 0x%p (%s)",
909145e0143Sdh142964 __func__, (void *)xp, (void *)phyp, phyp->path);
910145e0143Sdh142964 xp->recover_wait = 1;
911145e0143Sdh142964
912145e0143Sdh142964 /*
913145e0143Sdh142964 * Rather than waiting for the watchdog timer, we'll
914145e0143Sdh142964 * kick it right now.
915145e0143Sdh142964 */
916145e0143Sdh142964 SCHEDULE_WORK(xp->pwp, PMCS_WORK_DS_ERR_RECOVERY);
917145e0143Sdh142964 (void) ddi_taskq_dispatch(xp->pwp->tq, pmcs_worker, xp->pwp,
918145e0143Sdh142964 DDI_NOSLEEP);
919145e0143Sdh142964 }
920145e0143Sdh142964 }
921145e0143Sdh142964
922145e0143Sdh142964 /*
923145e0143Sdh142964 * Increment the phy ds error retry count.
924145e0143Sdh142964 * If too many retries, mark phy dead and restart discovery;
925145e0143Sdh142964 * otherwise schedule ds recovery.
926145e0143Sdh142964 */
927145e0143Sdh142964 static void
pmcs_handle_ds_recovery_error(pmcs_phy_t * phyp,pmcs_xscsi_t * tgt,pmcs_hw_t * pwp,const char * func_name,char * reason_string)928145e0143Sdh142964 pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt,
929601c90f1SSrikanth, Ramana pmcs_hw_t *pwp, const char *func_name, char *reason_string)
930145e0143Sdh142964 {
931145e0143Sdh142964 ASSERT(mutex_owned(&phyp->phy_lock));
932145e0143Sdh142964 ASSERT((tgt == NULL) || mutex_owned(&tgt->statlock));
933145e0143Sdh142964
934145e0143Sdh142964 phyp->ds_recovery_retries++;
935145e0143Sdh142964
936145e0143Sdh142964 if (phyp->ds_recovery_retries > PMCS_MAX_DS_RECOVERY_RETRIES) {
937145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt,
938145e0143Sdh142964 "%s: retry limit reached after %s to PHY %s failed",
939145e0143Sdh142964 func_name, reason_string, phyp->path);
940145e0143Sdh142964 if (tgt != NULL) {
941145e0143Sdh142964 tgt->recover_wait = 0;
942145e0143Sdh142964 }
943601c90f1SSrikanth, Ramana /*
944601c90f1SSrikanth, Ramana * Mark the PHY as dead and it and its parent as changed,
945601c90f1SSrikanth, Ramana * then restart discovery
946601c90f1SSrikanth, Ramana */
947145e0143Sdh142964 phyp->dead = 1;
948601c90f1SSrikanth, Ramana PHY_CHANGED(pwp, phyp);
949601c90f1SSrikanth, Ramana if (phyp->parent)
950601c90f1SSrikanth, Ramana PHY_CHANGED(pwp, phyp->parent);
951145e0143Sdh142964 RESTART_DISCOVERY(pwp);
952145e0143Sdh142964 } else if ((phyp->ds_prev_good_recoveries >
953145e0143Sdh142964 PMCS_MAX_DS_RECOVERY_RETRIES) &&
954145e0143Sdh142964 (phyp->last_good_recovery + drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)
955145e0143Sdh142964 < ddi_get_lbolt())) {
956145e0143Sdh142964 pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt, "%s: max number of "
957145e0143Sdh142964 "successful recoveries reached, declaring PHY %s dead",
958145e0143Sdh142964 __func__, phyp->path);
959145e0143Sdh142964 if (tgt != NULL) {
960145e0143Sdh142964 tgt->recover_wait = 0;
961145e0143Sdh142964 }
962601c90f1SSrikanth, Ramana /*
963601c90f1SSrikanth, Ramana * Mark the PHY as dead and its parent as changed,
964601c90f1SSrikanth, Ramana * then restart discovery
965601c90f1SSrikanth, Ramana */
966145e0143Sdh142964 phyp->dead = 1;
967601c90f1SSrikanth, Ramana PHY_CHANGED(pwp, phyp);
968601c90f1SSrikanth, Ramana if (phyp->parent)
969601c90f1SSrikanth, Ramana PHY_CHANGED(pwp, phyp->parent);
970145e0143Sdh142964 RESTART_DISCOVERY(pwp);
971145e0143Sdh142964 } else {
972145e0143Sdh142964 SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
973145e0143Sdh142964 }
974145e0143Sdh142964 }
975