/* $OpenBSD: mpath.c,v 1.53 2020/07/22 13:16:05 krw Exp $ */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/selinfo.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/mpathvar.h>

/*
 * Width of the virtual mpath scsibus: one target slot per multipath
 * device (see sc_devs[] in struct mpath_softc below).
 */
#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
void		mpath_shutdown(void *);

TAILQ_HEAD(mpath_paths, mpath_path);

/*
 * A group of physical paths to a device.  Groups are kept on the
 * device's d_groups list; mpath_path_status() moves a group to the
 * head of that list when one of its paths reports MPATH_S_ACTIVE.
 */
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;	/* entry on dev's d_groups */
	struct mpath_paths	 g_paths;	/* paths in this group */
	struct mpath_dev	*g_dev;		/* back pointer to device */
	u_int			 g_id;		/* group id from the driver */
};
TAILQ_HEAD(mpath_groups, mpath_group);

/*
 * One multipath device: a single logical unit reachable over one or
 * more paths.  Indexed by target id in mpath_softc's sc_devs[].
 */
struct mpath_dev {
	struct mutex		 d_mtx;		/* serializes list/cursor state below */

	struct scsi_xfer_list	 d_xfers;	/* xfers queued waiting for a path */
	struct mpath_path	*d_next_path;	/* round-robin path cursor */

	struct mpath_groups	 d_groups;	/* path groups; head is used first */

	struct mpath_group	*d_failover_iter; /* group being probed by failover */
	struct timeout		 d_failover_tmo; /* retries the failover scan */
	u_int			 d_failover;	/* pending count (scsi_pending_*) */

	const struct mpath_ops	*d_ops;		/* driver callbacks (checksense/status) */
	struct devid		*d_id;		/* device identity shared by all paths */
};
/* Softc for the single virtual mpath(4) bus. */
struct mpath_softc {
	struct device		sc_dev;
	struct scsibus_softc	*sc_scsibus;	/* the scsibus we attach */
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH]; /* devices by target */
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

/* There is at most one mpath(4) instance; set by mpath_attach(). */
struct mpath_softc	*mpath;

struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};

void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

struct scsi_adapter mpath_switch = {
	mpath_cmd, NULL, mpath_probe, NULL, NULL
};

void		mpath_xs_stuffup(struct scsi_xfer *);

/* Always match; mpath is a pseudo device with no real hardware to probe. */
int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}

/*
 * Attach the virtual scsibus that multipath devices appear on and
 * publish the global softc pointer.
 */
void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc *sc = (struct mpath_softc *)self;
	struct scsibus_attach_args saa;

	mpath = sc;

	printf("\n");

	saa.saa_adapter = &mpath_switch;
	saa.saa_adapter_softc = sc;
	saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET;
	saa.saa_adapter_buswidth = MPATH_BUSWIDTH;
	saa.saa_luns = 1;
	saa.saa_openings = 1024; /* XXX magical */
	saa.saa_pool = NULL;
	saa.saa_quirks = saa.saa_flags = 0;
	saa.saa_wwpn = saa.saa_wwnn = 0;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}

/* Fail an xfer that could not be issued on any path. */
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

/*
 * scsibus probe hook: a target exists iff a multipath device has been
 * registered in that sc_devs[] slot; only lun 0 is supported.  The
 * link borrows a copy of the device's devid.
 */
int
mpath_probe(struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];

	if (link->lun != 0 || d == NULL)
		return (ENXIO);

	link->id = devid_copy(d->d_id);

	return (0);
}

/*
 * Return the current path and advance the round-robin cursor.  When
 * the cursor runs off the end of a group's path list it wraps to the
 * first path of the first (preferred) group.  Caller must hold d_mtx.
 */
struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif /* DIAGNOSTIC */

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}

/*
 * Adapter cmd entry point.  Polled xfers are cloned onto a path link
 * and run synchronously; everything else is queued on d_xfers and the
 * chosen path's xsh is scheduled so mpath_start() picks it up later.
 */
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif /* DIAGNOSTIC */

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		/* mirror the request onto the path xfer */
		memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		/* copy the result back for the caller */
		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}

/*
 * xsh handler for a path: take the next queued xfer off the device and
 * issue it on this path via the pre-allocated mxs.  If more xfers
 * remain, reschedule the handler.  NOTE(review): assumes p_xsh was
 * bound to this function by the path driver (see mpathvar.h) — confirm.
 */
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	/* mirror the request onto the path xfer */
	memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}

/*
 * Completion for xfers issued by mpath_start().  Path-loss errors
 * (selection timeout, reset) requeue the original xfer at the head and
 * retry on the next path; MPATH_SENSE_FAILOVER from the driver's
 * checksense op requeues and kicks a failover scan.  Everything else
 * is copied back to the original xfer and completed.
 */
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif /* DIAGNOSTIC */
		}
		break;
	}

	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}

/*
 * Begin a failover scan unless one is already pending
 * (scsi_pending_start() collapses concurrent requests).
 */
void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}

/*
 * (Re)start the failover scan from the first group.  Also the
 * d_failover_tmo timeout handler, hence the void * argument.
 */
void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}

/*
 * Probe the current failover group by asking the driver for the status
 * of its first path; the driver reports back via mpath_path_status().
 * If the scan ran off the end of the group list, retry in a second.
 */
void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}

/*
 * Driver callback delivering the path status requested by
 * mpath_failover_check().  An active path promotes its group to the
 * head of d_groups, becomes the round-robin cursor, and finishes the
 * scan (restarting it if another request arrived meanwhile); otherwise
 * the scan advances to the next group.
 */
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}

/*
 * Clamp a buf's transfer size to what every underlying path's adapter
 * can handle, by running each path's dev_minphys over it.
 */
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif /* DIAGNOSTIC */

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			if (p->p_link->bus->sb_adapter->dev_minphys != NULL)
				p->p_link->bus->sb_adapter->dev_minphys(bp,
				    p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}

/*
 * Helper for path drivers: decide whether a link is eligible to be an
 * mpath path.  Requires an attached mpath(4), a devid on the link, not
 * a umass device, and not a link on the mpath bus itself.
 */
int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	if (ISSET(link->flags, SDEV_UMASS))
		return (EINVAL);

	if (mpath == link->bus->sb_adapter_softc)
		return (ENXIO);

	return (0);
}

/*
 * Register a new path.  Finds (or creates) the device matching the
 * link's devid and ops, finds (or creates) the group g_id within it,
 * and appends the path.  A brand-new device triggers a probe of its
 * target on the mpath bus; an existing device with queued xfers gets
 * the new path's xsh scheduled immediately.
 */
int
mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
{
	struct mpath_softc *sc = mpath;
	struct scsi_link *link = p->p_link;
	struct mpath_dev *d = NULL;
	struct mpath_group *g;
	int newdev = 0, addxsh = 0;
	int target;

#ifdef DIAGNOSTIC
	if (p->p_link == NULL)
		panic("mpath_path_attach: NULL link");
	if (p->p_group != NULL)
		panic("mpath_path_attach: group is not NULL");
#endif /* DIAGNOSTIC */

	/* look for an existing device with the same identity and ops */
	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
			break;

		d = NULL;
	}

	if (d == NULL) {
		/* new device: claim the first free target slot */
		for (target = 0; target < MPATH_BUSWIDTH; target++) {
			if (sc->sc_devs[target] == NULL)
				break;
		}
		if (target >= MPATH_BUSWIDTH)
			return (ENXIO);

		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
		if (d == NULL)
			return (ENOMEM);

		mtx_init(&d->d_mtx, IPL_BIO);
		TAILQ_INIT(&d->d_groups);
		SIMPLEQ_INIT(&d->d_xfers);
		d->d_id = devid_copy(link->id);
		d->d_ops = ops;

		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);

		sc->sc_devs[target] = d;
		newdev = 1;
	} else {
		/*
		 * instead of carrying identical values in different devid
		 * instances, delete the new one and reference the old one in
		 * the new scsi_link.
		 */
		devid_free(link->id);
		link->id = devid_copy(d->d_id);
	}

	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		if (g->g_id == g_id)
			break;
	}

	if (g == NULL) {
		g = malloc(sizeof(*g), M_DEVBUF,
		    M_WAITOK | M_CANFAIL | M_ZERO);
		if (g == NULL) {
			if (newdev) {
				free(d, M_DEVBUF, sizeof(*d));
				sc->sc_devs[target] = NULL;
			}

			return (ENOMEM);
		}

		TAILQ_INIT(&g->g_paths);
		g->g_dev = d;
		g->g_id = g_id;

		mtx_enter(&d->d_mtx);
		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
		mtx_leave(&d->d_mtx);
	}

	p->p_group = g;

	mtx_enter(&d->d_mtx);
	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		addxsh = 1;

	if (d->d_next_path == NULL)
		d->d_next_path = p;
	mtx_leave(&d->d_mtx);

	if (newdev)
		scsi_probe_target(mpath->sc_scsibus, target);
	else if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return (0);
}

/*
 * Unregister a path.  Empty groups are removed (so groups on d_groups
 * always hold at least one path).  If xfers are still queued, they are
 * pushed onto another path, or a failover scan is started when no path
 * remains usable.
 */
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif /* DIAGNOSTIC */
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;	/* group stays; nothing to free below */

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, sizeof(*g));

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}

/*
 * Map a boot device to its mpath equivalent: if dev is the softc of
 * any underlying path, return the softc of the corresponding device on
 * the mpath bus instead, so the system roots on the multipath node.
 */
struct device *
mpath_bootdv(struct device *dev)
{
	struct mpath_softc *sc = mpath;
	struct mpath_dev *d;
	struct mpath_group *g;
	struct mpath_path *p;
	int target;

	if (sc == NULL)
		return (dev);

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
				if (p->p_link->device_softc == dev) {
					return (scsi_get_link(mpath->sc_scsibus,
					    target, 0)->device_softc);
				}
			}
		}
	}

	return (dev);
}