/*	$OpenBSD: mpath.c,v 1.39 2015/03/14 03:38:52 jsg Exp $ */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/selinfo.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/mpathvar.h>

#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
void		mpath_shutdown(void *);

TAILQ_HEAD(mpath_paths, mpath_path);

struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;
	struct mpath_paths	 g_paths;
	struct mpath_dev	*g_dev;
	u_int			 g_id;
};
TAILQ_HEAD(mpath_groups, mpath_group);

struct mpath_dev {
	struct mutex		 d_mtx;

	struct scsi_xfer_list	 d_xfers;
	struct mpath_path	*d_next_path;

	struct mpath_groups	 d_groups;

	struct mpath_group	*d_failover_iter;
	struct timeout		 d_failover_tmo;
	u_int			 d_failover;

	const struct mpath_ops	*d_ops;
	struct devid		*d_id;
};

struct mpath_softc {
	struct device		 sc_dev;
	struct scsi_link	 sc_link;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH];
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

struct mpath_softc	*mpath;

struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};

void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

struct scsi_adapter mpath_switch = {
	mpath_cmd,
	scsi_minphys,
	mpath_probe
};

void		mpath_xs_stuffup(struct scsi_xfer *);

int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}

void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc		*sc = (struct mpath_softc *)self;
	struct scsibus_attach_args	saa;

	mpath = sc;

	printf("\n");

	sc->sc_link.adapter = &mpath_switch;
	sc->sc_link.adapter_softc = sc;
	sc->sc_link.adapter_target = MPATH_BUSWIDTH;
	sc->sc_link.adapter_buswidth = MPATH_BUSWIDTH;
	sc->sc_link.luns = 1;
	sc->sc_link.openings = 1024; /* XXX magical */

	bzero(&saa, sizeof(saa));
	saa.saa_sc_link = &sc->sc_link;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}

void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

int
mpath_probe(struct scsi_link *link)
{
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];

	if (link->lun != 0 || d == NULL)
		return (ENXIO);

	link->id = devid_copy(d->d_id);

	return (0);
}
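
/*
 * pick the path the next queued xfer should be issued on and advance
 * the round-robin pointer: selection walks the current group's list of
 * paths and wraps back to the first path of the head (active) group.
 * must be called with d_mtx held.
 */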
struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}

void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}

void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}
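
/*
 * completion handler for xfers issued via mpath_start().  a selection
 * timeout or reset means the physical path is gone, so the original
 * xfer is requeued and rescheduled on the next path.  sense data is
 * referred to the device type's checksense op, which may ask for a
 * failover to another path group.
 */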
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif
		}
		break;
	}

	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}

void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}

void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}

void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}

void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}

void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			p->p_link->adapter->scsi_minphys(bp, p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}

int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	if (ISSET(link->flags, SDEV_UMASS))
		return (EINVAL);

	if (mpath == link->adapter_softc)
		return (ENXIO);

	return (0);
}
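
/*
 * register a path with mpath.  the path is matched to an existing
 * mpath_dev by the devid on its scsi_link and by its ops; the first
 * path to a device allocates it and probes a new target on the mpath
 * bus.  paths attaching with the same g_id are collected into the
 * same group.
 */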
int
mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
{
	struct mpath_softc *sc = mpath;
	struct scsi_link *link = p->p_link;
	struct mpath_dev *d = NULL;
	struct mpath_group *g;
	int newdev = 0, addxsh = 0;
	int target;

#ifdef DIAGNOSTIC
	if (p->p_link == NULL)
		panic("mpath_path_attach: NULL link");
	if (p->p_group != NULL)
		panic("mpath_path_attach: group is not NULL");
#endif

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
			break;

		d = NULL;
	}

	if (d == NULL) {
		for (target = 0; target < MPATH_BUSWIDTH; target++) {
			if (sc->sc_devs[target] == NULL)
				break;
		}
		if (target >= MPATH_BUSWIDTH)
			return (ENXIO);

		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
		if (d == NULL)
			return (ENOMEM);

		mtx_init(&d->d_mtx, IPL_BIO);
		TAILQ_INIT(&d->d_groups);
		SIMPLEQ_INIT(&d->d_xfers);
		d->d_id = devid_copy(link->id);
		d->d_ops = ops;

		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);

		sc->sc_devs[target] = d;
		newdev = 1;
	} else {
		/*
		 * instead of carrying identical values in different devid
		 * instances, delete the new one and reference the old one in
		 * the new scsi_link.
		 */
		devid_free(link->id);
		link->id = devid_copy(d->d_id);
	}

	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		if (g->g_id == g_id)
			break;
	}

	if (g == NULL) {
		g = malloc(sizeof(*g), M_DEVBUF,
		    M_WAITOK | M_CANFAIL | M_ZERO);
		if (g == NULL) {
			if (newdev) {
				free(d, M_DEVBUF, 0);
				sc->sc_devs[target] = NULL;
			}

			return (ENOMEM);
		}

		TAILQ_INIT(&g->g_paths);
		g->g_dev = d;
		g->g_id = g_id;

		mtx_enter(&d->d_mtx);
		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
		mtx_leave(&d->d_mtx);
	}

	p->p_group = g;

	mtx_enter(&d->d_mtx);
	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		addxsh = 1;

	if (d->d_next_path == NULL)
		d->d_next_path = p;
	mtx_leave(&d->d_mtx);

	if (newdev)
		scsi_probe_target(mpath->sc_scsibus, target);
	else if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return (0);
}

int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, 0);

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}
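
/*
 * if dev is the softc of a physical path known to mpath, return the
 * softc of the corresponding device on the mpath bus instead, so the
 * boot device resolves to the multipathed instance.
 */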
struct device *
mpath_bootdv(struct device *dev)
{
	struct mpath_softc *sc = mpath;
	struct mpath_dev *d;
	struct mpath_group *g;
	struct mpath_path *p;
	int target;

	if (sc == NULL)
		return (dev);

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
				if (p->p_link->device_softc == dev) {
					return (scsi_get_link(mpath->sc_scsibus,
					    target, 0)->device_softc);
				}
			}
		}
	}

	return (dev);
}