/*	$OpenBSD: mpath.c,v 1.58 2024/05/13 01:15:53 jsg Exp $ */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/mpathvar.h>

#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);

TAILQ_HEAD(mpath_paths, mpath_path);

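/*
 * A group collects paths that can be used interchangeably. The group at
 * the head of a device's d_groups list is the one transfers are issued
 * on; failover walks the list looking for a group that reports itself
 * active.
 */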
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;
	struct mpath_paths	 g_paths;
	struct mpath_dev	*g_dev;
	u_int			 g_id;
};
TAILQ_HEAD(mpath_groups, mpath_group);

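/*
 * Per-device state shared by all paths: the queue of pending transfers,
 * the round-robin pointer used to pick the next path, the groups the
 * paths are sorted into, and the failover machinery.
 */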
struct mpath_dev {
	struct mutex		 d_mtx;

	struct scsi_xfer_list	 d_xfers;
	struct mpath_path	*d_next_path;

	struct mpath_groups	 d_groups;

	struct mpath_group	*d_failover_iter;
	struct timeout		 d_failover_tmo;
	u_int			 d_failover;

	const struct mpath_ops	*d_ops;
	struct devid		*d_id;
};

struct mpath_softc {
	struct device		sc_dev;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH];
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

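/*
 * There is only ever one mpath adapter: mpath_match() accepts
 * unconditionally and mpath_attach() records the softc here for the
 * path drivers to find.
 */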
struct mpath_softc	*mpath;

const struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};

void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

const struct scsi_adapter mpath_switch = {
	mpath_cmd, NULL, mpath_probe, NULL, NULL
};

void		mpath_xs_stuffup(struct scsi_xfer *);

int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}

void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc *sc = (struct mpath_softc *)self;
	struct scsibus_attach_args saa;

	mpath = sc;

	printf("\n");

	saa.saa_adapter = &mpath_switch;
	saa.saa_adapter_softc = sc;
	saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET;
	saa.saa_adapter_buswidth = MPATH_BUSWIDTH;
	saa.saa_luns = 1;
	saa.saa_openings = 1024; /* XXX magical */
	saa.saa_pool = NULL;
	saa.saa_quirks = saa.saa_flags = 0;
	saa.saa_wwpn = saa.saa_wwnn = 0;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}

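/*
 * Complete a transfer with XS_DRIVER_STUFFUP, used when no path is
 * available to carry it.
 */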
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

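/*
 * Probe of a target on the mpath bus itself: succeed only for LUN 0 of
 * targets where a multipath device has been assembled, and hand the
 * link a copy of the shared device id.
 */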
int
mpath_probe(struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];

	if (link->lun != 0 || d == NULL)
		return (ENXIO);

	link->id = devid_copy(d->d_id);

	return (0);
}

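/*
 * Pick the path for the next transfer: hand out d_next_path and advance
 * it round-robin, wrapping back to the first path of the head group.
 * Called with d_mtx held.
 */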
struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif /* DIAGNOSTIC */

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}

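/*
 * Adapter entry point for transfers on the mpath bus. Polled transfers
 * are copied onto a path transfer and run synchronously; everything
 * else is queued on d_xfers and scheduled via the next path's xfer
 * handler.
 */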
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif /* DIAGNOSTIC */

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}

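/*
 * Pull the next pending transfer onto mxs, a transfer allocated on the
 * physical path, and execute it; the path drivers are expected to call
 * this from their p_xsh handlers. Completion lands in mpath_done().
 */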
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}

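/*
 * Completion of a path transfer. Selection timeouts and resets requeue
 * the original transfer at the head of d_xfers and try another path;
 * sense data is shown to the path driver's checksense op, which may
 * decline it or request a failover. Anything else is copied back to the
 * original transfer and completed.
 */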
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif /* DIAGNOSTIC */
		}
		break;
	}

	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}

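/*
 * Kick off a failover unless one is already in progress; requests that
 * arrive while a scan is running make it start over when it finishes.
 */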
void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}

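/*
 * Begin a scan from the first group. Also the handler for
 * d_failover_tmo, which retries when a scan finds no usable group.
 */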
void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}

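/*
 * Advance the scan: if the iterator has run off the end of the group
 * list, retry in a second; otherwise query the current group's first
 * path via op_status, which is expected to report back through
 * mpath_path_status().
 */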
void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}

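/*
 * Called by the path drivers to report a path's status. An active path
 * promotes its group to the head of the list and becomes the next path
 * to issue on; any other status moves the failover scan on to the next
 * group.
 */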
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}

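/*
 * Apply the minphys of every physical path so transfers are sized for
 * the most restrictive one.
 */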
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif /* DIAGNOSTIC */

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			if (p->p_link->bus->sb_adapter->dev_minphys != NULL)
				p->p_link->bus->sb_adapter->dev_minphys(bp,
				    p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}

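/*
 * Used by the path drivers to decide whether a scsi_link may be claimed
 * as a path: the mpath bus must exist, the link must carry a device id,
 * umass devices are skipped, and mpath's own links are never wrapped.
 */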
int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	if (ISSET(link->flags, SDEV_UMASS))
		return (EINVAL);

	if (mpath == link->bus->sb_adapter_softc)
		return (ENXIO);

	return (0);
}

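/*
 * Attach a path. Find the device with a matching id and ops, creating
 * it in a free target slot if necessary, find or create the group g_id
 * names within it, then append the path. A new device triggers a probe
 * of its target on the mpath bus; an existing one may already have
 * transfers waiting for the path.
 */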
int
mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
{
	struct mpath_softc *sc = mpath;
	struct scsi_link *link = p->p_link;
	struct mpath_dev *d = NULL;
	struct mpath_group *g;
	int newdev = 0, addxsh = 0;
	int target;

#ifdef DIAGNOSTIC
	if (p->p_link == NULL)
		panic("mpath_path_attach: NULL link");
	if (p->p_group != NULL)
		panic("mpath_path_attach: group is not NULL");
#endif /* DIAGNOSTIC */

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
			break;

		d = NULL;
	}

	if (d == NULL) {
		for (target = 0; target < MPATH_BUSWIDTH; target++) {
			if (sc->sc_devs[target] == NULL)
				break;
		}
		if (target >= MPATH_BUSWIDTH)
			return (ENXIO);

		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
		if (d == NULL)
			return (ENOMEM);

		mtx_init(&d->d_mtx, IPL_BIO);
		TAILQ_INIT(&d->d_groups);
		SIMPLEQ_INIT(&d->d_xfers);
		d->d_id = devid_copy(link->id);
		d->d_ops = ops;

		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);

		sc->sc_devs[target] = d;
		newdev = 1;
	} else {
		/*
		 * instead of carrying identical values in different devid
		 * instances, delete the new one and reference the old one in
		 * the new scsi_link.
		 */
		devid_free(link->id);
		link->id = devid_copy(d->d_id);
	}

	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		if (g->g_id == g_id)
			break;
	}

	if (g == NULL) {
		g = malloc(sizeof(*g), M_DEVBUF,
		    M_WAITOK | M_CANFAIL | M_ZERO);
		if (g == NULL) {
			if (newdev) {
				free(d, M_DEVBUF, sizeof(*d));
				sc->sc_devs[target] = NULL;
			}

			return (ENOMEM);
		}

		TAILQ_INIT(&g->g_paths);
		g->g_dev = d;
		g->g_id = g_id;

		mtx_enter(&d->d_mtx);
		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
		mtx_leave(&d->d_mtx);
	}

	p->p_group = g;

	mtx_enter(&d->d_mtx);
	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		addxsh = 1;

	if (d->d_next_path == NULL)
		d->d_next_path = p;
	mtx_leave(&d->d_mtx);

	if (newdev)
		scsi_probe_target(mpath->sc_scsibus, target);
	else if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return (0);
}

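/*
 * Detach a path from its group, freeing the group once it has no paths
 * left, then either push any remaining transfers along on the next path
 * or start a failover.
 */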
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif /* DIAGNOSTIC */
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, sizeof(*g));

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}

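/*
 * Given a boot device that may really be one of our paths, return the
 * softc of the device attached at the corresponding mpath target, so
 * booting can be rooted on the mpath device instead.
 */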
struct device *
mpath_bootdv(struct device *dev)
{
	struct mpath_softc *sc = mpath;
	struct mpath_dev *d;
	struct mpath_group *g;
	struct mpath_path *p;
	int target;

	if (sc == NULL)
		return (dev);

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
				if (p->p_link->device_softc == dev) {
					return (scsi_get_link(mpath->sc_scsibus,
					    target, 0)->device_softc);
				}
			}
		}
	}

	return (dev);
}