xref: /openbsd/sys/scsi/mpath.c (revision 0f9e9ec2)
1 /*	$OpenBSD: mpath.c,v 1.58 2024/05/13 01:15:53 jsg Exp $ */
2 
3 /*
4  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/kernel.h>
22 #include <sys/malloc.h>
23 #include <sys/device.h>
24 #include <sys/conf.h>
25 #include <sys/queue.h>
26 #include <sys/rwlock.h>
27 #include <sys/ioctl.h>
28 
29 #include <scsi/scsi_all.h>
30 #include <scsi/scsiconf.h>
31 #include <scsi/mpathvar.h>
32 
/* Width of the virtual mpath scsibus: one target slot per multipathed device. */
#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);

TAILQ_HEAD(mpath_paths, mpath_path);

/*
 * A group of paths with equivalent access to a device (the g_id is
 * supplied by the path driver at mpath_path_attach() time).  Groups
 * are kept on the device's list with the preferred group at the head;
 * I/O round-robins over the paths of the head group.
 */
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;	/* entry on d_groups */
	struct mpath_paths	 g_paths;	/* paths in this group */
	struct mpath_dev	*g_dev;		/* backpointer to the device */
	u_int			 g_id;		/* driver-assigned group id */
};
TAILQ_HEAD(mpath_groups, mpath_group);
47 
/*
 * Per-device state: one instance per distinct devid reachable through
 * one or more attached paths.  Lives in a slot of sc_devs[], indexed
 * by the target number on the virtual bus.
 */
struct mpath_dev {
	struct mutex		 d_mtx;		/* protects the lists/cursors below */

	struct scsi_xfer_list	 d_xfers;	/* xfers waiting for a path */
	struct mpath_path	*d_next_path;	/* round-robin path cursor */

	struct mpath_groups	 d_groups;	/* path groups, preferred first */

	struct mpath_group	*d_failover_iter; /* group currently being probed */
	struct timeout		 d_failover_tmo; /* retry failover pass later */
	u_int			 d_failover;	/* coalesces failover requests
						 * via scsi_pending_start/finish */

	const struct mpath_ops	*d_ops;		/* path driver callbacks */
	struct devid		*d_id;		/* identity shared by all paths */
};
63 
struct mpath_softc {
	struct device		sc_dev;
	struct scsibus_softc	*sc_scsibus;	/* the virtual bus we host */
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH]; /* indexed by target */
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

/* There is a single mpath(4) instance; set by mpath_attach(). */
struct mpath_softc	*mpath;
72 
/* autoconf(9) glue */
const struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};
84 
void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

/* Adapter entry points for the virtual scsibus. */
const struct scsi_adapter mpath_switch = {
	mpath_cmd, NULL, mpath_probe, NULL, NULL
};

void		mpath_xs_stuffup(struct scsi_xfer *);
101 
/*
 * Autoconf match: mpath is a pseudo-device, so it always matches.
 */
int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}
107 
108 void
mpath_attach(struct device * parent,struct device * self,void * aux)109 mpath_attach(struct device *parent, struct device *self, void *aux)
110 {
111 	struct mpath_softc		*sc = (struct mpath_softc *)self;
112 	struct scsibus_attach_args	saa;
113 
114 	mpath = sc;
115 
116 	printf("\n");
117 
118 	saa.saa_adapter = &mpath_switch;
119 	saa.saa_adapter_softc = sc;
120 	saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET;
121 	saa.saa_adapter_buswidth = MPATH_BUSWIDTH;
122 	saa.saa_luns = 1;
123 	saa.saa_openings = 1024; /* XXX magical */
124 	saa.saa_pool = NULL;
125 	saa.saa_quirks = saa.saa_flags = 0;
126 	saa.saa_wwpn = saa.saa_wwnn = 0;
127 
128 	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
129 	    &saa, scsiprint);
130 }
131 
/*
 * Fail an xfer back to its issuer with XS_DRIVER_STUFFUP.
 */
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}
138 
139 int
mpath_probe(struct scsi_link * link)140 mpath_probe(struct scsi_link *link)
141 {
142 	struct mpath_softc *sc = link->bus->sb_adapter_softc;
143 	struct mpath_dev *d = sc->sc_devs[link->target];
144 
145 	if (link->lun != 0 || d == NULL)
146 		return (ENXIO);
147 
148 	link->id = devid_copy(d->d_id);
149 
150 	return (0);
151 }
152 
153 struct mpath_path *
mpath_next_path(struct mpath_dev * d)154 mpath_next_path(struct mpath_dev *d)
155 {
156 	struct mpath_group *g;
157 	struct mpath_path *p;
158 
159 #ifdef DIAGNOSTIC
160 	if (d == NULL)
161 		panic("%s: d is NULL", __func__);
162 #endif /* DIAGNOSTIC */
163 
164 	p = d->d_next_path;
165 	if (p != NULL) {
166 		d->d_next_path = TAILQ_NEXT(p, p_entry);
167 		if (d->d_next_path == NULL &&
168 		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
169 			d->d_next_path = TAILQ_FIRST(&g->g_paths);
170 	}
171 
172 	return (p);
173 }
174 
/*
 * Adapter cmd entry point for xfers issued against the virtual bus.
 *
 * SCSI_POLL xfers are cloned synchronously onto one path and the
 * result copied back here.  Async xfers are queued on the device and
 * a path's xfer handler is scheduled to service the queue (see
 * mpath_start()).
 */
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif /* DIAGNOSTIC */

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			/* no usable path right now */
			mpath_xs_stuffup(xs);
			return;
		}

		/* get an xfer on the chosen physical path */
		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		/* clone the command onto the path xfer */
		memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		/* copy the completion state back to the original xfer */
		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	/* async: queue the xfer and poke a path to work the queue */
	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}
233 
/*
 * Path xfer handler: invoked with a fresh xfer (mxs) allocated on the
 * physical path.  Dequeue the next pending device xfer, clone it onto
 * mxs and submit it; completion arrives via mpath_done().  If more
 * work remains queued, reschedule this handler.
 */
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	/* bail if the path is going away or was never bound to a device */
	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		/* note whether to reschedule ourselves for more work */
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	/* clone the queued command onto the path xfer */
	memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	/* stash the original xfer so mpath_done() can complete it */
	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}
277 
/*
 * Completion handler for path xfers issued by mpath_start().
 *
 * Path-loss errors (selection timeout, reset) put the original xfer
 * back at the head of the device queue and retry it on another path.
 * Sense data is offered to the path driver's checksense op, which may
 * ask for a failover; anything else is copied back to the original
 * xfer, which is then completed.
 */
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		/* head insert so the retried xfer goes out first */
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			/* advances the path cursor; I/O restarts after failover */
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			/* not a path problem: hand the sense data up */
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif /* DIAGNOSTIC */
		}
		break;
	}

	/* propagate the result to the original xfer and complete it */
	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}
332 
333 void
mpath_failover(struct mpath_dev * d)334 mpath_failover(struct mpath_dev *d)
335 {
336 	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
337 		return;
338 
339 	mpath_failover_start(d);
340 }
341 
/*
 * Begin a failover pass (also the d_failover_tmo handler, hence the
 * void * argument): rewind the group iterator to the head of the
 * group list and probe the first group.
 */
void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}
353 
354 void
mpath_failover_check(struct mpath_dev * d)355 mpath_failover_check(struct mpath_dev *d)
356 {
357 	struct mpath_group *g = d->d_failover_iter;
358 	struct mpath_path *p;
359 
360 	if (g == NULL)
361 		timeout_add_sec(&d->d_failover_tmo, 1);
362 	else {
363 		p = TAILQ_FIRST(&g->g_paths);
364 		d->d_ops->op_status(p->p_link);
365 	}
366 }
367 
/*
 * Callback from a path driver's op_status with the path's state.
 * An MPATH_S_ACTIVE path promotes its group to the front of the group
 * list, points the round-robin cursor at that path and restarts I/O.
 * Any other status moves the failover scan on to the next group.
 * NOTE(review): the scan restarts when scsi_pending_finish() returns
 * zero — presumably meaning more failover requests arrived while this
 * pass ran; confirm against scsi_pending_finish().
 */
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		/* this group works: make it the preferred group */
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}
390 
391 void
mpath_minphys(struct buf * bp,struct scsi_link * link)392 mpath_minphys(struct buf *bp, struct scsi_link *link)
393 {
394 	struct mpath_softc *sc = link->bus->sb_adapter_softc;
395 	struct mpath_dev *d = sc->sc_devs[link->target];
396 	struct mpath_group *g;
397 	struct mpath_path *p;
398 
399 #ifdef DIAGNOSTIC
400 	if (d == NULL)
401 		panic("mpath_minphys against nonexistent device");
402 #endif /* DIAGNOSTIC */
403 
404 	mtx_enter(&d->d_mtx);
405 	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
406 		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
407 			/* XXX crossing layers with mutex held */
408 			if (p->p_link->bus->sb_adapter->dev_minphys != NULL)
409 				p->p_link->bus->sb_adapter->dev_minphys(bp,
410 				    p->p_link);
411 		}
412 	}
413 	mtx_leave(&d->d_mtx);
414 }
415 
/*
 * Decide whether a real scsi_link is eligible to become an mpath
 * path.  The checks run in order (and the order fixes which errno a
 * caller sees): mpath(4) must be attached, the link must carry a
 * devid to identify the device by, links flagged SDEV_UMASS are
 * excluded, and links on the mpath bus itself are never re-claimed.
 */
int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	if (ISSET(link->flags, SDEV_UMASS))
		return (EINVAL);

	if (mpath == link->bus->sb_adapter_softc)
		return (ENXIO);

	return (0);
}
433 
434 int
mpath_path_attach(struct mpath_path * p,u_int g_id,const struct mpath_ops * ops)435 mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
436 {
437 	struct mpath_softc *sc = mpath;
438 	struct scsi_link *link = p->p_link;
439 	struct mpath_dev *d = NULL;
440 	struct mpath_group *g;
441 	int newdev = 0, addxsh = 0;
442 	int target;
443 
444 #ifdef DIAGNOSTIC
445 	if (p->p_link == NULL)
446 		panic("mpath_path_attach: NULL link");
447 	if (p->p_group != NULL)
448 		panic("mpath_path_attach: group is not NULL");
449 #endif /* DIAGNOSTIC */
450 
451 	for (target = 0; target < MPATH_BUSWIDTH; target++) {
452 		if ((d = sc->sc_devs[target]) == NULL)
453 			continue;
454 
455 		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
456 			break;
457 
458 		d = NULL;
459 	}
460 
461 	if (d == NULL) {
462 		for (target = 0; target < MPATH_BUSWIDTH; target++) {
463 			if (sc->sc_devs[target] == NULL)
464 				break;
465 		}
466 		if (target >= MPATH_BUSWIDTH)
467 			return (ENXIO);
468 
469 		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
470 		if (d == NULL)
471 			return (ENOMEM);
472 
473 		mtx_init(&d->d_mtx, IPL_BIO);
474 		TAILQ_INIT(&d->d_groups);
475 		SIMPLEQ_INIT(&d->d_xfers);
476 		d->d_id = devid_copy(link->id);
477 		d->d_ops = ops;
478 
479 		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);
480 
481 		sc->sc_devs[target] = d;
482 		newdev = 1;
483 	} else {
484 		/*
485 		 * instead of carrying identical values in different devid
486 		 * instances, delete the new one and reference the old one in
487 		 * the new scsi_link.
488 		 */
489 		devid_free(link->id);
490 		link->id = devid_copy(d->d_id);
491 	}
492 
493 	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
494 		if (g->g_id == g_id)
495 			break;
496 	}
497 
498 	if (g == NULL) {
499 		g = malloc(sizeof(*g),  M_DEVBUF,
500 		    M_WAITOK | M_CANFAIL | M_ZERO);
501 		if (g == NULL) {
502 			if (newdev) {
503 				free(d, M_DEVBUF, sizeof(*d));
504 				sc->sc_devs[target] = NULL;
505 			}
506 
507 			return (ENOMEM);
508 		}
509 
510 		TAILQ_INIT(&g->g_paths);
511 		g->g_dev = d;
512 		g->g_id = g_id;
513 
514 		mtx_enter(&d->d_mtx);
515 		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
516 		mtx_leave(&d->d_mtx);
517 	}
518 
519 	p->p_group = g;
520 
521 	mtx_enter(&d->d_mtx);
522 	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
523 	if (!SIMPLEQ_EMPTY(&d->d_xfers))
524 		addxsh = 1;
525 
526 	if (d->d_next_path == NULL)
527 		d->d_next_path = p;
528 	mtx_leave(&d->d_mtx);
529 
530 	if (newdev)
531 		scsi_probe_target(mpath->sc_scsibus, target);
532 	else if (addxsh)
533 		scsi_xsh_add(&p->p_xsh);
534 
535 	return (0);
536 }
537 
/*
 * Detach a path from its device.  A group left empty is removed and
 * freed.  If xfers are still queued and another path remains, push
 * the queue onto that path; otherwise trigger a failover so the
 * remaining groups get re-evaluated.
 */
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif /* DIAGNOSTIC */
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	/* don't leave the round-robin cursor on the departing path */
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;	/* group stays in use; don't free it below */

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, sizeof(*g));

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}
578 
579 struct device *
mpath_bootdv(struct device * dev)580 mpath_bootdv(struct device *dev)
581 {
582 	struct mpath_softc *sc = mpath;
583 	struct mpath_dev *d;
584 	struct mpath_group *g;
585 	struct mpath_path *p;
586 	int target;
587 
588 	if (sc == NULL)
589 		return (dev);
590 
591 	for (target = 0; target < MPATH_BUSWIDTH; target++) {
592 		if ((d = sc->sc_devs[target]) == NULL)
593 			continue;
594 
595 		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
596 			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
597 				if (p->p_link->device_softc == dev) {
598 					return (scsi_get_link(mpath->sc_scsibus,
599 					    target, 0)->device_softc);
600 				}
601 			}
602 		}
603 	}
604 
605 	return (dev);
606 }
607