/*	$OpenBSD: mpath.c,v 1.57 2022/07/02 08:50:42 visa Exp $ */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/mpathvar.h>

#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
void		mpath_shutdown(void *);

TAILQ_HEAD(mpath_paths, mpath_path);

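/*
 * Paths (mpath_path, one per physical scsi_link) are collected into
 * path groups (mpath_group), and the groups into a multipathed device
 * (mpath_dev).  I/O is round-robined over the paths in the group at
 * the head of d_groups; the remaining groups are standby and are only
 * promoted to the head by failover.
 */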
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;
	struct mpath_paths	 g_paths;
	struct mpath_dev	*g_dev;
	u_int			 g_id;
};
TAILQ_HEAD(mpath_groups, mpath_group);

struct mpath_dev {
	struct mutex		 d_mtx;

	struct scsi_xfer_list	 d_xfers;
	struct mpath_path	*d_next_path;

	struct mpath_groups	 d_groups;

	struct mpath_group	*d_failover_iter;
	struct timeout		 d_failover_tmo;
	u_int			 d_failover;

	const struct mpath_ops	*d_ops;
	struct devid		*d_id;
};

struct mpath_softc {
	struct device		sc_dev;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH];
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

struct mpath_softc	*mpath;

const struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};

void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

const struct scsi_adapter mpath_switch = {
	mpath_cmd, NULL, mpath_probe, NULL, NULL
};

void		mpath_xs_stuffup(struct scsi_xfer *);

int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}

void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc		*sc = (struct mpath_softc *)self;
	struct scsibus_attach_args	saa;

	mpath = sc;

	printf("\n");

	saa.saa_adapter = &mpath_switch;
	saa.saa_adapter_softc = sc;
	saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET;
	saa.saa_adapter_buswidth = MPATH_BUSWIDTH;
	saa.saa_luns = 1;
	saa.saa_openings = 1024; /* XXX magical */
	saa.saa_pool = NULL;
	saa.saa_quirks = saa.saa_flags = 0;
	saa.saa_wwpn = saa.saa_wwnn = 0;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}

void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

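/*
 * Adapter probe entry for the mpath bus: a target responds only if a
 * multipathed device occupies that slot, and only on LUN 0.
 */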
int
mpath_probe(struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];

	if (link->lun != 0 || d == NULL)
		return (ENXIO);

	link->id = devid_copy(d->d_id);

	return (0);
}

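/*
 * Pick the path the next xfer should be issued on and advance the
 * round robin within the current group, wrapping back to the head
 * group's first path at the end of the list.  Must be called with
 * d_mtx held.
 */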
struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif /* DIAGNOSTIC */

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}

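/*
 * Adapter cmd entry.  Polled xfers cannot sit on the device queue, so
 * they are copied onto a path and executed synchronously.  Everything
 * else is queued on the device and handed to the next path's xfer
 * handler via scsi_xsh_add().
 */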
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif /* DIAGNOSTIC */

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}

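/*
 * Called by the path drivers (mpath_sym.c and friends) from their
 * p_xsh handlers with an xfer allocated on the physical path.  The
 * next pending xfer for the device is dequeued, copied into mxs and
 * issued on the path; if more work remains, the handler is
 * rescheduled.
 */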
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}

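/*
 * Completion for xfers issued on a physical path.  Selection timeouts
 * and resets requeue the original xfer and retry it on the next path;
 * sense data is offered to the path driver's op_checksense, which may
 * demand a failover.  Anything else completes the original xfer with
 * the status the path returned.
 */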
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif /* DIAGNOSTIC */
		}
		break;
	}

	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}

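/*
 * Kick off a failover unless one is already running; concurrent
 * requests are collapsed and replayed by the scsi_pending_finish()
 * call in mpath_path_status().
 */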
void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}

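/*
 * Begin (or, as the d_failover_tmo handler, retry) a failover by
 * pointing the iterator at the first path group.
 */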
void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}

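/*
 * Ask the path driver for the status of the current group's first
 * path; the answer comes back via mpath_path_status().  Once the
 * groups are exhausted, retry the whole scan in a second.
 */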
void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}

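/*
 * Called by the path drivers to report a path's status.  An active
 * path promotes its group to the head of d_groups, becomes the next
 * path and restarts I/O; any other status moves the failover iterator
 * on to the next group.
 */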
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}

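/*
 * Apply every physical path's dev_minphys so the mpath device honors
 * the most restrictive transfer size of the paths underneath it.
 */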
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif /* DIAGNOSTIC */

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			if (p->p_link->bus->sb_adapter->dev_minphys != NULL)
				p->p_link->bus->sb_adapter->dev_minphys(bp,
				    p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}

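/*
 * Gate used by the path drivers to decide whether a scsi_link is
 * eligible for multipathing: mpath must be attached, the link needs a
 * device id, and links on umass(4) or on the mpath bus itself are
 * excluded.
 */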
int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	if (ISSET(link->flags, SDEV_UMASS))
		return (EINVAL);

	if (mpath == link->bus->sb_adapter_softc)
		return (ENXIO);

	return (0);
}

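/*
 * Attach a physical path.  The mpath_dev is looked up by devid and
 * ops (and created, claiming a free target on the mpath bus, if this
 * is the first path to the device), the group is looked up by g_id
 * (and created likewise), and the path is appended to the group.  A
 * new device is then probed on the mpath bus; an existing one has its
 * queued xfers kicked along the new path.
 */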
int
mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
{
	struct mpath_softc *sc = mpath;
	struct scsi_link *link = p->p_link;
	struct mpath_dev *d = NULL;
	struct mpath_group *g;
	int newdev = 0, addxsh = 0;
	int target;

#ifdef DIAGNOSTIC
	if (p->p_link == NULL)
		panic("mpath_path_attach: NULL link");
	if (p->p_group != NULL)
		panic("mpath_path_attach: group is not NULL");
#endif /* DIAGNOSTIC */

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
			break;

		d = NULL;
	}

	if (d == NULL) {
		for (target = 0; target < MPATH_BUSWIDTH; target++) {
			if (sc->sc_devs[target] == NULL)
				break;
		}
		if (target >= MPATH_BUSWIDTH)
			return (ENXIO);

		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
		if (d == NULL)
			return (ENOMEM);

		mtx_init(&d->d_mtx, IPL_BIO);
		TAILQ_INIT(&d->d_groups);
		SIMPLEQ_INIT(&d->d_xfers);
		d->d_id = devid_copy(link->id);
		d->d_ops = ops;

		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);

		sc->sc_devs[target] = d;
		newdev = 1;
	} else {
		/*
		 * instead of carrying identical values in different devid
		 * instances, delete the new one and reference the old one in
		 * the new scsi_link.
		 */
		devid_free(link->id);
		link->id = devid_copy(d->d_id);
	}

	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		if (g->g_id == g_id)
			break;
	}

	if (g == NULL) {
		g = malloc(sizeof(*g), M_DEVBUF,
		    M_WAITOK | M_CANFAIL | M_ZERO);
		if (g == NULL) {
			if (newdev) {
				/* undo the new device; don't leak its devid */
				devid_free(d->d_id);
				free(d, M_DEVBUF, sizeof(*d));
				sc->sc_devs[target] = NULL;
			}

			return (ENOMEM);
		}

		TAILQ_INIT(&g->g_paths);
		g->g_dev = d;
		g->g_id = g_id;

		mtx_enter(&d->d_mtx);
		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
		mtx_leave(&d->d_mtx);
	}

	p->p_group = g;

	mtx_enter(&d->d_mtx);
	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		addxsh = 1;

	if (d->d_next_path == NULL)
		d->d_next_path = p;
	mtx_leave(&d->d_mtx);

	if (newdev)
		scsi_probe_target(mpath->sc_scsibus, target);
	else if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return (0);
}

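/*
 * Detach a physical path, freeing its group if it was the last member.
 * Pending I/O is pushed onto the next path if one remains; failing
 * that, a failover goes hunting for a working group.
 */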
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif /* DIAGNOSTIC */
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, sizeof(*g));

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}

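/*
 * Map the boot device to its mpath equivalent: if dev sits behind one
 * of the physical paths, return the softc of the corresponding device
 * on the mpath bus instead, so the system roots on mpath.
 */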
struct device *
mpath_bootdv(struct device *dev)
{
	struct mpath_softc *sc = mpath;
	struct mpath_dev *d;
	struct mpath_group *g;
	struct mpath_path *p;
	int target;

	if (sc == NULL)
		return (dev);

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
				if (p->p_link->device_softc == dev) {
					return (scsi_get_link(mpath->sc_scsibus,
					    target, 0)->device_softc);
				}
			}
		}
	}

	return (dev);
}