/* xref: /openbsd/sys/scsi/mpath.c (revision d89ec533) */
1 /*	$OpenBSD: mpath.c,v 1.55 2021/10/24 16:57:30 mpi Exp $ */
2 
3 /*
4  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/kernel.h>
22 #include <sys/malloc.h>
23 #include <sys/device.h>
24 #include <sys/conf.h>
25 #include <sys/queue.h>
26 #include <sys/rwlock.h>
27 #include <sys/ioctl.h>
28 #include <sys/poll.h>
29 #include <sys/selinfo.h>
30 
31 #include <scsi/scsi_all.h>
32 #include <scsi/scsiconf.h>
33 #include <scsi/mpathvar.h>
34 
/* Width of the virtual mpath bus, i.e. the maximum number of mpath devices. */
#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
void		mpath_shutdown(void *);

TAILQ_HEAD(mpath_paths, mpath_path);

/*
 * A group of paths belonging to one device.  mpath_path_status() moves
 * the group that reports MPATH_S_ACTIVE to the head of the device's
 * group list, so the head group is the one i/o is dispatched on.
 */
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;
	struct mpath_paths	 g_paths;	/* paths in this group */
	struct mpath_dev	*g_dev;		/* owning device */
	u_int			 g_id;		/* group id from the backend */
};
TAILQ_HEAD(mpath_groups, mpath_group);

/*
 * One logical multipathed device, addressed as a target on the mpath bus.
 */
struct mpath_dev {
	struct mutex		 d_mtx;		/* protects the fields below */

	struct scsi_xfer_list	 d_xfers;	/* xfers waiting for a path */
	struct mpath_path	*d_next_path;	/* round-robin cursor */

	struct mpath_groups	 d_groups;	/* active group at the head */

	struct mpath_group	*d_failover_iter; /* group currently being checked */
	struct timeout		 d_failover_tmo; /* rearms the failover scan */
	u_int			 d_failover;	/* scsi_pending_* state */

	const struct mpath_ops	*d_ops;		/* backend callbacks (checksense/status) */
	struct devid		*d_id;		/* devid shared by all paths */
};

struct mpath_softc {
	struct device		sc_dev;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH]; /* indexed by target */
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

/* There is only ever one mpath(4) instance; set in mpath_attach(). */
struct mpath_softc	*mpath;

const struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};

void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

/* adapter glue for the virtual scsibus hanging off mpath */
struct scsi_adapter mpath_switch = {
	mpath_cmd, NULL, mpath_probe, NULL, NULL
};

void		mpath_xs_stuffup(struct scsi_xfer *);
/*
 * Always match: mpath is a pseudo-device with no underlying hardware.
 */
int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}
110 
111 void
112 mpath_attach(struct device *parent, struct device *self, void *aux)
113 {
114 	struct mpath_softc		*sc = (struct mpath_softc *)self;
115 	struct scsibus_attach_args	saa;
116 
117 	mpath = sc;
118 
119 	printf("\n");
120 
121 	saa.saa_adapter = &mpath_switch;
122 	saa.saa_adapter_softc = sc;
123 	saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET;
124 	saa.saa_adapter_buswidth = MPATH_BUSWIDTH;
125 	saa.saa_luns = 1;
126 	saa.saa_openings = 1024; /* XXX magical */
127 	saa.saa_pool = NULL;
128 	saa.saa_quirks = saa.saa_flags = 0;
129 	saa.saa_wwpn = saa.saa_wwnn = 0;
130 
131 	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
132 	    &saa, scsiprint);
133 }
134 
/*
 * Fail an xfer back to the midlayer when no usable path is available.
 */
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}
141 
142 int
143 mpath_probe(struct scsi_link *link)
144 {
145 	struct mpath_softc *sc = link->bus->sb_adapter_softc;
146 	struct mpath_dev *d = sc->sc_devs[link->target];
147 
148 	if (link->lun != 0 || d == NULL)
149 		return (ENXIO);
150 
151 	link->id = devid_copy(d->d_id);
152 
153 	return (0);
154 }
155 
156 struct mpath_path *
157 mpath_next_path(struct mpath_dev *d)
158 {
159 	struct mpath_group *g;
160 	struct mpath_path *p;
161 
162 #ifdef DIAGNOSTIC
163 	if (d == NULL)
164 		panic("%s: d is NULL", __func__);
165 #endif /* DIAGNOSTIC */
166 
167 	p = d->d_next_path;
168 	if (p != NULL) {
169 		d->d_next_path = TAILQ_NEXT(p, p_entry);
170 		if (d->d_next_path == NULL &&
171 		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
172 			d->d_next_path = TAILQ_FIRST(&g->g_paths);
173 	}
174 
175 	return (p);
176 }
177 
/*
 * Adapter entry point: issue an xfer against the mpath device by
 * cloning it onto one of the underlying physical paths.
 */
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif /* DIAGNOSTIC */

	if (ISSET(xs->flags, SCSI_POLL)) {
		/*
		 * Polled commands cannot sit on the async queue: pick a
		 * path now and run the clone synchronously.
		 */
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		/* copy the command over to the path xfer */
		memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		/* propagate the result back to the original xfer */
		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	/* queue the xfer and poke the chosen path's xfer handler */
	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}
236 
/*
 * Xfer handler for a path: dequeue the next pending xfer from the
 * device and execute it on this path via the pre-allocated clone mxs.
 */
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	/* bail if the path is going away or has no device behind it */
	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		/* more work queued? reschedule this handler after exec */
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	/* clone the original command onto the path xfer */
	memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	/* remember the original xfer so mpath_done() can complete it */
	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}
280 
/*
 * Completion callback for a cloned path xfer.  Requeues the original
 * xfer for a retry on another path when this one failed, starts a
 * failover when the backend's sense handler asks for one, and otherwise
 * copies the result back and completes the original xfer.
 */
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		/* requeue at the head so the retry happens promptly */
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		/* let the backend decide whether this sense means failover */
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif /* DIAGNOSTIC */
		}
		break;
	}

	/* copy the result back to the original xfer and complete it */
	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}
335 
336 void
337 mpath_failover(struct mpath_dev *d)
338 {
339 	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
340 		return;
341 
342 	mpath_failover_start(d);
343 }
344 
/*
 * Begin (or restart) the failover scan at the first group.  Also serves
 * as the d_failover_tmo timeout handler, hence the void * argument.
 */
void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}
356 
/*
 * Continue the failover scan: ask the backend for the status of the
 * first path in the current group, or rearm the timeout to rescan in a
 * second if all groups have been exhausted.  The backend eventually
 * reports back via mpath_path_status().
 */
void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		/*
		 * NOTE(review): assumes a group on d_groups always holds at
		 * least one path (mpath_path_detach() removes emptied
		 * groups); p would be NULL otherwise -- verify no window
		 * exists where an empty group is visible here.
		 */
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}
370 
/*
 * Called by the backend with the result of an op_status query issued by
 * mpath_failover_check().  An active path promotes its group to the
 * head of the group list and resumes i/o; any other status advances the
 * scan to the next group.
 */
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		/* make this the active group and restart round-robin here */
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		/*
		 * Resume queued i/o.  If scsi_pending_finish() reports more
		 * failover requests arrived meanwhile, scan again.
		 */
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}
393 
/*
 * Clamp an i/o request to the most restrictive minphys of all the
 * adapters underneath this device's paths.
 */
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif /* DIAGNOSTIC */

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			if (p->p_link->bus->sb_adapter->dev_minphys != NULL)
				p->p_link->bus->sb_adapter->dev_minphys(bp,
				    p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}
418 
419 int
420 mpath_path_probe(struct scsi_link *link)
421 {
422 	if (mpath == NULL)
423 		return (ENXIO);
424 
425 	if (link->id == NULL)
426 		return (EINVAL);
427 
428 	if (ISSET(link->flags, SDEV_UMASS))
429 		return (EINVAL);
430 
431 	if (mpath == link->bus->sb_adapter_softc)
432 		return (ENXIO);
433 
434 	return (0);
435 }
436 
437 int
438 mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
439 {
440 	struct mpath_softc *sc = mpath;
441 	struct scsi_link *link = p->p_link;
442 	struct mpath_dev *d = NULL;
443 	struct mpath_group *g;
444 	int newdev = 0, addxsh = 0;
445 	int target;
446 
447 #ifdef DIAGNOSTIC
448 	if (p->p_link == NULL)
449 		panic("mpath_path_attach: NULL link");
450 	if (p->p_group != NULL)
451 		panic("mpath_path_attach: group is not NULL");
452 #endif /* DIAGNOSTIC */
453 
454 	for (target = 0; target < MPATH_BUSWIDTH; target++) {
455 		if ((d = sc->sc_devs[target]) == NULL)
456 			continue;
457 
458 		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
459 			break;
460 
461 		d = NULL;
462 	}
463 
464 	if (d == NULL) {
465 		for (target = 0; target < MPATH_BUSWIDTH; target++) {
466 			if (sc->sc_devs[target] == NULL)
467 				break;
468 		}
469 		if (target >= MPATH_BUSWIDTH)
470 			return (ENXIO);
471 
472 		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
473 		if (d == NULL)
474 			return (ENOMEM);
475 
476 		mtx_init(&d->d_mtx, IPL_BIO);
477 		TAILQ_INIT(&d->d_groups);
478 		SIMPLEQ_INIT(&d->d_xfers);
479 		d->d_id = devid_copy(link->id);
480 		d->d_ops = ops;
481 
482 		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);
483 
484 		sc->sc_devs[target] = d;
485 		newdev = 1;
486 	} else {
487 		/*
488 		 * instead of carrying identical values in different devid
489 		 * instances, delete the new one and reference the old one in
490 		 * the new scsi_link.
491 		 */
492 		devid_free(link->id);
493 		link->id = devid_copy(d->d_id);
494 	}
495 
496 	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
497 		if (g->g_id == g_id)
498 			break;
499 	}
500 
501 	if (g == NULL) {
502 		g = malloc(sizeof(*g),  M_DEVBUF,
503 		    M_WAITOK | M_CANFAIL | M_ZERO);
504 		if (g == NULL) {
505 			if (newdev) {
506 				free(d, M_DEVBUF, sizeof(*d));
507 				sc->sc_devs[target] = NULL;
508 			}
509 
510 			return (ENOMEM);
511 		}
512 
513 		TAILQ_INIT(&g->g_paths);
514 		g->g_dev = d;
515 		g->g_id = g_id;
516 
517 		mtx_enter(&d->d_mtx);
518 		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
519 		mtx_leave(&d->d_mtx);
520 	}
521 
522 	p->p_group = g;
523 
524 	mtx_enter(&d->d_mtx);
525 	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
526 	if (!SIMPLEQ_EMPTY(&d->d_xfers))
527 		addxsh = 1;
528 
529 	if (d->d_next_path == NULL)
530 		d->d_next_path = p;
531 	mtx_leave(&d->d_mtx);
532 
533 	if (newdev)
534 		scsi_probe_target(mpath->sc_scsibus, target);
535 	else if (addxsh)
536 		scsi_xsh_add(&p->p_xsh);
537 
538 	return (0);
539 }
540 
/*
 * Remove a path from its group.  The last path takes the group with it;
 * pending xfers are handed to the next path or, failing that, a
 * failover is started to find a working group.
 */
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif /* DIAGNOSTIC */
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	/* don't leave the round-robin cursor pointing at this path */
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	/* the last path of a group takes the group away with it */
	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	/* g is non-NULL only when it was emptied and unlinked above */
	if (g != NULL)
		free(g, M_DEVBUF, sizeof(*g));

	scsi_xsh_del(&p->p_xsh);

	/* hand queued work to another path, or go hunting for one */
	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}
581 
/*
 * Map a boot device discovered via one of the physical paths to the
 * corresponding device on the mpath bus, so the multipathed device is
 * used instead.  Returns dev unchanged when there is no match.
 */
struct device *
mpath_bootdv(struct device *dev)
{
	struct mpath_softc *sc = mpath;
	struct mpath_dev *d;
	struct mpath_group *g;
	struct mpath_path *p;
	int target;

	if (sc == NULL)
		return (dev);

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
				if (p->p_link->device_softc == dev) {
					/*
					 * NOTE(review): assumes the mpath bus
					 * has a probed link at (target, 0);
					 * scsi_get_link() returning NULL
					 * would dereference NULL here --
					 * confirm against the midlayer.
					 */
					return (scsi_get_link(mpath->sc_scsibus,
					    target, 0)->device_softc);
				}
			}
		}
	}

	return (dev);
}
610