xref: /openbsd/sys/scsi/mpath.c (revision 8529ddd3)
1 /*	$OpenBSD: mpath.c,v 1.39 2015/03/14 03:38:52 jsg Exp $ */
2 
3 /*
4  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/kernel.h>
22 #include <sys/malloc.h>
23 #include <sys/device.h>
24 #include <sys/conf.h>
25 #include <sys/queue.h>
26 #include <sys/rwlock.h>
27 #include <sys/ioctl.h>
28 #include <sys/poll.h>
29 #include <sys/selinfo.h>
30 
31 #include <scsi/scsi_all.h>
32 #include <scsi/scsiconf.h>
33 #include <scsi/mpathvar.h>
34 
35 #define MPATH_BUSWIDTH 256
36 
37 int		mpath_match(struct device *, void *, void *);
38 void		mpath_attach(struct device *, struct device *, void *);
39 void		mpath_shutdown(void *);
40 
41 TAILQ_HEAD(mpath_paths, mpath_path);
42 
/*
 * A path group collects the paths (physical routes to the device)
 * that share the same backend-specific group id.  A device's groups
 * are kept in a list ordered by preference: the head group is the one
 * I/O is currently dispatched to.
 */
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;	/* entry on mpath_dev d_groups */
	struct mpath_paths	 g_paths;	/* paths in this group */
	struct mpath_dev	*g_dev;		/* backpointer to the device */
	u_int			 g_id;		/* group id from the backend */
};
TAILQ_HEAD(mpath_groups, mpath_group);
50 
/*
 * One logical multipathed device, addressed as one target on the
 * emulated mpath scsibus.  Mutable state is serialized by d_mtx.
 */
struct mpath_dev {
	struct mutex		 d_mtx;		/* protects the fields below */

	struct scsi_xfer_list	 d_xfers;	/* xfers waiting for dispatch */
	struct mpath_path	*d_next_path;	/* round-robin dispatch cursor */

	struct mpath_groups	 d_groups;	/* path groups, preferred first */

	struct mpath_group	*d_failover_iter; /* group being probed on failover */
	struct timeout		 d_failover_tmo; /* retry when no group answered */
	u_int			 d_failover;	/* scsi_pending_* failover state */

	const struct mpath_ops	*d_ops;		/* backend ops (checksense/status) */
	struct devid		*d_id;		/* device identity used for matching */
};
66 
/*
 * Softc for the mpath(4) bus.  Logical devices are indexed by their
 * target id on the emulated scsibus.
 */
struct mpath_softc {
	struct device		sc_dev;
	struct scsi_link	sc_link;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH];	/* indexed by target */
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

/* the single mpath(4) instance; set in mpath_attach() */
struct mpath_softc	*mpath;
76 
/* autoconf(9) glue: mpath always matches and attaches as a pseudo bus */
struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};
88 
89 void		mpath_cmd(struct scsi_xfer *);
90 void		mpath_minphys(struct buf *, struct scsi_link *);
91 int		mpath_probe(struct scsi_link *);
92 
93 struct mpath_path *mpath_next_path(struct mpath_dev *);
94 void		mpath_done(struct scsi_xfer *);
95 
96 void		mpath_failover(struct mpath_dev *);
97 void		mpath_failover_start(void *);
98 void		mpath_failover_check(struct mpath_dev *);
99 
/*
 * Adapter entry points for the emulated scsibus.
 * NOTE(review): the minphys slot is scsi_minphys, not the
 * mpath_minphys defined below — confirm this is intentional.
 */
struct scsi_adapter mpath_switch = {
	mpath_cmd,
	scsi_minphys,
	mpath_probe
};
105 
106 void		mpath_xs_stuffup(struct scsi_xfer *);
107 
/*
 * autoconf match: mpath is a pseudo-device, so it always matches.
 */
int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}
113 
114 void
115 mpath_attach(struct device *parent, struct device *self, void *aux)
116 {
117 	struct mpath_softc		*sc = (struct mpath_softc *)self;
118 	struct scsibus_attach_args	saa;
119 
120 	mpath = sc;
121 
122 	printf("\n");
123 
124 	sc->sc_link.adapter = &mpath_switch;
125 	sc->sc_link.adapter_softc = sc;
126 	sc->sc_link.adapter_target = MPATH_BUSWIDTH;
127 	sc->sc_link.adapter_buswidth = MPATH_BUSWIDTH;
128 	sc->sc_link.luns = 1;
129 	sc->sc_link.openings = 1024; /* XXX magical */
130 
131 	bzero(&saa, sizeof(saa));
132 	saa.saa_sc_link = &sc->sc_link;
133 
134 	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
135 	    &saa, scsiprint);
136 }
137 
/*
 * Fail an xfer back to its issuer when no usable path is available.
 */
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}
144 
145 int
146 mpath_probe(struct scsi_link *link)
147 {
148 	struct mpath_softc *sc = link->adapter_softc;
149 	struct mpath_dev *d = sc->sc_devs[link->target];
150 
151 	if (link->lun != 0 || d == NULL)
152 		return (ENXIO);
153 
154 	link->id = devid_copy(d->d_id);
155 
156 	return (0);
157 }
158 
/*
 * Return the path to dispatch the next xfer on and advance the
 * round-robin cursor.  When the cursor runs off the end of the path
 * list it wraps back to the first path of the preferred (head) group.
 * Returns NULL when the device has no usable path.
 * Must be called with d->d_mtx held.
 */
struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		/* wrap around to the head group's first path */
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}
180 
/*
 * Adapter scsi_cmd entry point.  SCSI_POLL xfers are cloned onto a
 * path xfer and executed synchronously right here; everything else is
 * queued on the device and a path's xfer handler is scheduled, which
 * dispatches via mpath_start().
 */
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistant device");
#endif

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		/* clone the request onto the path xfer */
		memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		/* copy the completion status back to the caller's xfer */
		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}
239 
/*
 * Path xfer handler: the midlayer hands us a fresh xfer (mxs) for
 * path p.  Dequeue the oldest pending device xfer, clone it onto mxs
 * and execute it; completion is routed to mpath_done().  If the path
 * is dying or nothing is queued the midlayer xfer is returned unused.
 */
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		/* more xfers queued: reschedule the handler afterwards */
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	/* clone the request onto the path xfer */
	memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	/* remember the original xfer so mpath_done() can complete it */
	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}
283 
/*
 * Completion callback for path xfers issued by mpath_start().
 * Transport failures (selection timeout, reset) requeue the original
 * xfer at the head of the device queue and retry on the next path.
 * Sense data is offered to the backend, which may demand a path group
 * failover.  Everything else completes the original xfer with the
 * path xfer's status.
 */
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			/* requeue the xfer and start a group failover */
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			/* backend did not claim the sense; pass it up */
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif
		}
		break;
	}

	/* copy the completion status back to the original xfer */
	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}
338 
/*
 * Kick off a path group failover.  scsi_pending_start() ensures only
 * one failover runs at a time; concurrent requests are coalesced into
 * the one already in progress.
 */
void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}
347 
/*
 * (Re)start the failover scan at the first path group.  Also runs as
 * the d_failover_tmo timeout handler to retry a scan that found no
 * active group.
 */
void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}
359 
/*
 * Probe the current failover candidate group by asking the backend
 * for the status of its first path; the answer comes back through
 * mpath_path_status().  Once the iterator has run off the end of the
 * group list, schedule a full rescan one second later.
 */
void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}
373 
/*
 * Backend callback delivering the path status requested by
 * mpath_failover_check().  An MPATH_S_ACTIVE path promotes its group
 * to the head of the group list, becomes the dispatch cursor and
 * resumes I/O; any other status advances the failover iterator to the
 * next group.
 */
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		/* rescan if another failover was requested while we ran */
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}
396 
/*
 * Clamp a transfer against every underlying path's adapter so no
 * request exceeds what the most restrictive path can move.
 * NOTE(review): mpath_switch above is initialized with scsi_minphys,
 * not this function — confirm where this is actually called from.
 */
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistant device");
#endif

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			p->p_link->adapter->scsi_minphys(bp, p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}
419 
420 int
421 mpath_path_probe(struct scsi_link *link)
422 {
423 	if (mpath == NULL)
424 		return (ENXIO);
425 
426 	if (link->id == NULL)
427 		return (EINVAL);
428 
429 	if (ISSET(link->flags, SDEV_UMASS))
430 		return (EINVAL);
431 
432 	if (mpath == link->adapter_softc)
433 		return (ENXIO);
434 
435 	return (0);
436 }
437 
438 int
439 mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
440 {
441 	struct mpath_softc *sc = mpath;
442 	struct scsi_link *link = p->p_link;
443 	struct mpath_dev *d = NULL;
444 	struct mpath_group *g;
445 	int newdev = 0, addxsh = 0;
446 	int target;
447 
448 #ifdef DIAGNOSTIC
449 	if (p->p_link == NULL)
450 		panic("mpath_path_attach: NULL link");
451 	if (p->p_group != NULL)
452 		panic("mpath_path_attach: group is not NULL");
453 #endif
454 
455 	for (target = 0; target < MPATH_BUSWIDTH; target++) {
456 		if ((d = sc->sc_devs[target]) == NULL)
457 			continue;
458 
459 		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
460 			break;
461 
462 		d = NULL;
463 	}
464 
465 	if (d == NULL) {
466 		for (target = 0; target < MPATH_BUSWIDTH; target++) {
467 			if (sc->sc_devs[target] == NULL)
468 				break;
469 		}
470 		if (target >= MPATH_BUSWIDTH)
471 			return (ENXIO);
472 
473 		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
474 		if (d == NULL)
475 			return (ENOMEM);
476 
477 		mtx_init(&d->d_mtx, IPL_BIO);
478 		TAILQ_INIT(&d->d_groups);
479 		SIMPLEQ_INIT(&d->d_xfers);
480 		d->d_id = devid_copy(link->id);
481 		d->d_ops = ops;
482 
483 		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);
484 
485 		sc->sc_devs[target] = d;
486 		newdev = 1;
487 	} else {
488 		/*
489 		 * instead of carrying identical values in different devid
490 		 * instances, delete the new one and reference the old one in
491 		 * the new scsi_link.
492 		 */
493 		devid_free(link->id);
494 		link->id = devid_copy(d->d_id);
495 	}
496 
497 	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
498 		if (g->g_id == g_id)
499 			break;
500 	}
501 
502 	if (g == NULL) {
503 		g = malloc(sizeof(*g),  M_DEVBUF,
504 		    M_WAITOK | M_CANFAIL | M_ZERO);
505 		if (g == NULL) {
506 			if (newdev) {
507 				free(d, M_DEVBUF, 0);
508 				sc->sc_devs[target] = NULL;
509 			}
510 
511 			return (ENOMEM);
512 		}
513 
514 		TAILQ_INIT(&g->g_paths);
515 		g->g_dev = d;
516 		g->g_id = g_id;
517 
518 		mtx_enter(&d->d_mtx);
519 		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
520 		mtx_leave(&d->d_mtx);
521 	}
522 
523 	p->p_group = g;
524 
525 	mtx_enter(&d->d_mtx);
526 	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
527 	if (!SIMPLEQ_EMPTY(&d->d_xfers))
528 		addxsh = 1;
529 
530 	if (d->d_next_path == NULL)
531 		d->d_next_path = p;
532 	mtx_leave(&d->d_mtx);
533 
534 	if (newdev)
535 		scsi_probe_target(mpath->sc_scsibus, target);
536 	else if (addxsh)
537 		scsi_xsh_add(&p->p_xsh);
538 
539 	return (0);
540 }
541 
/*
 * Detach a path from its group.  A group left empty is unlinked from
 * the device and freed.  Queued xfers are pushed to the next
 * remaining path; if none remains, a failover scan is started in the
 * hope another group comes alive.
 */
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistant bus");
#endif
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	/* dont leave the round-robin cursor on the removed path */
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;	/* group still in use; nothing to free below */

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, 0);

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}
582 
583 struct device *
584 mpath_bootdv(struct device *dev)
585 {
586 	struct mpath_softc *sc = mpath;
587 	struct mpath_dev *d;
588 	struct mpath_group *g;
589 	struct mpath_path *p;
590 	int target;
591 
592 	if (sc == NULL)
593 		return (dev);
594 
595 	for (target = 0; target < MPATH_BUSWIDTH; target++) {
596 		if ((d = sc->sc_devs[target]) == NULL)
597 			continue;
598 
599 		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
600 			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
601 				if (p->p_link->device_softc == dev) {
602 					return (scsi_get_link(mpath->sc_scsibus,
603 					    target, 0)->device_softc);
604 				}
605 			}
606 		}
607 	}
608 
609 	return (dev);
610 }
611