/* $OpenBSD: softraid_raid1.c,v 1.48 2013/03/31 13:31:44 jsing Exp $ */
/*
 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/uio.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
#include <dev/rndvar.h>

/* RAID 1 functions. */
int	sr_raid1_create(struct sr_discipline *, struct bioc_createraid *,
	    int, int64_t);
int	sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *,
	    int, void *);
int	sr_raid1_init(struct sr_discipline *sd);
int	sr_raid1_rw(struct sr_workunit *);
void	sr_raid1_intr(struct buf *);
void	sr_raid1_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid1_set_vol_state(struct sr_discipline *);

/* Discipline initialisation. */
void
sr_raid1_discipline_init(struct sr_discipline *sd)
{
	/* Fill out discipline members. */
	sd->sd_type = SR_MD_RAID1;
	strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
	    SR_CAP_REBUILD | SR_CAP_REDUNDANT;
	sd->sd_max_wu = SR_RAID1_NOWU;

	/* Set up discipline-specific function pointers. */
	sd->sd_assemble = sr_raid1_assemble;
	sd->sd_create = sr_raid1_create;
	sd->sd_scsi_rw = sr_raid1_rw;
	sd->sd_scsi_intr = sr_raid1_intr;
	sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
	sd->sd_set_vol_state = sr_raid1_set_vol_state;
}
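
/*
 * These hooks are driven by the generic softraid layer (softraid.c):
 * sd_create/sd_assemble when a volume is created or assembled through
 * bioctl(8), sd_scsi_rw for each I/O sent to the volume and sd_scsi_intr
 * when a chunk buf completes.  This is a rough map of the call sites,
 * not an exact reference.
 */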

int
sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, int64_t coerced_size)
{
	if (no_chunk < 2) {
		sr_error(sd->sd_sc, "RAID 1 requires two or more chunks");
		return EINVAL;
	}

	sd->sd_meta->ssdi.ssd_size = coerced_size;

	return sr_raid1_init(sd);
}
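
/*
 * ssd_size is the usable volume size in sectors.  Every RAID 1 chunk
 * carries a full copy of the data, so the caller is assumed to pass a
 * size already coerced down to the smallest chunk (minus metadata);
 * e.g. mirroring a 100GB and a 120GB partition yields a volume of
 * roughly 100GB.  The coercion itself happens in the generic creation
 * path, not here.
 */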

int
sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	return sr_raid1_init(sd);
}

int
sr_raid1_init(struct sr_discipline *sd)
{
	sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no;

	return 0;
}
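
/*
 * A work unit fans out into at most one CCB per chunk: a write goes to
 * every chunk while a read only ever uses one, so sizing
 * sd_max_ccb_per_wu to the chunk count covers the worst case.
 */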

void
sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int			old_state, s;

	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	switch (old_state) {
	case BIOC_SDONLINE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDSCRUB:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDOFFLINE:
		switch (new_state) {
		case BIOC_SDREBUILD:
		case BIOC_SDHOTSPARE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDSCRUB:
		switch (new_state) {
		case BIOC_SDONLINE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDREBUILD:
		switch (new_state) {
		case BIOC_SDONLINE:
			break;
		case BIOC_SDOFFLINE:
			/* Abort rebuild since the rebuild chunk disappeared. */
			sd->sd_reb_abort = 1;
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDHOTSPARE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDREBUILD:
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition "
		    "%d -> %d\n", DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	sd->sd_must_flush = 1;
	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
done:
	splx(s);
}
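
/*
 * Summary of the chunk state transitions accepted above, as a reading
 * aid (anything else panics; a transition to the current state is a
 * no-op):
 *
 *	ONLINE   -> OFFLINE, SCRUB
 *	OFFLINE  -> REBUILD, HOTSPARE
 *	SCRUB    -> ONLINE
 *	REBUILD  -> ONLINE, OFFLINE (sets sd_reb_abort)
 *	HOTSPARE -> OFFLINE, REBUILD
 */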

void
sr_raid1_set_vol_state(struct sr_discipline *sd)
{
	int			states[SR_MAX_STATES];
	int			new_state, i, s, nd;
	int			old_state = sd->sd_vol_status;

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

#ifdef SR_DEBUG
	for (i = 0; i < nd; i++)
		DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n",
		    DEVNAME(sd->sd_sc), i,
		    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
#endif

	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else if (states[BIOC_SDONLINE] == 0)
		new_state = BIOC_SVOFFLINE;
	else if (states[BIOC_SDSCRUB] != 0)
		new_state = BIOC_SVSCRUB;
	else if (states[BIOC_SDREBUILD] != 0)
		new_state = BIOC_SVREBUILD;
	else if (states[BIOC_SDOFFLINE] != 0)
		new_state = BIOC_SVDEGRADED;
	else {
		DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
		    "was %d\n", DEVNAME(sd->sd_sc), old_state);
		panic("invalid volume state");
	}

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	switch (old_state) {
	case BIOC_SVONLINE:
		switch (new_state) {
		case BIOC_SVONLINE: /* can go to same state */
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* happens on boot */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much */
		goto die;

	case BIOC_SVSCRUB:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVSCRUB: /* can go to same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVBUILDING:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVBUILDING: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVREBUILD:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVDEGRADED:
		switch (new_state) {
		case BIOC_SVOFFLINE:
		case BIOC_SVREBUILD:
		case BIOC_SVDEGRADED: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		panic("%s: %s: invalid volume state transition "
		    "%d -> %d\n", DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;

	/* If we have just become degraded, look for a hotspare. */
	if (new_state == BIOC_SVDEGRADED)
		workq_add_task(NULL, 0, sr_hotspare_rebuild_callback, sd, NULL);
}
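
/*
 * Example of the derivation above for a two-chunk mirror:
 *
 *	both chunks ONLINE	-> volume ONLINE
 *	one ONLINE, one SCRUB	-> volume SCRUB
 *	one ONLINE, one REBUILD	-> volume REBUILD
 *	one ONLINE, one OFFLINE	-> volume DEGRADED
 *	no chunk ONLINE		-> volume OFFLINE
 *
 * The DEGRADED case is what queues sr_hotspare_rebuild_callback.
 */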

int
sr_raid1_rw(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;
	struct sr_ccb		*ccb;
	struct sr_chunk		*scp;
	int			ios, chunk, i, s, rt;
	daddr64_t		blk;

	/* blk and scsi error will be handled by sr_validate_io */
	if (sr_validate_io(wu, &blk, "sr_raid1_rw"))
		goto bad;

	/* calculate physical block */
	blk += sd->sd_meta->ssd_data_offset;

	if (xs->flags & SCSI_DATA_IN)
		ios = 1;
	else
		ios = sd->sd_meta->ssdi.ssd_chunk_no;

	for (i = 0; i < ios; i++) {
		if (xs->flags & SCSI_DATA_IN) {
			rt = 0;
ragain:
			/* interleave reads */
			chunk = sd->mds.mdd_raid1.sr1_counter++ %
			    sd->sd_meta->ssdi.ssd_chunk_no;
			scp = sd->sd_vol.sv_chunks[chunk];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDONLINE:
			case BIOC_SDSCRUB:
				break;

			case BIOC_SDOFFLINE:
			case BIOC_SDREBUILD:
			case BIOC_SDHOTSPARE:
				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
					goto ragain;

				/* FALLTHROUGH */
			default:
				/* volume offline */
				printf("%s: volume offline, cannot read\n",
				    DEVNAME(sd->sd_sc));
				goto bad;
			}
		} else {
			/* writes go on all working disks */
			chunk = i;
			scp = sd->sd_vol.sv_chunks[chunk];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDONLINE:
			case BIOC_SDSCRUB:
			case BIOC_SDREBUILD:
				break;

			case BIOC_SDHOTSPARE: /* should never happen */
			case BIOC_SDOFFLINE:
				continue;

			default:
				goto bad;
			}
		}

		ccb = sr_ccb_rw(sd, chunk, blk, xs->datalen, xs->data,
		    xs->flags, 0);
		if (!ccb) {
			/* should never happen but handle more gracefully */
			printf("%s: %s: too many ccbs queued\n",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname);
			goto bad;
		}
		sr_wu_enqueue_ccb(wu, ccb);
	}

	s = splbio();

	/* rebuild io, let rebuild routine deal with it */
	if (wu->swu_flags & SR_WUF_REBUILD)
		goto queued;

	/* current io failed, restart */
	if (wu->swu_state == SR_WU_RESTART)
		goto start;

	/* deferred io failed, don't restart */
	if (wu->swu_state == SR_WU_REQUEUE)
		goto queued;

	if (sr_check_io_collision(wu))
		goto queued;

start:
	sr_raid_startwu(wu);
queued:
	splx(s);
	return (0);
bad:
	/* wu is unwound by sr_wu_put */
	return (1);
}
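
/*
 * Read/write fan-out illustration: sr1_counter increments on every read,
 * so with two readable chunks consecutive reads alternate between chunk
 * 0 and chunk 1 (counter % chunk_no).  If the selected chunk is not
 * readable, rt allows up to chunk_no retries of that selection before
 * the read is failed.  Writes instead loop over every chunk and simply
 * skip offline or hotspare chunks.
 */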

void
sr_raid1_intr(struct buf *bp)
{
	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
	struct sr_workunit	*wu = ccb->ccb_wu, *wup;
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;
	struct sr_softc		*sc = sd->sd_sc;
	int			s;

	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
	    DEVNAME(sc), bp, xs);

	s = splbio();

	sr_ccb_done(ccb);

	DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n",
	    DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count,
	    wu->swu_ios_failed);

	if (wu->swu_ios_complete < wu->swu_io_count)
		goto done;

	xs->error = XS_NOERROR;

	/* if all ios failed, retry reads and give up on writes */
	if (wu->swu_ios_failed == wu->swu_ios_complete) {
		if (xs->flags & SCSI_DATA_IN) {
			printf("%s: retrying read on block %lld\n",
			    DEVNAME(sc), ccb->ccb_buf.b_blkno);
			if (wu->swu_cb_active == 1)
				panic("%s: sr_raid1_intr_cb",
				    DEVNAME(sd->sd_sc));
			sr_wu_release_ccbs(wu);
			wu->swu_state = SR_WU_RESTART;
			if (sd->sd_scsi_rw(wu) == 0)
				goto done;
			xs->error = XS_DRIVER_STUFFUP;
		} else {
			printf("%s: permanently failing write on block %lld\n",
			    DEVNAME(sc), ccb->ccb_buf.b_blkno);
			xs->error = XS_DRIVER_STUFFUP;
		}
	}

	TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link)
		if (wu == wup)
			break;

	if (wup == NULL)
		panic("%s: wu %p not on pending queue",
		    DEVNAME(sd->sd_sc), wu);

	/* wu on pendq, remove */
	TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);

	if (wu->swu_collider) {
		if (wu->swu_ios_failed)
			sr_raid_recreate_wu(wu->swu_collider);

		/* XXX Should the collider be failed if this xs failed? */
		/* restart deferred wu */
		wu->swu_collider->swu_state = SR_WU_INPROGRESS;
		TAILQ_REMOVE(&sd->sd_wu_defq, wu->swu_collider, swu_link);
		sr_raid_startwu(wu->swu_collider);
	}

	if (wu->swu_flags & SR_WUF_REBUILD)
		wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
	if (wu->swu_flags & SR_WUF_WAKEUP)
		wakeup(wu);
	if (!(wu->swu_flags & SR_WUF_REBUILD))
		sr_scsi_done(sd, xs);

done:
	splx(s);
}
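
/*
 * Completion summary: a work unit finishes only once every CCB it issued
 * has completed.  If all of them failed, a read is retried by resubmitting
 * the work unit through sd_scsi_rw() in SR_WU_RESTART state, while a write
 * is failed permanently; a write that completed on at least one chunk
 * succeeds.
 */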