xref: /openbsd/sys/dev/softraid_raid1.c (revision f6d8fcae)
1 /* $OpenBSD: softraid_raid1.c,v 1.67 2021/05/16 15:12:37 deraadt Exp $ */
2 /*
3  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bio.h"
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/buf.h>
23 #include <sys/device.h>
24 #include <sys/ioctl.h>
25 #include <sys/malloc.h>
26 #include <sys/kernel.h>
27 #include <sys/disk.h>
28 #include <sys/rwlock.h>
29 #include <sys/queue.h>
30 #include <sys/fcntl.h>
31 #include <sys/mount.h>
32 #include <sys/sensors.h>
33 #include <sys/stat.h>
34 #include <sys/task.h>
35 #include <sys/conf.h>
36 #include <sys/uio.h>
37 
38 #include <scsi/scsi_all.h>
39 #include <scsi/scsiconf.h>
40 #include <scsi/scsi_disk.h>
41 
42 #include <dev/softraidvar.h>
43 
44 /* RAID 1 functions. */
/* Volume creation, assembly and the setup step both of them share. */
int	sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc,
	    int no_chunk, int64_t coerced_size);
int	sr_raid1_assemble(struct sr_discipline *sd,
	    struct bioc_createraid *bc, int no_chunk, void *data);
int	sr_raid1_init(struct sr_discipline *sd);

/* I/O submission and completion. */
int	sr_raid1_rw(struct sr_workunit *wu);
int	sr_raid1_wu_done(struct sr_workunit *wu);

/* Chunk and volume state tracking. */
void	sr_raid1_set_chunk_state(struct sr_discipline *sd, int c,
	    int new_state);
void	sr_raid1_set_vol_state(struct sr_discipline *sd);
54 
55 /* Discipline initialisation. */
56 void
sr_raid1_discipline_init(struct sr_discipline * sd)57 sr_raid1_discipline_init(struct sr_discipline *sd)
58 {
59 	/* Fill out discipline members. */
60 	sd->sd_type = SR_MD_RAID1;
61 	strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
62 	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
63 	    SR_CAP_REBUILD | SR_CAP_REDUNDANT;
64 	sd->sd_max_wu = SR_RAID1_NOWU;
65 
66 	/* Setup discipline specific function pointers. */
67 	sd->sd_assemble = sr_raid1_assemble;
68 	sd->sd_create = sr_raid1_create;
69 	sd->sd_scsi_rw = sr_raid1_rw;
70 	sd->sd_scsi_wu_done = sr_raid1_wu_done;
71 	sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
72 	sd->sd_set_vol_state = sr_raid1_set_vol_state;
73 }
74 
75 int
sr_raid1_create(struct sr_discipline * sd,struct bioc_createraid * bc,int no_chunk,int64_t coerced_size)76 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc,
77     int no_chunk, int64_t coerced_size)
78 {
79 	if (no_chunk < 2) {
80 		sr_error(sd->sd_sc, "%s requires two or more chunks",
81 		    sd->sd_name);
82 		return EINVAL;
83 	}
84 
85 	sd->sd_meta->ssdi.ssd_size = coerced_size;
86 
87 	return sr_raid1_init(sd);
88 }
89 
/*
 * Assemble-time setup for a RAID 1 volume.  Nothing beyond the common
 * initialisation is done here; bc, no_chunk and data are not used.
 * Returns whatever sr_raid1_init() returns.
 */
int
sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	int	error;

	error = sr_raid1_init(sd);

	return error;
}
96 
97 int
sr_raid1_init(struct sr_discipline * sd)98 sr_raid1_init(struct sr_discipline *sd)
99 {
100 	sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no;
101 
102 	return 0;
103 }
104 
105 void
sr_raid1_set_chunk_state(struct sr_discipline * sd,int c,int new_state)106 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
107 {
108 	int			old_state, s;
109 
110 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid1_set_chunk_state %d -> %d\n",
111 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
112 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
113 
114 	/* ok to go to splbio since this only happens in error path */
115 	s = splbio();
116 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
117 
118 	/* multiple IOs to the same chunk that fail will come through here */
119 	if (old_state == new_state)
120 		goto done;
121 
122 	switch (old_state) {
123 	case BIOC_SDONLINE:
124 		switch (new_state) {
125 		case BIOC_SDOFFLINE:
126 		case BIOC_SDSCRUB:
127 			break;
128 		default:
129 			goto die;
130 		}
131 		break;
132 
133 	case BIOC_SDOFFLINE:
134 		switch (new_state) {
135 		case BIOC_SDREBUILD:
136 		case BIOC_SDHOTSPARE:
137 			break;
138 		default:
139 			goto die;
140 		}
141 		break;
142 
143 	case BIOC_SDSCRUB:
144 		if (new_state == BIOC_SDONLINE) {
145 			;
146 		} else
147 			goto die;
148 		break;
149 
150 	case BIOC_SDREBUILD:
151 		switch (new_state) {
152 		case BIOC_SDONLINE:
153 			break;
154 		case BIOC_SDOFFLINE:
155 			/* Abort rebuild since the rebuild chunk disappeared. */
156 			sd->sd_reb_abort = 1;
157 			break;
158 		default:
159 			goto die;
160 		}
161 		break;
162 
163 	case BIOC_SDHOTSPARE:
164 		switch (new_state) {
165 		case BIOC_SDOFFLINE:
166 		case BIOC_SDREBUILD:
167 			break;
168 		default:
169 			goto die;
170 		}
171 		break;
172 
173 	default:
174 die:
175 		splx(s); /* XXX */
176 		panic("%s: %s: %s: invalid chunk state transition %d -> %d",
177 		    DEVNAME(sd->sd_sc),
178 		    sd->sd_meta->ssd_devname,
179 		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
180 		    old_state, new_state);
181 		/* NOTREACHED */
182 	}
183 
184 	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
185 	sd->sd_set_vol_state(sd);
186 
187 	sd->sd_must_flush = 1;
188 	task_add(systq, &sd->sd_meta_save_task);
189 done:
190 	splx(s);
191 }
192 
193 void
sr_raid1_set_vol_state(struct sr_discipline * sd)194 sr_raid1_set_vol_state(struct sr_discipline *sd)
195 {
196 	int			states[SR_MAX_STATES];
197 	int			new_state, i, s, nd;
198 	int			old_state = sd->sd_vol_status;
199 
200 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state\n",
201 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
202 
203 	nd = sd->sd_meta->ssdi.ssd_chunk_no;
204 
205 #ifdef SR_DEBUG
206 	for (i = 0; i < nd; i++)
207 		DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n",
208 		    DEVNAME(sd->sd_sc), i,
209 		    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
210 #endif
211 
212 	for (i = 0; i < SR_MAX_STATES; i++)
213 		states[i] = 0;
214 
215 	for (i = 0; i < nd; i++) {
216 		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
217 		if (s >= SR_MAX_STATES)
218 			panic("%s: %s: %s: invalid chunk state",
219 			    DEVNAME(sd->sd_sc),
220 			    sd->sd_meta->ssd_devname,
221 			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
222 		states[s]++;
223 	}
224 
225 	if (states[BIOC_SDONLINE] == nd)
226 		new_state = BIOC_SVONLINE;
227 	else if (states[BIOC_SDONLINE] == 0)
228 		new_state = BIOC_SVOFFLINE;
229 	else if (states[BIOC_SDSCRUB] != 0)
230 		new_state = BIOC_SVSCRUB;
231 	else if (states[BIOC_SDREBUILD] != 0)
232 		new_state = BIOC_SVREBUILD;
233 	else if (states[BIOC_SDOFFLINE] != 0)
234 		new_state = BIOC_SVDEGRADED;
235 	else {
236 		DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
237 		    "was %d\n", DEVNAME(sd->sd_sc), old_state);
238 		panic("invalid volume state");
239 	}
240 
241 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n",
242 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
243 	    old_state, new_state);
244 
245 	switch (old_state) {
246 	case BIOC_SVONLINE:
247 		switch (new_state) {
248 		case BIOC_SVONLINE: /* can go to same state */
249 		case BIOC_SVOFFLINE:
250 		case BIOC_SVDEGRADED:
251 		case BIOC_SVREBUILD: /* happens on boot */
252 			break;
253 		default:
254 			goto die;
255 		}
256 		break;
257 
258 	case BIOC_SVOFFLINE:
259 		/* XXX this might be a little too much */
260 		goto die;
261 
262 	case BIOC_SVDEGRADED:
263 		switch (new_state) {
264 		case BIOC_SVOFFLINE:
265 		case BIOC_SVREBUILD:
266 		case BIOC_SVDEGRADED: /* can go to the same state */
267 			break;
268 		default:
269 			goto die;
270 		}
271 		break;
272 
273 	case BIOC_SVBUILDING:
274 		switch (new_state) {
275 		case BIOC_SVONLINE:
276 		case BIOC_SVOFFLINE:
277 		case BIOC_SVBUILDING: /* can go to the same state */
278 			break;
279 		default:
280 			goto die;
281 		}
282 		break;
283 
284 	case BIOC_SVSCRUB:
285 		switch (new_state) {
286 		case BIOC_SVONLINE:
287 		case BIOC_SVOFFLINE:
288 		case BIOC_SVDEGRADED:
289 		case BIOC_SVSCRUB: /* can go to same state */
290 			break;
291 		default:
292 			goto die;
293 		}
294 		break;
295 
296 	case BIOC_SVREBUILD:
297 		switch (new_state) {
298 		case BIOC_SVONLINE:
299 		case BIOC_SVOFFLINE:
300 		case BIOC_SVDEGRADED:
301 		case BIOC_SVREBUILD: /* can go to the same state */
302 			break;
303 		default:
304 			goto die;
305 		}
306 		break;
307 
308 	default:
309 die:
310 		panic("%s: %s: invalid volume state transition %d -> %d",
311 		    DEVNAME(sd->sd_sc),
312 		    sd->sd_meta->ssd_devname,
313 		    old_state, new_state);
314 		/* NOTREACHED */
315 	}
316 
317 	sd->sd_vol_status = new_state;
318 
319 	/* If we have just become degraded, look for a hotspare. */
320 	if (new_state == BIOC_SVDEGRADED)
321 		task_add(systq, &sd->sd_hotspare_rebuild_task);
322 }
323 
324 int
sr_raid1_rw(struct sr_workunit * wu)325 sr_raid1_rw(struct sr_workunit *wu)
326 {
327 	struct sr_discipline	*sd = wu->swu_dis;
328 	struct scsi_xfer	*xs = wu->swu_xs;
329 	struct sr_ccb		*ccb;
330 	struct sr_chunk		*scp;
331 	int			ios, chunk, i, rt;
332 	daddr_t			blkno;
333 
334 	/* blkno and scsi error will be handled by sr_validate_io */
335 	if (sr_validate_io(wu, &blkno, "sr_raid1_rw"))
336 		goto bad;
337 
338 	if (xs->flags & SCSI_DATA_IN)
339 		ios = 1;
340 	else
341 		ios = sd->sd_meta->ssdi.ssd_chunk_no;
342 
343 	for (i = 0; i < ios; i++) {
344 		if (xs->flags & SCSI_DATA_IN) {
345 			rt = 0;
346 ragain:
347 			/* interleave reads */
348 			chunk = sd->mds.mdd_raid1.sr1_counter++ %
349 			    sd->sd_meta->ssdi.ssd_chunk_no;
350 			scp = sd->sd_vol.sv_chunks[chunk];
351 			switch (scp->src_meta.scm_status) {
352 			case BIOC_SDONLINE:
353 			case BIOC_SDSCRUB:
354 				break;
355 
356 			case BIOC_SDOFFLINE:
357 			case BIOC_SDREBUILD:
358 			case BIOC_SDHOTSPARE:
359 				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
360 					goto ragain;
361 
362 				/* FALLTHROUGH */
363 			default:
364 				/* volume offline */
365 				printf("%s: is offline, cannot read\n",
366 				    DEVNAME(sd->sd_sc));
367 				goto bad;
368 			}
369 		} else {
370 			/* writes go on all working disks */
371 			chunk = i;
372 			scp = sd->sd_vol.sv_chunks[chunk];
373 			switch (scp->src_meta.scm_status) {
374 			case BIOC_SDONLINE:
375 			case BIOC_SDSCRUB:
376 			case BIOC_SDREBUILD:
377 				break;
378 
379 			case BIOC_SDHOTSPARE: /* should never happen */
380 			case BIOC_SDOFFLINE:
381 				continue;
382 
383 			default:
384 				goto bad;
385 			}
386 		}
387 
388 		ccb = sr_ccb_rw(sd, chunk, blkno, xs->datalen, xs->data,
389 		    xs->flags, 0);
390 		if (!ccb) {
391 			/* should never happen but handle more gracefully */
392 			printf("%s: %s: too many ccbs queued\n",
393 			    DEVNAME(sd->sd_sc),
394 			    sd->sd_meta->ssd_devname);
395 			goto bad;
396 		}
397 		sr_wu_enqueue_ccb(wu, ccb);
398 	}
399 
400 	sr_schedule_wu(wu);
401 
402 	return (0);
403 
404 bad:
405 	/* wu is unwound by sr_wu_put */
406 	return (1);
407 }
408 
409 int
sr_raid1_wu_done(struct sr_workunit * wu)410 sr_raid1_wu_done(struct sr_workunit *wu)
411 {
412 	struct sr_discipline	*sd = wu->swu_dis;
413 	struct scsi_xfer	*xs = wu->swu_xs;
414 
415 	/* If at least one I/O succeeded, we are okay. */
416 	if (wu->swu_ios_succeeded > 0) {
417 		xs->error = XS_NOERROR;
418 		return SR_WU_OK;
419 	}
420 
421 	/* If all I/O failed, retry reads and give up on writes. */
422 	if (xs->flags & SCSI_DATA_IN) {
423 		printf("%s: retrying read on block %lld\n",
424 		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
425 		if (wu->swu_cb_active == 1)
426 			panic("%s: sr_raid1_intr_cb",
427 			    DEVNAME(sd->sd_sc));
428 		sr_wu_release_ccbs(wu);
429 		wu->swu_state = SR_WU_RESTART;
430 		if (sd->sd_scsi_rw(wu) == 0)
431 			return SR_WU_RESTART;
432 	} else {
433 		printf("%s: permanently failing write on block %lld\n",
434 		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
435 	}
436 
437 	wu->swu_state = SR_WU_FAILED;
438 	xs->error = XS_DRIVER_STUFFUP;
439 
440 	return SR_WU_FAILED;
441 }
442