1 /*
2  * Copyright (c) 2009 The NetBSD Foundation, Inc.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to The NetBSD Foundation
6  * by Adam Hamsik.
7  *
8  * This code is further derived from software contributed to the
9  * DragonFly project by Alex Hornung and Matthew Dillon
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * $NetBSD: dm_target_stripe.c,v 1.9 2010/01/04 00:14:41 haad Exp $
33  */
34 
/*
 * This file implements the initial version of the device-mapper stripe
 * target.
 *
 * DragonFly changes: the per-device stripe table is sized when the table
 * is loaded; up to MAX_STRIPES stripes are accepted.
 */
40 #include <dev/disk/dm/dm.h>
41 #include <sys/malloc.h>		/* for malloc macros, dm.h includes sys/param.h */
42 
43 MALLOC_DEFINE(M_DMSTRIPE, "dm_striped", "Device Mapper Target Striped");
44 
45 #define MAX_STRIPES 32
46 /* #define USE_NUM_ERROR */
47 
48 struct target_stripe_dev {
49 	dm_pdev_t *pdev;
50 	uint64_t offset;
51 	int num_error;
52 };
53 
54 typedef struct target_stripe_config {
55 	int stripe_num;
56 	uint64_t stripe_chunksize;
57 	struct target_stripe_dev stripe_devs[0];
58 } dm_target_stripe_config_t;
59 
60 static void dm_target_stripe_destroy_config(dm_target_stripe_config_t *tsc);
61 
62 /*
63  * Init function called from dm_table_load_ioctl.
64  *
65  * Example line sent to dm from lvm tools when using striped target.
66  * start length striped #stripes chunk_size device1 offset1 ... deviceN offsetN
67  */
68 static int
69 dm_target_stripe_init(dm_table_entry_t *table_en, int argc, char **argv)
70 {
71 	dm_target_stripe_config_t *tsc;
72 	char *arg;
73 	int i, n, siz, chunksize;
74 
75 	if (argc < 4) {
76 		kprintf("Striped target takes 4 or more args\n");
77 		return EINVAL;
78 	}
79 
80 	n = (int)atoi64(argv[0]);
81 	if (n <= 0 || n > MAX_STRIPES) {
82 		kprintf("dm: Error %d stripes not supported (%d max)\n",
83 			n, MAX_STRIPES);
84 		return ENOTSUP;
85 	}
86 #if 0
87 	if (table_en->length % n) {
88 		kprintf("dm: Target device size not multiple of stripes\n");
89 		return EINVAL;
90 	}
91 #endif
92 	if (argc != (2 + n * 2)) {
93 		kprintf("dm: Invalid argc %d for %d stripe devices\n",
94 			argc, n);
95 		return EINVAL;
96 	}
97 
98 	chunksize = atoi64(argv[1]);
99 	if (chunksize < 1 || chunksize * DEV_BSIZE > MAXPHYS) {
100 		kprintf("dm: Error unsupported chunk size %jdKB\n",
101 			(intmax_t)chunksize * DEV_BSIZE / 1024);
102 		return EINVAL;
103 	}
104 #if 0
105 	if ((table_en->length / n) % chunksize) {
106 		kprintf("dm: Stripe device size not multiple of chunk size\n");
107 		return EINVAL;
108 	}
109 #endif
110 
111 	siz = sizeof(dm_target_stripe_config_t) +
112 		n * sizeof(struct target_stripe_dev);
113 	tsc = kmalloc(siz, M_DMSTRIPE, M_WAITOK | M_ZERO);
114 	if (tsc == NULL)
115 		return ENOMEM;
116 	tsc->stripe_num = n;
117 	tsc->stripe_chunksize = chunksize;
118 
119 	/*
120 	 * Parse the devices
121 	 */
122 
123 	kprintf("dm: Stripe %d devices chunk size %dKB\n",
124 		(int)tsc->stripe_num,
125 		(int)tsc->stripe_chunksize
126 	);
127 
128 	argv += 2;
129 	for (n = 0, i = 0; n < tsc->stripe_num; ++n) {
130 		arg = argv[i++];
131 		KKASSERT(arg);
132 		tsc->stripe_devs[n].pdev = dm_pdev_insert(arg);
133 		if (tsc->stripe_devs[n].pdev == NULL)
134 			break;
135 		arg = argv[i++];
136 		KKASSERT(arg);
137 		tsc->stripe_devs[n].offset = atoi64(arg);
138 		dm_table_add_deps(table_en, tsc->stripe_devs[n].pdev);
139 	}
140 	if (n != tsc->stripe_num) {
141 		dm_target_stripe_destroy_config(tsc);
142 		return (ENOENT);
143 	}
144 
145 	dm_table_init_target(table_en, tsc);
146 
147 	return 0;
148 }
149 
150 /*
151  * Info routine called to get params string.
152  */
153 static char *
154 dm_target_stripe_info(void *target_config)
155 {
156 	dm_target_stripe_config_t *tsc;
157 	char *params;
158 	char *ptr;
159 	char buf[MAX_STRIPES + 1];
160 	size_t len;
161 	int ret;
162 	int i;
163 
164 	tsc = target_config;
165 
166 	len = DM_MAX_PARAMS_SIZE;
167 	params = dm_alloc_string(len);
168 	ptr = params;
169 
170 	ret = ksnprintf(ptr, len, "%d ", tsc->stripe_num);
171 	ptr += ret;
172 	len -= ret;
173 
174 	memset(buf, 0, sizeof(buf));
175 	for (i = 0; i < tsc->stripe_num; i++) {
176 		ret = ksnprintf(ptr, len, "%s ",
177 			tsc->stripe_devs[i].pdev->udev_name);
178 		if (tsc->stripe_devs[i].num_error) /* no lock */
179 			buf[i] = 'D';
180 		else
181 			buf[i] = 'A';
182 		ptr += ret;
183 		len -= ret;
184 	}
185 
186 	ret = ksnprintf(ptr, len, "1 %s", buf);
187 	ptr += ret;
188 	len -= ret;
189 
190 	return params;
191 }
192 
193 /*
194  * Table routine called to get params string.
195  */
196 static char *
197 dm_target_stripe_table(void *target_config)
198 {
199 	dm_target_stripe_config_t *tsc;
200 	char *params;
201 	char *ptr;
202 	size_t len;
203 	int ret;
204 	int i;
205 
206 	tsc = target_config;
207 
208 	len = DM_MAX_PARAMS_SIZE;
209 	params = dm_alloc_string(len);
210 	ptr = params;
211 
212 	ret = ksnprintf(ptr, len, "%d %jd",
213 		tsc->stripe_num,
214 		(intmax_t)tsc->stripe_chunksize);
215 	ptr += ret;
216 	len -= ret;
217 
218 	for (i = 0; i < tsc->stripe_num; i++) {
219 		ret = ksnprintf(ptr, len, " %s %jd",
220 			tsc->stripe_devs[i].pdev->udev_name,
221 			(intmax_t)tsc->stripe_devs[i].offset);
222 		ptr += ret;
223 		len -= ret;
224 	}
225 
226 	return params;
227 }
228 
229 #ifdef USE_NUM_ERROR
230 static void
231 dm_target_stripe_iodone(struct bio *bio)
232 {
233 	struct bio *obio;
234 	struct buf *bp;
235 	dm_target_stripe_config_t *tsc;
236 
237 	bp = bio->bio_buf;
238 	tsc = bio->bio_caller_info1.ptr;
239 
240 	if (bp->b_error) {
241 		int devnr;
242 		uint64_t blkno, stripe;
243 
244 		blkno = bio->bio_offset / DEV_BSIZE;
245 		stripe = blkno / tsc->stripe_chunksize;
246 		devnr = stripe % tsc->stripe_num;
247 		KKASSERT(devnr < MAX_STRIPES);
248 		tsc->stripe_devs[devnr].num_error++;
249 
250 		dmdebug("device=%d error=%d\n", devnr, bp->b_error);
251 	}
252 
253 	obio = pop_bio(bio);
254 	biodone(obio);
255 }
256 
257 static __inline
258 struct bio *get_stripe_bio(struct bio *bio, void *priv)
259 {
260 	struct bio *nbio;
261 
262 	nbio = push_bio(bio);
263 	nbio->bio_caller_info1.ptr = priv;
264 	nbio->bio_done = dm_target_stripe_iodone;
265 
266 	return nbio;
267 }
268 #else
269 static __inline
270 struct bio *get_stripe_bio(struct bio *bio, void *priv __unused)
271 {
272 	return bio;
273 }
274 #endif
275 
276 /*
277  * Strategy routine called from dm_strategy.
278  */
279 static int
280 dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp)
281 {
282 	dm_target_stripe_config_t *tsc;
283 	struct bio *bio = &bp->b_bio1;
284 	struct bio *nbio;
285 	struct buf *nestbuf;
286 	struct target_stripe_dev *dev;
287 	uint64_t blkno, blkoff;
288 	uint64_t stripe, blknr;
289 	uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
290 	int devnr;
291 
292 	tsc = table_en->target_config;
293 	if (tsc == NULL)
294 		return 0;
295 
296 	/* calculate extent of request */
297 	KKASSERT(bp->b_resid % DEV_BSIZE == 0);
298 
299 	switch(bp->b_cmd) {
300 	case BUF_CMD_READ:
301 	case BUF_CMD_WRITE:
302 	case BUF_CMD_FREEBLKS:
303 		/*
304 		 * Loop through to individual operations
305 		 */
306 		blkno = bio->bio_offset / DEV_BSIZE;
307 		blkoff = 0;
308 		num_blks = bp->b_resid / DEV_BSIZE;
309 		nestiobuf_init(bio);
310 
311 		while (num_blks > 0) {
312 			/* blockno to stripe piece nr */
313 			stripe = blkno / tsc->stripe_chunksize;
314 			stripe_off = blkno % tsc->stripe_chunksize;
315 
316 			/* where we are inside the stripe */
317 			devnr = stripe % tsc->stripe_num;
318 			blknr = stripe / tsc->stripe_num;
319 			dev = &tsc->stripe_devs[devnr];
320 
321 			/* how much is left before we hit a boundary */
322 			stripe_rest = tsc->stripe_chunksize - stripe_off;
323 
324 			/* issue this piece on stripe `stripe' */
325 			issue_blks = MIN(stripe_rest, num_blks);
326 			nestbuf = getpbuf(NULL);
327 			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;
328 
329 			nestiobuf_add(bio, nestbuf, blkoff,
330 					issue_blks * DEV_BSIZE, NULL);
331 
332 			nbio = get_stripe_bio(&nestbuf->b_bio1, tsc);
333 			nbio->bio_offset = blknr * tsc->stripe_chunksize;
334 			nbio->bio_offset += stripe_off;
335 			nbio->bio_offset += dev->offset;
336 			nbio->bio_offset *= DEV_BSIZE;
337 
338 			vn_strategy(dev->pdev->pdev_vnode, nbio);
339 
340 			blkno += issue_blks;
341 			blkoff += issue_blks * DEV_BSIZE;
342 			num_blks -= issue_blks;
343 		}
344 		nestiobuf_start(bio);
345 		break;
346 	case BUF_CMD_FLUSH:
347 		nestiobuf_init(bio);
348 		for (devnr = 0; devnr < tsc->stripe_num; ++devnr) {
349 			dev = &tsc->stripe_devs[devnr];
350 			nestbuf = getpbuf(NULL);
351 			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;
352 
353 			nestiobuf_add(bio, nestbuf, 0, 0, NULL);
354 
355 			nbio = get_stripe_bio(&nestbuf->b_bio1, tsc);
356 			nbio->bio_offset = 0;
357 
358 			vn_strategy(dev->pdev->pdev_vnode, nbio);
359 		}
360 		nestiobuf_start(bio);
361 		break;
362 	default:
363 		bp->b_flags |= B_ERROR;
364 		bp->b_error = EIO;
365 		biodone(bio);
366 		break;
367 	}
368 	return 0;
369 }
370 
371 
372 static int
373 dm_target_stripe_dump(dm_table_entry_t *table_en, void *data, size_t length, off_t offset)
374 {
375 	dm_target_stripe_config_t *tsc;
376 	uint64_t blkno, blkoff;
377 	uint64_t stripe, blknr;
378 	uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
379 	uint64_t off2, len2;
380 	int devnr;
381 
382 	tsc = table_en->target_config;
383 	if (tsc == NULL)
384 		return 0;
385 
386 	/* calculate extent of request */
387 	KKASSERT(length % DEV_BSIZE == 0);
388 
389 	blkno = offset / DEV_BSIZE;
390 	blkoff = 0;
391 	num_blks = length / DEV_BSIZE;
392 
393 	/*
394 	 * 0 length means flush buffers and return
395 	 */
396 	if (length == 0) {
397 		for (devnr = 0; devnr < tsc->stripe_num; ++devnr) {
398 			if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL)
399 				return ENXIO;
400 
401 			dev_ddump(tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev,
402 			    data, 0, offset, 0);
403 		}
404 		return 0;
405 	}
406 
407 	while (num_blks > 0) {
408 		/* blockno to stripe piece nr */
409 		stripe = blkno / tsc->stripe_chunksize;
410 		stripe_off = blkno % tsc->stripe_chunksize;
411 
412 		/* where we are inside the stripe */
413 		devnr = stripe % tsc->stripe_num;
414 		blknr = stripe / tsc->stripe_num;
415 
416 		/* how much is left before we hit a boundary */
417 		stripe_rest = tsc->stripe_chunksize - stripe_off;
418 
419 		/* issue this piece on stripe `stripe' */
420 		issue_blks = MIN(stripe_rest, num_blks);
421 
422 #if 0
423 		nestiobuf_add(bio, nestbuf, blkoff,
424 				issue_blks * DEV_BSIZE);
425 #endif
426 		len2 = issue_blks * DEV_BSIZE;
427 
428 		/* I need number of bytes. */
429 		off2 = blknr * tsc->stripe_chunksize + stripe_off;
430 		off2 += tsc->stripe_devs[devnr].offset;
431 		off2 *= DEV_BSIZE;
432 		off2 = dm_pdev_correct_dump_offset(tsc->stripe_devs[devnr].pdev,
433 		    off2);
434 
435 		if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL)
436 			return ENXIO;
437 
438 		dev_ddump(tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev,
439 		    (char *)data + blkoff, 0, off2, len2);
440 
441 		blkno += issue_blks;
442 		blkoff += issue_blks * DEV_BSIZE;
443 		num_blks -= issue_blks;
444 	}
445 
446 	return 0;
447 }
448 
449 /*
450  * Destroy a dm table entry for stripes.
451  */
452 static int
453 dm_target_stripe_destroy(dm_table_entry_t *table_en)
454 {
455 	if (table_en->target_config != NULL)
456 		dm_target_stripe_destroy_config(table_en->target_config);
457 
458 	return 0;
459 }
460 
461 static void
462 dm_target_stripe_destroy_config(dm_target_stripe_config_t *tsc)
463 {
464 	int n;
465 
466 	for (n = 0; n < tsc->stripe_num; ++n) {
467 		if (tsc->stripe_devs[n].pdev) {
468 			dm_pdev_decr(tsc->stripe_devs[n].pdev);
469 			tsc->stripe_devs[n].pdev = NULL;
470 		}
471 	}
472 	kfree(tsc, M_DMSTRIPE);
473 }
474 
475 static int
476 dmts_mod_handler(module_t mod, int type, void *unused)
477 {
478 	dm_target_t *dmt = NULL;
479 	int err = 0;
480 
481 	switch(type) {
482 	case MOD_LOAD:
483 		if ((dmt = dm_target_lookup("striped")) != NULL) {
484 			dm_target_unbusy(dmt);
485 			return EEXIST;
486 		}
487 		dmt = dm_target_alloc("striped");
488 		dmt->version[0] = 1;
489 		dmt->version[1] = 0;
490 		dmt->version[2] = 3;
491 		dmt->init = &dm_target_stripe_init;
492 		dmt->destroy = &dm_target_stripe_destroy;
493 		dmt->strategy = &dm_target_stripe_strategy;
494 		dmt->table = &dm_target_stripe_table;
495 		dmt->info = &dm_target_stripe_info;
496 		dmt->dump = &dm_target_stripe_dump;
497 		dmt->max_argc = 2 + (MAX_STRIPES * 2);
498 
499 		err = dm_target_insert(dmt);
500 		if (err == 0)
501 			kprintf("dm_target_striped: Successfully initialized\n");
502 		break;
503 
504 	case MOD_UNLOAD:
505 		err = dm_target_remove("striped");
506 		if (err == 0)
507 			kprintf("dm_target_striped: unloaded\n");
508 		break;
509 	}
510 
511 	return err;
512 }
513 
514 DM_TARGET_MODULE(dm_target_striped, dmts_mod_handler);
515