1 /*
2  * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Tomohiro Kusumi <kusumi.tomohiro@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/mutex2.h>
36 #include <sys/objcache.h>
37 #include <sys/callout.h>
38 
39 #include <dev/disk/dm/dm.h>
40 
/*
 * Malloc type used for the per-table target config and, through obj_args,
 * for the objcache-backed delay queue entries.
 */
MALLOC_DEFINE(M_DMDELAY, "dm_delay", "Device Mapper Target Delay");
42 
/*
 * One deferred I/O request sitting on a dm_delay_info queue.
 */
struct dm_delay_buf {
	TAILQ_ENTRY(dm_delay_buf) entry;	/* linkage on a dm_delay_buf_list */
	struct buf *bp;				/* the request being held back */
	int expire;				/* ticks at queue time + di->delay */
};
/* List head type for queues of dm_delay_buf entries. */
TAILQ_HEAD(dm_delay_buf_list, dm_delay_buf);
49 
/*
 * State for one I/O direction (the config holds one for reads and one
 * for writes).  Each instance owns a queue of deferred bufs, a callout
 * and a worker thread that submits the bufs.
 */
struct dm_delay_info {
	dm_pdev_t *pdev;	/* backing device obtained via dm_pdev_insert() */
	uint64_t offset;	/* scaled by DEV_BSIZE and added to bio_offset */
	int delay;		/* delay in ticks (table value in ms * hz / 1000) */
	int count;		/* number of bufs currently on buf_list */
	int enabled;		/* worker thread keeps looping while nonzero */
	struct dm_delay_buf_list buf_list;	/* deferred bufs, appended at the tail */
	struct callout cal;	/* runs _timeout(), which wakes the worker */
	struct mtx buf_mtx;	/* held around buf_list and count updates */
	struct mtx cal_mtx;	/* held around callout_reset()/callout_cancel() */
	struct lwkt_token token;	/* held by the worker and by _destroy() */
	thread_t td;		/* worker from lwkt_create(); worker clears it on exit */
};
63 
/*
 * Per-table-entry target configuration: independent delay state for the
 * read path and for the write/flush path.
 */
typedef struct target_delay_config {
	struct dm_delay_info read;	/* used for BUF_CMD_READ */
	struct dm_delay_info write;	/* used for BUF_CMD_WRITE and BUF_CMD_FLUSH */
	int argc;  /* either 3 or 6 */
} dm_target_delay_config_t;
69 
/* Forward declarations of the file-local helpers. */
static int _init(struct dm_delay_info *di, char **argv, int id);
static int _table(struct dm_delay_info *di, char *p);
static void _strategy(struct dm_delay_info *di, struct buf *bp);
static __inline void _submit(struct dm_delay_info *di, struct buf *bp);
static void _submit_queue(struct dm_delay_info *di, int submit_all);
static void _destroy(struct dm_delay_info *di);
static void _timeout(void *arg);
static void _thread(void *arg);
static __inline void _debug(struct dm_delay_info *di, const char *msg);
79 
/*
 * Object cache that backs struct dm_delay_buf allocations.  It is created
 * and destroyed by _objcache_create()/_objcache_destroy(); obj_args gives
 * the malloc-backed allocator the object size and malloc type.
 */
static struct objcache *obj_cache = NULL;
static struct objcache_malloc_args obj_args = {
	sizeof(struct dm_delay_buf), M_DMDELAY,
};
84 
85 static int
86 dm_target_delay_init(dm_table_entry_t *table_en, int argc, char **argv)
87 {
88 	dm_target_delay_config_t *tdc;
89 	int ret;
90 
91 	dmdebug("Delay target init: argc=%d\n", argc);
92 	if (argc != 3 && argc != 6) {
93 		kprintf("Delay target takes 3 or 6 args\n");
94 		return EINVAL;
95 	}
96 
97 	tdc = kmalloc(sizeof(*tdc), M_DMDELAY, M_WAITOK | M_ZERO);
98 	if (tdc == NULL)
99 		return ENOMEM;
100 	tdc->argc = argc;
101 
102 	ret = _init(&tdc->read, argv, 0);
103 	if (ret) {
104 		kfree(tdc, M_DMDELAY);
105 		return ret;
106 	}
107 
108 	if (argc == 6)
109 		argv += 3;
110 
111 	ret = _init(&tdc->write, argv, 1);
112 	if (ret) {
113 		dm_pdev_decr(tdc->read.pdev);
114 		kfree(tdc, M_DMDELAY);
115 		return ret;
116 	}
117 
118 	dm_table_add_deps(table_en, tdc->read.pdev);
119 	dm_table_add_deps(table_en, tdc->write.pdev);
120 
121 	dm_table_init_target(table_en, tdc);
122 
123 	return 0;
124 }
125 
126 static int
127 _init(struct dm_delay_info *di, char **argv, int id)
128 {
129 	dm_pdev_t *dmp;
130 	int tmp;
131 
132 	if (argv[0] == NULL)
133 		return EINVAL;
134 	if ((dmp = dm_pdev_insert(argv[0])) == NULL)
135 		return ENOENT;
136 
137 	di->pdev = dmp;
138 	di->offset = atoi64(argv[1]);
139 	tmp = atoi64(argv[2]);
140 	di->delay = tmp * hz / 1000;
141 	di->count = 0;
142 
143 	TAILQ_INIT(&di->buf_list);
144 	callout_init(&di->cal);
145 	mtx_init(&di->buf_mtx, "dmdlbuf");
146 	mtx_init(&di->cal_mtx, "dmdlcal");
147 	lwkt_token_init(&di->token, "dmdlthr");
148 
149 	di->enabled = 1;
150 	lwkt_create(_thread, di, &di->td, NULL, 0, -1, "dmdl%d", id);
151 
152 	_debug(di, "init");
153 	return 0;
154 }
155 
156 static char *
157 dm_target_delay_info(void *target_config)
158 {
159 	dm_target_delay_config_t *tdc;
160 	char *params;
161 
162 	tdc = target_config;
163 	KKASSERT(tdc != NULL);
164 
165 	params = dm_alloc_string(DM_MAX_PARAMS_SIZE);
166 	ksnprintf(params, DM_MAX_PARAMS_SIZE,
167 		"%d %d", tdc->read.count, tdc->write.count);
168 
169 	return params;
170 }
171 
172 static char *
173 dm_target_delay_table(void *target_config)
174 {
175 	dm_target_delay_config_t *tdc;
176 	char *params, *p;
177 
178 	tdc = target_config;
179 	KKASSERT(tdc != NULL);
180 
181 	params = dm_alloc_string(DM_MAX_PARAMS_SIZE);
182 	p = params;
183 	p += _table(&tdc->read, p);
184 	if (tdc->argc == 6) {
185 		p += ksnprintf(p, DM_MAX_PARAMS_SIZE, " ");
186 		_table(&tdc->write, p);
187 	}
188 
189 	return params;
190 }
191 
192 static int _table(struct dm_delay_info *di, char *p)
193 {
194 	int ret;
195 
196 	ret = ksnprintf(p, DM_MAX_PARAMS_SIZE,
197 		"%s %" PRIu64 " %d",
198 		di->pdev->udev_name, di->offset, di->delay);
199 	return ret;
200 }
201 
202 static int
203 dm_target_delay_strategy(dm_table_entry_t *table_en, struct buf *bp)
204 {
205 	dm_target_delay_config_t *tdc;
206 	struct dm_delay_info *di;
207 
208 	tdc = table_en->target_config;
209 	KKASSERT(tdc != NULL);
210 
211 	switch (bp->b_cmd) {
212 	case BUF_CMD_READ:
213 		di = &tdc->read;
214 		break;
215 	case BUF_CMD_WRITE:
216 	case BUF_CMD_FLUSH:
217 		di = &tdc->write;
218 		break;
219 	default:
220 		di = NULL;
221 		break;
222 	}
223 
224 	if (di) {
225 		if (di->delay) {
226 			_strategy(di, bp);
227 		} else {
228 			_submit(di, bp);
229 		}
230 	} else {
231 		/* XXX */
232 		struct vnode *vnode = tdc->write.pdev->pdev_vnode;
233 		vn_strategy(vnode, &bp->b_bio1);
234 	}
235 	return 0;
236 }
237 
238 static void
239 _strategy(struct dm_delay_info *di, struct buf *bp)
240 {
241 	struct dm_delay_buf *dp;
242 
243 	dp = objcache_get(obj_cache, M_WAITOK);
244 	dp->bp = bp;
245 	dp->expire = ticks + di->delay;
246 
247 	mtx_lock(&di->buf_mtx);
248 	di->count++;
249 	TAILQ_INSERT_TAIL(&di->buf_list, dp, entry);
250 	mtx_unlock(&di->buf_mtx);
251 
252 	mtx_lock(&di->cal_mtx);
253 	if (!callout_pending(&di->cal))
254 		callout_reset(&di->cal, di->delay, _timeout, di);
255 	mtx_unlock(&di->cal_mtx);
256 }
257 
258 static __inline
259 void
260 _submit(struct dm_delay_info *di, struct buf *bp)
261 {
262 	_debug(di, "submit");
263 
264 	bp->b_bio1.bio_offset += di->offset * DEV_BSIZE;
265 	vn_strategy(di->pdev->pdev_vnode, &bp->b_bio1);
266 }
267 
268 static void
269 _submit_queue(struct dm_delay_info *di, int submit_all)
270 {
271 	struct dm_delay_buf *dp;
272 	struct dm_delay_buf_list tmp_list;
273 	int next = -1;
274 	int reset = 0;
275 
276 	_debug(di, "submitq");
277 	TAILQ_INIT(&tmp_list);
278 
279 	mtx_lock(&di->buf_mtx);
280 	while ((dp = TAILQ_FIRST(&di->buf_list)) != NULL) {
281 		if (submit_all || ticks > dp->expire) {
282 			TAILQ_REMOVE(&di->buf_list, dp, entry);
283 			TAILQ_INSERT_TAIL(&tmp_list, dp, entry);
284 			di->count--;
285 			continue;
286 		}
287 		if (reset == 0) {
288 			reset = 1;
289 			next = dp->expire;
290 		} else {
291 			next = min(next, dp->expire);
292 		}
293 	}
294 	mtx_unlock(&di->buf_mtx);
295 
296 	if (reset) {
297 		mtx_lock(&di->cal_mtx);
298 		callout_reset(&di->cal, next - ticks, _timeout, di);
299 		mtx_unlock(&di->cal_mtx);
300 	}
301 
302 	while ((dp = TAILQ_FIRST(&tmp_list)) != NULL) {
303 		TAILQ_REMOVE(&tmp_list, dp, entry);
304 		_submit(di, dp->bp);
305 		objcache_put(obj_cache, dp);
306 	}
307 }
308 
309 static int
310 dm_target_delay_destroy(dm_table_entry_t *table_en)
311 {
312 	dm_target_delay_config_t *tdc;
313 
314 	tdc = table_en->target_config;
315 	if (tdc == NULL)
316 		return 0;
317 
318 	_destroy(&tdc->read);
319 	_destroy(&tdc->write);
320 
321 	kfree(tdc, M_DMDELAY);
322 
323 	return 0;
324 }
325 
/*
 * Shut down one direction: stop the worker thread, flush every queued
 * request, and release the mutexes and the pdev reference.
 */
static void
_destroy(struct dm_delay_info *di)
{
	_debug(di, "destroy");

	lwkt_gettoken(&di->token);
	/* The worker's loop condition; it exits once it sees this. */
	di->enabled = 0;

	/* Stop a pending timer so it no longer wakes the worker. */
	mtx_lock(&di->cal_mtx);
	if (callout_pending(&di->cal))
		callout_cancel(&di->cal);
	mtx_unlock(&di->cal_mtx);

	/* submit_all = 1: push out everything regardless of expiry. */
	_submit_queue(di, 1);
	/* Wake the worker (it sleeps on di) ... */
	wakeup(di);
	/* ... and wait for its wakeup(&di->enabled) issued just before it exits. */
	tsleep(&di->enabled, 0, "dmdldestroy", 0);
	lwkt_reltoken(&di->token);

	mtx_uninit(&di->cal_mtx);
	mtx_uninit(&di->buf_mtx);

	dm_pdev_decr(di->pdev);
}
349 
/*
 * Callout handler: wake the worker thread sleeping on the dm_delay_info
 * that was passed as the callout argument.
 */
static void
_timeout(void *arg)
{
	_debug((struct dm_delay_info *)arg, "timeout");
	wakeup(arg);
}
358 
359 static void
360 _thread(void *arg)
361 {
362 	struct dm_delay_info *di = arg;
363 
364 	_debug(di, "thread init");
365 	lwkt_gettoken(&di->token);
366 
367 	while (di->enabled) {
368 		tsleep(di, 0, "dmdlthread", 0);
369 		_submit_queue(di, 0);
370 	}
371 
372 	di->td = NULL;
373 	wakeup(&di->enabled);
374 
375 	_debug(di, "thread exit");
376 	lwkt_reltoken(&di->token);
377 	lwkt_exit();
378 }
379 
380 static __inline
381 void
382 _debug(struct dm_delay_info *di, const char *msg)
383 {
384 	dmdebug("%-8s: %d pdev=%s offset=%ju delay=%d count=%d\n",
385 		msg, di->enabled, di->pdev->name,
386 		(uintmax_t)di->offset, di->delay, di->count);
387 }
388 
389 static void
390 _objcache_create(void)
391 {
392 	if (obj_cache == NULL) {
393 		obj_cache = objcache_create("dmdlobj", 0, 0, NULL, NULL, NULL,
394 			objcache_malloc_alloc,
395 			objcache_malloc_free,
396 			&obj_args);
397 	}
398 	KKASSERT(obj_cache);
399 }
400 
401 static void
402 _objcache_destroy(void)
403 {
404 	if (obj_cache) {
405 		objcache_destroy(obj_cache);
406 		obj_cache = NULL;
407 	}
408 }
409 
410 static int
411 dmtd_mod_handler(module_t mod, int type, void *unused)
412 {
413 	dm_target_t *dmt = NULL;
414 	int err = 0;
415 
416 	switch(type) {
417 	case MOD_LOAD:
418 		if ((dmt = dm_target_lookup("delay")) != NULL) {
419 			dm_target_unbusy(dmt);
420 			return EEXIST;
421 		}
422 		dmt = dm_target_alloc("delay");
423 		dmt->version[0] = 1;
424 		dmt->version[1] = 0;
425 		dmt->version[2] = 0;
426 		dmt->init = &dm_target_delay_init;
427 		dmt->destroy = &dm_target_delay_destroy;
428 		dmt->strategy = &dm_target_delay_strategy;
429 		dmt->table = &dm_target_delay_table;
430 		dmt->info = &dm_target_delay_info;
431 
432 		_objcache_create();
433 		err = dm_target_insert(dmt);
434 		if (err == 0)
435 			kprintf("dm_target_delay: Successfully initialized\n");
436 		break;
437 
438 	case MOD_UNLOAD:
439 		err = dm_target_remove("delay");
440 		if (err == 0)
441 			kprintf("dm_target_delay: unloaded\n");
442 		_objcache_destroy();
443 		break;
444 	}
445 
446 	return err;
447 }
448 
/*
 * Module declaration for the delay target, with dmtd_mod_handler as its
 * event handler (presumably what DM_TARGET_MODULE wires up; the macro is
 * defined outside this file).
 */
DM_TARGET_MODULE(dm_target_delay, dmtd_mod_handler);
450