1 /*	$NetBSD: dev_manager.c,v 1.1.1.3 2009/12/02 00:26:23 haad Exp $	*/
2 
3 /*
4  * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
5  * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of LVM2.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU Lesser General Public License v.2.1.
12  *
13  * You should have received a copy of the GNU Lesser General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 #include "lib.h"
19 #include "str_list.h"
20 #include "dev_manager.h"
21 #include "lvm-string.h"
22 #include "fs.h"
23 #include "defaults.h"
24 #include "segtype.h"
25 #include "display.h"
26 #include "toolcontext.h"
27 #include "targets.h"
28 #include "config.h"
29 #include "filter.h"
30 #include "activate.h"
31 
32 #include <limits.h>
33 #include <dirent.h>
34 
35 #define MAX_TARGET_PARAMSIZE 50000
36 #define UUID_PREFIX "LVM-"
37 
38 typedef enum {
39 	PRELOAD,
40 	ACTIVATE,
41 	DEACTIVATE,
42 	SUSPEND,
43 	SUSPEND_WITH_LOCKFS,
44 	CLEAN
45 } action_t;
46 
47 struct dev_manager {
48 	struct dm_pool *mem;
49 
50 	struct cmd_context *cmd;
51 
52 	void *target_state;
53 	uint32_t pvmove_mirror_count;
54 	int flush_required;
55 
56 	char *vg_name;
57 };
58 
59 struct lv_layer {
60 	struct logical_volume *lv;
61 	const char *old_name;
62 };
63 
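/*
 * Build a device-mapper UUID ("dlid") of the form LVM-<lvid>[-<layer>]
 * from the supplied memory pool.
 */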
64 static char *_build_dlid(struct dm_pool *mem, const char *lvid, const char *layer)
65 {
66 	char *dlid;
67 	size_t len;
68 
69 	if (!layer)
70 		layer = "";
71 
72 	len = sizeof(UUID_PREFIX) + sizeof(union lvid) + strlen(layer);
73 
74 	if (!(dlid = dm_pool_alloc(mem, len))) {
75 		log_error("_build_dlid: pool allocation failed for %" PRIsize_t
76 			  " %s %s.", len, lvid, layer);
77 		return NULL;
78 	}
79 
80 	sprintf(dlid, UUID_PREFIX "%s%s%s", lvid, (*layer) ? "-" : "", layer);
81 
82 	return dlid;
83 }
84 
85 char *build_dlid(struct dev_manager *dm, const char *lvid, const char *layer)
86 {
87 	return _build_dlid(dm->mem, lvid, layer);
88 }
89 
90 static int _read_only_lv(struct logical_volume *lv)
91 {
92 	return (!(lv->vg->status & LVM_WRITE) || !(lv->status & LVM_WRITE));
93 }
94 
95 /*
96  * Low level device-layer operations.
97  */
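/*
 * Create a dm_task of the requested type and set whichever of name,
 * uuid, event number and major/minor were supplied.
 */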
98 static struct dm_task *_setup_task(const char *name, const char *uuid,
99 				   uint32_t *event_nr, int task,
100 				   uint32_t major, uint32_t minor)
101 {
102 	struct dm_task *dmt;
103 
104 	if (!(dmt = dm_task_create(task)))
105 		return_NULL;
106 
107 	if (name)
108 		dm_task_set_name(dmt, name);
109 
110 	if (uuid && *uuid)
111 		dm_task_set_uuid(dmt, uuid);
112 
113 	if (event_nr)
114 		dm_task_set_event_nr(dmt, *event_nr);
115 
116 	if (major)
117 		dm_task_set_major_minor(dmt, major, minor, 1);
118 
119 	return dmt;
120 }
121 
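/*
 * Run a DM_DEVICE_INFO (or DM_DEVICE_MKNODES) task for the device and
 * fill in *info, optionally retrieving its read-ahead setting as well.
 */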
122 static int _info_run(const char *name, const char *dlid, struct dm_info *info,
123 		     uint32_t *read_ahead, int mknodes, int with_open_count,
124 		     int with_read_ahead, uint32_t major, uint32_t minor)
125 {
126 	int r = 0;
127 	struct dm_task *dmt;
128 	int dmtask;
129 
130 	dmtask = mknodes ? DM_DEVICE_MKNODES : DM_DEVICE_INFO;
131 
	if (!(dmt = _setup_task(name, dlid, NULL, dmtask, major, minor)))
133 		return_0;
134 
135 	if (!with_open_count)
136 		if (!dm_task_no_open_count(dmt))
137 			log_error("Failed to disable open_count");
138 
139 	if (!dm_task_run(dmt))
140 		goto_out;
141 
142 	if (!dm_task_get_info(dmt, info))
143 		goto_out;
144 
145 	if (with_read_ahead && info->exists) {
146 		if (!dm_task_get_read_ahead(dmt, read_ahead))
147 			goto_out;
148 	} else if (read_ahead)
149 		*read_ahead = DM_READ_AHEAD_NONE;
150 
151 	r = 1;
152 
153       out:
154 	dm_task_destroy(dmt);
155 	return r;
156 }
157 
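/*
 * A device is usable if it exists, is not suspended and (for now)
 * contains no mirror target.
 */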
158 int device_is_usable(dev_t dev)
159 {
160 	struct dm_task *dmt;
161 	struct dm_info info;
162 	const char *name;
163 	uint64_t start, length;
164 	char *target_type = NULL;
165 	char *params;
166 	void *next = NULL;
167 	int r = 0;
168 
169 	if (!(dmt = dm_task_create(DM_DEVICE_STATUS))) {
170 		log_error("Failed to allocate dm_task struct to check dev status");
171 		return 0;
172 	}
173 
174 	if (!dm_task_set_major_minor(dmt, MAJOR(dev), MINOR(dev), 1))
175 		goto_out;
176 
177 	if (!dm_task_run(dmt)) {
178 		log_error("Failed to get state of mapped device");
179 		goto out;
180 	}
181 
182 	if (!dm_task_get_info(dmt, &info))
183 		goto_out;
184 
185 	if (!info.exists || info.suspended)
186 		goto out;
187 
188 	name = dm_task_get_name(dmt);
189 
190 	/* FIXME Also check for mirror block_on_error and mpath no paths */
191 	/* For now, we exclude all mirrors */
192 
193 	do {
194 		next = dm_get_next_target(dmt, next, &start, &length,
195 					  &target_type, &params);
		/* Reject the device if it contains a mirror target */
197 		if (target_type && !strcmp(target_type, "mirror"))
198 			goto out;
199 	} while (next);
200 
201 	/* FIXME Also check dependencies? */
202 
203 	r = 1;
204 
205       out:
206 	dm_task_destroy(dmt);
207 	return r;
208 }
209 
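/*
 * Try the lookup by dlid first (with and then without the "LVM-"
 * prefix) and fall back to the device name.
 */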
210 static int _info(const char *name, const char *dlid, int mknodes,
211 		 int with_open_count, int with_read_ahead,
212 		 struct dm_info *info, uint32_t *read_ahead)
213 {
214 	if (!mknodes && dlid && *dlid) {
215 		if (_info_run(NULL, dlid, info, read_ahead, 0, with_open_count,
216 			      with_read_ahead, 0, 0) &&
		    info->exists)
218 			return 1;
219 		else if (_info_run(NULL, dlid + sizeof(UUID_PREFIX) - 1, info,
220 				   read_ahead, 0, with_open_count,
221 				   with_read_ahead, 0, 0) &&
222 			 info->exists)
223 			return 1;
224 	}
225 
226 	if (name)
227 		return _info_run(name, NULL, info, read_ahead, mknodes,
228 				 with_open_count, with_read_ahead, 0, 0);
229 
230 	return 0;
231 }
232 
233 static int _info_by_dev(uint32_t major, uint32_t minor, struct dm_info *info)
234 {
235 	return _info_run(NULL, NULL, info, NULL, 0, 0, 0, major, minor);
236 }
237 
238 int dev_manager_info(struct dm_pool *mem, const char *name,
239 		     const struct logical_volume *lv, int with_mknodes,
240 		     int with_open_count, int with_read_ahead,
241 		     struct dm_info *info, uint32_t *read_ahead)
242 {
243 	const char *dlid;
244 
245 	if (!(dlid = _build_dlid(mem, lv->lvid.s, NULL))) {
246 		log_error("dlid build failed for %s", lv->name);
247 		return 0;
248 	}
249 
250 	return _info(name, dlid, with_mknodes, with_open_count, with_read_ahead,
251 		     info, read_ahead);
252 }
253 
254 /* FIXME Interface must cope with multiple targets */
255 static int _status_run(const char *name, const char *uuid,
256 		       unsigned long long *s, unsigned long long *l,
257 		       char **t, uint32_t t_size, char **p, uint32_t p_size)
258 {
259 	int r = 0;
260 	struct dm_task *dmt;
261 	struct dm_info info;
262 	void *next = NULL;
263 	uint64_t start, length;
264 	char *type = NULL;
265 	char *params = NULL;
266 
	if (!(dmt = _setup_task(name, uuid, NULL, DM_DEVICE_STATUS, 0, 0)))
268 		return_0;
269 
270 	if (!dm_task_no_open_count(dmt))
271 		log_error("Failed to disable open_count");
272 
273 	if (!dm_task_run(dmt))
274 		goto_out;
275 
276 	if (!dm_task_get_info(dmt, &info) || !info.exists)
277 		goto_out;
278 
279 	do {
280 		next = dm_get_next_target(dmt, next, &start, &length,
281 					  &type, &params);
282 		if (type) {
283 			*s = start;
284 			*l = length;
			/* Make sure the copied strings are null-terminated */
286 			strncpy(*t, type, t_size);
287 			(*t)[t_size - 1] = '\0';
288 			strncpy(*p, params, p_size);
289 			(*p)[p_size - 1] = '\0';
290 
291 			r = 1;
292 			/* FIXME Cope with multiple targets! */
293 			break;
294 		}
295 
296 	} while (next);
297 
298       out:
299 	dm_task_destroy(dmt);
300 	return r;
301 }
302 
303 static int _status(const char *name, const char *uuid,
304 		   unsigned long long *start, unsigned long long *length,
305 		   char **type, uint32_t type_size, char **params,
306 		   uint32_t param_size) __attribute__ ((unused));
307 
308 static int _status(const char *name, const char *uuid,
309 		   unsigned long long *start, unsigned long long *length,
310 		   char **type, uint32_t type_size, char **params,
311 		   uint32_t param_size)
312 {
313 	if (uuid && *uuid) {
314 		if (_status_run(NULL, uuid, start, length, type,
315 				type_size, params, param_size) &&
316 		    *params)
317 			return 1;
318 		else if (_status_run(NULL, uuid + sizeof(UUID_PREFIX) - 1, start,
319 				     length, type, type_size, params,
320 				     param_size) &&
321 			 *params)
322 			return 1;
323 	}
324 
325 	if (name && _status_run(name, NULL, start, length, type, type_size,
326 				params, param_size))
327 		return 1;
328 
329 	return 0;
330 }
331 
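/*
 * Combine two percent ranges: an invalid range dominates, matching
 * extremes are preserved, anything else becomes PERCENT_0_TO_100.
 */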
332 static percent_range_t _combine_percent_ranges(percent_range_t a,
333 					       percent_range_t b)
334 {
335 	if (a == PERCENT_INVALID || b == PERCENT_INVALID)
336 		return PERCENT_INVALID;
337 
338 	if (a == PERCENT_100 && b == PERCENT_100)
339 		return PERCENT_100;
340 
341 	if (a == PERCENT_0 && b == PERCENT_0)
342 		return PERCENT_0;
343 
344 	return PERCENT_0_TO_100;
345 }
346 
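/*
 * Query (or wait on) the status of every target in the device and
 * accumulate a completion percentage via the segment type's
 * target_percent method, matching targets against the LV's segments.
 */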
347 static int _percent_run(struct dev_manager *dm, const char *name,
348 			const char *dlid,
349 			const char *target_type, int wait,
350 			struct logical_volume *lv, float *percent,
351 			percent_range_t *overall_percent_range,
352 			uint32_t *event_nr)
353 {
354 	int r = 0;
355 	struct dm_task *dmt;
356 	struct dm_info info;
357 	void *next = NULL;
358 	uint64_t start, length;
359 	char *type = NULL;
360 	char *params = NULL;
361 	struct dm_list *segh = &lv->segments;
362 	struct lv_segment *seg = NULL;
363 	struct segment_type *segtype;
364 	percent_range_t percent_range = 0, combined_percent_range = 0;
365 	int first_time = 1;
366 
367 	uint64_t total_numerator = 0, total_denominator = 0;
368 
369 	*percent = -1;
370 	*overall_percent_range = PERCENT_INVALID;
371 
372 	if (!(dmt = _setup_task(name, dlid, event_nr,
373 				wait ? DM_DEVICE_WAITEVENT : DM_DEVICE_STATUS, 0, 0)))
374 		return_0;
375 
376 	if (!dm_task_no_open_count(dmt))
377 		log_error("Failed to disable open_count");
378 
379 	if (!dm_task_run(dmt))
380 		goto_out;
381 
382 	if (!dm_task_get_info(dmt, &info) || !info.exists)
383 		goto_out;
384 
385 	if (event_nr)
386 		*event_nr = info.event_nr;
387 
388 	do {
389 		next = dm_get_next_target(dmt, next, &start, &length, &type,
390 					  &params);
391 		if (lv) {
392 			if (!(segh = dm_list_next(&lv->segments, segh))) {
393 				log_error("Number of segments in active LV %s "
394 					  "does not match metadata", lv->name);
395 				goto out;
396 			}
397 			seg = dm_list_item(segh, struct lv_segment);
398 		}
399 
400 		if (!type || !params || strcmp(type, target_type))
401 			continue;
402 
403 		if (!(segtype = get_segtype_from_string(dm->cmd, type)))
404 			continue;
405 
406 		if (segtype->ops->target_percent &&
407 		    !segtype->ops->target_percent(&dm->target_state,
408 						  &percent_range, dm->mem,
409 						  dm->cmd, seg, params,
410 						  &total_numerator,
411 						  &total_denominator))
412 			goto_out;
413 
414 		if (first_time) {
415 			combined_percent_range = percent_range;
416 			first_time = 0;
417 		} else
418 			combined_percent_range =
419 			    _combine_percent_ranges(combined_percent_range,
420 						    percent_range);
421 	} while (next);
422 
423 	if (lv && (segh = dm_list_next(&lv->segments, segh))) {
424 		log_error("Number of segments in active LV %s does not "
425 			  "match metadata", lv->name);
426 		goto out;
427 	}
428 
429 	if (total_denominator) {
		*percent = (float) total_numerator * 100 / total_denominator;
431 		*overall_percent_range = combined_percent_range;
432 	} else {
433 		*percent = 100;
434 		if (first_time)
435 			*overall_percent_range = PERCENT_100;
436 		else
437 			*overall_percent_range = combined_percent_range;
438 	}
439 
440 	log_debug("LV percent: %f", *percent);
441 	r = 1;
442 
443       out:
444 	dm_task_destroy(dmt);
445 	return r;
446 }
447 
448 static int _percent(struct dev_manager *dm, const char *name, const char *dlid,
449 		    const char *target_type, int wait,
450 		    struct logical_volume *lv, float *percent,
451 		    percent_range_t *overall_percent_range, uint32_t *event_nr)
452 {
453 	if (dlid && *dlid) {
454 		if (_percent_run(dm, NULL, dlid, target_type, wait, lv, percent,
455 				 overall_percent_range, event_nr))
456 			return 1;
457 		else if (_percent_run(dm, NULL, dlid + sizeof(UUID_PREFIX) - 1,
458 				      target_type, wait, lv, percent,
459 				      overall_percent_range, event_nr))
460 			return 1;
461 	}
462 
463 	if (name && _percent_run(dm, name, NULL, target_type, wait, lv, percent,
464 				 overall_percent_range, event_nr))
465 		return 1;
466 
467 	return 0;
468 }
469 
470 /*
471  * dev_manager implementation.
472  */
473 struct dev_manager *dev_manager_create(struct cmd_context *cmd,
474 				       const char *vg_name)
475 {
476 	struct dm_pool *mem;
477 	struct dev_manager *dm;
478 
479 	if (!(mem = dm_pool_create("dev_manager", 16 * 1024)))
480 		return_NULL;
481 
482 	if (!(dm = dm_pool_zalloc(mem, sizeof(*dm))))
483 		goto_bad;
484 
485 	dm->cmd = cmd;
486 	dm->mem = mem;
487 
488 	if (!(dm->vg_name = dm_pool_strdup(dm->mem, vg_name)))
489 		goto_bad;
490 
491 	dm->target_state = NULL;
492 
493 	dm_udev_set_sync_support(cmd->current_settings.udev_sync);
494 
495 	return dm;
496 
497       bad:
498 	dm_pool_destroy(mem);
499 	return NULL;
500 }
501 
502 void dev_manager_destroy(struct dev_manager *dm)
503 {
504 	dm_pool_destroy(dm->mem);
505 }
506 
507 void dev_manager_release(void)
508 {
509 	dm_lib_release();
510 }
511 
512 void dev_manager_exit(void)
513 {
514 	dm_lib_exit();
515 }
516 
517 int dev_manager_snapshot_percent(struct dev_manager *dm,
518 				 const struct logical_volume *lv,
519 				 float *percent, percent_range_t *percent_range)
520 {
521 	char *name;
522 	const char *dlid;
523 
524 	/*
525 	 * Build a name for the top layer.
526 	 */
527 	if (!(name = build_dm_name(dm->mem, lv->vg->name, lv->name, NULL)))
528 		return_0;
529 
530 	if (!(dlid = build_dlid(dm, lv->lvid.s, NULL)))
531 		return_0;
532 
533 	/*
534 	 * Try and get some info on this device.
535 	 */
536 	log_debug("Getting device status percentage for %s", name);
537 	if (!(_percent(dm, name, dlid, "snapshot", 0, NULL, percent,
538 		       percent_range, NULL)))
539 		return_0;
540 
541 	/* FIXME dm_pool_free ? */
542 
543 	/* If the snapshot isn't available, percent will be -1 */
544 	return 1;
545 }
546 
547 /* FIXME Merge with snapshot_percent, auto-detecting target type */
548 /* FIXME Cope with more than one target */
549 int dev_manager_mirror_percent(struct dev_manager *dm,
550 			       struct logical_volume *lv, int wait,
551 			       float *percent, percent_range_t *percent_range,
552 			       uint32_t *event_nr)
553 {
554 	char *name;
555 	const char *dlid;
556 	const char *suffix = (lv_is_origin(lv)) ? "real" : NULL;
557 
558 	/*
559 	 * Build a name for the top layer.
560 	 */
561 	if (!(name = build_dm_name(dm->mem, lv->vg->name, lv->name, suffix)))
562 		return_0;
563 
564 	/* FIXME dm_pool_free ? */
565 
566 	if (!(dlid = build_dlid(dm, lv->lvid.s, suffix))) {
567 		log_error("dlid build failed for %s", lv->name);
568 		return 0;
569 	}
570 
571 	log_debug("Getting device mirror status percentage for %s", name);
572 	if (!(_percent(dm, name, dlid, "mirror", wait, lv, percent,
573 		       percent_range, event_nr)))
574 		return_0;
575 
576 	return 1;
577 }
578 
579 #if 0
580 	log_very_verbose("%s %s", sus ? "Suspending" : "Resuming", name);
581 
582 	log_verbose("Loading %s", dl->name);
583 			log_very_verbose("Activating %s read-only", dl->name);
584 	log_very_verbose("Activated %s %s %03u:%03u", dl->name,
585 			 dl->dlid, dl->info.major, dl->info.minor);
586 
587 	if (_get_flag(dl, VISIBLE))
588 		log_verbose("Removing %s", dl->name);
589 	else
590 		log_very_verbose("Removing %s", dl->name);
591 
592 	log_debug("Adding target: %" PRIu64 " %" PRIu64 " %s %s",
593 		  extent_size * seg->le, extent_size * seg->len, target, params);
594 
595 	log_debug("Adding target: 0 %" PRIu64 " snapshot-origin %s",
596 		  dl->lv->size, params);
597 	log_debug("Adding target: 0 %" PRIu64 " snapshot %s", size, params);
598 	log_debug("Getting device info for %s", dl->name);
599 
600 	/* Rename? */
601 		if ((suffix = strrchr(dl->dlid + sizeof(UUID_PREFIX) - 1, '-')))
602 			suffix++;
603 		new_name = build_dm_name(dm->mem, dm->vg_name, dl->lv->name,
604 					suffix);
605 
606 static int _belong_to_vg(const char *vgname, const char *name)
607 {
608 	const char *v = vgname, *n = name;
609 
610 	while (*v) {
611 		if ((*v != *n) || (*v == '-' && *(++n) != '-'))
612 			return 0;
613 		v++, n++;
614 	}
615 
616 	if (*n == '-' && *(n + 1) != '-')
617 		return 1;
618 	else
619 		return 0;
620 }
621 
622 	if (!(snap_seg = find_cow(lv)))
623 		return 1;
624 
625 	old_origin = snap_seg->origin;
626 
627 	/* Was this the last active snapshot with this origin? */
628 	dm_list_iterate_items(lvl, active_head) {
629 		active = lvl->lv;
630 		if ((snap_seg = find_cow(active)) &&
631 		    snap_seg->origin == old_origin) {
632 			return 1;
633 		}
634 	}
635 
636 #endif
637 
638 /*************************/
639 /*  NEW CODE STARTS HERE */
640 /*************************/
641 
642 int dev_manager_lv_mknodes(const struct logical_volume *lv)
643 {
644 	char *name;
645 
646 	if (!(name = build_dm_name(lv->vg->cmd->mem, lv->vg->name,
647 				   lv->name, NULL)))
648 		return_0;
649 
650 	return fs_add_lv(lv, name);
651 }
652 
653 int dev_manager_lv_rmnodes(const struct logical_volume *lv)
654 {
655 	return fs_del_lv(lv);
656 }
657 
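/*
 * Look up the device for one layer of an LV and, if it is active, add
 * it to the dtree.  For top-level LVs with a fixed major/minor, check
 * the requested numbers against any already-active device.
 */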
658 static int _add_dev_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
659 			       struct logical_volume *lv, const char *layer)
660 {
661 	char *dlid, *name;
662 	struct dm_info info, info2;
663 
664 	if (!(name = build_dm_name(dm->mem, lv->vg->name, lv->name, layer)))
665 		return_0;
666 
667 	if (!(dlid = build_dlid(dm, lv->lvid.s, layer)))
668 		return_0;
669 
670 	log_debug("Getting device info for %s [%s]", name, dlid);
671 	if (!_info(name, dlid, 0, 1, 0, &info, NULL)) {
672 		log_error("Failed to get info for %s [%s].", name, dlid);
673 		return 0;
674 	}
675 
676 	/*
	 * For top-level volumes, verify that the existing device matches the
	 * requested major/minor and that the major/minor pair is available for use.
679 	 */
680 	if (!layer && lv->major != -1 && lv->minor != -1) {
681 		/*
682 		 * FIXME compare info.major with lv->major if multiple major support
683 		 */
684 		if (info.exists && (info.minor != lv->minor)) {
685 			log_error("Volume %s (%" PRIu32 ":%" PRIu32")"
686 				  " differs from already active device "
687 				  "(%" PRIu32 ":%" PRIu32")",
688 				  lv->name, lv->major, lv->minor, info.major, info.minor);
689 			return 0;
690 		}
691 		if (!info.exists && _info_by_dev(lv->major, lv->minor, &info2) &&
692 		    info2.exists) {
693 			log_error("The requested major:minor pair "
694 				  "(%" PRIu32 ":%" PRIu32") is already used",
695 				  lv->major, lv->minor);
696 			return 0;
697 		}
698 	}
699 
700 	if (info.exists && !dm_tree_add_dev(dtree, info.major, info.minor)) {
701 		log_error("Failed to add device (%" PRIu32 ":%" PRIu32") to dtree",
702 			  info.major, info.minor);
703 		return 0;
704 	}
705 
706 	return 1;
707 }
708 
709 /*
710  * Add LV and any known dependencies
711  */
712 static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree, struct logical_volume *lv)
713 {
714 	if (!_add_dev_to_dtree(dm, dtree, lv, NULL))
715 		return_0;
716 
717 	/* FIXME Can we avoid doing this every time? */
718 	if (!_add_dev_to_dtree(dm, dtree, lv, "real"))
719 		return_0;
720 
721 	if (!_add_dev_to_dtree(dm, dtree, lv, "cow"))
722 		return_0;
723 
724 	if (!_add_dev_to_dtree(dm, dtree, lv, "_mlog"))
725 		return_0;
726 
727 	return 1;
728 }
729 
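/*
 * Build a partial dtree containing the LV itself, any snapshots of it
 * and any LVs used by its segments.
 */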
730 static struct dm_tree *_create_partial_dtree(struct dev_manager *dm, struct logical_volume *lv)
731 {
732 	struct dm_tree *dtree;
733 	struct dm_list *snh, *snht;
734 	struct lv_segment *seg;
735 	uint32_t s;
736 
737 	if (!(dtree = dm_tree_create())) {
738 		log_error("Partial dtree creation failed for %s.", lv->name);
739 		return NULL;
740 	}
741 
742 	if (!_add_lv_to_dtree(dm, dtree, lv))
743 		goto_bad;
744 
745 	/* Add any snapshots of this LV */
746 	dm_list_iterate_safe(snh, snht, &lv->snapshot_segs)
747 		if (!_add_lv_to_dtree(dm, dtree, dm_list_struct_base(snh, struct lv_segment, origin_list)->cow))
748 			goto_bad;
749 
750 	/* Add any LVs used by segments in this LV */
751 	dm_list_iterate_items(seg, &lv->segments)
752 		for (s = 0; s < seg->area_count; s++)
753 			if (seg_type(seg, s) == AREA_LV && seg_lv(seg, s)) {
754 				if (!_add_lv_to_dtree(dm, dtree, seg_lv(seg, s)))
755 					goto_bad;
756 			}
757 
758 	return dtree;
759 
760 bad:
761 	dm_tree_free(dtree);
762 	return NULL;
763 }
764 
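/*
 * Create a device consisting of a single error target the size of the
 * segment and return its dlid, so it can stand in for a missing area.
 */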
765 static char *_add_error_device(struct dev_manager *dm, struct dm_tree *dtree,
766 			       struct lv_segment *seg, int s)
767 {
768 	char *id, *name;
769 	char errid[32];
770 	struct dm_tree_node *node;
771 	struct lv_segment *seg_i;
	int segno = -1, i = 0;
773 	uint64_t size = seg->len * seg->lv->vg->extent_size;
774 
775 	dm_list_iterate_items(seg_i, &seg->lv->segments) {
776 		if (seg == seg_i)
777 			segno = i;
778 		++i;
779 	}
780 
781 	if (segno < 0) {
782 		log_error("_add_error_device called with bad segment");
783 		return_NULL;
784 	}
785 
786 	sprintf(errid, "missing_%d_%d", segno, s);
787 
788 	if (!(id = build_dlid(dm, seg->lv->lvid.s, errid)))
789 		return_NULL;
790 
791 	if (!(name = build_dm_name(dm->mem, seg->lv->vg->name,
792 				   seg->lv->name, errid)))
793 		return_NULL;
794 	if (!(node = dm_tree_add_new_dev(dtree, name, id, 0, 0, 0, 0, 0)))
795 		return_NULL;
796 	if (!dm_tree_node_add_error_target(node, size))
797 		return_NULL;
798 
799 	return id;
800 }
801 
802 static int _add_error_area(struct dev_manager *dm, struct dm_tree_node *node,
803 			   struct lv_segment *seg, int s)
804 {
805 	char *dlid;
806 	uint64_t extent_size = seg->lv->vg->extent_size;
807 
808 	if (!strcmp(dm->cmd->stripe_filler, "error")) {
809 		/*
		 * FIXME: the tree pointer is the first field of dm_tree_node, but
811 		 * we don't have the struct definition available.
812 		 */
813 		struct dm_tree **tree = (struct dm_tree **) node;
814 		dlid = _add_error_device(dm, *tree, seg, s);
815 		if (!dlid)
816 			return_0;
817 		dm_tree_node_add_target_area(node, NULL, dlid,
818 					     extent_size * seg_le(seg, s));
819 	} else
820 		dm_tree_node_add_target_area(node,
821 					     dm->cmd->stripe_filler,
822 					     NULL, UINT64_C(0));
823 
824 	return 1;
825 }
826 
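/*
 * Emit the target areas for a range of segment areas, substituting the
 * configured stripe_filler (or an error device) for any missing area.
 */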
827 int add_areas_line(struct dev_manager *dm, struct lv_segment *seg,
828 		   struct dm_tree_node *node, uint32_t start_area,
829 		   uint32_t areas)
830 {
831 	uint64_t extent_size = seg->lv->vg->extent_size;
832 	uint32_t s;
833 	char *dlid;
834 
835 	for (s = start_area; s < areas; s++) {
836 		if ((seg_type(seg, s) == AREA_PV &&
837 		     (!seg_pvseg(seg, s) ||
838 		      !seg_pv(seg, s) ||
839 		      !seg_dev(seg, s))) ||
840 		    (seg_type(seg, s) == AREA_LV && !seg_lv(seg, s))) {
841 			if (!_add_error_area(dm, node, seg, s))
842 				return_0;
843 		} else if (seg_type(seg, s) == AREA_PV)
844 			dm_tree_node_add_target_area(node,
845 							dev_name(seg_dev(seg, s)),
846 							NULL,
847 							(seg_pv(seg, s)->pe_start +
848 							 (extent_size * seg_pe(seg, s))));
849 		else if (seg_type(seg, s) == AREA_LV) {
850 			if (!(dlid = build_dlid(dm,
851 						 seg_lv(seg, s)->lvid.s,
852 						 NULL)))
853 				return_0;
854 			dm_tree_node_add_target_area(node, NULL, dlid,
855 							extent_size * seg_le(seg, s));
856 		} else {
857 			log_error("Internal error: Unassigned area found in LV %s.",
858 				  seg->lv->name);
859 			return 0;
860 		}
861 	}
862 
863 	return 1;
864 }
865 
866 static int _add_origin_target_to_dtree(struct dev_manager *dm,
867 					 struct dm_tree_node *dnode,
868 					 struct logical_volume *lv)
869 {
870 	const char *real_dlid;
871 
872 	if (!(real_dlid = build_dlid(dm, lv->lvid.s, "real")))
873 		return_0;
874 
875 	if (!dm_tree_node_add_snapshot_origin_target(dnode, lv->size, real_dlid))
876 		return_0;
877 
878 	return 1;
879 }
880 
881 static int _add_snapshot_target_to_dtree(struct dev_manager *dm,
882 					   struct dm_tree_node *dnode,
883 					   struct logical_volume *lv)
884 {
885 	const char *origin_dlid;
886 	const char *cow_dlid;
887 	struct lv_segment *snap_seg;
888 	uint64_t size;
889 
890 	if (!(snap_seg = find_cow(lv))) {
891 		log_error("Couldn't find snapshot for '%s'.", lv->name);
892 		return 0;
893 	}
894 
895 	if (!(origin_dlid = build_dlid(dm, snap_seg->origin->lvid.s, "real")))
896 		return_0;
897 
898 	if (!(cow_dlid = build_dlid(dm, snap_seg->cow->lvid.s, "cow")))
899 		return_0;
900 
901 	size = (uint64_t) snap_seg->len * snap_seg->origin->vg->extent_size;
902 
903 	if (!dm_tree_node_add_snapshot_target(dnode, size, origin_dlid, cow_dlid, 1, snap_seg->chunk_size))
904 		return_0;
905 
906 	return 1;
907 }
908 
909 static int _add_target_to_dtree(struct dev_manager *dm,
910 				  struct dm_tree_node *dnode,
911 				  struct lv_segment *seg)
912 {
913 	uint64_t extent_size = seg->lv->vg->extent_size;
914 
915 	if (!seg->segtype->ops->add_target_line) {
916 		log_error("_emit_target: Internal error: Can't handle "
917 			  "segment type %s", seg->segtype->name);
918 		return 0;
919 	}
920 
921 	return seg->segtype->ops->add_target_line(dm, dm->mem, dm->cmd,
922 						  &dm->target_state, seg,
923 						  dnode,
924 						  extent_size * seg->len,
						  &dm->pvmove_mirror_count);
926 }
927 
928 static int _add_new_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
929 				  struct logical_volume *lv, const char *layer);
930 
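/*
 * Add one segment and its dependencies (mirror log, real/cow layers or
 * sub-LVs) to the dtree, then emit the target line itself.
 */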
931 static int _add_segment_to_dtree(struct dev_manager *dm,
932 				   struct dm_tree *dtree,
933 				   struct dm_tree_node *dnode,
934 				   struct lv_segment *seg,
935 				   const char *layer)
936 {
937 	uint32_t s;
938 	struct dm_list *snh;
939 	struct lv_segment *seg_present;
940 
941 	/* Ensure required device-mapper targets are loaded */
942 	seg_present = find_cow(seg->lv) ? : seg;
943 
944 	log_debug("Checking kernel supports %s segment type for %s%s%s",
945 		  seg_present->segtype->name, seg->lv->name,
946 		  layer ? "-" : "", layer ? : "");
947 
948 	if (seg_present->segtype->ops->target_present &&
949 	    !seg_present->segtype->ops->target_present(seg_present->lv->vg->cmd,
950 						       seg_present, NULL)) {
951 		log_error("Can't expand LV %s: %s target support missing "
952 			  "from kernel?", seg->lv->name, seg_present->segtype->name);
953 		return 0;
954 	}
955 
956 	/* Add mirror log */
957 	if (seg->log_lv &&
958 	    !_add_new_lv_to_dtree(dm, dtree, seg->log_lv, NULL))
959 		return_0;
960 
961 	/* If this is a snapshot origin, add real LV */
962 	if (lv_is_origin(seg->lv) && !layer) {
963 		if (vg_is_clustered(seg->lv->vg)) {
964 			log_error("Clustered snapshots are not yet supported");
965 			return 0;
966 		}
967 		if (!_add_new_lv_to_dtree(dm, dtree, seg->lv, "real"))
968 			return_0;
969 	} else if (lv_is_cow(seg->lv) && !layer) {
970 		if (!_add_new_lv_to_dtree(dm, dtree, seg->lv, "cow"))
971 			return_0;
972 	} else {
973 		/* Add any LVs used by this segment */
974 		for (s = 0; s < seg->area_count; s++)
975 			if ((seg_type(seg, s) == AREA_LV) &&
976 			    (!_add_new_lv_to_dtree(dm, dtree, seg_lv(seg, s), NULL)))
977 				return_0;
978 	}
979 
980 	/* Now we've added its dependencies, we can add the target itself */
981 	if (lv_is_origin(seg->lv) && !layer) {
982 		if (!_add_origin_target_to_dtree(dm, dnode, seg->lv))
983 			return_0;
984 	} else if (lv_is_cow(seg->lv) && !layer) {
985 		if (!_add_snapshot_target_to_dtree(dm, dnode, seg->lv))
986 			return_0;
987 	} else if (!_add_target_to_dtree(dm, dnode, seg))
988 		return_0;
989 
990 	if (lv_is_origin(seg->lv) && !layer)
991 		/* Add any snapshots of this LV */
992 		dm_list_iterate(snh, &seg->lv->snapshot_segs)
993 			if (!_add_new_lv_to_dtree(dm, dtree, dm_list_struct_base(snh, struct lv_segment, origin_list)->cow, NULL))
994 				return_0;
995 
996 	return 1;
997 }
998 
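/*
 * Add a new node for one layer of an LV to the dtree and build its
 * table from the LV's segments, calculating read-ahead on the way.
 */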
999 static int _add_new_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
1000 				  struct logical_volume *lv, const char *layer)
1001 {
1002 	struct lv_segment *seg;
1003 	struct lv_layer *lvlayer;
1004 	struct dm_tree_node *dnode;
1005 	char *name, *dlid;
1006 	uint32_t max_stripe_size = UINT32_C(0);
1007 	uint32_t read_ahead = lv->read_ahead;
1008 	uint32_t read_ahead_flags = UINT32_C(0);
1009 	uint16_t udev_flags = 0;
1010 
1011 	if (!(name = build_dm_name(dm->mem, lv->vg->name, lv->name, layer)))
1012 		return_0;
1013 
1014 	if (!(dlid = build_dlid(dm, lv->lvid.s, layer)))
1015 		return_0;
1016 
1017 	/* We've already processed this node if it already has a context ptr */
1018 	if ((dnode = dm_tree_find_node_by_uuid(dtree, dlid)) &&
1019 	    dm_tree_node_get_context(dnode))
1020 		return 1;
1021 
1022 	if (!(lvlayer = dm_pool_alloc(dm->mem, sizeof(*lvlayer)))) {
1023 		log_error("_add_new_lv_to_dtree: pool alloc failed for %s %s.", lv->name, layer);
1024 		return 0;
1025 	}
1026 
1027 	lvlayer->lv = lv;
1028 
1029 	if (layer || !lv_is_visible(lv))
1030 		udev_flags |= DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG |
1031 			      DM_UDEV_DISABLE_DISK_RULES_FLAG |
1032 			      DM_UDEV_DISABLE_OTHER_RULES_FLAG;
1033 
1034 	if (lv_is_cow(lv))
1035 		udev_flags |= DM_UDEV_LOW_PRIORITY_FLAG;
1036 
1037 	/*
1038 	 * Add LV to dtree.
1039 	 * If we're working with precommitted metadata, clear any
1040 	 * existing inactive table left behind.
1041 	 * Major/minor settings only apply to the visible layer.
1042 	 */
1043 	if (!(dnode = dm_tree_add_new_dev_with_udev_flags(dtree, name, dlid,
1044 					     layer ? UINT32_C(0) : (uint32_t) lv->major,
1045 					     layer ? UINT32_C(0) : (uint32_t) lv->minor,
1046 					     _read_only_lv(lv),
1047 					     (lv->vg->status & PRECOMMITTED) ? 1 : 0,
1048 					     lvlayer,
1049 					     udev_flags)))
1050 		return_0;
1051 
1052 	/* Store existing name so we can do rename later */
1053 	lvlayer->old_name = dm_tree_node_get_name(dnode);
1054 
1055 	/* Create table */
1056 	dm->pvmove_mirror_count = 0u;
1057 	dm_list_iterate_items(seg, &lv->segments) {
1058 		if (!_add_segment_to_dtree(dm, dtree, dnode, seg, layer))
1059 			return_0;
1060 		/* These aren't real segments in the LVM2 metadata */
1061 		if (lv_is_origin(lv) && !layer)
1062 			break;
1063 		if (lv_is_cow(lv) && !layer)
1064 			break;
1065 		if (max_stripe_size < seg->stripe_size * seg->area_count)
1066 			max_stripe_size = seg->stripe_size * seg->area_count;
1067 	}
1068 
1069 	if (read_ahead == DM_READ_AHEAD_AUTO) {
		/* We need read-ahead of at least twice a whole stripe - see the comment in md/raid0.c */
1071 		read_ahead = max_stripe_size * 2;
1072 		if (!read_ahead)
1073 			lv_calculate_readahead(lv, &read_ahead);
1074 		read_ahead_flags = DM_READ_AHEAD_MINIMUM_FLAG;
1075 	}
1076 
1077 	dm_tree_node_set_read_ahead(dnode, read_ahead, read_ahead_flags);
1078 
1079 	return 1;
1080 }
1081 
1082 /* FIXME: symlinks should be created/destroyed at the same time
1083  * as the kernel devices but we can't do that from within libdevmapper
1084  * at present so we must walk the tree twice instead. */
1085 
1086 /*
1087  * Create LV symlinks for children of supplied root node.
1088  */
1089 static int _create_lv_symlinks(struct dev_manager *dm, struct dm_tree_node *root)
1090 {
1091 	void *handle = NULL;
1092 	struct dm_tree_node *child;
1093 	struct lv_layer *lvlayer;
1094 	char *old_vgname, *old_lvname, *old_layer;
1095 	char *new_vgname, *new_lvname, *new_layer;
1096 	const char *name;
1097 	int r = 1;
1098 
1099 	while ((child = dm_tree_next_child(&handle, root, 0))) {
1100 		if (!(lvlayer = (struct lv_layer *) dm_tree_node_get_context(child)))
1101 			continue;
1102 
1103 		/* Detect rename */
1104 		name = dm_tree_node_get_name(child);
1105 
1106 		if (name && lvlayer->old_name && *lvlayer->old_name && strcmp(name, lvlayer->old_name)) {
1107 			if (!dm_split_lvm_name(dm->mem, lvlayer->old_name, &old_vgname, &old_lvname, &old_layer)) {
1108 				log_error("_create_lv_symlinks: Couldn't split up old device name %s", lvlayer->old_name);
1109 				return 0;
1110 			}
1111 			if (!dm_split_lvm_name(dm->mem, name, &new_vgname, &new_lvname, &new_layer)) {
1112 				log_error("_create_lv_symlinks: Couldn't split up new device name %s", name);
1113 				return 0;
1114 			}
1115 			if (!fs_rename_lv(lvlayer->lv, name, old_vgname, old_lvname))
1116 				r = 0;
1117 			continue;
1118 		}
1119 		if (lv_is_visible(lvlayer->lv)) {
1120 			if (!dev_manager_lv_mknodes(lvlayer->lv))
1121 				r = 0;
1122 			continue;
1123 		}
1124 		if (!dev_manager_lv_rmnodes(lvlayer->lv))
1125 			r = 0;
1126 	}
1127 
1128 	return r;
1129 }
1130 
1131 /*
1132  * Remove LV symlinks for children of supplied root node.
1133  */
1134 static int _remove_lv_symlinks(struct dev_manager *dm, struct dm_tree_node *root)
1135 {
1136 	void *handle = NULL;
1137 	struct dm_tree_node *child;
1138 	char *vgname, *lvname, *layer;
1139 	int r = 1;
1140 
1141 	while ((child = dm_tree_next_child(&handle, root, 0))) {
1142 		if (!dm_split_lvm_name(dm->mem, dm_tree_node_get_name(child), &vgname, &lvname, &layer)) {
1143 			r = 0;
1144 			continue;
1145 		}
1146 
1147 		if (!*vgname)
1148 			continue;
1149 
1150 		/* only top level layer has symlinks */
1151 		if (*layer)
1152 			continue;
1153 
1154 		fs_del_lv_byname(dm->cmd->dev_dir, vgname, lvname);
1155 	}
1156 
1157 	return r;
1158 }
1159 
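/*
 * Deactivate any unused nodes below the top level, i.e. those whose
 * names carry a layer suffix.
 */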
1160 static int _clean_tree(struct dev_manager *dm, struct dm_tree_node *root)
1161 {
1162 	void *handle = NULL;
1163 	struct dm_tree_node *child;
1164 	char *vgname, *lvname, *layer;
1165 	const char *name, *uuid;
1166 	int r;
1167 
1168 	while ((child = dm_tree_next_child(&handle, root, 0))) {
1169 		if (!(name = dm_tree_node_get_name(child)))
1170 			continue;
1171 
1172 		if (!(uuid = dm_tree_node_get_uuid(child)))
1173 			continue;
1174 
1175 		if (!dm_split_lvm_name(dm->mem, name, &vgname, &lvname, &layer)) {
1176 			log_error("_clean_tree: Couldn't split up device name %s.", name);
1177 			return 0;
1178 		}
1179 
1180 		/* Not meant to be top level? */
1181 		if (!*layer)
1182 			continue;
1183 
1184 		dm_tree_set_cookie(root, 0);
1185 		r = dm_tree_deactivate_children(root, uuid, strlen(uuid));
1186 		if (!dm_udev_wait(dm_tree_get_cookie(root)))
1187 			stack;
1188 
1189 		if (!r)
1190 			return_0;
1191 	}
1192 
1193 	return 1;
1194 }
1195 
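/*
 * Build a partial dtree for the LV and perform the requested action
 * (preload, activate, deactivate, suspend or clean) on it.
 */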
1196 static int _tree_action(struct dev_manager *dm, struct logical_volume *lv, action_t action)
1197 {
1198 	struct dm_tree *dtree;
1199 	struct dm_tree_node *root;
1200 	char *dlid;
1201 	int r = 0;
1202 
1203 	if (!(dtree = _create_partial_dtree(dm, lv)))
1204 		return_0;
1205 
1206 	if (!(root = dm_tree_find_node(dtree, 0, 0))) {
1207 		log_error("Lost dependency tree root node");
1208 		goto out;
1209 	}
1210 
1211 	if (!(dlid = build_dlid(dm, lv->lvid.s, NULL)))
1212 		goto_out;
1213 
1214 	/* Only process nodes with uuid of "LVM-" plus VG id. */
1215 	switch(action) {
1216 	case CLEAN:
1217 		/* Deactivate any unused non-toplevel nodes */
1218 		if (!_clean_tree(dm, root))
1219 			goto_out;
1220 		break;
1221 	case DEACTIVATE:
		/* Deactivate LV and all devices it references that nothing else has open. */
1223 		dm_tree_set_cookie(root, 0);
1224 		r = dm_tree_deactivate_children(root, dlid, ID_LEN + sizeof(UUID_PREFIX) - 1);
1225 		if (!dm_udev_wait(dm_tree_get_cookie(root)))
1226 			stack;
1227 		if (!r)
1228 			goto_out;
1229 		if (!_remove_lv_symlinks(dm, root))
1230 			log_error("Failed to remove all device symlinks associated with %s.", lv->name);
1231 		break;
1232 	case SUSPEND:
1233 		dm_tree_skip_lockfs(root);
1234 		if (!dm->flush_required && (lv->status & MIRRORED) && !(lv->status & PVMOVE))
1235 			dm_tree_use_no_flush_suspend(root);
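		/* Fall through */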
1236 	case SUSPEND_WITH_LOCKFS:
1237 		if (!dm_tree_suspend_children(root, dlid, ID_LEN + sizeof(UUID_PREFIX) - 1))
1238 			goto_out;
1239 		break;
1240 	case PRELOAD:
1241 	case ACTIVATE:
1242 		/* Add all required new devices to tree */
1243 		if (!_add_new_lv_to_dtree(dm, dtree, lv, NULL))
1244 			goto_out;
1245 
1246 		/* Preload any devices required before any suspensions */
1247 		dm_tree_set_cookie(root, 0);
1248 		r = dm_tree_preload_children(root, dlid, ID_LEN + sizeof(UUID_PREFIX) - 1);
1249 		if (!dm_udev_wait(dm_tree_get_cookie(root)))
1250 			stack;
1251 		if (!r)
1252 			goto_out;
1253 
1254 		if (dm_tree_node_size_changed(root))
1255 			dm->flush_required = 1;
1256 
1257 		if (action == ACTIVATE) {
1258 			dm_tree_set_cookie(root, 0);
1259 			r = dm_tree_activate_children(root, dlid, ID_LEN + sizeof(UUID_PREFIX) - 1);
1260 			if (!dm_udev_wait(dm_tree_get_cookie(root)))
1261 				stack;
1262 			if (!r)
1263 				goto_out;
1264 			if (!_create_lv_symlinks(dm, root)) {
1265 				log_error("Failed to create symlinks for %s.", lv->name);
1266 				goto out;
1267 			}
1268 		}
1269 
1270 		break;
1271 	default:
1272 		log_error("_tree_action: Action %u not supported.", action);
1273 		goto out;
1274 	}
1275 
1276 	r = 1;
1277 
1278 out:
1279 	dm_tree_free(dtree);
1280 
1281 	return r;
1282 }
1283 
1284 int dev_manager_activate(struct dev_manager *dm, struct logical_volume *lv)
1285 {
1286 	if (!_tree_action(dm, lv, ACTIVATE))
1287 		return_0;
1288 
1289 	return _tree_action(dm, lv, CLEAN);
1290 }
1291 
1292 int dev_manager_preload(struct dev_manager *dm, struct logical_volume *lv,
1293 			int *flush_required)
1294 {
1295 	/* FIXME Update the pvmove implementation! */
1296 	if ((lv->status & PVMOVE) || (lv->status & LOCKED))
1297 		return 1;
1298 
1299 	if (!_tree_action(dm, lv, PRELOAD))
1300 		return 0;
1301 
1302 	*flush_required = dm->flush_required;
1303 
1304 	return 1;
1305 }
1306 
1307 int dev_manager_deactivate(struct dev_manager *dm, struct logical_volume *lv)
1308 {
1309 	int r;
1310 
1311 	r = _tree_action(dm, lv, DEACTIVATE);
1312 
1313 	fs_del_lv(lv);
1314 
1315 	return r;
1316 }
1317 
1318 int dev_manager_suspend(struct dev_manager *dm, struct logical_volume *lv,
1319 			int lockfs, int flush_required)
1320 {
1321 	dm->flush_required = flush_required;
1322 
1323 	return _tree_action(dm, lv, lockfs ? SUSPEND_WITH_LOCKFS : SUSPEND);
1324 }
1325 
1326 /*
1327  * Does device use VG somewhere in its construction?
1328  * Returns 1 if uncertain.
1329  */
1330 int dev_manager_device_uses_vg(struct device *dev,
1331 			       struct volume_group *vg)
1332 {
1333 	struct dm_tree *dtree;
1334 	struct dm_tree_node *root;
1335 	char dlid[sizeof(UUID_PREFIX) + sizeof(struct id) - 1] __attribute((aligned(8)));
1336 	int r = 1;
1337 
1338 	if (!(dtree = dm_tree_create())) {
1339 		log_error("partial dtree creation failed");
1340 		return r;
1341 	}
1342 
1343 	if (!dm_tree_add_dev(dtree, (uint32_t) MAJOR(dev->dev), (uint32_t) MINOR(dev->dev))) {
1344 		log_error("Failed to add device %s (%" PRIu32 ":%" PRIu32") to dtree",
1345 			  dev_name(dev), (uint32_t) MAJOR(dev->dev), (uint32_t) MINOR(dev->dev));
1346 		goto out;
1347 	}
1348 
1349 	memcpy(dlid, UUID_PREFIX, sizeof(UUID_PREFIX) - 1);
1350 	memcpy(dlid + sizeof(UUID_PREFIX) - 1, &vg->id.uuid[0], sizeof(vg->id));
1351 
1352 	if (!(root = dm_tree_find_node(dtree, 0, 0))) {
1353 		log_error("Lost dependency tree root node");
1354 		goto out;
1355 	}
1356 
1357 	if (dm_tree_children_use_uuid(root, dlid, sizeof(UUID_PREFIX) + sizeof(vg->id) - 1))
1358 		goto_out;
1359 
1360 	r = 0;
1361 
1362 out:
1363 	dm_tree_free(dtree);
1364 	return r;
1365 }
1366