1 /*	$NetBSD: metadata.c,v 1.1.1.3 2009/12/02 00:26:39 haad Exp $	*/
2 
3 /*
4  * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5  * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of LVM2.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU Lesser General Public License v.2.1.
12  *
13  * You should have received a copy of the GNU Lesser General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 #include "lib.h"
19 #include "device.h"
20 #include "metadata.h"
21 #include "toolcontext.h"
22 #include "lvm-string.h"
23 #include "lvm-file.h"
24 #include "lvmcache.h"
25 #include "memlock.h"
26 #include "str_list.h"
27 #include "pv_alloc.h"
28 #include "segtype.h"
29 #include "activate.h"
30 #include "display.h"
31 #include "locking.h"
32 #include "archiver.h"
33 #include "defaults.h"
34 #include "filter-persistent.h"
35 
36 #include <sys/param.h>
37 
/*
 * pv_field - read one member out of a PV handle.
 *
 * The handle is cast to (const struct physical_volume *) and the named
 * member is accessed directly; no validity check is made on the handle.
 *
 * FIXME: Check for valid handle before dereferencing field or log error?
 */
#define pv_field(handle, field)				\
	(((const struct physical_volume *)(handle))->field)

/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions appear.
 */
static struct physical_volume *_pv_read(struct cmd_context *cmd,
					struct dm_pool *pvmem,
					const char *pv_name,
					struct dm_list *mdas,
					uint64_t *label_sector,
					int warnings, int scan_label_only);

static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
			 			const char *pv_name);

static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
				      const char *pv_name);

static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
						      const struct id *id);

static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
				    uint32_t status);

/*
 * printf-style confirmation prompt; the two %s arguments are the physical
 * volume name followed by the volume group name.
 */
const char _really_init[] =
    "Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? ";
65 
66 unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment)
67 {
68 	if (pv->pe_align)
69 		goto out;
70 
71 	if (data_alignment)
72 		pv->pe_align = data_alignment;
73 	else
74 		pv->pe_align = MAX(65536UL, lvm_getpagesize()) >> SECTOR_SHIFT;
75 
76 	if (!pv->dev)
77 		goto out;
78 
79 	/*
80 	 * Align to stripe-width of underlying md device if present
81 	 */
82 	if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
83 				  DEFAULT_MD_CHUNK_ALIGNMENT))
84 		pv->pe_align = MAX(pv->pe_align,
85 				   dev_md_stripe_width(pv->fmt->cmd->sysfs_dir,
86 						       pv->dev));
87 
88 	/*
89 	 * Align to topology's minimum_io_size or optimal_io_size if present
90 	 * - minimum_io_size - the smallest request the device can perform
91 	 *   w/o incurring a read-modify-write penalty (e.g. MD's chunk size)
92 	 * - optimal_io_size - the device's preferred unit of receiving I/O
93 	 *   (e.g. MD's stripe width)
94 	 */
95 	if (find_config_tree_bool(pv->fmt->cmd,
96 				  "devices/data_alignment_detection",
97 				  DEFAULT_DATA_ALIGNMENT_DETECTION)) {
98 		pv->pe_align = MAX(pv->pe_align,
99 				   dev_minimum_io_size(pv->fmt->cmd->sysfs_dir,
100 						       pv->dev));
101 
102 		pv->pe_align = MAX(pv->pe_align,
103 				   dev_optimal_io_size(pv->fmt->cmd->sysfs_dir,
104 						       pv->dev));
105 	}
106 
107 	log_very_verbose("%s: Setting PE alignment to %lu sectors.",
108 			 dev_name(pv->dev), pv->pe_align);
109 
110 out:
111 	return pv->pe_align;
112 }
113 
114 unsigned long set_pe_align_offset(struct physical_volume *pv,
115 				  unsigned long data_alignment_offset)
116 {
117 	if (pv->pe_align_offset)
118 		goto out;
119 
120 	if (data_alignment_offset)
121 		pv->pe_align_offset = data_alignment_offset;
122 
123 	if (!pv->dev)
124 		goto out;
125 
126 	if (find_config_tree_bool(pv->fmt->cmd,
127 				  "devices/data_alignment_offset_detection",
128 				  DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION))
129 		pv->pe_align_offset =
130 			MAX(pv->pe_align_offset,
131 			    dev_alignment_offset(pv->fmt->cmd->sysfs_dir,
132 						 pv->dev));
133 
134 	log_very_verbose("%s: Setting PE alignment offset to %lu sectors.",
135 			 dev_name(pv->dev), pv->pe_align_offset);
136 
137 out:
138 	return pv->pe_align_offset;
139 }
140 
141 /**
142  * add_pv_to_vg - Add a physical volume to a volume group
143  * @vg - volume group to add to
144  * @pv_name - name of the pv (to be removed)
145  * @pv - physical volume to add to volume group
146  *
147  * Returns:
148  *  0 - failure
149  *  1 - success
150  * FIXME: remove pv_name - obtain safely from pv
151  */
152 int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
153 		 struct physical_volume *pv)
154 {
155 	struct pv_list *pvl;
156 	struct format_instance *fid = vg->fid;
157 	struct dm_pool *mem = vg->vgmem;
158 
159 	log_verbose("Adding physical volume '%s' to volume group '%s'",
160 		    pv_name, vg->name);
161 
162 	if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
163 		log_error("pv_list allocation for '%s' failed", pv_name);
164 		return 0;
165 	}
166 
167 	if (!is_orphan_vg(pv->vg_name)) {
168 		log_error("Physical volume '%s' is already in volume group "
169 			  "'%s'", pv_name, pv->vg_name);
170 		return 0;
171 	}
172 
173 	if (pv->fmt != fid->fmt) {
174 		log_error("Physical volume %s is of different format type (%s)",
175 			  pv_name, pv->fmt->name);
176 		return 0;
177 	}
178 
179 	/* Ensure PV doesn't depend on another PV already in the VG */
180 	if (pv_uses_vg(pv, vg)) {
181 		log_error("Physical volume %s might be constructed from same "
182 			  "volume group %s", pv_name, vg->name);
183 		return 0;
184 	}
185 
186 	if (!(pv->vg_name = dm_pool_strdup(mem, vg->name))) {
187 		log_error("vg->name allocation failed for '%s'", pv_name);
188 		return 0;
189 	}
190 
191 	memcpy(&pv->vgid, &vg->id, sizeof(vg->id));
192 
193 	/* Units of 512-byte sectors */
194 	pv->pe_size = vg->extent_size;
195 
196 	/*
197 	 * pe_count must always be calculated by pv_setup
198 	 */
199 	pv->pe_alloc_count = 0;
200 
201 	if (!fid->fmt->ops->pv_setup(fid->fmt, UINT64_C(0), 0,
202 				     vg->extent_size, 0, 0, 0UL, UINT64_C(0),
203 				     &fid->metadata_areas, pv, vg)) {
204 		log_error("Format-specific setup of physical volume '%s' "
205 			  "failed.", pv_name);
206 		return 0;
207 	}
208 
209 	if (_find_pv_in_vg(vg, pv_name)) {
210 		log_error("Physical volume '%s' listed more than once.",
211 			  pv_name);
212 		return 0;
213 	}
214 
215 	if (vg->pv_count && (vg->pv_count == vg->max_pv)) {
216 		log_error("No space for '%s' - volume group '%s' "
217 			  "holds max %d physical volume(s).", pv_name,
218 			  vg->name, vg->max_pv);
219 		return 0;
220 	}
221 
222 	if (!alloc_pv_segment_whole_pv(mem, pv))
223 		return_0;
224 
225 	pvl->pv = pv;
226 	dm_list_add(&vg->pvs, &pvl->list);
227 
228 	if ((uint64_t) vg->extent_count + pv->pe_count > UINT32_MAX) {
229 		log_error("Unable to add %s to %s: new extent count (%"
230 			  PRIu64 ") exceeds limit (%" PRIu32 ").",
231 			  pv_name, vg->name,
232 			  (uint64_t) vg->extent_count + pv->pe_count,
233 			  UINT32_MAX);
234 		return 0;
235 	}
236 
237 	vg->pv_count++;
238 	vg->extent_count += pv->pe_count;
239 	vg->free_count += pv->pe_count;
240 
241 	return 1;
242 }
243 
244 static int _copy_pv(struct dm_pool *pvmem,
245 		    struct physical_volume *pv_to,
246 		    struct physical_volume *pv_from)
247 {
248 	memcpy(pv_to, pv_from, sizeof(*pv_to));
249 
250 	if (!(pv_to->vg_name = dm_pool_strdup(pvmem, pv_from->vg_name)))
251 		return_0;
252 
253 	if (!str_list_dup(pvmem, &pv_to->tags, &pv_from->tags))
254 		return_0;
255 
256 	if (!peg_dup(pvmem, &pv_to->segments, &pv_from->segments))
257 		return_0;
258 
259 	return 1;
260 }
261 
262 static struct pv_list *_copy_pvl(struct dm_pool *pvmem, struct pv_list *pvl_from)
263 {
264 	struct pv_list *pvl_to = NULL;
265 
266 	if (!(pvl_to = dm_pool_zalloc(pvmem, sizeof(*pvl_to))))
267 		return_NULL;
268 
269 	if (!(pvl_to->pv = dm_pool_alloc(pvmem, sizeof(*pvl_to->pv))))
270 		goto_bad;
271 
272 	if(!_copy_pv(pvmem, pvl_to->pv, pvl_from->pv))
273 		goto_bad;
274 
275 	return pvl_to;
276 bad:
277 	dm_pool_free(pvmem, pvl_to);
278 	return NULL;
279 }
280 
281 int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name,
282 			 const char *vgid, const char *pvid,
283 			 struct physical_volume *pv)
284 {
285 	struct volume_group *vg;
286 	struct pv_list *pvl;
287 	int r = 0, consistent = 0;
288 
289 	if (!(vg = vg_read_internal(fmt->cmd, vg_name, vgid, &consistent))) {
290 		log_error("get_pv_from_vg_by_id: vg_read_internal failed to read VG %s",
291 			  vg_name);
292 		return 0;
293 	}
294 
295 	if (!consistent)
296 		log_warn("WARNING: Volume group %s is not consistent",
297 			 vg_name);
298 
299 	dm_list_iterate_items(pvl, &vg->pvs) {
300 		if (id_equal(&pvl->pv->id, (const struct id *) pvid)) {
301 			if (!_copy_pv(fmt->cmd->mem, pv, pvl->pv)) {
302 				log_error("internal PV duplication failed");
303 				r = 0;
304 				goto out;
305 			}
306 			r = 1;
307 			goto out;
308 		}
309 	}
310 out:
311 	vg_release(vg);
312 	return r;
313 }
314 
315 int move_pv(struct volume_group *vg_from, struct volume_group *vg_to,
316 	    const char *pv_name)
317 {
318 	struct physical_volume *pv;
319 	struct pv_list *pvl;
320 
321 	/* FIXME: handle tags */
322 	if (!(pvl = find_pv_in_vg(vg_from, pv_name))) {
323 		log_error("Physical volume %s not in volume group %s",
324 			  pv_name, vg_from->name);
325 		return 0;
326 	}
327 
328 	if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
329 	    _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
330 		return 0;
331 
332 	dm_list_move(&vg_to->pvs, &pvl->list);
333 
334 	vg_from->pv_count--;
335 	vg_to->pv_count++;
336 
337 	pv = pvl->pv;
338 
339 	vg_from->extent_count -= pv_pe_count(pv);
340 	vg_to->extent_count += pv_pe_count(pv);
341 
342 	vg_from->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
343 	vg_to->free_count += pv_pe_count(pv) - pv_pe_alloc_count(pv);
344 
345 	return 1;
346 }
347 
348 int move_pvs_used_by_lv(struct volume_group *vg_from,
349 			struct volume_group *vg_to,
350 			const char *lv_name)
351 {
352 	struct lv_segment *lvseg;
353 	unsigned s;
354 	struct lv_list *lvl;
355 	struct logical_volume *lv;
356 
357 	/* FIXME: handle tags */
358 	if (!(lvl = find_lv_in_vg(vg_from, lv_name))) {
359 		log_error("Logical volume %s not in volume group %s",
360 			  lv_name, vg_from->name);
361 		return 0;
362 	}
363 
364 	if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
365 	    _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
366 		return 0;
367 
368 	dm_list_iterate_items(lvseg, &lvl->lv->segments) {
369 		if (lvseg->log_lv)
370 			if (!move_pvs_used_by_lv(vg_from, vg_to,
371 						     lvseg->log_lv->name))
372 				return_0;
373 		for (s = 0; s < lvseg->area_count; s++) {
374 			if (seg_type(lvseg, s) == AREA_PV) {
375 				if (!move_pv(vg_from, vg_to,
376 					      pv_dev_name(seg_pv(lvseg, s))))
377 					return_0;
378 			} else if (seg_type(lvseg, s) == AREA_LV) {
379 				lv = seg_lv(lvseg, s);
380 				if (!move_pvs_used_by_lv(vg_from, vg_to,
381 							     lv->name))
382 				    return_0;
383 			}
384 		}
385 	}
386 	return 1;
387 }
388 
389 static int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name)
390 {
391 	char vg_path[PATH_MAX];
392 
393 	if (!validate_name(vg_name))
394 		return_0;
395 
396 	snprintf(vg_path, PATH_MAX, "%s%s", cmd->dev_dir, vg_name);
397 	if (path_exists(vg_path)) {
398 		log_error("%s: already exists in filesystem", vg_path);
399 		return 0;
400 	}
401 
402 	return 1;
403 }
404 
405 int validate_vg_rename_params(struct cmd_context *cmd,
406 			      const char *vg_name_old,
407 			      const char *vg_name_new)
408 {
409 	unsigned length;
410 	char *dev_dir;
411 
412 	dev_dir = cmd->dev_dir;
413 	length = strlen(dev_dir);
414 
415 	/* Check sanity of new name */
416 	if (strlen(vg_name_new) > NAME_LEN - length - 2) {
417 		log_error("New volume group path exceeds maximum length "
418 			  "of %d!", NAME_LEN - length - 2);
419 		return 0;
420 	}
421 
422 	if (!validate_new_vg_name(cmd, vg_name_new)) {
423 		log_error("New volume group name \"%s\" is invalid",
424 			  vg_name_new);
425 		return 0;
426 	}
427 
428 	if (!strcmp(vg_name_old, vg_name_new)) {
429 		log_error("Old and new volume group names must differ");
430 		return 0;
431 	}
432 
433 	return 1;
434 }
435 
436 int vg_rename(struct cmd_context *cmd, struct volume_group *vg,
437 	      const char *new_name)
438 {
439 	struct dm_pool *mem = vg->vgmem;
440 	struct pv_list *pvl;
441 
442 	if (!(vg->name = dm_pool_strdup(mem, new_name))) {
443 		log_error("vg->name allocation failed for '%s'", new_name);
444 		return 0;
445 	}
446 
447 	dm_list_iterate_items(pvl, &vg->pvs) {
448 		if (!(pvl->pv->vg_name = dm_pool_strdup(mem, new_name))) {
449 			log_error("pv->vg_name allocation failed for '%s'",
450 				  pv_dev_name(pvl->pv));
451 			return 0;
452 		}
453 	}
454 
455 	return 1;
456 }
457 
458 int remove_lvs_in_vg(struct cmd_context *cmd,
459 		     struct volume_group *vg,
460 		     force_t force)
461 {
462 	struct dm_list *lst;
463 	struct lv_list *lvl;
464 
465 	while ((lst = dm_list_first(&vg->lvs))) {
466 		lvl = dm_list_item(lst, struct lv_list);
467 		if (!lv_remove_with_dependencies(cmd, lvl->lv, force))
468 		    return 0;
469 	}
470 
471 	return 1;
472 }
473 
474 int vg_remove_check(struct volume_group *vg)
475 {
476 	unsigned lv_count;
477 	struct pv_list *pvl, *tpvl;
478 
479 	if (vg_read_error(vg) || vg_missing_pv_count(vg)) {
480 		log_error("Volume group \"%s\" not found, is inconsistent "
481 			  "or has PVs missing.", vg ? vg->name : "");
482 		log_error("Consider vgreduce --removemissing if metadata "
483 			  "is inconsistent.");
484 		return 0;
485 	}
486 
487 	if (!vg_check_status(vg, EXPORTED_VG))
488 		return 0;
489 
490 	lv_count = vg_visible_lvs(vg);
491 
492 	if (lv_count) {
493 		log_error("Volume group \"%s\" still contains %u "
494 			  "logical volume(s)", vg->name, lv_count);
495 		return 0;
496 	}
497 
498 	if (!archive(vg))
499 		return 0;
500 
501 	dm_list_iterate_items_safe(pvl, tpvl, &vg->pvs) {
502 		dm_list_del(&pvl->list);
503 		dm_list_add(&vg->removed_pvs, &pvl->list);
504 	}
505 	return 1;
506 }
507 
508 int vg_remove(struct volume_group *vg)
509 {
510 	struct physical_volume *pv;
511 	struct pv_list *pvl;
512 	int ret = 1;
513 
514 	if (!lock_vol(vg->cmd, VG_ORPHANS, LCK_VG_WRITE)) {
515 		log_error("Can't get lock for orphan PVs");
516 		return 0;
517 	}
518 
519 	if (!vg_remove_mdas(vg)) {
520 		log_error("vg_remove_mdas %s failed", vg->name);
521 		unlock_vg(vg->cmd, VG_ORPHANS);
522 		return 0;
523 	}
524 
525 	/* init physical volumes */
526 	dm_list_iterate_items(pvl, &vg->removed_pvs) {
527 		pv = pvl->pv;
528 		log_verbose("Removing physical volume \"%s\" from "
529 			    "volume group \"%s\"", pv_dev_name(pv), vg->name);
530 		pv->vg_name = vg->fid->fmt->orphan_vg_name;
531 		pv->status = ALLOCATABLE_PV;
532 
533 		if (!dev_get_size(pv_dev(pv), &pv->size)) {
534 			log_error("%s: Couldn't get size.", pv_dev_name(pv));
535 			ret = 0;
536 			continue;
537 		}
538 
539 		/* FIXME Write to same sector label was read from */
540 		if (!pv_write(vg->cmd, pv, NULL, INT64_C(-1))) {
541 			log_error("Failed to remove physical volume \"%s\""
542 				  " from volume group \"%s\"",
543 				  pv_dev_name(pv), vg->name);
544 			ret = 0;
545 		}
546 	}
547 
548 	backup_remove(vg->cmd, vg->name);
549 
550 	if (ret)
551 		log_print("Volume group \"%s\" successfully removed", vg->name);
552 	else
553 		log_error("Volume group \"%s\" not properly removed", vg->name);
554 
555 	unlock_vg(vg->cmd, VG_ORPHANS);
556 	return ret;
557 }
558 
559 /*
560  * Extend a VG by a single PV / device path
561  *
562  * Parameters:
563  * - vg: handle of volume group to extend by 'pv_name'
564  * - pv_name: device path of PV to add to VG
565  * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
566  *
567  */
568 static int vg_extend_single_pv(struct volume_group *vg, char *pv_name,
569 			       struct pvcreate_params *pp)
570 {
571 	struct physical_volume *pv;
572 
573 	pv = pv_by_path(vg->fid->fmt->cmd, pv_name);
574 	if (!pv && !pp) {
575 		log_error("%s not identified as an existing "
576 			  "physical volume", pv_name);
577 		return 0;
578 	} else if (!pv && pp) {
579 		pv = pvcreate_single(vg->cmd, pv_name, pp);
580 		if (!pv)
581 			return 0;
582 	}
583 	if (!add_pv_to_vg(vg, pv_name, pv))
584 		return 0;
585 	return 1;
586 }
587 
588 /*
589  * Extend a VG by a single PV / device path
590  *
591  * Parameters:
592  * - vg: handle of volume group to extend by 'pv_name'
593  * - pv_count: count of device paths of PVs
594  * - pv_names: device paths of PVs to add to VG
595  * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
596  *
597  */
598 int vg_extend(struct volume_group *vg, int pv_count, char **pv_names,
599 	      struct pvcreate_params *pp)
600 {
601 	int i;
602 
603 	if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
604 		return 0;
605 
606 	/* attach each pv */
607 	for (i = 0; i < pv_count; i++) {
608 		if (!vg_extend_single_pv(vg, pv_names[i], pp))
609 			goto bad;
610 	}
611 
612 /* FIXME Decide whether to initialise and add new mdahs to format instance */
613 
614 	return 1;
615 
616       bad:
617 	log_error("Unable to add physical volume '%s' to "
618 		  "volume group '%s'.", pv_names[i], vg->name);
619 	return 0;
620 }
621 
622 /* FIXME: use this inside vgreduce_single? */
623 int vg_reduce(struct volume_group *vg, char *pv_name)
624 {
625 	struct physical_volume *pv;
626 	struct pv_list *pvl;
627 
628 	if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
629 		return 0;
630 
631 	if (!archive(vg))
632 		goto bad;
633 
634 	/* remove each pv */
635 	if (!(pvl = find_pv_in_vg(vg, pv_name))) {
636 		log_error("Physical volume %s not in volume group %s.",
637 			  pv_name, vg->name);
638 		goto bad;
639 	}
640 
641 	pv = pvl->pv;
642 
643 	if (pv_pe_alloc_count(pv)) {
644 		log_error("Physical volume %s still in use.",
645 			  pv_name);
646 		goto bad;
647 	}
648 
649 	if (!dev_get_size(pv_dev(pv), &pv->size)) {
650 		log_error("%s: Couldn't get size.", pv_name);
651 		goto bad;
652 	}
653 
654 	vg->pv_count--;
655 	vg->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
656 	vg->extent_count -= pv_pe_count(pv);
657 
658 	/* add pv to the remove_pvs list */
659 	dm_list_del(&pvl->list);
660 	dm_list_add(&vg->removed_pvs, &pvl->list);
661 
662 	return 1;
663 
664       bad:
665 	log_error("Unable to remove physical volume '%s' from "
666 		  "volume group '%s'.", pv_name, vg->name);
667 	return 0;
668 }
669 
/*
 * Skip a leading dev_dir prefix of vg_name, if present.
 *
 * Returns a pointer into vg_name: just past the prefix when vg_name starts
 * with dev_dir, otherwise vg_name itself.  Nothing is copied or modified.
 */
const char *strip_dir(const char *vg_name, const char *dev_dir)
{
	const size_t prefix_len = strlen(dev_dir);

	return strncmp(vg_name, dev_dir, prefix_len)
		? vg_name
		: vg_name + prefix_len;
}
678 
679 /*
680  * Validate parameters to vg_create() before calling.
681  * FIXME: Move inside vg_create library function.
682  * FIXME: Change vgcreate_params struct to individual gets/sets
683  */
684 int vgcreate_params_validate(struct cmd_context *cmd,
685 			     struct vgcreate_params *vp)
686 {
687 	if (!validate_new_vg_name(cmd, vp->vg_name)) {
688 		log_error("New volume group name \"%s\" is invalid",
689 			  vp->vg_name);
690 		return 1;
691 	}
692 
693 	if (vp->alloc == ALLOC_INHERIT) {
694 		log_error("Volume Group allocation policy cannot inherit "
695 			  "from anything");
696 		return 1;
697 	}
698 
699 	if (!vp->extent_size) {
700 		log_error("Physical extent size may not be zero");
701 		return 1;
702 	}
703 
704 	if (!(cmd->fmt->features & FMT_UNLIMITED_VOLS)) {
705 		if (!vp->max_lv)
706 			vp->max_lv = 255;
707 		if (!vp->max_pv)
708 			vp->max_pv = 255;
709 		if (vp->max_lv > 255 || vp->max_pv > 255) {
710 			log_error("Number of volumes may not exceed 255");
711 			return 1;
712 		}
713 	}
714 
715 	return 0;
716 }
717 
718 /*
719  * Create a (struct volume_group) volume group handle from a struct volume_group pointer and a
720  * possible failure code or zero for success.
721  */
722 static struct volume_group *_vg_make_handle(struct cmd_context *cmd,
723 			     struct volume_group *vg,
724 			     uint32_t failure)
725 {
726 	struct dm_pool *vgmem;
727 
728 	if (!vg) {
729 		if (!(vgmem = dm_pool_create("lvm2 vg_handle", VG_MEMPOOL_CHUNK)) ||
730 		    !(vg = dm_pool_zalloc(vgmem, sizeof(*vg)))) {
731 			log_error("Error allocating vg handle.");
732 			if (vgmem)
733 				dm_pool_destroy(vgmem);
734 			return_NULL;
735 		}
736 		vg->vgmem = vgmem;
737 	}
738 
739 	vg->read_status = failure;
740 
741 	return (struct volume_group *)vg;
742 }
743 
744 int lv_has_unknown_segments(const struct logical_volume *lv)
745 {
746 	struct lv_segment *seg;
747 	/* foreach segment */
748 	dm_list_iterate_items(seg, &lv->segments)
749 		if (seg_unknown(seg))
750 			return 1;
751 	return 0;
752 }
753 
754 int vg_has_unknown_segments(const struct volume_group *vg)
755 {
756 	struct lv_list *lvl;
757 
758 	/* foreach LV */
759 	dm_list_iterate_items(lvl, &vg->lvs)
760 		if (lv_has_unknown_segments(lvl->lv))
761 			return 1;
762 	return 0;
763 }
764 
765 /*
766  * Create a VG with default parameters.
767  * Returns:
768  * - struct volume_group* with SUCCESS code: VG structure created
769  * - NULL or struct volume_group* with FAILED_* code: error creating VG structure
770  * Use vg_read_error() to determine success or failure.
771  * FIXME: cleanup usage of _vg_make_handle()
772  */
773 struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
774 {
775 	struct volume_group *vg;
776 	int consistent = 0;
777 	struct dm_pool *mem;
778 	uint32_t rc;
779 
780 	if (!validate_name(vg_name)) {
781 		log_error("Invalid vg name %s", vg_name);
782 		/* FIXME: use _vg_make_handle() w/proper error code */
783 		return NULL;
784 	}
785 
786 	rc = vg_lock_newname(cmd, vg_name);
787 	if (rc != SUCCESS)
788 		/* NOTE: let caller decide - this may be check for existence */
789 		return _vg_make_handle(cmd, NULL, rc);
790 
791 	/* FIXME: Is this vg_read_internal necessary? Move it inside
792 	   vg_lock_newname? */
793 	/* is this vg name already in use ? */
794 	if ((vg = vg_read_internal(cmd, vg_name, NULL, &consistent))) {
795 		log_error("A volume group called '%s' already exists.", vg_name);
796 		unlock_and_release_vg(cmd, vg, vg_name);
797 		return _vg_make_handle(cmd, NULL, FAILED_EXIST);
798 	}
799 
800 	if (!(mem = dm_pool_create("lvm2 vg_create", VG_MEMPOOL_CHUNK)))
801 		goto_bad;
802 
803 	if (!(vg = dm_pool_zalloc(mem, sizeof(*vg))))
804 		goto_bad;
805 
806 	if (!id_create(&vg->id)) {
807 		log_error("Couldn't create uuid for volume group '%s'.",
808 			  vg_name);
809 		goto bad;
810 	}
811 
812 	/* Strip dev_dir if present */
813 	vg_name = strip_dir(vg_name, cmd->dev_dir);
814 
815 	vg->vgmem = mem;
816 	vg->cmd = cmd;
817 
818 	if (!(vg->name = dm_pool_strdup(mem, vg_name)))
819 		goto_bad;
820 
821 	vg->seqno = 0;
822 
823 	vg->status = (RESIZEABLE_VG | LVM_READ | LVM_WRITE);
824 	if (!(vg->system_id = dm_pool_alloc(mem, NAME_LEN)))
825 		goto_bad;
826 
827 	*vg->system_id = '\0';
828 
829 	vg->extent_size = DEFAULT_EXTENT_SIZE * 2;
830 	vg->extent_count = 0;
831 	vg->free_count = 0;
832 
833 	vg->max_lv = DEFAULT_MAX_LV;
834 	vg->max_pv = DEFAULT_MAX_PV;
835 
836 	vg->alloc = DEFAULT_ALLOC_POLICY;
837 
838 	vg->pv_count = 0;
839 	dm_list_init(&vg->pvs);
840 
841 	dm_list_init(&vg->lvs);
842 
843 	dm_list_init(&vg->tags);
844 
845 	/* initialize removed_pvs list */
846 	dm_list_init(&vg->removed_pvs);
847 
848 	if (!(vg->fid = cmd->fmt->ops->create_instance(cmd->fmt, vg_name,
849 						       NULL, NULL))) {
850 		log_error("Failed to create format instance");
851 		goto bad;
852 	}
853 
854 	if (vg->fid->fmt->ops->vg_setup &&
855 	    !vg->fid->fmt->ops->vg_setup(vg->fid, vg)) {
856 		log_error("Format specific setup of volume group '%s' failed.",
857 			  vg_name);
858 		goto bad;
859 	}
860 	return _vg_make_handle(cmd, vg, SUCCESS);
861 
862 bad:
863 	unlock_and_release_vg(cmd, vg, vg_name);
864 	/* FIXME: use _vg_make_handle() w/proper error code */
865 	return NULL;
866 }
867 
/*
 * Convert a size into a number of extents of extent_size, rounding up.
 *
 * size and extent_size must be expressed in the same unit.  If size is not
 * a multiple of extent_size it is rounded up to the next multiple and a
 * message is printed.
 *
 * Returns the extent count, or 0 (after logging an error) if extent_size is
 * zero or the rounded size needs more than UINT32_MAX extents.
 */
uint64_t extents_from_size(struct cmd_context *cmd, uint64_t size,
			   uint32_t extent_size)
{
	/* Guard the modulo and division below against a zero divisor. */
	if (!extent_size) {
		log_error("Physical extent size may not be zero");
		return 0;
	}

	if (size % extent_size) {
		size += extent_size - size % extent_size;
		log_print("Rounding up size to full physical extent %s",
			  display_size(cmd, size));
	}

	if (size > (uint64_t) UINT32_MAX * extent_size) {
		log_error("Volume too large (%s) for extent size %s. "
			  "Upper limit is %s.",
			  display_size(cmd, size),
			  display_size(cmd, (uint64_t) extent_size),
			  display_size(cmd, (uint64_t) UINT32_MAX *
				       extent_size));
		return 0;
	}

	return (uint64_t) size / extent_size;
}
889 
/*
 * Re-express an extent count in a different extent size.
 *
 * *extents is a count of extents of old_size; it is replaced in place by the
 * equivalent count of extents of new_size.  desc1 and desc2 are concatenated
 * in error messages to name the quantity being converted.
 *
 * Returns 1 on success; 0, leaving *extents untouched, if the total is not
 * an exact multiple of new_size or the new count does not fit in 32 bits.
 */
static int _recalc_extents(uint32_t *extents, const char *desc1,
			   const char *desc2, uint32_t old_size,
			   uint32_t new_size)
{
	/* Total amount covered, computed in 64 bits to avoid overflow. */
	const uint64_t total = (uint64_t) old_size * (*extents);
	uint64_t new_count;

	if (total % new_size) {
		log_error("New size %" PRIu64 " for %s%s not an exact number "
			  "of new extents.", total, desc1, desc2);
		return 0;
	}

	new_count = total / new_size;
	if (new_count > UINT32_MAX) {
		log_error("New extent count %" PRIu64 " for %s%s exceeds "
			  "32 bits.", new_count, desc1, desc2);
		return 0;
	}

	*extents = (uint32_t) new_count;

	return 1;
}
914 
/*
 * Change the physical extent size of a VG and rescale every extent-based
 * count and offset to the new unit.
 *
 * The VG must be resizeable and new_size must be a non-zero power of 2.
 * When the extent size grows, the total VG size must be a multiple of the
 * new size.  Each affected value (VG totals, per-PV counts, free PV
 * segments, LV sizes, LV segments and their areas) is converted with
 * _recalc_extents(), which fails if the value is not a whole number of new
 * extents.
 *
 * NOTE: vg->extent_size is updated before the conversions run and nothing
 * is rolled back, so after a failed call the in-memory VG may be only
 * partially converted.
 *
 * Returns 1 on success (including when the size is unchanged), 0 on failure.
 */
int vg_set_extent_size(struct volume_group *vg, uint32_t new_size)
{
	uint32_t old_size = vg->extent_size;
	struct pv_list *pvl;
	struct lv_list *lvl;
	struct physical_volume *pv;
	struct logical_volume *lv;
	struct lv_segment *seg;
	struct pv_segment *pvseg;
	uint32_t s;

	if (!vg_is_resizeable(vg)) {
		log_error("Volume group \"%s\" must be resizeable "
			  "to change PE size", vg->name);
		return 0;
	}

	if (!new_size) {
		log_error("Physical extent size may not be zero");
		return 0;
	}

	/* Nothing to do if the size is unchanged. */
	if (new_size == vg->extent_size)
		return 1;

	/* A power of 2 has exactly one bit set, so n & (n - 1) is zero. */
	if (new_size & (new_size - 1)) {
		log_error("Physical extent size must be a power of 2.");
		return 0;
	}

	if (new_size > vg->extent_size) {
		if ((uint64_t) vg_size(vg) % new_size) {
			/* FIXME Adjust used PV sizes instead */
			log_error("New extent size is not a perfect fit");
			return 0;
		}
	}

	vg->extent_size = new_size;

	/* vg_setup is an optional format operation. */
	if (vg->fid->fmt->ops->vg_setup &&
	    !vg->fid->fmt->ops->vg_setup(vg->fid, vg))
		return_0;

	if (!_recalc_extents(&vg->extent_count, vg->name, "", old_size,
			     new_size))
		return_0;

	if (!_recalc_extents(&vg->free_count, vg->name, " free space",
			     old_size, new_size))
		return_0;

	/* foreach PV */
	dm_list_iterate_items(pvl, &vg->pvs) {
		pv = pvl->pv;

		pv->pe_size = new_size;
		if (!_recalc_extents(&pv->pe_count, pv_dev_name(pv), "",
				     old_size, new_size))
			return_0;

		if (!_recalc_extents(&pv->pe_alloc_count, pv_dev_name(pv),
				     " allocated space", old_size, new_size))
			return_0;

		/*
		 * Only free PV segments are converted here; allocated ones
		 * are skipped and are converted through the LV segment
		 * areas that reference them in the LV loop below.
		 */
		/* foreach free PV Segment */
		dm_list_iterate_items(pvseg, &pv->segments) {
			if (pvseg_is_allocated(pvseg))
				continue;

			if (!_recalc_extents(&pvseg->pe, pv_dev_name(pv),
					     " PV segment start", old_size,
					     new_size))
				return_0;
			if (!_recalc_extents(&pvseg->len, pv_dev_name(pv),
					     " PV segment length", old_size,
					     new_size))
				return_0;
		}
	}

	/* foreach LV */
	dm_list_iterate_items(lvl, &vg->lvs) {
		lv = lvl->lv;

		if (!_recalc_extents(&lv->le_count, lv->name, "", old_size,
				     new_size))
			return_0;

		dm_list_iterate_items(seg, &lv->segments) {
			if (!_recalc_extents(&seg->le, lv->name,
					     " segment start", old_size,
					     new_size))
				return_0;

			if (!_recalc_extents(&seg->len, lv->name,
					     " segment length", old_size,
					     new_size))
				return_0;

			if (!_recalc_extents(&seg->area_len, lv->name,
					     " area length", old_size,
					     new_size))
				return_0;

			if (!_recalc_extents(&seg->extents_copied, lv->name,
					     " extents moved", old_size,
					     new_size))
				return_0;

			/* foreach area */
			for (s = 0; s < seg->area_count; s++) {
				switch (seg_type(seg, s)) {
				case AREA_PV:
					/* Start and length of the backing PV segment. */
					if (!_recalc_extents
					    (&seg_pe(seg, s),
					     lv->name,
					     " pvseg start", old_size,
					     new_size))
						return_0;
					if (!_recalc_extents
					    (&seg_pvseg(seg, s)->len,
					     lv->name,
					     " pvseg length", old_size,
					     new_size))
						return_0;
					break;
				case AREA_LV:
					/* Start extent within the underlying LV. */
					if (!_recalc_extents
					    (&seg_le(seg, s), lv->name,
					     " area start", old_size,
					     new_size))
						return_0;
					break;
				case AREA_UNASSIGNED:
					log_error("Unassigned area %u found in "
						  "segment", s);
					return 0;
				}
			}
		}

	}

	return 1;
}
1061 
1062 int vg_set_max_lv(struct volume_group *vg, uint32_t max_lv)
1063 {
1064 	if (!vg_is_resizeable(vg)) {
1065 		log_error("Volume group \"%s\" must be resizeable "
1066 			  "to change MaxLogicalVolume", vg->name);
1067 		return 0;
1068 	}
1069 
1070 	if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS)) {
1071 		if (!max_lv)
1072 			max_lv = 255;
1073 		else if (max_lv > 255) {
1074 			log_error("MaxLogicalVolume limit is 255");
1075 			return 0;
1076 		}
1077 	}
1078 
1079 	if (max_lv && max_lv < vg_visible_lvs(vg)) {
1080 		log_error("MaxLogicalVolume is less than the current number "
1081 			  "%d of LVs for %s", vg_visible_lvs(vg),
1082 			  vg->name);
1083 		return 0;
1084 	}
1085 	vg->max_lv = max_lv;
1086 
1087 	return 1;
1088 }
1089 
1090 int vg_set_max_pv(struct volume_group *vg, uint32_t max_pv)
1091 {
1092 	if (!vg_is_resizeable(vg)) {
1093 		log_error("Volume group \"%s\" must be resizeable "
1094 			  "to change MaxPhysicalVolumes", vg->name);
1095 		return 0;
1096 	}
1097 
1098 	if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS)) {
1099 		if (!max_pv)
1100 			max_pv = 255;
1101 		else if (max_pv > 255) {
1102 			log_error("MaxPhysicalVolume limit is 255");
1103 			return 0;
1104 		}
1105 	}
1106 
1107 	if (max_pv && max_pv < vg->pv_count) {
1108 		log_error("MaxPhysicalVolumes is less than the current number "
1109 			  "%d of PVs for \"%s\"", vg->pv_count,
1110 			  vg->name);
1111 		return 0;
1112 	}
1113 	vg->max_pv = max_pv;
1114 	return 1;
1115 }
1116 
1117 int vg_set_alloc_policy(struct volume_group *vg, alloc_policy_t alloc)
1118 {
1119 	if (alloc == ALLOC_INHERIT) {
1120 		log_error("Volume Group allocation policy cannot inherit "
1121 			  "from anything");
1122 		return 0;
1123 	}
1124 
1125 	if (alloc == vg->alloc)
1126 		return 1;
1127 
1128 	vg->alloc = alloc;
1129 	return 1;
1130 }
1131 
1132 int vg_set_clustered(struct volume_group *vg, int clustered)
1133 {
1134 	struct lv_list *lvl;
1135 	if (clustered) {
1136 		dm_list_iterate_items(lvl, &vg->lvs) {
1137 			if (lv_is_origin(lvl->lv) || lv_is_cow(lvl->lv)) {
1138 				log_error("Volume group %s contains snapshots "
1139 					  "that are not yet supported.",
1140 					  vg->name);
1141 				return 0;
1142 			}
1143 		}
1144 	}
1145 
1146 	if (clustered)
1147 		vg->status |= CLUSTERED;
1148 	else
1149 		vg->status &= ~CLUSTERED;
1150 	return 1;
1151 }
1152 
1153 /*
1154  * Separate metadata areas after splitting a VG.
1155  * Also accepts orphan VG as destination (for vgreduce).
1156  */
1157 int vg_split_mdas(struct cmd_context *cmd __attribute((unused)),
1158 		  struct volume_group *vg_from, struct volume_group *vg_to)
1159 {
1160 	struct metadata_area *mda, *mda2;
1161 	struct dm_list *mdas_from, *mdas_to;
1162 	int common_mda = 0;
1163 
1164 	mdas_from = &vg_from->fid->metadata_areas;
1165 	mdas_to = &vg_to->fid->metadata_areas;
1166 
1167 	dm_list_iterate_items_safe(mda, mda2, mdas_from) {
1168 		if (!mda->ops->mda_in_vg) {
1169 			common_mda = 1;
1170 			continue;
1171 		}
1172 
1173 		if (!mda->ops->mda_in_vg(vg_from->fid, vg_from, mda)) {
1174 			if (is_orphan_vg(vg_to->name))
1175 				dm_list_del(&mda->list);
1176 			else
1177 				dm_list_move(mdas_to, &mda->list);
1178 		}
1179 	}
1180 
1181 	if (dm_list_empty(mdas_from) ||
1182 	    (!is_orphan_vg(vg_to->name) && dm_list_empty(mdas_to)))
1183 		return common_mda;
1184 
1185 	return 1;
1186 }
1187 
1188 /*
1189  * See if we may pvcreate on this device.
1190  * 0 indicates we may not.
1191  */
1192 static int pvcreate_check(struct cmd_context *cmd, const char *name,
1193 			  struct pvcreate_params *pp)
1194 {
1195 	struct physical_volume *pv;
1196 	struct device *dev;
1197 	uint64_t md_superblock, swap_signature;
1198 	int wipe_md, wipe_swap;
1199 
1200 	/* FIXME Check partition type is LVM unless --force is given */
1201 
1202 	/* Is there a pv here already? */
1203 	pv = pv_read(cmd, name, NULL, NULL, 0, 0);
1204 
1205 	/*
1206 	 * If a PV has no MDAs it may appear to be an orphan until the
1207 	 * metadata is read off another PV in the same VG.  Detecting
1208 	 * this means checking every VG by scanning every PV on the
1209 	 * system.
1210 	 */
1211 	if (pv && is_orphan(pv)) {
1212 		if (!scan_vgs_for_pvs(cmd))
1213 			return_0;
1214 		pv = pv_read(cmd, name, NULL, NULL, 0, 0);
1215 	}
1216 
1217 	/* Allow partial & exported VGs to be destroyed. */
1218 	/* We must have -ff to overwrite a non orphan */
1219 	if (pv && !is_orphan(pv) && pp->force != DONT_PROMPT_OVERRIDE) {
1220 		log_error("Can't initialize physical volume \"%s\" of "
1221 			  "volume group \"%s\" without -ff", name, pv_vg_name(pv));
1222 		return 0;
1223 	}
1224 
1225 	/* prompt */
1226 	if (pv && !is_orphan(pv) && !pp->yes &&
1227 	    yes_no_prompt(_really_init, name, pv_vg_name(pv)) == 'n') {
1228 		log_print("%s: physical volume not initialized", name);
1229 		return 0;
1230 	}
1231 
1232 	if (sigint_caught())
1233 		return 0;
1234 
1235 	dev = dev_cache_get(name, cmd->filter);
1236 
1237 	/* Is there an md superblock here? */
1238 	if (!dev && md_filtering()) {
1239 		unlock_vg(cmd, VG_ORPHANS);
1240 
1241 		persistent_filter_wipe(cmd->filter);
1242 		lvmcache_destroy(cmd, 1);
1243 
1244 		init_md_filtering(0);
1245 		if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE)) {
1246 			log_error("Can't get lock for orphan PVs");
1247 			init_md_filtering(1);
1248 			return 0;
1249 		}
1250 		dev = dev_cache_get(name, cmd->filter);
1251 		init_md_filtering(1);
1252 	}
1253 
1254 	if (!dev) {
1255 		log_error("Device %s not found (or ignored by filtering).", name);
1256 		return 0;
1257 	}
1258 
1259 	/*
1260 	 * This test will fail if the device belongs to an MD array.
1261 	 */
1262 	if (!dev_test_excl(dev)) {
1263 		/* FIXME Detect whether device-mapper itself is still using it */
1264 		log_error("Can't open %s exclusively.  Mounted filesystem?",
1265 			  name);
1266 		return 0;
1267 	}
1268 
1269 	/* Wipe superblock? */
1270 	if ((wipe_md = dev_is_md(dev, &md_superblock)) == 1 &&
1271 	    ((!pp->idp && !pp->restorefile) || pp->yes ||
1272 	     (yes_no_prompt("Software RAID md superblock "
1273 			    "detected on %s. Wipe it? [y/n] ", name) == 'y'))) {
1274 		log_print("Wiping software RAID md superblock on %s", name);
1275 		if (!dev_set(dev, md_superblock, 4, 0)) {
1276 			log_error("Failed to wipe RAID md superblock on %s",
1277 				  name);
1278 			return 0;
1279 		}
1280 	}
1281 
1282 	if (wipe_md == -1) {
1283 		log_error("Fatal error while trying to detect software "
1284 			  "RAID md superblock on %s", name);
1285 		return 0;
1286 	}
1287 
1288 	if ((wipe_swap = dev_is_swap(dev, &swap_signature)) == 1 &&
1289 	    ((!pp->idp && !pp->restorefile) || pp->yes ||
1290 	     (yes_no_prompt("Swap signature detected on %s. Wipe it? [y/n] ",
1291 			    name) == 'y'))) {
1292 		log_print("Wiping swap signature on %s", name);
1293 		if (!dev_set(dev, swap_signature, 10, 0)) {
1294 			log_error("Failed to wipe swap signature on %s", name);
1295 			return 0;
1296 		}
1297 	}
1298 
1299 	if (wipe_swap == -1) {
1300 		log_error("Fatal error while trying to detect swap "
1301 			  "signature on %s", name);
1302 		return 0;
1303 	}
1304 
1305 	if (sigint_caught())
1306 		return 0;
1307 
1308 	if (pv && !is_orphan(pv) && pp->force) {
1309 		log_warn("WARNING: Forcing physical volume creation on "
1310 			  "%s%s%s%s", name,
1311 			  !is_orphan(pv) ? " of volume group \"" : "",
1312 			  !is_orphan(pv) ? pv_vg_name(pv) : "",
1313 			  !is_orphan(pv) ? "\"" : "");
1314 	}
1315 
1316 	return 1;
1317 }
1318 
1319 void pvcreate_params_set_defaults(struct pvcreate_params *pp)
1320 {
1321 	memset(pp, 0, sizeof(*pp));
1322 	pp->zero = 1;
1323 	pp->size = 0;
1324 	pp->data_alignment = UINT64_C(0);
1325 	pp->data_alignment_offset = UINT64_C(0);
1326 	pp->pvmetadatacopies = DEFAULT_PVMETADATACOPIES;
1327 	pp->pvmetadatasize = DEFAULT_PVMETADATASIZE;
1328 	pp->labelsector = DEFAULT_LABELSECTOR;
1329 	pp->idp = 0;
1330 	pp->pe_start = 0;
1331 	pp->extent_count = 0;
1332 	pp->extent_size = 0;
1333 	pp->restorefile = 0;
1334 	pp->force = PROMPT;
1335 	pp->yes = 0;
1336 }
1337 
1338 /*
1339  * pvcreate_single() - initialize a device with PV label and metadata area
1340  *
1341  * Parameters:
1342  * - pv_name: device path to initialize
1343  * - pp: parameters to pass to pv_create; if NULL, use default values
1344  *
1345  * Returns:
1346  * NULL: error
1347  * struct physical_volume * (non-NULL): handle to physical volume created
1348  */
1349 struct physical_volume * pvcreate_single(struct cmd_context *cmd,
1350 					 const char *pv_name,
1351 					 struct pvcreate_params *pp)
1352 {
1353 	void *pv;
1354 	struct device *dev;
1355 	struct dm_list mdas;
1356 	struct pvcreate_params default_pp;
1357 	char buffer[64] __attribute((aligned(8)));
1358 
1359 	pvcreate_params_set_defaults(&default_pp);
1360 	if (!pp)
1361 		pp = &default_pp;
1362 
1363 	if (pp->idp) {
1364 		if ((dev = device_from_pvid(cmd, pp->idp)) &&
1365 		    (dev != dev_cache_get(pv_name, cmd->filter))) {
1366 			if (!id_write_format((const struct id*)&pp->idp->uuid,
1367 			    buffer, sizeof(buffer)))
1368 				return_NULL;
1369 			log_error("uuid %s already in use on \"%s\"", buffer,
1370 				  dev_name(dev));
1371 			return NULL;
1372 		}
1373 	}
1374 
1375 	if (!pvcreate_check(cmd, pv_name, pp))
1376 		goto error;
1377 
1378 	if (sigint_caught())
1379 		goto error;
1380 
1381 	if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
1382 		log_error("%s: Couldn't find device.  Check your filters?",
1383 			  pv_name);
1384 		goto error;
1385 	}
1386 
1387 	dm_list_init(&mdas);
1388 	if (!(pv = pv_create(cmd, dev, pp->idp, pp->size,
1389 			     pp->data_alignment, pp->data_alignment_offset,
1390 			     pp->pe_start, pp->extent_count, pp->extent_size,
1391 			     pp->pvmetadatacopies,
1392 			     pp->pvmetadatasize,&mdas))) {
1393 		log_error("Failed to setup physical volume \"%s\"", pv_name);
1394 		goto error;
1395 	}
1396 
1397 	log_verbose("Set up physical volume for \"%s\" with %" PRIu64
1398 		    " available sectors", pv_name, pv_size(pv));
1399 
1400 	/* Wipe existing label first */
1401 	if (!label_remove(pv_dev(pv))) {
1402 		log_error("Failed to wipe existing label on %s", pv_name);
1403 		goto error;
1404 	}
1405 
1406 	if (pp->zero) {
1407 		log_verbose("Zeroing start of device %s", pv_name);
1408 		if (!dev_open_quiet(dev)) {
1409 			log_error("%s not opened: device not zeroed", pv_name);
1410 			goto error;
1411 		}
1412 
1413 		if (!dev_set(dev, UINT64_C(0), (size_t) 2048, 0)) {
1414 			log_error("%s not wiped: aborting", pv_name);
1415 			dev_close(dev);
1416 			goto error;
1417 		}
1418 		dev_close(dev);
1419 	}
1420 
1421 	log_very_verbose("Writing physical volume data to disk \"%s\"",
1422 			 pv_name);
1423 	if (!(pv_write(cmd, (struct physical_volume *)pv, &mdas,
1424 		       pp->labelsector))) {
1425 		log_error("Failed to write physical volume \"%s\"", pv_name);
1426 		goto error;
1427 	}
1428 
1429 	log_print("Physical volume \"%s\" successfully created", pv_name);
1430 
1431 	return pv;
1432 
1433       error:
1434 	return NULL;
1435 }
1436 
/*
 * Hand a PV allocated from pool 'mem' back to that pool via dm_pool_free().
 * NOTE(review): dm_pool_free() presumably also releases everything allocated
 * from the pool after 'pv' - confirm against the libdevmapper pool docs.
 */
static void _free_pv(struct dm_pool *mem, struct physical_volume *pv)
{
	dm_pool_free(mem, pv);
}
1441 
1442 static struct physical_volume *_alloc_pv(struct dm_pool *mem, struct device *dev)
1443 {
1444 	struct physical_volume *pv = dm_pool_zalloc(mem, sizeof(*pv));
1445 
1446 	if (!pv)
1447 		return_NULL;
1448 
1449 	if (!(pv->vg_name = dm_pool_zalloc(mem, NAME_LEN))) {
1450 		dm_pool_free(mem, pv);
1451 		return NULL;
1452 	}
1453 
1454 	pv->pe_size = 0;
1455 	pv->pe_start = 0;
1456 	pv->pe_count = 0;
1457 	pv->pe_alloc_count = 0;
1458 	pv->pe_align = 0;
1459 	pv->pe_align_offset = 0;
1460 	pv->fmt = NULL;
1461 	pv->dev = dev;
1462 
1463 	pv->status = ALLOCATABLE_PV;
1464 
1465 	dm_list_init(&pv->tags);
1466 	dm_list_init(&pv->segments);
1467 
1468 	return pv;
1469 }
1470 
1471 /**
1472  * pv_create - initialize a physical volume for use with a volume group
1473  *
1474  * @fmt: format type
1475  * @dev: PV device to initialize
1476  * @size: size of the PV in sectors
1477  * @data_alignment: requested alignment of data
1478  * @data_alignment_offset: requested offset to aligned data
1479  * @pe_start: physical extent start
1480  * @existing_extent_count
1481  * @existing_extent_size
1482  * @pvmetadatacopies
1483  * @pvmetadatasize
1484  * @mdas
1485  *
1486  * Returns:
1487  *   PV handle - physical volume initialized successfully
1488  *   NULL - invalid parameter or problem initializing the physical volume
1489  *
1490  * Note:
1491  *   FIXME: shorten argument list and replace with explict 'set' functions
1492  */
1493 struct physical_volume *pv_create(const struct cmd_context *cmd,
1494 				  struct device *dev,
1495 				  struct id *id, uint64_t size,
1496 				  unsigned long data_alignment,
1497 				  unsigned long data_alignment_offset,
1498 				  uint64_t pe_start,
1499 				  uint32_t existing_extent_count,
1500 				  uint32_t existing_extent_size,
1501 				  int pvmetadatacopies,
1502 				  uint64_t pvmetadatasize, struct dm_list *mdas)
1503 {
1504 	const struct format_type *fmt = cmd->fmt;
1505 	struct dm_pool *mem = fmt->cmd->mem;
1506 	struct physical_volume *pv = _alloc_pv(mem, dev);
1507 
1508 	if (!pv)
1509 		return NULL;
1510 
1511 	if (id)
1512 		memcpy(&pv->id, id, sizeof(*id));
1513 	else if (!id_create(&pv->id)) {
1514 		log_error("Failed to create random uuid for %s.",
1515 			  dev_name(dev));
1516 		goto bad;
1517 	}
1518 
1519 	if (!dev_get_size(pv->dev, &pv->size)) {
1520 		log_error("%s: Couldn't get size.", pv_dev_name(pv));
1521 		goto bad;
1522 	}
1523 
1524 	if (size) {
1525 		if (size > pv->size)
1526 			log_warn("WARNING: %s: Overriding real size. "
1527 				  "You could lose data.", pv_dev_name(pv));
1528 		log_verbose("%s: Pretending size is %" PRIu64 " sectors.",
1529 			    pv_dev_name(pv), size);
1530 		pv->size = size;
1531 	}
1532 
1533 	if (pv->size < PV_MIN_SIZE) {
1534 		log_error("%s: Size must exceed minimum of %ld sectors.",
1535 			  pv_dev_name(pv), PV_MIN_SIZE);
1536 		goto bad;
1537 	}
1538 
1539 	if (pv->size < data_alignment) {
1540 		log_error("%s: Data alignment must not exceed device size.",
1541 			  pv_dev_name(pv));
1542 		goto bad;
1543 	}
1544 
1545 	pv->fmt = fmt;
1546 	pv->vg_name = fmt->orphan_vg_name;
1547 
1548 	if (!fmt->ops->pv_setup(fmt, pe_start, existing_extent_count,
1549 				existing_extent_size, data_alignment,
1550 				data_alignment_offset,
1551 				pvmetadatacopies, pvmetadatasize, mdas,
1552 				pv, NULL)) {
1553 		log_error("%s: Format-specific setup of physical volume "
1554 			  "failed.", pv_dev_name(pv));
1555 		goto bad;
1556 	}
1557 
1558 	return pv;
1559 
1560       bad:
1561 	_free_pv(mem, pv);
1562 	return NULL;
1563 }
1564 
/* FIXME: liblvm todo - make into function that returns handle */
/*
 * Public wrapper around _find_pv_in_vg(): look up the pv_list entry of 'vg'
 * that corresponds to the device named 'pv_name'.
 * Returns the matching entry, or NULL if none matches.
 */
struct pv_list *find_pv_in_vg(const struct volume_group *vg,
			      const char *pv_name)
{
	return _find_pv_in_vg(vg, pv_name);
}
1571 
1572 static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
1573 				      const char *pv_name)
1574 {
1575 	struct pv_list *pvl;
1576 
1577 	dm_list_iterate_items(pvl, &vg->pvs)
1578 		if (pvl->pv->dev == dev_cache_get(pv_name, vg->cmd->filter))
1579 			return pvl;
1580 
1581 	return NULL;
1582 }
1583 
1584 struct pv_list *find_pv_in_pv_list(const struct dm_list *pl,
1585 				   const struct physical_volume *pv)
1586 {
1587 	struct pv_list *pvl;
1588 
1589 	dm_list_iterate_items(pvl, pl)
1590 		if (pvl->pv == pv)
1591 			return pvl;
1592 
1593 	return NULL;
1594 }
1595 
1596 int pv_is_in_vg(struct volume_group *vg, struct physical_volume *pv)
1597 {
1598 	struct pv_list *pvl;
1599 
1600 	dm_list_iterate_items(pvl, &vg->pvs)
1601 		if (pv == pvl->pv)
1602 			 return 1;
1603 
1604 	return 0;
1605 }
1606 
1607 /**
1608  * find_pv_in_vg_by_uuid - Find PV in VG by PV UUID
1609  * @vg: volume group to search
1610  * @id: UUID of the PV to match
1611  *
1612  * Returns:
1613  *   PV handle - if UUID of PV found in VG
1614  *   NULL - invalid parameter or UUID of PV not found in VG
1615  *
1616  * Note
1617  *   FIXME - liblvm todo - make into function that takes VG handle
1618  */
1619 struct physical_volume *find_pv_in_vg_by_uuid(const struct volume_group *vg,
1620 			    const struct id *id)
1621 {
1622 	return _find_pv_in_vg_by_uuid(vg, id);
1623 }
1624 
1625 
1626 static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
1627 						      const struct id *id)
1628 {
1629 	struct pv_list *pvl;
1630 
1631 	dm_list_iterate_items(pvl, &vg->pvs)
1632 		if (id_equal(&pvl->pv->id, id))
1633 			return pvl->pv;
1634 
1635 	return NULL;
1636 }
1637 
1638 struct lv_list *find_lv_in_vg(const struct volume_group *vg,
1639 			      const char *lv_name)
1640 {
1641 	struct lv_list *lvl;
1642 	const char *ptr;
1643 
1644 	/* Use last component */
1645 	if ((ptr = strrchr(lv_name, '/')))
1646 		ptr++;
1647 	else
1648 		ptr = lv_name;
1649 
1650 	dm_list_iterate_items(lvl, &vg->lvs)
1651 		if (!strcmp(lvl->lv->name, ptr))
1652 			return lvl;
1653 
1654 	return NULL;
1655 }
1656 
1657 struct lv_list *find_lv_in_lv_list(const struct dm_list *ll,
1658 				   const struct logical_volume *lv)
1659 {
1660 	struct lv_list *lvl;
1661 
1662 	dm_list_iterate_items(lvl, ll)
1663 		if (lvl->lv == lv)
1664 			return lvl;
1665 
1666 	return NULL;
1667 }
1668 
1669 struct lv_list *find_lv_in_vg_by_lvid(struct volume_group *vg,
1670 				      const union lvid *lvid)
1671 {
1672 	struct lv_list *lvl;
1673 
1674 	dm_list_iterate_items(lvl, &vg->lvs)
1675 		if (!strncmp(lvl->lv->lvid.s, lvid->s, sizeof(*lvid)))
1676 			return lvl;
1677 
1678 	return NULL;
1679 }
1680 
1681 struct logical_volume *find_lv(const struct volume_group *vg,
1682 			       const char *lv_name)
1683 {
1684 	struct lv_list *lvl = find_lv_in_vg(vg, lv_name);
1685 	return lvl ? lvl->lv : NULL;
1686 }
1687 
1688 struct physical_volume *find_pv(struct volume_group *vg, struct device *dev)
1689 {
1690 	struct pv_list *pvl;
1691 
1692 	dm_list_iterate_items(pvl, &vg->pvs)
1693 		if (dev == pvl->pv->dev)
1694 			return pvl->pv;
1695 
1696 	return NULL;
1697 }
1698 
/* FIXME: liblvm todo - make into function that returns handle */
/*
 * Public wrapper around _find_pv_by_name(): read the PV on device 'pv_name'
 * and return it only if it belongs to a (non-orphan) volume group.
 * Returns NULL on any failure.
 */
struct physical_volume *find_pv_by_name(struct cmd_context *cmd,
					const char *pv_name)
{
	return _find_pv_by_name(cmd, pv_name);
}
1705 
1706 
1707 static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
1708 			 			const char *pv_name)
1709 {
1710 	struct physical_volume *pv;
1711 
1712 	if (!(pv = _pv_read(cmd, cmd->mem, pv_name, NULL, NULL, 1, 0))) {
1713 		log_error("Physical volume %s not found", pv_name);
1714 		return NULL;
1715 	}
1716 
1717 	if (is_orphan_vg(pv->vg_name)) {
1718 		/* If a PV has no MDAs - need to search all VGs for it */
1719 		if (!scan_vgs_for_pvs(cmd))
1720 			return_NULL;
1721 		if (!(pv = _pv_read(cmd, cmd->mem, pv_name, NULL, NULL, 1, 0))) {
1722 			log_error("Physical volume %s not found", pv_name);
1723 			return NULL;
1724 		}
1725 	}
1726 
1727 	if (is_orphan_vg(pv->vg_name)) {
1728 		log_error("Physical volume %s not in a volume group", pv_name);
1729 		return NULL;
1730 	}
1731 
1732 	return pv;
1733 }
1734 
1735 /* Find segment at a given logical extent in an LV */
1736 struct lv_segment *find_seg_by_le(const struct logical_volume *lv, uint32_t le)
1737 {
1738 	struct lv_segment *seg;
1739 
1740 	dm_list_iterate_items(seg, &lv->segments)
1741 		if (le >= seg->le && le < seg->le + seg->len)
1742 			return seg;
1743 
1744 	return NULL;
1745 }
1746 
1747 struct lv_segment *first_seg(const struct logical_volume *lv)
1748 {
1749 	struct lv_segment *seg;
1750 
1751 	dm_list_iterate_items(seg, &lv->segments)
1752 		return seg;
1753 
1754 	return NULL;
1755 }
1756 
1757 /* Find segment at a given physical extent in a PV */
1758 struct pv_segment *find_peg_by_pe(const struct physical_volume *pv, uint32_t pe)
1759 {
1760 	struct pv_segment *peg;
1761 
1762 	dm_list_iterate_items(peg, &pv->segments)
1763 		if (pe >= peg->pe && pe < peg->pe + peg->len)
1764 			return peg;
1765 
1766 	return NULL;
1767 }
1768 
1769 int vg_remove_mdas(struct volume_group *vg)
1770 {
1771 	struct metadata_area *mda;
1772 
1773 	/* FIXME Improve recovery situation? */
1774 	/* Remove each copy of the metadata */
1775 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
1776 		if (mda->ops->vg_remove &&
1777 		    !mda->ops->vg_remove(vg->fid, vg, mda))
1778 			return_0;
1779 	}
1780 
1781 	return 1;
1782 }
1783 
1784 unsigned snapshot_count(const struct volume_group *vg)
1785 {
1786 	struct lv_list *lvl;
1787 	unsigned num_snapshots = 0;
1788 
1789 	dm_list_iterate_items(lvl, &vg->lvs)
1790 		if (lv_is_cow(lvl->lv))
1791 			num_snapshots++;
1792 
1793 	return num_snapshots;
1794 }
1795 
1796 unsigned vg_visible_lvs(const struct volume_group *vg)
1797 {
1798 	struct lv_list *lvl;
1799 	unsigned lv_count = 0;
1800 
1801 	dm_list_iterate_items(lvl, &vg->lvs) {
1802 		if (lv_is_visible(lvl->lv))
1803 			lv_count++;
1804 	}
1805 
1806 	return lv_count;
1807 }
1808 
1809 /*
1810  * Determine whether two vgs are compatible for merging.
1811  */
1812 int vgs_are_compatible(struct cmd_context *cmd __attribute((unused)),
1813 		       struct volume_group *vg_from,
1814 		       struct volume_group *vg_to)
1815 {
1816 	struct lv_list *lvl1, *lvl2;
1817 	struct pv_list *pvl;
1818 	char *name1, *name2;
1819 
1820 	if (lvs_in_vg_activated(vg_from)) {
1821 		log_error("Logical volumes in \"%s\" must be inactive",
1822 			  vg_from->name);
1823 		return 0;
1824 	}
1825 
1826 	/* Check compatibility */
1827 	if (vg_to->extent_size != vg_from->extent_size) {
1828 		log_error("Extent sizes differ: %d (%s) and %d (%s)",
1829 			  vg_to->extent_size, vg_to->name,
1830 			  vg_from->extent_size, vg_from->name);
1831 		return 0;
1832 	}
1833 
1834 	if (vg_to->max_pv &&
1835 	    (vg_to->max_pv < vg_to->pv_count + vg_from->pv_count)) {
1836 		log_error("Maximum number of physical volumes (%d) exceeded "
1837 			  " for \"%s\" and \"%s\"", vg_to->max_pv, vg_to->name,
1838 			  vg_from->name);
1839 		return 0;
1840 	}
1841 
1842 	if (vg_to->max_lv &&
1843 	    (vg_to->max_lv < vg_visible_lvs(vg_to) + vg_visible_lvs(vg_from))) {
1844 		log_error("Maximum number of logical volumes (%d) exceeded "
1845 			  " for \"%s\" and \"%s\"", vg_to->max_lv, vg_to->name,
1846 			  vg_from->name);
1847 		return 0;
1848 	}
1849 
1850 	/* Metadata types must be the same */
1851 	if (vg_to->fid->fmt != vg_from->fid->fmt) {
1852 		log_error("Metadata types differ for \"%s\" and \"%s\"",
1853 			  vg_to->name, vg_from->name);
1854 		return 0;
1855 	}
1856 
1857 	/* Clustering attribute must be the same */
1858 	if (vg_is_clustered(vg_to) != vg_is_clustered(vg_from)) {
1859 		log_error("Clustered attribute differs for \"%s\" and \"%s\"",
1860 			  vg_to->name, vg_from->name);
1861 		return 0;
1862 	}
1863 
1864 	/* Check no conflicts with LV names */
1865 	dm_list_iterate_items(lvl1, &vg_to->lvs) {
1866 		name1 = lvl1->lv->name;
1867 
1868 		dm_list_iterate_items(lvl2, &vg_from->lvs) {
1869 			name2 = lvl2->lv->name;
1870 
1871 			if (!strcmp(name1, name2)) {
1872 				log_error("Duplicate logical volume "
1873 					  "name \"%s\" "
1874 					  "in \"%s\" and \"%s\"",
1875 					  name1, vg_to->name, vg_from->name);
1876 				return 0;
1877 			}
1878 		}
1879 	}
1880 
1881 	/* Check no PVs are constructed from either VG */
1882 	dm_list_iterate_items(pvl, &vg_to->pvs) {
1883 		if (pv_uses_vg(pvl->pv, vg_from)) {
1884 			log_error("Physical volume %s might be constructed "
1885 				  "from same volume group %s.",
1886 				  pv_dev_name(pvl->pv), vg_from->name);
1887 			return 0;
1888 		}
1889 	}
1890 
1891 	dm_list_iterate_items(pvl, &vg_from->pvs) {
1892 		if (pv_uses_vg(pvl->pv, vg_to)) {
1893 			log_error("Physical volume %s might be constructed "
1894 				  "from same volume group %s.",
1895 				  pv_dev_name(pvl->pv), vg_to->name);
1896 			return 0;
1897 		}
1898 	}
1899 
1900 	return 1;
1901 }
1902 
/*
 * Context handed through _lv_each_dependency() during the postorder walk:
 * the user callback and its opaque argument.
 */
struct _lv_postorder_baton {
	int (*fn)(struct logical_volume *lv, void *data);	/* user callback */
	void *data;						/* passed to fn unchanged */
};

/* Forward declaration: _lv_postorder_level() and _lv_postorder_visit() call each other. */
static int _lv_postorder_visit(struct logical_volume *,
			       int (*fn)(struct logical_volume *lv, void *data),
			       void *data);
1911 
1912 static int _lv_postorder_level(struct logical_volume *lv, void *data)
1913 {
1914 	struct _lv_postorder_baton *baton = data;
1915 	if (lv->status & POSTORDER_OPEN_FLAG)
1916 		return 1; // a data structure loop has closed...
1917 	lv->status |= POSTORDER_OPEN_FLAG;
1918 	int r =_lv_postorder_visit(lv, baton->fn, baton->data);
1919 	lv->status &= ~POSTORDER_OPEN_FLAG;
1920 	lv->status |= POSTORDER_FLAG;
1921 	return r;
1922 };
1923 
1924 static int _lv_each_dependency(struct logical_volume *lv,
1925 			       int (*fn)(struct logical_volume *lv, void *data),
1926 			       void *data)
1927 {
1928 	int i, s;
1929 	struct lv_segment *lvseg;
1930 
1931 	struct logical_volume *deps[] = {
1932 		lv->snapshot ? lv->snapshot->origin : 0,
1933 		lv->snapshot ? lv->snapshot->cow : 0 };
1934 	for (i = 0; i < sizeof(deps) / sizeof(*deps); ++i) {
1935 		if (deps[i] && !fn(deps[i], data))
1936 			return_0;
1937 	}
1938 
1939 	dm_list_iterate_items(lvseg, &lv->segments) {
1940 		if (lvseg->log_lv && !fn(lvseg->log_lv, data))
1941 			return_0;
1942 		for (s = 0; s < lvseg->area_count; ++s) {
1943 			if (seg_type(lvseg, s) == AREA_LV && !fn(seg_lv(lvseg,s), data))
1944 				return_0;
1945 		}
1946 	}
1947 	return 1;
1948 }
1949 
1950 static int _lv_postorder_cleanup(struct logical_volume *lv, void *data)
1951 {
1952 	if (!(lv->status & POSTORDER_FLAG))
1953 		return 1;
1954 	lv->status &= ~POSTORDER_FLAG;
1955 
1956 	if (!_lv_each_dependency(lv, _lv_postorder_cleanup, data))
1957 		return_0;
1958 	return 1;
1959 }
1960 
1961 static int _lv_postorder_visit(struct logical_volume *lv,
1962 			       int (*fn)(struct logical_volume *lv, void *data),
1963 			       void *data)
1964 {
1965 	struct _lv_postorder_baton baton;
1966 	int r;
1967 
1968 	if (lv->status & POSTORDER_FLAG)
1969 		return 1;
1970 
1971 	baton.fn = fn;
1972 	baton.data = data;
1973 	r = _lv_each_dependency(lv, _lv_postorder_level, &baton);
1974 	if (r)
1975 		r = fn(lv, data);
1976 
1977 	return r;
1978 }
1979 
1980 /*
1981  * This will walk the LV dependency graph in depth-first order and in the
1982  * postorder, call a callback function "fn". The void *data is passed along all
1983  * the calls. The callback may return zero to indicate an error and terminate
1984  * the depth-first walk. The error is propagated to return value of
1985  * _lv_postorder.
1986  */
1987 static int _lv_postorder(struct logical_volume *lv,
1988 			       int (*fn)(struct logical_volume *lv, void *data),
1989 			       void *data)
1990 {
1991 	int r;
1992 	r = _lv_postorder_visit(lv, fn, data);
1993 	_lv_postorder_cleanup(lv, 0);
1994 	return r;
1995 }
1996 
/* Accumulator for _lv_mark_if_partial_collect(). */
struct _lv_mark_if_partial_baton {
	int partial;	/* set to 1 once any visited LV has PARTIAL_LV set */
};
2000 
2001 static int _lv_mark_if_partial_collect(struct logical_volume *lv, void *data)
2002 {
2003 	struct _lv_mark_if_partial_baton *baton = data;
2004 	if (lv->status & PARTIAL_LV)
2005 		baton->partial = 1;
2006 
2007 	return 1;
2008 }
2009 
2010 static int _lv_mark_if_partial_single(struct logical_volume *lv, void *data)
2011 {
2012 	int s;
2013 	struct _lv_mark_if_partial_baton baton;
2014 	struct lv_segment *lvseg;
2015 
2016 	dm_list_iterate_items(lvseg, &lv->segments) {
2017 		for (s = 0; s < lvseg->area_count; ++s) {
2018 			if (seg_type(lvseg, s) == AREA_PV) {
2019 				if (seg_pv(lvseg, s)->status & MISSING_PV)
2020 					lv->status |= PARTIAL_LV;
2021 			}
2022 		}
2023 	}
2024 
2025 	baton.partial = 0;
2026 	_lv_each_dependency(lv, _lv_mark_if_partial_collect, &baton);
2027 
2028 	if (baton.partial)
2029 		lv->status |= PARTIAL_LV;
2030 
2031 	return 1;
2032 }
2033 
/*
 * Run _lv_mark_if_partial_single() over 'lv' and its dependencies in
 * postorder.  Returns the result of _lv_postorder().
 */
static int _lv_mark_if_partial(struct logical_volume *lv)
{
	return _lv_postorder(lv, _lv_mark_if_partial_single, NULL);
}
2038 
2039 /*
2040  * Mark LVs with missing PVs using PARTIAL_LV status flag. The flag is
2041  * propagated transitively, so LVs referencing other LVs are marked
2042  * partial as well, if any of their referenced LVs are marked partial.
2043  */
2044 static int _vg_mark_partial_lvs(struct volume_group *vg)
2045 {
2046 	struct logical_volume *lv;
2047 	struct lv_list *lvl;
2048 
2049 	dm_list_iterate_items(lvl, &vg->lvs) {
2050 		lv = lvl->lv;
2051 		if (!_lv_mark_if_partial(lv))
2052 			return_0;
2053 	}
2054 	return 1;
2055 }
2056 
2057 /*
2058  * Be sure that all PV devices have cached read ahead in dev-cache
2059  * Currently it takes read_ahead from first PV segment only
2060  */
2061 static int _lv_read_ahead_single(struct logical_volume *lv, void *data)
2062 {
2063 	struct lv_segment *seg = first_seg(lv);
2064 	uint32_t seg_read_ahead = 0, *read_ahead = data;
2065 
2066 	if (seg && seg->area_count && seg_type(seg, 0) == AREA_PV)
2067 		dev_get_read_ahead(seg_pv(seg, 0)->dev, &seg_read_ahead);
2068 
2069 	if (seg_read_ahead > *read_ahead)
2070 		*read_ahead = seg_read_ahead;
2071 
2072 	return 1;
2073 }
2074 
2075 /*
2076  * Calculate readahead for logical volume from underlying PV devices.
2077  * If read_ahead is NULL, only ensure that readahead of PVs are preloaded
2078  * into PV struct device in dev cache.
2079  */
2080 void lv_calculate_readahead(const struct logical_volume *lv, uint32_t *read_ahead)
2081 {
2082 	uint32_t _read_ahead = 0;
2083 
2084 	if (lv->read_ahead == DM_READ_AHEAD_AUTO)
2085 		_lv_postorder((struct logical_volume *)lv, _lv_read_ahead_single, &_read_ahead);
2086 
2087 	if (read_ahead) {
2088 		log_debug("Calculated readahead of LV %s is %u", lv->name, _read_ahead);
2089 		*read_ahead = _read_ahead;
2090 	}
2091 }
2092 
2093 int vg_validate(struct volume_group *vg)
2094 {
2095 	struct pv_list *pvl, *pvl2;
2096 	struct lv_list *lvl, *lvl2;
2097 	char uuid[64] __attribute((aligned(8)));
2098 	int r = 1;
2099 	uint32_t hidden_lv_count = 0;
2100 
2101 	/* FIXME Also check there's no data/metadata overlap */
2102 
2103 	dm_list_iterate_items(pvl, &vg->pvs) {
2104 		dm_list_iterate_items(pvl2, &vg->pvs) {
2105 			if (pvl == pvl2)
2106 				break;
2107 			if (id_equal(&pvl->pv->id,
2108 				     &pvl2->pv->id)) {
2109 				if (!id_write_format(&pvl->pv->id, uuid,
2110 						     sizeof(uuid)))
2111 					 stack;
2112 				log_error("Internal error: Duplicate PV id "
2113 					  "%s detected for %s in %s.",
2114 					  uuid, pv_dev_name(pvl->pv),
2115 					  vg->name);
2116 				r = 0;
2117 			}
2118 		}
2119 
2120 		if (strcmp(pvl->pv->vg_name, vg->name)) {
2121 			log_error("Internal error: VG name for PV %s is corrupted",
2122 				  pv_dev_name(pvl->pv));
2123 			r = 0;
2124 		}
2125 	}
2126 
2127 	if (!check_pv_segments(vg)) {
2128 		log_error("Internal error: PV segments corrupted in %s.",
2129 			  vg->name);
2130 		r = 0;
2131 	}
2132 
2133 	/*
2134 	 * Count all non-snapshot invisible LVs
2135 	 */
2136 	dm_list_iterate_items(lvl, &vg->lvs) {
2137 		if (lvl->lv->status & VISIBLE_LV)
2138 			continue;
2139 
2140 		/* snapshots */
2141 		if (lv_is_cow(lvl->lv))
2142 			continue;
2143 
2144 		/* virtual origins are always hidden */
2145 		if (lv_is_origin(lvl->lv) && !lv_is_virtual_origin(lvl->lv))
2146 			continue;
2147 
2148 		/* count other non-snapshot invisible volumes */
2149 		hidden_lv_count++;
2150 
2151 		/*
2152 		 *  FIXME: add check for unreferenced invisible LVs
2153 		 *   - snapshot cow & origin
2154 		 *   - mirror log & images
2155 		 *   - mirror conversion volumes (_mimagetmp*)
2156 		 */
2157 	}
2158 
2159 	/*
2160 	 * all volumes = visible LVs + snapshot_cows + invisible LVs
2161 	 */
2162 	if (((uint32_t) dm_list_size(&vg->lvs)) !=
2163 	    vg_visible_lvs(vg) + snapshot_count(vg) + hidden_lv_count) {
2164 		log_error("Internal error: #internal LVs (%u) != #LVs (%"
2165 			  PRIu32 ") + #snapshots (%" PRIu32 ") + #internal LVs %u in VG %s",
2166 			  dm_list_size(&vg->lvs), vg_visible_lvs(vg),
2167 			  snapshot_count(vg), hidden_lv_count, vg->name);
2168 		r = 0;
2169 	}
2170 
2171 	dm_list_iterate_items(lvl, &vg->lvs) {
2172 		dm_list_iterate_items(lvl2, &vg->lvs) {
2173 			if (lvl == lvl2)
2174 				break;
2175 			if (!strcmp(lvl->lv->name, lvl2->lv->name)) {
2176 				log_error("Internal error: Duplicate LV name "
2177 					  "%s detected in %s.", lvl->lv->name,
2178 					  vg->name);
2179 				r = 0;
2180 			}
2181 			if (id_equal(&lvl->lv->lvid.id[1],
2182 				     &lvl2->lv->lvid.id[1])) {
2183 				if (!id_write_format(&lvl->lv->lvid.id[1], uuid,
2184 						     sizeof(uuid)))
2185 					 stack;
2186 				log_error("Internal error: Duplicate LV id "
2187 					  "%s detected for %s and %s in %s.",
2188 					  uuid, lvl->lv->name, lvl2->lv->name,
2189 					  vg->name);
2190 				r = 0;
2191 			}
2192 		}
2193 	}
2194 
2195 	dm_list_iterate_items(lvl, &vg->lvs) {
2196 		if (!check_lv_segments(lvl->lv, 1)) {
2197 			log_error("Internal error: LV segments corrupted in %s.",
2198 				  lvl->lv->name);
2199 			r = 0;
2200 		}
2201 	}
2202 
2203 	if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS) &&
2204 	    (!vg->max_lv || !vg->max_pv)) {
2205 		log_error("Internal error: Volume group %s has limited PV/LV count"
2206 			  " but limit is not set.", vg->name);
2207 		r = 0;
2208 	}
2209 
2210 	if (vg_max_lv_reached(vg))
2211 		stack;
2212 
2213 	return r;
2214 }
2215 
2216 /*
2217  * After vg_write() returns success,
2218  * caller MUST call either vg_commit() or vg_revert()
2219  */
2220 int vg_write(struct volume_group *vg)
2221 {
2222 	struct dm_list *mdah;
2223 	struct metadata_area *mda;
2224 
2225 	if (!vg_validate(vg))
2226 		return_0;
2227 
2228 	if (vg->status & PARTIAL_VG) {
2229 		log_error("Cannot update partial volume group %s.", vg->name);
2230 		return 0;
2231 	}
2232 
2233 	if (vg_missing_pv_count(vg) && !vg->cmd->handles_missing_pvs) {
2234 		log_error("Cannot update volume group %s while physical "
2235 			  "volumes are missing.", vg->name);
2236 		return 0;
2237 	}
2238 
2239 	if (vg_has_unknown_segments(vg) && !vg->cmd->handles_unknown_segments) {
2240 		log_error("Cannot update volume group %s with unknown segments in it!",
2241 			  vg->name);
2242 		return 0;
2243 	}
2244 
2245 
2246 	if (dm_list_empty(&vg->fid->metadata_areas)) {
2247 		log_error("Aborting vg_write: No metadata areas to write to!");
2248 		return 0;
2249 	}
2250 
2251 	if (!drop_cached_metadata(vg)) {
2252 		log_error("Unable to drop cached metadata for VG %s.", vg->name);
2253 		return 0;
2254 	}
2255 
2256 	vg->seqno++;
2257 
2258 	/* Write to each copy of the metadata area */
2259 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2260 		if (!mda->ops->vg_write) {
2261 			log_error("Format does not support writing volume"
2262 				  "group metadata areas");
2263 			/* Revert */
2264 			dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
2265 				mda = dm_list_item(mdah, struct metadata_area);
2266 
2267 				if (mda->ops->vg_revert &&
2268 				    !mda->ops->vg_revert(vg->fid, vg, mda)) {
2269 					stack;
2270 				}
2271 			}
2272 			return 0;
2273 		}
2274 		if (!mda->ops->vg_write(vg->fid, vg, mda)) {
2275 			stack;
2276 			/* Revert */
2277 			dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
2278 				mda = dm_list_item(mdah, struct metadata_area);
2279 
2280 				if (mda->ops->vg_revert &&
2281 				    !mda->ops->vg_revert(vg->fid, vg, mda)) {
2282 					stack;
2283 				}
2284 			}
2285 			return 0;
2286 		}
2287 	}
2288 
2289 	/* Now pre-commit each copy of the new metadata */
2290 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2291 		if (mda->ops->vg_precommit &&
2292 		    !mda->ops->vg_precommit(vg->fid, vg, mda)) {
2293 			stack;
2294 			/* Revert */
2295 			dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2296 				if (mda->ops->vg_revert &&
2297 				    !mda->ops->vg_revert(vg->fid, vg, mda)) {
2298 					stack;
2299 				}
2300 			}
2301 			return 0;
2302 		}
2303 	}
2304 
2305 	return 1;
2306 }
2307 
2308 /* Commit pending changes */
2309 int vg_commit(struct volume_group *vg)
2310 {
2311 	struct metadata_area *mda;
2312 	int cache_updated = 0;
2313 	int failed = 0;
2314 
2315 	if (!vgname_is_locked(vg->name)) {
2316 		log_error("Internal error: Attempt to write new VG metadata "
2317 			  "without locking %s", vg->name);
2318 		return cache_updated;
2319 	}
2320 
2321 	/* Commit to each copy of the metadata area */
2322 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2323 		failed = 0;
2324 		if (mda->ops->vg_commit &&
2325 		    !mda->ops->vg_commit(vg->fid, vg, mda)) {
2326 			stack;
2327 			failed = 1;
2328 		}
2329 		/* Update cache first time we succeed */
2330 		if (!failed && !cache_updated) {
2331 			lvmcache_update_vg(vg, 0);
2332 			cache_updated = 1;
2333 		}
2334 	}
2335 
2336 	/* If update failed, remove any cached precommitted metadata. */
2337 	if (!cache_updated && !drop_cached_metadata(vg))
2338 		log_error("Attempt to drop cached metadata failed "
2339 			  "after commit for VG %s.", vg->name);
2340 
2341 	/* If at least one mda commit succeeded, it was committed */
2342 	return cache_updated;
2343 }
2344 
2345 /* Don't commit any pending changes */
2346 int vg_revert(struct volume_group *vg)
2347 {
2348 	struct metadata_area *mda;
2349 
2350 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2351 		if (mda->ops->vg_revert &&
2352 		    !mda->ops->vg_revert(vg->fid, vg, mda)) {
2353 			stack;
2354 		}
2355 	}
2356 
2357 	if (!drop_cached_metadata(vg))
2358 		log_error("Attempt to drop cached metadata failed "
2359 			  "after reverted update for VG %s.", vg->name);
2360 
2361 	return 1;
2362 }
2363 
2364 /* Make orphan PVs look like a VG */
2365 static struct volume_group *_vg_read_orphans(struct cmd_context *cmd,
2366 					     const char *orphan_vgname)
2367 {
2368 	struct lvmcache_vginfo *vginfo;
2369 	struct lvmcache_info *info;
2370 	struct pv_list *pvl;
2371 	struct volume_group *vg;
2372 	struct physical_volume *pv;
2373 	struct dm_pool *mem;
2374 
2375 	lvmcache_label_scan(cmd, 0);
2376 
2377 	if (!(vginfo = vginfo_from_vgname(orphan_vgname, NULL)))
2378 		return_NULL;
2379 
2380 	if (!(mem = dm_pool_create("vg_read orphan", VG_MEMPOOL_CHUNK)))
2381 		return_NULL;
2382 
2383 	if (!(vg = dm_pool_zalloc(mem, sizeof(*vg)))) {
2384 		log_error("vg allocation failed");
2385 		return NULL;
2386 	}
2387 	dm_list_init(&vg->pvs);
2388 	dm_list_init(&vg->lvs);
2389 	dm_list_init(&vg->tags);
2390 	dm_list_init(&vg->removed_pvs);
2391 	vg->vgmem = mem;
2392 	vg->cmd = cmd;
2393 	if (!(vg->name = dm_pool_strdup(mem, orphan_vgname))) {
2394 		log_error("vg name allocation failed");
2395 		goto bad;
2396 	}
2397 
2398 	/* create format instance with appropriate metadata area */
2399 	if (!(vg->fid = vginfo->fmt->ops->create_instance(vginfo->fmt,
2400 							  orphan_vgname, NULL,
2401 							  NULL))) {
2402 		log_error("Failed to create format instance");
2403 		goto bad;
2404 	}
2405 
2406 	dm_list_iterate_items(info, &vginfo->infos) {
2407 		if (!(pv = _pv_read(cmd, mem, dev_name(info->dev), NULL, NULL, 1, 0))) {
2408 			continue;
2409 		}
2410 		if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
2411 			log_error("pv_list allocation failed");
2412 			goto bad;
2413 		}
2414 		pvl->pv = pv;
2415 		dm_list_add(&vg->pvs, &pvl->list);
2416 		vg->pv_count++;
2417 	}
2418 
2419 	return vg;
2420 bad:
2421 	dm_pool_destroy(mem);
2422 	return NULL;
2423 }
2424 
2425 static int _update_pv_list(struct dm_pool *pvmem, struct dm_list *all_pvs, struct volume_group *vg)
2426 {
2427 	struct pv_list *pvl, *pvl2;
2428 
2429 	dm_list_iterate_items(pvl, &vg->pvs) {
2430 		dm_list_iterate_items(pvl2, all_pvs) {
2431 			if (pvl->pv->dev == pvl2->pv->dev)
2432 				goto next_pv;
2433 		}
2434 
2435 		/*
2436 		 * PV is not on list so add it.
2437 		 */
2438 		if (!(pvl2 = _copy_pvl(pvmem, pvl))) {
2439 			log_error("pv_list allocation for '%s' failed",
2440 				  pv_dev_name(pvl->pv));
2441 			return 0;
2442 		}
2443 		dm_list_add(all_pvs, &pvl2->list);
2444   next_pv:
2445 		;
2446 	}
2447 
2448 	return 1;
2449 }
2450 
2451 int vg_missing_pv_count(const struct volume_group *vg)
2452 {
2453 	int ret = 0;
2454 	struct pv_list *pvl;
2455 	dm_list_iterate_items(pvl, &vg->pvs) {
2456 		if (pvl->pv->status & MISSING_PV)
2457 			++ ret;
2458 	}
2459 	return ret;
2460 }
2461 
2462 /* Caller sets consistent to 1 if it's safe for vg_read_internal to correct
2463  * inconsistent metadata on disk (i.e. the VG write lock is held).
2464  * This guarantees only consistent metadata is returned.
2465  * If consistent is 0, caller must check whether consistent == 1 on return
2466  * and take appropriate action if it isn't (e.g. abort; get write lock
2467  * and call vg_read_internal again).
2468  *
2469  * If precommitted is set, use precommitted metadata if present.
2470  *
2471  * Either of vgname or vgid may be NULL.
2472  */
2473 static struct volume_group *_vg_read(struct cmd_context *cmd,
2474 				     const char *vgname,
2475 				     const char *vgid,
2476 				     int *consistent, unsigned precommitted)
2477 {
2478 	struct format_instance *fid;
2479 	const struct format_type *fmt;
2480 	struct volume_group *vg, *correct_vg = NULL;
2481 	struct metadata_area *mda;
2482 	struct lvmcache_info *info;
2483 	int inconsistent = 0;
2484 	int inconsistent_vgid = 0;
2485 	int inconsistent_pvs = 0;
2486 	unsigned use_precommitted = precommitted;
2487 	unsigned saved_handles_missing_pvs = cmd->handles_missing_pvs;
2488 	struct dm_list *pvids;
2489 	struct pv_list *pvl, *pvl2;
2490 	struct dm_list all_pvs;
2491 	char uuid[64] __attribute((aligned(8)));
2492 
2493 	if (is_orphan_vg(vgname)) {
2494 		if (use_precommitted) {
2495 			log_error("Internal error: vg_read_internal requires vgname "
2496 				  "with pre-commit.");
2497 			return NULL;
2498 		}
2499 		*consistent = 1;
2500 		return _vg_read_orphans(cmd, vgname);
2501 	}
2502 
2503 	if ((correct_vg = lvmcache_get_vg(vgid, precommitted))) {
2504 		if (vg_missing_pv_count(correct_vg)) {
2505 			log_verbose("There are %d physical volumes missing.",
2506 				    vg_missing_pv_count(correct_vg));
2507 			_vg_mark_partial_lvs(correct_vg);
2508 		}
2509 		*consistent = 1;
2510 		return correct_vg;
2511 	}
2512 
2513 	/* Find the vgname in the cache */
2514 	/* If it's not there we must do full scan to be completely sure */
2515 	if (!(fmt = fmt_from_vgname(vgname, vgid))) {
2516 		lvmcache_label_scan(cmd, 0);
2517 		if (!(fmt = fmt_from_vgname(vgname, vgid))) {
2518 			if (memlock())
2519 				return_NULL;
2520 			lvmcache_label_scan(cmd, 2);
2521 			if (!(fmt = fmt_from_vgname(vgname, vgid)))
2522 				return_NULL;
2523 		}
2524 	}
2525 
2526 	/* Now determine the correct vgname if none was supplied */
2527 	if (!vgname && !(vgname = vgname_from_vgid(cmd->mem, vgid)))
2528 		return_NULL;
2529 
2530 	if (use_precommitted && !(fmt->features & FMT_PRECOMMIT))
2531 		use_precommitted = 0;
2532 
2533 	/* create format instance with appropriate metadata area */
2534 	if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
2535 		log_error("Failed to create format instance");
2536 		return NULL;
2537 	}
2538 
2539 	/* Store pvids for later so we can check if any are missing */
2540 	if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
2541 		return_NULL;
2542 
2543 	/* Ensure contents of all metadata areas match - else do recovery */
2544 	dm_list_iterate_items(mda, &fid->metadata_areas) {
2545 		if ((use_precommitted &&
2546 		     !(vg = mda->ops->vg_read_precommit(fid, vgname, mda))) ||
2547 		    (!use_precommitted &&
2548 		     !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
2549 			inconsistent = 1;
2550 			vg_release(vg);
2551 			continue;
2552 		}
2553 		if (!correct_vg) {
2554 			correct_vg = vg;
2555 			continue;
2556 		}
2557 
2558 		/* FIXME Also ensure contents same - checksum compare? */
2559 		if (correct_vg->seqno != vg->seqno) {
2560 			inconsistent = 1;
2561 			if (vg->seqno > correct_vg->seqno) {
2562 				vg_release(correct_vg);
2563 				correct_vg = vg;
2564 			}
2565 		}
2566 
2567 		if (vg != correct_vg)
2568 			vg_release(vg);
2569 	}
2570 
2571 	/* Ensure every PV in the VG was in the cache */
2572 	if (correct_vg) {
2573 		/*
2574 		 * If the VG has PVs without mdas, they may still be
2575 		 * orphans in the cache: update the cache state here.
2576 		 */
2577 		if (!inconsistent &&
2578 		    dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) {
2579 			dm_list_iterate_items(pvl, &correct_vg->pvs) {
2580 				if (!pvl->pv->dev) {
2581 					inconsistent_pvs = 1;
2582 					break;
2583 				}
2584 
2585 				if (str_list_match_item(pvids, pvl->pv->dev->pvid))
2586 					continue;
2587 
2588 				/*
2589 				 * PV not marked as belonging to this VG in cache.
2590 				 * Check it's an orphan without metadata area.
2591 				 */
2592 				if (!(info = info_from_pvid(pvl->pv->dev->pvid, 1)) ||
2593 				   !info->vginfo || !is_orphan_vg(info->vginfo->vgname) ||
2594 				   dm_list_size(&info->mdas)) {
2595 					inconsistent_pvs = 1;
2596 					break;
2597 				}
2598 			}
2599 
2600 			/* If the check passed, let's update VG and recalculate pvids */
2601 			if (!inconsistent_pvs) {
2602 				log_debug("Updating cache for PVs without mdas "
2603 					  "in VG %s.", vgname);
2604 				lvmcache_update_vg(correct_vg, use_precommitted);
2605 
2606 				if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
2607 					return_NULL;
2608 			}
2609 		}
2610 
2611 		if (dm_list_size(&correct_vg->pvs) != dm_list_size(pvids)
2612 		    + vg_missing_pv_count(correct_vg)) {
2613 			log_debug("Cached VG %s had incorrect PV list",
2614 				  vgname);
2615 
2616 			if (memlock())
2617 				inconsistent = 1;
2618 			else {
2619 				vg_release(correct_vg);
2620 				correct_vg = NULL;
2621 			}
2622 		} else dm_list_iterate_items(pvl, &correct_vg->pvs) {
2623 			if (pvl->pv->status & MISSING_PV)
2624 				continue;
2625 			if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) {
2626 				log_debug("Cached VG %s had incorrect PV list",
2627 					  vgname);
2628 				vg_release(correct_vg);
2629 				correct_vg = NULL;
2630 				break;
2631 			}
2632 		}
2633 	}
2634 
2635 	dm_list_init(&all_pvs);
2636 
2637 	/* Failed to find VG where we expected it - full scan and retry */
2638 	if (!correct_vg) {
2639 		inconsistent = 0;
2640 
2641 		if (memlock())
2642 			return_NULL;
2643 		lvmcache_label_scan(cmd, 2);
2644 		if (!(fmt = fmt_from_vgname(vgname, vgid)))
2645 			return_NULL;
2646 
2647 		if (precommitted && !(fmt->features & FMT_PRECOMMIT))
2648 			use_precommitted = 0;
2649 
2650 		/* create format instance with appropriate metadata area */
2651 		if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
2652 			log_error("Failed to create format instance");
2653 			return NULL;
2654 		}
2655 
2656 		/* Ensure contents of all metadata areas match - else recover */
2657 		dm_list_iterate_items(mda, &fid->metadata_areas) {
2658 			if ((use_precommitted &&
2659 			     !(vg = mda->ops->vg_read_precommit(fid, vgname,
2660 								mda))) ||
2661 			    (!use_precommitted &&
2662 			     !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
2663 				inconsistent = 1;
2664 				continue;
2665 			}
2666 			if (!correct_vg) {
2667 				correct_vg = vg;
2668 				if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) {
2669 					vg_release(vg);
2670 					return_NULL;
2671 				}
2672 				continue;
2673 			}
2674 
2675 			if (strncmp((char *)vg->id.uuid,
2676 			    (char *)correct_vg->id.uuid, ID_LEN)) {
2677 				inconsistent = 1;
2678 				inconsistent_vgid = 1;
2679 			}
2680 
2681 			/* FIXME Also ensure contents same - checksums same? */
2682 			if (correct_vg->seqno != vg->seqno) {
2683 				inconsistent = 1;
2684 				if (!_update_pv_list(cmd->mem, &all_pvs, vg)) {
2685 					vg_release(vg);
2686 					vg_release(correct_vg);
2687 					return_NULL;
2688 				}
2689 				if (vg->seqno > correct_vg->seqno) {
2690 					vg_release(correct_vg);
2691 					correct_vg = vg;
2692 				}
2693 			}
2694 
2695 			if (vg != correct_vg)
2696 				vg_release(vg);
2697 		}
2698 
2699 		/* Give up looking */
2700 		if (!correct_vg)
2701 			return_NULL;
2702 	}
2703 
2704 	lvmcache_update_vg(correct_vg, use_precommitted);
2705 
2706 	if (inconsistent) {
2707 		/* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */
2708 		if (use_precommitted) {
2709 			log_error("Inconsistent pre-commit metadata copies "
2710 				  "for volume group %s", vgname);
2711 			vg_release(correct_vg);
2712 			return NULL;
2713 		}
2714 
2715 		if (!*consistent)
2716 			return correct_vg;
2717 
2718 		/* Don't touch if vgids didn't match */
2719 		if (inconsistent_vgid) {
2720 			log_error("Inconsistent metadata UUIDs found for "
2721 				  "volume group %s", vgname);
2722 			*consistent = 0;
2723 			return correct_vg;
2724 		}
2725 
2726 		log_warn("WARNING: Inconsistent metadata found for VG %s - updating "
2727 			 "to use version %u", vgname, correct_vg->seqno);
2728 
2729 		cmd->handles_missing_pvs = 1;
2730 		if (!vg_write(correct_vg)) {
2731 			log_error("Automatic metadata correction failed");
2732 			vg_release(correct_vg);
2733 			cmd->handles_missing_pvs = saved_handles_missing_pvs;
2734 			return NULL;
2735 		}
2736 		cmd->handles_missing_pvs = saved_handles_missing_pvs;
2737 
2738 		if (!vg_commit(correct_vg)) {
2739 			log_error("Automatic metadata correction commit "
2740 				  "failed");
2741 			vg_release(correct_vg);
2742 			return NULL;
2743 		}
2744 
2745 		dm_list_iterate_items(pvl, &all_pvs) {
2746 			dm_list_iterate_items(pvl2, &correct_vg->pvs) {
2747 				if (pvl->pv->dev == pvl2->pv->dev)
2748 					goto next_pv;
2749 			}
2750 			if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) {
2751 				vg_release(correct_vg);
2752 				return_NULL;
2753 			}
2754 			log_error("Removing PV %s (%s) that no longer belongs to VG %s",
2755 				  pv_dev_name(pvl->pv), uuid, correct_vg->name);
2756 			if (!pv_write_orphan(cmd, pvl->pv)) {
2757 				vg_release(correct_vg);
2758 				return_NULL;
2759 			}
2760       next_pv:
2761 			;
2762 		}
2763 	}
2764 
2765 	if (vg_missing_pv_count(correct_vg)) {
2766 		log_verbose("There are %d physical volumes missing.",
2767 			    vg_missing_pv_count(correct_vg));
2768 		_vg_mark_partial_lvs(correct_vg);
2769 	}
2770 
2771 	if ((correct_vg->status & PVMOVE) && !pvmove_mode()) {
2772 		log_error("WARNING: Interrupted pvmove detected in "
2773 			  "volume group %s", correct_vg->name);
2774 		log_error("Please restore the metadata by running "
2775 			  "vgcfgrestore.");
2776 		vg_release(correct_vg);
2777 		return NULL;
2778 	}
2779 
2780 	*consistent = 1;
2781 	return correct_vg;
2782 }
2783 
2784 struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vgname,
2785 			     const char *vgid, int *consistent)
2786 {
2787 	struct volume_group *vg;
2788 	struct lv_list *lvl;
2789 
2790 	if (!(vg = _vg_read(cmd, vgname, vgid, consistent, 0)))
2791 		return NULL;
2792 
2793 	if (!check_pv_segments(vg)) {
2794 		log_error("Internal error: PV segments corrupted in %s.",
2795 			  vg->name);
2796 		vg_release(vg);
2797 		return NULL;
2798 	}
2799 
2800 	dm_list_iterate_items(lvl, &vg->lvs) {
2801 		if (!check_lv_segments(lvl->lv, 1)) {
2802 			log_error("Internal error: LV segments corrupted in %s.",
2803 				  lvl->lv->name);
2804 			vg_release(vg);
2805 			return NULL;
2806 		}
2807 	}
2808 
2809 	return vg;
2810 }
2811 
2812 void vg_release(struct volume_group *vg)
2813 {
2814 	if (!vg || !vg->vgmem)
2815 		return;
2816 
2817 	if (vg->cmd && vg->vgmem == vg->cmd->mem)
2818 		log_error("Internal error: global memory pool used for VG %s",
2819 			  vg->name);
2820 
2821 	dm_pool_destroy(vg->vgmem);
2822 }
2823 
2824 /* This is only called by lv_from_lvid, which is only called from
2825  * activate.c so we know the appropriate VG lock is already held and
2826  * the vg_read_internal is therefore safe.
2827  */
2828 static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd,
2829 					    const char *vgid,
2830 					    unsigned precommitted)
2831 {
2832 	const char *vgname;
2833 	struct dm_list *vgnames;
2834 	struct volume_group *vg = NULL;
2835 	struct lvmcache_vginfo *vginfo;
2836 	struct str_list *strl;
2837 	int consistent = 0;
2838 
2839 	/* Is corresponding vgname already cached? */
2840 	if ((vginfo = vginfo_from_vgid(vgid)) &&
2841 	    vginfo->vgname && !is_orphan_vg(vginfo->vgname)) {
2842 		if ((vg = _vg_read(cmd, NULL, vgid,
2843 				   &consistent, precommitted)) &&
2844 		    !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2845 
2846 			if (!consistent) {
2847 				log_error("Volume group %s metadata is "
2848 					  "inconsistent", vg->name);
2849 			}
2850 			return vg;
2851 		}
2852 		vg_release(vg);
2853 	}
2854 
2855 	/* Mustn't scan if memory locked: ensure cache gets pre-populated! */
2856 	if (memlock())
2857 		goto out;
2858 
2859 	/* FIXME Need a genuine read by ID here - don't vg_read_internal by name! */
2860 	/* FIXME Disabled vgrenames while active for now because we aren't
2861 	 *       allowed to do a full scan here any more. */
2862 
2863 	// The slow way - full scan required to cope with vgrename
2864 	if (!(vgnames = get_vgnames(cmd, 2))) {
2865 		log_error("vg_read_by_vgid: get_vgnames failed");
2866 		goto out;
2867 	}
2868 
2869 	dm_list_iterate_items(strl, vgnames) {
2870 		vgname = strl->str;
2871 		if (!vgname || is_orphan_vg(vgname))
2872 			continue;	// FIXME Unnecessary?
2873 		consistent = 0;
2874 		if ((vg = _vg_read(cmd, vgname, vgid, &consistent,
2875 				   precommitted)) &&
2876 		    !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2877 
2878 			if (!consistent) {
2879 				log_error("Volume group %s metadata is "
2880 					  "inconsistent", vgname);
2881 				goto out;
2882 			}
2883 			return vg;
2884 		}
2885 	}
2886 
2887 out:
2888 	vg_release(vg);
2889 	return NULL;
2890 }
2891 
2892 /* Only called by activate.c */
2893 struct logical_volume *lv_from_lvid(struct cmd_context *cmd, const char *lvid_s,
2894 				    unsigned precommitted)
2895 {
2896 	struct lv_list *lvl;
2897 	struct volume_group *vg;
2898 	const union lvid *lvid;
2899 
2900 	lvid = (const union lvid *) lvid_s;
2901 
2902 	log_very_verbose("Finding volume group for uuid %s", lvid_s);
2903 	if (!(vg = _vg_read_by_vgid(cmd, (char *)lvid->id[0].uuid, precommitted))) {
2904 		log_error("Volume group for uuid not found: %s", lvid_s);
2905 		return NULL;
2906 	}
2907 
2908 	log_verbose("Found volume group \"%s\"", vg->name);
2909 	if (vg->status & EXPORTED_VG) {
2910 		log_error("Volume group \"%s\" is exported", vg->name);
2911 		goto out;
2912 	}
2913 	if (!(lvl = find_lv_in_vg_by_lvid(vg, lvid))) {
2914 		log_very_verbose("Can't find logical volume id %s", lvid_s);
2915 		goto out;
2916 	}
2917 
2918 	return lvl->lv;
2919 out:
2920 	vg_release(vg);
2921 	return NULL;
2922 }
2923 
2924 /**
2925  * pv_read - read and return a handle to a physical volume
2926  * @cmd: LVM command initiating the pv_read
2927  * @pv_name: full device name of the PV, including the path
2928  * @mdas: list of metadata areas of the PV
2929  * @label_sector: sector number where the PV label is stored on @pv_name
2930  * @warnings:
2931  *
2932  * Returns:
2933  *   PV handle - valid pv_name and successful read of the PV, or
2934  *   NULL - invalid parameter or error in reading the PV
2935  *
2936  * Note:
2937  *   FIXME - liblvm todo - make into function that returns handle
2938  */
2939 struct physical_volume *pv_read(struct cmd_context *cmd, const char *pv_name,
2940 				struct dm_list *mdas, uint64_t *label_sector,
2941 				int warnings, int scan_label_only)
2942 {
2943 	return _pv_read(cmd, cmd->mem, pv_name, mdas, label_sector, warnings, scan_label_only);
2944 }
2945 
2946 /* FIXME Use label functions instead of PV functions */
2947 static struct physical_volume *_pv_read(struct cmd_context *cmd,
2948 					struct dm_pool *pvmem,
2949 					const char *pv_name,
2950 					struct dm_list *mdas,
2951 					uint64_t *label_sector,
2952 					int warnings, int scan_label_only)
2953 {
2954 	struct physical_volume *pv;
2955 	struct label *label;
2956 	struct lvmcache_info *info;
2957 	struct device *dev;
2958 
2959 	if (!(dev = dev_cache_get(pv_name, cmd->filter)))
2960 		return_NULL;
2961 
2962 	if (!(label_read(dev, &label, UINT64_C(0)))) {
2963 		if (warnings)
2964 			log_error("No physical volume label read from %s",
2965 				  pv_name);
2966 		return NULL;
2967 	}
2968 
2969 	info = (struct lvmcache_info *) label->info;
2970 	if (label_sector && *label_sector)
2971 		*label_sector = label->sector;
2972 
2973 	if (!(pv = dm_pool_zalloc(pvmem, sizeof(*pv)))) {
2974 		log_error("pv allocation for '%s' failed", pv_name);
2975 		return NULL;
2976 	}
2977 
2978 	dm_list_init(&pv->tags);
2979 	dm_list_init(&pv->segments);
2980 
2981 	/* FIXME Move more common code up here */
2982 	if (!(info->fmt->ops->pv_read(info->fmt, pv_name, pv, mdas,
2983 	      scan_label_only))) {
2984 		log_error("Failed to read existing physical volume '%s'",
2985 			  pv_name);
2986 		return NULL;
2987 	}
2988 
2989 	if (!pv->size)
2990 		return NULL;
2991 
2992 	if (!alloc_pv_segment_whole_pv(pvmem, pv))
2993 		return_NULL;
2994 
2995 	return pv;
2996 }
2997 
/*
 * Return the list of VG names obtained from lvmcache_get_vgnames().
 * May return empty list.
 */
struct dm_list *get_vgnames(struct cmd_context *cmd, int full_scan)
{
	struct dm_list *names = lvmcache_get_vgnames(cmd, full_scan);

	return names;
}
3003 
/*
 * Return the list of VG ids obtained from lvmcache_get_vgids().
 */
struct dm_list *get_vgids(struct cmd_context *cmd, int full_scan)
{
	struct dm_list *ids = lvmcache_get_vgids(cmd, full_scan);

	return ids;
}
3008 
3009 static int _get_pvs(struct cmd_context *cmd, struct dm_list **pvslist)
3010 {
3011 	struct str_list *strl;
3012 	struct dm_list * uninitialized_var(results);
3013 	const char *vgname, *vgid;
3014 	struct pv_list *pvl, *pvl_copy;
3015 	struct dm_list *vgids;
3016 	struct volume_group *vg;
3017 	int consistent = 0;
3018 	int old_pvmove;
3019 
3020 	lvmcache_label_scan(cmd, 0);
3021 
3022 	if (pvslist) {
3023 		if (!(results = dm_pool_alloc(cmd->mem, sizeof(*results)))) {
3024 			log_error("PV list allocation failed");
3025 			return 0;
3026 		}
3027 
3028 		dm_list_init(results);
3029 	}
3030 
3031 	/* Get list of VGs */
3032 	if (!(vgids = get_vgids(cmd, 0))) {
3033 		log_error("get_pvs: get_vgids failed");
3034 		return 0;
3035 	}
3036 
3037 	/* Read every VG to ensure cache consistency */
3038 	/* Orphan VG is last on list */
3039 	old_pvmove = pvmove_mode();
3040 	init_pvmove(1);
3041 	dm_list_iterate_items(strl, vgids) {
3042 		vgid = strl->str;
3043 		if (!vgid)
3044 			continue;	/* FIXME Unnecessary? */
3045 		consistent = 0;
3046 		if (!(vgname = vgname_from_vgid(NULL, vgid))) {
3047 			stack;
3048 			continue;
3049 		}
3050 		if (!(vg = vg_read_internal(cmd, vgname, vgid, &consistent))) {
3051 			stack;
3052 			continue;
3053 		}
3054 		if (!consistent)
3055 			log_warn("WARNING: Volume Group %s is not consistent",
3056 				 vgname);
3057 
3058 		/* Move PVs onto results list */
3059 		if (pvslist)
3060 			dm_list_iterate_items(pvl, &vg->pvs) {
3061 				if (!(pvl_copy = _copy_pvl(cmd->mem, pvl))) {
3062 					log_error("PV list allocation failed");
3063 					vg_release(vg);
3064 					return 0;
3065 				}
3066 				dm_list_add(results, &pvl_copy->list);
3067 			}
3068 		vg_release(vg);
3069 	}
3070 	init_pvmove(old_pvmove);
3071 
3072 	if (pvslist)
3073 		*pvslist = results;
3074 	else
3075 		dm_pool_free(cmd->mem, vgids);
3076 
3077 	return 1;
3078 }
3079 
3080 struct dm_list *get_pvs(struct cmd_context *cmd)
3081 {
3082 	struct dm_list *results;
3083 
3084 	if (!_get_pvs(cmd, &results))
3085 		return NULL;
3086 
3087 	return results;
3088 }
3089 
3090 int scan_vgs_for_pvs(struct cmd_context *cmd)
3091 {
3092 	return _get_pvs(cmd, NULL);
3093 }
3094 
3095 int pv_write(struct cmd_context *cmd __attribute((unused)),
3096 	     struct physical_volume *pv,
3097 	     struct dm_list *mdas, int64_t label_sector)
3098 {
3099 	if (!pv->fmt->ops->pv_write) {
3100 		log_error("Format does not support writing physical volumes");
3101 		return 0;
3102 	}
3103 
3104 	if (!is_orphan_vg(pv->vg_name) || pv->pe_alloc_count) {
3105 		log_error("Assertion failed: can't _pv_write non-orphan PV "
3106 			  "(in VG %s)", pv->vg_name);
3107 		return 0;
3108 	}
3109 
3110 	if (!pv->fmt->ops->pv_write(pv->fmt, pv, mdas, label_sector))
3111 		return_0;
3112 
3113 	return 1;
3114 }
3115 
3116 int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv)
3117 {
3118 	const char *old_vg_name = pv->vg_name;
3119 
3120 	pv->vg_name = cmd->fmt->orphan_vg_name;
3121 	pv->status = ALLOCATABLE_PV;
3122 	pv->pe_alloc_count = 0;
3123 
3124 	if (!dev_get_size(pv->dev, &pv->size)) {
3125 		log_error("%s: Couldn't get size.", pv_dev_name(pv));
3126 		return 0;
3127 	}
3128 
3129 	if (!pv_write(cmd, pv, NULL, INT64_C(-1))) {
3130 		log_error("Failed to clear metadata from physical "
3131 			  "volume \"%s\" after removal from \"%s\"",
3132 			  pv_dev_name(pv), old_vg_name);
3133 		return 0;
3134 	}
3135 
3136 	return 1;
3137 }
3138 
3139 /**
3140  * is_orphan_vg - Determine whether a vg_name is an orphan
3141  * @vg_name: pointer to the vg_name
3142  */
3143 int is_orphan_vg(const char *vg_name)
3144 {
3145 	return (vg_name && vg_name[0] == ORPHAN_PREFIX[0]) ? 1 : 0;
3146 }
3147 
3148 /**
3149  * is_orphan - Determine whether a pv is an orphan based on its vg_name
3150  * @pv: handle to the physical volume
3151  */
3152 int is_orphan(const struct physical_volume *pv)
3153 {
3154 	return is_orphan_vg(pv_field(pv, vg_name));
3155 }
3156 
3157 /**
3158  * is_pv - Determine whether a pv is a real pv or dummy one
3159  * @pv: handle to device
3160  */
3161 int is_pv(struct physical_volume *pv)
3162 {
3163 	return (pv_field(pv, vg_name) ? 1 : 0);
3164 }
3165 
3166 /*
3167  * Returns:
3168  *  0 - fail
3169  *  1 - success
3170  */
3171 int pv_analyze(struct cmd_context *cmd, const char *pv_name,
3172 	       uint64_t label_sector)
3173 {
3174 	struct label *label;
3175 	struct device *dev;
3176 	struct metadata_area *mda;
3177 	struct lvmcache_info *info;
3178 
3179 	dev = dev_cache_get(pv_name, cmd->filter);
3180 	if (!dev) {
3181 		log_error("Device %s not found (or ignored by filtering).",
3182 			  pv_name);
3183 		return 0;
3184 	}
3185 
3186 	/*
3187 	 * First, scan for LVM labels.
3188 	 */
3189 	if (!label_read(dev, &label, label_sector)) {
3190 		log_error("Could not find LVM label on %s",
3191 			  pv_name);
3192 		return 0;
3193 	}
3194 
3195 	log_print("Found label on %s, sector %"PRIu64", type=%s",
3196 		  pv_name, label->sector, label->type);
3197 
3198 	/*
3199 	 * Next, loop through metadata areas
3200 	 */
3201 	info = label->info;
3202 	dm_list_iterate_items(mda, &info->mdas)
3203 		mda->ops->pv_analyze_mda(info->fmt, mda);
3204 
3205 	return 1;
3206 }
3207 
3208 /* FIXME: remove / combine this with locking? */
3209 int vg_check_write_mode(struct volume_group *vg)
3210 {
3211 	if (vg->open_mode != 'w') {
3212 		log_errno(EPERM, "Attempt to modify a read-only VG");
3213 		return 0;
3214 	}
3215 	return 1;
3216 }
3217 
3218 /*
3219  * Performs a set of checks against a VG according to bits set in status
3220  * and returns FAILED_* bits for those that aren't acceptable.
3221  *
3222  * FIXME Remove the unnecessary duplicate definitions and return bits directly.
3223  */
3224 static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
3225 				    uint32_t status)
3226 {
3227 	uint32_t failure = 0;
3228 
3229 	if ((status & CLUSTERED) &&
3230 	    (vg_is_clustered(vg)) && !locking_is_clustered()) {
3231 		log_error("Skipping clustered volume group %s", vg->name);
3232 		/* Return because other flags are considered undefined. */
3233 		return FAILED_CLUSTERED;
3234 	}
3235 
3236 	if ((status & EXPORTED_VG) &&
3237 	    vg_is_exported(vg)) {
3238 		log_error("Volume group %s is exported", vg->name);
3239 		failure |= FAILED_EXPORTED;
3240 	}
3241 
3242 	if ((status & LVM_WRITE) &&
3243 	    !(vg->status & LVM_WRITE)) {
3244 		log_error("Volume group %s is read-only", vg->name);
3245 		failure |= FAILED_READ_ONLY;
3246 	}
3247 
3248 	if ((status & RESIZEABLE_VG) &&
3249 	    !vg_is_resizeable(vg)) {
3250 		log_error("Volume group %s is not resizeable.", vg->name);
3251 		failure |= FAILED_RESIZEABLE;
3252 	}
3253 
3254 	return failure;
3255 }
3256 
3257 /**
3258  * vg_check_status - check volume group status flags and log error
3259  * @vg - volume group to check status flags
3260  * @status - specific status flags to check (e.g. EXPORTED_VG)
3261  */
3262 int vg_check_status(const struct volume_group *vg, uint32_t status)
3263 {
3264 	return !_vg_bad_status_bits(vg, status);
3265 }
3266 
3267 static struct volume_group *_recover_vg(struct cmd_context *cmd, const char *lock_name,
3268 			 const char *vg_name, const char *vgid,
3269 			 uint32_t lock_flags)
3270 {
3271 	int consistent = 1;
3272 	struct volume_group *vg;
3273 
3274 	lock_flags &= ~LCK_TYPE_MASK;
3275 	lock_flags |= LCK_WRITE;
3276 
3277 	unlock_vg(cmd, lock_name);
3278 
3279 	dev_close_all();
3280 
3281 	if (!lock_vol(cmd, lock_name, lock_flags))
3282 		return_NULL;
3283 
3284 	if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent)))
3285 		return_NULL;
3286 
3287 	if (!consistent) {
3288 		vg_release(vg);
3289 		return_NULL;
3290 	}
3291 
3292 	return (struct volume_group *)vg;
3293 }
3294 
3295 /*
3296  * Consolidated locking, reading, and status flag checking.
3297  *
3298  * If the metadata is inconsistent, setting READ_ALLOW_INCONSISTENT in
3299  * misc_flags will return it with FAILED_INCONSISTENT set instead of
3300  * giving you nothing.
3301  *
3302  * Use vg_read_error(vg) to determine the result.  Nonzero means there were
3303  * problems reading the volume group.
3304  * Zero value means that the VG is open and appropriate locks are held.
3305  */
3306 static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name,
3307 			       const char *vgid, uint32_t lock_flags,
3308 			       uint32_t status_flags, uint32_t misc_flags)
3309 {
3310 	struct volume_group *vg = NULL;
3311 	const char *lock_name;
3312  	int consistent = 1;
3313 	int consistent_in;
3314 	uint32_t failure = 0;
3315 	int already_locked;
3316 
3317 	if (misc_flags & READ_ALLOW_INCONSISTENT || !(lock_flags & LCK_WRITE))
3318 		consistent = 0;
3319 
3320 	if (!validate_name(vg_name) && !is_orphan_vg(vg_name)) {
3321 		log_error("Volume group name %s has invalid characters",
3322 			  vg_name);
3323 		return NULL;
3324 	}
3325 
3326 	lock_name = is_orphan_vg(vg_name) ? VG_ORPHANS : vg_name;
3327 	already_locked = vgname_is_locked(lock_name);
3328 
3329 	if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK) &&
3330 	    !lock_vol(cmd, lock_name, lock_flags)) {
3331 		log_error("Can't get lock for %s", vg_name);
3332 		return _vg_make_handle(cmd, vg, FAILED_LOCKING);
3333 	}
3334 
3335 	if (is_orphan_vg(vg_name))
3336 		status_flags &= ~LVM_WRITE;
3337 
3338 	consistent_in = consistent;
3339 
3340 	/* If consistent == 1, we get NULL here if correction fails. */
3341 	if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent))) {
3342 		if (consistent_in && !consistent) {
3343 			log_error("Volume group \"%s\" inconsistent.", vg_name);
3344 			failure |= FAILED_INCONSISTENT;
3345 			goto_bad;
3346 		}
3347 
3348 		log_error("Volume group \"%s\" not found", vg_name);
3349 
3350 		failure |= FAILED_NOTFOUND;
3351 		goto_bad;
3352 	}
3353 
3354 	if (vg_is_clustered(vg) && !locking_is_clustered()) {
3355 		log_error("Skipping clustered volume group %s", vg->name);
3356 		failure |= FAILED_CLUSTERED;
3357 		goto_bad;
3358 	}
3359 
3360 	/* consistent == 0 when VG is not found, but failed == FAILED_NOTFOUND */
3361 	if (!consistent && !failure) {
3362 		vg_release(vg);
3363 		if (!(vg = _recover_vg(cmd, lock_name, vg_name, vgid, lock_flags))) {
3364 			log_error("Recovery of volume group \"%s\" failed.",
3365 				  vg_name);
3366 			failure |= FAILED_INCONSISTENT;
3367 			goto_bad;
3368 		}
3369 	}
3370 
3371 	/*
3372 	 * Check that the tool can handle tricky cases -- missing PVs and
3373 	 * unknown segment types.
3374 	 */
3375 
3376 	if (!cmd->handles_missing_pvs && vg_missing_pv_count(vg) &&
3377 	    (lock_flags & LCK_WRITE)) {
3378 		log_error("Cannot change VG %s while PVs are missing.", vg->name);
3379 		log_error("Consider vgreduce --removemissing.");
3380 		failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
3381 		goto_bad;
3382 	}
3383 
3384 	if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) &&
3385 	    (lock_flags & LCK_WRITE)) {
3386 		log_error("Cannot change VG %s with unknown segments in it!",
3387 			  vg->name);
3388 		failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
3389 		goto_bad;
3390 	}
3391 
3392 	failure |= _vg_bad_status_bits(vg, status_flags);
3393 	if (failure)
3394 		goto_bad;
3395 
3396 	return _vg_make_handle(cmd, vg, failure);
3397 
3398 bad:
3399 	if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK))
3400 		unlock_vg(cmd, lock_name);
3401 
3402 	return _vg_make_handle(cmd, vg, failure);
3403 }
3404 
3405 /*
3406  * vg_read: High-level volume group metadata read function.
3407  *
3408  * vg_read_error() must be used on any handle returned to check for errors.
3409  *
3410  *  - metadata inconsistent and automatic correction failed: FAILED_INCONSISTENT
3411  *  - VG is read-only: FAILED_READ_ONLY
3412  *  - VG is EXPORTED, unless flags has READ_ALLOW_EXPORTED: FAILED_EXPORTED
3413  *  - VG is not RESIZEABLE: FAILED_RESIZEABLE
3414  *  - locking failed: FAILED_LOCKING
3415  *
3416  * On failures, all locks are released, unless one of the following applies:
3417  *  - vgname_is_locked(lock_name) is true
3418  * FIXME: remove the above 2 conditions if possible and make an error always
3419  * release the lock.
3420  *
3421  * Volume groups are opened read-only unless flags contains READ_FOR_UPDATE.
3422  *
3423  * Checking for VG existence:
3424  *
3425  * FIXME: We want vg_read to attempt automatic recovery after acquiring a
3426  * temporary write lock: if that fails, we bail out as usual, with failed &
3427  * FAILED_INCONSISTENT. If it works, we are good to go. Code that's been in
3428  * toollib just set lock_flags to LCK_VG_WRITE and called vg_read_internal with
3429  * *consistent = 1.
3430  */
3431 struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
3432 	      const char *vgid, uint32_t flags)
3433 {
3434 	uint32_t status = 0;
3435 	uint32_t lock_flags = LCK_VG_READ;
3436 
3437 	if (flags & READ_FOR_UPDATE) {
3438 		status |= EXPORTED_VG | LVM_WRITE;
3439 		lock_flags = LCK_VG_WRITE;
3440 	}
3441 
3442 	if (flags & READ_ALLOW_EXPORTED)
3443 		status &= ~EXPORTED_VG;
3444 
3445 	return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status, flags);
3446 }
3447 
3448 /*
3449  * A high-level volume group metadata reading function. Open a volume group for
3450  * later update (this means the user code can change the metadata and later
3451  * request the new metadata to be written and committed).
3452  */
3453 struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
3454 			 const char *vgid, uint32_t flags)
3455 {
3456 	return vg_read(cmd, vg_name, vgid, flags | READ_FOR_UPDATE);
3457 }
3458 
3459 /*
3460  * Test the validity of a VG handle returned by vg_read() or vg_read_for_update().
3461  */
3462 uint32_t vg_read_error(struct volume_group *vg_handle)
3463 {
3464 	if (!vg_handle)
3465 		return FAILED_ALLOCATION;
3466 
3467 	return vg_handle->read_status;
3468 }
3469 
3470 /*
3471  * Lock a vgname and/or check for existence.
3472  * Takes a WRITE lock on the vgname before scanning.
3473  * If scanning fails or vgname found, release the lock.
3474  * NOTE: If you find the return codes confusing, you might think of this
3475  * function as similar to an open() call with O_CREAT and O_EXCL flags
3476  * (open returns fail with -EEXIST if file already exists).
3477  *
3478  * Returns:
3479  * FAILED_LOCKING - Cannot lock name
3480  * FAILED_EXIST - VG name already exists - cannot reserve
3481  * SUCCESS - VG name does not exist in system and WRITE lock held
3482  */
3483 uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname)
3484 {
3485 	if (!lock_vol(cmd, vgname, LCK_VG_WRITE)) {
3486 		return FAILED_LOCKING;
3487 	}
3488 
3489 	/* Find the vgname in the cache */
3490 	/* If it's not there we must do full scan to be completely sure */
3491 	if (!fmt_from_vgname(vgname, NULL)) {
3492 		lvmcache_label_scan(cmd, 0);
3493 		if (!fmt_from_vgname(vgname, NULL)) {
3494 			if (memlock()) {
3495 				/*
3496 				 * FIXME: Disallow calling this function if
3497 				 * memlock() is true.
3498 				 */
3499 				unlock_vg(cmd, vgname);
3500 				return FAILED_LOCKING;
3501 			}
3502 			lvmcache_label_scan(cmd, 2);
3503 			if (!fmt_from_vgname(vgname, NULL)) {
3504 				/* vgname not found after scanning */
3505 				return SUCCESS;
3506 			}
3507 		}
3508 	}
3509 
3510 	/* Found vgname so cannot reserve. */
3511 	unlock_vg(cmd, vgname);
3512 	return FAILED_EXIST;
3513 }
3514 
3515 /*
3516  * Gets/Sets for external LVM library
3517  */
3518 struct id pv_id(const struct physical_volume *pv)
3519 {
3520 	return pv_field(pv, id);
3521 }
3522 
3523 const struct format_type *pv_format_type(const struct physical_volume *pv)
3524 {
3525 	return pv_field(pv, fmt);
3526 }
3527 
3528 struct id pv_vgid(const struct physical_volume *pv)
3529 {
3530 	return pv_field(pv, vgid);
3531 }
3532 
3533 struct device *pv_dev(const struct physical_volume *pv)
3534 {
3535 	return pv_field(pv, dev);
3536 }
3537 
3538 const char *pv_vg_name(const struct physical_volume *pv)
3539 {
3540 	return pv_field(pv, vg_name);
3541 }
3542 
/* Return dev_name() of the device obtained from pv_dev(). */
const char *pv_dev_name(const struct physical_volume *pv)
{
	struct device *device = pv_dev(pv);

	return dev_name(device);
}
3547 
3548 uint64_t pv_size(const struct physical_volume *pv)
3549 {
3550 	return pv_field(pv, size);
3551 }
3552 
3553 uint32_t pv_status(const struct physical_volume *pv)
3554 {
3555 	return pv_field(pv, status);
3556 }
3557 
3558 uint32_t pv_pe_size(const struct physical_volume *pv)
3559 {
3560 	return pv_field(pv, pe_size);
3561 }
3562 
3563 uint64_t pv_pe_start(const struct physical_volume *pv)
3564 {
3565 	return pv_field(pv, pe_start);
3566 }
3567 
3568 uint32_t pv_pe_count(const struct physical_volume *pv)
3569 {
3570 	return pv_field(pv, pe_count);
3571 }
3572 
3573 uint32_t pv_pe_alloc_count(const struct physical_volume *pv)
3574 {
3575 	return pv_field(pv, pe_alloc_count);
3576 }
3577 
3578 uint32_t pv_mda_count(const struct physical_volume *pv)
3579 {
3580 	struct lvmcache_info *info;
3581 
3582 	info = info_from_pvid((const char *)&pv->id.uuid, 0);
3583 	return info ? dm_list_size(&info->mdas) : UINT64_C(0);
3584 }
3585 
3586 uint32_t vg_seqno(const struct volume_group *vg)
3587 {
3588 	return vg->seqno;
3589 }
3590 
3591 uint32_t vg_status(const struct volume_group *vg)
3592 {
3593 	return vg->status;
3594 }
3595 
3596 uint64_t vg_size(const struct volume_group *vg)
3597 {
3598 	return (uint64_t) vg->extent_count * vg->extent_size;
3599 }
3600 
3601 uint64_t vg_free(const struct volume_group *vg)
3602 {
3603 	return (uint64_t) vg->free_count * vg->extent_size;
3604 }
3605 
3606 uint64_t vg_extent_size(const struct volume_group *vg)
3607 {
3608 	return (uint64_t) vg->extent_size;
3609 }
3610 
3611 uint64_t vg_extent_count(const struct volume_group *vg)
3612 {
3613 	return (uint64_t) vg->extent_count;
3614 }
3615 
3616 uint64_t vg_free_count(const struct volume_group *vg)
3617 {
3618 	return (uint64_t) vg->free_count;
3619 }
3620 
3621 uint64_t vg_pv_count(const struct volume_group *vg)
3622 {
3623 	return (uint64_t) vg->pv_count;
3624 }
3625 
3626 uint64_t vg_max_pv(const struct volume_group *vg)
3627 {
3628 	return (uint64_t) vg->max_pv;
3629 }
3630 
3631 uint64_t vg_max_lv(const struct volume_group *vg)
3632 {
3633 	return (uint64_t) vg->max_lv;
3634 }
3635 
3636 uint32_t vg_mda_count(const struct volume_group *vg)
3637 {
3638 	return dm_list_size(&vg->fid->metadata_areas);
3639 }
3640 
3641 uint64_t lv_size(const struct logical_volume *lv)
3642 {
3643 	return lv->size;
3644 }
3645 
3646 /**
3647  * pv_by_path - Given a device path return a PV handle if it is a PV
3648  * @cmd - handle to the LVM command instance
3649  * @pv_name - device path to read for the PV
3650  *
3651  * Returns:
3652  *  NULL - device path does not contain a valid PV
3653  *  non-NULL - PV handle corresponding to device path
3654  *
3655  * FIXME: merge with find_pv_by_name ?
3656  */
3657 struct physical_volume *pv_by_path(struct cmd_context *cmd, const char *pv_name)
3658 {
3659 	struct dm_list mdas;
3660 
3661 	dm_list_init(&mdas);
3662 	return _pv_read(cmd, cmd->mem, pv_name, &mdas, NULL, 1, 0);
3663 }
3664