1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
25 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
26 */
27
28 /*
29 * Functions to convert between a list of vdevs and an nvlist representing the
30 * configuration. Each entry in the list can be one of:
31 *
32 * Device vdevs
33 * disk=(path=..., devid=...)
34 * file=(path=...)
35 *
36 * Group vdevs
37 * raidz[1|2]=(...)
38 * mirror=(...)
39 *
40 * Hot spares
41 *
42 * While the underlying implementation supports it, group vdevs cannot contain
43 * other group vdevs. All userland verification of devices is contained within
44 * this file. If successful, the nvlist returned can be passed directly to the
45 * kernel; we've done as much verification as possible in userland.
46 *
47 * Hot spares are a special case, and passed down as an array of disk vdevs, at
48 * the same level as the root of the vdev tree.
49 *
50 * The only function exported by this file is 'make_root_vdev'. The
51 * function performs several passes:
52 *
53 * 1. Construct the vdev specification. Performs syntax validation and
54 * makes sure each device is valid.
55 * 2. Check for devices in use. Using libdiskmgt, makes sure that no
56 * devices are also in use. Some can be overridden using the 'force'
57 * flag, others cannot.
58 * 3. Check for replication errors if the 'force' flag is not specified.
59 * validates that the replication level is consistent across the
60 * entire pool.
61 * 4. Call libzfs to label any whole disks with an EFI label.
62 */
63
#include <assert.h>
#include <devid.h>
#include <errno.h>
#include <fcntl.h>
#include <libintl.h>
#include <libnvpair.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <paths.h>
#include <sys/stat.h>
#include <sys/disk.h>
#include <sys/mntent.h>
#ifdef __FreeBSD__
#include <libgeom.h>
#endif
#ifdef __NetBSD__
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#endif

#include "zpool_util.h"
87
88 #define BACKUP_SLICE "s2"
89
90 /*
91 * For any given vdev specification, we can have multiple errors. The
92 * vdev_error() function keeps track of whether we have seen an error yet, and
 * prints out a header if it's the first error we've seen.
94 */
95 boolean_t error_seen;
96 boolean_t is_force;
97
98 /*PRINTFLIKE1*/
99 static void
vdev_error(const char * fmt,...)100 vdev_error(const char *fmt, ...)
101 {
102 va_list ap;
103
104 if (!error_seen) {
105 (void) fprintf(stderr, gettext("invalid vdev specification\n"));
106 if (!is_force)
107 (void) fprintf(stderr, gettext("use '-f' to override "
108 "the following errors:\n"));
109 else
110 (void) fprintf(stderr, gettext("the following errors "
111 "must be manually repaired:\n"));
112 error_seen = B_TRUE;
113 }
114
115 va_start(ap, fmt);
116 (void) vfprintf(stderr, fmt, ap);
117 va_end(ap);
118 }
119
120 #ifdef illumos
static void
libdiskmgt_error(int error)
{
	/*
	 * ENXIO/ENODEV simply means the device does not live in /dev/dsk,
	 * which is not worth a warning; stay silent in that case.
	 */
	switch (error) {
	case ENXIO:
	case ENODEV:
		return;
	default:
		(void) fprintf(stderr, gettext("warning: device in use "
		    "checking failed: %s\n"), strerror(error));
	}
}
134
135 /*
136 * Validate a device, passing the bulk of the work off to libdiskmgt.
137 */
138 static int
check_slice(const char * path,int force,boolean_t wholedisk,boolean_t isspare)139 check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
140 {
141 char *msg;
142 int error = 0;
143 dm_who_type_t who;
144
145 if (force)
146 who = DM_WHO_ZPOOL_FORCE;
147 else if (isspare)
148 who = DM_WHO_ZPOOL_SPARE;
149 else
150 who = DM_WHO_ZPOOL;
151
152 if (dm_inuse((char *)path, &msg, who, &error) || error) {
153 if (error != 0) {
154 libdiskmgt_error(error);
155 return (0);
156 } else {
157 vdev_error("%s", msg);
158 free(msg);
159 return (-1);
160 }
161 }
162
163 /*
164 * If we're given a whole disk, ignore overlapping slices since we're
165 * about to label it anyway.
166 */
167 error = 0;
168 if (!wholedisk && !force &&
169 (dm_isoverlapping((char *)path, &msg, &error) || error)) {
170 if (error == 0) {
171 /* dm_isoverlapping returned -1 */
172 vdev_error(gettext("%s overlaps with %s\n"), path, msg);
173 free(msg);
174 return (-1);
175 } else if (error != ENODEV) {
176 /* libdiskmgt's devcache only handles physical drives */
177 libdiskmgt_error(error);
178 return (0);
179 }
180 }
181
182 return (0);
183 }
184
185
186 /*
187 * Validate a whole disk. Iterate over all slices on the disk and make sure
188 * that none is in use by calling check_slice().
189 */
190 static int
check_disk(const char * name,dm_descriptor_t disk,int force,int isspare)191 check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
192 {
193 dm_descriptor_t *drive, *media, *slice;
194 int err = 0;
195 int i;
196 int ret;
197
198 /*
199 * Get the drive associated with this disk. This should never fail,
200 * because we already have an alias handle open for the device.
201 */
202 if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
203 &err)) == NULL || *drive == NULL) {
204 if (err)
205 libdiskmgt_error(err);
206 return (0);
207 }
208
209 if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
210 &err)) == NULL) {
211 dm_free_descriptors(drive);
212 if (err)
213 libdiskmgt_error(err);
214 return (0);
215 }
216
217 dm_free_descriptors(drive);
218
219 /*
220 * It is possible that the user has specified a removable media drive,
221 * and the media is not present.
222 */
223 if (*media == NULL) {
224 dm_free_descriptors(media);
225 vdev_error(gettext("'%s' has no media in drive\n"), name);
226 return (-1);
227 }
228
229 if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
230 &err)) == NULL) {
231 dm_free_descriptors(media);
232 if (err)
233 libdiskmgt_error(err);
234 return (0);
235 }
236
237 dm_free_descriptors(media);
238
239 ret = 0;
240
241 /*
242 * Iterate over all slices and report any errors. We don't care about
243 * overlapping slices because we are using the whole disk.
244 */
245 for (i = 0; slice[i] != NULL; i++) {
246 char *name = dm_get_name(slice[i], &err);
247
248 if (check_slice(name, force, B_TRUE, isspare) != 0)
249 ret = -1;
250
251 dm_free_name(name);
252 }
253
254 dm_free_descriptors(slice);
255 return (ret);
256 }
257
258 /*
259 * Validate a device.
260 */
261 static int
check_device(const char * path,boolean_t force,boolean_t isspare)262 check_device(const char *path, boolean_t force, boolean_t isspare)
263 {
264 dm_descriptor_t desc;
265 int err;
266 char *dev;
267
268 /*
269 * For whole disks, libdiskmgt does not include the leading dev path.
270 */
271 dev = strrchr(path, '/');
272 assert(dev != NULL);
273 dev++;
274 if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
275 err = check_disk(path, desc, force, isspare);
276 dm_free_descriptor(desc);
277 return (err);
278 }
279
280 return (check_slice(path, force, B_FALSE, isspare));
281 }
282 #endif /* illumos */
283
284 /*
285 * Check that a file is valid. All we can do in this case is check that it's
286 * not in use by another pool, and not in use by swap.
287 */
288 static int
check_file(const char * file,boolean_t force,boolean_t isspare)289 check_file(const char *file, boolean_t force, boolean_t isspare)
290 {
291 char *name;
292 int fd;
293 int ret = 0;
294 int err;
295 pool_state_t state;
296 boolean_t inuse;
297
298 #ifdef illumos
299 if (dm_inuse_swap(file, &err)) {
300 if (err)
301 libdiskmgt_error(err);
302 else
303 vdev_error(gettext("%s is currently used by swap. "
304 "Please see swap(1M).\n"), file);
305 return (-1);
306 }
307 #endif
308
309 if ((fd = open(file, O_RDONLY)) < 0)
310 return (0);
311
312 if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
313 const char *desc;
314
315 switch (state) {
316 case POOL_STATE_ACTIVE:
317 desc = gettext("active");
318 break;
319
320 case POOL_STATE_EXPORTED:
321 desc = gettext("exported");
322 break;
323
324 case POOL_STATE_POTENTIALLY_ACTIVE:
325 desc = gettext("potentially active");
326 break;
327
328 default:
329 desc = gettext("unknown");
330 break;
331 }
332
333 /*
334 * Allow hot spares to be shared between pools.
335 */
336 if (state == POOL_STATE_SPARE && isspare)
337 return (0);
338
339 if (state == POOL_STATE_ACTIVE ||
340 state == POOL_STATE_SPARE || !force) {
341 switch (state) {
342 case POOL_STATE_SPARE:
343 vdev_error(gettext("%s is reserved as a hot "
344 "spare for pool %s\n"), file, name);
345 break;
346 default:
347 vdev_error(gettext("%s is part of %s pool "
348 "'%s'\n"), file, desc, name);
349 break;
350 }
351 ret = -1;
352 }
353
354 free(name);
355 }
356
357 (void) close(fd);
358 return (ret);
359 }
360
361 static int
check_device(const char * name,boolean_t force,boolean_t isspare)362 check_device(const char *name, boolean_t force, boolean_t isspare)
363 {
364 char path[MAXPATHLEN];
365
366 if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) != 0)
367 snprintf(path, sizeof(path), "%s%s", _PATH_DEV, name);
368 else
369 strlcpy(path, name, sizeof(path));
370
371 return (check_file(path, force, isspare));
372 }
373
374 /*
375 * By "whole disk" we mean an entire physical disk (something we can
376 * label, toggle the write cache on, etc.) as opposed to the full
377 * capacity of a pseudo-device such as lofi or did. We act as if we
378 * are labeling the disk, which should be a pretty good test of whether
379 * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if
380 * it isn't.
381 */
382 static boolean_t
is_whole_disk(const char * arg)383 is_whole_disk(const char *arg)
384 {
385 #ifdef illumos
386 struct dk_gpt *label;
387 int fd;
388 char path[MAXPATHLEN];
389
390 (void) snprintf(path, sizeof (path), "%s%s%s",
391 ZFS_RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
392 if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
393 return (B_FALSE);
394 if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
395 (void) close(fd);
396 return (B_FALSE);
397 }
398 efi_free(label);
399 (void) close(fd);
400 return (B_TRUE);
401 #endif
402 #ifdef __FreeBSD__
403 int fd;
404
405 fd = g_open(arg, 0);
406 if (fd >= 0) {
407 g_close(fd);
408 return (B_TRUE);
409 }
410 return (B_FALSE);
411 #endif
412 #ifdef __NetBSD__
413 struct disklabel dl;
414 int fd, rv;
415
416 if ((fd = open(arg, O_RDWR | O_NONBLOCK)) < 0)
417 return (B_FALSE);
418
419 rv = ioctl(fd, DIOCGDINFO, &dl);
420 close(fd);
421 return (rv == 0);
422 #endif
423 }
424
425 /*
426 * Create a leaf vdev. Determine if this is a file or a device. If it's a
427 * device, fill in the device id to make a complete nvlist. Valid forms for a
428 * leaf vdev are:
429 *
430 * /dev/dsk/xxx Complete disk path
431 * /xxx Full path to file
432 * xxx Shorthand for /dev/dsk/xxx
433 */
434 static nvlist_t *
make_leaf_vdev(const char * arg,uint64_t is_log)435 make_leaf_vdev(const char *arg, uint64_t is_log)
436 {
437 char path[MAXPATHLEN];
438 struct stat64 statbuf;
439 nvlist_t *vdev = NULL;
440 char *type = NULL;
441 boolean_t wholedisk = B_FALSE;
442
443 /*
444 * Determine what type of vdev this is, and put the full path into
445 * 'path'. We detect whether this is a device of file afterwards by
446 * checking the st_mode of the file.
447 */
448 if (arg[0] == '/') {
449 /*
450 * Complete device or file path. Exact type is determined by
451 * examining the file descriptor afterwards.
452 */
453 wholedisk = is_whole_disk(arg);
454 if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
455 (void) fprintf(stderr,
456 gettext("cannot open '%s': %s\n"),
457 arg, strerror(errno));
458 return (NULL);
459 }
460
461 (void) strlcpy(path, arg, sizeof (path));
462 } else {
463 /*
464 * This may be a short path for a device, or it could be total
465 * gibberish. Check to see if it's a known device in
466 * /dev/dsk/. As part of this check, see if we've been given a
467 * an entire disk (minus the slice number).
468 */
469 if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
470 strlcpy(path, arg, sizeof (path));
471 else
472 snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg);
473 wholedisk = is_whole_disk(path);
474 if (!wholedisk && (stat64(path, &statbuf) != 0)) {
475 /*
476 * If we got ENOENT, then the user gave us
477 * gibberish, so try to direct them with a
478 * reasonable error message. Otherwise,
479 * regurgitate strerror() since it's the best we
480 * can do.
481 */
482 if (errno == ENOENT) {
483 (void) fprintf(stderr,
484 gettext("cannot open '%s': no such "
485 "device\n"), arg);
486 (void) fprintf(stderr,
487 gettext("must be a full path or "
488 "shorthand device name\n"));
489 return (NULL);
490 } else {
491 (void) fprintf(stderr,
492 gettext("cannot open '%s': %s\n"),
493 path, strerror(errno));
494 return (NULL);
495 }
496 }
497 }
498
499 #ifdef __FreeBSD__
500 if (S_ISCHR(statbuf.st_mode)) {
501 statbuf.st_mode &= ~S_IFCHR;
502 statbuf.st_mode |= S_IFBLK;
503 wholedisk = B_FALSE;
504 }
505 #endif
506
507 /*
508 * Determine whether this is a device or a file.
509 */
510 if (wholedisk || S_ISBLK(statbuf.st_mode)) {
511 type = VDEV_TYPE_DISK;
512 } else if (S_ISREG(statbuf.st_mode)) {
513 type = VDEV_TYPE_FILE;
514 } else {
515 (void) fprintf(stderr, gettext("cannot use '%s': must be a "
516 "device or regular file\n"), path);
517 return (NULL);
518 }
519
520 /*
521 * Finally, we have the complete device or file, and we know that it is
522 * acceptable to use. Construct the nvlist to describe this vdev. All
523 * vdevs have a 'path' element, and devices also have a 'devid' element.
524 */
525 verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
526 verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
527 verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
528 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
529 if (strcmp(type, VDEV_TYPE_DISK) == 0)
530 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
531 (uint64_t)wholedisk) == 0);
532
533 #ifdef have_devid
534 /*
535 * For a whole disk, defer getting its devid until after labeling it.
536 */
537 if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
538 /*
539 * Get the devid for the device.
540 */
541 int fd;
542 ddi_devid_t devid;
543 char *minor = NULL, *devid_str = NULL;
544
545 if ((fd = open(path, O_RDONLY)) < 0) {
546 (void) fprintf(stderr, gettext("cannot open '%s': "
547 "%s\n"), path, strerror(errno));
548 nvlist_free(vdev);
549 return (NULL);
550 }
551
552 if (devid_get(fd, &devid) == 0) {
553 if (devid_get_minor_name(fd, &minor) == 0 &&
554 (devid_str = devid_str_encode(devid, minor)) !=
555 NULL) {
556 verify(nvlist_add_string(vdev,
557 ZPOOL_CONFIG_DEVID, devid_str) == 0);
558 }
559 if (devid_str != NULL)
560 devid_str_free(devid_str);
561 if (minor != NULL)
562 devid_str_free(minor);
563 devid_free(devid);
564 }
565
566 (void) close(fd);
567 }
568 #endif
569
570 return (vdev);
571 }
572
/*
 * Go through and verify the replication level of the pool is consistent.
 * Performs the following checks:
 *
 *	For the new spec, verifies that devices in mirrors and raidz are the
 *	same size.
 *
 *	If the current configuration already has inconsistent replication
 *	levels, ignore any other potential problems in the new spec.
 *
 *	Otherwise, make sure that the current spec (if there is one) and the new
 *	spec have consistent replication levels.
 */
typedef struct replication_level {
	char *zprl_type;	/* vdev type: "disk", "file", "mirror", ... */
	uint64_t zprl_children;	/* number of children in the toplevel vdev */
	uint64_t zprl_parity;	/* raidz parity level; 0 for non-raidz */
} replication_level_t;

/* Devices within ~16MB of each other are treated as "the same size". */
#define	ZPOOL_FUZZ	(16 * 1024 * 1024)
593
594 /*
595 * Given a list of toplevel vdevs, return the current replication level. If
596 * the config is inconsistent, then NULL is returned. If 'fatal' is set, then
597 * an error message will be displayed for each self-inconsistent vdev.
598 */
599 static replication_level_t *
get_replication(nvlist_t * nvroot,boolean_t fatal)600 get_replication(nvlist_t *nvroot, boolean_t fatal)
601 {
602 nvlist_t **top;
603 uint_t t, toplevels;
604 nvlist_t **child;
605 uint_t c, children;
606 nvlist_t *nv;
607 char *type;
608 replication_level_t lastrep = {0};
609 replication_level_t rep;
610 replication_level_t *ret;
611 boolean_t dontreport;
612
613 ret = safe_malloc(sizeof (replication_level_t));
614
615 verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
616 &top, &toplevels) == 0);
617
618 for (t = 0; t < toplevels; t++) {
619 uint64_t is_log = B_FALSE;
620
621 nv = top[t];
622
623 /*
624 * For separate logs we ignore the top level vdev replication
625 * constraints.
626 */
627 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
628 if (is_log)
629 continue;
630
631 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE,
632 &type) == 0);
633 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
634 &child, &children) != 0) {
635 /*
636 * This is a 'file' or 'disk' vdev.
637 */
638 rep.zprl_type = type;
639 rep.zprl_children = 1;
640 rep.zprl_parity = 0;
641 } else {
642 uint64_t vdev_size;
643
644 /*
645 * This is a mirror or RAID-Z vdev. Go through and make
646 * sure the contents are all the same (files vs. disks),
647 * keeping track of the number of elements in the
648 * process.
649 *
650 * We also check that the size of each vdev (if it can
651 * be determined) is the same.
652 */
653 rep.zprl_type = type;
654 rep.zprl_children = 0;
655
656 if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
657 verify(nvlist_lookup_uint64(nv,
658 ZPOOL_CONFIG_NPARITY,
659 &rep.zprl_parity) == 0);
660 assert(rep.zprl_parity != 0);
661 } else {
662 rep.zprl_parity = 0;
663 }
664
665 /*
666 * The 'dontreport' variable indicates that we've
667 * already reported an error for this spec, so don't
668 * bother doing it again.
669 */
670 type = NULL;
671 dontreport = 0;
672 vdev_size = -1ULL;
673 for (c = 0; c < children; c++) {
674 boolean_t is_replacing, is_spare;
675 nvlist_t *cnv = child[c];
676 char *path;
677 struct stat64 statbuf;
678 uint64_t size = -1ULL;
679 char *childtype;
680 int fd, err;
681
682 rep.zprl_children++;
683
684 verify(nvlist_lookup_string(cnv,
685 ZPOOL_CONFIG_TYPE, &childtype) == 0);
686
687 /*
688 * If this is a replacing or spare vdev, then
689 * get the real first child of the vdev.
690 */
691 is_replacing = strcmp(childtype,
692 VDEV_TYPE_REPLACING) == 0;
693 is_spare = strcmp(childtype,
694 VDEV_TYPE_SPARE) == 0;
695 if (is_replacing || is_spare) {
696 nvlist_t **rchild;
697 uint_t rchildren;
698
699 verify(nvlist_lookup_nvlist_array(cnv,
700 ZPOOL_CONFIG_CHILDREN, &rchild,
701 &rchildren) == 0);
702 assert((is_replacing && rchildren == 2)
703 || (is_spare && rchildren >= 2));
704 cnv = rchild[0];
705
706 verify(nvlist_lookup_string(cnv,
707 ZPOOL_CONFIG_TYPE,
708 &childtype) == 0);
709 }
710
711 verify(nvlist_lookup_string(cnv,
712 ZPOOL_CONFIG_PATH, &path) == 0);
713
714 /*
715 * If we have a raidz/mirror that combines disks
716 * with files, report it as an error.
717 */
718 if (!dontreport && type != NULL &&
719 strcmp(type, childtype) != 0) {
720 if (ret != NULL)
721 free(ret);
722 ret = NULL;
723 if (fatal)
724 vdev_error(gettext(
725 "mismatched replication "
726 "level: %s contains both "
727 "files and devices\n"),
728 rep.zprl_type);
729 else
730 return (NULL);
731 dontreport = B_TRUE;
732 }
733
734 /*
735 * According to stat(2), the value of 'st_size'
736 * is undefined for block devices and character
737 * devices. But there is no effective way to
738 * determine the real size in userland.
739 *
740 * Instead, we'll take advantage of an
741 * implementation detail of spec_size(). If the
742 * device is currently open, then we (should)
743 * return a valid size.
744 *
745 * If we still don't get a valid size (indicated
746 * by a size of 0 or MAXOFFSET_T), then ignore
747 * this device altogether.
748 */
749 if ((fd = open(path, O_RDONLY)) >= 0) {
750 err = fstat64(fd, &statbuf);
751 (void) close(fd);
752 } else {
753 err = stat64(path, &statbuf);
754 }
755
756 if (err != 0 ||
757 statbuf.st_size == 0 ||
758 statbuf.st_size == MAXOFFSET_T)
759 continue;
760
761 size = statbuf.st_size;
762
763 /*
764 * Also make sure that devices and
765 * slices have a consistent size. If
766 * they differ by a significant amount
767 * (~16MB) then report an error.
768 */
769 if (!dontreport &&
770 (vdev_size != -1ULL &&
771 (labs(size - vdev_size) >
772 ZPOOL_FUZZ))) {
773 if (ret != NULL)
774 free(ret);
775 ret = NULL;
776 if (fatal)
777 vdev_error(gettext(
778 "%s contains devices of "
779 "different sizes\n"),
780 rep.zprl_type);
781 else
782 return (NULL);
783 dontreport = B_TRUE;
784 }
785
786 type = childtype;
787 vdev_size = size;
788 }
789 }
790
791 /*
792 * At this point, we have the replication of the last toplevel
793 * vdev in 'rep'. Compare it to 'lastrep' to see if its
794 * different.
795 */
796 if (lastrep.zprl_type != NULL) {
797 if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
798 if (ret != NULL)
799 free(ret);
800 ret = NULL;
801 if (fatal)
802 vdev_error(gettext(
803 "mismatched replication level: "
804 "both %s and %s vdevs are "
805 "present\n"),
806 lastrep.zprl_type, rep.zprl_type);
807 else
808 return (NULL);
809 } else if (lastrep.zprl_parity != rep.zprl_parity) {
810 if (ret)
811 free(ret);
812 ret = NULL;
813 if (fatal)
814 vdev_error(gettext(
815 "mismatched replication level: "
816 "both %llu and %llu device parity "
817 "%s vdevs are present\n"),
818 lastrep.zprl_parity,
819 rep.zprl_parity,
820 rep.zprl_type);
821 else
822 return (NULL);
823 } else if (lastrep.zprl_children != rep.zprl_children) {
824 if (ret)
825 free(ret);
826 ret = NULL;
827 if (fatal)
828 vdev_error(gettext(
829 "mismatched replication level: "
830 "both %llu-way and %llu-way %s "
831 "vdevs are present\n"),
832 lastrep.zprl_children,
833 rep.zprl_children,
834 rep.zprl_type);
835 else
836 return (NULL);
837 }
838 }
839 lastrep = rep;
840 }
841
842 if (ret != NULL)
843 *ret = rep;
844
845 return (ret);
846 }
847
848 /*
849 * Check the replication level of the vdev spec against the current pool. Calls
850 * get_replication() to make sure the new spec is self-consistent. If the pool
851 * has a consistent replication level, then we ignore any errors. Otherwise,
852 * report any difference between the two.
853 */
854 static int
check_replication(nvlist_t * config,nvlist_t * newroot)855 check_replication(nvlist_t *config, nvlist_t *newroot)
856 {
857 nvlist_t **child;
858 uint_t children;
859 replication_level_t *current = NULL, *new;
860 int ret;
861
862 /*
863 * If we have a current pool configuration, check to see if it's
864 * self-consistent. If not, simply return success.
865 */
866 if (config != NULL) {
867 nvlist_t *nvroot;
868
869 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
870 &nvroot) == 0);
871 if ((current = get_replication(nvroot, B_FALSE)) == NULL)
872 return (0);
873 }
874 /*
875 * for spares there may be no children, and therefore no
876 * replication level to check
877 */
878 if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
879 &child, &children) != 0) || (children == 0)) {
880 free(current);
881 return (0);
882 }
883
884 /*
885 * If all we have is logs then there's no replication level to check.
886 */
887 if (num_logs(newroot) == children) {
888 free(current);
889 return (0);
890 }
891
892 /*
893 * Get the replication level of the new vdev spec, reporting any
894 * inconsistencies found.
895 */
896 if ((new = get_replication(newroot, B_TRUE)) == NULL) {
897 free(current);
898 return (-1);
899 }
900
901 /*
902 * Check to see if the new vdev spec matches the replication level of
903 * the current pool.
904 */
905 ret = 0;
906 if (current != NULL) {
907 if (strcmp(current->zprl_type, new->zprl_type) != 0) {
908 vdev_error(gettext(
909 "mismatched replication level: pool uses %s "
910 "and new vdev is %s\n"),
911 current->zprl_type, new->zprl_type);
912 ret = -1;
913 } else if (current->zprl_parity != new->zprl_parity) {
914 vdev_error(gettext(
915 "mismatched replication level: pool uses %llu "
916 "device parity and new vdev uses %llu\n"),
917 current->zprl_parity, new->zprl_parity);
918 ret = -1;
919 } else if (current->zprl_children != new->zprl_children) {
920 vdev_error(gettext(
921 "mismatched replication level: pool uses %llu-way "
922 "%s and new vdev uses %llu-way %s\n"),
923 current->zprl_children, current->zprl_type,
924 new->zprl_children, new->zprl_type);
925 ret = -1;
926 }
927 }
928
929 free(new);
930 if (current != NULL)
931 free(current);
932
933 return (ret);
934 }
935
936 #ifdef illumos
/*
 * Go through and find any whole disks in the vdev specification, labelling them
 * as appropriate.  When constructing the vdev spec, we were unable to open this
 * device in order to provide a devid.  Now that we have labelled the disk and
 * know that slice 0 is valid, we can construct the devid now.
 *
 * If the disk was already labeled with an EFI label, we will have gotten the
 * devid already (because we were able to open the whole disk).  Otherwise, we
 * need to get the devid after we label the disk.
 *
 * Recurses depth-first over the vdev tree, including spares and L2ARC
 * devices.  Returns 0 on success, -1 on the first failure.
 */
static int
make_disks(zpool_handle_t *zhp, nvlist_t *nv)
{
	nvlist_t **child;
	uint_t c, children;
	char *type, *path, *diskname;
	char buf[MAXPATHLEN];
	uint64_t wholedisk;
	int fd;
	int ret;
	ddi_devid_t devid;
	char *minor = NULL, *devid_str = NULL;

	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {

		/* Leaf vdev: only whole-disk 'disk' vdevs need labeling. */
		if (strcmp(type, VDEV_TYPE_DISK) != 0)
			return (0);

		/*
		 * We have a disk device.  Get the path to the device
		 * and see if it's a whole disk by appending the backup
		 * slice and stat()ing the device.
		 */
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk) != 0 || !wholedisk)
			return (0);

		/* zpool_label_disk() wants the name without the /dev prefix */
		diskname = strrchr(path, '/');
		assert(diskname != NULL);
		diskname++;
		if (zpool_label_disk(g_zfs, zhp, diskname) == -1)
			return (-1);

		/*
		 * Fill in the devid, now that we've labeled the disk.
		 */
		(void) snprintf(buf, sizeof (buf), "%ss0", path);
		if ((fd = open(buf, O_RDONLY)) < 0) {
			(void) fprintf(stderr,
			    gettext("cannot open '%s': %s\n"),
			    buf, strerror(errno));
			return (-1);
		}

		/* A missing devid is not fatal; just omit it from the nvlist */
		if (devid_get(fd, &devid) == 0) {
			if (devid_get_minor_name(fd, &minor) == 0 &&
			    (devid_str = devid_str_encode(devid, minor)) !=
			    NULL) {
				verify(nvlist_add_string(nv,
				    ZPOOL_CONFIG_DEVID, devid_str) == 0);
			}
			if (devid_str != NULL)
				devid_str_free(devid_str);
			if (minor != NULL)
				devid_str_free(minor);
			devid_free(devid);
		}

		/*
		 * Update the path to refer to the 's0' slice.  The presence of
		 * the 'whole_disk' field indicates to the CLI that we should
		 * chop off the slice number when displaying the device in
		 * future output.
		 */
		verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);

		(void) close(fd);

		return (0);
	}

	/* Interior vdev: recurse into children, spares, and cache devices. */
	for (c = 0; c < children; c++)
		if ((ret = make_disks(zhp, child[c])) != 0)
			return (ret);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0)
		for (c = 0; c < children; c++)
			if ((ret = make_disks(zhp, child[c])) != 0)
				return (ret);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0)
		for (c = 0; c < children; c++)
			if ((ret = make_disks(zhp, child[c])) != 0)
				return (ret);

	return (0);
}
1040 #endif /* illumos */
1041
1042 /*
1043 * Determine if the given path is a hot spare within the given configuration.
1044 */
1045 static boolean_t
is_spare(nvlist_t * config,const char * path)1046 is_spare(nvlist_t *config, const char *path)
1047 {
1048 int fd;
1049 pool_state_t state;
1050 char *name = NULL;
1051 nvlist_t *label;
1052 uint64_t guid, spareguid;
1053 nvlist_t *nvroot;
1054 nvlist_t **spares;
1055 uint_t i, nspares;
1056 boolean_t inuse;
1057
1058 if ((fd = open(path, O_RDONLY)) < 0)
1059 return (B_FALSE);
1060
1061 if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
1062 !inuse ||
1063 state != POOL_STATE_SPARE ||
1064 zpool_read_label(fd, &label) != 0) {
1065 free(name);
1066 (void) close(fd);
1067 return (B_FALSE);
1068 }
1069 free(name);
1070 (void) close(fd);
1071
1072 verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
1073 nvlist_free(label);
1074
1075 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
1076 &nvroot) == 0);
1077 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1078 &spares, &nspares) == 0) {
1079 for (i = 0; i < nspares; i++) {
1080 verify(nvlist_lookup_uint64(spares[i],
1081 ZPOOL_CONFIG_GUID, &spareguid) == 0);
1082 if (spareguid == guid)
1083 return (B_TRUE);
1084 }
1085 }
1086
1087 return (B_FALSE);
1088 }
1089
1090 /*
1091 * Go through and find any devices that are in use. We rely on libdiskmgt for
1092 * the majority of this task.
1093 */
1094 static boolean_t
is_device_in_use(nvlist_t * config,nvlist_t * nv,boolean_t force,boolean_t replacing,boolean_t isspare)1095 is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
1096 boolean_t replacing, boolean_t isspare)
1097 {
1098 nvlist_t **child;
1099 uint_t c, children;
1100 char *type, *path;
1101 int ret = 0;
1102 char buf[MAXPATHLEN];
1103 uint64_t wholedisk;
1104 boolean_t anyinuse = B_FALSE;
1105
1106 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
1107
1108 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1109 &child, &children) != 0) {
1110
1111 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
1112
1113 /*
1114 * As a generic check, we look to see if this is a replace of a
1115 * hot spare within the same pool. If so, we allow it
1116 * regardless of what libdiskmgt or zpool_in_use() says.
1117 */
1118 if (replacing) {
1119 #ifdef illumos
1120 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1121 &wholedisk) == 0 && wholedisk)
1122 (void) snprintf(buf, sizeof (buf), "%ss0",
1123 path);
1124 else
1125 #endif
1126 (void) strlcpy(buf, path, sizeof (buf));
1127
1128 if (is_spare(config, buf))
1129 return (B_FALSE);
1130 }
1131
1132 if (strcmp(type, VDEV_TYPE_DISK) == 0)
1133 ret = check_device(path, force, isspare);
1134 else if (strcmp(type, VDEV_TYPE_FILE) == 0)
1135 ret = check_file(path, force, isspare);
1136
1137 return (ret != 0);
1138 }
1139
1140 for (c = 0; c < children; c++)
1141 if (is_device_in_use(config, child[c], force, replacing,
1142 B_FALSE))
1143 anyinuse = B_TRUE;
1144
1145 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1146 &child, &children) == 0)
1147 for (c = 0; c < children; c++)
1148 if (is_device_in_use(config, child[c], force, replacing,
1149 B_TRUE))
1150 anyinuse = B_TRUE;
1151
1152 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1153 &child, &children) == 0)
1154 for (c = 0; c < children; c++)
1155 if (is_device_in_use(config, child[c], force, replacing,
1156 B_FALSE))
1157 anyinuse = B_TRUE;
1158
1159 return (anyinuse);
1160 }
1161
1162 static const char *
is_grouping(const char * type,int * mindev,int * maxdev)1163 is_grouping(const char *type, int *mindev, int *maxdev)
1164 {
1165 if (strncmp(type, "raidz", 5) == 0) {
1166 const char *p = type + 5;
1167 char *end;
1168 long nparity;
1169
1170 if (*p == '\0') {
1171 nparity = 1;
1172 } else if (*p == '0') {
1173 return (NULL); /* no zero prefixes allowed */
1174 } else {
1175 errno = 0;
1176 nparity = strtol(p, &end, 10);
1177 if (errno != 0 || nparity < 1 || nparity >= 255 ||
1178 *end != '\0')
1179 return (NULL);
1180 }
1181
1182 if (mindev != NULL)
1183 *mindev = nparity + 1;
1184 if (maxdev != NULL)
1185 *maxdev = 255;
1186 return (VDEV_TYPE_RAIDZ);
1187 }
1188
1189 if (maxdev != NULL)
1190 *maxdev = INT_MAX;
1191
1192 if (strcmp(type, "mirror") == 0) {
1193 if (mindev != NULL)
1194 *mindev = 2;
1195 return (VDEV_TYPE_MIRROR);
1196 }
1197
1198 if (strcmp(type, "spare") == 0) {
1199 if (mindev != NULL)
1200 *mindev = 1;
1201 return (VDEV_TYPE_SPARE);
1202 }
1203
1204 if (strcmp(type, "log") == 0) {
1205 if (mindev != NULL)
1206 *mindev = 1;
1207 return (VDEV_TYPE_LOG);
1208 }
1209
1210 if (strcmp(type, "cache") == 0) {
1211 if (mindev != NULL)
1212 *mindev = 1;
1213 return (VDEV_TYPE_L2CACHE);
1214 }
1215
1216 return (NULL);
1217 }
1218
/*
 * Construct a syntactically valid vdev specification,
 * and ensure that all devices and files exist and can be opened.
 * Note: we don't bother freeing anything in the error paths
 * because the program is just going to exit anyway.
 *
 * The argument vector is a sequence of optional grouping keywords
 * ("mirror", "raidz[N]", "spare", "log", "cache"), each followed by its
 * leaf devices; a bare device name becomes a top-level vdev of its own.
 * On success, returns a root nvlist with the top-level vdevs as children
 * and any spares/l2cache devices attached as separate arrays; returns
 * NULL (with a message on stderr) on any specification error.
 */
nvlist_t *
construct_spec(int argc, char **argv)
{
	nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
	int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
	const char *type;
	uint64_t is_log;	/* subsequent vdevs are log devices */
	boolean_t seen_logs;	/* "log" keyword seen at least once */

	top = NULL;
	toplevels = 0;
	spares = NULL;
	l2cache = NULL;
	nspares = 0;
	nlogs = 0;
	nl2cache = 0;
	is_log = B_FALSE;
	seen_logs = B_FALSE;

	while (argc > 0) {
		nv = NULL;

		/*
		 * If it's a mirror or raidz, the subsequent arguments are
		 * its leaves -- until we encounter the next mirror or raidz.
		 */
		if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
			nvlist_t **child = NULL;
			int c, children = 0;

			/* 'spare' may appear at most once; spares are never logs */
			if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
				if (spares != NULL) {
					(void) fprintf(stderr,
					    gettext("invalid vdev "
					    "specification: 'spare' can be "
					    "specified only once\n"));
					return (NULL);
				}
				is_log = B_FALSE;
			}

			if (strcmp(type, VDEV_TYPE_LOG) == 0) {
				if (seen_logs) {
					(void) fprintf(stderr,
					    gettext("invalid vdev "
					    "specification: 'log' can be "
					    "specified only once\n"));
					return (NULL);
				}
				seen_logs = B_TRUE;
				is_log = B_TRUE;
				argc--;
				argv++;
				/*
				 * A log is not a real grouping device.
				 * We just set is_log and continue.
				 */
				continue;
			}

			/* 'cache' may appear at most once; cache devs are never logs */
			if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
				if (l2cache != NULL) {
					(void) fprintf(stderr,
					    gettext("invalid vdev "
					    "specification: 'cache' can be "
					    "specified only once\n"));
					return (NULL);
				}
				is_log = B_FALSE;
			}

			/* Only mirrors (and bare leaves, below) may be logs. */
			if (is_log) {
				if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
					(void) fprintf(stderr,
					    gettext("invalid vdev "
					    "specification: unsupported 'log' "
					    "device: %s\n"), type);
					return (NULL);
				}
				nlogs++;
			}

			/*
			 * Consume leaves for this grouping until the next
			 * grouping keyword (or end of arguments).
			 */
			for (c = 1; c < argc; c++) {
				if (is_grouping(argv[c], NULL, NULL) != NULL)
					break;
				children++;
				child = realloc(child,
				    children * sizeof (nvlist_t *));
				if (child == NULL)
					zpool_no_memory();
				if ((nv = make_leaf_vdev(argv[c], B_FALSE))
				    == NULL)
					return (NULL);
				child[children - 1] = nv;
			}

			if (children < mindev) {
				(void) fprintf(stderr, gettext("invalid vdev "
				    "specification: %s requires at least %d "
				    "devices\n"), argv[0], mindev);
				return (NULL);
			}

			if (children > maxdev) {
				(void) fprintf(stderr, gettext("invalid vdev "
				    "specification: %s supports no more than "
				    "%d devices\n"), argv[0], maxdev);
				return (NULL);
			}

			/* Advance past the keyword and its leaves. */
			argc -= c;
			argv += c;

			if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
				/* Kept aside; not a top-level vdev. */
				spares = child;
				nspares = children;
				continue;
			} else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
				/* Kept aside; not a top-level vdev. */
				l2cache = child;
				nl2cache = children;
				continue;
			} else {
				/* Build the interior (mirror/raidz) nvlist. */
				verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
				    0) == 0);
				verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
				    type) == 0);
				verify(nvlist_add_uint64(nv,
				    ZPOOL_CONFIG_IS_LOG, is_log) == 0);
				if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
					/* mindev was set to nparity + 1 */
					verify(nvlist_add_uint64(nv,
					    ZPOOL_CONFIG_NPARITY,
					    mindev - 1) == 0);
				}
				verify(nvlist_add_nvlist_array(nv,
				    ZPOOL_CONFIG_CHILDREN, child,
				    children) == 0);

				/* The children were copied into nv above. */
				for (c = 0; c < children; c++)
					nvlist_free(child[c]);
				free(child);
			}
		} else {
			/*
			 * We have a device.  Pass off to make_leaf_vdev() to
			 * construct the appropriate nvlist describing the vdev.
			 */
			if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL)
				return (NULL);
			if (is_log)
				nlogs++;
			argc--;
			argv++;
		}

		toplevels++;
		top = realloc(top, toplevels * sizeof (nvlist_t *));
		if (top == NULL)
			zpool_no_memory();
		top[toplevels - 1] = nv;
	}

	if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
		(void) fprintf(stderr, gettext("invalid vdev "
		    "specification: at least one toplevel vdev must be "
		    "specified\n"));
		return (NULL);
	}

	/* A trailing bare "log" keyword with no devices is an error. */
	if (seen_logs && nlogs == 0) {
		(void) fprintf(stderr, gettext("invalid vdev specification: "
		    "log requires at least 1 device\n"));
		return (NULL);
	}

	/*
	 * Finally, create nvroot and add all top-level vdevs to it.
	 */
	verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
	verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) == 0);
	verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    top, toplevels) == 0);
	if (nspares != 0)
		verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    spares, nspares) == 0);
	if (nl2cache != 0)
		verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    l2cache, nl2cache) == 0);

	/* Everything was copied into nvroot; release the working arrays. */
	for (t = 0; t < toplevels; t++)
		nvlist_free(top[t]);
	for (t = 0; t < nspares; t++)
		nvlist_free(spares[t]);
	for (t = 0; t < nl2cache; t++)
		nvlist_free(l2cache[t]);
	if (spares)
		free(spares);
	if (l2cache)
		free(l2cache);
	free(top);

	return (nvroot);
}
1428
1429 nvlist_t *
split_mirror_vdev(zpool_handle_t * zhp,char * newname,nvlist_t * props,splitflags_t flags,int argc,char ** argv)1430 split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
1431 splitflags_t flags, int argc, char **argv)
1432 {
1433 nvlist_t *newroot = NULL, **child;
1434 uint_t c, children;
1435
1436 if (argc > 0) {
1437 if ((newroot = construct_spec(argc, argv)) == NULL) {
1438 (void) fprintf(stderr, gettext("Unable to build a "
1439 "pool from the specified devices\n"));
1440 return (NULL);
1441 }
1442
1443 #ifdef illumos
1444 if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
1445 nvlist_free(newroot);
1446 return (NULL);
1447 }
1448 #endif
1449
1450 /* avoid any tricks in the spec */
1451 verify(nvlist_lookup_nvlist_array(newroot,
1452 ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
1453 for (c = 0; c < children; c++) {
1454 char *path;
1455 const char *type;
1456 int min, max;
1457
1458 verify(nvlist_lookup_string(child[c],
1459 ZPOOL_CONFIG_PATH, &path) == 0);
1460 if ((type = is_grouping(path, &min, &max)) != NULL) {
1461 (void) fprintf(stderr, gettext("Cannot use "
1462 "'%s' as a device for splitting\n"), type);
1463 nvlist_free(newroot);
1464 return (NULL);
1465 }
1466 }
1467 }
1468
1469 if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
1470 nvlist_free(newroot);
1471 return (NULL);
1472 }
1473
1474 return (newroot);
1475 }
1476
1477 /*
1478 * Get and validate the contents of the given vdev specification. This ensures
1479 * that the nvlist returned is well-formed, that all the devices exist, and that
1480 * they are not currently in use by any other known consumer. The 'poolconfig'
1481 * parameter is the current configuration of the pool when adding devices
1482 * existing pool, and is used to perform additional checks, such as changing the
1483 * replication level of the pool. It can be 'NULL' to indicate that this is a
1484 * new pool. The 'force' flag controls whether devices should be forcefully
1485 * added, even if they appear in use.
1486 */
1487 nvlist_t *
make_root_vdev(zpool_handle_t * zhp,int force,int check_rep,boolean_t replacing,boolean_t dryrun,int argc,char ** argv)1488 make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
1489 boolean_t replacing, boolean_t dryrun, int argc, char **argv)
1490 {
1491 nvlist_t *newroot;
1492 nvlist_t *poolconfig = NULL;
1493 is_force = force;
1494
1495 /*
1496 * Construct the vdev specification. If this is successful, we know
1497 * that we have a valid specification, and that all devices can be
1498 * opened.
1499 */
1500 if ((newroot = construct_spec(argc, argv)) == NULL)
1501 return (NULL);
1502
1503 if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))
1504 return (NULL);
1505
1506 /*
1507 * Validate each device to make sure that its not shared with another
1508 * subsystem. We do this even if 'force' is set, because there are some
1509 * uses (such as a dedicated dump device) that even '-f' cannot
1510 * override.
1511 */
1512 if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) {
1513 nvlist_free(newroot);
1514 return (NULL);
1515 }
1516
1517 /*
1518 * Check the replication level of the given vdevs and report any errors
1519 * found. We include the existing pool spec, if any, as we need to
1520 * catch changes against the existing replication level.
1521 */
1522 if (check_rep && check_replication(poolconfig, newroot) != 0) {
1523 nvlist_free(newroot);
1524 return (NULL);
1525 }
1526
1527 #ifdef illumos
1528 /*
1529 * Run through the vdev specification and label any whole disks found.
1530 */
1531 if (!dryrun && make_disks(zhp, newroot) != 0) {
1532 nvlist_free(newroot);
1533 return (NULL);
1534 }
1535 #endif
1536
1537 return (newroot);
1538 }
1539