/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
 * Copyright (c) 2018 Datto Inc.
 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 * Copyright (c) 2017, Intel Corporation.
30 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com> 31 */ 32 33 #include <errno.h> 34 #include <libintl.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <strings.h> 38 #include <unistd.h> 39 #include <libgen.h> 40 #include <zone.h> 41 #include <sys/stat.h> 42 #include <sys/efi_partition.h> 43 #include <sys/systeminfo.h> 44 #include <sys/vtoc.h> 45 #include <sys/zfs_ioctl.h> 46 #include <sys/vdev_disk.h> 47 #include <dlfcn.h> 48 #include <libzutil.h> 49 50 #include "zfs_namecheck.h" 51 #include "zfs_prop.h" 52 #include "libzfs_impl.h" 53 #include "zfs_comutil.h" 54 #include "zfeature_common.h" 55 56 /* 57 * If the device has being dynamically expanded then we need to relabel 58 * the disk to use the new unallocated space. 59 */ 60 int 61 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg) 62 { 63 int fd, error; 64 65 if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { 66 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " 67 "relabel '%s': unable to open device: %d"), path, errno); 68 return (zfs_error(hdl, EZFS_OPENFAILED, msg)); 69 } 70 71 /* 72 * It's possible that we might encounter an error if the device 73 * does not have any unallocated space left. If so, we simply 74 * ignore that error and continue on. 75 */ 76 error = efi_use_whole_disk(fd); 77 78 /* Flush the buffers to disk and invalidate the page cache. */ 79 (void) fsync(fd); 80 (void) ioctl(fd, BLKFLSBUF); 81 82 (void) close(fd); 83 if (error && error != VT_ENOSPC) { 84 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " 85 "relabel '%s': unable to read disk capacity"), path); 86 return (zfs_error(hdl, EZFS_NOCAP, msg)); 87 } 88 return (0); 89 } 90 91 /* 92 * Read the EFI label from the config, if a label does not exist then 93 * pass back the error to the caller. If the caller has passed a non-NULL 94 * diskaddr argument then we set it to the starting address of the EFI 95 * partition. 
96 */ 97 static int 98 read_efi_label(nvlist_t *config, diskaddr_t *sb) 99 { 100 char *path; 101 int fd; 102 char diskname[MAXPATHLEN]; 103 int err = -1; 104 105 if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0) 106 return (err); 107 108 (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT, 109 strrchr(path, '/')); 110 if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) { 111 struct dk_gpt *vtoc; 112 113 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { 114 if (sb != NULL) 115 *sb = vtoc->efi_parts[0].p_start; 116 efi_free(vtoc); 117 } 118 (void) close(fd); 119 } 120 return (err); 121 } 122 123 /* 124 * determine where a partition starts on a disk in the current 125 * configuration 126 */ 127 static diskaddr_t 128 find_start_block(nvlist_t *config) 129 { 130 nvlist_t **child; 131 uint_t c, children; 132 diskaddr_t sb = MAXOFFSET_T; 133 uint64_t wholedisk; 134 135 if (nvlist_lookup_nvlist_array(config, 136 ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { 137 if (nvlist_lookup_uint64(config, 138 ZPOOL_CONFIG_WHOLE_DISK, 139 &wholedisk) != 0 || !wholedisk) { 140 return (MAXOFFSET_T); 141 } 142 if (read_efi_label(config, &sb) < 0) 143 sb = MAXOFFSET_T; 144 return (sb); 145 } 146 147 for (c = 0; c < children; c++) { 148 sb = find_start_block(child[c]); 149 if (sb != MAXOFFSET_T) { 150 return (sb); 151 } 152 } 153 return (MAXOFFSET_T); 154 } 155 156 static int 157 zpool_label_disk_check(char *path) 158 { 159 struct dk_gpt *vtoc; 160 int fd, err; 161 162 if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0) 163 return (errno); 164 165 if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) { 166 (void) close(fd); 167 return (err); 168 } 169 170 if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { 171 efi_free(vtoc); 172 (void) close(fd); 173 return (EIDRM); 174 } 175 176 efi_free(vtoc); 177 (void) close(fd); 178 return (0); 179 } 180 181 /* 182 * Generate a unique partition name for the ZFS member. 
Partitions must 183 * have unique names to ensure udev will be able to create symlinks under 184 * /dev/disk/by-partlabel/ for all pool members. The partition names are 185 * of the form <pool>-<unique-id>. 186 */ 187 static void 188 zpool_label_name(char *label_name, int label_size) 189 { 190 uint64_t id = 0; 191 int fd; 192 193 fd = open("/dev/urandom", O_RDONLY); 194 if (fd >= 0) { 195 if (read(fd, &id, sizeof (id)) != sizeof (id)) 196 id = 0; 197 198 close(fd); 199 } 200 201 if (id == 0) 202 id = (((uint64_t)rand()) << 32) | (uint64_t)rand(); 203 204 snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id); 205 } 206 207 /* 208 * Label an individual disk. The name provided is the short name, 209 * stripped of any leading /dev path. 210 */ 211 int 212 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) 213 { 214 char path[MAXPATHLEN]; 215 struct dk_gpt *vtoc; 216 int rval, fd; 217 size_t resv = EFI_MIN_RESV_SIZE; 218 uint64_t slice_size; 219 diskaddr_t start_block; 220 char errbuf[1024]; 221 222 /* prepare an error message just in case */ 223 (void) snprintf(errbuf, sizeof (errbuf), 224 dgettext(TEXT_DOMAIN, "cannot label '%s'"), name); 225 226 if (zhp) { 227 nvlist_t *nvroot; 228 229 verify(nvlist_lookup_nvlist(zhp->zpool_config, 230 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 231 232 if (zhp->zpool_start_block == 0) 233 start_block = find_start_block(nvroot); 234 else 235 start_block = zhp->zpool_start_block; 236 zhp->zpool_start_block = start_block; 237 } else { 238 /* new pool */ 239 start_block = NEW_START_BLOCK; 240 } 241 242 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); 243 244 if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) { 245 /* 246 * This shouldn't happen. We've long since verified that this 247 * is a valid device. 
248 */ 249 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " 250 "label '%s': unable to open device: %d"), path, errno); 251 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); 252 } 253 254 if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) { 255 /* 256 * The only way this can fail is if we run out of memory, or we 257 * were unable to read the disk's capacity 258 */ 259 if (errno == ENOMEM) 260 (void) no_memory(hdl); 261 262 (void) close(fd); 263 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " 264 "label '%s': unable to read disk capacity"), path); 265 266 return (zfs_error(hdl, EZFS_NOCAP, errbuf)); 267 } 268 269 slice_size = vtoc->efi_last_u_lba + 1; 270 slice_size -= EFI_MIN_RESV_SIZE; 271 if (start_block == MAXOFFSET_T) 272 start_block = NEW_START_BLOCK; 273 slice_size -= start_block; 274 slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT); 275 276 vtoc->efi_parts[0].p_start = start_block; 277 vtoc->efi_parts[0].p_size = slice_size; 278 279 /* 280 * Why we use V_USR: V_BACKUP confuses users, and is considered 281 * disposable by some EFI utilities (since EFI doesn't have a backup 282 * slice). V_UNASSIGNED is supposed to be used only for zero size 283 * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT, 284 * etc. were all pretty specific. V_USR is as close to reality as we 285 * can get, in the absence of V_OTHER. 286 */ 287 vtoc->efi_parts[0].p_tag = V_USR; 288 zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN); 289 290 vtoc->efi_parts[8].p_start = slice_size + start_block; 291 vtoc->efi_parts[8].p_size = resv; 292 vtoc->efi_parts[8].p_tag = V_RESERVED; 293 294 rval = efi_write(fd, vtoc); 295 296 /* Flush the buffers to disk and invalidate the page cache. */ 297 (void) fsync(fd); 298 (void) ioctl(fd, BLKFLSBUF); 299 300 if (rval == 0) 301 rval = efi_rescan(fd); 302 303 /* 304 * Some block drivers (like pcata) may not support EFI GPT labels. 
305 * Print out a helpful error message directing the user to manually 306 * label the disk and give a specific slice. 307 */ 308 if (rval != 0) { 309 (void) close(fd); 310 efi_free(vtoc); 311 312 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using " 313 "parted(8) and then provide a specific slice: %d"), rval); 314 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); 315 } 316 317 (void) close(fd); 318 efi_free(vtoc); 319 320 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); 321 (void) zfs_append_partition(path, MAXPATHLEN); 322 323 /* Wait to udev to signal use the device has settled. */ 324 rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT); 325 if (rval) { 326 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to " 327 "detect device partitions on '%s': %d"), path, rval); 328 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); 329 } 330 331 /* We can't be to paranoid. Read the label back and verify it. */ 332 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); 333 rval = zpool_label_disk_check(path); 334 if (rval) { 335 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written " 336 "EFI label on '%s' is damaged. Ensure\nthis device " 337 "is not in use, and is functioning properly: %d"), 338 path, rval); 339 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); 340 } 341 return (0); 342 } 343