/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
 * Copyright (c) 2018 Datto Inc.
 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 * Copyright (c) 2017, Intel Corporation.
30 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com> 31 */ 32 33 #include <errno.h> 34 #include <libintl.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <strings.h> 38 #include <unistd.h> 39 #include <libgen.h> 40 #include <zone.h> 41 #include <sys/stat.h> 42 #include <sys/efi_partition.h> 43 #include <sys/systeminfo.h> 44 #include <sys/vtoc.h> 45 #include <sys/zfs_ioctl.h> 46 #include <sys/vdev_disk.h> 47 #include <dlfcn.h> 48 #include <libzutil.h> 49 50 #include "zfs_namecheck.h" 51 #include "zfs_prop.h" 52 #include "libzfs_impl.h" 53 #include "zfs_comutil.h" 54 #include "zfeature_common.h" 55 56 /* 57 * If the device has being dynamically expanded then we need to relabel 58 * the disk to use the new unallocated space. 59 */ 60 int 61 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg) 62 { 63 int fd, error; 64 65 if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { 66 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " 67 "relabel '%s': unable to open device: %d"), path, errno); 68 return (zfs_error(hdl, EZFS_OPENFAILED, msg)); 69 } 70 71 /* 72 * It's possible that we might encounter an error if the device 73 * does not have any unallocated space left. If so, we simply 74 * ignore that error and continue on. 75 * 76 * Also, we don't call efi_rescan() - that would just return EBUSY. 77 * The module will do it for us in vdev_disk_open(). 78 */ 79 error = efi_use_whole_disk(fd); 80 81 /* Flush the buffers to disk and invalidate the page cache. */ 82 (void) fsync(fd); 83 (void) ioctl(fd, BLKFLSBUF); 84 85 (void) close(fd); 86 if (error && error != VT_ENOSPC) { 87 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " 88 "relabel '%s': unable to read disk capacity"), path); 89 return (zfs_error(hdl, EZFS_NOCAP, msg)); 90 } 91 return (0); 92 } 93 94 /* 95 * Read the EFI label from the config, if a label does not exist then 96 * pass back the error to the caller. 
If the caller has passed a non-NULL 97 * diskaddr argument then we set it to the starting address of the EFI 98 * partition. 99 */ 100 static int 101 read_efi_label(nvlist_t *config, diskaddr_t *sb) 102 { 103 char *path; 104 int fd; 105 char diskname[MAXPATHLEN]; 106 int err = -1; 107 108 if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0) 109 return (err); 110 111 (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT, 112 strrchr(path, '/')); 113 if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) { 114 struct dk_gpt *vtoc; 115 116 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { 117 if (sb != NULL) 118 *sb = vtoc->efi_parts[0].p_start; 119 efi_free(vtoc); 120 } 121 (void) close(fd); 122 } 123 return (err); 124 } 125 126 /* 127 * determine where a partition starts on a disk in the current 128 * configuration 129 */ 130 static diskaddr_t 131 find_start_block(nvlist_t *config) 132 { 133 nvlist_t **child; 134 uint_t c, children; 135 diskaddr_t sb = MAXOFFSET_T; 136 uint64_t wholedisk; 137 138 if (nvlist_lookup_nvlist_array(config, 139 ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { 140 if (nvlist_lookup_uint64(config, 141 ZPOOL_CONFIG_WHOLE_DISK, 142 &wholedisk) != 0 || !wholedisk) { 143 return (MAXOFFSET_T); 144 } 145 if (read_efi_label(config, &sb) < 0) 146 sb = MAXOFFSET_T; 147 return (sb); 148 } 149 150 for (c = 0; c < children; c++) { 151 sb = find_start_block(child[c]); 152 if (sb != MAXOFFSET_T) { 153 return (sb); 154 } 155 } 156 return (MAXOFFSET_T); 157 } 158 159 static int 160 zpool_label_disk_check(char *path) 161 { 162 struct dk_gpt *vtoc; 163 int fd, err; 164 165 if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0) 166 return (errno); 167 168 if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) { 169 (void) close(fd); 170 return (err); 171 } 172 173 if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { 174 efi_free(vtoc); 175 (void) close(fd); 176 return (EIDRM); 177 } 178 179 efi_free(vtoc); 180 (void) close(fd); 181 return (0); 182 } 183 184 /* 
185 * Generate a unique partition name for the ZFS member. Partitions must 186 * have unique names to ensure udev will be able to create symlinks under 187 * /dev/disk/by-partlabel/ for all pool members. The partition names are 188 * of the form <pool>-<unique-id>. 189 */ 190 static void 191 zpool_label_name(char *label_name, int label_size) 192 { 193 uint64_t id = 0; 194 int fd; 195 196 fd = open("/dev/urandom", O_RDONLY); 197 if (fd >= 0) { 198 if (read(fd, &id, sizeof (id)) != sizeof (id)) 199 id = 0; 200 201 close(fd); 202 } 203 204 if (id == 0) 205 id = (((uint64_t)rand()) << 32) | (uint64_t)rand(); 206 207 snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id); 208 } 209 210 /* 211 * Label an individual disk. The name provided is the short name, 212 * stripped of any leading /dev path. 213 */ 214 int 215 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) 216 { 217 char path[MAXPATHLEN]; 218 struct dk_gpt *vtoc; 219 int rval, fd; 220 size_t resv = EFI_MIN_RESV_SIZE; 221 uint64_t slice_size; 222 diskaddr_t start_block; 223 char errbuf[1024]; 224 225 /* prepare an error message just in case */ 226 (void) snprintf(errbuf, sizeof (errbuf), 227 dgettext(TEXT_DOMAIN, "cannot label '%s'"), name); 228 229 if (zhp) { 230 nvlist_t *nvroot; 231 232 verify(nvlist_lookup_nvlist(zhp->zpool_config, 233 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 234 235 if (zhp->zpool_start_block == 0) 236 start_block = find_start_block(nvroot); 237 else 238 start_block = zhp->zpool_start_block; 239 zhp->zpool_start_block = start_block; 240 } else { 241 /* new pool */ 242 start_block = NEW_START_BLOCK; 243 } 244 245 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); 246 247 if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) { 248 /* 249 * This shouldn't happen. We've long since verified that this 250 * is a valid device. 
251 */ 252 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " 253 "label '%s': unable to open device: %d"), path, errno); 254 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); 255 } 256 257 if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) { 258 /* 259 * The only way this can fail is if we run out of memory, or we 260 * were unable to read the disk's capacity 261 */ 262 if (errno == ENOMEM) 263 (void) no_memory(hdl); 264 265 (void) close(fd); 266 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " 267 "label '%s': unable to read disk capacity"), path); 268 269 return (zfs_error(hdl, EZFS_NOCAP, errbuf)); 270 } 271 272 slice_size = vtoc->efi_last_u_lba + 1; 273 slice_size -= EFI_MIN_RESV_SIZE; 274 if (start_block == MAXOFFSET_T) 275 start_block = NEW_START_BLOCK; 276 slice_size -= start_block; 277 slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT); 278 279 vtoc->efi_parts[0].p_start = start_block; 280 vtoc->efi_parts[0].p_size = slice_size; 281 282 /* 283 * Why we use V_USR: V_BACKUP confuses users, and is considered 284 * disposable by some EFI utilities (since EFI doesn't have a backup 285 * slice). V_UNASSIGNED is supposed to be used only for zero size 286 * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT, 287 * etc. were all pretty specific. V_USR is as close to reality as we 288 * can get, in the absence of V_OTHER. 289 */ 290 vtoc->efi_parts[0].p_tag = V_USR; 291 zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN); 292 293 vtoc->efi_parts[8].p_start = slice_size + start_block; 294 vtoc->efi_parts[8].p_size = resv; 295 vtoc->efi_parts[8].p_tag = V_RESERVED; 296 297 rval = efi_write(fd, vtoc); 298 299 /* Flush the buffers to disk and invalidate the page cache. */ 300 (void) fsync(fd); 301 (void) ioctl(fd, BLKFLSBUF); 302 303 if (rval == 0) 304 rval = efi_rescan(fd); 305 306 /* 307 * Some block drivers (like pcata) may not support EFI GPT labels. 
308 * Print out a helpful error message directing the user to manually 309 * label the disk and give a specific slice. 310 */ 311 if (rval != 0) { 312 (void) close(fd); 313 efi_free(vtoc); 314 315 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using " 316 "parted(8) and then provide a specific slice: %d"), rval); 317 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); 318 } 319 320 (void) close(fd); 321 efi_free(vtoc); 322 323 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); 324 (void) zfs_append_partition(path, MAXPATHLEN); 325 326 /* Wait to udev to signal use the device has settled. */ 327 rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT); 328 if (rval) { 329 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to " 330 "detect device partitions on '%s': %d"), path, rval); 331 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); 332 } 333 334 /* We can't be to paranoid. Read the label back and verify it. */ 335 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); 336 rval = zpool_label_disk_check(path); 337 if (rval) { 338 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written " 339 "EFI label on '%s' is damaged. Ensure\nthis device " 340 "is not in use, and is functioning properly: %d"), 341 path, rval); 342 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); 343 } 344 return (0); 345 } 346