1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27  * Copyright (c) 2018 Datto Inc.
28  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
29  * Copyright (c) 2017, Intel Corporation.
30  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
31  */
32 
33 #include <errno.h>
34 #include <libintl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <unistd.h>
39 #include <libgen.h>
40 #include <zone.h>
41 #include <sys/stat.h>
42 #include <sys/efi_partition.h>
43 #include <sys/systeminfo.h>
44 #include <sys/vtoc.h>
45 #include <sys/zfs_ioctl.h>
46 #include <sys/vdev_disk.h>
47 #include <dlfcn.h>
48 #include <libzutil.h>
49 
50 #include "zfs_namecheck.h"
51 #include "zfs_prop.h"
52 #include "libzfs_impl.h"
53 #include "zfs_comutil.h"
54 #include "zfeature_common.h"
55 
56 /*
57  * If the device has being dynamically expanded then we need to relabel
58  * the disk to use the new unallocated space.
59  */
60 int
61 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
62 {
63 	int fd, error;
64 
65 	if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
66 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
67 		    "relabel '%s': unable to open device: %d"), path, errno);
68 		return (zfs_error(hdl, EZFS_OPENFAILED, msg));
69 	}
70 
71 	/*
72 	 * It's possible that we might encounter an error if the device
73 	 * does not have any unallocated space left. If so, we simply
74 	 * ignore that error and continue on.
75 	 *
76 	 * Also, we don't call efi_rescan() - that would just return EBUSY.
77 	 * The module will do it for us in vdev_disk_open().
78 	 */
79 	error = efi_use_whole_disk(fd);
80 
81 	/* Flush the buffers to disk and invalidate the page cache. */
82 	(void) fsync(fd);
83 	(void) ioctl(fd, BLKFLSBUF);
84 
85 	(void) close(fd);
86 	if (error && error != VT_ENOSPC) {
87 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
88 		    "relabel '%s': unable to read disk capacity"), path);
89 		return (zfs_error(hdl, EZFS_NOCAP, msg));
90 	}
91 	return (0);
92 }
93 
94 /*
95  * Read the EFI label from the config, if a label does not exist then
96  * pass back the error to the caller. If the caller has passed a non-NULL
97  * diskaddr argument then we set it to the starting address of the EFI
98  * partition.
99  */
100 static int
101 read_efi_label(nvlist_t *config, diskaddr_t *sb)
102 {
103 	char *path;
104 	int fd;
105 	char diskname[MAXPATHLEN];
106 	int err = -1;
107 
108 	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
109 		return (err);
110 
111 	(void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
112 	    strrchr(path, '/'));
113 	if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) {
114 		struct dk_gpt *vtoc;
115 
116 		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
117 			if (sb != NULL)
118 				*sb = vtoc->efi_parts[0].p_start;
119 			efi_free(vtoc);
120 		}
121 		(void) close(fd);
122 	}
123 	return (err);
124 }
125 
126 /*
127  * determine where a partition starts on a disk in the current
128  * configuration
129  */
130 static diskaddr_t
131 find_start_block(nvlist_t *config)
132 {
133 	nvlist_t **child;
134 	uint_t c, children;
135 	diskaddr_t sb = MAXOFFSET_T;
136 	uint64_t wholedisk;
137 
138 	if (nvlist_lookup_nvlist_array(config,
139 	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
140 		if (nvlist_lookup_uint64(config,
141 		    ZPOOL_CONFIG_WHOLE_DISK,
142 		    &wholedisk) != 0 || !wholedisk) {
143 			return (MAXOFFSET_T);
144 		}
145 		if (read_efi_label(config, &sb) < 0)
146 			sb = MAXOFFSET_T;
147 		return (sb);
148 	}
149 
150 	for (c = 0; c < children; c++) {
151 		sb = find_start_block(child[c]);
152 		if (sb != MAXOFFSET_T) {
153 			return (sb);
154 		}
155 	}
156 	return (MAXOFFSET_T);
157 }
158 
159 static int
160 zpool_label_disk_check(char *path)
161 {
162 	struct dk_gpt *vtoc;
163 	int fd, err;
164 
165 	if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
166 		return (errno);
167 
168 	if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
169 		(void) close(fd);
170 		return (err);
171 	}
172 
173 	if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
174 		efi_free(vtoc);
175 		(void) close(fd);
176 		return (EIDRM);
177 	}
178 
179 	efi_free(vtoc);
180 	(void) close(fd);
181 	return (0);
182 }
183 
/*
 * Generate a unique partition name for the ZFS member.  Partitions must
 * have unique names to ensure udev will be able to create symlinks under
 * /dev/disk/by-partlabel/ for all pool members.  The partition names are
 * of the form zfs-<unique-id>, where <unique-id> is a 64-bit value printed
 * as 16 lower-case hexadecimal digits.
 *
 *	label_name	buffer that receives the NUL terminated name
 *	label_size	size of 'label_name' in bytes; the name is truncated
 *			to fit
 *
 * The id is read from /dev/urandom.  If that fails (or yields zero) it is
 * built from two rand() calls instead.
 *
 * Nothing is written when 'label_name' is NULL or 'label_size' is not
 * positive.
 */
static void
zpool_label_name(char *label_name, int label_size)
{
	uint64_t id = 0;
	int fd;

	/*
	 * A negative size would turn into an enormous size_t once handed
	 * to snprintf(), silently removing the bound on the write.
	 */
	if (label_name == NULL || label_size <= 0)
		return;

	fd = open("/dev/urandom", O_RDONLY);
	if (fd >= 0) {
		/* A short or failed read leaves us without a usable id. */
		if (read(fd, &id, sizeof (id)) != (ssize_t)sizeof (id))
			id = 0;

		(void) close(fd);
	}

	if (id == 0)
		id = (((uint64_t)rand()) << 32) | (uint64_t)rand();

	(void) snprintf(label_name, (size_t)label_size, "zfs-%016llx",
	    (unsigned long long)id);
}
209 
210 /*
211  * Label an individual disk.  The name provided is the short name,
212  * stripped of any leading /dev path.
213  */
214 int
215 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
216 {
217 	char path[MAXPATHLEN];
218 	struct dk_gpt *vtoc;
219 	int rval, fd;
220 	size_t resv = EFI_MIN_RESV_SIZE;
221 	uint64_t slice_size;
222 	diskaddr_t start_block;
223 	char errbuf[1024];
224 
225 	/* prepare an error message just in case */
226 	(void) snprintf(errbuf, sizeof (errbuf),
227 	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
228 
229 	if (zhp) {
230 		nvlist_t *nvroot;
231 
232 		verify(nvlist_lookup_nvlist(zhp->zpool_config,
233 		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
234 
235 		if (zhp->zpool_start_block == 0)
236 			start_block = find_start_block(nvroot);
237 		else
238 			start_block = zhp->zpool_start_block;
239 		zhp->zpool_start_block = start_block;
240 	} else {
241 		/* new pool */
242 		start_block = NEW_START_BLOCK;
243 	}
244 
245 	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
246 
247 	if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
248 		/*
249 		 * This shouldn't happen.  We've long since verified that this
250 		 * is a valid device.
251 		 */
252 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
253 		    "label '%s': unable to open device: %d"), path, errno);
254 		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
255 	}
256 
257 	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
258 		/*
259 		 * The only way this can fail is if we run out of memory, or we
260 		 * were unable to read the disk's capacity
261 		 */
262 		if (errno == ENOMEM)
263 			(void) no_memory(hdl);
264 
265 		(void) close(fd);
266 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
267 		    "label '%s': unable to read disk capacity"), path);
268 
269 		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
270 	}
271 
272 	slice_size = vtoc->efi_last_u_lba + 1;
273 	slice_size -= EFI_MIN_RESV_SIZE;
274 	if (start_block == MAXOFFSET_T)
275 		start_block = NEW_START_BLOCK;
276 	slice_size -= start_block;
277 	slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
278 
279 	vtoc->efi_parts[0].p_start = start_block;
280 	vtoc->efi_parts[0].p_size = slice_size;
281 
282 	/*
283 	 * Why we use V_USR: V_BACKUP confuses users, and is considered
284 	 * disposable by some EFI utilities (since EFI doesn't have a backup
285 	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
286 	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
287 	 * etc. were all pretty specific.  V_USR is as close to reality as we
288 	 * can get, in the absence of V_OTHER.
289 	 */
290 	vtoc->efi_parts[0].p_tag = V_USR;
291 	zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
292 
293 	vtoc->efi_parts[8].p_start = slice_size + start_block;
294 	vtoc->efi_parts[8].p_size = resv;
295 	vtoc->efi_parts[8].p_tag = V_RESERVED;
296 
297 	rval = efi_write(fd, vtoc);
298 
299 	/* Flush the buffers to disk and invalidate the page cache. */
300 	(void) fsync(fd);
301 	(void) ioctl(fd, BLKFLSBUF);
302 
303 	if (rval == 0)
304 		rval = efi_rescan(fd);
305 
306 	/*
307 	 * Some block drivers (like pcata) may not support EFI GPT labels.
308 	 * Print out a helpful error message directing the user to manually
309 	 * label the disk and give a specific slice.
310 	 */
311 	if (rval != 0) {
312 		(void) close(fd);
313 		efi_free(vtoc);
314 
315 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
316 		    "parted(8) and then provide a specific slice: %d"), rval);
317 		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
318 	}
319 
320 	(void) close(fd);
321 	efi_free(vtoc);
322 
323 	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
324 	(void) zfs_append_partition(path, MAXPATHLEN);
325 
326 	/* Wait to udev to signal use the device has settled. */
327 	rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
328 	if (rval) {
329 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
330 		    "detect device partitions on '%s': %d"), path, rval);
331 		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
332 	}
333 
334 	/* We can't be to paranoid.  Read the label back and verify it. */
335 	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
336 	rval = zpool_label_disk_check(path);
337 	if (rval) {
338 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
339 		    "EFI label on '%s' is damaged.  Ensure\nthis device "
340 		    "is not in use, and is functioning properly: %d"),
341 		    path, rval);
342 		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
343 	}
344 	return (0);
345 }
346