xref: /illumos-gate/usr/src/uts/common/fs/zfs/vdev_disk.c (revision fb9f9b97)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/zfs_context.h>
30 #include <sys/spa.h>
31 #include <sys/vdev_disk.h>
32 #include <sys/vdev_impl.h>
33 #include <sys/fs/zfs.h>
34 #include <sys/zio.h>
35 #include <sys/sunddi.h>
36 
37 /*
38  * Virtual device vector for disks.
39  */
40 
41 extern ldi_ident_t zfs_li;
42 
43 typedef struct vdev_disk_buf {
44 	buf_t	vdb_buf;
45 	zio_t	*vdb_io;
46 } vdev_disk_buf_t;
47 
48 static int
49 vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
50 {
51 	vdev_disk_t *dvd;
52 	int error;
53 
54 	/*
55 	 * We must have a pathname, and it must be absolute.
56 	 */
57 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
58 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
59 		return (EINVAL);
60 	}
61 
62 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
63 
64 	/*
65 	 * When opening a disk device, we want to preserve the user's original
66 	 * intent.  We always want to open the device by the path the user gave
67 	 * us, even if it is one of multiple paths to the save device.  But we
68 	 * also want to be able to survive disks being removed/recabled.
69 	 * Therefore the sequence of opening devices is:
70 	 *
71 	 * 1. Try opening the device by path.
72 	 *
73 	 * 	a. First append "s0" to see if this is a whole disk
74 	 * 	b. Fall back to path otherwise
75 	 *
76 	 * 2. If the devid of the device matches the stored value, return
77 	 *    success.
78 	 *
79 	 * 3. Otherwise, the device may have moved.  Try opening the device
80 	 *    by the devid instead.
81 	 *
82 	 */
83 	if (vd->vdev_devid != NULL) {
84 		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
85 		    &dvd->vd_minor) != 0) {
86 			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
87 			return (EINVAL);
88 		}
89 	}
90 
91 	error = EINVAL;		/* presume failure */
92 
93 	if (vd->vdev_path != NULL) {
94 		size_t len = strlen(vd->vdev_path) + 3;
95 		char *buf = kmem_alloc(len, KM_SLEEP);
96 		ddi_devid_t devid;
97 
98 		(void) snprintf(buf, len, "%ss0", vd->vdev_path);
99 
100 		/*
101 		 * Try whole disk first, then slice name.
102 		 */
103 		if ((error = ldi_open_by_name(buf, spa_mode, kcred,
104 		    &dvd->vd_lh, zfs_li)) != 0)
105 			error = ldi_open_by_name(vd->vdev_path,
106 			    spa_mode, kcred, &dvd->vd_lh, zfs_li);
107 
108 		kmem_free(buf, len);
109 
110 		/*
111 		 * Compare the devid to the stored value.
112 		 */
113 		if (error == 0 && vd->vdev_devid != NULL &&
114 		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
115 			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
116 				error = EINVAL;
117 				(void) ldi_close(dvd->vd_lh, spa_mode, kcred);
118 				dvd->vd_lh = NULL;
119 			}
120 			ddi_devid_free(devid);
121 		}
122 	}
123 
124 	/*
125 	 * If we were unable to open by path, or the devid check fails, open by
126 	 * devid instead.
127 	 */
128 	if (error != 0 && vd->vdev_devid != NULL)
129 		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
130 		    spa_mode, kcred, &dvd->vd_lh, zfs_li);
131 
132 	if (error) {
133 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
134 		return (error);
135 	}
136 
137 	/*
138 	 * Determine the actual size of the device.
139 	 */
140 	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
141 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
142 		return (EINVAL);
143 	}
144 
145 	*ashift = SPA_MINBLOCKSHIFT;
146 
147 	return (0);
148 }
149 
150 static void
151 vdev_disk_close(vdev_t *vd)
152 {
153 	vdev_disk_t *dvd = vd->vdev_tsd;
154 
155 	if (dvd == NULL)
156 		return;
157 
158 	dprintf("removing disk %s, devid %s\n",
159 	    vd->vdev_path ? vd->vdev_path : "<none>",
160 	    vd->vdev_devid ? vd->vdev_devid : "<none>");
161 
162 	if (dvd->vd_minor != NULL)
163 		ddi_devid_str_free(dvd->vd_minor);
164 
165 	if (dvd->vd_devid != NULL)
166 		ddi_devid_free(dvd->vd_devid);
167 
168 	if (dvd->vd_lh != NULL)
169 		(void) ldi_close(dvd->vd_lh, spa_mode, kcred);
170 
171 	kmem_free(dvd, sizeof (vdev_disk_t));
172 	vd->vdev_tsd = NULL;
173 }
174 
175 static void
176 vdev_disk_io_intr(buf_t *bp)
177 {
178 	vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
179 	zio_t *zio = vdb->vdb_io;
180 
181 	if ((zio->io_error = geterror(bp)) == 0 && bp->b_resid != 0)
182 		zio->io_error = EIO;
183 
184 	kmem_free(vdb, sizeof (vdev_disk_buf_t));
185 
186 	zio_next_stage_async(zio);
187 }
188 
189 static void
190 vdev_disk_ioctl_done(void *zio_arg, int error)
191 {
192 	zio_t *zio = zio_arg;
193 
194 	zio->io_error = error;
195 
196 	zio_next_stage_async(zio);
197 }
198 
199 static void
200 vdev_disk_io_start(zio_t *zio)
201 {
202 	vdev_t *vd = zio->io_vd;
203 	vdev_disk_t *dvd = vd->vdev_tsd;
204 	vdev_disk_buf_t *vdb;
205 	buf_t *bp;
206 	int flags, error;
207 
208 	if (zio->io_type == ZIO_TYPE_IOCTL) {
209 		zio_vdev_io_bypass(zio);
210 
211 		/* XXPOLICY */
212 		if (vdev_is_dead(vd)) {
213 			zio->io_error = ENXIO;
214 			zio_next_stage_async(zio);
215 			return;
216 		}
217 
218 		switch (zio->io_cmd) {
219 
220 		case DKIOCFLUSHWRITECACHE:
221 
222 			zio->io_dk_callback.dkc_callback = vdev_disk_ioctl_done;
223 			zio->io_dk_callback.dkc_cookie = zio;
224 
225 			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
226 			    (uintptr_t)&zio->io_dk_callback,
227 			    FKIOCTL, kcred, NULL);
228 
229 			if (error == 0) {
230 				/*
231 				 * The ioctl will be done asychronously,
232 				 * and will call vdev_disk_ioctl_done()
233 				 * upon completion.
234 				 */
235 				return;
236 			}
237 			zio->io_error = error;
238 			break;
239 
240 		default:
241 			zio->io_error = ENOTSUP;
242 		}
243 
244 		zio_next_stage_async(zio);
245 		return;
246 	}
247 
248 	if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
249 		return;
250 
251 	if ((zio = vdev_queue_io(zio)) == NULL)
252 		return;
253 
254 	flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
255 	flags |= B_BUSY | B_NOCACHE;
256 	if (zio->io_flags & ZIO_FLAG_FAILFAST)
257 		flags |= B_FAILFAST;
258 
259 	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
260 
261 	vdb->vdb_io = zio;
262 	bp = &vdb->vdb_buf;
263 
264 	bioinit(bp);
265 	bp->b_flags = flags;
266 	bp->b_bcount = zio->io_size;
267 	bp->b_un.b_addr = zio->io_data;
268 	bp->b_lblkno = lbtodb(zio->io_offset);
269 	bp->b_bufsize = zio->io_size;
270 	bp->b_iodone = (int (*)())vdev_disk_io_intr;
271 
272 	/* XXPOLICY */
273 	error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
274 	if (error) {
275 		zio->io_error = error;
276 		bioerror(bp, error);
277 		bp->b_resid = bp->b_bcount;
278 		bp->b_iodone(bp);
279 		return;
280 	}
281 
282 	error = ldi_strategy(dvd->vd_lh, bp);
283 	/* ldi_strategy() will return non-zero only on programming errors */
284 	ASSERT(error == 0);
285 }
286 
287 static void
288 vdev_disk_io_done(zio_t *zio)
289 {
290 	vdev_queue_io_done(zio);
291 
292 	if (zio->io_type == ZIO_TYPE_WRITE)
293 		vdev_cache_write(zio);
294 
295 	zio_next_stage(zio);
296 }
297 
298 vdev_ops_t vdev_disk_ops = {
299 	vdev_disk_open,
300 	vdev_disk_close,
301 	vdev_default_asize,
302 	vdev_disk_io_start,
303 	vdev_disk_io_done,
304 	NULL,
305 	VDEV_TYPE_DISK,		/* name of this vdev type */
306 	B_TRUE			/* leaf vdev */
307 };
308