1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
24  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
25  * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
26  * LLNL-CODE-403049.
27  */
28 
29 #ifndef _ZFS_BLKDEV_H
30 #define	_ZFS_BLKDEV_H
31 
32 #include <linux/blkdev.h>
33 #include <linux/elevator.h>
34 #include <linux/backing-dev.h>
35 #include <linux/hdreg.h>
36 #include <linux/msdos_fs.h>	/* for SECTOR_* */
37 
/*
 * Fallback definition of blk_queue_flag_set(), compiled only when
 * HAVE_BLK_QUEUE_FLAG_SET is not defined.  It forwards to
 * queue_flag_set() with the same (flag, queue) arguments.
 */
#if !defined(HAVE_BLK_QUEUE_FLAG_SET)
static inline void
blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{
	queue_flag_set(flag, q);
}
#endif /* !HAVE_BLK_QUEUE_FLAG_SET */
45 
/*
 * Fallback definition of blk_queue_flag_clear(), compiled only when
 * HAVE_BLK_QUEUE_FLAG_CLEAR is not defined.  It forwards to
 * queue_flag_clear() with the same (flag, queue) arguments.
 */
#if !defined(HAVE_BLK_QUEUE_FLAG_CLEAR)
static inline void
blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	queue_flag_clear(flag, q);
}
#endif /* !HAVE_BLK_QUEUE_FLAG_CLEAR */
53 
54 /*
55  * 4.7 - 4.x API,
56  * The blk_queue_write_cache() interface has replaced blk_queue_flush()
57  * interface.  However, the new interface is GPL-only thus we implement
58  * our own trivial wrapper when the GPL-only version is detected.
59  *
60  * 2.6.36 - 4.6 API,
61  * The blk_queue_flush() interface has replaced blk_queue_ordered()
62  * interface.  However, while the old interface was available to all the
63  * new one is GPL-only.   Thus if the GPL-only version is detected we
64  * implement our own trivial helper.
65  */
66 static inline void
67 blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
68 {
69 #if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY)
70 	if (wc)
71 		blk_queue_flag_set(QUEUE_FLAG_WC, q);
72 	else
73 		blk_queue_flag_clear(QUEUE_FLAG_WC, q);
74 	if (fua)
75 		blk_queue_flag_set(QUEUE_FLAG_FUA, q);
76 	else
77 		blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
78 #elif defined(HAVE_BLK_QUEUE_WRITE_CACHE)
79 	blk_queue_write_cache(q, wc, fua);
80 #elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY)
81 	if (wc)
82 		q->flush_flags |= REQ_FLUSH;
83 	if (fua)
84 		q->flush_flags |= REQ_FUA;
85 #elif defined(HAVE_BLK_QUEUE_FLUSH)
86 	blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0));
87 #else
88 #error "Unsupported kernel"
89 #endif
90 }
91 
92 static inline void
93 blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
94 {
95 #ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
96 	q->backing_dev_info->ra_pages = ra_pages;
97 #else
98 	q->backing_dev_info.ra_pages = ra_pages;
99 #endif
100 }
101 
/*
 * When HAVE_GET_DISK_AND_MODULE is not defined, supply
 * get_disk_and_module() as a thin wrapper that returns whatever
 * get_disk() returns for the same disk.
 */
#ifndef HAVE_GET_DISK_AND_MODULE
static inline struct kobject *
get_disk_and_module(struct gendisk *disk)
{
	struct kobject *kobj = get_disk(disk);

	return (kobj);
}
#endif /* !HAVE_GET_DISK_AND_MODULE */
109 
/*
 * Accessors for the position fields of a bio.  Without
 * HAVE_BIO_BVEC_ITER the fields are read directly from struct bio and
 * BIO_BI_SKIP() is always 0; with it they are read from bio->bi_iter.
 *
 * bio_for_each_segment4() takes both a bio_vec (bv) and a bio_vec
 * pointer (bvp) and hands bio_for_each_segment() whichever one the
 * selected variant uses; the other argument is ignored.
 */
#if !defined(HAVE_BIO_BVEC_ITER)
#define	BIO_BI_SECTOR(bio)	((bio)->bi_sector)
#define	BIO_BI_SIZE(bio)	((bio)->bi_size)
#define	BIO_BI_IDX(bio)		((bio)->bi_idx)
#define	BIO_BI_SKIP(bio)	(0)
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bvp), (b), (i))
typedef int bvec_iterator_t;
#else
#define	BIO_BI_SECTOR(bio)	((bio)->bi_iter.bi_sector)
#define	BIO_BI_SIZE(bio)	((bio)->bi_iter.bi_size)
#define	BIO_BI_IDX(bio)		((bio)->bi_iter.bi_idx)
#define	BIO_BI_SKIP(bio)	((bio)->bi_iter.bi_bvec_done)
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bv), (b), (i))
typedef struct bvec_iter bvec_iterator_t;
#endif /* !HAVE_BIO_BVEC_ITER */
127 
128 static inline void
129 bio_set_flags_failfast(struct block_device *bdev, int *flags)
130 {
131 #ifdef CONFIG_BUG
132 	/*
133 	 * Disable FAILFAST for loopback devices because of the
134 	 * following incorrect BUG_ON() in loop_make_request().
135 	 * This support is also disabled for md devices because the
136 	 * test suite layers md devices on top of loopback devices.
137 	 * This may be removed when the loopback driver is fixed.
138 	 *
139 	 *   BUG_ON(!lo || (rw != READ && rw != WRITE));
140 	 */
141 	if ((MAJOR(bdev->bd_dev) == LOOP_MAJOR) ||
142 	    (MAJOR(bdev->bd_dev) == MD_MAJOR))
143 		return;
144 
145 #ifdef BLOCK_EXT_MAJOR
146 	if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
147 		return;
148 #endif /* BLOCK_EXT_MAJOR */
149 #endif /* CONFIG_BUG */
150 
151 	*flags |= REQ_FAILFAST_MASK;
152 }
153 
/*
 * Maximum disk label length.  Some kernels may not define it, in which
 * case fall back to 32.
 */
#ifndef DISK_NAME_LEN
#define	DISK_NAME_LEN	32
#endif /* !DISK_NAME_LEN */
160 
161 #ifdef HAVE_BIO_BI_STATUS
162 static inline int
163 bi_status_to_errno(blk_status_t status)
164 {
165 	switch (status)	{
166 	case BLK_STS_OK:
167 		return (0);
168 	case BLK_STS_NOTSUPP:
169 		return (EOPNOTSUPP);
170 	case BLK_STS_TIMEOUT:
171 		return (ETIMEDOUT);
172 	case BLK_STS_NOSPC:
173 		return (ENOSPC);
174 	case BLK_STS_TRANSPORT:
175 		return (ENOLINK);
176 	case BLK_STS_TARGET:
177 		return (EREMOTEIO);
178 	case BLK_STS_NEXUS:
179 		return (EBADE);
180 	case BLK_STS_MEDIUM:
181 		return (ENODATA);
182 	case BLK_STS_PROTECTION:
183 		return (EILSEQ);
184 	case BLK_STS_RESOURCE:
185 		return (ENOMEM);
186 	case BLK_STS_AGAIN:
187 		return (EAGAIN);
188 	case BLK_STS_IOERR:
189 		return (EIO);
190 	default:
191 		return (EIO);
192 	}
193 }
194 
195 static inline blk_status_t
196 errno_to_bi_status(int error)
197 {
198 	switch (error) {
199 	case 0:
200 		return (BLK_STS_OK);
201 	case EOPNOTSUPP:
202 		return (BLK_STS_NOTSUPP);
203 	case ETIMEDOUT:
204 		return (BLK_STS_TIMEOUT);
205 	case ENOSPC:
206 		return (BLK_STS_NOSPC);
207 	case ENOLINK:
208 		return (BLK_STS_TRANSPORT);
209 	case EREMOTEIO:
210 		return (BLK_STS_TARGET);
211 	case EBADE:
212 		return (BLK_STS_NEXUS);
213 	case ENODATA:
214 		return (BLK_STS_MEDIUM);
215 	case EILSEQ:
216 		return (BLK_STS_PROTECTION);
217 	case ENOMEM:
218 		return (BLK_STS_RESOURCE);
219 	case EAGAIN:
220 		return (BLK_STS_AGAIN);
221 	case EIO:
222 		return (BLK_STS_IOERR);
223 	default:
224 		return (BLK_STS_IOERR);
225 	}
226 }
227 #endif /* HAVE_BIO_BI_STATUS */
228 
/*
 * 4.3 API change
 * The bio_endio() prototype changed slightly.  These are helper
 * macros to ensure the prototype and invocation are handled.
 *
 *   BIO_END_IO_PROTO(fn, x, z)  Open the definition of a static
 *                               completion callback 'fn' taking the bio
 *                               as 'x' and, for the two argument
 *                               prototype only, the error as 'z'.
 *   BIO_END_IO(bio, error)      Complete 'bio' with 'error'.  Expands to
 *                               a single expression in every variant, so
 *                               the caller supplies the terminating ';'.
 *   BIO_END_IO_ERROR(bio)       Positive errno recorded in 'bio'.  Only
 *                               defined for the one argument prototype.
 *
 * The one argument variants assert that 'error' is zero or negative.
 */
#ifdef HAVE_1ARG_BIO_END_IO_T
#ifdef HAVE_BIO_BI_STATUS
#define	BIO_END_IO_ERROR(bio)		bi_status_to_errno((bio)->bi_status)
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
#define	BIO_END_IO(bio, error)		bio_set_bi_status(bio, error)
static inline void
bio_set_bi_status(struct bio *bio, int error)
{
	ASSERT3S(error, <=, 0);
	/* errno_to_bi_status() expects a positive errno. */
	bio->bi_status = errno_to_bi_status(-error);
	bio_endio(bio);
}
#else
#define	BIO_END_IO_ERROR(bio)		(-((bio)->bi_error))
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
#define	BIO_END_IO(bio, error)		bio_set_bi_error(bio, error)
static inline void
bio_set_bi_error(struct bio *bio, int error)
{
	ASSERT3S(error, <=, 0);
	bio->bi_error = error;
	bio_endio(bio);
}
#endif /* HAVE_BIO_BI_STATUS */

#else
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x, int z)
#define	BIO_END_IO(bio, error)		bio_endio(bio, error)
#endif /* HAVE_1ARG_BIO_END_IO_T */
263 
264 /*
265  * 4.1 - x.y.z API,
266  * 3.10.0 CentOS 7.x API,
267  *   blkdev_reread_part()
268  *
269  * For older kernels trigger a re-reading of the partition table by calling
270  * check_disk_change() which calls flush_disk() to invalidate the device.
271  */
#if !defined(HAVE_BLKDEV_REREAD_PART)
/* No blkdev_reread_part(): fall back to check_disk_change(). */
#define	vdev_bdev_reread_part(bdev)	check_disk_change(bdev)
#else
#define	vdev_bdev_reread_part(bdev)	blkdev_reread_part(bdev)
#endif /* !HAVE_BLKDEV_REREAD_PART */
277 
278 /*
279  * 2.6.27 API change
280  * The function was exported for use, prior to this it existed but the
281  * symbol was not exported.
282  *
283  * 4.4.0-6.21 API change for Ubuntu
284  * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions.
285  */
286 #ifdef HAVE_1ARG_LOOKUP_BDEV
287 #define	vdev_lookup_bdev(path)	lookup_bdev(path)
288 #else
289 #ifdef HAVE_2ARGS_LOOKUP_BDEV
290 #define	vdev_lookup_bdev(path)	lookup_bdev(path, 0)
291 #else
292 #error "Unsupported kernel"
293 #endif /* HAVE_2ARGS_LOOKUP_BDEV */
294 #endif /* HAVE_1ARG_LOOKUP_BDEV */
295 
296 /*
297  * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
298  */
299 #if !defined(HAVE_BIO_SET_OP_ATTRS)
300 static inline void
301 bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags)
302 {
303 	bio->bi_rw |= rw | flags;
304 }
305 #endif
306 
307 /*
308  * bio_set_flush - Set the appropriate flags in a bio to guarantee
309  * data are on non-volatile media on completion.
310  *
311  * 2.6.37 - 4.8 API,
312  *   Introduce WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags as a
313  *   replacement for WRITE_BARRIER to allow expressing richer semantics
314  *   to the block layer.  It's up to the block layer to implement the
315  *   semantics correctly. Use the WRITE_FLUSH_FUA flag combination.
316  *
317  * 4.8 - 4.9 API,
318  *   REQ_FLUSH was renamed to REQ_PREFLUSH.  For consistency with previous
319  *   ZoL releases, prefer the WRITE_FLUSH_FUA flag set if it's available.
320  *
321  * 4.10 API,
322  *   The read/write flags and their modifiers, including WRITE_FLUSH,
323  *   WRITE_FUA and WRITE_FLUSH_FUA were removed from fs.h in
324  *   torvalds/linux@70fd7614 and replaced by direct flag modification
325  *   of the REQ_ flags in bio->bi_opf.  Use REQ_PREFLUSH.
326  */
327 static inline void
328 bio_set_flush(struct bio *bio)
329 {
330 #if defined(HAVE_REQ_PREFLUSH)	/* >= 4.10 */
331 	bio_set_op_attrs(bio, 0, REQ_PREFLUSH);
332 #elif defined(WRITE_FLUSH_FUA)	/* >= 2.6.37 and <= 4.9 */
333 	bio_set_op_attrs(bio, 0, WRITE_FLUSH_FUA);
334 #else
335 #error	"Allowing the build will cause bio_set_flush requests to be ignored."
336 #endif
337 }
338 
339 /*
340  * 4.8 - 4.x API,
341  *   REQ_OP_FLUSH
342  *
343  * 4.8-rc0 - 4.8-rc1,
344  *   REQ_PREFLUSH
345  *
346  * 2.6.36 - 4.7 API,
347  *   REQ_FLUSH
348  *
349  * in all cases but may have a performance impact for some kernels.  It
350  * has the advantage of minimizing kernel specific changes in the zvol code.
351  *
352  */
353 static inline boolean_t
354 bio_is_flush(struct bio *bio)
355 {
356 #if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF)
357 	return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH));
358 #elif defined(HAVE_REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF)
359 	return (bio->bi_opf & REQ_PREFLUSH);
360 #elif defined(HAVE_REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF)
361 	return (bio->bi_rw & REQ_PREFLUSH);
362 #elif defined(HAVE_REQ_FLUSH)
363 	return (bio->bi_rw & REQ_FLUSH);
364 #else
365 #error	"Unsupported kernel"
366 #endif
367 }
368 
369 /*
370  * 4.8 - 4.x API,
371  *   REQ_FUA flag moved to bio->bi_opf
372  *
373  * 2.6.x - 4.7 API,
374  *   REQ_FUA
375  */
376 static inline boolean_t
377 bio_is_fua(struct bio *bio)
378 {
379 #if defined(HAVE_BIO_BI_OPF)
380 	return (bio->bi_opf & REQ_FUA);
381 #elif defined(REQ_FUA)
382 	return (bio->bi_rw & REQ_FUA);
383 #else
384 #error	"Allowing the build will cause fua requests to be ignored."
385 #endif
386 }
387 
388 /*
389  * 4.8 - 4.x API,
390  *   REQ_OP_DISCARD
391  *
392  * 2.6.36 - 4.7 API,
393  *   REQ_DISCARD
394  *
395  * In all cases the normal I/O path is used for discards.  The only
396  * difference is how the kernel tags individual I/Os as discards.
397  */
398 static inline boolean_t
399 bio_is_discard(struct bio *bio)
400 {
401 #if defined(HAVE_REQ_OP_DISCARD)
402 	return (bio_op(bio) == REQ_OP_DISCARD);
403 #elif defined(HAVE_REQ_DISCARD)
404 	return (bio->bi_rw & REQ_DISCARD);
405 #else
406 #error "Unsupported kernel"
407 #endif
408 }
409 
410 /*
411  * 4.8 - 4.x API,
412  *   REQ_OP_SECURE_ERASE
413  *
414  * 2.6.36 - 4.7 API,
415  *   REQ_SECURE
416  */
417 static inline boolean_t
418 bio_is_secure_erase(struct bio *bio)
419 {
420 #if defined(HAVE_REQ_OP_SECURE_ERASE)
421 	return (bio_op(bio) == REQ_OP_SECURE_ERASE);
422 #elif defined(REQ_SECURE)
423 	return (bio->bi_rw & REQ_SECURE);
424 #else
425 	return (0);
426 #endif
427 }
428 
429 /*
430  * 2.6.33 API change
431  * Discard granularity and alignment restrictions may now be set.  For
432  * older kernels which do not support this it is safe to skip it.
433  */
434 static inline void
435 blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
436 {
437 	q->limits.discard_granularity = dg;
438 }
439 
/*
 * 4.8 - 4.x API,
 *   blk_queue_secure_erase()
 *
 * 2.6.36 - 4.7 API,
 *   blk_queue_secdiscard()
 *
 * Returns the result of whichever of the two queries above is available
 * for 'q', or 0 when neither was detected.
 */
static inline int
blk_queue_discard_secure(struct request_queue *q)
{
	int secure = 0;

#if defined(HAVE_BLK_QUEUE_SECURE_ERASE)
	secure = blk_queue_secure_erase(q);
#elif defined(HAVE_BLK_QUEUE_SECDISCARD)
	secure = blk_queue_secdiscard(q);
#endif

	return (secure);
}
458 
459 /*
460  * A common holder for vdev_bdev_open() is used to relax the exclusive open
461  * semantics slightly.  Internal vdev disk callers may pass VDEV_HOLDER to
462  * allow them to open the device multiple times.  Other kernel callers and
463  * user space processes which don't pass this value will get EBUSY.  This is
464  * currently required for the correct operation of hot spares.
465  */
466 #define	VDEV_HOLDER			((void *)0x2401de7)
467 
/*
 * Forward to generic_start_io_acct() using the argument list that was
 * detected: the three argument form omits the queue, the four argument
 * form passes it first.  When neither form was detected this function
 * does nothing.
 */
static inline void
blk_generic_start_io_acct(struct request_queue *q, int rw,
    unsigned long sectors, struct hd_struct *part)
{
#ifdef HAVE_GENERIC_IO_ACCT_3ARG
	generic_start_io_acct(rw, sectors, part);
#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
	generic_start_io_acct(q, rw, sectors, part);
#endif /* HAVE_GENERIC_IO_ACCT_3ARG || HAVE_GENERIC_IO_ACCT_4ARG */
}
478 
/*
 * Forward to generic_end_io_acct() using the argument list that was
 * detected: the three argument form omits the queue, the four argument
 * form passes it first.  When neither form was detected this function
 * does nothing.
 */
static inline void
blk_generic_end_io_acct(struct request_queue *q, int rw,
    struct hd_struct *part, unsigned long start_time)
{
#ifdef HAVE_GENERIC_IO_ACCT_3ARG
	generic_end_io_acct(rw, part, start_time);
#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
	generic_end_io_acct(q, rw, part, start_time);
#endif /* HAVE_GENERIC_IO_ACCT_3ARG || HAVE_GENERIC_IO_ACCT_4ARG */
}
489 
490 #ifndef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
491 static inline struct request_queue *
492 blk_generic_alloc_queue(make_request_fn make_request, int node_id)
493 {
494 #if defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN)
495 	return (blk_alloc_queue(make_request, node_id));
496 #else
497 	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);
498 	if (q != NULL)
499 		blk_queue_make_request(q, make_request);
500 
501 	return (q);
502 #endif
503 }
504 #endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
505 
506 #endif /* _ZFS_BLKDEV_H */
507