xref: /linux/block/genhd.c (revision 309dca30)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  gendisk handling
4  *
5  * Portions Copyright (C) 2020 Christoph Hellwig
6  */
7 
8 #include <linux/module.h>
9 #include <linux/ctype.h>
10 #include <linux/fs.h>
11 #include <linux/genhd.h>
12 #include <linux/kdev_t.h>
13 #include <linux/kernel.h>
14 #include <linux/blkdev.h>
15 #include <linux/backing-dev.h>
16 #include <linux/init.h>
17 #include <linux/spinlock.h>
18 #include <linux/proc_fs.h>
19 #include <linux/seq_file.h>
20 #include <linux/slab.h>
21 #include <linux/kmod.h>
22 #include <linux/mutex.h>
23 #include <linux/idr.h>
24 #include <linux/log2.h>
25 #include <linux/pm_runtime.h>
26 #include <linux/badblocks.h>
27 
28 #include "blk.h"
29 
/* Kobject of the top-level "block" sysfs directory (set up at init time). */
static struct kobject *block_depr;

/* Taken for writing by del_gendisk() while a disk is being torn down. */
DECLARE_RWSEM(bdev_lookup_sem);

/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT		(1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);

/* Forward declarations of the disk event helpers. */
static void disk_alloc_events(struct gendisk *disk);
static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
static void disk_check_events(struct disk_events *ev,
			      unsigned int *clearing_ptr);
44 
45 void set_capacity(struct gendisk *disk, sector_t sectors)
46 {
47 	struct block_device *bdev = disk->part0;
48 
49 	spin_lock(&bdev->bd_size_lock);
50 	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
51 	spin_unlock(&bdev->bd_size_lock);
52 }
53 EXPORT_SYMBOL(set_capacity);
54 
55 /*
56  * Set disk capacity and notify if the size is not currently zero and will not
57  * be set to zero.  Returns true if a uevent was sent, otherwise false.
58  */
59 bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
60 {
61 	sector_t capacity = get_capacity(disk);
62 	char *envp[] = { "RESIZE=1", NULL };
63 
64 	set_capacity(disk, size);
65 
66 	/*
67 	 * Only print a message and send a uevent if the gendisk is user visible
68 	 * and alive.  This avoids spamming the log and udev when setting the
69 	 * initial capacity during probing.
70 	 */
71 	if (size == capacity ||
72 	    (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
73 		return false;
74 
75 	pr_info("%s: detected capacity change from %lld to %lld\n",
76 		disk->disk_name, size, capacity);
77 
78 	/*
79 	 * Historically we did not send a uevent for changes to/from an empty
80 	 * device.
81 	 */
82 	if (!capacity || !size)
83 		return false;
84 	kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
85 	return true;
86 }
87 EXPORT_SYMBOL_GPL(set_capacity_and_notify);
88 
89 /*
90  * Format the device name of the indicated disk into the supplied buffer and
91  * return a pointer to that same buffer for convenience.
92  */
93 char *disk_name(struct gendisk *hd, int partno, char *buf)
94 {
95 	if (!partno)
96 		snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
97 	else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
98 		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
99 	else
100 		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
101 
102 	return buf;
103 }
104 
105 const char *bdevname(struct block_device *bdev, char *buf)
106 {
107 	return disk_name(bdev->bd_disk, bdev->bd_partno, buf);
108 }
109 EXPORT_SYMBOL(bdevname);
110 
111 static void part_stat_read_all(struct block_device *part,
112 		struct disk_stats *stat)
113 {
114 	int cpu;
115 
116 	memset(stat, 0, sizeof(struct disk_stats));
117 	for_each_possible_cpu(cpu) {
118 		struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
119 		int group;
120 
121 		for (group = 0; group < NR_STAT_GROUPS; group++) {
122 			stat->nsecs[group] += ptr->nsecs[group];
123 			stat->sectors[group] += ptr->sectors[group];
124 			stat->ios[group] += ptr->ios[group];
125 			stat->merges[group] += ptr->merges[group];
126 		}
127 
128 		stat->io_ticks += ptr->io_ticks;
129 	}
130 }
131 
132 static unsigned int part_in_flight(struct block_device *part)
133 {
134 	unsigned int inflight = 0;
135 	int cpu;
136 
137 	for_each_possible_cpu(cpu) {
138 		inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
139 			    part_stat_local_read_cpu(part, in_flight[1], cpu);
140 	}
141 	if ((int)inflight < 0)
142 		inflight = 0;
143 
144 	return inflight;
145 }
146 
147 static void part_in_flight_rw(struct block_device *part,
148 		unsigned int inflight[2])
149 {
150 	int cpu;
151 
152 	inflight[0] = 0;
153 	inflight[1] = 0;
154 	for_each_possible_cpu(cpu) {
155 		inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
156 		inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
157 	}
158 	if ((int)inflight[0] < 0)
159 		inflight[0] = 0;
160 	if ((int)inflight[1] < 0)
161 		inflight[1] = 0;
162 }
163 
164 static struct block_device *__disk_get_part(struct gendisk *disk, int partno)
165 {
166 	struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl);
167 
168 	if (unlikely(partno < 0 || partno >= ptbl->len))
169 		return NULL;
170 	return rcu_dereference(ptbl->part[partno]);
171 }
172 
173 /**
174  * disk_part_iter_init - initialize partition iterator
175  * @piter: iterator to initialize
176  * @disk: disk to iterate over
177  * @flags: DISK_PITER_* flags
178  *
179  * Initialize @piter so that it iterates over partitions of @disk.
180  *
181  * CONTEXT:
182  * Don't care.
183  */
184 void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
185 			  unsigned int flags)
186 {
187 	struct disk_part_tbl *ptbl;
188 
189 	rcu_read_lock();
190 	ptbl = rcu_dereference(disk->part_tbl);
191 
192 	piter->disk = disk;
193 	piter->part = NULL;
194 
195 	if (flags & DISK_PITER_REVERSE)
196 		piter->idx = ptbl->len - 1;
197 	else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
198 		piter->idx = 0;
199 	else
200 		piter->idx = 1;
201 
202 	piter->flags = flags;
203 
204 	rcu_read_unlock();
205 }
206 EXPORT_SYMBOL_GPL(disk_part_iter_init);
207 
208 /**
209  * disk_part_iter_next - proceed iterator to the next partition and return it
210  * @piter: iterator of interest
211  *
212  * Proceed @piter to the next partition and return it.
213  *
214  * CONTEXT:
215  * Don't care.
216  */
217 struct block_device *disk_part_iter_next(struct disk_part_iter *piter)
218 {
219 	struct disk_part_tbl *ptbl;
220 	int inc, end;
221 
222 	/* put the last partition */
223 	disk_part_iter_exit(piter);
224 
225 	/* get part_tbl */
226 	rcu_read_lock();
227 	ptbl = rcu_dereference(piter->disk->part_tbl);
228 
229 	/* determine iteration parameters */
230 	if (piter->flags & DISK_PITER_REVERSE) {
231 		inc = -1;
232 		if (piter->flags & (DISK_PITER_INCL_PART0 |
233 				    DISK_PITER_INCL_EMPTY_PART0))
234 			end = -1;
235 		else
236 			end = 0;
237 	} else {
238 		inc = 1;
239 		end = ptbl->len;
240 	}
241 
242 	/* iterate to the next partition */
243 	for (; piter->idx != end; piter->idx += inc) {
244 		struct block_device *part;
245 
246 		part = rcu_dereference(ptbl->part[piter->idx]);
247 		if (!part)
248 			continue;
249 		piter->part = bdgrab(part);
250 		if (!piter->part)
251 			continue;
252 		if (!bdev_nr_sectors(part) &&
253 		    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
254 		    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
255 		      piter->idx == 0)) {
256 			bdput(piter->part);
257 			piter->part = NULL;
258 			continue;
259 		}
260 
261 		piter->idx += inc;
262 		break;
263 	}
264 
265 	rcu_read_unlock();
266 
267 	return piter->part;
268 }
269 EXPORT_SYMBOL_GPL(disk_part_iter_next);
270 
271 /**
272  * disk_part_iter_exit - finish up partition iteration
273  * @piter: iter of interest
274  *
275  * Called when iteration is over.  Cleans up @piter.
276  *
277  * CONTEXT:
278  * Don't care.
279  */
280 void disk_part_iter_exit(struct disk_part_iter *piter)
281 {
282 	if (piter->part)
283 		bdput(piter->part);
284 	piter->part = NULL;
285 }
286 EXPORT_SYMBOL_GPL(disk_part_iter_exit);
287 
288 static inline int sector_in_part(struct block_device *part, sector_t sector)
289 {
290 	return part->bd_start_sect <= sector &&
291 		sector < part->bd_start_sect + bdev_nr_sectors(part);
292 }
293 
294 /**
295  * disk_map_sector_rcu - map sector to partition
296  * @disk: gendisk of interest
297  * @sector: sector to map
298  *
299  * Find out which partition @sector maps to on @disk.  This is
300  * primarily used for stats accounting.
301  *
302  * CONTEXT:
303  * RCU read locked.
304  *
305  * RETURNS:
306  * Found partition on success, part0 is returned if no partition matches
307  * or the matched partition is being deleted.
308  */
309 struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
310 {
311 	struct disk_part_tbl *ptbl;
312 	struct block_device *part;
313 	int i;
314 
315 	rcu_read_lock();
316 	ptbl = rcu_dereference(disk->part_tbl);
317 
318 	part = rcu_dereference(ptbl->last_lookup);
319 	if (part && sector_in_part(part, sector))
320 		goto out_unlock;
321 
322 	for (i = 1; i < ptbl->len; i++) {
323 		part = rcu_dereference(ptbl->part[i]);
324 		if (part && sector_in_part(part, sector)) {
325 			rcu_assign_pointer(ptbl->last_lookup, part);
326 			goto out_unlock;
327 		}
328 	}
329 
330 	part = disk->part0;
331 out_unlock:
332 	rcu_read_unlock();
333 	return part;
334 }
335 
336 /**
337  * disk_has_partitions
338  * @disk: gendisk of interest
339  *
340  * Walk through the partition table and check if valid partition exists.
341  *
342  * CONTEXT:
343  * Don't care.
344  *
345  * RETURNS:
346  * True if the gendisk has at least one valid non-zero size partition.
347  * Otherwise false.
348  */
349 bool disk_has_partitions(struct gendisk *disk)
350 {
351 	struct disk_part_tbl *ptbl;
352 	int i;
353 	bool ret = false;
354 
355 	rcu_read_lock();
356 	ptbl = rcu_dereference(disk->part_tbl);
357 
358 	/* Iterate partitions skipping the whole device at index 0 */
359 	for (i = 1; i < ptbl->len; i++) {
360 		if (rcu_dereference(ptbl->part[i])) {
361 			ret = true;
362 			break;
363 		}
364 	}
365 
366 	rcu_read_unlock();
367 
368 	return ret;
369 }
370 EXPORT_SYMBOL_GPL(disk_has_partitions);
371 
372 /*
373  * Can be deleted altogether. Later.
374  *
375  */
376 #define BLKDEV_MAJOR_HASH_SIZE 255
377 static struct blk_major_name {
378 	struct blk_major_name *next;
379 	int major;
380 	char name[16];
381 	void (*probe)(dev_t devt);
382 } *major_names[BLKDEV_MAJOR_HASH_SIZE];
383 static DEFINE_MUTEX(major_names_lock);
384 
385 /* index in the above - for now: assume no multimajor ranges */
386 static inline int major_to_index(unsigned major)
387 {
388 	return major % BLKDEV_MAJOR_HASH_SIZE;
389 }
390 
#ifdef CONFIG_PROC_FS
/*
 * Print "<major> <name>" to @seqf for every registered entry whose major
 * number equals @offset.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *entry;

	mutex_lock(&major_names_lock);
	entry = major_names[major_to_index(offset)];
	while (entry) {
		if (entry->major == offset)
			seq_printf(seqf, "%3d %s\n", entry->major, entry->name);
		entry = entry->next;
	}
	mutex_unlock(&major_names_lock);
}
#endif /* CONFIG_PROC_FS */
403 
404 /**
405  * __register_blkdev - register a new block device
406  *
407  * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
408  *         @major = 0, try to allocate any unused major number.
409  * @name: the name of the new block device as a zero terminated string
410  * @probe: allback that is called on access to any minor number of @major
411  *
412  * The @name must be unique within the system.
413  *
414  * The return value depends on the @major input parameter:
415  *
416  *  - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
417  *    then the function returns zero on success, or a negative error code
418  *  - if any unused major number was requested with @major = 0 parameter
419  *    then the return value is the allocated major number in range
420  *    [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
421  *
422  * See Documentation/admin-guide/devices.txt for the list of allocated
423  * major numbers.
424  *
425  * Use register_blkdev instead for any new code.
426  */
427 int __register_blkdev(unsigned int major, const char *name,
428 		void (*probe)(dev_t devt))
429 {
430 	struct blk_major_name **n, *p;
431 	int index, ret = 0;
432 
433 	mutex_lock(&major_names_lock);
434 
435 	/* temporary */
436 	if (major == 0) {
437 		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
438 			if (major_names[index] == NULL)
439 				break;
440 		}
441 
442 		if (index == 0) {
443 			printk("%s: failed to get major for %s\n",
444 			       __func__, name);
445 			ret = -EBUSY;
446 			goto out;
447 		}
448 		major = index;
449 		ret = major;
450 	}
451 
452 	if (major >= BLKDEV_MAJOR_MAX) {
453 		pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
454 		       __func__, major, BLKDEV_MAJOR_MAX-1, name);
455 
456 		ret = -EINVAL;
457 		goto out;
458 	}
459 
460 	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
461 	if (p == NULL) {
462 		ret = -ENOMEM;
463 		goto out;
464 	}
465 
466 	p->major = major;
467 	p->probe = probe;
468 	strlcpy(p->name, name, sizeof(p->name));
469 	p->next = NULL;
470 	index = major_to_index(major);
471 
472 	for (n = &major_names[index]; *n; n = &(*n)->next) {
473 		if ((*n)->major == major)
474 			break;
475 	}
476 	if (!*n)
477 		*n = p;
478 	else
479 		ret = -EBUSY;
480 
481 	if (ret < 0) {
482 		printk("register_blkdev: cannot get major %u for %s\n",
483 		       major, name);
484 		kfree(p);
485 	}
486 out:
487 	mutex_unlock(&major_names_lock);
488 	return ret;
489 }
490 EXPORT_SYMBOL(__register_blkdev);
491 
492 void unregister_blkdev(unsigned int major, const char *name)
493 {
494 	struct blk_major_name **n;
495 	struct blk_major_name *p = NULL;
496 	int index = major_to_index(major);
497 
498 	mutex_lock(&major_names_lock);
499 	for (n = &major_names[index]; *n; n = &(*n)->next)
500 		if ((*n)->major == major)
501 			break;
502 	if (!*n || strcmp((*n)->name, name)) {
503 		WARN_ON(1);
504 	} else {
505 		p = *n;
506 		*n = p->next;
507 	}
508 	mutex_unlock(&major_names_lock);
509 	kfree(p);
510 }
511 
512 EXPORT_SYMBOL(unregister_blkdev);
513 
/**
 * blk_mangle_minor - scatter minor numbers apart
 * @minor: minor number to mangle
 *
 * With CONFIG_DEBUG_BLOCK_EXT_DEVT the low MINORBITS bits of @minor are
 * mirrored (bit i swaps with bit MINORBITS - 1 - i); without it @minor is
 * returned unchanged.  Mangling twice gives the original value.
 *
 * RETURNS:
 * Mangled value.
 *
 * CONTEXT:
 * Don't care.
 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int bit;

	for (bit = 0; bit < MINORBITS / 2; bit++) {
		int mirror = MINORBITS - 1 - bit;
		int span = mirror - bit;
		int lo = minor & (1 << bit);
		int hi = minor & (1 << mirror);

		/* clear both bits, then put each one in the other's place */
		minor ^= lo | hi;
		lo <<= span;
		hi >>= span;
		minor |= lo | hi;
	}
#endif
	return minor;
}
545 
546 /**
547  * blk_alloc_devt - allocate a dev_t for a block device
548  * @bdev: block device to allocate dev_t for
549  * @devt: out parameter for resulting dev_t
550  *
551  * Allocate a dev_t for block device.
552  *
553  * RETURNS:
554  * 0 on success, allocated dev_t is returned in *@devt.  -errno on
555  * failure.
556  *
557  * CONTEXT:
558  * Might sleep.
559  */
560 int blk_alloc_devt(struct block_device *bdev, dev_t *devt)
561 {
562 	struct gendisk *disk = bdev->bd_disk;
563 	int idx;
564 
565 	/* in consecutive minor range? */
566 	if (bdev->bd_partno < disk->minors) {
567 		*devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno);
568 		return 0;
569 	}
570 
571 	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
572 	if (idx < 0)
573 		return idx == -ENOSPC ? -EBUSY : idx;
574 
575 	*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
576 	return 0;
577 }
578 
579 /**
580  * blk_free_devt - free a dev_t
581  * @devt: dev_t to free
582  *
583  * Free @devt which was allocated using blk_alloc_devt().
584  *
585  * CONTEXT:
586  * Might sleep.
587  */
588 void blk_free_devt(dev_t devt)
589 {
590 	if (MAJOR(devt) == BLOCK_EXT_MAJOR)
591 		ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt)));
592 }
593 
594 static char *bdevt_str(dev_t devt, char *buf)
595 {
596 	if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
597 		char tbuf[BDEVT_SIZE];
598 		snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
599 		snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
600 	} else
601 		snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
602 
603 	return buf;
604 }
605 
606 static void disk_scan_partitions(struct gendisk *disk)
607 {
608 	struct block_device *bdev;
609 
610 	if (!get_capacity(disk) || !disk_part_scan_enabled(disk))
611 		return;
612 
613 	set_bit(GD_NEED_PART_SCAN, &disk->state);
614 	bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
615 	if (!IS_ERR(bdev))
616 		blkdev_put(bdev, FMODE_READ);
617 }
618 
619 static void register_disk(struct device *parent, struct gendisk *disk,
620 			  const struct attribute_group **groups)
621 {
622 	struct device *ddev = disk_to_dev(disk);
623 	struct disk_part_iter piter;
624 	struct block_device *part;
625 	int err;
626 
627 	ddev->parent = parent;
628 
629 	dev_set_name(ddev, "%s", disk->disk_name);
630 
631 	/* delay uevents, until we scanned partition table */
632 	dev_set_uevent_suppress(ddev, 1);
633 
634 	if (groups) {
635 		WARN_ON(ddev->groups);
636 		ddev->groups = groups;
637 	}
638 	if (device_add(ddev))
639 		return;
640 	if (!sysfs_deprecated) {
641 		err = sysfs_create_link(block_depr, &ddev->kobj,
642 					kobject_name(&ddev->kobj));
643 		if (err) {
644 			device_del(ddev);
645 			return;
646 		}
647 	}
648 
649 	/*
650 	 * avoid probable deadlock caused by allocating memory with
651 	 * GFP_KERNEL in runtime_resume callback of its all ancestor
652 	 * devices
653 	 */
654 	pm_runtime_set_memalloc_noio(ddev, true);
655 
656 	disk->part0->bd_holder_dir =
657 		kobject_create_and_add("holders", &ddev->kobj);
658 	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
659 
660 	if (disk->flags & GENHD_FL_HIDDEN) {
661 		dev_set_uevent_suppress(ddev, 0);
662 		return;
663 	}
664 
665 	disk_scan_partitions(disk);
666 
667 	/* announce disk after possible partitions are created */
668 	dev_set_uevent_suppress(ddev, 0);
669 	kobject_uevent(&ddev->kobj, KOBJ_ADD);
670 
671 	/* announce possible partitions */
672 	disk_part_iter_init(&piter, disk, 0);
673 	while ((part = disk_part_iter_next(&piter)))
674 		kobject_uevent(bdev_kobj(part), KOBJ_ADD);
675 	disk_part_iter_exit(&piter);
676 
677 	if (disk->queue->backing_dev_info->dev) {
678 		err = sysfs_create_link(&ddev->kobj,
679 			  &disk->queue->backing_dev_info->dev->kobj,
680 			  "bdi");
681 		WARN_ON(err);
682 	}
683 }
684 
685 /**
686  * __device_add_disk - add disk information to kernel list
687  * @parent: parent device for the disk
688  * @disk: per-device partitioning information
689  * @groups: Additional per-device sysfs groups
690  * @register_queue: register the queue if set to true
691  *
692  * This function registers the partitioning information in @disk
693  * with the kernel.
694  *
695  * FIXME: error handling
696  */
697 static void __device_add_disk(struct device *parent, struct gendisk *disk,
698 			      const struct attribute_group **groups,
699 			      bool register_queue)
700 {
701 	dev_t devt;
702 	int retval;
703 
704 	/*
705 	 * The disk queue should now be all set with enough information about
706 	 * the device for the elevator code to pick an adequate default
707 	 * elevator if one is needed, that is, for devices requesting queue
708 	 * registration.
709 	 */
710 	if (register_queue)
711 		elevator_init_mq(disk->queue);
712 
713 	/* minors == 0 indicates to use ext devt from part0 and should
714 	 * be accompanied with EXT_DEVT flag.  Make sure all
715 	 * parameters make sense.
716 	 */
717 	WARN_ON(disk->minors && !(disk->major || disk->first_minor));
718 	WARN_ON(!disk->minors &&
719 		!(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));
720 
721 	disk->flags |= GENHD_FL_UP;
722 
723 	retval = blk_alloc_devt(disk->part0, &devt);
724 	if (retval) {
725 		WARN_ON(1);
726 		return;
727 	}
728 	disk->major = MAJOR(devt);
729 	disk->first_minor = MINOR(devt);
730 
731 	disk_alloc_events(disk);
732 
733 	if (disk->flags & GENHD_FL_HIDDEN) {
734 		/*
735 		 * Don't let hidden disks show up in /proc/partitions,
736 		 * and don't bother scanning for partitions either.
737 		 */
738 		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
739 		disk->flags |= GENHD_FL_NO_PART_SCAN;
740 	} else {
741 		struct backing_dev_info *bdi = disk->queue->backing_dev_info;
742 		struct device *dev = disk_to_dev(disk);
743 		int ret;
744 
745 		/* Register BDI before referencing it from bdev */
746 		dev->devt = devt;
747 		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
748 		WARN_ON(ret);
749 		bdi_set_owner(bdi, dev);
750 		bdev_add(disk->part0, devt);
751 	}
752 	register_disk(parent, disk, groups);
753 	if (register_queue)
754 		blk_register_queue(disk);
755 
756 	/*
757 	 * Take an extra ref on queue which will be put on disk_release()
758 	 * so that it sticks around as long as @disk is there.
759 	 */
760 	WARN_ON_ONCE(!blk_get_queue(disk->queue));
761 
762 	disk_add_events(disk);
763 	blk_integrity_add(disk);
764 }
765 
766 void device_add_disk(struct device *parent, struct gendisk *disk,
767 		     const struct attribute_group **groups)
768 
769 {
770 	__device_add_disk(parent, disk, groups, true);
771 }
772 EXPORT_SYMBOL(device_add_disk);
773 
774 void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
775 {
776 	__device_add_disk(parent, disk, NULL, false);
777 }
778 EXPORT_SYMBOL(device_add_disk_no_queue_reg);
779 
780 static void invalidate_partition(struct block_device *bdev)
781 {
782 	fsync_bdev(bdev);
783 	__invalidate_device(bdev, true);
784 
785 	/*
786 	 * Unhash the bdev inode for this device so that it can't be looked
787 	 * up any more even if openers still hold references to it.
788 	 */
789 	remove_inode_hash(bdev->bd_inode);
790 }
791 
792 /**
793  * del_gendisk - remove the gendisk
794  * @disk: the struct gendisk to remove
795  *
796  * Removes the gendisk and all its associated resources. This deletes the
797  * partitions associated with the gendisk, and unregisters the associated
798  * request_queue.
799  *
800  * This is the counter to the respective __device_add_disk() call.
801  *
802  * The final removal of the struct gendisk happens when its refcount reaches 0
803  * with put_disk(), which should be called after del_gendisk(), if
804  * __device_add_disk() was used.
805  *
806  * Drivers exist which depend on the release of the gendisk to be synchronous,
807  * it should not be deferred.
808  *
809  * Context: can sleep
810  */
811 void del_gendisk(struct gendisk *disk)
812 {
813 	struct disk_part_iter piter;
814 	struct block_device *part;
815 
816 	might_sleep();
817 
818 	if (WARN_ON_ONCE(!disk->queue))
819 		return;
820 
821 	blk_integrity_del(disk);
822 	disk_del_events(disk);
823 
824 	/*
825 	 * Block lookups of the disk until all bdevs are unhashed and the
826 	 * disk is marked as dead (GENHD_FL_UP cleared).
827 	 */
828 	down_write(&bdev_lookup_sem);
829 
830 	/* invalidate stuff */
831 	disk_part_iter_init(&piter, disk,
832 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
833 	while ((part = disk_part_iter_next(&piter))) {
834 		invalidate_partition(part);
835 		delete_partition(part);
836 	}
837 	disk_part_iter_exit(&piter);
838 
839 	invalidate_partition(disk->part0);
840 	set_capacity(disk, 0);
841 	disk->flags &= ~GENHD_FL_UP;
842 	up_write(&bdev_lookup_sem);
843 
844 	if (!(disk->flags & GENHD_FL_HIDDEN)) {
845 		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
846 
847 		/*
848 		 * Unregister bdi before releasing device numbers (as they can
849 		 * get reused and we'd get clashes in sysfs).
850 		 */
851 		bdi_unregister(disk->queue->backing_dev_info);
852 	}
853 
854 	blk_unregister_queue(disk);
855 
856 	kobject_put(disk->part0->bd_holder_dir);
857 	kobject_put(disk->slave_dir);
858 
859 	part_stat_set_all(disk->part0, 0);
860 	disk->part0->bd_stamp = 0;
861 	if (!sysfs_deprecated)
862 		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
863 	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
864 	device_del(disk_to_dev(disk));
865 }
866 EXPORT_SYMBOL(del_gendisk);
867 
868 /* sysfs access to bad-blocks list. */
869 static ssize_t disk_badblocks_show(struct device *dev,
870 					struct device_attribute *attr,
871 					char *page)
872 {
873 	struct gendisk *disk = dev_to_disk(dev);
874 
875 	if (!disk->bb)
876 		return sprintf(page, "\n");
877 
878 	return badblocks_show(disk->bb, page, 0);
879 }
880 
881 static ssize_t disk_badblocks_store(struct device *dev,
882 					struct device_attribute *attr,
883 					const char *page, size_t len)
884 {
885 	struct gendisk *disk = dev_to_disk(dev);
886 
887 	if (!disk->bb)
888 		return -ENXIO;
889 
890 	return badblocks_store(disk->bb, page, len, 0);
891 }
892 
893 void blk_request_module(dev_t devt)
894 {
895 	unsigned int major = MAJOR(devt);
896 	struct blk_major_name **n;
897 
898 	mutex_lock(&major_names_lock);
899 	for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
900 		if ((*n)->major == major && (*n)->probe) {
901 			(*n)->probe(devt);
902 			mutex_unlock(&major_names_lock);
903 			return;
904 		}
905 	}
906 	mutex_unlock(&major_names_lock);
907 
908 	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
909 		/* Make old-style 2.4 aliases work */
910 		request_module("block-major-%d", MAJOR(devt));
911 }
912 
913 /**
914  * bdget_disk - do bdget() by gendisk and partition number
915  * @disk: gendisk of interest
916  * @partno: partition number
917  *
918  * Find partition @partno from @disk, do bdget() on it.
919  *
920  * CONTEXT:
921  * Don't care.
922  *
923  * RETURNS:
924  * Resulting block_device on success, NULL on failure.
925  */
926 struct block_device *bdget_disk(struct gendisk *disk, int partno)
927 {
928 	struct block_device *bdev = NULL;
929 
930 	rcu_read_lock();
931 	bdev = __disk_get_part(disk, partno);
932 	if (bdev && !bdgrab(bdev))
933 		bdev = NULL;
934 	rcu_read_unlock();
935 
936 	return bdev;
937 }
938 
939 /*
940  * print a full list of all partitions - intended for places where the root
941  * filesystem can't be mounted and thus to give the victim some idea of what
942  * went wrong
943  */
944 void __init printk_all_partitions(void)
945 {
946 	struct class_dev_iter iter;
947 	struct device *dev;
948 
949 	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
950 	while ((dev = class_dev_iter_next(&iter))) {
951 		struct gendisk *disk = dev_to_disk(dev);
952 		struct disk_part_iter piter;
953 		struct block_device *part;
954 		char name_buf[BDEVNAME_SIZE];
955 		char devt_buf[BDEVT_SIZE];
956 
957 		/*
958 		 * Don't show empty devices or things that have been
959 		 * suppressed
960 		 */
961 		if (get_capacity(disk) == 0 ||
962 		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
963 			continue;
964 
965 		/*
966 		 * Note, unlike /proc/partitions, I am showing the
967 		 * numbers in hex - the same format as the root=
968 		 * option takes.
969 		 */
970 		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
971 		while ((part = disk_part_iter_next(&piter))) {
972 			bool is_part0 = part == disk->part0;
973 
974 			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
975 			       bdevt_str(part->bd_dev, devt_buf),
976 			       bdev_nr_sectors(part) >> 1,
977 			       disk_name(disk, part->bd_partno, name_buf),
978 			       part->bd_meta_info ?
979 					part->bd_meta_info->uuid : "");
980 			if (is_part0) {
981 				if (dev->parent && dev->parent->driver)
982 					printk(" driver: %s\n",
983 					      dev->parent->driver->name);
984 				else
985 					printk(" (driver?)\n");
986 			} else
987 				printk("\n");
988 		}
989 		disk_part_iter_exit(&piter);
990 	}
991 	class_dev_iter_exit(&iter);
992 }
993 
994 #ifdef CONFIG_PROC_FS
995 /* iterator */
996 static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
997 {
998 	loff_t skip = *pos;
999 	struct class_dev_iter *iter;
1000 	struct device *dev;
1001 
1002 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1003 	if (!iter)
1004 		return ERR_PTR(-ENOMEM);
1005 
1006 	seqf->private = iter;
1007 	class_dev_iter_init(iter, &block_class, NULL, &disk_type);
1008 	do {
1009 		dev = class_dev_iter_next(iter);
1010 		if (!dev)
1011 			return NULL;
1012 	} while (skip--);
1013 
1014 	return dev_to_disk(dev);
1015 }
1016 
1017 static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
1018 {
1019 	struct device *dev;
1020 
1021 	(*pos)++;
1022 	dev = class_dev_iter_next(seqf->private);
1023 	if (dev)
1024 		return dev_to_disk(dev);
1025 
1026 	return NULL;
1027 }
1028 
1029 static void disk_seqf_stop(struct seq_file *seqf, void *v)
1030 {
1031 	struct class_dev_iter *iter = seqf->private;
1032 
1033 	/* stop is called even after start failed :-( */
1034 	if (iter) {
1035 		class_dev_iter_exit(iter);
1036 		kfree(iter);
1037 		seqf->private = NULL;
1038 	}
1039 }
1040 
1041 static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
1042 {
1043 	void *p;
1044 
1045 	p = disk_seqf_start(seqf, pos);
1046 	if (!IS_ERR_OR_NULL(p) && !*pos)
1047 		seq_puts(seqf, "major minor  #blocks  name\n\n");
1048 	return p;
1049 }
1050 
1051 static int show_partition(struct seq_file *seqf, void *v)
1052 {
1053 	struct gendisk *sgp = v;
1054 	struct disk_part_iter piter;
1055 	struct block_device *part;
1056 	char buf[BDEVNAME_SIZE];
1057 
1058 	/* Don't show non-partitionable removeable devices or empty devices */
1059 	if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
1060 				   (sgp->flags & GENHD_FL_REMOVABLE)))
1061 		return 0;
1062 	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
1063 		return 0;
1064 
1065 	/* show the full disk and all non-0 size partitions of it */
1066 	disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
1067 	while ((part = disk_part_iter_next(&piter)))
1068 		seq_printf(seqf, "%4d  %7d %10llu %s\n",
1069 			   MAJOR(part->bd_dev), MINOR(part->bd_dev),
1070 			   bdev_nr_sectors(part) >> 1,
1071 			   disk_name(sgp, part->bd_partno, buf));
1072 	disk_part_iter_exit(&piter);
1073 
1074 	return 0;
1075 }
1076 
1077 static const struct seq_operations partitions_op = {
1078 	.start	= show_partition_start,
1079 	.next	= disk_seqf_next,
1080 	.stop	= disk_seqf_stop,
1081 	.show	= show_partition
1082 };
1083 #endif
1084 
1085 static int __init genhd_device_init(void)
1086 {
1087 	int error;
1088 
1089 	block_class.dev_kobj = sysfs_dev_block_kobj;
1090 	error = class_register(&block_class);
1091 	if (unlikely(error))
1092 		return error;
1093 	blk_dev_init();
1094 
1095 	register_blkdev(BLOCK_EXT_MAJOR, "blkext");
1096 
1097 	/* create top-level block dir */
1098 	if (!sysfs_deprecated)
1099 		block_depr = kobject_create_and_add("block", NULL);
1100 	return 0;
1101 }
1102 
1103 subsys_initcall(genhd_device_init);
1104 
1105 static ssize_t disk_range_show(struct device *dev,
1106 			       struct device_attribute *attr, char *buf)
1107 {
1108 	struct gendisk *disk = dev_to_disk(dev);
1109 
1110 	return sprintf(buf, "%d\n", disk->minors);
1111 }
1112 
1113 static ssize_t disk_ext_range_show(struct device *dev,
1114 				   struct device_attribute *attr, char *buf)
1115 {
1116 	struct gendisk *disk = dev_to_disk(dev);
1117 
1118 	return sprintf(buf, "%d\n", disk_max_parts(disk));
1119 }
1120 
1121 static ssize_t disk_removable_show(struct device *dev,
1122 				   struct device_attribute *attr, char *buf)
1123 {
1124 	struct gendisk *disk = dev_to_disk(dev);
1125 
1126 	return sprintf(buf, "%d\n",
1127 		       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
1128 }
1129 
1130 static ssize_t disk_hidden_show(struct device *dev,
1131 				   struct device_attribute *attr, char *buf)
1132 {
1133 	struct gendisk *disk = dev_to_disk(dev);
1134 
1135 	return sprintf(buf, "%d\n",
1136 		       (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
1137 }
1138 
1139 static ssize_t disk_ro_show(struct device *dev,
1140 				   struct device_attribute *attr, char *buf)
1141 {
1142 	struct gendisk *disk = dev_to_disk(dev);
1143 
1144 	return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
1145 }
1146 
1147 ssize_t part_size_show(struct device *dev,
1148 		       struct device_attribute *attr, char *buf)
1149 {
1150 	return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
1151 }
1152 
1153 ssize_t part_stat_show(struct device *dev,
1154 		       struct device_attribute *attr, char *buf)
1155 {
1156 	struct block_device *bdev = dev_to_bdev(dev);
1157 	struct request_queue *q = bdev->bd_disk->queue;
1158 	struct disk_stats stat;
1159 	unsigned int inflight;
1160 
1161 	part_stat_read_all(bdev, &stat);
1162 	if (queue_is_mq(q))
1163 		inflight = blk_mq_in_flight(q, bdev);
1164 	else
1165 		inflight = part_in_flight(bdev);
1166 
1167 	return sprintf(buf,
1168 		"%8lu %8lu %8llu %8u "
1169 		"%8lu %8lu %8llu %8u "
1170 		"%8u %8u %8u "
1171 		"%8lu %8lu %8llu %8u "
1172 		"%8lu %8u"
1173 		"\n",
1174 		stat.ios[STAT_READ],
1175 		stat.merges[STAT_READ],
1176 		(unsigned long long)stat.sectors[STAT_READ],
1177 		(unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
1178 		stat.ios[STAT_WRITE],
1179 		stat.merges[STAT_WRITE],
1180 		(unsigned long long)stat.sectors[STAT_WRITE],
1181 		(unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
1182 		inflight,
1183 		jiffies_to_msecs(stat.io_ticks),
1184 		(unsigned int)div_u64(stat.nsecs[STAT_READ] +
1185 				      stat.nsecs[STAT_WRITE] +
1186 				      stat.nsecs[STAT_DISCARD] +
1187 				      stat.nsecs[STAT_FLUSH],
1188 						NSEC_PER_MSEC),
1189 		stat.ios[STAT_DISCARD],
1190 		stat.merges[STAT_DISCARD],
1191 		(unsigned long long)stat.sectors[STAT_DISCARD],
1192 		(unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
1193 		stat.ios[STAT_FLUSH],
1194 		(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
1195 }
1196 
1197 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
1198 			   char *buf)
1199 {
1200 	struct block_device *bdev = dev_to_bdev(dev);
1201 	struct request_queue *q = bdev->bd_disk->queue;
1202 	unsigned int inflight[2];
1203 
1204 	if (queue_is_mq(q))
1205 		blk_mq_in_flight_rw(q, bdev, inflight);
1206 	else
1207 		part_in_flight_rw(bdev, inflight);
1208 
1209 	return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
1210 }
1211 
1212 static ssize_t disk_capability_show(struct device *dev,
1213 				    struct device_attribute *attr, char *buf)
1214 {
1215 	struct gendisk *disk = dev_to_disk(dev);
1216 
1217 	return sprintf(buf, "%x\n", disk->flags);
1218 }
1219 
1220 static ssize_t disk_alignment_offset_show(struct device *dev,
1221 					  struct device_attribute *attr,
1222 					  char *buf)
1223 {
1224 	struct gendisk *disk = dev_to_disk(dev);
1225 
1226 	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
1227 }
1228 
1229 static ssize_t disk_discard_alignment_show(struct device *dev,
1230 					   struct device_attribute *attr,
1231 					   char *buf)
1232 {
1233 	struct gendisk *disk = dev_to_disk(dev);
1234 
1235 	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
1236 }
1237 
1238 static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
1239 static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
1240 static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
1241 static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
1242 static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
1243 static DEVICE_ATTR(size, 0444, part_size_show, NULL);
1244 static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
1245 static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
1246 static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
1247 static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
1248 static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
1249 static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
1250 
1251 #ifdef CONFIG_FAIL_MAKE_REQUEST
1252 ssize_t part_fail_show(struct device *dev,
1253 		       struct device_attribute *attr, char *buf)
1254 {
1255 	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
1256 }
1257 
1258 ssize_t part_fail_store(struct device *dev,
1259 			struct device_attribute *attr,
1260 			const char *buf, size_t count)
1261 {
1262 	int i;
1263 
1264 	if (count > 0 && sscanf(buf, "%d", &i) > 0)
1265 		dev_to_bdev(dev)->bd_make_it_fail = i;
1266 
1267 	return count;
1268 }
1269 
1270 static struct device_attribute dev_attr_fail =
1271 	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
1272 #endif /* CONFIG_FAIL_MAKE_REQUEST */
1273 
1274 #ifdef CONFIG_FAIL_IO_TIMEOUT
1275 static struct device_attribute dev_attr_fail_timeout =
1276 	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
1277 #endif
1278 
1279 static struct attribute *disk_attrs[] = {
1280 	&dev_attr_range.attr,
1281 	&dev_attr_ext_range.attr,
1282 	&dev_attr_removable.attr,
1283 	&dev_attr_hidden.attr,
1284 	&dev_attr_ro.attr,
1285 	&dev_attr_size.attr,
1286 	&dev_attr_alignment_offset.attr,
1287 	&dev_attr_discard_alignment.attr,
1288 	&dev_attr_capability.attr,
1289 	&dev_attr_stat.attr,
1290 	&dev_attr_inflight.attr,
1291 	&dev_attr_badblocks.attr,
1292 #ifdef CONFIG_FAIL_MAKE_REQUEST
1293 	&dev_attr_fail.attr,
1294 #endif
1295 #ifdef CONFIG_FAIL_IO_TIMEOUT
1296 	&dev_attr_fail_timeout.attr,
1297 #endif
1298 	NULL
1299 };
1300 
1301 static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
1302 {
1303 	struct device *dev = container_of(kobj, typeof(*dev), kobj);
1304 	struct gendisk *disk = dev_to_disk(dev);
1305 
1306 	if (a == &dev_attr_badblocks.attr && !disk->bb)
1307 		return 0;
1308 	return a->mode;
1309 }
1310 
1311 static struct attribute_group disk_attr_group = {
1312 	.attrs = disk_attrs,
1313 	.is_visible = disk_visible,
1314 };
1315 
1316 static const struct attribute_group *disk_attr_groups[] = {
1317 	&disk_attr_group,
1318 	NULL
1319 };
1320 
1321 /**
1322  * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
1323  * @disk: disk to replace part_tbl for
1324  * @new_ptbl: new part_tbl to install
1325  *
1326  * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
1327  * original ptbl is freed using RCU callback.
1328  *
1329  * LOCKING:
1330  * Matching bd_mutex locked or the caller is the only user of @disk.
1331  */
1332 static void disk_replace_part_tbl(struct gendisk *disk,
1333 				  struct disk_part_tbl *new_ptbl)
1334 {
1335 	struct disk_part_tbl *old_ptbl =
1336 		rcu_dereference_protected(disk->part_tbl, 1);
1337 
1338 	rcu_assign_pointer(disk->part_tbl, new_ptbl);
1339 
1340 	if (old_ptbl) {
1341 		rcu_assign_pointer(old_ptbl->last_lookup, NULL);
1342 		kfree_rcu(old_ptbl, rcu_head);
1343 	}
1344 }
1345 
1346 /**
1347  * disk_expand_part_tbl - expand disk->part_tbl
1348  * @disk: disk to expand part_tbl for
1349  * @partno: expand such that this partno can fit in
1350  *
1351  * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
1352  * uses RCU to allow unlocked dereferencing for stats and other stuff.
1353  *
1354  * LOCKING:
1355  * Matching bd_mutex locked or the caller is the only user of @disk.
1356  * Might sleep.
1357  *
1358  * RETURNS:
1359  * 0 on success, -errno on failure.
1360  */
1361 int disk_expand_part_tbl(struct gendisk *disk, int partno)
1362 {
1363 	struct disk_part_tbl *old_ptbl =
1364 		rcu_dereference_protected(disk->part_tbl, 1);
1365 	struct disk_part_tbl *new_ptbl;
1366 	int len = old_ptbl ? old_ptbl->len : 0;
1367 	int i, target;
1368 
1369 	/*
1370 	 * check for int overflow, since we can get here from blkpg_ioctl()
1371 	 * with a user passed 'partno'.
1372 	 */
1373 	target = partno + 1;
1374 	if (target < 0)
1375 		return -EINVAL;
1376 
1377 	/* disk_max_parts() is zero during initialization, ignore if so */
1378 	if (disk_max_parts(disk) && target > disk_max_parts(disk))
1379 		return -EINVAL;
1380 
1381 	if (target <= len)
1382 		return 0;
1383 
1384 	new_ptbl = kzalloc_node(struct_size(new_ptbl, part, target), GFP_KERNEL,
1385 				disk->node_id);
1386 	if (!new_ptbl)
1387 		return -ENOMEM;
1388 
1389 	new_ptbl->len = target;
1390 
1391 	for (i = 0; i < len; i++)
1392 		rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
1393 
1394 	disk_replace_part_tbl(disk, new_ptbl);
1395 	return 0;
1396 }
1397 
1398 /**
1399  * disk_release - releases all allocated resources of the gendisk
1400  * @dev: the device representing this disk
1401  *
1402  * This function releases all allocated resources of the gendisk.
1403  *
1404  * Drivers which used __device_add_disk() have a gendisk with a request_queue
1405  * assigned. Since the request_queue sits on top of the gendisk for these
1406  * drivers we also call blk_put_queue() for them, and we expect the
1407  * request_queue refcount to reach 0 at this point, and so the request_queue
1408  * will also be freed prior to the disk.
1409  *
1410  * Context: can sleep
1411  */
1412 static void disk_release(struct device *dev)
1413 {
1414 	struct gendisk *disk = dev_to_disk(dev);
1415 
1416 	might_sleep();
1417 
1418 	blk_free_devt(dev->devt);
1419 	disk_release_events(disk);
1420 	kfree(disk->random);
1421 	disk_replace_part_tbl(disk, NULL);
1422 	bdput(disk->part0);
1423 	if (disk->queue)
1424 		blk_put_queue(disk->queue);
1425 	kfree(disk);
1426 }
1427 struct class block_class = {
1428 	.name		= "block",
1429 };
1430 
1431 static char *block_devnode(struct device *dev, umode_t *mode,
1432 			   kuid_t *uid, kgid_t *gid)
1433 {
1434 	struct gendisk *disk = dev_to_disk(dev);
1435 
1436 	if (disk->fops->devnode)
1437 		return disk->fops->devnode(disk, mode);
1438 	return NULL;
1439 }
1440 
1441 const struct device_type disk_type = {
1442 	.name		= "disk",
1443 	.groups		= disk_attr_groups,
1444 	.release	= disk_release,
1445 	.devnode	= block_devnode,
1446 };
1447 
1448 #ifdef CONFIG_PROC_FS
1449 /*
1450  * aggregate disk stat collector.  Uses the same stats that the sysfs
1451  * entries do, above, but makes them available through one seq_file.
1452  *
1453  * The output looks suspiciously like /proc/partitions with a bunch of
1454  * extra fields.
1455  */
1456 static int diskstats_show(struct seq_file *seqf, void *v)
1457 {
1458 	struct gendisk *gp = v;
1459 	struct disk_part_iter piter;
1460 	struct block_device *hd;
1461 	char buf[BDEVNAME_SIZE];
1462 	unsigned int inflight;
1463 	struct disk_stats stat;
1464 
1465 	/*
1466 	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1467 		seq_puts(seqf,	"major minor name"
1468 				"     rio rmerge rsect ruse wio wmerge "
1469 				"wsect wuse running use aveq"
1470 				"\n\n");
1471 	*/
1472 
1473 	disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
1474 	while ((hd = disk_part_iter_next(&piter))) {
1475 		part_stat_read_all(hd, &stat);
1476 		if (queue_is_mq(gp->queue))
1477 			inflight = blk_mq_in_flight(gp->queue, hd);
1478 		else
1479 			inflight = part_in_flight(hd);
1480 
1481 		seq_printf(seqf, "%4d %7d %s "
1482 			   "%lu %lu %lu %u "
1483 			   "%lu %lu %lu %u "
1484 			   "%u %u %u "
1485 			   "%lu %lu %lu %u "
1486 			   "%lu %u"
1487 			   "\n",
1488 			   MAJOR(hd->bd_dev), MINOR(hd->bd_dev),
1489 			   disk_name(gp, hd->bd_partno, buf),
1490 			   stat.ios[STAT_READ],
1491 			   stat.merges[STAT_READ],
1492 			   stat.sectors[STAT_READ],
1493 			   (unsigned int)div_u64(stat.nsecs[STAT_READ],
1494 							NSEC_PER_MSEC),
1495 			   stat.ios[STAT_WRITE],
1496 			   stat.merges[STAT_WRITE],
1497 			   stat.sectors[STAT_WRITE],
1498 			   (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
1499 							NSEC_PER_MSEC),
1500 			   inflight,
1501 			   jiffies_to_msecs(stat.io_ticks),
1502 			   (unsigned int)div_u64(stat.nsecs[STAT_READ] +
1503 						 stat.nsecs[STAT_WRITE] +
1504 						 stat.nsecs[STAT_DISCARD] +
1505 						 stat.nsecs[STAT_FLUSH],
1506 							NSEC_PER_MSEC),
1507 			   stat.ios[STAT_DISCARD],
1508 			   stat.merges[STAT_DISCARD],
1509 			   stat.sectors[STAT_DISCARD],
1510 			   (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
1511 						 NSEC_PER_MSEC),
1512 			   stat.ios[STAT_FLUSH],
1513 			   (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
1514 						 NSEC_PER_MSEC)
1515 			);
1516 	}
1517 	disk_part_iter_exit(&piter);
1518 
1519 	return 0;
1520 }
1521 
1522 static const struct seq_operations diskstats_op = {
1523 	.start	= disk_seqf_start,
1524 	.next	= disk_seqf_next,
1525 	.stop	= disk_seqf_stop,
1526 	.show	= diskstats_show
1527 };
1528 
1529 static int __init proc_genhd_init(void)
1530 {
1531 	proc_create_seq("diskstats", 0, NULL, &diskstats_op);
1532 	proc_create_seq("partitions", 0, NULL, &partitions_op);
1533 	return 0;
1534 }
1535 module_init(proc_genhd_init);
1536 #endif /* CONFIG_PROC_FS */
1537 
1538 dev_t blk_lookup_devt(const char *name, int partno)
1539 {
1540 	dev_t devt = MKDEV(0, 0);
1541 	struct class_dev_iter iter;
1542 	struct device *dev;
1543 
1544 	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1545 	while ((dev = class_dev_iter_next(&iter))) {
1546 		struct gendisk *disk = dev_to_disk(dev);
1547 		struct block_device *part;
1548 
1549 		if (strcmp(dev_name(dev), name))
1550 			continue;
1551 
1552 		if (partno < disk->minors) {
1553 			/* We need to return the right devno, even
1554 			 * if the partition doesn't exist yet.
1555 			 */
1556 			devt = MKDEV(MAJOR(dev->devt),
1557 				     MINOR(dev->devt) + partno);
1558 			break;
1559 		}
1560 		part = bdget_disk(disk, partno);
1561 		if (part) {
1562 			devt = part->bd_dev;
1563 			bdput(part);
1564 			break;
1565 		}
1566 	}
1567 	class_dev_iter_exit(&iter);
1568 	return devt;
1569 }
1570 
1571 struct gendisk *__alloc_disk_node(int minors, int node_id)
1572 {
1573 	struct gendisk *disk;
1574 	struct disk_part_tbl *ptbl;
1575 
1576 	if (minors > DISK_MAX_PARTS) {
1577 		printk(KERN_ERR
1578 			"block: can't allocate more than %d partitions\n",
1579 			DISK_MAX_PARTS);
1580 		minors = DISK_MAX_PARTS;
1581 	}
1582 
1583 	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
1584 	if (!disk)
1585 		return NULL;
1586 
1587 	disk->part0 = bdev_alloc(disk, 0);
1588 	if (!disk->part0)
1589 		goto out_free_disk;
1590 
1591 	disk->node_id = node_id;
1592 	if (disk_expand_part_tbl(disk, 0))
1593 		goto out_bdput;
1594 
1595 	ptbl = rcu_dereference_protected(disk->part_tbl, 1);
1596 	rcu_assign_pointer(ptbl->part[0], disk->part0);
1597 
1598 	disk->minors = minors;
1599 	rand_initialize_disk(disk);
1600 	disk_to_dev(disk)->class = &block_class;
1601 	disk_to_dev(disk)->type = &disk_type;
1602 	device_initialize(disk_to_dev(disk));
1603 	return disk;
1604 
1605 out_bdput:
1606 	bdput(disk->part0);
1607 out_free_disk:
1608 	kfree(disk);
1609 	return NULL;
1610 }
1611 EXPORT_SYMBOL(__alloc_disk_node);
1612 
1613 /**
1614  * put_disk - decrements the gendisk refcount
1615  * @disk: the struct gendisk to decrement the refcount for
1616  *
1617  * This decrements the refcount for the struct gendisk. When this reaches 0
1618  * we'll have disk_release() called.
1619  *
1620  * Context: Any context, but the last reference must not be dropped from
1621  *          atomic context.
1622  */
1623 void put_disk(struct gendisk *disk)
1624 {
1625 	if (disk)
1626 		put_device(disk_to_dev(disk));
1627 }
1628 EXPORT_SYMBOL(put_disk);
1629 
1630 static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1631 {
1632 	char event[] = "DISK_RO=1";
1633 	char *envp[] = { event, NULL };
1634 
1635 	if (!ro)
1636 		event[8] = '0';
1637 	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1638 }
1639 
1640 /**
1641  * set_disk_ro - set a gendisk read-only
1642  * @disk:	gendisk to operate on
1643  * @ready_only:	%true to set the disk read-only, %false set the disk read/write
1644  *
1645  * This function is used to indicate whether a given disk device should have its
1646  * read-only flag set. set_disk_ro() is typically used by device drivers to
1647  * indicate whether the underlying physical device is write-protected.
1648  */
1649 void set_disk_ro(struct gendisk *disk, bool read_only)
1650 {
1651 	if (read_only) {
1652 		if (test_and_set_bit(GD_READ_ONLY, &disk->state))
1653 			return;
1654 	} else {
1655 		if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
1656 			return;
1657 	}
1658 	set_disk_ro_uevent(disk, read_only);
1659 }
1660 EXPORT_SYMBOL(set_disk_ro);
1661 
1662 int bdev_read_only(struct block_device *bdev)
1663 {
1664 	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
1665 }
1666 EXPORT_SYMBOL(bdev_read_only);
1667 
1668 /*
1669  * Disk events - monitor disk events like media change and eject request.
1670  */
1671 struct disk_events {
1672 	struct list_head	node;		/* all disk_event's */
1673 	struct gendisk		*disk;		/* the associated disk */
1674 	spinlock_t		lock;
1675 
1676 	struct mutex		block_mutex;	/* protects blocking */
1677 	int			block;		/* event blocking depth */
1678 	unsigned int		pending;	/* events already sent out */
1679 	unsigned int		clearing;	/* events being cleared */
1680 
1681 	long			poll_msecs;	/* interval, -1 for default */
1682 	struct delayed_work	dwork;
1683 };
1684 
1685 static const char *disk_events_strs[] = {
1686 	[ilog2(DISK_EVENT_MEDIA_CHANGE)]	= "media_change",
1687 	[ilog2(DISK_EVENT_EJECT_REQUEST)]	= "eject_request",
1688 };
1689 
1690 static char *disk_uevents[] = {
1691 	[ilog2(DISK_EVENT_MEDIA_CHANGE)]	= "DISK_MEDIA_CHANGE=1",
1692 	[ilog2(DISK_EVENT_EJECT_REQUEST)]	= "DISK_EJECT_REQUEST=1",
1693 };
1694 
1695 /* list of all disk_events */
1696 static DEFINE_MUTEX(disk_events_mutex);
1697 static LIST_HEAD(disk_events);
1698 
1699 /* disable in-kernel polling by default */
1700 static unsigned long disk_events_dfl_poll_msecs;
1701 
1702 static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
1703 {
1704 	struct disk_events *ev = disk->ev;
1705 	long intv_msecs = 0;
1706 
1707 	/*
1708 	 * If device-specific poll interval is set, always use it.  If
1709 	 * the default is being used, poll if the POLL flag is set.
1710 	 */
1711 	if (ev->poll_msecs >= 0)
1712 		intv_msecs = ev->poll_msecs;
1713 	else if (disk->event_flags & DISK_EVENT_FLAG_POLL)
1714 		intv_msecs = disk_events_dfl_poll_msecs;
1715 
1716 	return msecs_to_jiffies(intv_msecs);
1717 }
1718 
1719 /**
1720  * disk_block_events - block and flush disk event checking
1721  * @disk: disk to block events for
1722  *
1723  * On return from this function, it is guaranteed that event checking
1724  * isn't in progress and won't happen until unblocked by
1725  * disk_unblock_events().  Events blocking is counted and the actual
1726  * unblocking happens after the matching number of unblocks are done.
1727  *
1728  * Note that this intentionally does not block event checking from
1729  * disk_clear_events().
1730  *
1731  * CONTEXT:
1732  * Might sleep.
1733  */
1734 void disk_block_events(struct gendisk *disk)
1735 {
1736 	struct disk_events *ev = disk->ev;
1737 	unsigned long flags;
1738 	bool cancel;
1739 
1740 	if (!ev)
1741 		return;
1742 
1743 	/*
1744 	 * Outer mutex ensures that the first blocker completes canceling
1745 	 * the event work before further blockers are allowed to finish.
1746 	 */
1747 	mutex_lock(&ev->block_mutex);
1748 
1749 	spin_lock_irqsave(&ev->lock, flags);
1750 	cancel = !ev->block++;
1751 	spin_unlock_irqrestore(&ev->lock, flags);
1752 
1753 	if (cancel)
1754 		cancel_delayed_work_sync(&disk->ev->dwork);
1755 
1756 	mutex_unlock(&ev->block_mutex);
1757 }
1758 
1759 static void __disk_unblock_events(struct gendisk *disk, bool check_now)
1760 {
1761 	struct disk_events *ev = disk->ev;
1762 	unsigned long intv;
1763 	unsigned long flags;
1764 
1765 	spin_lock_irqsave(&ev->lock, flags);
1766 
1767 	if (WARN_ON_ONCE(ev->block <= 0))
1768 		goto out_unlock;
1769 
1770 	if (--ev->block)
1771 		goto out_unlock;
1772 
1773 	intv = disk_events_poll_jiffies(disk);
1774 	if (check_now)
1775 		queue_delayed_work(system_freezable_power_efficient_wq,
1776 				&ev->dwork, 0);
1777 	else if (intv)
1778 		queue_delayed_work(system_freezable_power_efficient_wq,
1779 				&ev->dwork, intv);
1780 out_unlock:
1781 	spin_unlock_irqrestore(&ev->lock, flags);
1782 }
1783 
1784 /**
1785  * disk_unblock_events - unblock disk event checking
1786  * @disk: disk to unblock events for
1787  *
1788  * Undo disk_block_events().  When the block count reaches zero, it
1789  * starts events polling if configured.
1790  *
1791  * CONTEXT:
1792  * Don't care.  Safe to call from irq context.
1793  */
1794 void disk_unblock_events(struct gendisk *disk)
1795 {
1796 	if (disk->ev)
1797 		__disk_unblock_events(disk, false);
1798 }
1799 
1800 /**
1801  * disk_flush_events - schedule immediate event checking and flushing
1802  * @disk: disk to check and flush events for
1803  * @mask: events to flush
1804  *
1805  * Schedule immediate event checking on @disk if not blocked.  Events in
1806  * @mask are scheduled to be cleared from the driver.  Note that this
1807  * doesn't clear the events from @disk->ev.
1808  *
1809  * CONTEXT:
1810  * If @mask is non-zero must be called with bdev->bd_mutex held.
1811  */
1812 void disk_flush_events(struct gendisk *disk, unsigned int mask)
1813 {
1814 	struct disk_events *ev = disk->ev;
1815 
1816 	if (!ev)
1817 		return;
1818 
1819 	spin_lock_irq(&ev->lock);
1820 	ev->clearing |= mask;
1821 	if (!ev->block)
1822 		mod_delayed_work(system_freezable_power_efficient_wq,
1823 				&ev->dwork, 0);
1824 	spin_unlock_irq(&ev->lock);
1825 }
1826 
1827 /**
1828  * disk_clear_events - synchronously check, clear and return pending events
1829  * @disk: disk to fetch and clear events from
1830  * @mask: mask of events to be fetched and cleared
1831  *
1832  * Disk events are synchronously checked and pending events in @mask
1833  * are cleared and returned.  This ignores the block count.
1834  *
1835  * CONTEXT:
1836  * Might sleep.
1837  */
1838 static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
1839 {
1840 	struct disk_events *ev = disk->ev;
1841 	unsigned int pending;
1842 	unsigned int clearing = mask;
1843 
1844 	if (!ev)
1845 		return 0;
1846 
1847 	disk_block_events(disk);
1848 
1849 	/*
1850 	 * store the union of mask and ev->clearing on the stack so that the
1851 	 * race with disk_flush_events does not cause ambiguity (ev->clearing
1852 	 * can still be modified even if events are blocked).
1853 	 */
1854 	spin_lock_irq(&ev->lock);
1855 	clearing |= ev->clearing;
1856 	ev->clearing = 0;
1857 	spin_unlock_irq(&ev->lock);
1858 
1859 	disk_check_events(ev, &clearing);
1860 	/*
1861 	 * if ev->clearing is not 0, the disk_flush_events got called in the
1862 	 * middle of this function, so we want to run the workfn without delay.
1863 	 */
1864 	__disk_unblock_events(disk, ev->clearing ? true : false);
1865 
1866 	/* then, fetch and clear pending events */
1867 	spin_lock_irq(&ev->lock);
1868 	pending = ev->pending & mask;
1869 	ev->pending &= ~mask;
1870 	spin_unlock_irq(&ev->lock);
1871 	WARN_ON_ONCE(clearing & mask);
1872 
1873 	return pending;
1874 }
1875 
1876 /**
1877  * bdev_check_media_change - check if a removable media has been changed
1878  * @bdev: block device to check
1879  *
1880  * Check whether a removable media has been changed, and attempt to free all
1881  * dentries and inodes and invalidates all block device page cache entries in
1882  * that case.
1883  *
1884  * Returns %true if the block device changed, or %false if not.
1885  */
1886 bool bdev_check_media_change(struct block_device *bdev)
1887 {
1888 	unsigned int events;
1889 
1890 	events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE |
1891 				   DISK_EVENT_EJECT_REQUEST);
1892 	if (!(events & DISK_EVENT_MEDIA_CHANGE))
1893 		return false;
1894 
1895 	if (__invalidate_device(bdev, true))
1896 		pr_warn("VFS: busy inodes on changed media %s\n",
1897 			bdev->bd_disk->disk_name);
1898 	set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
1899 	return true;
1900 }
1901 EXPORT_SYMBOL(bdev_check_media_change);
1902 
1903 /*
1904  * Separate this part out so that a different pointer for clearing_ptr can be
1905  * passed in for disk_clear_events.
1906  */
1907 static void disk_events_workfn(struct work_struct *work)
1908 {
1909 	struct delayed_work *dwork = to_delayed_work(work);
1910 	struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
1911 
1912 	disk_check_events(ev, &ev->clearing);
1913 }
1914 
1915 static void disk_check_events(struct disk_events *ev,
1916 			      unsigned int *clearing_ptr)
1917 {
1918 	struct gendisk *disk = ev->disk;
1919 	char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
1920 	unsigned int clearing = *clearing_ptr;
1921 	unsigned int events;
1922 	unsigned long intv;
1923 	int nr_events = 0, i;
1924 
1925 	/* check events */
1926 	events = disk->fops->check_events(disk, clearing);
1927 
1928 	/* accumulate pending events and schedule next poll if necessary */
1929 	spin_lock_irq(&ev->lock);
1930 
1931 	events &= ~ev->pending;
1932 	ev->pending |= events;
1933 	*clearing_ptr &= ~clearing;
1934 
1935 	intv = disk_events_poll_jiffies(disk);
1936 	if (!ev->block && intv)
1937 		queue_delayed_work(system_freezable_power_efficient_wq,
1938 				&ev->dwork, intv);
1939 
1940 	spin_unlock_irq(&ev->lock);
1941 
1942 	/*
1943 	 * Tell userland about new events.  Only the events listed in
1944 	 * @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
1945 	 * is set. Otherwise, events are processed internally but never
1946 	 * get reported to userland.
1947 	 */
1948 	for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
1949 		if ((events & disk->events & (1 << i)) &&
1950 		    (disk->event_flags & DISK_EVENT_FLAG_UEVENT))
1951 			envp[nr_events++] = disk_uevents[i];
1952 
1953 	if (nr_events)
1954 		kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
1955 }
1956 
1957 /*
1958  * A disk events enabled device has the following sysfs nodes under
1959  * its /sys/block/X/ directory.
1960  *
1961  * events		: list of all supported events
1962  * events_async		: list of events which can be detected w/o polling
1963  *			  (always empty, only for backwards compatibility)
1964  * events_poll_msecs	: polling interval, 0: disable, -1: system default
1965  */
1966 static ssize_t __disk_events_show(unsigned int events, char *buf)
1967 {
1968 	const char *delim = "";
1969 	ssize_t pos = 0;
1970 	int i;
1971 
1972 	for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
1973 		if (events & (1 << i)) {
1974 			pos += sprintf(buf + pos, "%s%s",
1975 				       delim, disk_events_strs[i]);
1976 			delim = " ";
1977 		}
1978 	if (pos)
1979 		pos += sprintf(buf + pos, "\n");
1980 	return pos;
1981 }
1982 
1983 static ssize_t disk_events_show(struct device *dev,
1984 				struct device_attribute *attr, char *buf)
1985 {
1986 	struct gendisk *disk = dev_to_disk(dev);
1987 
1988 	if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
1989 		return 0;
1990 
1991 	return __disk_events_show(disk->events, buf);
1992 }
1993 
1994 static ssize_t disk_events_async_show(struct device *dev,
1995 				      struct device_attribute *attr, char *buf)
1996 {
1997 	return 0;
1998 }
1999 
2000 static ssize_t disk_events_poll_msecs_show(struct device *dev,
2001 					   struct device_attribute *attr,
2002 					   char *buf)
2003 {
2004 	struct gendisk *disk = dev_to_disk(dev);
2005 
2006 	if (!disk->ev)
2007 		return sprintf(buf, "-1\n");
2008 
2009 	return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
2010 }
2011 
2012 static ssize_t disk_events_poll_msecs_store(struct device *dev,
2013 					    struct device_attribute *attr,
2014 					    const char *buf, size_t count)
2015 {
2016 	struct gendisk *disk = dev_to_disk(dev);
2017 	long intv;
2018 
2019 	if (!count || !sscanf(buf, "%ld", &intv))
2020 		return -EINVAL;
2021 
2022 	if (intv < 0 && intv != -1)
2023 		return -EINVAL;
2024 
2025 	if (!disk->ev)
2026 		return -ENODEV;
2027 
2028 	disk_block_events(disk);
2029 	disk->ev->poll_msecs = intv;
2030 	__disk_unblock_events(disk, true);
2031 
2032 	return count;
2033 }
2034 
2035 static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
2036 static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
2037 static const DEVICE_ATTR(events_poll_msecs, 0644,
2038 			 disk_events_poll_msecs_show,
2039 			 disk_events_poll_msecs_store);
2040 
2041 static const struct attribute *disk_events_attrs[] = {
2042 	&dev_attr_events.attr,
2043 	&dev_attr_events_async.attr,
2044 	&dev_attr_events_poll_msecs.attr,
2045 	NULL,
2046 };
2047 
2048 /*
2049  * The default polling interval can be specified by the kernel
2050  * parameter block.events_dfl_poll_msecs which defaults to 0
2051  * (disable).  This can also be modified runtime by writing to
2052  * /sys/module/block/parameters/events_dfl_poll_msecs.
2053  */
2054 static int disk_events_set_dfl_poll_msecs(const char *val,
2055 					  const struct kernel_param *kp)
2056 {
2057 	struct disk_events *ev;
2058 	int ret;
2059 
2060 	ret = param_set_ulong(val, kp);
2061 	if (ret < 0)
2062 		return ret;
2063 
2064 	mutex_lock(&disk_events_mutex);
2065 
2066 	list_for_each_entry(ev, &disk_events, node)
2067 		disk_flush_events(ev->disk, 0);
2068 
2069 	mutex_unlock(&disk_events_mutex);
2070 
2071 	return 0;
2072 }
2073 
2074 static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
2075 	.set	= disk_events_set_dfl_poll_msecs,
2076 	.get	= param_get_ulong,
2077 };
2078 
2079 #undef MODULE_PARAM_PREFIX
2080 #define MODULE_PARAM_PREFIX	"block."
2081 
2082 module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
2083 		&disk_events_dfl_poll_msecs, 0644);
2084 
2085 /*
2086  * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
2087  */
2088 static void disk_alloc_events(struct gendisk *disk)
2089 {
2090 	struct disk_events *ev;
2091 
2092 	if (!disk->fops->check_events || !disk->events)
2093 		return;
2094 
2095 	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
2096 	if (!ev) {
2097 		pr_warn("%s: failed to initialize events\n", disk->disk_name);
2098 		return;
2099 	}
2100 
2101 	INIT_LIST_HEAD(&ev->node);
2102 	ev->disk = disk;
2103 	spin_lock_init(&ev->lock);
2104 	mutex_init(&ev->block_mutex);
2105 	ev->block = 1;
2106 	ev->poll_msecs = -1;
2107 	INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
2108 
2109 	disk->ev = ev;
2110 }
2111 
2112 static void disk_add_events(struct gendisk *disk)
2113 {
2114 	/* FIXME: error handling */
2115 	if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
2116 		pr_warn("%s: failed to create sysfs files for events\n",
2117 			disk->disk_name);
2118 
2119 	if (!disk->ev)
2120 		return;
2121 
2122 	mutex_lock(&disk_events_mutex);
2123 	list_add_tail(&disk->ev->node, &disk_events);
2124 	mutex_unlock(&disk_events_mutex);
2125 
2126 	/*
2127 	 * Block count is initialized to 1 and the following initial
2128 	 * unblock kicks it into action.
2129 	 */
2130 	__disk_unblock_events(disk, true);
2131 }
2132 
2133 static void disk_del_events(struct gendisk *disk)
2134 {
2135 	if (disk->ev) {
2136 		disk_block_events(disk);
2137 
2138 		mutex_lock(&disk_events_mutex);
2139 		list_del_init(&disk->ev->node);
2140 		mutex_unlock(&disk_events_mutex);
2141 	}
2142 
2143 	sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
2144 }
2145 
2146 static void disk_release_events(struct gendisk *disk)
2147 {
2148 	/* the block count should be 1 from disk_del_events() */
2149 	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
2150 	kfree(disk->ev);
2151 }
2152