/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/config.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>

/*
 * This is needed for the following functions:
 *  - inode_has_buffers
 *  - invalidate_inode_buffers
 *  - fsync_bdev
 *  - invalidate_bdev
 *
 * FIXME: remove all knowledge of the buffer layer from this file
 */
#include <linux/buffer_head.h>

/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

/* #define INODE_PARANOIA 1 */
/* #define INODE_DEBUG 1 */

/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define I_HASHBITS	i_hash_shift
#define I_HASHMASK	i_hash_mask

static unsigned int i_hash_mask;
static unsigned int i_hash_shift;

/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */

LIST_HEAD(inode_in_use);
LIST_HEAD(inode_unused);
static struct hlist_head *inode_hashtable;
static HLIST_HEAD(anon_hash_chain); /* for inodes with NULL i_sb */

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;

/*
 * iprune_sem provides exclusion between the kswapd or try_to_free_pages
 * icache shrinking path, and the umount path.  Without this exclusion,
 * by the time prune_icache calls iput for the inode whose pages it has
 * been invalidating, or by the time it calls clear_inode & destroy_inode
 * from its final dispose_list, the struct super_block they refer to
 * (for inode->i_sb->s_op) may already have been freed and reused.
 */
static DECLARE_MUTEX(iprune_sem);

/*
 * Statistics gathering..
 */
struct inodes_stat_t inodes_stat;

static kmem_cache_t * inode_cachep;

static struct inode *alloc_inode(struct super_block *sb)
{
	static struct address_space_operations empty_aops;
	static struct inode_operations empty_iops;
	static struct file_operations empty_fops;
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL);

	if (inode) {
		struct address_space * const mapping = &inode->i_data;

		inode->i_sb = sb;
		inode->i_blkbits = sb->s_blocksize_bits;
		inode->i_flags = 0;
		atomic_set(&inode->i_count, 1);
		inode->i_sock = 0;
		inode->i_op = &empty_iops;
		inode->i_fop = &empty_fops;
		inode->i_nlink = 1;
		atomic_set(&inode->i_writecount, 0);
		inode->i_size = 0;
		inode->i_blocks = 0;
		inode->i_bytes = 0;
		inode->i_generation = 0;
		memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
		inode->i_pipe = NULL;
		inode->i_bdev = NULL;
		inode->i_rdev = to_kdev_t(0);
		inode->i_security = NULL;
		if (security_inode_alloc(inode)) {
			if (inode->i_sb->s_op->destroy_inode)
				inode->i_sb->s_op->destroy_inode(inode);
			else
				kmem_cache_free(inode_cachep, (inode));
			return NULL;
		}

		mapping->a_ops = &empty_aops;
		mapping->host = inode;
		mapping->gfp_mask = GFP_HIGHUSER;
		mapping->dirtied_when = 0;
		mapping->assoc_mapping = NULL;
		mapping->backing_dev_info = &default_backing_dev_info;
		if (sb->s_bdev)
			mapping->backing_dev_info = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
		memset(&inode->u, 0, sizeof(inode->u));
		inode->i_mapping = mapping;
	}
	return inode;
}

void destroy_inode(struct inode *inode)
{
	if (inode_has_buffers(inode))
		BUG();
	security_inode_free(inode);
	if (inode->i_sb->s_op->destroy_inode)
		inode->i_sb->s_op->destroy_inode(inode);
	else
		kmem_cache_free(inode_cachep, (inode));
}

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab cache be aware of that.
 */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_data.clean_pages);
	INIT_LIST_HEAD(&inode->i_data.dirty_pages);
	INIT_LIST_HEAD(&inode->i_data.locked_pages);
	INIT_LIST_HEAD(&inode->i_data.io_pages);
	INIT_LIST_HEAD(&inode->i_dentry);
	INIT_LIST_HEAD(&inode->i_devices);
	sema_init(&inode->i_sem, 1);
	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
	rwlock_init(&inode->i_data.page_lock);
	init_MUTEX(&inode->i_data.i_shared_sem);
	INIT_LIST_HEAD(&inode->i_data.private_list);
	spin_lock_init(&inode->i_data.private_lock);
	INIT_LIST_HEAD(&inode->i_data.i_mmap);
	INIT_LIST_HEAD(&inode->i_data.i_mmap_shared);
	spin_lock_init(&inode->i_lock);
}

static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
	struct inode * inode = (struct inode *) foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
		inode_init_once(inode);
}

/*
 * inode_lock must be held
 */
void __iget(struct inode * inode)
{
	if (atomic_read(&inode->i_count)) {
		atomic_inc(&inode->i_count);
		return;
	}
	atomic_inc(&inode->i_count);
	if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
		list_del(&inode->i_list);
		list_add(&inode->i_list, &inode_in_use);
	}
	inodes_stat.nr_unused--;
}

/**
 * clear_inode - clear an inode
 * @inode: inode to clear
 *
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */

void clear_inode(struct inode *inode)
{
	invalidate_inode_buffers(inode);

	if (inode->i_data.nrpages)
		BUG();
	if (!(inode->i_state & I_FREEING))
		BUG();
	if (inode->i_state & I_CLEAR)
		BUG();
	wait_on_inode(inode);
	DQUOT_DROP(inode);
	if (inode->i_sb && inode->i_sb->s_op->clear_inode)
		inode->i_sb->s_op->clear_inode(inode);
	if (inode->i_bdev)
		bd_forget(inode);
	inode->i_state = I_CLEAR;
}
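
/*
 * Usage sketch (names are hypothetical, not from this file): a filesystem
 * that supplies its own ->delete_inode is expected to end up calling
 * clear_inode() itself, as generic_delete_inode() below relies on.  A
 * minimal myfs_delete_inode() might look roughly like:
 *
 *	static void myfs_delete_inode(struct inode *inode)
 *	{
 *		... free the on-disk inode and its blocks ...
 *		clear_inode(inode);
 *	}
 */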

/*
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head *head)
{
	int nr_disposed = 0;

	while (!list_empty(head)) {
		struct inode *inode;

		inode = list_entry(head->next, struct inode, i_list);
		list_del(&inode->i_list);

		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		clear_inode(inode);
		destroy_inode(inode);
		nr_disposed++;
	}
	spin_lock(&inode_lock);
	inodes_stat.nr_inodes -= nr_disposed;
	spin_unlock(&inode_lock);
}

/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
{
	struct list_head *next;
	int busy = 0, count = 0;

	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_list);
		if (inode->i_sb != sb)
			continue;
		invalidate_inode_buffers(inode);
		if (!atomic_read(&inode->i_count)) {
			hlist_del_init(&inode->i_hash);
			list_del(&inode->i_list);
			list_add(&inode->i_list, dispose);
			inode->i_state |= I_FREEING;
			count++;
			continue;
		}
		busy = 1;
	}
	/* only unused inodes may be cached with i_count zero */
	inodes_stat.nr_unused -= count;
	return busy;
}

/*
 * This is a two-stage process. First we collect all
 * offending inodes onto the throw-away list, and in
 * the second stage we actually dispose of them. This
 * is because we don't want to sleep while messing
 * with the global lists..
 */

/**
 *	invalidate_inodes	- discard the inodes on a device
 *	@sb: superblock
 *
 *	Discard all of the inodes for a given superblock. If the discard
 *	fails because there are busy inodes then a non zero value is returned.
 *	If the discard is successful all the inodes have been discarded.
 */

int invalidate_inodes(struct super_block * sb)
{
	int busy;
	LIST_HEAD(throw_away);

	down(&iprune_sem);
	spin_lock(&inode_lock);
	busy = invalidate_list(&inode_in_use, sb, &throw_away);
	busy |= invalidate_list(&inode_unused, sb, &throw_away);
	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
	busy |= invalidate_list(&sb->s_io, sb, &throw_away);
	spin_unlock(&inode_lock);

	dispose_list(&throw_away);
	up(&iprune_sem);

	return busy;
}
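
/*
 * Usage sketch: the umount and media-change paths are the typical callers.
 * A nonzero return means busy inodes prevented a full discard; a caller
 * usually shrinks the dcache first so dentry references are gone, as
 * invalidate_device() below does:
 *
 *	shrink_dcache_sb(sb);
 *	if (invalidate_inodes(sb))
 *		... some inodes are still in use, report or retry ...
 */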

int invalidate_device(kdev_t dev, int do_sync)
{
	struct super_block *sb;
	struct block_device *bdev = bdget(kdev_t_to_nr(dev));
	int res;

	if (!bdev)
		return 0;

	if (do_sync)
		fsync_bdev(bdev);

	res = 0;
	sb = get_super(bdev);
	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read semaphore so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * held).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb);
		drop_super(sb);
	}
	invalidate_bdev(bdev, 0);
	bdput(bdev);
	return res;
}

static int can_unuse(struct inode *inode)
{
	if (inode->i_state)
		return 0;
	if (inode_has_buffers(inode))
		return 0;
	if (atomic_read(&inode->i_count))
		return 0;
	if (inode->i_data.nrpages)
		return 0;
	return 1;
}

/*
 * Scan `goal' inodes on the unused list for freeable ones. They are moved to
 * a temporary list and then are freed outside inode_lock by dispose_list().
 *
 * Any inodes which are pinned purely because of attached pagecache have their
 * pagecache removed.  We expect the final iput() on that inode to add it to
 * the front of the inode_unused list.  So look for it there and if the
 * inode is still freeable, proceed.  The right inode is found 99.9% of the
 * time in testing on a 4-way.
 *
 * If the inode has metadata buffers attached to mapping->private_list then
 * try to remove them.
 */
static void prune_icache(int nr_to_scan)
{
	LIST_HEAD(freeable);
	int nr_pruned = 0;
	int nr_scanned;
	unsigned long reap = 0;

	down(&iprune_sem);
	spin_lock(&inode_lock);
	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
		struct inode *inode;

		if (list_empty(&inode_unused))
			break;

		inode = list_entry(inode_unused.prev, struct inode, i_list);

		if (inode->i_state || atomic_read(&inode->i_count)) {
			list_move(&inode->i_list, &inode_unused);
			continue;
		}
		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
			__iget(inode);
			spin_unlock(&inode_lock);
			if (remove_inode_buffers(inode))
				reap += invalidate_inode_pages(&inode->i_data);
			iput(inode);
			spin_lock(&inode_lock);

			if (inode != list_entry(inode_unused.next,
						struct inode, i_list))
				continue;	/* wrong inode or list_empty */
			if (!can_unuse(inode))
				continue;
		}
		hlist_del_init(&inode->i_hash);
		list_move(&inode->i_list, &freeable);
		inode->i_state |= I_FREEING;
		nr_pruned++;
	}
	inodes_stat.nr_unused -= nr_pruned;
	spin_unlock(&inode_lock);

	dispose_list(&freeable);
	up(&iprune_sem);

	if (current_is_kswapd())
		mod_page_state(kswapd_inodesteal, reap);
	else
		mod_page_state(pginodesteal, reap);
}

/*
 * shrink_icache_memory() will attempt to reclaim some unused inodes.  Here,
 * "unused" means that no dentries are referring to the inodes: the files are
 * not open and the dcache references to those inodes have already been
 * reclaimed.
 *
 * This function is passed the number of inodes to scan, and it returns the
 * total number of remaining possibly-reclaimable inodes.
 */
static int shrink_icache_memory(int nr, unsigned int gfp_mask)
{
	if (nr) {
		/*
		 * Nasty deadlock avoidance.  We may hold various FS locks,
		 * and we don't want to recurse into the FS that called us
		 * in clear_inode() and friends..
		 */
		if (gfp_mask & __GFP_FS)
			prune_icache(nr);
	}
	return inodes_stat.nr_unused;
}
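
/*
 * Nothing in this file calls shrink_icache_memory() directly: it is
 * registered with the VM as a slab shrinker via set_shrinker() in
 * inode_init() below, and the VM then invokes it under memory pressure
 * with the number of inodes to scan and the allocation's gfp_mask.
 */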

void __wait_on_freeing_inode(struct inode *inode);
/*
 * Called with the inode lock held.
 * NOTE: we are not increasing the inode-refcount, you must call __iget()
 * by hand after calling find_inode now! This simplifies iunique and won't
 * add any additional branch in the common code.
 */
static struct inode * find_inode(struct super_block * sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data)
{
	struct hlist_node *node;
	struct inode * inode = NULL;

repeat:
	hlist_for_each (node, head) {
		prefetch(node->next);
		inode = hlist_entry(node, struct inode, i_hash);
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		if (inode->i_state & (I_FREEING|I_CLEAR)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		break;
	}
	return node ? inode : NULL;
}

/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 */
static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head *head, unsigned long ino)
{
	struct hlist_node *node;
	struct inode * inode = NULL;

repeat:
	hlist_for_each (node, head) {
		prefetch(node->next);
		inode = hlist_entry(node, struct inode, i_hash);
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		if (inode->i_state & (I_FREEING|I_CLEAR)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		break;
	}
	return node ? inode : NULL;
}

/**
 *	new_inode 	- obtain an inode
 *	@sb: superblock
 *
 *	Allocates a new inode for given superblock.
 */

struct inode *new_inode(struct super_block *sb)
{
	static unsigned long last_ino;
	struct inode * inode;

	spin_lock_prefetch(&inode_lock);

	inode = alloc_inode(sb);
	if (inode) {
		spin_lock(&inode_lock);
		inodes_stat.nr_inodes++;
		list_add(&inode->i_list, &inode_in_use);
		inode->i_ino = ++last_ino;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
	}
	return inode;
}
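
/*
 * Usage sketch (hypothetical in-memory filesystem): new_inode() hands back
 * an unhashed inode that is already on the in_use list with i_count == 1;
 * the caller fills in the rest, roughly:
 *
 *	inode = new_inode(sb);
 *	if (inode) {
 *		inode->i_mode = mode;
 *		inode->i_uid = current->fsuid;
 *		inode->i_gid = current->fsgid;
 *		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 *	}
 */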

void unlock_new_inode(struct inode *inode)
{
	/*
	 * This is special!  We do not need the spinlock
	 * when clearing I_LOCK, because we're guaranteed
	 * that nobody else tries to do anything about the
	 * state of the inode when it is locked, as we
	 * just created it (so there can be no old holders
	 * that haven't tested I_LOCK).
	 */
	inode->i_state &= ~(I_LOCK|I_NEW);
	wake_up_inode(inode);
}
EXPORT_SYMBOL(unlock_new_inode);

/*
 * This is called without the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data)
{
	struct inode * inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode * old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, head, test, data);
		if (!old) {
			if (set(inode, data))
				goto set_failed;

			inodes_stat.nr_inodes++;
			list_add(&inode->i_list, &inode_in_use);
			hlist_add_head(&inode->i_hash, head);
			inode->i_state = I_LOCK|I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;

set_failed:
	spin_unlock(&inode_lock);
	destroy_inode(inode);
	return NULL;
}

/*
 * get_new_inode_fast is the fast path version of get_new_inode, see the
 * comment at iget_locked for details.
 */
static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino)
{
	struct inode * inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode * old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			inode->i_ino = ino;
			inodes_stat.nr_inodes++;
			list_add(&inode->i_list, &inode_in_use);
			hlist_add_head(&inode->i_hash, head);
			inode->i_state = I_LOCK|I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}

static inline unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp = hashval + ((unsigned long) sb / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> I_HASHBITS);
	return tmp & I_HASHMASK;
}
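
/*
 * The hash mixes the superblock pointer (scaled down by the cache line
 * size so distinct superblocks tend to land in distinct buckets) into the
 * per-filesystem hash value, then folds the high bits down before masking.
 * Callers pick a chain with, for example:
 *
 *	head = inode_hashtable + hash(sb, ino);
 */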

/* Yeah, I know about quadratic hash. Maybe, later. */

/**
 *	iunique - get a unique inode number
 *	@sb: superblock
 *	@max_reserved: highest reserved inode number
 *
 *	Obtain an inode number that is unique on the system for a given
 *	superblock. This is used by file systems that have no natural
 *	permanent inode numbering system. An inode number is returned that
 *	is higher than the reserved limit but unique.
 *
 *	BUGS:
 *	With a large number of inodes live on the file system this function
 *	currently becomes quite slow.
 */

ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	static ino_t counter = 0;
	struct inode *inode;
	struct hlist_head * head;
	ino_t res;
	spin_lock(&inode_lock);
retry:
	if (counter > max_reserved) {
		head = inode_hashtable + hash(sb,counter);
		res = counter++;
		inode = find_inode_fast(sb, head, res);
		if (!inode) {
			spin_unlock(&inode_lock);
			return res;
		}
	} else {
		counter = max_reserved + 1;
	}
	goto retry;

}
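
/*
 * Usage sketch (MYFS_MAX_RESERVED_INO is a hypothetical constant): a
 * filesystem with no stable inode numbers of its own might number a
 * freshly built in-core inode like
 *
 *	inode->i_ino = iunique(sb, MYFS_MAX_RESERVED_INO);
 *
 * and keep everything at or below the reserved limit for special inodes.
 */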

struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & I_FREEING))
		__iget(inode);
	else
		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */
		inode = NULL;
	spin_unlock(&inode_lock);
	return inode;
}
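
/*
 * Usage sketch: igrab() is for taking a new reference on an inode you can
 * see but do not yet hold a reference to, when that inode may already be
 * on its way to being freed.  A NULL return means "too late", e.g.
 * (hypothetical caller):
 *
 *	inode = igrab(some_inode);
 *	if (!inode)
 *		... treat it as if the inode were not there ...
 */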

/**
 * ifind - internal function, you want ilookup5() or iget5().
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * ifind() searches for the inode specified by @hashval and @data in the inode
 * cache. This is a generalized version of ifind_fast() for file systems where
 * the inode number is not sufficient for unique identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
static inline struct inode *ifind(struct super_block *sb,
		struct hlist_head *head, int (*test)(struct inode *, void *),
		void *data)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, head, test, data);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ifind_fast - internal function, you want ilookup() or iget().
 * @sb:		super block of file system to search
 * @ino:	inode number to search for
 *
 * ifind_fast() searches for the inode @ino in the inode cache. This is for
 * file systems where the inode number is sufficient for unique identification
 * of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
static inline struct inode *ifind_fast(struct super_block *sb,
		struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode_fast(sb, head, ino);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ilookup5 - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * ilookup5() uses ifind() to search for the inode specified by @hashval and
 * @data in the inode cache. This is a generalized version of ilookup() for
 * file systems where the inode number is not sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	return ifind(sb, head, test, data);
}
EXPORT_SYMBOL(ilookup5);

/**
 * ilookup - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @ino:	inode number to search for
 *
 * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache.
 * This is for file systems where the inode number is sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	return ifind_fast(sb, head, ino);
}
EXPORT_SYMBOL(ilookup);

/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb:		super block of file system
 * @hashval:	hash value (usually inode number) to get
 * @test:	callback used for comparisons between inodes
 * @set:	callback used to initialize a new struct inode
 * @data:	opaque data pointer to pass to @test and @set
 *
 * This is iget() without the read_inode() portion of get_new_inode().
 *
 * iget5_locked() uses ifind() to search for the inode specified by @hashval
 * and @data in the inode cache and if present it is returned with an increased
 * reference count. This is a generalized version of iget_locked() for file
 * systems where the inode number is not sufficient for unique identification
 * of an inode.
 *
 * If the inode is not in cache, get_new_inode() is called to allocate a new
 * inode and this is returned locked, hashed, and with the I_NEW flag set. The
 * file system gets to fill it in before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_lock held, so can't sleep.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode;

	inode = ifind(sb, head, test, data);
	if (inode)
		return inode;
	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, head, test, set, data);
}
EXPORT_SYMBOL(iget5_locked);
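
/*
 * Usage sketch (hypothetical filesystem whose on-disk identity does not fit
 * in an inode number; MYFS_I() and "key" are made-up names): @test compares
 * the private identity, @set stamps it into a new inode, and both run under
 * inode_lock:
 *
 *	static int myfs_test(struct inode *inode, void *data)
 *	{
 *		return MYFS_I(inode)->key == *(unsigned long *)data;
 *	}
 *
 *	static int myfs_set(struct inode *inode, void *data)
 *	{
 *		MYFS_I(inode)->key = *(unsigned long *)data;
 *		return 0;
 *	}
 *
 *	inode = iget5_locked(sb, hashval, myfs_test, myfs_set, &key);
 *	if (inode && (inode->i_state & I_NEW)) {
 *		... read the on-disk data into the inode ...
 *		unlock_new_inode(inode);
 *	}
 */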

/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb:		super block of file system
 * @ino:	inode number to get
 *
 * This is iget() without the read_inode() portion of get_new_inode_fast().
 *
 * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
 * the inode cache and if present it is returned with an increased reference
 * count. This is for file systems where the inode number is sufficient for
 * unique identification of an inode.
 *
 * If the inode is not in cache, get_new_inode_fast() is called to allocate a
 * new inode and this is returned locked, hashed, and with the I_NEW flag set.
 * The file system gets to fill it in before unlocking it via
 * unlock_new_inode().
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	inode = ifind_fast(sb, head, ino);
	if (inode)
		return inode;
	/*
	 * get_new_inode_fast() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode_fast(sb, head, ino);
}
EXPORT_SYMBOL(iget_locked);
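
/*
 * Usage sketch: the common pattern for a disk filesystem (myfs_iget() and
 * myfs_read_inode_from_disk() are hypothetical names) is to let
 * iget_locked() find or allocate the inode, and to fill in a new one
 * before unlocking it:
 *
 *	inode = iget_locked(sb, ino);
 *	if (!inode)
 *		return ERR_PTR(-ENOMEM);
 *	if (inode->i_state & I_NEW) {
 *		myfs_read_inode_from_disk(inode);
 *		unlock_new_inode(inode);
 *	}
 *	return inode;
 */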

/**
 *	__insert_inode_hash - hash an inode
 *	@inode: unhashed inode
 *	@hashval: unsigned long value used to locate this object in the
 *		inode_hashtable.
 *
 *	Add an inode to the inode hash for this superblock. If the inode
 *	has no superblock it is added to a separate anonymous chain.
 */

void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *head = &anon_hash_chain;
	if (inode->i_sb)
		head = inode_hashtable + hash(inode->i_sb, hashval);
	spin_lock(&inode_lock);
	hlist_add_head(&inode->i_hash, head);
	spin_unlock(&inode_lock);
}
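
/*
 * Most filesystems do not call this directly: the insert_inode_hash()
 * wrapper in <linux/fs.h> normally just passes inode->i_ino as the hash
 * value, which is the right thing whenever the inode number alone is
 * already unique.
 */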

/**
 *	remove_inode_hash - remove an inode from the hash
 *	@inode: inode to unhash
 *
 *	Remove an inode from the superblock or anonymous hash.
 */

void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	hlist_del_init(&inode->i_hash);
	spin_unlock(&inode_lock);
}

void generic_delete_inode(struct inode *inode)
{
	struct super_operations *op = inode->i_sb->s_op;

	list_del_init(&inode->i_list);
	inode->i_state|=I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);

	if (inode->i_data.nrpages)
		truncate_inode_pages(&inode->i_data, 0);

	security_inode_delete(inode);

	if (op->delete_inode) {
		void (*delete)(struct inode *) = op->delete_inode;
		if (!is_bad_inode(inode))
			DQUOT_INIT(inode);
		/* s_op->delete_inode internally recalls clear_inode() */
		delete(inode);
	} else
		clear_inode(inode);
	spin_lock(&inode_lock);
	hlist_del_init(&inode->i_hash);
	spin_unlock(&inode_lock);
	wake_up_inode(inode);
	if (inode->i_state != I_CLEAR)
		BUG();
	destroy_inode(inode);
}
EXPORT_SYMBOL(generic_delete_inode);

static void generic_forget_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	if (!hlist_unhashed(&inode->i_hash)) {
		if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
			list_del(&inode->i_list);
			list_add(&inode->i_list, &inode_unused);
		}
		inodes_stat.nr_unused++;
		spin_unlock(&inode_lock);
		if (!sb || (sb->s_flags & MS_ACTIVE))
			return;
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		inodes_stat.nr_unused--;
		hlist_del_init(&inode->i_hash);
	}
	list_del_init(&inode->i_list);
	inode->i_state|=I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);
	if (inode->i_data.nrpages)
		truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	destroy_inode(inode);
}

/*
 * Normal UNIX filesystem behaviour: delete the
 * inode when the usage count drops to zero, and
 * i_nlink is zero.
 */
static void generic_drop_inode(struct inode *inode)
{
	if (!inode->i_nlink)
		generic_delete_inode(inode);
	else
		generic_forget_inode(inode);
}

/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop()" function, defaulting to
 * the legacy UNIX filesystem behaviour..
 *
 * NOTE! NOTE! NOTE! We're called with the inode lock
 * held, and the drop function is supposed to release
 * the lock!
 */
static inline void iput_final(struct inode *inode)
{
	struct super_operations *op = inode->i_sb->s_op;
	void (*drop)(struct inode *) = generic_drop_inode;

	if (op && op->drop_inode)
		drop = op->drop_inode;
	drop(inode);
}

/**
 *	iput	- put an inode
 *	@inode: inode to put
 *
 *	Puts an inode, dropping its usage count. If the inode use count hits
 *	zero the inode is also then freed and may be destroyed.
 */

void iput(struct inode *inode)
{
	if (inode) {
		struct super_operations *op = inode->i_sb->s_op;

		if (inode->i_state == I_CLEAR)
			BUG();

		if (op && op->put_inode)
			op->put_inode(inode);

		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
			iput_final(inode);
	}
}

/**
 *	bmap	- find a block number in a file
 *	@inode: inode of file
 *	@block: block to find
 *
 *	Returns the block number on the device holding the inode that
 *	is the disk block number for the block of the file requested.
 *	That is, asked for block 4 of inode 1 the function will return the
 *	disk block relative to the disk start that holds that block of the
 *	file.
 */

sector_t bmap(struct inode * inode, sector_t block)
{
	sector_t res = 0;
	if (inode->i_mapping->a_ops->bmap)
		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
	return res;
}
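
/*
 * Traditionally the FIBMAP ioctl and swap-file setup are the kind of
 * callers that want this block mapping; a zero return means the block
 * could not be mapped or the filesystem does not support ->bmap.
 */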

/*
 * Return true if the filesystem which backs this inode considers the two
 * passed timespecs to be sufficiently different to warrant flushing the
 * altered time out to disk.
 */
static int inode_times_differ(struct inode *inode,
			struct timespec *old, struct timespec *new)
{
	if (IS_ONE_SECOND(inode))
		return old->tv_sec != new->tv_sec;
	return !timespec_equal(old, new);
}

/**
 *	update_atime	-	update the access time
 *	@inode: inode accessed
 *
 *	Update the accessed time on an inode and mark it for writeback.
 *	This function automatically handles read only file systems and media,
 *	as well as the "noatime" flag and inode specific "noatime" markers.
 */

void update_atime(struct inode *inode)
{
	struct timespec now;

	if (IS_NOATIME(inode))
		return;
	if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode))
		return;
	if (IS_RDONLY(inode))
		return;

	now = current_kernel_time();
	if (inode_times_differ(inode, &inode->i_atime, &now)) {
		inode->i_atime = now;
		mark_inode_dirty_sync(inode);
	} else {
		if (!timespec_equal(&inode->i_atime, &now))
			inode->i_atime = now;
	}
}
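
/*
 * Callers are typically the read-side paths (regular file reads, readdir
 * and the like).  Note the "else" branch above: when the difference is
 * below what the filesystem would bother writing back, the in-core atime
 * is still updated, but the inode is not marked dirty.
 */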

/**
 *	inode_update_time	-	update mtime and ctime time
 *	@inode: inode accessed
 *	@ctime_too: update ctime too
 *
 *	Update the mtime time on an inode and mark it for writeback.
 *	When ctime_too is specified update the ctime too.
 */

void inode_update_time(struct inode *inode, int ctime_too)
{
	struct timespec now = current_kernel_time();
	int sync_it = 0;

	if (inode_times_differ(inode, &inode->i_mtime, &now))
		sync_it = 1;
	inode->i_mtime = now;

	if (ctime_too) {
		if (inode_times_differ(inode, &inode->i_ctime, &now))
			sync_it = 1;
		inode->i_ctime = now;
	}
	if (sync_it)
		mark_inode_dirty_sync(inode);
}
EXPORT_SYMBOL(inode_update_time);
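
/*
 * As with update_atime() above, inode_times_differ() keeps filesystems
 * with one-second timestamp resolution from being dirtied for sub-second
 * changes.  Writers typically call this with @ctime_too set, since a data
 * write changes both mtime and ctime.
 */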

int inode_needs_sync(struct inode *inode)
{
	if (IS_SYNC(inode))
		return 1;
	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
		return 1;
	return 0;
}
EXPORT_SYMBOL(inode_needs_sync);

/*
 *	Quota functions that want to walk the inode lists..
 */
#ifdef CONFIG_QUOTA

/* Functions back in dquot.c */
void put_dquot_list(struct list_head *);
int remove_inode_dquot_ref(struct inode *, int, struct list_head *);

void remove_dquot_ref(struct super_block *sb, int type)
{
	struct inode *inode;
	struct list_head *act_head;
	LIST_HEAD(tofree_head);

	if (!sb->dq_op)
		return;	/* nothing to do */
	spin_lock(&inode_lock);	/* This lock is for inodes code */
	/* We don't have to lock against quota code - test IS_QUOTAINIT is just for speedup... */

	list_for_each(act_head, &inode_in_use) {
		inode = list_entry(act_head, struct inode, i_list);
		if (inode->i_sb == sb && IS_QUOTAINIT(inode))
			remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	list_for_each(act_head, &inode_unused) {
		inode = list_entry(act_head, struct inode, i_list);
		if (inode->i_sb == sb && IS_QUOTAINIT(inode))
			remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	list_for_each(act_head, &sb->s_dirty) {
		inode = list_entry(act_head, struct inode, i_list);
		if (IS_QUOTAINIT(inode))
			remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	list_for_each(act_head, &sb->s_io) {
		inode = list_entry(act_head, struct inode, i_list);
		if (IS_QUOTAINIT(inode))
			remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	spin_unlock(&inode_lock);

	put_dquot_list(&tofree_head);
}

#endif

/*
 * Hashed waitqueues for wait_on_inode().  The table is pretty small - the
 * kernel doesn't lock many inodes at the same time.
 */
#define I_WAIT_TABLE_ORDER	3
static struct i_wait_queue_head {
	wait_queue_head_t wqh;
} ____cacheline_aligned_in_smp i_wait_queue_heads[1<<I_WAIT_TABLE_ORDER];

/*
 * Return the address of the waitqueue_head to be used for this inode
 */
static wait_queue_head_t *i_waitq_head(struct inode *inode)
{
	return &i_wait_queue_heads[hash_ptr(inode, I_WAIT_TABLE_ORDER)].wqh;
}

void __wait_on_inode(struct inode *inode)
{
	DECLARE_WAITQUEUE(wait, current);
	wait_queue_head_t *wq = i_waitq_head(inode);

	add_wait_queue(wq, &wait);
repeat:
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (inode->i_state & I_LOCK) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(wq, &wait);
	__set_current_state(TASK_RUNNING);
}

void __wait_on_freeing_inode(struct inode *inode)
{
	DECLARE_WAITQUEUE(wait, current);
	wait_queue_head_t *wq = i_waitq_head(inode);

	add_wait_queue(wq, &wait);
	set_current_state(TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode_lock);
	schedule();
	remove_wait_queue(wq, &wait);
	current->state = TASK_RUNNING;
	spin_lock(&inode_lock);
}


void wake_up_inode(struct inode *inode)
{
	wait_queue_head_t *wq = i_waitq_head(inode);

	/*
	 * Prevent speculative execution through spin_unlock(&inode_lock);
	 */
	smp_mb();
	if (waitqueue_active(wq))
		wake_up_all(wq);
}

/*
 * Initialize the waitqueues and inode hash table.
 */
void __init inode_init(unsigned long mempages)
{
	struct hlist_head *head;
	unsigned long order;
	unsigned int nr_hash;
	int i;

	for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
		init_waitqueue_head(&i_wait_queue_heads[i].wqh);

	mempages >>= (14 - PAGE_SHIFT);
	mempages *= sizeof(struct list_head);
	for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
		;

	do {
		unsigned long tmp;

		nr_hash = (1UL << order) * PAGE_SIZE /
			sizeof(struct hlist_head);
		i_hash_mask = (nr_hash - 1);

		tmp = nr_hash;
		i_hash_shift = 0;
		while ((tmp >>= 1UL) != 0UL)
			i_hash_shift++;

		inode_hashtable = (struct hlist_head *)
			__get_free_pages(GFP_ATOMIC, order);
	} while (inode_hashtable == NULL && --order >= 0);

	printk("Inode-cache hash table entries: %d (order: %ld, %ld bytes)\n",
			nr_hash, order, (PAGE_SIZE << order));

	if (!inode_hashtable)
		panic("Failed to allocate inode hash table\n");

	head = inode_hashtable;
	i = nr_hash;
	do {
		INIT_HLIST_HEAD(head);
		head++;
		i--;
	} while (i);

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
					 0, SLAB_HWCACHE_ALIGN, init_once,
					 NULL);
	if (!inode_cachep)
		panic("cannot create inode slab cache");

	set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
}

void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = to_kdev_t(rdev);
	} else if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = to_kdev_t(rdev);
	} else if (S_ISFIFO(mode))
		inode->i_fop = &def_fifo_fops;
	else if (S_ISSOCK(mode))
		inode->i_fop = &bad_sock_fops;
	else
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
		       mode);
}
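
/*
 * Usage sketch: a filesystem's mknod method typically ends up here once it
 * has built the inode, e.g. (hypothetical caller):
 *
 *	inode = new_inode(dir->i_sb);
 *	if (inode) {
 *		init_special_inode(inode, mode, rdev);
 *		... mark it dirty and instantiate the dentry ...
 *	}
 */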