1 
2 #include <linux/module.h>
3 #include <linux/time.h>
4 #include <linux/fs.h>
5 #include <linux/jbd.h>
6 #include <linux/errno.h>
7 #include <linux/slab.h>
8 #include <linux/init.h>
9 #include <linux/mm.h>
10 #include <linux/freezer.h>
11 #include <linux/pagemap.h>
12 #include <linux/kthread.h>
13 #include <linux/poison.h>
14 #include <linux/proc_fs.h>
15 #include <linux/debugfs.h>
16 
17 
18 /*
19  * Called under j_state_lock.  Returns true if a transaction was started.
20  */
int __log_start_commit(journal_t *journal, tid_t target)
{
    /* Nothing to do if a commit covering `target' is already requested. */
    if (tid_geq(journal->j_commit_request, target))
        return 0;

    /*
     * We want a new commit: record the request and wake up the commit
     * thread.  We do _not_ perform the commit in the caller's context.
     */
    journal->j_commit_request = target;
    jbd_debug(1, "JBD: requesting commit %d/%d\n",
              journal->j_commit_request,
              journal->j_commit_sequence);
    wake_up(&journal->j_wait_commit);
    return 1;
}
41 
/* Locked wrapper around __log_start_commit(). */
int log_start_commit(journal_t *journal, tid_t tid)
{
    int started;

    jbd_lock(&journal->j_state_lock);
    started = __log_start_commit(journal, tid);
    jbd_unlock(&journal->j_state_lock);

    return started;
}
51 
/*
 * Journal abort has very specific semantics, which we describe
 * for journal abort.
 *
 * Two internal functions, which provide abort to the jbd layer
 * itself, are here.
 */
59 
60 /*
61  * Quick version for internal journal use (doesn't lock the journal).
62  * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
63  * and don't attempt to make any other journal updates.
64  */
static void __journal_abort_hard(journal_t *journal)
{
    transaction_t *t;

    /* Already aborted: nothing more to record. */
    if (journal->j_flags & JFS_ABORT)
        return;

    jbd_lock(&journal->j_state_lock);
    journal->j_flags |= JFS_ABORT;
    /* Kick a commit of the running transaction, if there is one. */
    t = journal->j_running_transaction;
    if (t != NULL)
        __log_start_commit(journal, t->t_tid);
    jbd_unlock(&journal->j_state_lock);
}
79 
80 /* Soft abort: record the abort error status in the journal superblock,
81  * but don't do any other IO. */
static void __journal_abort_soft (journal_t *journal, int err)
{
    /* Nothing to do once the journal is already aborted. */
    if (journal->j_flags & JFS_ABORT)
        return;

    /* Keep the first error ever recorded; later ones are dropped. */
    if (journal->j_errno == 0)
        journal->j_errno = err;

    __journal_abort_hard(journal);

    /* A real errno is made persistent in the on-disk superblock. */
    if (err)
        journal_update_superblock(journal, 1);
}
95 
96 
97 /**
98  * void journal_abort () - Shutdown the journal immediately.
99  * @journal: the journal to shutdown.
100  * @errno:   an error number to record in the journal indicating
101  *           the reason for the shutdown.
102  *
103  * Perform a complete, immediate shutdown of the ENTIRE
104  * journal (not of a single transaction).  This operation cannot be
105  * undone without closing and reopening the journal.
106  *
107  * The journal_abort function is intended to support higher level error
108  * recovery mechanisms such as the ext2/ext3 remount-readonly error
109  * mode.
110  *
111  * Journal abort has very specific semantics.  Any existing dirty,
112  * unjournaled buffers in the main filesystem will still be written to
113  * disk by bdflush, but the journaling mechanism will be suspended
114  * immediately and no further transaction commits will be honoured.
115  *
116  * Any dirty, journaled buffers will be written back to disk without
117  * hitting the journal.  Atomicity cannot be guaranteed on an aborted
118  * filesystem, but we _do_ attempt to leave as much data as possible
119  * behind for fsck to use for cleanup.
120  *
121  * Any attempt to get a new transaction handle on a journal which is in
122  * ABORT state will just result in an -EROFS error return.  A
123  * journal_stop on an existing handle will return -EIO if we have
124  * entered abort state during the update.
125  *
126  * Recursive transactions are not disturbed by journal abort until the
127  * final journal_stop, which will receive the -EIO error.
128  *
129  * Finally, the journal_abort call allows the caller to supply an errno
130  * which will be recorded (if possible) in the journal superblock.  This
131  * allows a client to record failure conditions in the middle of a
132  * transaction without having to complete the transaction to record the
133  * failure to disk.  ext3_error, for example, now uses this
134  * functionality.
135  *
136  * Errors which originate from within the journaling layer will NOT
137  * supply an errno; a null errno implies that absolutely no further
138  * writes are done to the journal (unless there are any already in
139  * progress).
140  *
141  */
142 
void journal_abort(journal_t *journal, int err)
{
    /* Soft abort records the errno, then performs the hard abort. */
    __journal_abort_soft(journal, err);
}
147 
148 /**
149  * int journal_errno () - returns the journal's error state.
150  * @journal: journal to examine.
151  *
 * This is the errno number set with journal_abort(), the last
153  * time the journal was mounted - if the journal was stopped
154  * without calling abort this will be 0.
155  *
156  * If the journal has been aborted on this mount time -EROFS will
157  * be returned.
158  */
int journal_errno(journal_t *journal)
{
    int ret;

    jbd_lock(&journal->j_state_lock);
    /* An aborted journal always reports -EROFS, masking j_errno. */
    ret = (journal->j_flags & JFS_ABORT) ? -EROFS : journal->j_errno;
    jbd_unlock(&journal->j_state_lock);

    return ret;
}
171 
172 /**
173  * int journal_clear_err () - clears the journal's error state
174  * @journal: journal to act on.
175  *
176  * An error must be cleared or Acked to take a FS out of readonly
177  * mode.
178  */
int journal_clear_err(journal_t *journal)
{
    int ret = 0;

    jbd_lock(&journal->j_state_lock);
    /* The error of an aborted journal cannot be cleared. */
    if (journal->j_flags & JFS_ABORT)
        ret = -EROFS;
    else
        journal->j_errno = 0;
    jbd_unlock(&journal->j_state_lock);

    return ret;
}
191 
192 /**
193  * void journal_ack_err() - Ack journal err.
194  * @journal: journal to act on.
195  *
196  * An error must be cleared or Acked to take a FS out of readonly
197  * mode.
198  */
void journal_ack_err(journal_t *journal)
{
    jbd_lock(&journal->j_state_lock);
    /* Only a recorded error can be acknowledged. */
    if (journal->j_errno != 0)
        journal->j_flags |= JFS_ACK_ERR;
    jbd_unlock(&journal->j_state_lock);
}
206 
journal_blocks_per_page(struct inode * inode)207 int journal_blocks_per_page(struct inode *inode)
208 {
209     return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
210 }
211 
212 
213 /*
214  * Journal_head storage management
215  */
216 static struct kmem_cache *journal_head_cache = NULL;
217 #ifdef CONFIG_JBD_DEBUG
218 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
219 #endif
220 
journal_init_journal_head_cache(void)221 static int journal_init_journal_head_cache(void)
222 {
223     int retval;
224 
225     J_ASSERT(journal_head_cache == 0);
226     journal_head_cache = kmem_cache_create("journal_head",
227                                            sizeof(struct journal_head),
228                                            0,		/* offset */
229                                            SLAB_TEMPORARY,	/* flags */
230                                            NULL);		/* ctor */
231     retval = 0;
232     if (journal_head_cache == 0) {
233         retval = -ENOMEM;
234         printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
235     }
236     return retval;
237 }
238 
journal_destroy_journal_head_cache(void)239 static void journal_destroy_journal_head_cache(void)
240 {
241     J_ASSERT(journal_head_cache != NULL);
242     kmem_cache_destroy(journal_head_cache);
243     journal_head_cache = NULL;
244 }
245 
246 /*
247  * journal_head splicing and dicing
248  */
journal_alloc_journal_head(void)249 static struct journal_head *journal_alloc_journal_head(void)
250 {
251     struct journal_head *ret;
252     static unsigned long last_warning;
253 
254 #ifdef CONFIG_JBD_DEBUG
255     atomic_inc(&nr_journal_heads);
256 #endif
257     ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
258     if (ret == NULL) {
259         jbd_debug(1, "out of memory for journal_head\n");
260         if (time_after(jiffies, last_warning + 5*HZ)) {
261             printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
262                    __FUNCTION__);
263             last_warning = jiffies;
264         }
265         while (ret == NULL) {
266             yield();
267             ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
268         }
269     }
270     return ret;
271 }
272 
journal_free_journal_head(struct journal_head * jh)273 static void journal_free_journal_head(struct journal_head *jh)
274 {
275 #ifdef CONFIG_JBD_DEBUG
276     atomic_dec(&nr_journal_heads);
277     memset(jh, JBD_POISON_FREE, sizeof(*jh));
278 #endif
279     kmem_cache_free(journal_head_cache, jh);
280 }
281 
282 /*
283  * A journal_head is attached to a buffer_head whenever JBD has an
284  * interest in the buffer.
285  *
286  * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
287  * is set.  This bit is tested in core kernel code where we need to take
288  * JBD-specific actions.  Testing the zeroness of ->b_private is not reliable
289  * there.
290  *
291  * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
292  *
293  * When a buffer has its BH_JBD bit set it is immune from being released by
294  * core kernel code, mainly via ->b_count.
295  *
296  * A journal_head may be detached from its buffer_head when the journal_head's
297  * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
298  * Various places in JBD call journal_remove_journal_head() to indicate that the
299  * journal_head can be dropped if needed.
300  *
301  * Various places in the kernel want to attach a journal_head to a buffer_head
302  * _before_ attaching the journal_head to a transaction.  To protect the
303  * journal_head in this situation, journal_add_journal_head elevates the
304  * journal_head's b_jcount refcount by one.  The caller must call
305  * journal_put_journal_head() to undo this.
306  *
307  * So the typical usage would be:
308  *
309  *	(Attach a journal_head if needed.  Increments b_jcount)
310  *	struct journal_head *jh = journal_add_journal_head(bh);
311  *	...
312  *	jh->b_transaction = xxx;
313  *	journal_put_journal_head(jh);
314  *
315  * Now, the journal_head's b_jcount is zero, but it is safe from being released
316  * because it has a non-zero b_transaction.
317  */
318 
319 /*
320  * Give a buffer_head a journal_head.
321  *
322  * Doesn't need the journal lock.
323  * May sleep.
324  */
struct journal_head *journal_add_journal_head(struct buffer_head *bh)
{
    struct journal_head *jh;
    struct journal_head *new_jh = NULL;

repeat:
    /* Optimistically allocate outside the lock if bh has no jh yet;
     * the allocation may sleep, so it cannot happen under the lock. */
    if (!buffer_jbd(bh)) {
        new_jh = journal_alloc_journal_head();
        memset(new_jh, 0, sizeof(*new_jh));
    }

    jbd_lock_bh_journal_head(bh);
    if (buffer_jbd(bh)) {
        /* Someone attached a journal_head meanwhile: reuse theirs. */
        jh = bh2jh(bh);
    } else {
        J_ASSERT_BH(bh,
                    (atomic_read(&bh->b_count) > 0) ||
                    (bh->b_page && bh->b_page->mapping));

        /* Raced the other way: bh lost its jh after our unlocked check
         * and we have nothing to install — retry from the top. */
        if (!new_jh) {
            jbd_unlock_bh_journal_head(bh);
            goto repeat;
        }

        jh = new_jh;
        new_jh = NULL;		/* We consumed it */
        set_buffer_jbd(bh);
        bh->b_private = jh;
        jh->b_bh = bh;
        get_bh(bh);		/* BH_JBD pins the buffer via b_count */
        BUFFER_TRACE(bh, "added journal_head");
    }
    /* Elevate b_jcount for the caller; undone by journal_put_journal_head. */
    jh->b_jcount++;
    jbd_unlock_bh_journal_head(bh);
    if (new_jh)
        journal_free_journal_head(new_jh);	/* allocated but unused */
    return bh->b_private;
}
363 
364 /*
365  * Grab a ref against this buffer_head's journal_head.  If it ended up not
366  * having a journal_head, return NULL
367  */
journal_grab_journal_head(struct buffer_head * bh)368 struct journal_head *journal_grab_journal_head(struct buffer_head *bh)
369 {
370     struct journal_head *jh = NULL;
371 
372     jbd_lock_bh_journal_head(bh);
373     if (buffer_jbd(bh)) {
374         jh = bh2jh(bh);
375         jh->b_jcount++;
376     }
377     jbd_unlock_bh_journal_head(bh);
378     return jh;
379 }
380 
static void __journal_remove_journal_head(struct buffer_head *bh)
{
    struct journal_head *jh = bh2jh(bh);

    J_ASSERT_JH(jh, jh->b_jcount >= 0);

    /* Extra b_count ref handed to the caller as a convenience; the
     * caller must eventually __brelse() it (see comment below). */
    get_bh(bh);
    if (jh->b_jcount == 0) {
        /* Only detach when no transaction still references the jh. */
        if (jh->b_transaction == NULL &&
                jh->b_next_transaction == NULL &&
                jh->b_cp_transaction == NULL) {
            J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
            J_ASSERT_BH(bh, buffer_jbd(bh));
            J_ASSERT_BH(bh, jh2bh(jh) == bh);
            BUFFER_TRACE(bh, "remove journal_head");
            /* These shadow copies should have been freed already;
             * reclaim them here, but warn about the leak. */
            if (jh->b_frozen_data) {
                printk(KERN_WARNING "%s: freeing "
                       "b_frozen_data\n",
                       __FUNCTION__);
                jbd_free(jh->b_frozen_data, bh->b_size);
            }
            if (jh->b_committed_data) {
                printk(KERN_WARNING "%s: freeing "
                       "b_committed_data\n",
                       __FUNCTION__);
                jbd_free(jh->b_committed_data, bh->b_size);
            }
            bh->b_private = NULL;
            jh->b_bh = NULL;	/* debug, really */
            clear_buffer_jbd(bh);
            /* Drop the b_count ref that BH_JBD held on the buffer. */
            __brelse(bh);
            journal_free_journal_head(jh);
        } else {
            BUFFER_TRACE(bh, "journal_head was locked");
        }
    }
}
418 
419 /*
420  * journal_remove_journal_head(): if the buffer isn't attached to a transaction
421  * and has a zero b_jcount then remove and release its journal_head.   If we did
422  * see that the buffer is not used by any transaction we also "logically"
423  * decrement ->b_count.
424  *
425  * We in fact take an additional increment on ->b_count as a convenience,
426  * because the caller usually wants to do additional things with the bh
427  * after calling here.
428  * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
429  * time.  Once the caller has run __brelse(), the buffer is eligible for
430  * reaping by try_to_free_buffers().
431  */
void journal_remove_journal_head(struct buffer_head *bh)
{
    /* Serialize against other journal_head users via the bh-local lock.
     * The extra b_count ref taken inside __journal_remove_journal_head()
     * must be dropped by the caller with __brelse() (see comment above). */
    jbd_lock_bh_journal_head(bh);
    __journal_remove_journal_head(bh);
    jbd_unlock_bh_journal_head(bh);
}
438 
439 /*
440  * Drop a reference on the passed journal_head.  If it fell to zero then try to
441  * release the journal_head from the buffer_head.
442  */
void journal_put_journal_head(struct journal_head *jh)
{
    struct buffer_head *bh = jh2bh(jh);

    jbd_lock_bh_journal_head(bh);
    J_ASSERT_JH(jh, jh->b_jcount > 0);
    --jh->b_jcount;
    if (!jh->b_jcount && !jh->b_transaction) {
        /* Last ref and not attached to a transaction: try to detach the
         * journal_head, then immediately drop the convenience b_count
         * ref that __journal_remove_journal_head() took for us. */
        __journal_remove_journal_head(bh);
        __brelse(bh);
    }
    jbd_unlock_bh_journal_head(bh);
}
456 
457 /*
458  * Log buffer allocation routines:
459  */
460 
int journal_next_log_block(journal_t *journal, unsigned long *retp)
{
    unsigned long block;

    jbd_lock(&journal->j_state_lock);
    /* The caller must have reserved log space beforehand. */
    J_ASSERT(journal->j_free > 1);

    block = journal->j_head++;
    journal->j_free--;
    /* The log is circular: wrap the head back to the first block. */
    if (journal->j_head == journal->j_last)
        journal->j_head = journal->j_first;
    jbd_unlock(&journal->j_state_lock);

    /* Translate the logical log block into a physical block number. */
    return journal_bmap(journal, block, retp);
}
476 
477 /*
478  * Conversion of logical to physical block numbers for the journal
479  *
480  * On external journals the journal blocks are identity-mapped, so
481  * this is a no-op.  If needed, we can use j_blk_offset - everything is
482  * ready.
483  */
int journal_bmap(journal_t *journal, unsigned long blocknr,
                 unsigned long *retp)
{
    unsigned long phys;

    /* External journals are identity-mapped: nothing to translate. */
    if (!journal->j_inode) {
        *retp = blocknr; /* +journal->j_blk_offset */
        return 0;
    }

    phys = (unsigned long)bmap(journal->j_inode, (sector_t)blocknr);
    if (!phys) {
        /* A hole in the journal inode is fatal: abort the journal. */
        printk(KERN_ALERT "%s: journal block not found "
               "at offset %lu ...\n",
               __FUNCTION__,
               blocknr);
        __journal_abort_soft(journal, -EIO);
        return -EIO;
    }

    *retp = phys;
    return 0;
}
507 
508 /*
509  * We play buffer_head aliasing tricks to write data/metadata blocks to
510  * the journal without copying their contents, but for journal
511  * descriptor blocks we do need to generate bona fide buffers.
512  *
513  * After the caller of journal_get_descriptor_buffer() has finished modifying
514  * the buffer's contents they really should run flush_dcache_page(bh->b_page).
515  * But we don't bother doing that, so there will be coherency problems with
516  * mmaps of blockdevs which hold live JBD-controlled filesystems.
517  */
/*
 * Allocate a zeroed descriptor buffer in the next free log block and
 * return it with an attached journal_head, or NULL on failure.
 *
 * Fix: __getblk() can return NULL on allocation failure; the original
 * dereferenced the result unconditionally (lock_buffer on NULL would
 * oops).  Bail out and let the caller handle the NULL.
 */
struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
{
    struct buffer_head *bh;
    unsigned long blocknr;
    int err;

    /* Reserve the next free log block for the descriptor. */
    err = journal_next_log_block(journal, &blocknr);
    if (err)
        return NULL;

    bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
    if (!bh)
        return NULL;
    lock_buffer(bh);
    memset(bh->b_data, 0, journal->j_blocksize);
    /* Freshly zeroed in memory: never needs to be read from disk. */
    set_buffer_uptodate(bh);
    unlock_buffer(bh);
    BUFFER_TRACE(bh, "return this buffer");
    return journal_add_journal_head(bh);
}
537 
538 /*
539  * Management for journal control blocks: functions to create and
540  * destroy journal_t structures, and to initialise and read existing
541  * journal blocks from disk.  */
542 
/* First: create and setup a journal_t object in memory.  We initialise
 * very few fields yet: that has to wait until we have created the
 * journal structures from scratch, or loaded them from disk. */
546 
journal_init_common(void)547 static journal_t * journal_init_common (void)
548 {
549     journal_t *journal;
550     int err;
551 
552     journal = kzalloc(sizeof(*journal), GFP_KERNEL);
553     if (!journal)
554         goto fail;
555 
556     init_waitqueue_head(&journal->j_wait_transaction_locked);
557     init_waitqueue_head(&journal->j_wait_logspace);
558     init_waitqueue_head(&journal->j_wait_done_commit);
559     init_waitqueue_head(&journal->j_wait_checkpoint);
560     init_waitqueue_head(&journal->j_wait_commit);
561     init_waitqueue_head(&journal->j_wait_updates);
562     mutex_init(&journal->j_barrier);
563     mutex_init(&journal->j_checkpoint_mutex);
564     jbd_lock_init(&journal->j_revoke_lock);
565     jbd_lock_init(&journal->j_list_lock);
566     jbd_lock_init(&journal->j_state_lock);
567 
568     journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
569 
570     /* The journal is marked for error until we succeed with recovery! */
571     journal->j_flags = JFS_ABORT;
572 
573     /* Set up a default-sized revoke table for the new mount. */
574     err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
575     if (err) {
576         kfree(journal);
577         goto fail;
578     }
579     return journal;
580 fail:
581     return NULL;
582 }
583 
584 /**
585  *  journal_t * journal_init_inode () - creates a journal which maps to a inode.
586  *  @inode: An inode to create the journal in
587  *
588  * journal_init_inode creates a journal which maps an on-disk inode as
589  * the journal.  The inode must exist already, must support bmap() and
590  * must have all data blocks preallocated.
591  */
journal_init_inode(struct inode * inode)592 journal_t * journal_init_inode (struct inode *inode)
593 {
594     struct buffer_head *bh;
595     journal_t *journal = journal_init_common();
596     int err;
597     int n;
598     unsigned long blocknr;
599 
600     if (!journal)
601         return NULL;
602 
603     journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
604     journal->j_inode = inode;
605     jbd_debug(1,
606               "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
607               journal, inode->i_sb->s_id, inode->i_ino,
608               (s64) inode->i_size,
609               inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
610 
611     journal->j_maxlen = (unsigned int)(inode->i_size >> inode->i_sb->s_blocksize_bits);
612     journal->j_blocksize = inode->i_sb->s_blocksize;
613 
614     /* journal descriptor can store up to n blocks -bzzz */
615     n = journal->j_blocksize / sizeof(journal_block_tag_t);
616     journal->j_wbufsize = n;
617     journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
618     if (!journal->j_wbuf) {
619         printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
620                __FUNCTION__);
621 
622         J_ASSERT(journal->j_revoke != NULL);
623         if (journal->j_revoke)
624             journal_destroy_revoke(journal);
625 
626         kfree(journal);
627         return NULL;
628     }
629 
630     err = journal_bmap(journal, 0, &blocknr);
631     /* If that failed, give up */
632     if (err) {
633         printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
634                __FUNCTION__);
635 
636         J_ASSERT(journal->j_revoke != NULL);
637         if (journal->j_revoke)
638             journal_destroy_revoke(journal);
639         J_ASSERT(journal->j_wbuf != NULL);
640         kfree(journal->j_wbuf);
641         kfree(journal);
642         return NULL;
643     }
644 
645     bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
646     J_ASSERT(bh != NULL);
647     journal->j_sb_buffer = bh;
648     journal->j_superblock = (journal_superblock_t *)bh->b_data;
649 
650     return journal;
651 }
652 
653 /**
654  *
655  *   wipe all journal data ...
656  *
657  */
658 
void journal_wipe_recovery(journal_t *journal)
{
    /* A zero j_tail marks the journal empty: nothing to replay. */
    journal->j_tail = 0;

    if (journal->j_sb_buffer != NULL) {
        /* Persist the empty state, then release the superblock buffer. */
        journal_update_superblock(journal, 0);
        brelse(journal->j_sb_buffer);
        journal->j_sb_buffer = NULL;
    }
}
670 
671 /**
672  * void journal_destroy() - Release a journal_t structure.
673  * @journal: Journal to act on.
674  *
675  * Release a journal_t structure once it is no longer in use by the
676  * journaled object.
677  */
void journal_destroy(journal_t *journal)
{
#if 0
    /* Wait for the commit thread to wake up and die. */
    journal_kill_thread(journal);

    /* Force a final log commit */
    if (journal->j_running_transaction)
        journal_commit_transaction(journal);

    /* Force any old transactions to disk */

    /* Totally anal locking here... */
    jbd_lock(&journal->j_list_lock);
    while (journal->j_checkpoint_transactions != NULL) {
        jbd_unlock(&journal->j_list_lock);
        log_do_checkpoint(journal);
        jbd_lock(&journal->j_list_lock);
    }

    J_ASSERT(journal->j_running_transaction == NULL);
    J_ASSERT(journal->j_committing_transaction == NULL);
    J_ASSERT(journal->j_checkpoint_transactions == NULL);
    jbd_unlock(&journal->j_list_lock);

    /* We can now mark the journal as empty. */
    journal->j_tail = 0;
    journal->j_tail_sequence = ++journal->j_transaction_sequence;
    if (journal->j_sb_buffer) {
        journal_update_superblock(journal, 1);
        brelse(journal->j_sb_buffer);
    }
#endif

    /* NOTE(review): the full shutdown sequence above is compiled out in
     * this port; only the memory/reference teardown below is performed.
     * Presumably the commit thread and checkpoints are handled by the
     * caller here — confirm before re-enabling the #if 0 path. */
    if (journal->j_sb_buffer) {
        brelse(journal->j_sb_buffer);
    }
    if (journal->j_inode)
        iput(journal->j_inode);
    if (journal->j_revoke)
        journal_destroy_revoke(journal);
    /* j_wbuf may be NULL on early failure paths; kfree handles that. */
    kfree(journal->j_wbuf);
    kfree(journal);
}
722 
723 
724 
725 /**
726  *int journal_check_used_features () - Check if features specified are used.
727  * @journal: Journal to check.
728  * @compat: bitmask of compatible features
729  * @ro: bitmask of features that force read-only mount
730  * @incompat: bitmask of incompatible features
731  *
732  * Check whether the journal uses all of a given set of
733  * features.  Return true (non-zero) if it does.
734  **/
735 
int journal_check_used_features (journal_t *journal, unsigned long compat,
                                 unsigned long ro, unsigned long incompat)
{
    journal_superblock_t *sb;

    /* Asking about no features at all is trivially true. */
    if (!compat && !ro && !incompat)
        return 1;
    /* v1 superblocks carry no feature bits, so none can be in use. */
    if (journal->j_format_version == 1)
        return 0;

    sb = journal->j_superblock;

    /* Every requested bit must be present in its superblock mask. */
    if ((be32_to_cpu(sb->s_feature_compat) & compat) != compat)
        return 0;
    if ((be32_to_cpu(sb->s_feature_ro_compat) & ro) != ro)
        return 0;
    if ((be32_to_cpu(sb->s_feature_incompat) & incompat) != incompat)
        return 0;

    return 1;
}
755 
756 /**
757  * int journal_check_available_features() - Check feature set in journalling layer
758  * @journal: Journal to check.
759  * @compat: bitmask of compatible features
760  * @ro: bitmask of features that force read-only mount
761  * @incompat: bitmask of incompatible features
762  *
763  * Check whether the journaling code supports the use of
764  * all of a given set of features on this journal.  Return true
765  * (non-zero) if it can. */
766 
/*
 * Check whether the journaling code supports the use of all of a given
 * set of features on this journal.  Return true (non-zero) if it can.
 *
 * Fix: the original declared and assigned a local `sb` from
 * journal->j_superblock but never used it — dead code removed.
 */
int journal_check_available_features (journal_t *journal, unsigned long compat,
                                      unsigned long ro, unsigned long incompat)
{
    /* No features requested: trivially supported. */
    if (!compat && !ro && !incompat)
        return 1;

    /* We can support any known requested features iff the
     * superblock is in version 2.  Otherwise we fail to support any
     * extended sb features. */
    if (journal->j_format_version != 2)
        return 0;

    if ((compat   & JFS_KNOWN_COMPAT_FEATURES) == compat &&
            (ro       & JFS_KNOWN_ROCOMPAT_FEATURES) == ro &&
            (incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat)
        return 1;

    return 0;
}
791 
792 /**
793  * int journal_set_features () - Mark a given journal feature in the superblock
794  * @journal: Journal to act on.
795  * @compat: bitmask of compatible features
796  * @ro: bitmask of features that force read-only mount
797  * @incompat: bitmask of incompatible features
798  *
799  * Mark a given journal feature as present on the
800  * superblock.  Returns true if the requested features could be set.
801  *
802  */
803 
int journal_set_features (journal_t *journal, unsigned long compat,
                          unsigned long ro, unsigned long incompat)
{
    journal_superblock_t *sb;

    /* Already present?  Nothing to do. */
    if (journal_check_used_features(journal, compat, ro, incompat))
        return 1;

    /* Refuse feature combinations this implementation cannot honour. */
    if (!journal_check_available_features(journal, compat, ro, incompat))
        return 0;

    jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
              compat, ro, incompat);

    /* OR the requested bits into the in-core superblock image. */
    sb = journal->j_superblock;
    sb->s_feature_compat    |= cpu_to_be32(compat);
    sb->s_feature_ro_compat |= cpu_to_be32(ro);
    sb->s_feature_incompat  |= cpu_to_be32(incompat);

    return 1;
}
826 
/*
 * Upgrade a version-1 journal superblock to version 2 in place: zero
 * the new v2 fields, stamp the v2 blocktype, and write the block
 * synchronously to disk.  Always returns 0.
 *
 * Fix: the member-offset computation used the nonstandard Windows
 * types INT/INT_PTR; replaced with portable char-pointer arithmetic.
 */
static int journal_convert_superblock_v1(journal_t *journal,
        journal_superblock_t *sb)
{
    int offset, blocksize;
    struct buffer_head *bh;

    printk(KERN_WARNING
           "JBD: Converting superblock from version 1 to 2.\n");

    /* Pre-initialise new fields to zero */
    offset = (int)((char *)&sb->s_feature_compat - (char *)sb);
    blocksize = be32_to_cpu(sb->s_blocksize);
    memset(&sb->s_feature_compat, 0, blocksize-offset);

    sb->s_nr_users = cpu_to_be32(1);
    sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
    journal->j_format_version = 2;

    /* Write the converted superblock synchronously to disk. */
    bh = journal->j_sb_buffer;
    BUFFER_TRACE(bh, "marking dirty");
    mark_buffer_dirty(bh);
    sync_dirty_buffer(bh);
    return 0;
}
851 
852 
853 /*
854  * If the journal init or create aborts, we need to mark the journal
855  * superblock as being NULL to prevent the journal destroy from writing
856  * back a bogus superblock.
857  */
static void journal_fail_superblock (journal_t *journal)
{
    /* Drop our ref and forget the buffer so journal_destroy() cannot
     * write back a bogus superblock. */
    brelse(journal->j_sb_buffer);
    journal->j_sb_buffer = NULL;
}
864 
865 
866 /*
867  * Read the superblock for a given journal, performing initial
868  * validation of the format.
869  */
870 
static int journal_get_superblock(journal_t *journal)
{
    struct buffer_head *bh;
    journal_superblock_t *sb;
    int err = -EIO;

    bh = journal->j_sb_buffer;

    J_ASSERT(bh != NULL);
    /* Read the superblock block from disk unless it is already cached. */
    if (!buffer_uptodate(bh)) {
        ll_rw_block(READ, 1, &bh);
        wait_on_buffer(bh);
        if (!buffer_uptodate(bh)) {
            printk (KERN_ERR
                    "JBD: IO error reading journal superblock\n");
            goto out;
        }
    }

    sb = journal->j_superblock;

    err = -EINVAL;

    /* Magic and blocksize must match before any other field is trusted. */
    if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) ||
            sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
        printk(KERN_WARNING "JBD: no valid journal superblock found\n");
        goto out;
    }

    switch (be32_to_cpu(sb->s_header.h_blocktype)) {
    case JFS_SUPERBLOCK_V1:
        journal->j_format_version = 1;
        break;
    case JFS_SUPERBLOCK_V2:
        journal->j_format_version = 2;
        break;
    default:
        printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
        goto out;
    }

    /* The on-disk length may shrink our view of the journal, but the
     * device/inode must be at least as large as the superblock claims. */
    if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
        journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
    else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
        printk (KERN_WARNING "JBD: journal file too short\n");
        goto out;
    }

    return 0;

out:
    /* Invalidate j_sb_buffer so a later destroy won't write it back. */
    journal_fail_superblock(journal);
    return err;
}
925 
926 /*
927  * Load the on-disk journal superblock and read the key fields into the
928  * journal_t.
929  */
930 
load_superblock(journal_t * journal)931 static int load_superblock(journal_t *journal)
932 {
933     int err;
934     journal_superblock_t *sb;
935 
936     err = journal_get_superblock(journal);
937     if (err)
938         return err;
939 
940     sb = journal->j_superblock;
941 
942     journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
943     journal->j_tail = be32_to_cpu(sb->s_start);
944     journal->j_first = be32_to_cpu(sb->s_first);
945     journal->j_last = be32_to_cpu(sb->s_maxlen);
946     journal->j_errno = be32_to_cpu(sb->s_errno);
947 
948     return 0;
949 }
950 
951 /**
952  * int journal_wipe() - Wipe journal contents
953  * @journal: Journal to act on.
954  * @write: flag (see below)
955  *
956  * Wipe out all of the contents of a journal, safely.  This will produce
957  * a warning if the journal contains any valid recovery information.
958  * Must be called between journal_init_*() and journal_load().
959  *
960  * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
961  * we merely suppress recovery.
962  */
963 
journal_wipe(journal_t * journal,int write)964 int journal_wipe(journal_t *journal, int write)
965 {
966     journal_superblock_t *sb;
967     int err = 0;
968 
969     J_ASSERT (!(journal->j_flags & JFS_LOADED));
970 
971     err = load_superblock(journal);
972     if (err)
973         return err;
974 
975     sb = journal->j_superblock;
976 
977     if (!journal->j_tail)
978         goto no_recovery;
979 
980     printk (KERN_WARNING "JBD: %s recovery information on journal\n",
981             write ? "Clearing" : "Ignoring");
982 
983     err = journal_skip_recovery(journal);
984     if (write)
985         journal_update_superblock(journal, 1);
986 
987 no_recovery:
988     return err;
989 }
990 
991 
992 /**
993  * int journal_update_format () - Update on-disk journal structure.
994  * @journal: Journal to act on.
995  *
996  * Given an initialised but unloaded journal struct, poke about in the
997  * on-disk structure to update it to the most recent supported version.
998  */
journal_update_format(journal_t * journal)999 int journal_update_format (journal_t *journal)
1000 {
1001     journal_superblock_t *sb;
1002     int err;
1003 
1004     err = journal_get_superblock(journal);
1005     if (err)
1006         return err;
1007 
1008     sb = journal->j_superblock;
1009 
1010     switch (be32_to_cpu(sb->s_header.h_blocktype)) {
1011     case JFS_SUPERBLOCK_V2:
1012         return 0;
1013     case JFS_SUPERBLOCK_V1:
1014         return journal_convert_superblock_v1(journal, sb);
1015     default:
1016         break;
1017     }
1018     return -EINVAL;
1019 }
1020 
1021 
1022 /**
1023  * void journal_update_superblock() - Update journal sb on disk.
1024  * @journal: The journal to update.
1025  * @wait: Set to '0' if you don't want to wait for IO completion.
1026  *
1027  * Update a journal's dynamic superblock fields and write it to disk,
1028  * optionally waiting for the IO to complete.
1029  */
journal_update_superblock(journal_t * journal,int wait)1030 void journal_update_superblock(journal_t *journal, int wait)
1031 {
1032     journal_superblock_t *sb = journal->j_superblock;
1033     struct buffer_head *bh = journal->j_sb_buffer;
1034 
1035     /*
1036      * As a special case, if the on-disk copy is already marked as needing
1037      * no recovery (s_start == 0) and there are no outstanding transactions
1038      * in the filesystem, then we can safely defer the superblock update
1039      * until the next commit by setting JFS_FLUSHED.  This avoids
1040      * attempting a write to a potential-readonly device.
1041      */
1042     if (sb->s_start == 0 && journal->j_tail_sequence ==
1043             journal->j_transaction_sequence) {
1044         jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
1045                   "(start %ld, seq %d, errno %d)\n",
1046                   journal->j_tail, journal->j_tail_sequence,
1047                   journal->j_errno);
1048         goto out;
1049     }
1050 
1051     jbd_lock(&journal->j_state_lock);
1052     jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
1053               journal->j_tail, journal->j_tail_sequence, journal->j_errno);
1054 
1055     sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1056     sb->s_start    = cpu_to_be32(journal->j_tail);
1057     sb->s_errno    = cpu_to_be32(journal->j_errno);
1058     jbd_unlock(&journal->j_state_lock);
1059 
1060     BUFFER_TRACE(bh, "marking dirty");
1061     mark_buffer_dirty(bh);
1062     if (wait)
1063         sync_dirty_buffer(bh);
1064     else
1065         ll_rw_block(SWRITE, 1, &bh);
1066 
1067 out:
1068     /* If we have just flushed the log (by marking s_start==0), then
1069      * any future commit will have to be careful to update the
1070      * superblock again to re-record the true start of the log. */
1071 
1072     jbd_lock(&journal->j_state_lock);
1073     if (sb->s_start)
1074         journal->j_flags &= ~JFS_FLUSHED;
1075     else
1076         journal->j_flags |= JFS_FLUSHED;
1077     jbd_unlock(&journal->j_state_lock);
1078 }
1079 
1080 /*
1081  * Given a journal_t structure, initialise the various fields for
1082  * startup of a new journaling session.  We use this both when creating
1083  * a journal, and after recovering an old journal to reset it for
1084  * subsequent use.
1085  */
1086 
journal_reset(journal_t * journal)1087 static int journal_reset(journal_t *journal)
1088 {
1089     journal_superblock_t *sb = journal->j_superblock;
1090     unsigned long first, last;
1091 
1092     first = be32_to_cpu(sb->s_first);
1093     last = be32_to_cpu(sb->s_maxlen);
1094 
1095     journal->j_first = first;
1096     journal->j_last = last;
1097 
1098     journal->j_head = first;
1099     journal->j_tail = first;
1100     journal->j_free = last - first;
1101 
1102     journal->j_tail_sequence = journal->j_transaction_sequence;
1103     journal->j_commit_sequence = journal->j_transaction_sequence - 1;
1104     journal->j_commit_request = journal->j_commit_sequence;
1105 
1106     journal->j_max_transaction_buffers = journal->j_maxlen / 4;
1107 
1108     /* Add the dynamic fields and write it to disk. */
1109     journal_update_superblock(journal, 1);
1110     return 0;
1111 }
1112 
1113 /**
1114  * int journal_load() - Read journal from disk.
1115  * @journal: Journal to act on.
1116  *
1117  * Given a journal_t structure which tells us which disk blocks contain
1118  * a journal, read the journal from disk to initialise the in-memory
1119  * structures.
1120  */
journal_load(journal_t * journal)1121 int journal_load(journal_t *journal)
1122 {
1123     int err;
1124     journal_superblock_t *sb;
1125 
1126     err = load_superblock(journal);
1127     if (err)
1128         return err;
1129 
1130     sb = journal->j_superblock;
1131     /* If this is a V2 superblock, then we have to check the
1132      * features flags on it. */
1133 
1134     if (journal->j_format_version >= 2) {
1135         if ((sb->s_feature_ro_compat &
1136                 ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
1137                 (sb->s_feature_incompat &
1138                  ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) {
1139             printk (KERN_WARNING
1140                     "JBD: Unrecognised features on journal\n");
1141             return -EINVAL;
1142         }
1143     }
1144 
1145     /* Let the recovery code check whether it needs to recover any
1146      * data from the journal. */
1147     if (journal_recover(journal))
1148         goto recovery_error;
1149 
1150     /* OK, we've finished with the dynamic journal bits:
1151      * reinitialise the dynamic contents of the superblock in memory
1152      * and reset them on disk. */
1153     if (journal_reset(journal))
1154         goto recovery_error;
1155 
1156     journal->j_flags &= ~JFS_ABORT;
1157     journal->j_flags |= JFS_LOADED;
1158     return 0;
1159 
1160 recovery_error:
1161     printk (KERN_WARNING "JBD: recovery failed\n");
1162     return -EIO;
1163 }
1164 
1165 
1166 //
1167 // transactions routines
1168 //
1169 
1170 
1171 /*
1172  *
1173  * List management code snippets: various functions for manipulating the
1174  * transaction buffer lists.
1175  *
1176  */
1177 
1178 /*
1179  * Append a buffer to a transaction list, given the transaction's list head
1180  * pointer.
1181  *
1182  * j_list_lock is held.
1183  *
1184  * jbd_lock_bh_state(jh2bh(jh)) is held.
1185  */
1186 
1187 static inline void
__blist_add_buffer(struct journal_head ** list,struct journal_head * jh)1188 __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1189 {
1190     if (!*list) {
1191         jh->b_tnext = jh->b_tprev = jh;
1192         *list = jh;
1193     } else {
1194         /* Insert at the tail of the list to preserve order */
1195         struct journal_head *first = *list, *last = first->b_tprev;
1196         jh->b_tprev = last;
1197         jh->b_tnext = first;
1198         last->b_tnext = first->b_tprev = jh;
1199     }
1200 }
1201 
1202 /*
1203  * Remove a buffer from a transaction list, given the transaction's list
1204  * head pointer.
1205  *
1206  * Called with j_list_lock held, and the journal may not be locked.
1207  *
1208  * jbd_lock_bh_state(jh2bh(jh)) is held.
1209  */
1210 
1211 static inline void
__blist_del_buffer(struct journal_head ** list,struct journal_head * jh)1212 __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1213 {
1214     if (*list == jh) {
1215         *list = jh->b_tnext;
1216         if (*list == jh)
1217             *list = NULL;
1218     }
1219     jh->b_tprev->b_tnext = jh->b_tnext;
1220     jh->b_tnext->b_tprev = jh->b_tprev;
1221 }
1222 
1223 /*
1224  * Remove a buffer from the appropriate transaction list.
1225  *
1226  * Note that this function can *change* the value of
1227  * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
1228  * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list.  If the caller
1229  * is holding onto a copy of one of thee pointers, it could go bad.
1230  * Generally the caller needs to re-read the pointer from the transaction_t.
1231  *
1232  * Called under j_list_lock.  The journal may not be locked.
1233  */
__journal_temp_unlink_buffer(struct journal_head * jh)1234 static void __journal_temp_unlink_buffer(struct journal_head *jh)
1235 {
1236     struct journal_head **list = NULL;
1237     transaction_t *transaction;
1238     struct buffer_head *bh = jh2bh(jh);
1239 
1240     J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
1241     transaction = jh->b_transaction;
1242     if (transaction)
1243         assert_jbd_locked(&transaction->t_journal->j_list_lock);
1244 
1245     J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1246     if (jh->b_jlist != BJ_None)
1247         J_ASSERT_JH(jh, transaction != NULL);
1248 
1249     switch (jh->b_jlist) {
1250     case BJ_None:
1251         return;
1252     case BJ_SyncData:
1253         list = &transaction->t_sync_datalist;
1254         break;
1255     case BJ_Metadata:
1256         transaction->t_nr_buffers--;
1257         J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
1258         list = &transaction->t_buffers;
1259         break;
1260     case BJ_Forget:
1261         list = &transaction->t_forget;
1262         break;
1263     case BJ_IO:
1264         list = &transaction->t_iobuf_list;
1265         break;
1266     case BJ_Shadow:
1267         list = &transaction->t_shadow_list;
1268         break;
1269     case BJ_LogCtl:
1270         list = &transaction->t_log_list;
1271         break;
1272     case BJ_Reserved:
1273         list = &transaction->t_reserved_list;
1274         break;
1275     case BJ_Locked:
1276         list = &transaction->t_locked_list;
1277         break;
1278     }
1279 
1280     __blist_del_buffer(list, jh);
1281     jh->b_jlist = BJ_None;
1282     if (test_clear_buffer_jbddirty(bh))
1283         mark_buffer_dirty(bh);	/* Expose it to the VM */
1284 }
1285 
__journal_unfile_buffer(struct journal_head * jh)1286 void __journal_unfile_buffer(struct journal_head *jh)
1287 {
1288     __journal_temp_unlink_buffer(jh);
1289     jh->b_transaction = NULL;
1290 }
1291 
journal_unfile_buffer(journal_t * journal,struct journal_head * jh)1292 void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1293 {
1294     jbd_lock_bh_state(jh2bh(jh));
1295     jbd_lock(&journal->j_list_lock);
1296     __journal_unfile_buffer(jh);
1297     jbd_unlock(&journal->j_list_lock);
1298     jbd_unlock_bh_state(jh2bh(jh));
1299 }
1300 
1301 /*
1302  * This buffer is no longer needed.  If it is on an older transaction's
1303  * checkpoint list we need to record it on this transaction's forget list
1304  * to pin this buffer (and hence its checkpointing transaction) down until
1305  * this transaction commits.  If the buffer isn't on a checkpoint list, we
1306  * release it.
1307  * Returns non-zero if JBD no longer has an interest in the buffer.
1308  *
1309  * Called under j_list_lock.
1310  *
1311  * Called under jbd_lock_bh_state(bh).
1312  */
__dispose_buffer(struct journal_head * jh,transaction_t * transaction)1313 static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1314 {
1315     int may_free = 1;
1316     struct buffer_head *bh = jh2bh(jh);
1317 
1318     __journal_unfile_buffer(jh);
1319 
1320     if (jh->b_cp_transaction) {
1321         JBUFFER_TRACE(jh, "on running+cp transaction");
1322         __journal_file_buffer(jh, transaction, BJ_Forget);
1323         clear_buffer_jbddirty(bh);
1324         may_free = 0;
1325     } else {
1326         JBUFFER_TRACE(jh, "on running transaction");
1327         journal_remove_journal_head(bh);
1328         __brelse(bh);
1329     }
1330     return may_free;
1331 }
1332 
1333 
1334 /*
1335  * File a buffer on the given transaction list.
1336  */
__journal_file_buffer(struct journal_head * jh,transaction_t * transaction,int jlist)1337 void __journal_file_buffer(struct journal_head *jh,
1338                            transaction_t *transaction, int jlist)
1339 {
1340     struct journal_head **list = NULL;
1341     int was_dirty = 0;
1342     struct buffer_head *bh = jh2bh(jh);
1343 
1344     J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
1345     assert_jbd_locked(&transaction->t_journal->j_list_lock);
1346 
1347     J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1348     J_ASSERT_JH(jh, jh->b_transaction == transaction ||
1349                 jh->b_transaction == NULL);
1350 
1351     if (jh->b_transaction && jh->b_jlist == (unsigned) jlist)
1352         return;
1353 
1354     /* The following list of buffer states needs to be consistent
1355      * with __jbd_unexpected_dirty_buffer()'s handling of dirty
1356      * state. */
1357 
1358     if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
1359             jlist == BJ_Shadow || jlist == BJ_Forget) {
1360         if (test_clear_buffer_dirty(bh) ||
1361                 test_clear_buffer_jbddirty(bh))
1362             was_dirty = 1;
1363     }
1364 
1365     if (jh->b_transaction)
1366         __journal_temp_unlink_buffer(jh);
1367     jh->b_transaction = transaction;
1368 
1369     switch (jlist) {
1370     case BJ_None:
1371         J_ASSERT_JH(jh, !jh->b_committed_data);
1372         J_ASSERT_JH(jh, !jh->b_frozen_data);
1373         return;
1374     case BJ_SyncData:
1375         list = &transaction->t_sync_datalist;
1376         break;
1377     case BJ_Metadata:
1378         transaction->t_nr_buffers++;
1379         list = &transaction->t_buffers;
1380         break;
1381     case BJ_Forget:
1382         list = &transaction->t_forget;
1383         break;
1384     case BJ_IO:
1385         list = &transaction->t_iobuf_list;
1386         break;
1387     case BJ_Shadow:
1388         list = &transaction->t_shadow_list;
1389         break;
1390     case BJ_LogCtl:
1391         list = &transaction->t_log_list;
1392         break;
1393     case BJ_Reserved:
1394         list = &transaction->t_reserved_list;
1395         break;
1396     case BJ_Locked:
1397         list =  &transaction->t_locked_list;
1398         break;
1399     }
1400 
1401     __blist_add_buffer(list, jh);
1402     jh->b_jlist = jlist;
1403 
1404     if (was_dirty)
1405         set_buffer_jbddirty(bh);
1406 }
1407 
journal_file_buffer(struct journal_head * jh,transaction_t * transaction,int jlist)1408 void journal_file_buffer(struct journal_head *jh,
1409                          transaction_t *transaction, int jlist)
1410 {
1411     jbd_lock_bh_state(jh2bh(jh));
1412     jbd_lock(&transaction->t_journal->j_list_lock);
1413     __journal_file_buffer(jh, transaction, jlist);
1414     jbd_unlock(&transaction->t_journal->j_list_lock);
1415     jbd_unlock_bh_state(jh2bh(jh));
1416 }
1417 
1418 
1419 /*
1420  * journal_release_buffer: undo a get_write_access without any buffer
1421  * updates, if the update decided in the end that it didn't need access.
1422  *
1423  */
void
journal_release_buffer(handle_t *handle, struct buffer_head *bh)
{
    /* Intentionally a no-op beyond tracing: nothing acquired by
     * get_write_access needs to be undone here. */
    BUFFER_TRACE(bh, "entry");
}
1429 
1430 /**
1431  * void journal_forget() - bforget() for potentially-journaled buffers.
1432  * @handle: transaction handle
1433  * @bh:     bh to 'forget'
1434  *
1435  * We can only do the bforget if there are no commits pending against the
1436  * buffer.  If the buffer is dirty in the current running transaction we
1437  * can safely unlink it.
1438  *
1439  * bh may not be a journalled buffer at all - it may be a non-JBD
1440  * buffer which came off the hashtable.  Check for this.
1441  *
1442  * Decrements bh->b_count by one.
1443  *
1444  * Allow this call even if the handle has aborted --- it may be part of
1445  * the caller's cleanup after an abort.
1446  */
journal_forget(handle_t * handle,struct buffer_head * bh)1447 int journal_forget (handle_t *handle, struct buffer_head *bh)
1448 {
1449     transaction_t *transaction = handle->h_transaction;
1450     journal_t *journal = transaction->t_journal;
1451     struct journal_head *jh;
1452     int drop_reserve = 0;
1453     int err = 0;
1454 
1455     BUFFER_TRACE(bh, "entry");
1456 
1457     jbd_lock_bh_state(bh);
1458     jbd_lock(&journal->j_list_lock);
1459 
1460     if (!buffer_jbd(bh))
1461         goto not_jbd;
1462     jh = bh2jh(bh);
1463 
1464     /* Critical error: attempting to delete a bitmap buffer, maybe?
1465      * Don't do any jbd operations, and return an error. */
1466     if (!J_EXPECT_JH(jh, !jh->b_committed_data,
1467                      "inconsistent data on disk")) {
1468         err = -EIO;
1469         goto not_jbd;
1470     }
1471 
1472     /*
1473      * The buffer's going from the transaction, we must drop
1474      * all references -bzzz
1475      */
1476     jh->b_modified = 0;
1477 
1478     if (jh->b_transaction == handle->h_transaction) {
1479         J_ASSERT_JH(jh, !jh->b_frozen_data);
1480 
1481         /* If we are forgetting a buffer which is already part
1482          * of this transaction, then we can just drop it from
1483          * the transaction immediately. */
1484         clear_buffer_dirty(bh);
1485         clear_buffer_jbddirty(bh);
1486 
1487         JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1488 
1489         drop_reserve = 1;
1490 
1491         /*
1492          * We are no longer going to journal this buffer.
1493          * However, the commit of this transaction is still
1494          * important to the buffer: the delete that we are now
1495          * processing might obsolete an old log entry, so by
1496          * committing, we can satisfy the buffer's checkpoint.
1497          *
1498          * So, if we have a checkpoint on the buffer, we should
1499          * now refile the buffer on our BJ_Forget list so that
1500          * we know to remove the checkpoint after we commit.
1501          */
1502 
1503         if (jh->b_cp_transaction) {
1504             __journal_temp_unlink_buffer(jh);
1505             __journal_file_buffer(jh, transaction, BJ_Forget);
1506         } else {
1507             __journal_unfile_buffer(jh);
1508             journal_remove_journal_head(bh);
1509             __brelse(bh);
1510             if (!buffer_jbd(bh)) {
1511                 jbd_unlock(&journal->j_list_lock);
1512                 jbd_unlock_bh_state(bh);
1513                 __bforget(bh);
1514                 goto drop;
1515             }
1516         }
1517     } else if (jh->b_transaction) {
1518         J_ASSERT_JH(jh, (jh->b_transaction ==
1519                          journal->j_committing_transaction));
1520         /* However, if the buffer is still owned by a prior
1521          * (committing) transaction, we can't drop it yet... */
1522         JBUFFER_TRACE(jh, "belongs to older transaction");
1523         /* ... but we CAN drop it from the new transaction if we
1524          * have also modified it since the original commit. */
1525 
1526         if (jh->b_next_transaction) {
1527             J_ASSERT(jh->b_next_transaction == transaction);
1528             jh->b_next_transaction = NULL;
1529             drop_reserve = 1;
1530         }
1531     }
1532 
1533 not_jbd:
1534     jbd_unlock(&journal->j_list_lock);
1535     jbd_unlock_bh_state(bh);
1536     __brelse(bh);
1537 drop:
1538     if (drop_reserve) {
1539         /* no need to reserve log space for this block -bzzz */
1540         handle->h_buffer_credits++;
1541     }
1542     return err;
1543 }
1544 
1545 /*
1546  * debugfs tunables
1547  */
1548 #ifdef CONFIG_JBD_DEBUG
1549 
1550 u8 journal_enable_debug __read_mostly;
1551 EXPORT_SYMBOL(journal_enable_debug);
1552 
1553 static struct dentry *jbd_debugfs_dir;
1554 static struct dentry *jbd_debug;
1555 
jbd_create_debugfs_entry(void)1556 static void __init jbd_create_debugfs_entry(void)
1557 {
1558     jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
1559     if (jbd_debugfs_dir)
1560         jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
1561                                       jbd_debugfs_dir,
1562                                       &journal_enable_debug);
1563 }
1564 
jbd_remove_debugfs_entry(void)1565 static void __exit jbd_remove_debugfs_entry(void)
1566 {
1567     debugfs_remove(jbd_debug);
1568     debugfs_remove(jbd_debugfs_dir);
1569 }
1570 
1571 #else
1572 
jbd_create_debugfs_entry(void)1573 static inline void jbd_create_debugfs_entry(void)
1574 {
1575 }
1576 
jbd_remove_debugfs_entry(void)1577 static inline void jbd_remove_debugfs_entry(void)
1578 {
1579 }
1580 
1581 #endif
1582 
1583 struct kmem_cache *jbd_handle_cache = NULL;
1584 
journal_init_handle_cache(void)1585 static int __init journal_init_handle_cache(void)
1586 {
1587     jbd_handle_cache = kmem_cache_create("journal_handle",
1588                                          sizeof(handle_t),
1589                                          0,		/* offset */
1590                                          SLAB_TEMPORARY,	/* flags */
1591                                          NULL);		/* ctor */
1592     if (jbd_handle_cache == NULL) {
1593         printk(KERN_EMERG "JBD: failed to create handle cache\n");
1594         return -ENOMEM;
1595     }
1596     return 0;
1597 }
1598 
journal_destroy_handle_cache(void)1599 static void journal_destroy_handle_cache(void)
1600 {
1601     if (jbd_handle_cache)
1602         kmem_cache_destroy(jbd_handle_cache);
1603 }
1604 
1605 /*
1606  * Module startup and shutdown
1607  */
1608 
journal_init_caches(void)1609 static int __init journal_init_caches(void)
1610 {
1611     int ret;
1612 
1613     ret = journal_init_revoke_caches();
1614     if (ret == 0)
1615         ret = journal_init_journal_head_cache();
1616     if (ret == 0)
1617         ret = journal_init_handle_cache();
1618     return ret;
1619 }
1620 
journal_destroy_caches(void)1621 static void journal_destroy_caches(void)
1622 {
1623     journal_destroy_revoke_caches();
1624     journal_destroy_journal_head_cache();
1625     journal_destroy_handle_cache();
1626 }
1627 
journal_init(void)1628 static int __init journal_init(void)
1629 {
1630     int ret;
1631 
1632     J_ASSERT(sizeof(struct journal_superblock_s) == 1024);
1633 
1634     ret = journal_init_caches();
1635     if (ret != 0)
1636         journal_destroy_caches();
1637     jbd_create_debugfs_entry();
1638     return ret;
1639 }
1640 
journal_exit(void)1641 static void __exit journal_exit(void)
1642 {
1643 #ifdef CONFIG_JBD_DEBUG
1644     int n = atomic_read(&nr_journal_heads);
1645     if (n)
1646         printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
1647 #endif
1648     jbd_remove_debugfs_entry();
1649     journal_destroy_caches();
1650 }
1651 
1652 MODULE_LICENSE("GPL");
1653 module_init(journal_init);
1654 module_exit(journal_exit);
1655