1
2 #include <linux/module.h>
3 #include <linux/time.h>
4 #include <linux/fs.h>
5 #include <linux/jbd.h>
6 #include <linux/errno.h>
7 #include <linux/slab.h>
8 #include <linux/init.h>
9 #include <linux/mm.h>
10 #include <linux/freezer.h>
11 #include <linux/pagemap.h>
12 #include <linux/kthread.h>
13 #include <linux/poison.h>
14 #include <linux/proc_fs.h>
15 #include <linux/debugfs.h>
16
17
18 /*
19 * Called under j_state_lock. Returns true if a transaction was started.
20 */
/*
 * Request a commit up to (at least) transaction 'target'.
 * Called under j_state_lock.  Returns true if a transaction was started.
 */
int __log_start_commit(journal_t *journal, tid_t target)
{
	/* Nothing to do if a recent enough commit is already requested. */
	if (tid_geq(journal->j_commit_request, target))
		return 0;

	/*
	 * We want a new commit: mark the request and wake up the commit
	 * thread.  We do _not_ perform the commit ourselves.
	 */
	journal->j_commit_request = target;
	jbd_debug(1, "JBD: requesting commit %d/%d\n",
		  journal->j_commit_request,
		  journal->j_commit_sequence);
	wake_up(&journal->j_wait_commit);
	return 1;
}
41
/*
 * Locked wrapper around __log_start_commit(): takes j_state_lock and
 * returns whether a new commit request was raised.
 */
int log_start_commit(journal_t *journal, tid_t tid)
{
	int started;

	jbd_lock(&journal->j_state_lock);
	started = __log_start_commit(journal, tid);
	jbd_unlock(&journal->j_state_lock);

	return started;
}
51
52 /*
53 * Journal abort has very specific semantics, which we describe
54 * for journal abort.
55 *
 * Two internal functions, which provide abort to the jbd layer
57 * itself are here.
58 */
59
60 /*
61 * Quick version for internal journal use (doesn't lock the journal).
62 * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
63 * and don't attempt to make any other journal updates.
64 */
static void __journal_abort_hard(journal_t *journal)
{
	transaction_t *running;

	/* Already aborted: nothing further to do. */
	if (journal->j_flags & JFS_ABORT)
		return;

	jbd_lock(&journal->j_state_lock);
	journal->j_flags |= JFS_ABORT;
	/* Kick off a final commit of the running transaction, if any. */
	running = journal->j_running_transaction;
	if (running != NULL)
		__log_start_commit(journal, running->t_tid);
	jbd_unlock(&journal->j_state_lock);
}
79
80 /* Soft abort: record the abort error status in the journal superblock,
81 * but don't do any other IO. */
static void __journal_abort_soft (journal_t *journal, int err)
{
	if (journal->j_flags & JFS_ABORT)
		return;

	/* Record only the first error ever reported on this journal. */
	if (journal->j_errno == 0)
		journal->j_errno = err;

	__journal_abort_hard(journal);

	/* A zero errno means no further writes may touch the journal,
	 * so only push the superblock out when an errno was supplied. */
	if (err != 0)
		journal_update_superblock(journal, 1);
}
95
96
97 /**
98 * void journal_abort () - Shutdown the journal immediately.
99 * @journal: the journal to shutdown.
100 * @errno: an error number to record in the journal indicating
101 * the reason for the shutdown.
102 *
103 * Perform a complete, immediate shutdown of the ENTIRE
104 * journal (not of a single transaction). This operation cannot be
105 * undone without closing and reopening the journal.
106 *
107 * The journal_abort function is intended to support higher level error
108 * recovery mechanisms such as the ext2/ext3 remount-readonly error
109 * mode.
110 *
111 * Journal abort has very specific semantics. Any existing dirty,
112 * unjournaled buffers in the main filesystem will still be written to
113 * disk by bdflush, but the journaling mechanism will be suspended
114 * immediately and no further transaction commits will be honoured.
115 *
116 * Any dirty, journaled buffers will be written back to disk without
117 * hitting the journal. Atomicity cannot be guaranteed on an aborted
118 * filesystem, but we _do_ attempt to leave as much data as possible
119 * behind for fsck to use for cleanup.
120 *
121 * Any attempt to get a new transaction handle on a journal which is in
122 * ABORT state will just result in an -EROFS error return. A
123 * journal_stop on an existing handle will return -EIO if we have
124 * entered abort state during the update.
125 *
126 * Recursive transactions are not disturbed by journal abort until the
127 * final journal_stop, which will receive the -EIO error.
128 *
129 * Finally, the journal_abort call allows the caller to supply an errno
130 * which will be recorded (if possible) in the journal superblock. This
131 * allows a client to record failure conditions in the middle of a
132 * transaction without having to complete the transaction to record the
133 * failure to disk. ext3_error, for example, now uses this
134 * functionality.
135 *
136 * Errors which originate from within the journaling layer will NOT
137 * supply an errno; a null errno implies that absolutely no further
138 * writes are done to the journal (unless there are any already in
139 * progress).
140 *
141 */
142
void journal_abort(journal_t *journal, int err)
{
	/* All the work (setting JFS_ABORT, recording errno, kicking a
	 * final commit) is done by the soft-abort helper. */
	__journal_abort_soft(journal, err);
}
147
148 /**
149 * int journal_errno () - returns the journal's error state.
150 * @journal: journal to examine.
151 *
 * This is the errno number set with journal_abort(), the last
153 * time the journal was mounted - if the journal was stopped
154 * without calling abort this will be 0.
155 *
156 * If the journal has been aborted on this mount time -EROFS will
157 * be returned.
158 */
int journal_errno(journal_t *journal)
{
	int err;

	jbd_lock(&journal->j_state_lock);
	/* An aborted journal always reports -EROFS, regardless of the
	 * recorded errno. */
	err = (journal->j_flags & JFS_ABORT) ? -EROFS : journal->j_errno;
	jbd_unlock(&journal->j_state_lock);

	return err;
}
171
172 /**
173 * int journal_clear_err () - clears the journal's error state
174 * @journal: journal to act on.
175 *
176 * An error must be cleared or Acked to take a FS out of readonly
177 * mode.
178 */
int journal_clear_err(journal_t *journal)
{
	int rc = 0;

	jbd_lock(&journal->j_state_lock);
	/* The error of an aborted journal cannot be cleared. */
	if (journal->j_flags & JFS_ABORT)
		rc = -EROFS;
	else
		journal->j_errno = 0;
	jbd_unlock(&journal->j_state_lock);

	return rc;
}
191
192 /**
193 * void journal_ack_err() - Ack journal err.
194 * @journal: journal to act on.
195 *
196 * An error must be cleared or Acked to take a FS out of readonly
197 * mode.
198 */
void journal_ack_err(journal_t *journal)
{
	jbd_lock(&journal->j_state_lock);
	/* Only a journal with a recorded error can be acked. */
	if (journal->j_errno != 0)
		journal->j_flags |= JFS_ACK_ERR;
	jbd_unlock(&journal->j_state_lock);
}
206
journal_blocks_per_page(struct inode * inode)207 int journal_blocks_per_page(struct inode *inode)
208 {
209 return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
210 }
211
212
213 /*
214 * Journal_head storage management
215 */
216 static struct kmem_cache *journal_head_cache = NULL;
217 #ifdef CONFIG_JBD_DEBUG
218 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
219 #endif
220
journal_init_journal_head_cache(void)221 static int journal_init_journal_head_cache(void)
222 {
223 int retval;
224
225 J_ASSERT(journal_head_cache == 0);
226 journal_head_cache = kmem_cache_create("journal_head",
227 sizeof(struct journal_head),
228 0, /* offset */
229 SLAB_TEMPORARY, /* flags */
230 NULL); /* ctor */
231 retval = 0;
232 if (journal_head_cache == 0) {
233 retval = -ENOMEM;
234 printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
235 }
236 return retval;
237 }
238
/* Tear down the journal_head slab cache created at module init. */
static void journal_destroy_journal_head_cache(void)
{
	J_ASSERT(journal_head_cache != NULL);
	kmem_cache_destroy(journal_head_cache);
	/* Reset so a later re-init passes the J_ASSERT above. */
	journal_head_cache = NULL;
}
245
246 /*
247 * journal_head splicing and dicing
248 */
journal_alloc_journal_head(void)249 static struct journal_head *journal_alloc_journal_head(void)
250 {
251 struct journal_head *ret;
252 static unsigned long last_warning;
253
254 #ifdef CONFIG_JBD_DEBUG
255 atomic_inc(&nr_journal_heads);
256 #endif
257 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
258 if (ret == NULL) {
259 jbd_debug(1, "out of memory for journal_head\n");
260 if (time_after(jiffies, last_warning + 5*HZ)) {
261 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
262 __FUNCTION__);
263 last_warning = jiffies;
264 }
265 while (ret == NULL) {
266 yield();
267 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
268 }
269 }
270 return ret;
271 }
272
/* Release a journal_head back to the slab cache. */
static void journal_free_journal_head(struct journal_head *jh)
{
#ifdef CONFIG_JBD_DEBUG
	atomic_dec(&nr_journal_heads);
	/* Poison the freed object so a use-after-free is easy to spot. */
	memset(jh, JBD_POISON_FREE, sizeof(*jh));
#endif
	kmem_cache_free(journal_head_cache, jh);
}
281
282 /*
283 * A journal_head is attached to a buffer_head whenever JBD has an
284 * interest in the buffer.
285 *
286 * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
287 * is set. This bit is tested in core kernel code where we need to take
288 * JBD-specific actions. Testing the zeroness of ->b_private is not reliable
289 * there.
290 *
291 * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
292 *
293 * When a buffer has its BH_JBD bit set it is immune from being released by
294 * core kernel code, mainly via ->b_count.
295 *
296 * A journal_head may be detached from its buffer_head when the journal_head's
297 * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
298 * Various places in JBD call journal_remove_journal_head() to indicate that the
299 * journal_head can be dropped if needed.
300 *
301 * Various places in the kernel want to attach a journal_head to a buffer_head
302 * _before_ attaching the journal_head to a transaction. To protect the
303 * journal_head in this situation, journal_add_journal_head elevates the
304 * journal_head's b_jcount refcount by one. The caller must call
305 * journal_put_journal_head() to undo this.
306 *
307 * So the typical usage would be:
308 *
309 * (Attach a journal_head if needed. Increments b_jcount)
310 * struct journal_head *jh = journal_add_journal_head(bh);
311 * ...
312 * jh->b_transaction = xxx;
313 * journal_put_journal_head(jh);
314 *
315 * Now, the journal_head's b_jcount is zero, but it is safe from being released
316 * because it has a non-zero b_transaction.
317 */
318
319 /*
320 * Give a buffer_head a journal_head.
321 *
322 * Doesn't need the journal lock.
323 * May sleep.
324 */
/*
 * Attach a journal_head to bh (or find the existing one) and take a
 * b_jcount reference on it.  Returns the journal_head.
 *
 * The allocation is done optimistically outside the per-bh
 * journal_head lock; if another CPU attaches a journal_head while we
 * were allocating, our spare is freed at the end.
 */
struct journal_head *journal_add_journal_head(struct buffer_head *bh)
{
	struct journal_head *jh;
	struct journal_head *new_jh = NULL;

repeat:
	/* Pre-allocate (may sleep) before taking the lock. */
	if (!buffer_jbd(bh)) {
		new_jh = journal_alloc_journal_head();
		memset(new_jh, 0, sizeof(*new_jh));
	}

	jbd_lock_bh_journal_head(bh);
	if (buffer_jbd(bh)) {
		/* Someone attached one while we were allocating. */
		jh = bh2jh(bh);
	} else {
		J_ASSERT_BH(bh,
			(atomic_read(&bh->b_count) > 0) ||
			(bh->b_page && bh->b_page->mapping));

		/* BH_JBD was cleared between our check and the lock:
		 * we have no spare, so retry from the top. */
		if (!new_jh) {
			jbd_unlock_bh_journal_head(bh);
			goto repeat;
		}

		jh = new_jh;
		new_jh = NULL;		/* We consumed it */
		set_buffer_jbd(bh);
		bh->b_private = jh;
		jh->b_bh = bh;
		/* The attached journal_head pins the buffer. */
		get_bh(bh);
		BUFFER_TRACE(bh, "added journal_head");
	}
	jh->b_jcount++;
	jbd_unlock_bh_journal_head(bh);
	/* Drop the unused spare if we lost the race above. */
	if (new_jh)
		journal_free_journal_head(new_jh);
	return bh->b_private;
}
363
364 /*
365 * Grab a ref against this buffer_head's journal_head. If it ended up not
366 * having a journal_head, return NULL
367 */
journal_grab_journal_head(struct buffer_head * bh)368 struct journal_head *journal_grab_journal_head(struct buffer_head *bh)
369 {
370 struct journal_head *jh = NULL;
371
372 jbd_lock_bh_journal_head(bh);
373 if (buffer_jbd(bh)) {
374 jh = bh2jh(bh);
375 jh->b_jcount++;
376 }
377 jbd_unlock_bh_journal_head(bh);
378 return jh;
379 }
380
/*
 * Detach and free bh's journal_head if it is unpinned (b_jcount == 0)
 * and not referenced by any transaction.  Caller holds the per-bh
 * journal_head lock.
 */
static void __journal_remove_journal_head(struct buffer_head *bh)
{
	struct journal_head *jh = bh2jh(bh);

	J_ASSERT_JH(jh, jh->b_jcount >= 0);

	/* Extra b_count reference for the caller's convenience; the
	 * caller is documented to run __brelse() later. */
	get_bh(bh);
	if (jh->b_jcount == 0) {
		if (jh->b_transaction == NULL &&
				jh->b_next_transaction == NULL &&
				jh->b_cp_transaction == NULL) {
			J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
			J_ASSERT_BH(bh, buffer_jbd(bh));
			J_ASSERT_BH(bh, jh2bh(jh) == bh);
			BUFFER_TRACE(bh, "remove journal_head");
			/* Shadow copies should have been freed earlier;
			 * warn if we have to clean them up here. */
			if (jh->b_frozen_data) {
				printk(KERN_WARNING "%s: freeing "
						"b_frozen_data\n",
						__FUNCTION__);
				jbd_free(jh->b_frozen_data, bh->b_size);
			}
			if (jh->b_committed_data) {
				printk(KERN_WARNING "%s: freeing "
						"b_committed_data\n",
						__FUNCTION__);
				jbd_free(jh->b_committed_data, bh->b_size);
			}
			bh->b_private = NULL;
			jh->b_bh = NULL;	/* debug, really */
			clear_buffer_jbd(bh);
			/* Drop the b_count reference the journal_head held. */
			__brelse(bh);
			journal_free_journal_head(jh);
		} else {
			BUFFER_TRACE(bh, "journal_head was locked");
		}
	}
}
418
419 /*
420 * journal_remove_journal_head(): if the buffer isn't attached to a transaction
421 * and has a zero b_jcount then remove and release its journal_head. If we did
422 * see that the buffer is not used by any transaction we also "logically"
423 * decrement ->b_count.
424 *
425 * We in fact take an additional increment on ->b_count as a convenience,
426 * because the caller usually wants to do additional things with the bh
427 * after calling here.
428 * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
429 * time. Once the caller has run __brelse(), the buffer is eligible for
430 * reaping by try_to_free_buffers().
431 */
/* Locked wrapper: take the per-bh journal_head lock and try to detach
 * and free bh's journal_head (see __journal_remove_journal_head). */
void journal_remove_journal_head(struct buffer_head *bh)
{
	jbd_lock_bh_journal_head(bh);
	__journal_remove_journal_head(bh);
	jbd_unlock_bh_journal_head(bh);
}
438
439 /*
440 * Drop a reference on the passed journal_head. If it fell to zero then try to
441 * release the journal_head from the buffer_head.
442 */
/*
 * Drop a b_jcount reference on jh.  If it fell to zero and the
 * journal_head is not attached to a transaction, detach and free it.
 */
void journal_put_journal_head(struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	jbd_lock_bh_journal_head(bh);
	J_ASSERT_JH(jh, jh->b_jcount > 0);
	--jh->b_jcount;
	if (!jh->b_jcount && !jh->b_transaction) {
		__journal_remove_journal_head(bh);
		/* Balance the convenience get_bh() taken by
		 * __journal_remove_journal_head(). */
		__brelse(bh);
	}
	jbd_unlock_bh_journal_head(bh);
}
456
457 /*
458 * Log buffer allocation routines:
459 */
460
/*
 * Hand out the next free log block and translate it to a physical
 * block number via journal_bmap().
 */
int journal_next_log_block(journal_t *journal, unsigned long *retp)
{
	unsigned long blocknr;

	jbd_lock(&journal->j_state_lock);
	J_ASSERT(journal->j_free > 1);

	blocknr = journal->j_head++;
	journal->j_free--;
	/* The log is circular: wrap back to the first block. */
	if (journal->j_head == journal->j_last)
		journal->j_head = journal->j_first;
	jbd_unlock(&journal->j_state_lock);

	return journal_bmap(journal, blocknr, retp);
}
476
477 /*
478 * Conversion of logical to physical block numbers for the journal
479 *
480 * On external journals the journal blocks are identity-mapped, so
481 * this is a no-op. If needed, we can use j_blk_offset - everything is
482 * ready.
483 */
int journal_bmap(journal_t *journal, unsigned long blocknr,
		 unsigned long *retp)
{
	unsigned long phys;

	if (!journal->j_inode) {
		/* External journal: blocks are identity-mapped. */
		*retp = blocknr; /* +journal->j_blk_offset */
		return 0;
	}

	phys = (unsigned long)bmap(journal->j_inode, (sector_t)blocknr);
	if (phys == 0) {
		/* A hole in the journal inode is fatal: abort. */
		printk(KERN_ALERT "%s: journal block not found "
			"at offset %lu ...\n",
			__FUNCTION__,
			blocknr);
		__journal_abort_soft(journal, -EIO);
		return -EIO;
	}

	*retp = phys;
	return 0;
}
507
508 /*
509 * We play buffer_head aliasing tricks to write data/metadata blocks to
510 * the journal without copying their contents, but for journal
511 * descriptor blocks we do need to generate bona fide buffers.
512 *
513 * After the caller of journal_get_descriptor_buffer() has finished modifying
514 * the buffer's contents they really should run flush_dcache_page(bh->b_page).
515 * But we don't bother doing that, so there will be coherency problems with
516 * mmaps of blockdevs which hold live JBD-controlled filesystems.
517 */
/*
 * Allocate and zero a descriptor buffer for the next free log block,
 * returning its journal_head (with a b_jcount reference), or NULL on
 * failure.
 *
 * Fix: __getblk() can return NULL under memory pressure; the original
 * dereferenced the result unconditionally (lock_buffer/memset), which
 * would oops.  Bail out with NULL instead.
 */
struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
{
	struct buffer_head *bh;
	unsigned long blocknr;
	int err;

	err = journal_next_log_block(journal, &blocknr);
	if (err)
		return NULL;

	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
	if (!bh)
		return NULL;
	lock_buffer(bh);
	memset(bh->b_data, 0, journal->j_blocksize);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);
	BUFFER_TRACE(bh, "return this buffer");
	return journal_add_journal_head(bh);
}
537
538 /*
539 * Management for journal control blocks: functions to create and
540 * destroy journal_t structures, and to initialise and read existing
541 * journal blocks from disk. */
542
543 /* First: create and setup a journal_t object in memory. We initialise
544 * very few fields yet: that has to wait until we have created the
 * journal structures from scratch, or loaded them from disk. */
546
journal_init_common(void)547 static journal_t * journal_init_common (void)
548 {
549 journal_t *journal;
550 int err;
551
552 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
553 if (!journal)
554 goto fail;
555
556 init_waitqueue_head(&journal->j_wait_transaction_locked);
557 init_waitqueue_head(&journal->j_wait_logspace);
558 init_waitqueue_head(&journal->j_wait_done_commit);
559 init_waitqueue_head(&journal->j_wait_checkpoint);
560 init_waitqueue_head(&journal->j_wait_commit);
561 init_waitqueue_head(&journal->j_wait_updates);
562 mutex_init(&journal->j_barrier);
563 mutex_init(&journal->j_checkpoint_mutex);
564 jbd_lock_init(&journal->j_revoke_lock);
565 jbd_lock_init(&journal->j_list_lock);
566 jbd_lock_init(&journal->j_state_lock);
567
568 journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
569
570 /* The journal is marked for error until we succeed with recovery! */
571 journal->j_flags = JFS_ABORT;
572
573 /* Set up a default-sized revoke table for the new mount. */
574 err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
575 if (err) {
576 kfree(journal);
577 goto fail;
578 }
579 return journal;
580 fail:
581 return NULL;
582 }
583
584 /**
585 * journal_t * journal_init_inode () - creates a journal which maps to a inode.
586 * @inode: An inode to create the journal in
587 *
588 * journal_init_inode creates a journal which maps an on-disk inode as
589 * the journal. The inode must exist already, must support bmap() and
590 * must have all data blocks preallocated.
591 */
journal_init_inode(struct inode * inode)592 journal_t * journal_init_inode (struct inode *inode)
593 {
594 struct buffer_head *bh;
595 journal_t *journal = journal_init_common();
596 int err;
597 int n;
598 unsigned long blocknr;
599
600 if (!journal)
601 return NULL;
602
603 journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
604 journal->j_inode = inode;
605 jbd_debug(1,
606 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
607 journal, inode->i_sb->s_id, inode->i_ino,
608 (s64) inode->i_size,
609 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
610
611 journal->j_maxlen = (unsigned int)(inode->i_size >> inode->i_sb->s_blocksize_bits);
612 journal->j_blocksize = inode->i_sb->s_blocksize;
613
614 /* journal descriptor can store up to n blocks -bzzz */
615 n = journal->j_blocksize / sizeof(journal_block_tag_t);
616 journal->j_wbufsize = n;
617 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
618 if (!journal->j_wbuf) {
619 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
620 __FUNCTION__);
621
622 J_ASSERT(journal->j_revoke != NULL);
623 if (journal->j_revoke)
624 journal_destroy_revoke(journal);
625
626 kfree(journal);
627 return NULL;
628 }
629
630 err = journal_bmap(journal, 0, &blocknr);
631 /* If that failed, give up */
632 if (err) {
633 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
634 __FUNCTION__);
635
636 J_ASSERT(journal->j_revoke != NULL);
637 if (journal->j_revoke)
638 journal_destroy_revoke(journal);
639 J_ASSERT(journal->j_wbuf != NULL);
640 kfree(journal->j_wbuf);
641 kfree(journal);
642 return NULL;
643 }
644
645 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
646 J_ASSERT(bh != NULL);
647 journal->j_sb_buffer = bh;
648 journal->j_superblock = (journal_superblock_t *)bh->b_data;
649
650 return journal;
651 }
652
653 /**
654 *
655 * wipe all journal data ...
656 *
657 */
658
void journal_wipe_recovery(journal_t *journal)
{
	/* A zero tail marks the journal as containing no recovery data. */
	journal->j_tail = 0;

	if (journal->j_sb_buffer == NULL)
		return;

	journal_update_superblock(journal, 0);
	brelse(journal->j_sb_buffer);
	journal->j_sb_buffer = NULL;
}
670
671 /**
672 * void journal_destroy() - Release a journal_t structure.
673 * @journal: Journal to act on.
674 *
675 * Release a journal_t structure once it is no longer in use by the
676 * journaled object.
677 */
void journal_destroy(journal_t *journal)
{
/* NOTE(review): the full shutdown sequence below (kill the commit
 * thread, flush the final transaction, drain checkpoints, rewrite the
 * superblock) is compiled out in this port; only the memory teardown
 * further down runs.  Kept for reference. */
#if 0
	/* Wait for the commit thread to wake up and die. */
	journal_kill_thread(journal);

	/* Force a final log commit */
	if (journal->j_running_transaction)
		journal_commit_transaction(journal);

	/* Force any old transactions to disk */

	/* Totally anal locking here... */
	jbd_lock(&journal->j_list_lock);
	while (journal->j_checkpoint_transactions != NULL) {
		jbd_unlock(&journal->j_list_lock);
		log_do_checkpoint(journal);
		jbd_lock(&journal->j_list_lock);
	}

	J_ASSERT(journal->j_running_transaction == NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);
	J_ASSERT(journal->j_checkpoint_transactions == NULL);
	jbd_unlock(&journal->j_list_lock);

	/* We can now mark the journal as empty. */
	journal->j_tail = 0;
	journal->j_tail_sequence = ++journal->j_transaction_sequence;
	if (journal->j_sb_buffer) {
		journal_update_superblock(journal, 1);
		brelse(journal->j_sb_buffer);
	}
#endif

	/* Release the superblock buffer, the journal inode, the revoke
	 * tables, the write-buffer array and finally the journal itself. */
	if (journal->j_sb_buffer) {
		brelse(journal->j_sb_buffer);
	}
	if (journal->j_inode)
		iput(journal->j_inode);
	if (journal->j_revoke)
		journal_destroy_revoke(journal);
	kfree(journal->j_wbuf);
	kfree(journal);
}
722
723
724
725 /**
726 *int journal_check_used_features () - Check if features specified are used.
727 * @journal: Journal to check.
728 * @compat: bitmask of compatible features
729 * @ro: bitmask of features that force read-only mount
730 * @incompat: bitmask of incompatible features
731 *
732 * Check whether the journal uses all of a given set of
733 * features. Return true (non-zero) if it does.
734 **/
735
int journal_check_used_features (journal_t *journal, unsigned long compat,
				 unsigned long ro, unsigned long incompat)
{
	journal_superblock_t *sb;

	/* An empty feature set is trivially satisfied. */
	if ((compat | ro | incompat) == 0)
		return 1;
	/* A v1 superblock carries no feature flags at all. */
	if (journal->j_format_version == 1)
		return 0;

	sb = journal->j_superblock;

	return ((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
	       ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
	       ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat);
}
755
756 /**
757 * int journal_check_available_features() - Check feature set in journalling layer
758 * @journal: Journal to check.
759 * @compat: bitmask of compatible features
760 * @ro: bitmask of features that force read-only mount
761 * @incompat: bitmask of incompatible features
762 *
763 * Check whether the journaling code supports the use of
764 * all of a given set of features on this journal. Return true
765 * (non-zero) if it can. */
766
int journal_check_available_features (journal_t *journal, unsigned long compat,
				      unsigned long ro, unsigned long incompat)
{
	/* An empty feature set is trivially supported. */
	if ((compat | ro | incompat) == 0)
		return 1;

	/* Extended sb features require a version-2 superblock;
	 * otherwise we cannot support anything. */
	if (journal->j_format_version != 2)
		return 0;

	return ((compat & JFS_KNOWN_COMPAT_FEATURES) == compat) &&
	       ((ro & JFS_KNOWN_ROCOMPAT_FEATURES) == ro) &&
	       ((incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat);
}
791
792 /**
793 * int journal_set_features () - Mark a given journal feature in the superblock
794 * @journal: Journal to act on.
795 * @compat: bitmask of compatible features
796 * @ro: bitmask of features that force read-only mount
797 * @incompat: bitmask of incompatible features
798 *
799 * Mark a given journal feature as present on the
800 * superblock. Returns true if the requested features could be set.
801 *
802 */
803
int journal_set_features (journal_t *journal, unsigned long compat,
			  unsigned long ro, unsigned long incompat)
{
	journal_superblock_t *sb;

	/* Already set?  Nothing to do. */
	if (journal_check_used_features(journal, compat, ro, incompat))
		return 1;

	/* Refuse feature combinations this journalling layer cannot honour. */
	if (!journal_check_available_features(journal, compat, ro, incompat))
		return 0;

	jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
		  compat, ro, incompat);

	sb = journal->j_superblock;
	sb->s_feature_compat |= cpu_to_be32(compat);
	sb->s_feature_ro_compat |= cpu_to_be32(ro);
	sb->s_feature_incompat |= cpu_to_be32(incompat);

	return 1;
}
826
/*
 * In-place upgrade of a v1 journal superblock to v2: zero the new
 * feature fields, stamp the v2 block type and write the block out
 * synchronously.  Always returns 0.
 *
 * Fix: the offset computation used Windows-only INT/INT_PTR types,
 * inconsistent with the Linux code around it; plain char-pointer
 * arithmetic is portable and equivalent.
 */
static int journal_convert_superblock_v1(journal_t *journal,
					 journal_superblock_t *sb)
{
	int offset, blocksize;
	struct buffer_head *bh;

	printk(KERN_WARNING
		"JBD: Converting superblock from version 1 to 2.\n");

	/* Pre-initialise new fields to zero */
	offset = (int)((char *)&sb->s_feature_compat - (char *)sb);
	blocksize = be32_to_cpu(sb->s_blocksize);
	memset(&sb->s_feature_compat, 0, blocksize-offset);

	sb->s_nr_users = cpu_to_be32(1);
	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
	journal->j_format_version = 2;

	bh = journal->j_sb_buffer;
	BUFFER_TRACE(bh, "marking dirty");
	mark_buffer_dirty(bh);
	sync_dirty_buffer(bh);
	return 0;
}
851
852
853 /*
854 * If the journal init or create aborts, we need to mark the journal
855 * superblock as being NULL to prevent the journal destroy from writing
856 * back a bogus superblock.
857 */
/*
 * If journal init or load aborts, null out j_sb_buffer so a later
 * journal_destroy cannot write back a bogus superblock.
 */
static void journal_fail_superblock (journal_t *journal)
{
	brelse(journal->j_sb_buffer);
	journal->j_sb_buffer = NULL;
}
864
865
866 /*
867 * Read the superblock for a given journal, performing initial
868 * validation of the format.
869 */
870
/*
 * Read (if necessary) and validate the journal superblock: magic,
 * blocksize, format version and on-disk length.  On failure the
 * superblock buffer is released via journal_fail_superblock().
 */
static int journal_get_superblock(journal_t *journal)
{
	struct buffer_head *bh = journal->j_sb_buffer;
	journal_superblock_t *sb;
	int err;

	J_ASSERT(bh != NULL);

	if (!buffer_uptodate(bh)) {
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			printk (KERN_ERR
				"JBD: IO error reading journal superblock\n");
			err = -EIO;
			goto out;
		}
	}

	sb = journal->j_superblock;
	err = -EINVAL;

	if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) ||
	    sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
		printk(KERN_WARNING "JBD: no valid journal superblock found\n");
		goto out;
	}

	switch (be32_to_cpu(sb->s_header.h_blocktype)) {
	case JFS_SUPERBLOCK_V1:
		journal->j_format_version = 1;
		break;
	case JFS_SUPERBLOCK_V2:
		journal->j_format_version = 2;
		break;
	default:
		printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
		goto out;
	}

	/* The device may be smaller than the journal claims: clamp.
	 * The reverse (journal shorter than expected) is fatal. */
	if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
		journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
	else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
		printk (KERN_WARNING "JBD: journal file too short\n");
		goto out;
	}

	return 0;

out:
	journal_fail_superblock(journal);
	return err;
}
925
926 /*
927 * Load the on-disk journal superblock and read the key fields into the
928 * journal_t.
929 */
930
/*
 * Load the on-disk journal superblock and copy its dynamic fields
 * into the journal_t.
 */
static int load_superblock(journal_t *journal)
{
	journal_superblock_t *sb;
	int err = journal_get_superblock(journal);

	if (err != 0)
		return err;

	sb = journal->j_superblock;

	journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
	journal->j_tail = be32_to_cpu(sb->s_start);
	journal->j_first = be32_to_cpu(sb->s_first);
	journal->j_last = be32_to_cpu(sb->s_maxlen);
	journal->j_errno = be32_to_cpu(sb->s_errno);

	return 0;
}
950
951 /**
952 * int journal_wipe() - Wipe journal contents
953 * @journal: Journal to act on.
954 * @write: flag (see below)
955 *
956 * Wipe out all of the contents of a journal, safely. This will produce
957 * a warning if the journal contains any valid recovery information.
958 * Must be called between journal_init_*() and journal_load().
959 *
960 * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
961 * we merely suppress recovery.
962 */
963
int journal_wipe(journal_t *journal, int write)
{
	int err;

	J_ASSERT (!(journal->j_flags & JFS_LOADED));

	err = load_superblock(journal);
	if (err)
		return err;

	/* A zero tail means the journal holds no recovery data. */
	if (journal->j_tail == 0)
		return 0;

	printk (KERN_WARNING "JBD: %s recovery information on journal\n",
		write ? "Clearing" : "Ignoring");

	err = journal_skip_recovery(journal);
	if (write)
		journal_update_superblock(journal, 1);

	return err;
}
990
991
992 /**
993 * int journal_update_format () - Update on-disk journal structure.
994 * @journal: Journal to act on.
995 *
996 * Given an initialised but unloaded journal struct, poke about in the
997 * on-disk structure to update it to the most recent supported version.
998 */
int journal_update_format (journal_t *journal)
{
	journal_superblock_t *sb;
	int err = journal_get_superblock(journal);

	if (err)
		return err;

	sb = journal->j_superblock;

	switch (be32_to_cpu(sb->s_header.h_blocktype)) {
	case JFS_SUPERBLOCK_V2:
		/* Already at the newest supported format. */
		return 0;
	case JFS_SUPERBLOCK_V1:
		return journal_convert_superblock_v1(journal, sb);
	default:
		return -EINVAL;
	}
}
1020
1021
1022 /**
1023 * void journal_update_superblock() - Update journal sb on disk.
1024 * @journal: The journal to update.
1025 * @wait: Set to '0' if you don't want to wait for IO completion.
1026 *
1027 * Update a journal's dynamic superblock fields and write it to disk,
1028 * optionally waiting for the IO to complete.
1029 */
void journal_update_superblock(journal_t *journal, int wait)
{
	journal_superblock_t *sb = journal->j_superblock;
	struct buffer_head *bh = journal->j_sb_buffer;

	/*
	 * As a special case, if the on-disk copy is already marked as needing
	 * no recovery (s_start == 0) and there are no outstanding transactions
	 * in the filesystem, then we can safely defer the superblock update
	 * until the next commit by setting JFS_FLUSHED.  This avoids
	 * attempting a write to a potential-readonly device.
	 *
	 * NOTE(review): sb->s_start and the sequence fields are read here
	 * without j_state_lock — presumably benign on this path, but worth
	 * confirming against the locking rules for these fields.
	 */
	if (sb->s_start == 0 && journal->j_tail_sequence ==
				journal->j_transaction_sequence) {
		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
			"(start %ld, seq %d, errno %d)\n",
			journal->j_tail, journal->j_tail_sequence,
			journal->j_errno);
		goto out;
	}

	/* Snapshot the dynamic fields into the sb under j_state_lock. */
	jbd_lock(&journal->j_state_lock);
	jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);

	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
	sb->s_start    = cpu_to_be32(journal->j_tail);
	sb->s_errno    = cpu_to_be32(journal->j_errno);
	jbd_unlock(&journal->j_state_lock);

	BUFFER_TRACE(bh, "marking dirty");
	mark_buffer_dirty(bh);
	if (wait)
		sync_dirty_buffer(bh);
	else
		ll_rw_block(SWRITE, 1, &bh);

out:
	/* If we have just flushed the log (by marking s_start==0), then
	 * any future commit will have to be careful to update the
	 * superblock again to re-record the true start of the log. */

	jbd_lock(&journal->j_state_lock);
	if (sb->s_start)
		journal->j_flags &= ~JFS_FLUSHED;
	else
		journal->j_flags |= JFS_FLUSHED;
	jbd_unlock(&journal->j_state_lock);
}
1079
1080 /*
1081 * Given a journal_t structure, initialise the various fields for
1082 * startup of a new journaling session. We use this both when creating
1083 * a journal, and after recovering an old journal to reset it for
1084 * subsequent use.
1085 */
1086
/*
 * Reinitialise the journal's in-memory dynamic state for a fresh
 * journaling session, then push the result to the on-disk superblock.
 */
static int journal_reset(journal_t *journal)
{
	journal_superblock_t *sb = journal->j_superblock;
	unsigned long start, end;

	/* The usable on-disk extent of the log, from the superblock. */
	start = be32_to_cpu(sb->s_first);
	end = be32_to_cpu(sb->s_maxlen);

	journal->j_first = start;
	journal->j_last = end;

	/* The log is empty: head and tail coincide at the first block. */
	journal->j_head = start;
	journal->j_tail = start;
	journal->j_free = end - start;

	/* Restart commit numbering from the current transaction sequence. */
	journal->j_tail_sequence = journal->j_transaction_sequence;
	journal->j_commit_sequence = journal->j_transaction_sequence - 1;
	journal->j_commit_request = journal->j_commit_sequence;

	/* Cap any single transaction at a quarter of the log. */
	journal->j_max_transaction_buffers = journal->j_maxlen / 4;

	/* Persist the refreshed dynamic fields, waiting for the write. */
	journal_update_superblock(journal, 1);
	return 0;
}
1112
1113 /**
1114 * int journal_load() - Read journal from disk.
1115 * @journal: Journal to act on.
1116 *
1117 * Given a journal_t structure which tells us which disk blocks contain
1118 * a journal, read the journal from disk to initialise the in-memory
1119 * structures.
1120 */
int journal_load(journal_t *journal)
{
	journal_superblock_t *sb;
	int err;

	err = load_superblock(journal);
	if (err)
		return err;

	sb = journal->j_superblock;

	/*
	 * V2 superblocks carry feature flags; refuse to proceed if the
	 * journal uses any feature this implementation does not know about.
	 */
	if (journal->j_format_version >= 2 &&
	    ((sb->s_feature_ro_compat &
	      ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
	     (sb->s_feature_incompat &
	      ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES)))) {
		printk (KERN_WARNING
			"JBD: Unrecognised features on journal\n");
		return -EINVAL;
	}

	/*
	 * Let the recovery code replay anything left in the log, then
	 * reinitialise the dynamic superblock fields in memory and on disk.
	 */
	if (journal_recover(journal) || journal_reset(journal)) {
		printk (KERN_WARNING "JBD: recovery failed\n");
		return -EIO;
	}

	/* The journal is now usable. */
	journal->j_flags &= ~JFS_ABORT;
	journal->j_flags |= JFS_LOADED;
	return 0;
}
1164
1165
/*
 * Transaction list-management routines
 */
1169
1170
1171 /*
1172 *
1173 * List management code snippets: various functions for manipulating the
1174 * transaction buffer lists.
1175 *
1176 */
1177
1178 /*
1179 * Append a buffer to a transaction list, given the transaction's list head
1180 * pointer.
1181 *
1182 * j_list_lock is held.
1183 *
1184 * jbd_lock_bh_state(jh2bh(jh)) is held.
1185 */
1186
1187 static inline void
__blist_add_buffer(struct journal_head ** list,struct journal_head * jh)1188 __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1189 {
1190 if (!*list) {
1191 jh->b_tnext = jh->b_tprev = jh;
1192 *list = jh;
1193 } else {
1194 /* Insert at the tail of the list to preserve order */
1195 struct journal_head *first = *list, *last = first->b_tprev;
1196 jh->b_tprev = last;
1197 jh->b_tnext = first;
1198 last->b_tnext = first->b_tprev = jh;
1199 }
1200 }
1201
1202 /*
1203 * Remove a buffer from a transaction list, given the transaction's list
1204 * head pointer.
1205 *
1206 * Called with j_list_lock held, and the journal may not be locked.
1207 *
1208 * jbd_lock_bh_state(jh2bh(jh)) is held.
1209 */
1210
1211 static inline void
__blist_del_buffer(struct journal_head ** list,struct journal_head * jh)1212 __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1213 {
1214 if (*list == jh) {
1215 *list = jh->b_tnext;
1216 if (*list == jh)
1217 *list = NULL;
1218 }
1219 jh->b_tprev->b_tnext = jh->b_tnext;
1220 jh->b_tnext->b_tprev = jh->b_tprev;
1221 }
1222
1223 /*
1224 * Remove a buffer from the appropriate transaction list.
1225 *
1226 * Note that this function can *change* the value of
1227 * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
1228 * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller
 * is holding onto a copy of one of these pointers, it could go bad.
1230 * Generally the caller needs to re-read the pointer from the transaction_t.
1231 *
1232 * Called under j_list_lock. The journal may not be locked.
1233 */
static void __journal_temp_unlink_buffer(struct journal_head *jh)
{
	struct journal_head **list = NULL;
	transaction_t *transaction;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	transaction = jh->b_transaction;
	if (transaction)
		assert_jbd_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	if (jh->b_jlist != BJ_None)
		J_ASSERT_JH(jh, transaction != NULL);

	/* Select the transaction list this buffer currently sits on. */
	switch (jh->b_jlist) {
	case BJ_None:
		return;		/* not on any list: nothing to unlink */
	case BJ_SyncData:
		list = &transaction->t_sync_datalist;
		break;
	case BJ_Metadata:
		/* Metadata buffers are additionally counted in t_nr_buffers. */
		transaction->t_nr_buffers--;
		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_IO:
		list = &transaction->t_iobuf_list;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_LogCtl:
		list = &transaction->t_log_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	case BJ_Locked:
		list = &transaction->t_locked_list;
		break;
	}

	__blist_del_buffer(list, jh);
	jh->b_jlist = BJ_None;
	/* If the buffer was jbd-dirty, hand the dirtiness back to the VM
	 * so the data is not lost now that it is off a journal list. */
	if (test_clear_buffer_jbddirty(bh))
		mark_buffer_dirty(bh);	/* Expose it to the VM */
}
1285
/*
 * Fully detach @jh from its transaction: unlink it from whatever list
 * it is on, then clear the transaction back-pointer.  Locking rules are
 * those of __journal_temp_unlink_buffer() (j_list_lock + bh state lock).
 */
void __journal_unfile_buffer(struct journal_head *jh)
{
	__journal_temp_unlink_buffer(jh);
	jh->b_transaction = NULL;
}
1291
/*
 * Locked wrapper around __journal_unfile_buffer(): takes the buffer's
 * state lock first and then the journal's j_list_lock, in that order.
 */
void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
	jbd_lock_bh_state(jh2bh(jh));
	jbd_lock(&journal->j_list_lock);
	__journal_unfile_buffer(jh);
	jbd_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}
1300
1301 /*
1302 * This buffer is no longer needed. If it is on an older transaction's
1303 * checkpoint list we need to record it on this transaction's forget list
1304 * to pin this buffer (and hence its checkpointing transaction) down until
1305 * this transaction commits. If the buffer isn't on a checkpoint list, we
1306 * release it.
1307 * Returns non-zero if JBD no longer has an interest in the buffer.
1308 *
1309 * Called under j_list_lock.
1310 *
1311 * Called under jbd_lock_bh_state(bh).
1312 */
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
{
	int may_free = 1;
	struct buffer_head *bh = jh2bh(jh);

	__journal_unfile_buffer(jh);

	if (jh->b_cp_transaction) {
		/* Still on a checkpoint list: refile onto this
		 * transaction's BJ_Forget list to pin the checkpointing
		 * transaction until we commit.  The caller must not free
		 * the buffer (may_free = 0). */
		JBUFFER_TRACE(jh, "on running+cp transaction");
		__journal_file_buffer(jh, transaction, BJ_Forget);
		clear_buffer_jbddirty(bh);
		may_free = 0;
	} else {
		/* No checkpoint interest: drop JBD's journal_head and the
		 * buffer reference that came with it. */
		JBUFFER_TRACE(jh, "on running transaction");
		journal_remove_journal_head(bh);
		__brelse(bh);
	}
	return may_free;
}
1332
1333
1334 /*
1335 * File a buffer on the given transaction list.
1336 */
void __journal_file_buffer(struct journal_head *jh,
			transaction_t *transaction, int jlist)
{
	struct journal_head **list = NULL;
	int was_dirty = 0;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	assert_jbd_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
				jh->b_transaction == NULL);

	/* Already filed on the requested list of this transaction. */
	if (jh->b_transaction && jh->b_jlist == (unsigned) jlist)
		return;

	/* The following list of buffer states needs to be consistent
	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
	 * state. */

	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
	    jlist == BJ_Shadow || jlist == BJ_Forget) {
		/* Remember dirtiness so it can be reasserted as jbddirty
		 * once the buffer sits on its new list (see bottom). */
		if (test_clear_buffer_dirty(bh) ||
		    test_clear_buffer_jbddirty(bh))
			was_dirty = 1;
	}

	/* Detach from whatever list the buffer is on before refiling. */
	if (jh->b_transaction)
		__journal_temp_unlink_buffer(jh);
	jh->b_transaction = transaction;

	/* Select the destination list for @jlist. */
	switch (jlist) {
	case BJ_None:
		J_ASSERT_JH(jh, !jh->b_committed_data);
		J_ASSERT_JH(jh, !jh->b_frozen_data);
		return;
	case BJ_SyncData:
		list = &transaction->t_sync_datalist;
		break;
	case BJ_Metadata:
		/* Metadata buffers are additionally counted in t_nr_buffers. */
		transaction->t_nr_buffers++;
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_IO:
		list = &transaction->t_iobuf_list;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_LogCtl:
		list = &transaction->t_log_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	case BJ_Locked:
		list = &transaction->t_locked_list;
		break;
	}

	__blist_add_buffer(list, jh);
	jh->b_jlist = jlist;

	/* Re-express the dirtiness captured above as jbddirty. */
	if (was_dirty)
		set_buffer_jbddirty(bh);
}
1407
/*
 * Locked wrapper around __journal_file_buffer(): takes the buffer's
 * state lock first and then the journal's j_list_lock, in that order.
 */
void journal_file_buffer(struct journal_head *jh,
				transaction_t *transaction, int jlist)
{
	jbd_lock_bh_state(jh2bh(jh));
	jbd_lock(&transaction->t_journal->j_list_lock);
	__journal_file_buffer(jh, transaction, jlist);
	jbd_unlock(&transaction->t_journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}
1417
1418
1419 /*
1420 * journal_release_buffer: undo a get_write_access without any buffer
1421 * updates, if the update decided in the end that it didn't need access.
1422 *
1423 */
void
journal_release_buffer(handle_t *handle, struct buffer_head *bh)
{
	/* Deliberately a no-op apart from the trace point: nothing in
	 * this implementation needs unwinding for an unused
	 * get_write_access. */
	BUFFER_TRACE(bh, "entry");
}
1429
1430 /**
1431 * void journal_forget() - bforget() for potentially-journaled buffers.
1432 * @handle: transaction handle
1433 * @bh: bh to 'forget'
1434 *
1435 * We can only do the bforget if there are no commits pending against the
1436 * buffer. If the buffer is dirty in the current running transaction we
1437 * can safely unlink it.
1438 *
1439 * bh may not be a journalled buffer at all - it may be a non-JBD
1440 * buffer which came off the hashtable. Check for this.
1441 *
1442 * Decrements bh->b_count by one.
1443 *
1444 * Allow this call even if the handle has aborted --- it may be part of
1445 * the caller's cleanup after an abort.
1446 */
int journal_forget (handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh;
	int drop_reserve = 0;	/* set when the handle earns a credit back */
	int err = 0;

	BUFFER_TRACE(bh, "entry");

	jbd_lock_bh_state(bh);
	jbd_lock(&journal->j_list_lock);

	/* Not a journalled buffer at all: just drop our bh reference. */
	if (!buffer_jbd(bh))
		goto not_jbd;
	jh = bh2jh(bh);

	/* Critical error: attempting to delete a bitmap buffer, maybe?
	 * Don't do any jbd operations, and return an error. */
	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
			 "inconsistent data on disk")) {
		err = -EIO;
		goto not_jbd;
	}

	/*
	 * The buffer's going from the transaction, we must drop
	 * all references -bzzz
	 */
	jh->b_modified = 0;

	if (jh->b_transaction == handle->h_transaction) {
		J_ASSERT_JH(jh, !jh->b_frozen_data);

		/* If we are forgetting a buffer which is already part
		 * of this transaction, then we can just drop it from
		 * the transaction immediately. */
		clear_buffer_dirty(bh);
		clear_buffer_jbddirty(bh);

		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");

		drop_reserve = 1;

		/*
		 * We are no longer going to journal this buffer.
		 * However, the commit of this transaction is still
		 * important to the buffer: the delete that we are now
		 * processing might obsolete an old log entry, so by
		 * committing, we can satisfy the buffer's checkpoint.
		 *
		 * So, if we have a checkpoint on the buffer, we should
		 * now refile the buffer on our BJ_Forget list so that
		 * we know to remove the checkpoint after we commit.
		 */

		if (jh->b_cp_transaction) {
			__journal_temp_unlink_buffer(jh);
			__journal_file_buffer(jh, transaction, BJ_Forget);
		} else {
			__journal_unfile_buffer(jh);
			journal_remove_journal_head(bh);
			__brelse(bh);
			if (!buffer_jbd(bh)) {
				/* JBD has fully let go of the buffer:
				 * release the caller's reference with
				 * bforget, outside both locks. */
				jbd_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				__bforget(bh);
				goto drop;
			}
		}
	} else if (jh->b_transaction) {
		J_ASSERT_JH(jh, (jh->b_transaction ==
				 journal->j_committing_transaction));
		/* However, if the buffer is still owned by a prior
		 * (committing) transaction, we can't drop it yet... */
		JBUFFER_TRACE(jh, "belongs to older transaction");
		/* ... but we CAN drop it from the new transaction if we
		 * have also modified it since the original commit. */

		if (jh->b_next_transaction) {
			J_ASSERT(jh->b_next_transaction == transaction);
			jh->b_next_transaction = NULL;
			drop_reserve = 1;
		}
	}

not_jbd:
	jbd_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	__brelse(bh);
drop:
	if (drop_reserve) {
		/* no need to reserve log space for this block -bzzz */
		handle->h_buffer_credits++;
	}
	return err;
}
1544
1545 /*
1546 * debugfs tunables
1547 */
1548 #ifdef CONFIG_JBD_DEBUG
1549
/* Runtime-selectable JBD debugging verbosity (consumed by jbd_debug()). */
u8 journal_enable_debug __read_mostly;
EXPORT_SYMBOL(journal_enable_debug);

/* debugfs nodes: the "jbd" directory and the "jbd-debug" knob within it. */
static struct dentry *jbd_debugfs_dir;
static struct dentry *jbd_debug;
1555
jbd_create_debugfs_entry(void)1556 static void __init jbd_create_debugfs_entry(void)
1557 {
1558 jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
1559 if (jbd_debugfs_dir)
1560 jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
1561 jbd_debugfs_dir,
1562 &journal_enable_debug);
1563 }
1564
static void __exit jbd_remove_debugfs_entry(void)
{
	/* debugfs_remove() is documented to ignore NULL, so this is safe
	 * even if the entries were never successfully created. */
	debugfs_remove(jbd_debug);
	debugfs_remove(jbd_debugfs_dir);
}
1570
1571 #else
1572
/* CONFIG_JBD_DEBUG disabled: debugfs setup compiles away to a no-op. */
static inline void jbd_create_debugfs_entry(void)
{
}
1576
/* CONFIG_JBD_DEBUG disabled: debugfs teardown compiles away to a no-op. */
static inline void jbd_remove_debugfs_entry(void)
{
}
1580
1581 #endif
1582
1583 struct kmem_cache *jbd_handle_cache = NULL;
1584
journal_init_handle_cache(void)1585 static int __init journal_init_handle_cache(void)
1586 {
1587 jbd_handle_cache = kmem_cache_create("journal_handle",
1588 sizeof(handle_t),
1589 0, /* offset */
1590 SLAB_TEMPORARY, /* flags */
1591 NULL); /* ctor */
1592 if (jbd_handle_cache == NULL) {
1593 printk(KERN_EMERG "JBD: failed to create handle cache\n");
1594 return -ENOMEM;
1595 }
1596 return 0;
1597 }
1598
static void journal_destroy_handle_cache(void)
{
	/* The cache may never have been created (init-failure path);
	 * NOTE(review): guard kept because kmem_cache_destroy() may not
	 * tolerate NULL on the kernels this code targets. */
	if (jbd_handle_cache)
		kmem_cache_destroy(jbd_handle_cache);
}
1604
1605 /*
1606 * Module startup and shutdown
1607 */
1608
journal_init_caches(void)1609 static int __init journal_init_caches(void)
1610 {
1611 int ret;
1612
1613 ret = journal_init_revoke_caches();
1614 if (ret == 0)
1615 ret = journal_init_journal_head_cache();
1616 if (ret == 0)
1617 ret = journal_init_handle_cache();
1618 return ret;
1619 }
1620
static void journal_destroy_caches(void)
{
	/* Tear down all JBD slab caches.  Also used on the init-failure
	 * path, so presumably each helper tolerates its cache never
	 * having been created -- verified above for the handle cache. */
	journal_destroy_revoke_caches();
	journal_destroy_journal_head_cache();
	journal_destroy_handle_cache();
}
1627
journal_init(void)1628 static int __init journal_init(void)
1629 {
1630 int ret;
1631
1632 J_ASSERT(sizeof(struct journal_superblock_s) == 1024);
1633
1634 ret = journal_init_caches();
1635 if (ret != 0)
1636 journal_destroy_caches();
1637 jbd_create_debugfs_entry();
1638 return ret;
1639 }
1640
static void __exit journal_exit(void)
{
#ifdef CONFIG_JBD_DEBUG
	/* Every journal_head should have been freed by now; a nonzero
	 * count indicates a reference leak somewhere in JBD. */
	int n = atomic_read(&nr_journal_heads);
	if (n)
		printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
#endif
	jbd_remove_debugfs_entry();
	journal_destroy_caches();
}
1651
/* Module glue: license declaration and the init/exit entry points. */
MODULE_LICENSE("GPL");
module_init(journal_init);
module_exit(journal_exit);
1655