/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 */

#include "ext2fs.h"
#include <linux/ext4.h>

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4018)
#pragma warning(disable: 4242)
#pragma warning(disable: 4244)
#endif


/*
 * Flags used by extent splitting.
 */
#define EXT4_EXT_MAY_ZEROOUT    0x1  /* safe to zeroout if split fails
                                        due to ENOSPC */
#define EXT4_EXT_MARK_UNWRIT1   0x2  /* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT2   0x4  /* mark second half unwritten */

#define EXT4_EXT_DATA_VALID1    0x8  /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2    0x10 /* second half contains valid data */

#define CONFIG_EXTENT_TEST
#ifdef CONFIG_EXTENT_TEST

#define ext4_mark_inode_dirty(icb, handle, n) ext3_mark_inode_dirty(icb, n)

/* Derive an allocation goal from the inode's block group. */
static inline ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
{
    PEXT2_VCB Vcb;
    Vcb = inode->i_sb->s_priv;
    return (inode->i_ino - 1) / BLOCKS_PER_GROUP;
}

/* Allocate metadata block(s) through the native Ext2NewBlock allocator
 * and account for them in i_blocks. */
static ext4_fsblk_t ext4_new_meta_blocks(void *icb, handle_t *handle, struct inode *inode,
                                         ext4_fsblk_t goal,
                                         unsigned int flags,
                                         unsigned long *count, int *errp)
{
    NTSTATUS status;
    ULONG blockcnt = (count) ? *count : 1;
    ULONG block = 0;

    status = Ext2NewBlock((PEXT2_IRP_CONTEXT)icb,
                          inode->i_sb->s_priv,
                          0, goal,
                          &block,
                          &blockcnt);
    if (count)
        *count = blockcnt;

    if (!NT_SUCCESS(status)) {
        *errp = Ext2LinuxError(status);
        return 0;
    }
    inode->i_blocks += (blockcnt * (inode->i_sb->s_blocksize >> 9));
    return block;
}

/* Release blocks back to the native allocator and adjust i_blocks. */
static void ext4_free_blocks(void *icb, handle_t *handle, struct inode *inode, void *fake,
                             ext4_fsblk_t block, int count, int flags)
{
    Ext2FreeBlock((PEXT2_IRP_CONTEXT)icb, inode->i_sb->s_priv, block, count);
    inode->i_blocks -= count * (inode->i_sb->s_blocksize >> 9);
    return;
}

/* Debug output is compiled out in this port. */
static inline void ext_debug(char *str, ...)
{
}

#if TRUE
#define EXT4_ERROR_INODE(inode, str, ...) \
do { \ 87 DbgPrint("inode[%p]: " str "\n", inode, ##__VA_ARGS__); \ 88 } while(0) 89 #else 90 #define EXT4_ERROR_INODE 91 #endif 92 93 #define ext4_std_error(s, err) 94 #define assert ASSERT 95 96 #endif 97 98 /* 99 * Return the right sibling of a tree node(either leaf or indexes node) 100 */ 101 102 #define EXT_MAX_BLOCKS 0xffffffff 103 104 105 static inline int ext4_ext_space_block(struct inode *inode, int check) 106 { 107 int size; 108 109 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 110 / sizeof(struct ext4_extent); 111 #ifdef AGGRESSIVE_TEST 112 if (!check && size > 6) 113 size = 6; 114 #endif 115 return size; 116 } 117 118 static inline int ext4_ext_space_block_idx(struct inode *inode, int check) 119 { 120 int size; 121 122 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 123 / sizeof(struct ext4_extent_idx); 124 #ifdef AGGRESSIVE_TEST 125 if (!check && size > 5) 126 size = 5; 127 #endif 128 return size; 129 } 130 131 static inline int ext4_ext_space_root(struct inode *inode, int check) 132 { 133 int size; 134 135 size = sizeof(EXT4_I(inode)->i_block); 136 size -= sizeof(struct ext4_extent_header); 137 size /= sizeof(struct ext4_extent); 138 #ifdef AGGRESSIVE_TEST 139 if (!check && size > 3) 140 size = 3; 141 #endif 142 return size; 143 } 144 145 static inline int ext4_ext_space_root_idx(struct inode *inode, int check) 146 { 147 int size; 148 149 size = sizeof(EXT4_I(inode)->i_block); 150 size -= sizeof(struct ext4_extent_header); 151 size /= sizeof(struct ext4_extent_idx); 152 #ifdef AGGRESSIVE_TEST 153 if (!check && size > 4) 154 size = 4; 155 #endif 156 return size; 157 } 158 159 static int 160 ext4_ext_max_entries(struct inode *inode, int depth) 161 { 162 int max; 163 164 if (depth == ext_depth(inode)) { 165 if (depth == 0) 166 max = ext4_ext_space_root(inode, 1); 167 else 168 max = ext4_ext_space_root_idx(inode, 1); 169 } else { 170 if (depth == 0) 171 max = ext4_ext_space_block(inode, 1); 172 else 173 max = ext4_ext_space_block_idx(inode, 1); 174 } 175 176 return max; 177 } 178 179 static int __ext4_ext_check(const char *function, unsigned int line, 180 struct inode *inode, 181 struct ext4_extent_header *eh, int depth, 182 ext4_fsblk_t pblk); 183 184 /* 185 * read_extent_tree_block: 186 * Get a buffer_head by extents_bread, and read fresh data from the storage. 
187 */ 188 static struct buffer_head * 189 __read_extent_tree_block(const char *function, unsigned int line, 190 struct inode *inode, ext4_fsblk_t pblk, int depth, 191 int flags) 192 { 193 struct buffer_head *bh; 194 int err; 195 196 bh = extents_bread(inode->i_sb, pblk); 197 if (!bh) 198 return ERR_PTR(-ENOMEM); 199 200 if (!buffer_uptodate(bh)) { 201 err = -EIO; 202 goto errout; 203 } 204 if (buffer_verified(bh)) 205 return bh; 206 err = __ext4_ext_check(function, line, inode, 207 ext_block_hdr(bh), depth, pblk); 208 if (err) 209 goto errout; 210 set_buffer_verified(bh); 211 return bh; 212 errout: 213 extents_brelse(bh); 214 return ERR_PTR(err); 215 216 } 217 218 #define read_extent_tree_block(inode, pblk, depth, flags) \ 219 __read_extent_tree_block("", __LINE__, (inode), (pblk), \ 220 (depth), (flags)) 221 222 #define ext4_ext_check(inode, eh, depth, pblk) \ 223 __ext4_ext_check("", __LINE__, (inode), (eh), (depth), (pblk)) 224 225 int ext4_ext_check_inode(struct inode *inode) 226 { 227 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); 228 } 229 230 static uint32_t ext4_ext_block_csum(struct inode *inode, 231 struct ext4_extent_header *eh) 232 { 233 /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/ 234 return 0; 235 } 236 237 static void ext4_extent_block_csum_set(struct inode *inode, 238 struct ext4_extent_header *eh) 239 { 240 struct ext4_extent_tail *tail; 241 242 tail = find_ext4_extent_tail(eh); 243 tail->et_checksum = ext4_ext_block_csum( 244 inode, eh); 245 } 246 247 static int ext4_split_extent_at(void *icb, 248 handle_t *handle, 249 struct inode *inode, 250 struct ext4_ext_path **ppath, 251 ext4_lblk_t split, 252 int split_flag, 253 int flags); 254 255 static inline int 256 ext4_force_split_extent_at(void *icb, handle_t *handle, struct inode *inode, 257 struct ext4_ext_path **ppath, ext4_lblk_t lblk, 258 int nofail) 259 { 260 struct ext4_ext_path *path = *ppath; 261 int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); 262 263 return ext4_split_extent_at(icb, handle, inode, ppath, lblk, unwritten ? 264 EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, 265 EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | 266 (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); 267 } 268 269 /* 270 * could return: 271 * - EROFS 272 * - ENOMEM 273 */ 274 275 static int ext4_ext_get_access(void *icb, handle_t *handle, struct inode *inode, 276 struct ext4_ext_path *path) 277 { 278 if (path->p_bh) { 279 /* path points to block */ 280 281 return ext4_journal_get_write_access(icb, handle, path->p_bh); 282 283 } 284 /* path points to leaf/index in inode body */ 285 /* we use in-core data, no need to protect them */ 286 return 0; 287 } 288 289 290 static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, 291 struct ext4_ext_path *path, 292 ext4_lblk_t block) 293 { 294 if (path) { 295 int depth = path->p_depth; 296 struct ext4_extent *ex; 297 298 /* 299 * Try to predict block placement assuming that we are 300 * filling in a file which will eventually be 301 * non-sparse --- i.e., in the case of libbfd writing 302 * an ELF object sections out-of-order but in a way 303 * the eventually results in a contiguous object or 304 * executable file, or some database extending a table 305 * space file. However, this is actually somewhat 306 * non-ideal if we are writing a sparse file such as 307 * qemu or KVM writing a raw image file that is going 308 * to stay fairly sparse, since it will end up 309 * fragmenting the file system's free space. 
Maybe we
         * should have some heuristics or some way to allow
         * userspace to pass a hint to the file system,
         * especially if the latter case turns out to be
         * common.
         */
        ex = path[depth].p_ext;
        if (ex) {
            ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
            ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);

            if (block > ext_block)
                return ext_pblk + (block - ext_block);
            else
                return ext_pblk - (ext_block - block);
        }

        /* it looks like the index is empty;
         * try to find a starting block from the index itself */
        if (path[depth].p_bh)
            return path[depth].p_bh->b_blocknr;
    }

    /* OK. use inode's group */
    return ext4_inode_to_goal_block(inode);
}

/*
 * Allocation for a metadata block.
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(void *icb, handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path,
                        struct ext4_extent *ex, int *err, unsigned int flags)
{
    ext4_fsblk_t goal, newblock;

    goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags,
                                    NULL, err);
    return newblock;
}

int __ext4_ext_dirty(const char *where, unsigned int line,
                     void *icb, handle_t *handle,
                     struct inode *inode,
                     struct ext4_ext_path *path)
{
    int err;

    if (path->p_bh) {
        ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
        /* path points to block */
        err = __ext4_handle_dirty_metadata(where, line, icb, handle, inode, path->p_bh);
    } else {
        /* path points to leaf/index in inode body */
        err = ext4_mark_inode_dirty(icb, handle, inode);
    }
    return err;
}

void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
    int depth, i;

    if (!path)
        return;
    depth = path->p_depth;
    for (i = 0; i <= depth; i++, path++)
        if (path->p_bh) {
            extents_brelse(path->p_bh);
            path->p_bh = NULL;
        }
}

/*
 * Check whether the basic information inside the extent header
 * is correct.
 */
static int __ext4_ext_check(const char *function, unsigned int line,
                            struct inode *inode,
                            struct ext4_extent_header *eh, int depth,
                            ext4_fsblk_t pblk)
{
    struct ext4_extent_tail *tail;
    const char *error_msg;
#ifndef __REACTOS__
    int max = 0;
#endif
    if (eh->eh_magic != EXT4_EXT_MAGIC) {
        error_msg = "invalid magic";
        goto corrupted;
    }
    if (le16_to_cpu(eh->eh_depth) != depth) {
        error_msg = "unexpected eh_depth";
        goto corrupted;
    }
    if (eh->eh_max == 0) {
        error_msg = "invalid eh_max";
        goto corrupted;
    }
    if (eh->eh_entries > eh->eh_max) {
        error_msg = "invalid eh_entries";
        goto corrupted;
    }

    tail = find_ext4_extent_tail(eh);
    if (tail->et_checksum != ext4_ext_block_csum(inode, eh)) {
        ext_debug("Warning: extent checksum damaged? tail->et_checksum = "
                  "%lu, ext4_ext_block_csum = %lu\n",
                  tail->et_checksum, ext4_ext_block_csum(inode, eh));
    }

    return 0;

corrupted:
    ext_debug("corrupted! 
%s\n", error_msg); 426 return -EIO; 427 } 428 429 /* 430 * ext4_ext_binsearch_idx: 431 * binary search for the closest index of the given block 432 * the header must be checked before calling this 433 */ 434 static void 435 ext4_ext_binsearch_idx(struct inode *inode, 436 struct ext4_ext_path *path, ext4_lblk_t block) 437 { 438 struct ext4_extent_header *eh = path->p_hdr; 439 struct ext4_extent_idx *r, *l, *m; 440 441 ext_debug("binsearch for %u(idx): ", block); 442 443 l = EXT_FIRST_INDEX(eh) + 1; 444 r = EXT_LAST_INDEX(eh); 445 while (l <= r) { 446 m = l + (r - l) / 2; 447 if (block < (m->ei_block)) 448 r = m - 1; 449 else 450 l = m + 1; 451 ext_debug("%p(%u):%p(%u):%p(%u) ", l, (l->ei_block), 452 m, (m->ei_block), 453 r, (r->ei_block)); 454 } 455 456 path->p_idx = l - 1; 457 ext_debug(" -> %u->%lld ", (path->p_idx->ei_block), 458 ext4_idx_pblock(path->p_idx)); 459 460 #ifdef CHECK_BINSEARCH 461 { 462 struct ext4_extent_idx *chix, *ix; 463 int k; 464 465 chix = ix = EXT_FIRST_INDEX(eh); 466 for (k = 0; k < (eh->eh_entries); k++, ix++) { 467 if (k != 0 && 468 (ix->ei_block) <= (ix[-1].ei_block)) { 469 printk(KERN_DEBUG "k=%d, ix=0x%p, " 470 "first=0x%p\n", k, 471 ix, EXT_FIRST_INDEX(eh)); 472 printk(KERN_DEBUG "%u <= %u\n", 473 (ix->ei_block), 474 (ix[-1].ei_block)); 475 } 476 BUG_ON(k && (ix->ei_block) 477 <= (ix[-1].ei_block)); 478 if (block < (ix->ei_block)) 479 break; 480 chix = ix; 481 } 482 BUG_ON(chix != path->p_idx); 483 } 484 #endif 485 486 } 487 488 /* 489 * ext4_ext_binsearch: 490 * binary search for closest extent of the given block 491 * the header must be checked before calling this 492 */ 493 static void 494 ext4_ext_binsearch(struct inode *inode, 495 struct ext4_ext_path *path, ext4_lblk_t block) 496 { 497 struct ext4_extent_header *eh = path->p_hdr; 498 struct ext4_extent *r, *l, *m; 499 500 if (eh->eh_entries == 0) { 501 /* 502 * this leaf is empty: 503 * we get such a leaf in split/add case 504 */ 505 return; 506 } 507 508 ext_debug("binsearch for %u: ", block); 509 510 l = EXT_FIRST_EXTENT(eh) + 1; 511 r = EXT_LAST_EXTENT(eh); 512 513 while (l <= r) { 514 m = l + (r - l) / 2; 515 if (block < m->ee_block) 516 r = m - 1; 517 else 518 l = m + 1; 519 ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block, 520 m, (m->ee_block), 521 r, (r->ee_block)); 522 } 523 524 path->p_ext = l - 1; 525 ext_debug(" -> %d:%llu:[%d]%d ", 526 (path->p_ext->ee_block), 527 ext4_ext_pblock(path->p_ext), 528 ext4_ext_is_unwritten(path->p_ext), 529 ext4_ext_get_actual_len(path->p_ext)); 530 531 #ifdef CHECK_BINSEARCH 532 { 533 struct ext4_extent *chex, *ex; 534 int k; 535 536 chex = ex = EXT_FIRST_EXTENT(eh); 537 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { 538 BUG_ON(k && (ex->ee_block) 539 <= (ex[-1].ee_block)); 540 if (block < (ex->ee_block)) 541 break; 542 chex = ex; 543 } 544 BUG_ON(chex != path->p_ext); 545 } 546 #endif 547 548 } 549 550 #ifdef EXT_DEBUG 551 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 552 { 553 int k, l = path->p_depth; 554 555 ext_debug("path:"); 556 for (k = 0; k <= l; k++, path++) { 557 if (path->p_idx) { 558 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), 559 ext4_idx_pblock(path->p_idx)); 560 } else if (path->p_ext) { 561 ext_debug(" %d:[%d]%d:%llu ", 562 le32_to_cpu(path->p_ext->ee_block), 563 ext4_ext_is_unwritten(path->p_ext), 564 ext4_ext_get_actual_len(path->p_ext), 565 ext4_ext_pblock(path->p_ext)); 566 } else 567 ext_debug(" []"); 568 } 569 ext_debug("\n"); 570 } 571 572 static void ext4_ext_show_leaf(struct 
inode *inode, struct ext4_ext_path *path) 573 { 574 int depth = ext_depth(inode); 575 struct ext4_extent_header *eh; 576 struct ext4_extent *ex; 577 int i; 578 579 if (!path) 580 return; 581 582 eh = path[depth].p_hdr; 583 ex = EXT_FIRST_EXTENT(eh); 584 585 ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); 586 587 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { 588 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), 589 ext4_ext_is_unwritten(ex), 590 ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); 591 } 592 ext_debug("\n"); 593 } 594 595 static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, 596 ext4_fsblk_t newblock, int level) 597 { 598 int depth = ext_depth(inode); 599 struct ext4_extent *ex; 600 601 if (depth != level) { 602 struct ext4_extent_idx *idx; 603 idx = path[level].p_idx; 604 while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { 605 ext_debug("%d: move %d:%llu in new index %llu\n", level, 606 le32_to_cpu(idx->ei_block), 607 ext4_idx_pblock(idx), 608 newblock); 609 idx++; 610 } 611 612 return; 613 } 614 615 ex = path[depth].p_ext; 616 while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { 617 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", 618 le32_to_cpu(ex->ee_block), 619 ext4_ext_pblock(ex), 620 ext4_ext_is_unwritten(ex), 621 ext4_ext_get_actual_len(ex), 622 newblock); 623 ex++; 624 } 625 } 626 627 #else 628 #define ext4_ext_show_path(inode, path) 629 #define ext4_ext_show_leaf(inode, path) 630 #define ext4_ext_show_move(inode, path, newblock, level) 631 #endif 632 633 struct ext4_ext_path * 634 ext4_find_extent(struct inode *inode, ext4_lblk_t block, 635 struct ext4_ext_path **orig_path, int flags) 636 { 637 struct ext4_extent_header *eh; 638 struct buffer_head *bh; 639 struct ext4_ext_path *path = orig_path ? 
*orig_path : NULL; 640 short int depth, i, ppos = 0; 641 int ret; 642 643 eh = ext_inode_hdr(inode); 644 depth = ext_depth(inode); 645 646 if (path) { 647 ext4_ext_drop_refs(path); 648 if (depth > path[0].p_maxdepth) { 649 kfree(path); 650 *orig_path = path = NULL; 651 } 652 } 653 if (!path) { 654 /* account possible depth increase */ 655 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), 656 GFP_NOFS); 657 if (unlikely(!path)) 658 return ERR_PTR(-ENOMEM); 659 path[0].p_maxdepth = depth + 1; 660 } 661 path[0].p_hdr = eh; 662 path[0].p_bh = NULL; 663 664 i = depth; 665 /* walk through the tree */ 666 while (i) { 667 ext_debug("depth %d: num %d, max %d\n", 668 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 669 670 ext4_ext_binsearch_idx(inode, path + ppos, block); 671 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); 672 path[ppos].p_depth = i; 673 path[ppos].p_ext = NULL; 674 675 bh = read_extent_tree_block(inode, path[ppos].p_block, --i, 676 flags); 677 if (unlikely(IS_ERR(bh))) { 678 ret = PTR_ERR(bh); 679 goto err; 680 } 681 682 eh = ext_block_hdr(bh); 683 ppos++; 684 if (unlikely(ppos > depth)) { 685 extents_brelse(bh); 686 EXT4_ERROR_INODE(inode, 687 "ppos %d > depth %d", ppos, depth); 688 ret = -EIO; 689 goto err; 690 } 691 path[ppos].p_bh = bh; 692 path[ppos].p_hdr = eh; 693 } 694 695 path[ppos].p_depth = i; 696 path[ppos].p_ext = NULL; 697 path[ppos].p_idx = NULL; 698 699 /* find extent */ 700 ext4_ext_binsearch(inode, path + ppos, block); 701 /* if not an empty leaf */ 702 if (path[ppos].p_ext) 703 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); 704 705 ext4_ext_show_path(inode, path); 706 707 return path; 708 709 err: 710 ext4_ext_drop_refs(path); 711 if (path) { 712 kfree(path); 713 if (orig_path) 714 *orig_path = NULL; 715 } 716 return ERR_PTR(ret); 717 } 718 719 /* 720 * ext4_ext_insert_index: 721 * insert new index [@logical;@ptr] into the block at @curp; 722 * check where to insert: before @curp or after @curp 723 */ 724 static int ext4_ext_insert_index(void *icb, handle_t *handle, struct inode *inode, 725 struct ext4_ext_path *curp, 726 int logical, ext4_fsblk_t ptr) 727 { 728 struct ext4_extent_idx *ix; 729 int len, err; 730 731 err = ext4_ext_get_access(icb, handle, inode, curp); 732 if (err) 733 return err; 734 735 if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) { 736 EXT4_ERROR_INODE(inode, 737 "logical %d == ei_block %d!", 738 logical, le32_to_cpu(curp->p_idx->ei_block)); 739 return -EIO; 740 } 741 742 if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) 743 >= le16_to_cpu(curp->p_hdr->eh_max))) { 744 EXT4_ERROR_INODE(inode, 745 "eh_entries %d >= eh_max %d!", 746 le16_to_cpu(curp->p_hdr->eh_entries), 747 le16_to_cpu(curp->p_hdr->eh_max)); 748 return -EIO; 749 } 750 751 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 752 /* insert after */ 753 ext_debug("insert new index %d after: %llu\n", logical, ptr); 754 ix = curp->p_idx + 1; 755 } else { 756 /* insert before */ 757 ext_debug("insert new index %d before: %llu\n", logical, ptr); 758 ix = curp->p_idx; 759 } 760 761 len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1; 762 BUG_ON(len < 0); 763 if (len > 0) { 764 ext_debug("insert new index %d: " 765 "move %d indices from 0x%p to 0x%p\n", 766 logical, len, ix, ix + 1); 767 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx)); 768 } 769 770 if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { 771 EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); 772 return -EIO; 773 } 774 775 ix->ei_block = cpu_to_le32(logical); 776 
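    /*
     * Note: the new index entry is written in two steps -- ei_block above
     * receives the little-endian logical block, and ext4_idx_store_pblock()
     * below is expected to split the physical block number into the on-disk
     * low/high fields before the entry count is bumped.
     */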
ext4_idx_store_pblock(ix, ptr); 777 le16_add_cpu(&curp->p_hdr->eh_entries, 1); 778 779 if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { 780 EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); 781 return -EIO; 782 } 783 784 err = ext4_ext_dirty(icb, handle, inode, curp); 785 ext4_std_error(inode->i_sb, err); 786 787 return err; 788 } 789 790 /* 791 * ext4_ext_split: 792 * inserts new subtree into the path, using free index entry 793 * at depth @at: 794 * - allocates all needed blocks (new leaf and all intermediate index blocks) 795 * - makes decision where to split 796 * - moves remaining extents and index entries (right to the split point) 797 * into the newly allocated blocks 798 * - initializes subtree 799 */ 800 static int ext4_ext_split(void *icb, handle_t *handle, struct inode *inode, 801 unsigned int flags, 802 struct ext4_ext_path *path, 803 struct ext4_extent *newext, int at) 804 { 805 struct buffer_head *bh = NULL; 806 int depth = ext_depth(inode); 807 struct ext4_extent_header *neh; 808 struct ext4_extent_idx *fidx; 809 int i = at, k, m, a; 810 ext4_fsblk_t newblock, oldblock; 811 __le32 border; 812 ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ 813 int err = 0; 814 815 /* make decision: where to split? */ 816 /* FIXME: now decision is simplest: at current extent */ 817 818 /* if current leaf will be split, then we should use 819 * border from split point */ 820 if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { 821 EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); 822 return -EIO; 823 } 824 if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { 825 border = path[depth].p_ext[1].ee_block; 826 ext_debug("leaf will be split." 827 " next leaf starts at %d\n", 828 le32_to_cpu(border)); 829 } else { 830 border = newext->ee_block; 831 ext_debug("leaf will be added." 832 " next leaf starts at %d\n", 833 le32_to_cpu(border)); 834 } 835 836 /* 837 * If error occurs, then we break processing 838 * and mark filesystem read-only. index won't 839 * be inserted and tree will be in consistent 840 * state. Next mount will repair buffers too. 841 */ 842 843 /* 844 * Get array to track all allocated blocks. 845 * We need this to handle errors and free blocks 846 * upon them. 
847 */ 848 ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); 849 if (!ablocks) 850 return -ENOMEM; 851 852 /* allocate all needed blocks */ 853 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); 854 for (a = 0; a < depth - at; a++) { 855 newblock = ext4_ext_new_meta_block(icb, handle, inode, path, 856 newext, &err, flags); 857 if (newblock == 0) 858 goto cleanup; 859 ablocks[a] = newblock; 860 } 861 862 /* initialize new leaf */ 863 newblock = ablocks[--a]; 864 if (unlikely(newblock == 0)) { 865 EXT4_ERROR_INODE(inode, "newblock == 0!"); 866 err = -EIO; 867 goto cleanup; 868 } 869 bh = extents_bwrite(inode->i_sb, newblock); 870 if (unlikely(!bh)) { 871 err = -ENOMEM; 872 goto cleanup; 873 } 874 875 err = ext4_journal_get_create_access(icb, handle, bh); 876 if (err) 877 goto cleanup; 878 879 neh = ext_block_hdr(bh); 880 neh->eh_entries = 0; 881 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); 882 neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); 883 neh->eh_depth = 0; 884 885 /* move remainder of path[depth] to the new leaf */ 886 if (unlikely(path[depth].p_hdr->eh_entries != 887 path[depth].p_hdr->eh_max)) { 888 EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", 889 path[depth].p_hdr->eh_entries, 890 path[depth].p_hdr->eh_max); 891 err = -EIO; 892 goto cleanup; 893 } 894 /* start copy from next extent */ 895 m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; 896 ext4_ext_show_move(inode, path, newblock, depth); 897 if (m) { 898 struct ext4_extent *ex; 899 ex = EXT_FIRST_EXTENT(neh); 900 memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m); 901 le16_add_cpu(&neh->eh_entries, m); 902 } 903 904 ext4_extent_block_csum_set(inode, neh); 905 set_buffer_uptodate(bh); 906 907 err = ext4_handle_dirty_metadata(icb, handle, inode, bh); 908 if (err) 909 goto cleanup; 910 extents_brelse(bh); 911 bh = NULL; 912 913 /* correct old leaf */ 914 if (m) { 915 err = ext4_ext_get_access(icb, handle, inode, path + depth); 916 if (err) 917 goto cleanup; 918 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m); 919 err = ext4_ext_dirty(icb, handle, inode, path + depth); 920 if (err) 921 goto cleanup; 922 923 } 924 925 /* create intermediate indexes */ 926 k = depth - at - 1; 927 if (unlikely(k < 0)) { 928 EXT4_ERROR_INODE(inode, "k %d < 0!", k); 929 err = -EIO; 930 goto cleanup; 931 } 932 if (k) 933 ext_debug("create %d intermediate indices\n", k); 934 /* insert new index into current index block */ 935 /* current depth stored in i var */ 936 i = depth - 1; 937 while (k--) { 938 oldblock = newblock; 939 newblock = ablocks[--a]; 940 bh = extents_bwrite(inode->i_sb, newblock); 941 if (unlikely(!bh)) { 942 err = -ENOMEM; 943 goto cleanup; 944 } 945 946 err = ext4_journal_get_create_access(icb, handle, bh); 947 if (err) 948 goto cleanup; 949 950 neh = ext_block_hdr(bh); 951 neh->eh_entries = cpu_to_le16(1); 952 neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); 953 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); 954 neh->eh_depth = cpu_to_le16(depth - i); 955 fidx = EXT_FIRST_INDEX(neh); 956 fidx->ei_block = border; 957 ext4_idx_store_pblock(fidx, oldblock); 958 959 ext_debug("int.index at %d (block %llu): %u -> %llu\n", 960 i, newblock, le32_to_cpu(border), oldblock); 961 962 /* move remainder of path[i] to the new index block */ 963 if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != 964 EXT_LAST_INDEX(path[i].p_hdr))) { 965 EXT4_ERROR_INODE(inode, 966 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", 967 le32_to_cpu(path[i].p_ext->ee_block)); 968 err = -EIO; 969 goto 
cleanup; 970 } 971 /* start copy indexes */ 972 m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; 973 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, 974 EXT_MAX_INDEX(path[i].p_hdr)); 975 ext4_ext_show_move(inode, path, newblock, i); 976 if (m) { 977 memmove(++fidx, path[i].p_idx, 978 sizeof(struct ext4_extent_idx) * m); 979 le16_add_cpu(&neh->eh_entries, m); 980 } 981 ext4_extent_block_csum_set(inode, neh); 982 set_buffer_uptodate(bh); 983 984 err = ext4_handle_dirty_metadata(icb, handle, inode, bh); 985 if (err) 986 goto cleanup; 987 extents_brelse(bh); 988 bh = NULL; 989 990 /* correct old index */ 991 if (m) { 992 err = ext4_ext_get_access(icb, handle, inode, path + i); 993 if (err) 994 goto cleanup; 995 le16_add_cpu(&path[i].p_hdr->eh_entries, -m); 996 err = ext4_ext_dirty(icb, handle, inode, path + i); 997 if (err) 998 goto cleanup; 999 } 1000 1001 i--; 1002 } 1003 1004 /* insert new index */ 1005 err = ext4_ext_insert_index(icb, handle, inode, path + at, 1006 le32_to_cpu(border), newblock); 1007 1008 cleanup: 1009 if (bh) 1010 extents_brelse(bh); 1011 1012 if (err) { 1013 /* free all allocated blocks in error case */ 1014 for (i = 0; i < depth; i++) { 1015 if (!ablocks[i]) 1016 continue; 1017 ext4_free_blocks(icb, handle, inode, NULL, ablocks[i], 1, 1018 EXT4_FREE_BLOCKS_METADATA); 1019 } 1020 } 1021 kfree(ablocks); 1022 1023 return err; 1024 } 1025 1026 /* 1027 * ext4_ext_grow_indepth: 1028 * implements tree growing procedure: 1029 * - allocates new block 1030 * - moves top-level data (index block or leaf) into the new block 1031 * - initializes new top-level, creating index that points to the 1032 * just created block 1033 */ 1034 static int ext4_ext_grow_indepth(void *icb, handle_t *handle, struct inode *inode, 1035 unsigned int flags) 1036 { 1037 struct ext4_extent_header *neh; 1038 struct buffer_head *bh; 1039 ext4_fsblk_t newblock, goal = 0; 1040 int err = 0; 1041 1042 /* Try to prepend new index to old one */ 1043 if (ext_depth(inode)) 1044 goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode))); 1045 goal = ext4_inode_to_goal_block(inode); 1046 newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags, 1047 NULL, &err); 1048 if (newblock == 0) 1049 return err; 1050 1051 bh = extents_bwrite(inode->i_sb, newblock); 1052 if (!bh) 1053 return -ENOMEM; 1054 1055 err = ext4_journal_get_create_access(icb, handle, bh); 1056 if (err) 1057 goto out; 1058 1059 /* move top-level index/leaf into new block */ 1060 memmove(bh->b_data, EXT4_I(inode)->i_block, 1061 sizeof(EXT4_I(inode)->i_block)); 1062 1063 /* set size of new block */ 1064 neh = ext_block_hdr(bh); 1065 /* old root could have indexes or leaves 1066 * so calculate e_max right way */ 1067 if (ext_depth(inode)) 1068 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); 1069 else 1070 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); 1071 neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); 1072 ext4_extent_block_csum_set(inode, neh); 1073 set_buffer_uptodate(bh); 1074 1075 err = ext4_handle_dirty_metadata(icb, handle, inode, bh); 1076 if (err) 1077 goto out; 1078 1079 /* Update top-level index: num,max,pointer */ 1080 neh = ext_inode_hdr(inode); 1081 neh->eh_entries = cpu_to_le16(1); 1082 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock); 1083 if (neh->eh_depth == 0) { 1084 /* Root extent block becomes index block */ 1085 neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); 1086 EXT_FIRST_INDEX(neh)->ei_block = 1087 EXT_FIRST_EXTENT(neh)->ee_block; 1088 } 1089 ext_debug("new root: num 
%d(%d), lblock %d, ptr %llu\n", 1090 (neh->eh_entries), (neh->eh_max), 1091 (EXT_FIRST_INDEX(neh)->ei_block), 1092 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1093 1094 le16_add_cpu(&neh->eh_depth, 1); 1095 ext4_mark_inode_dirty(icb, handle, inode); 1096 out: 1097 extents_brelse(bh); 1098 1099 return err; 1100 } 1101 1102 /* 1103 * ext4_ext_create_new_leaf: 1104 * finds empty index and adds new leaf. 1105 * if no free index is found, then it requests in-depth growing. 1106 */ 1107 static int ext4_ext_create_new_leaf(void *icb, handle_t *handle, struct inode *inode, 1108 unsigned int mb_flags, 1109 unsigned int gb_flags, 1110 struct ext4_ext_path **ppath, 1111 struct ext4_extent *newext) 1112 { 1113 struct ext4_ext_path *path = *ppath; 1114 struct ext4_ext_path *curp; 1115 int depth, i, err = 0; 1116 1117 repeat: 1118 i = depth = ext_depth(inode); 1119 1120 /* walk up to the tree and look for free index entry */ 1121 curp = path + depth; 1122 while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { 1123 i--; 1124 curp--; 1125 } 1126 1127 /* we use already allocated block for index block, 1128 * so subsequent data blocks should be contiguous */ 1129 if (EXT_HAS_FREE_INDEX(curp)) { 1130 /* if we found index with free entry, then use that 1131 * entry: create all needed subtree and add new leaf */ 1132 err = ext4_ext_split(icb, handle, inode, mb_flags, path, newext, i); 1133 if (err) 1134 goto out; 1135 1136 /* refill path */ 1137 path = ext4_find_extent(inode, 1138 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1139 ppath, gb_flags); 1140 if (IS_ERR(path)) 1141 err = PTR_ERR(path); 1142 } else { 1143 /* tree is full, time to grow in depth */ 1144 err = ext4_ext_grow_indepth(icb, handle, inode, mb_flags); 1145 if (err) 1146 goto out; 1147 1148 /* refill path */ 1149 path = ext4_find_extent(inode, 1150 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1151 ppath, gb_flags); 1152 if (IS_ERR(path)) { 1153 err = PTR_ERR(path); 1154 goto out; 1155 } 1156 1157 /* 1158 * only first (depth 0 -> 1) produces free space; 1159 * in all other cases we have to split the grown tree 1160 */ 1161 depth = ext_depth(inode); 1162 if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { 1163 /* now we need to split */ 1164 goto repeat; 1165 } 1166 } 1167 1168 out: 1169 return err; 1170 } 1171 1172 /* 1173 * search the closest allocated block to the left for *logical 1174 * and returns it at @logical + it's physical address at @phys 1175 * if *logical is the smallest allocated block, the function 1176 * returns 0 at @phys 1177 * return value contains 0 (success) or error code 1178 */ 1179 static int ext4_ext_search_left(struct inode *inode, 1180 struct ext4_ext_path *path, 1181 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1182 { 1183 struct ext4_extent_idx *ix; 1184 struct ext4_extent *ex; 1185 int depth, ee_len; 1186 1187 if (unlikely(path == NULL)) { 1188 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); 1189 return -EIO; 1190 } 1191 depth = path->p_depth; 1192 *phys = 0; 1193 1194 if (depth == 0 && path->p_ext == NULL) 1195 return 0; 1196 1197 /* usually extent in the path covers blocks smaller 1198 * then *logical, but it can be that extent is the 1199 * first one in the file */ 1200 1201 ex = path[depth].p_ext; 1202 ee_len = ext4_ext_get_actual_len(ex); 1203 if (*logical < le32_to_cpu(ex->ee_block)) { 1204 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { 1205 EXT4_ERROR_INODE(inode, 1206 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", 1207 *logical, le32_to_cpu(ex->ee_block)); 1208 return -EIO; 1209 } 
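    /*
     * Note: at this point the target block lies in front of the first
     * extent of its leaf, so the loop below walks back up the path and
     * checks that every level's index is likewise the first one in its
     * block; if it is not, the function reports corruption instead of
     * guessing a left neighbour.
     */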
1210 while (--depth >= 0) { 1211 ix = path[depth].p_idx; 1212 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { 1213 EXT4_ERROR_INODE(inode, 1214 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", 1215 ix != NULL ? le32_to_cpu(ix->ei_block) : 0, 1216 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? 1217 le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, 1218 depth); 1219 return -EIO; 1220 } 1221 } 1222 return 0; 1223 } 1224 1225 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { 1226 EXT4_ERROR_INODE(inode, 1227 "logical %d < ee_block %d + ee_len %d!", 1228 *logical, le32_to_cpu(ex->ee_block), ee_len); 1229 return -EIO; 1230 } 1231 1232 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; 1233 *phys = ext4_ext_pblock(ex) + ee_len - 1; 1234 return 0; 1235 } 1236 1237 /* 1238 * search the closest allocated block to the right for *logical 1239 * and returns it at @logical + it's physical address at @phys 1240 * if *logical is the largest allocated block, the function 1241 * returns 0 at @phys 1242 * return value contains 0 (success) or error code 1243 */ 1244 static int ext4_ext_search_right(struct inode *inode, 1245 struct ext4_ext_path *path, 1246 ext4_lblk_t *logical, ext4_fsblk_t *phys, 1247 struct ext4_extent **ret_ex) 1248 { 1249 struct buffer_head *bh = NULL; 1250 struct ext4_extent_header *eh; 1251 struct ext4_extent_idx *ix; 1252 struct ext4_extent *ex; 1253 ext4_fsblk_t block; 1254 int depth; /* Note, NOT eh_depth; depth from top of tree */ 1255 int ee_len; 1256 1257 if ((path == NULL)) { 1258 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); 1259 return -EIO; 1260 } 1261 depth = path->p_depth; 1262 *phys = 0; 1263 1264 if (depth == 0 && path->p_ext == NULL) 1265 return 0; 1266 1267 /* usually extent in the path covers blocks smaller 1268 * then *logical, but it can be that extent is the 1269 * first one in the file */ 1270 1271 ex = path[depth].p_ext; 1272 ee_len = ext4_ext_get_actual_len(ex); 1273 /*if (*logical < le32_to_cpu(ex->ee_block)) {*/ 1274 if (*logical < (ex->ee_block)) { 1275 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { 1276 EXT4_ERROR_INODE(inode, 1277 "first_extent(path[%d].p_hdr) != ex", 1278 depth); 1279 return -EIO; 1280 } 1281 while (--depth >= 0) { 1282 ix = path[depth].p_idx; 1283 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { 1284 EXT4_ERROR_INODE(inode, 1285 "ix != EXT_FIRST_INDEX *logical %d!", 1286 *logical); 1287 return -EIO; 1288 } 1289 } 1290 goto found_extent; 1291 } 1292 1293 /*if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {*/ 1294 if (unlikely(*logical < ((ex->ee_block) + ee_len))) { 1295 EXT4_ERROR_INODE(inode, 1296 "logical %d < ee_block %d + ee_len %d!", 1297 /**logical, le32_to_cpu(ex->ee_block), ee_len);*/ 1298 *logical, (ex->ee_block), ee_len); 1299 return -EIO; 1300 } 1301 1302 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { 1303 /* next allocated block in this leaf */ 1304 ex++; 1305 goto found_extent; 1306 } 1307 1308 /* go up and search for index to the right */ 1309 while (--depth >= 0) { 1310 ix = path[depth].p_idx; 1311 if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) 1312 goto got_index; 1313 } 1314 1315 /* we've gone up to the root and found no index to the right */ 1316 return 0; 1317 1318 got_index: 1319 /* we've found index to the right, let's 1320 * follow it and find the closest allocated 1321 * block to the right */ 1322 ix++; 1323 block = ext4_idx_pblock(ix); 1324 while (++depth < path->p_depth) { 1325 /* subtract from p_depth to get proper eh_depth */ 
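        /*
         * Example (assumed depths): with path->p_depth == 3 and the index
         * to the right found at depth 1, the first pass reads the level-2
         * child, whose header depth should be 3 - 2 == 1; the loop keeps
         * following the first index at each level, and the read after the
         * loop finally fetches the leaf.
         */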
1326 bh = read_extent_tree_block(inode, block, 1327 path->p_depth - depth, 0); 1328 if (IS_ERR(bh)) 1329 return PTR_ERR(bh); 1330 eh = ext_block_hdr(bh); 1331 ix = EXT_FIRST_INDEX(eh); 1332 block = ext4_idx_pblock(ix); 1333 extents_brelse(bh); 1334 } 1335 1336 bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); 1337 if (IS_ERR(bh)) 1338 return PTR_ERR(bh); 1339 eh = ext_block_hdr(bh); 1340 ex = EXT_FIRST_EXTENT(eh); 1341 found_extent: 1342 /**logical = le32_to_cpu(ex->ee_block);*/ 1343 *logical = (ex->ee_block); 1344 *phys = ext4_ext_pblock(ex); 1345 *ret_ex = ex; 1346 if (bh) 1347 extents_brelse(bh); 1348 return 0; 1349 } 1350 1351 /* 1352 * ext4_ext_next_allocated_block: 1353 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS. 1354 * NOTE: it considers block number from index entry as 1355 * allocated block. Thus, index entries have to be consistent 1356 * with leaves. 1357 */ 1358 ext4_lblk_t 1359 ext4_ext_next_allocated_block(struct ext4_ext_path *path) 1360 { 1361 int depth; 1362 1363 depth = path->p_depth; 1364 1365 if (depth == 0 && path->p_ext == NULL) 1366 return EXT_MAX_BLOCKS; 1367 1368 while (depth >= 0) { 1369 if (depth == path->p_depth) { 1370 /* leaf */ 1371 if (path[depth].p_ext && 1372 path[depth].p_ext != 1373 EXT_LAST_EXTENT(path[depth].p_hdr)) 1374 return le32_to_cpu(path[depth].p_ext[1].ee_block); 1375 } else { 1376 /* index */ 1377 if (path[depth].p_idx != 1378 EXT_LAST_INDEX(path[depth].p_hdr)) 1379 return le32_to_cpu(path[depth].p_idx[1].ei_block); 1380 } 1381 depth--; 1382 } 1383 1384 return EXT_MAX_BLOCKS; 1385 } 1386 1387 /* 1388 * ext4_ext_next_leaf_block: 1389 * returns first allocated block from next leaf or EXT_MAX_BLOCKS 1390 */ 1391 static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path) 1392 { 1393 int depth; 1394 1395 BUG_ON(path == NULL); 1396 depth = path->p_depth; 1397 1398 /* zero-tree has no leaf blocks at all */ 1399 if (depth == 0) 1400 return EXT_MAX_BLOCKS; 1401 1402 /* go to index block */ 1403 depth--; 1404 1405 while (depth >= 0) { 1406 if (path[depth].p_idx != 1407 EXT_LAST_INDEX(path[depth].p_hdr)) 1408 return (ext4_lblk_t) 1409 le32_to_cpu(path[depth].p_idx[1].ei_block); 1410 depth--; 1411 } 1412 1413 return EXT_MAX_BLOCKS; 1414 } 1415 1416 /* 1417 * ext4_ext_correct_indexes: 1418 * if leaf gets modified and modified extent is first in the leaf, 1419 * then we have to correct all indexes above. 1420 * TODO: do we need to correct tree in all cases? 
1421 */ 1422 static int ext4_ext_correct_indexes(void *icb, handle_t *handle, struct inode *inode, 1423 struct ext4_ext_path *path) 1424 { 1425 struct ext4_extent_header *eh; 1426 int depth = ext_depth(inode); 1427 struct ext4_extent *ex; 1428 __le32 border; 1429 int k, err = 0; 1430 1431 eh = path[depth].p_hdr; 1432 ex = path[depth].p_ext; 1433 1434 if (unlikely(ex == NULL || eh == NULL)) { 1435 EXT4_ERROR_INODE(inode, 1436 "ex %p == NULL or eh %p == NULL", ex, eh); 1437 return -EIO; 1438 } 1439 1440 if (depth == 0) { 1441 /* there is no tree at all */ 1442 return 0; 1443 } 1444 1445 if (ex != EXT_FIRST_EXTENT(eh)) { 1446 /* we correct tree if first leaf got modified only */ 1447 return 0; 1448 } 1449 1450 /* 1451 * TODO: we need correction if border is smaller than current one 1452 */ 1453 k = depth - 1; 1454 border = path[depth].p_ext->ee_block; 1455 err = ext4_ext_get_access(icb, handle, inode, path + k); 1456 if (err) 1457 return err; 1458 path[k].p_idx->ei_block = border; 1459 err = ext4_ext_dirty(icb, handle, inode, path + k); 1460 if (err) 1461 return err; 1462 1463 while (k--) { 1464 /* change all left-side indexes */ 1465 if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) 1466 break; 1467 err = ext4_ext_get_access(icb, handle, inode, path + k); 1468 if (err) 1469 break; 1470 path[k].p_idx->ei_block = border; 1471 err = ext4_ext_dirty(icb, handle, inode, path + k); 1472 if (err) 1473 break; 1474 } 1475 1476 return err; 1477 } 1478 1479 int 1480 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, 1481 struct ext4_extent *ex2) 1482 { 1483 unsigned short ext1_ee_len, ext2_ee_len; 1484 1485 /* 1486 * Make sure that both extents are initialized. We don't merge 1487 * unwritten extents so that we can be sure that end_io code has 1488 * the extent that was written properly split out and conversion to 1489 * initialized is trivial. 1490 */ 1491 if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) 1492 return 0; 1493 1494 ext1_ee_len = ext4_ext_get_actual_len(ex1); 1495 ext2_ee_len = ext4_ext_get_actual_len(ex2); 1496 1497 if (le32_to_cpu(ex1->ee_block) + ext1_ee_len != 1498 le32_to_cpu(ex2->ee_block)) 1499 return 0; 1500 1501 /* 1502 * To allow future support for preallocated extents to be added 1503 * as an RO_COMPAT feature, refuse to merge to extents if 1504 * this can result in the top bit of ee_len being set. 1505 */ 1506 if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) 1507 return 0; 1508 if (ext4_ext_is_unwritten(ex1) && 1509 (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)) 1510 return 0; 1511 #ifdef AGGRESSIVE_TEST 1512 if (ext1_ee_len >= 4) 1513 return 0; 1514 #endif 1515 1516 if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) 1517 return 1; 1518 return 0; 1519 } 1520 1521 /* 1522 * This function tries to merge the "ex" extent to the next extent in the tree. 1523 * It always tries to merge towards right. If you want to merge towards 1524 * left, pass "ex - 1" as argument instead of "ex". 1525 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns 1526 * 1 if they got merged. 
1527 */ 1528 static int ext4_ext_try_to_merge_right(struct inode *inode, 1529 struct ext4_ext_path *path, 1530 struct ext4_extent *ex) 1531 { 1532 struct ext4_extent_header *eh; 1533 unsigned int depth, len; 1534 int merge_done = 0, unwritten; 1535 1536 depth = ext_depth(inode); 1537 assert(path[depth].p_hdr != NULL); 1538 eh = path[depth].p_hdr; 1539 1540 while (ex < EXT_LAST_EXTENT(eh)) { 1541 if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) 1542 break; 1543 /* merge with next extent! */ 1544 unwritten = ext4_ext_is_unwritten(ex); 1545 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1546 + ext4_ext_get_actual_len(ex + 1)); 1547 if (unwritten) 1548 ext4_ext_mark_unwritten(ex); 1549 1550 if (ex + 1 < EXT_LAST_EXTENT(eh)) { 1551 len = (EXT_LAST_EXTENT(eh) - ex - 1) 1552 * sizeof(struct ext4_extent); 1553 memmove(ex + 1, ex + 2, len); 1554 } 1555 le16_add_cpu(&eh->eh_entries, -1); 1556 merge_done = 1; 1557 if (!eh->eh_entries) 1558 EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); 1559 } 1560 1561 return merge_done; 1562 } 1563 1564 /* 1565 * This function does a very simple check to see if we can collapse 1566 * an extent tree with a single extent tree leaf block into the inode. 1567 */ 1568 static void ext4_ext_try_to_merge_up(void *icb, handle_t *handle, 1569 struct inode *inode, 1570 struct ext4_ext_path *path) 1571 { 1572 size_t s; 1573 unsigned max_root = ext4_ext_space_root(inode, 0); 1574 ext4_fsblk_t blk; 1575 1576 if ((path[0].p_depth != 1) || 1577 (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) || 1578 (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root)) 1579 return; 1580 1581 /* 1582 * We need to modify the block allocation bitmap and the block 1583 * group descriptor to release the extent tree block. If we 1584 * can't get the journal credits, give up. 1585 */ 1586 if (ext4_journal_extend(icb, handle, 2)) 1587 return; 1588 1589 /* 1590 * Copy the extent data up to the inode 1591 */ 1592 blk = ext4_idx_pblock(path[0].p_idx); 1593 s = le16_to_cpu(path[1].p_hdr->eh_entries) * 1594 sizeof(struct ext4_extent_idx); 1595 s += sizeof(struct ext4_extent_header); 1596 1597 path[1].p_maxdepth = path[0].p_maxdepth; 1598 memcpy(path[0].p_hdr, path[1].p_hdr, s); 1599 path[0].p_depth = 0; 1600 path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + 1601 (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); 1602 path[0].p_hdr->eh_max = cpu_to_le16(max_root); 1603 1604 extents_brelse(path[1].p_bh); 1605 ext4_free_blocks(icb, handle, inode, NULL, blk, 1, 1606 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 1607 } 1608 1609 /* 1610 * This function tries to merge the @ex extent to neighbours in the tree. 1611 * return 1 if merge left else 0. 1612 */ 1613 static void ext4_ext_try_to_merge(void *icb, handle_t *handle, 1614 struct inode *inode, 1615 struct ext4_ext_path *path, 1616 struct ext4_extent *ex) { 1617 struct ext4_extent_header *eh; 1618 unsigned int depth; 1619 int merge_done = 0; 1620 1621 depth = ext_depth(inode); 1622 BUG_ON(path[depth].p_hdr == NULL); 1623 eh = path[depth].p_hdr; 1624 1625 if (ex > EXT_FIRST_EXTENT(eh)) 1626 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); 1627 1628 if (!merge_done) 1629 (void) ext4_ext_try_to_merge_right(inode, path, ex); 1630 1631 ext4_ext_try_to_merge_up(icb, handle, inode, path); 1632 } 1633 1634 /* 1635 * ext4_ext_insert_extent: 1636 * tries to merge requsted extent into the existing extent or 1637 * inserts requested extent as new one into the tree, 1638 * creating new leaf in the no-space case. 
1639 */ 1640 int ext4_ext_insert_extent(void *icb, handle_t *handle, struct inode *inode, 1641 struct ext4_ext_path **ppath, 1642 struct ext4_extent *newext, 1643 int gb_flags) 1644 { 1645 struct ext4_ext_path *path = *ppath; 1646 struct ext4_extent_header *eh; 1647 struct ext4_extent *ex, *fex; 1648 struct ext4_extent *nearex; /* nearest extent */ 1649 struct ext4_ext_path *npath = NULL; 1650 int depth, len, err; 1651 ext4_lblk_t next; 1652 int mb_flags = 0, unwritten; 1653 1654 if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1655 mb_flags |= EXT4_MB_DELALLOC_RESERVED; 1656 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1657 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); 1658 return -EIO; 1659 } 1660 depth = ext_depth(inode); 1661 ex = path[depth].p_ext; 1662 eh = path[depth].p_hdr; 1663 if (unlikely(path[depth].p_hdr == NULL)) { 1664 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); 1665 return -EIO; 1666 } 1667 1668 /* try to insert block into found extent and return */ 1669 if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) { 1670 1671 /* 1672 * Try to see whether we should rather test the extent on 1673 * right from ex, or from the left of ex. This is because 1674 * ext4_find_extent() can return either extent on the 1675 * left, or on the right from the searched position. This 1676 * will make merging more effective. 1677 */ 1678 if (ex < EXT_LAST_EXTENT(eh) && 1679 (le32_to_cpu(ex->ee_block) + 1680 ext4_ext_get_actual_len(ex) < 1681 le32_to_cpu(newext->ee_block))) { 1682 ex += 1; 1683 goto prepend; 1684 } else if ((ex > EXT_FIRST_EXTENT(eh)) && 1685 (le32_to_cpu(newext->ee_block) + 1686 ext4_ext_get_actual_len(newext) < 1687 le32_to_cpu(ex->ee_block))) 1688 ex -= 1; 1689 1690 /* Try to append newex to the ex */ 1691 if (ext4_can_extents_be_merged(inode, ex, newext)) { 1692 ext_debug("append [%d]%d block to %u:[%d]%d" 1693 "(from %llu)\n", 1694 ext4_ext_is_unwritten(newext), 1695 ext4_ext_get_actual_len(newext), 1696 le32_to_cpu(ex->ee_block), 1697 ext4_ext_is_unwritten(ex), 1698 ext4_ext_get_actual_len(ex), 1699 ext4_ext_pblock(ex)); 1700 err = ext4_ext_get_access(icb, handle, inode, 1701 path + depth); 1702 if (err) 1703 return err; 1704 unwritten = ext4_ext_is_unwritten(ex); 1705 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1706 + ext4_ext_get_actual_len(newext)); 1707 if (unwritten) 1708 ext4_ext_mark_unwritten(ex); 1709 eh = path[depth].p_hdr; 1710 nearex = ex; 1711 goto merge; 1712 } 1713 1714 prepend: 1715 /* Try to prepend newex to the ex */ 1716 if (ext4_can_extents_be_merged(inode, newext, ex)) { 1717 ext_debug("prepend %u[%d]%d block to %u:[%d]%d" 1718 "(from %llu)\n", 1719 le32_to_cpu(newext->ee_block), 1720 ext4_ext_is_unwritten(newext), 1721 ext4_ext_get_actual_len(newext), 1722 le32_to_cpu(ex->ee_block), 1723 ext4_ext_is_unwritten(ex), 1724 ext4_ext_get_actual_len(ex), 1725 ext4_ext_pblock(ex)); 1726 err = ext4_ext_get_access(icb, handle, inode, 1727 path + depth); 1728 if (err) 1729 return err; 1730 1731 unwritten = ext4_ext_is_unwritten(ex); 1732 ex->ee_block = newext->ee_block; 1733 ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); 1734 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1735 + ext4_ext_get_actual_len(newext)); 1736 if (unwritten) 1737 ext4_ext_mark_unwritten(ex); 1738 eh = path[depth].p_hdr; 1739 nearex = ex; 1740 goto merge; 1741 } 1742 } 1743 1744 depth = ext_depth(inode); 1745 eh = path[depth].p_hdr; 1746 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) 1747 goto has_space; 1748 1749 /* probably 
next leaf has space for us? */ 1750 fex = EXT_LAST_EXTENT(eh); 1751 next = EXT_MAX_BLOCKS; 1752 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) 1753 next = ext4_ext_next_leaf_block(path); 1754 if (next != EXT_MAX_BLOCKS) { 1755 ext_debug("next leaf block - %u\n", next); 1756 BUG_ON(npath != NULL); 1757 npath = ext4_find_extent(inode, next, NULL, 0); 1758 if (IS_ERR(npath)) 1759 return PTR_ERR(npath); 1760 BUG_ON(npath->p_depth != path->p_depth); 1761 eh = npath[depth].p_hdr; 1762 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { 1763 ext_debug("next leaf isn't full(%d)\n", 1764 le16_to_cpu(eh->eh_entries)); 1765 path = npath; 1766 goto has_space; 1767 } 1768 ext_debug("next leaf has no free space(%d,%d)\n", 1769 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 1770 } 1771 1772 /* 1773 * There is no free space in the found leaf. 1774 * We're gonna add a new leaf in the tree. 1775 */ 1776 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) 1777 mb_flags |= EXT4_MB_USE_RESERVED; 1778 err = ext4_ext_create_new_leaf(icb, handle, inode, mb_flags, gb_flags, 1779 ppath, newext); 1780 if (err) 1781 goto cleanup; 1782 depth = ext_depth(inode); 1783 eh = path[depth].p_hdr; 1784 1785 has_space: 1786 nearex = path[depth].p_ext; 1787 1788 err = ext4_ext_get_access(icb, handle, inode, path + depth); 1789 if (err) 1790 goto cleanup; 1791 1792 if (!nearex) { 1793 /* there is no extent in this leaf, create first one */ 1794 ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n", 1795 le32_to_cpu(newext->ee_block), 1796 ext4_ext_pblock(newext), 1797 ext4_ext_is_unwritten(newext), 1798 ext4_ext_get_actual_len(newext)); 1799 nearex = EXT_FIRST_EXTENT(eh); 1800 } else { 1801 if (le32_to_cpu(newext->ee_block) 1802 > le32_to_cpu(nearex->ee_block)) { 1803 /* Insert after */ 1804 ext_debug("insert %u:%llu:[%d]%d before: " 1805 "nearest %p\n", 1806 le32_to_cpu(newext->ee_block), 1807 ext4_ext_pblock(newext), 1808 ext4_ext_is_unwritten(newext), 1809 ext4_ext_get_actual_len(newext), 1810 nearex); 1811 nearex++; 1812 } else { 1813 /* Insert before */ 1814 BUG_ON(newext->ee_block == nearex->ee_block); 1815 ext_debug("insert %u:%llu:[%d]%d after: " 1816 "nearest %p\n", 1817 le32_to_cpu(newext->ee_block), 1818 ext4_ext_pblock(newext), 1819 ext4_ext_is_unwritten(newext), 1820 ext4_ext_get_actual_len(newext), 1821 nearex); 1822 } 1823 len = EXT_LAST_EXTENT(eh) - nearex + 1; 1824 if (len > 0) { 1825 ext_debug("insert %u:%llu:[%d]%d: " 1826 "move %d extents from 0x%p to 0x%p\n", 1827 le32_to_cpu(newext->ee_block), 1828 ext4_ext_pblock(newext), 1829 ext4_ext_is_unwritten(newext), 1830 ext4_ext_get_actual_len(newext), 1831 len, nearex, nearex + 1); 1832 memmove(nearex + 1, nearex, 1833 len * sizeof(struct ext4_extent)); 1834 } 1835 } 1836 1837 le16_add_cpu(&eh->eh_entries, 1); 1838 path[depth].p_ext = nearex; 1839 nearex->ee_block = newext->ee_block; 1840 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); 1841 nearex->ee_len = newext->ee_len; 1842 1843 merge: 1844 /* try to merge extents */ 1845 if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) 1846 ext4_ext_try_to_merge(icb, handle, inode, path, nearex); 1847 1848 1849 /* time to correct all indexes above */ 1850 err = ext4_ext_correct_indexes(icb, handle, inode, path); 1851 if (err) 1852 goto cleanup; 1853 1854 err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth); 1855 1856 cleanup: 1857 if (npath) { 1858 ext4_ext_drop_refs(npath); 1859 kfree(npath); 1860 } 1861 return err; 1862 } 1863 1864 static inline int 
get_default_free_blocks_flags(struct inode *inode) 1865 { 1866 return 0; 1867 } 1868 1869 /* FIXME!! we need to try to merge to left or right after zero-out */ 1870 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 1871 { 1872 ext4_fsblk_t ee_pblock; 1873 unsigned int ee_len; 1874 int ret; 1875 1876 ee_len = ext4_ext_get_actual_len(ex); 1877 ee_pblock = ext4_ext_pblock(ex); 1878 1879 ret = 0; 1880 1881 return ret; 1882 } 1883 1884 static int ext4_remove_blocks(void *icb, handle_t *handle, struct inode *inode, 1885 struct ext4_extent *ex, 1886 unsigned long from, unsigned long to) 1887 { 1888 struct buffer_head *bh; 1889 int i; 1890 1891 if (from >= le32_to_cpu(ex->ee_block) 1892 && to == le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) { 1893 /* tail removal */ 1894 unsigned long num, start; 1895 num = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - from; 1896 start = ext4_ext_pblock(ex) + ext4_ext_get_actual_len(ex) - num; 1897 ext4_free_blocks(icb, handle, inode, NULL, start, num, 0); 1898 } else if (from == le32_to_cpu(ex->ee_block) 1899 && to <= le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) { 1900 } else { 1901 } 1902 return 0; 1903 } 1904 1905 /* 1906 * routine removes index from the index block 1907 * it's used in truncate case only. thus all requests are for 1908 * last index in the block only 1909 */ 1910 int ext4_ext_rm_idx(void *icb, handle_t *handle, struct inode *inode, 1911 struct ext4_ext_path *path) 1912 { 1913 int err; 1914 ext4_fsblk_t leaf; 1915 1916 /* free index block */ 1917 path--; 1918 leaf = ext4_idx_pblock(path->p_idx); 1919 BUG_ON(path->p_hdr->eh_entries == 0); 1920 if ((err = ext4_ext_get_access(icb, handle, inode, path))) 1921 return err; 1922 path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); 1923 if ((err = ext4_ext_dirty(icb, handle, inode, path))) 1924 return err; 1925 ext4_free_blocks(icb, handle, inode, NULL, leaf, 1, 0); 1926 return err; 1927 } 1928 1929 static int 1930 ext4_ext_rm_leaf(void *icb, handle_t *handle, struct inode *inode, 1931 struct ext4_ext_path *path, unsigned long start) 1932 { 1933 int err = 0, correct_index = 0; 1934 int depth = ext_depth(inode), credits; 1935 struct ext4_extent_header *eh; 1936 unsigned a, b, block, num; 1937 unsigned long ex_ee_block; 1938 unsigned short ex_ee_len; 1939 struct ext4_extent *ex; 1940 1941 /* the header must be checked already in ext4_ext_remove_space() */ 1942 if (!path[depth].p_hdr) 1943 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 1944 eh = path[depth].p_hdr; 1945 BUG_ON(eh == NULL); 1946 1947 /* find where to start removing */ 1948 ex = EXT_LAST_EXTENT(eh); 1949 1950 ex_ee_block = le32_to_cpu(ex->ee_block); 1951 ex_ee_len = ext4_ext_get_actual_len(ex); 1952 1953 while (ex >= EXT_FIRST_EXTENT(eh) && 1954 ex_ee_block + ex_ee_len > start) { 1955 path[depth].p_ext = ex; 1956 1957 a = ex_ee_block > start ? ex_ee_block : start; 1958 b = (unsigned long long)ex_ee_block + ex_ee_len - 1 < 1959 EXT_MAX_BLOCKS ? 
                    ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCKS;


        if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) {
            block = 0;
            num = 0;
            BUG();
        } else if (a != ex_ee_block) {
            /* remove tail of the extent */
            block = ex_ee_block;
            num = a - block;
        } else if (b != ex_ee_block + ex_ee_len - 1) {
            /* remove head of the extent */
            block = a;
            num = b - a;
            /* there is no "make a hole" API yet */
            BUG();
        } else {
            /* remove whole extent: excellent! */
            block = ex_ee_block;
            num = 0;
            BUG_ON(a != ex_ee_block);
            BUG_ON(b != ex_ee_block + ex_ee_len - 1);
        }

        /* at present, an extent can't cross a block group */
        /* leaf + bitmap + group desc + sb + inode */
        credits = 5;
        if (ex == EXT_FIRST_EXTENT(eh)) {
            correct_index = 1;
            credits += (ext_depth(inode)) + 1;
        }

        /*handle = ext4_ext_journal_restart(icb, handle, credits);*/
        /*if (IS_ERR(icb, handle)) {*/
        /*err = PTR_ERR(icb, handle);*/
        /*goto out;*/
        /*}*/

        err = ext4_ext_get_access(icb, handle, inode, path + depth);
        if (err)
            goto out;

        err = ext4_remove_blocks(icb, handle, inode, ex, a, b);
        if (err)
            goto out;

        if (num == 0) {
            /* this extent is removed entirely; mark the slot unused */
            ext4_ext_store_pblock(ex, 0);
            eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
        }

        ex->ee_block = cpu_to_le32(block);
        ex->ee_len = cpu_to_le16(num);

        err = ext4_ext_dirty(icb, handle, inode, path + depth);
        if (err)
            goto out;

        ex--;
        ex_ee_block = le32_to_cpu(ex->ee_block);
        ex_ee_len = ext4_ext_get_actual_len(ex);
    }

    if (correct_index && eh->eh_entries)
        err = ext4_ext_correct_indexes(icb, handle, inode, path);

    /* if this leaf is free, then we should
     * remove it from the index block above */
    if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
        err = ext4_ext_rm_idx(icb, handle, inode, path + depth);

out:
    return err;
}

/*
 * ext4_split_extent_at() splits an extent at the given block.
 *
 * @handle: the journal handle
 * @inode: the file inode
 * @path: the path to the extent
 * @split: the logical block where the extent is split.
 * @split_flag: indicates if the extent could be zeroed out if the split
 *              fails, and the states (initialized or unwritten) of the
 *              new extents.
 * @flags: flags used to insert the new extent into the extent tree.
 *
 *
 * Splits extent [a, b] into two extents [a, @split) and [@split, b], whose
 * states are determined by split_flag.
 *
 * There are two cases:
 * a> the extent is split into two extents.
 * b> no split is needed, and the extent is just marked.
 *
 * Return 0 on success.

/*
 * ext4_split_extent_at() splits an extent at the given block.
 *
 * @handle: the journal handle
 * @inode: the file inode
 * @ppath: the path to the extent
 * @split: the logical block where the extent is split.
 * @split_flag: indicates whether the extent could be zeroed out if the split
 *		fails, and the states (initialized or unwritten) of the new
 *		extents.
 * @flags: flags used to insert the new extent into the extent tree.
 *
 * Splits extent [a, b] into two extents [a, @split) and [@split, b], whose
 * states are determined by @split_flag.
 *
 * There are two cases:
 *  a> the extent is split into two extents.
 *  b> a split is not needed, and the extent is just marked.
 *
 * return 0 on success.
 */
static int ext4_split_extent_at(void *icb, handle_t *handle,
		struct inode *inode,
		struct ext4_ext_path **ppath,
		ext4_lblk_t split,
		int split_flag,
		int flags)
{
	struct ext4_ext_path *path = *ppath;
	ext4_fsblk_t newblock;
	ext4_lblk_t ee_block;
	struct ext4_extent *ex, newex, orig_ex, zero_ex;
	struct ext4_extent *ex2 = NULL;
	unsigned int ee_len, depth;
	int err = 0;

	ext4_ext_show_leaf(inode, path);

	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);
	newblock = split - ee_block + ext4_ext_pblock(ex);

	BUG_ON(split < ee_block || split >= (ee_block + ee_len));

	err = ext4_ext_get_access(icb, handle, inode, path + depth);
	if (err)
		goto out;

	if (split == ee_block) {
		/*
		 * case b: block @split is the block that the extent begins with;
		 * then we just change the state of the extent, and splitting
		 * is not needed.
		 */
		if (split_flag & EXT4_EXT_MARK_UNWRIT2)
			ext4_ext_mark_unwritten(ex);
		else
			ext4_ext_mark_initialized(ex);

		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
			ext4_ext_try_to_merge(icb, handle, inode, path, ex);

		err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
		goto out;
	}

	/* case a */
	memcpy(&orig_ex, ex, sizeof(orig_ex));
	ex->ee_len = cpu_to_le16(split - ee_block);
	if (split_flag & EXT4_EXT_MARK_UNWRIT1)
		ext4_ext_mark_unwritten(ex);

	/*
	 * path may lead to a new leaf, not to the original leaf any more
	 * after ext4_ext_insert_extent() returns.
	 */
	err = ext4_ext_dirty(icb, handle, inode, path + depth);
	if (err)
		goto fix_extent_len;

	ex2 = &newex;
	ex2->ee_block = cpu_to_le32(split);
	ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
	ext4_ext_store_pblock(ex2, newblock);
	if (split_flag & EXT4_EXT_MARK_UNWRIT2)
		ext4_ext_mark_unwritten(ex2);

	err = ext4_ext_insert_extent(icb, handle, inode, ppath, &newex, flags);
	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
			if (split_flag & EXT4_EXT_DATA_VALID1) {
				err = ext4_ext_zeroout(inode, ex2);
				zero_ex.ee_block = ex2->ee_block;
				zero_ex.ee_len = cpu_to_le16(
						ext4_ext_get_actual_len(ex2));
				ext4_ext_store_pblock(&zero_ex,
						ext4_ext_pblock(ex2));
			} else {
				err = ext4_ext_zeroout(inode, ex);
				zero_ex.ee_block = ex->ee_block;
				zero_ex.ee_len = cpu_to_le16(
						ext4_ext_get_actual_len(ex));
				ext4_ext_store_pblock(&zero_ex,
						ext4_ext_pblock(ex));
			}
		} else {
			err = ext4_ext_zeroout(inode, &orig_ex);
			zero_ex.ee_block = orig_ex.ee_block;
			zero_ex.ee_len = cpu_to_le16(
					ext4_ext_get_actual_len(&orig_ex));
			ext4_ext_store_pblock(&zero_ex,
					ext4_ext_pblock(&orig_ex));
		}

		if (err)
			goto fix_extent_len;
		/* update the extent length and mark as initialized */
		ex->ee_len = cpu_to_le16(ee_len);
		ext4_ext_try_to_merge(icb, handle, inode, path, ex);
		err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
		if (err)
			goto fix_extent_len;

		goto out;
	} else if (err)
		goto fix_extent_len;

out:
	ext4_ext_show_leaf(inode, path);
	return err;

fix_extent_len:
	ex->ee_len = orig_ex.ee_len;
	ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
	return err;
}
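
/*
 * Illustrative example for ext4_split_extent_at() above (comment added here,
 * not part of the original source): splitting an extent that maps logical
 * blocks 100..149 at split == 120 with EXT4_EXT_MARK_UNWRIT1 yields
 * [100..119] marked unwritten and [120..149] left initialized, both still
 * pointing into the original physical range (the right-hand extent starts at
 * ext4_ext_pblock(ex) + 20).  If split equals the first block of the extent,
 * no split happens and only the extent state is changed (case b).
 */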

/*
 * returns 1 if the current index has to be freed (even partially)
 */
#ifndef __REACTOS__
static int inline
#else
inline int
#endif
ext4_ext_more_to_rm(struct ext4_ext_path *path)
{
	BUG_ON(path->p_idx == NULL);

	if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
		return 0;

	/*
	 * if truncation on a deeper level happened, it wasn't partial,
	 * so we have to consider the current index for truncation
	 */
	if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
		return 0;
	return 1;
}
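
/*
 * Explanatory note (comment added here, not part of the original source):
 * ext4_ext_remove_space() below walks the tree iteratively, using the path[]
 * array as an explicit stack instead of recursion.  i is the current depth,
 * p_idx is the index being visited at that depth, and p_block remembers
 * eh_entries at the moment of descent, so ext4_ext_more_to_rm() can tell
 * whether the subtree just processed was removed completely (eh_entries
 * changed, keep scanning to the left) or only truncated partially (nothing
 * more to do at this level).
 */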

int ext4_ext_remove_space(void *icb, struct inode *inode, unsigned long start)
{
	struct super_block *sb = inode->i_sb;
	int depth = ext_depth(inode);
	struct ext4_ext_path *path;
	handle_t *handle = NULL;
	int i = 0, err = 0;

	/* probably the first extent we are going to free will be the last in the block */
	/*handle = ext4_journal_start(inode, depth + 1);*/
	/*if (IS_ERR(icb, handle))*/
	/*return PTR_ERR(icb, handle);*/

	/*
	 * we start scanning from the right side, freeing all the blocks
	 * after i_size and walking into the depth
	 */
	path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
	if (path == NULL) {
		ext4_journal_stop(icb, handle);
		return -ENOMEM;
	}
	memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
	path[0].p_hdr = ext_inode_hdr(inode);
	if (ext4_ext_check_inode(inode)) {
		err = -EIO;
		goto out;
	}
	path[0].p_depth = depth;

	while (i >= 0 && err == 0) {
		if (i == depth) {
			/* this is a leaf block */
			err = ext4_ext_rm_leaf(icb, handle, inode, path, start);
			/* the root level has p_bh == NULL; extents_brelse() tolerates this */
			extents_brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
			continue;
		}

		/* this is an index block */
		if (!path[i].p_hdr) {
			path[i].p_hdr = ext_block_hdr(path[i].p_bh);
		}

		if (!path[i].p_idx) {
			/* this level hasn't been touched yet */
			path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
		} else {
			/* we were already here; look at the next index */
			path[i].p_idx--;
		}

		if (ext4_ext_more_to_rm(path + i)) {
			struct buffer_head *bh;
			/* go to the next level */
			memset(path + i + 1, 0, sizeof(*path));
			bh = read_extent_tree_block(inode, ext4_idx_pblock(path[i].p_idx), path[0].p_depth - (i + 1), 0);
			if (IS_ERR(bh)) {
				/* should we reset i_size? */
				err = -EIO;
				break;
			}
			path[i+1].p_bh = bh;

			/* store the actual number of indexes so we can tell
			 * whether this number changes at the next iteration */
			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
			i++;
		} else {
			/* we finished processing this index; go up */
			if (path[i].p_hdr->eh_entries == 0 && i > 0) {
				/* the index is empty; remove it.
				 * the handle must already have been prepared by
				 * truncatei_leaf() */
				err = ext4_ext_rm_idx(icb, handle, inode, path + i);
			}
			/* the root level has p_bh == NULL; extents_brelse() tolerates this */
			extents_brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
		}
	}

	/* TODO: flexible tree reduction should be here */
	if (path->p_hdr->eh_entries == 0) {
		/*
		 * truncating to zero freed the whole tree,
		 * so we need to correct eh_depth
		 */
		err = ext4_ext_get_access(icb, handle, inode, path);
		if (err == 0) {
			ext_inode_hdr(inode)->eh_depth = 0;
			ext_inode_hdr(inode)->eh_max =
				cpu_to_le16(ext4_ext_space_root(inode, 0));
			err = ext4_ext_dirty(icb, handle, inode, path);
		}
	}
out:
	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}
	ext4_journal_stop(icb, handle);

	return err;
}

int ext4_ext_tree_init(void *icb, handle_t *handle, struct inode *inode)
{
	struct ext4_extent_header *eh;

	eh = ext_inode_hdr(inode);
	eh->eh_depth = 0;
	eh->eh_entries = 0;
	eh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
	eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
	ext4_mark_inode_dirty(icb, handle, inode);
	return 0;
}

/*
 * called at mount time
 */
void ext4_ext_init(struct super_block *sb)
{
	/*
	 * possible initialization would be here
	 */
}

static int ext4_ext_convert_to_initialized (
		void *icb,
		handle_t *handle,
		struct inode *inode,
		struct ext4_ext_path **ppath,
		ext4_lblk_t split,
		unsigned long blocks,
		int flags)
{
	int depth = ext_depth(inode), err;
	struct ext4_extent *ex = (*ppath)[depth].p_ext;

	assert (le32_to_cpu(ex->ee_block) <= split);

	if (split + blocks == le32_to_cpu(ex->ee_block) +
			ext4_ext_get_actual_len(ex)) {

		/* split and initialize the right part */
		err = ext4_split_extent_at(icb, handle, inode, ppath, split,
				EXT4_EXT_MARK_UNWRIT1, flags);

	} else if (le32_to_cpu(ex->ee_block) == split) {

		/* split and initialize the left part */
		err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks,
				EXT4_EXT_MARK_UNWRIT2, flags);

	} else {

		/* split one extent into three and initialize the second */
		err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks,
				EXT4_EXT_MARK_UNWRIT1 |
				EXT4_EXT_MARK_UNWRIT2, flags);
		if (0 == err) {
			err = ext4_split_extent_at(icb, handle, inode, ppath, split,
					EXT4_EXT_MARK_UNWRIT1, flags);
		}
	}

	return err;
}
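
/*
 * Illustrative example for ext4_ext_convert_to_initialized() above (comment
 * added here, not part of the original source): for an unwritten extent
 * covering logical blocks 100..149,
 *  - split == 130, blocks == 20 initializes the tail 130..149 (first case:
 *    split + blocks reaches the end of the extent);
 *  - split == 100, blocks == 20 initializes the head 100..119 (second case);
 *  - split == 120, blocks == 10 splits the extent twice so that only the
 *    middle part 120..129 ends up initialized (third case).
 */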

int ext4_ext_get_blocks(void *icb, handle_t *handle, struct inode *inode, ext4_fsblk_t iblock,
		unsigned long max_blocks, struct buffer_head *bh_result,
		int create, int flags)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_extent newex, *ex;
	int goal, err = 0, depth;
	unsigned long allocated = 0;
	ext4_fsblk_t next, newblock;

	clear_buffer_new(bh_result);
	/*mutex_lock(&ext4_I(inode)->truncate_mutex);*/

	/* find the extent for this block */
	path = ext4_find_extent(inode, iblock, NULL, 0);
	if (IS_ERR(path)) {
		err = PTR_ERR(path);
		path = NULL;
		goto out2;
	}

	depth = ext_depth(inode);

	/*
	 * a consistent leaf must not be empty;
	 * this situation is possible, though, _during_ tree modification;
	 * this is why the assert can't be put in ext4_ext_find_extent()
	 */
	BUG_ON(path[depth].p_ext == NULL && depth != 0);

	if ((ex = path[depth].p_ext)) {
		ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
		ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
		unsigned short ee_len = ext4_ext_get_actual_len(ex);
		/* if the found extent covers the block, simply return it */
		if (iblock >= ee_block && iblock < ee_block + ee_len) {

			/* number of remaining blocks in the extent */
			allocated = ee_len + ee_block - iblock;

			if (ext4_ext_is_unwritten(ex)) {
				if (create) {
					newblock = iblock - ee_block + ee_start;
					err = ext4_ext_convert_to_initialized (
							icb, handle,
							inode,
							&path,
							iblock,
							allocated,
							flags);
					if (err)
						goto out2;

				} else {
					newblock = 0;
				}
			} else {
				newblock = iblock - ee_block + ee_start;
			}
			goto out;
		}
	}

	/*
	 * the requested block isn't allocated yet;
	 * we can't create the block if the create flag is zero
	 */
	if (!create) {
		goto out2;
	}

	/* find the next allocated block so that we know how many
	 * blocks we can allocate without overlapping the next extent */
	next = ext4_ext_next_allocated_block(path);
	BUG_ON(next <= iblock);
	allocated = next - iblock;
	if (flags & EXT4_GET_BLOCKS_PRE_IO && max_blocks > EXT_UNWRITTEN_MAX_LEN)
		max_blocks = EXT_UNWRITTEN_MAX_LEN;
	if (allocated > max_blocks)
		allocated = max_blocks;

	/* allocate a new block */
	goal = ext4_ext_find_goal(inode, path, iblock);

	newblock = ext4_new_meta_blocks(icb, handle, inode, goal, 0,
			&allocated, &err);
	if (!newblock)
		goto out2;

	/* try to insert the new extent into the found leaf and return */
	newex.ee_block = cpu_to_le32(iblock);
	ext4_ext_store_pblock(&newex, newblock);
	newex.ee_len = cpu_to_le16(allocated);
	/* if it's fallocate, mark the extent as unwritten */
	if (flags & EXT4_GET_BLOCKS_PRE_IO) {
		ext4_ext_mark_unwritten(&newex);
	}
	err = ext4_ext_insert_extent(icb, handle, inode, &path, &newex,
			flags & EXT4_GET_BLOCKS_PRE_IO);

	if (err) {
		/* free the data blocks we just allocated */
		ext4_free_blocks(icb, handle, inode, NULL, ext4_ext_pblock(&newex),
				le16_to_cpu(newex.ee_len), get_default_free_blocks_flags(inode));
		goto out2;
	}

	ext4_mark_inode_dirty(icb, handle, inode);

	/* the previous routine could use the block we allocated */
	if (ext4_ext_is_unwritten(&newex))
		newblock = 0;
	else
		newblock = ext4_ext_pblock(&newex);

	set_buffer_new(bh_result);

out:
	if (allocated > max_blocks)
		allocated = max_blocks;

	ext4_ext_show_leaf(inode, path);
	set_buffer_mapped(bh_result);
	bh_result->b_bdev = inode->i_sb->s_bdev;
	bh_result->b_blocknr = newblock;
out2:
	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}
	/*mutex_unlock(&ext4_I(inode)->truncate_mutex);*/

	return err ? err : allocated;
}
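
/*
 * Hypothetical caller of ext4_ext_get_blocks() (comment added here, not part
 * of the original source; IrpContext and lblock are placeholder names):
 *
 *	struct buffer_head bh;
 *	int rc;
 *
 *	memset(&bh, 0, sizeof(bh));
 *	rc = ext4_ext_get_blocks(IrpContext, NULL, inode,
 *				 lblock, 16, &bh, 1, 0);
 *	if (rc > 0) {
 *		// rc blocks starting at logical block lblock are mapped to
 *		// physical blocks beginning at bh.b_blocknr
 *	} else if (rc < 0) {
 *		// negative errno-style error code
 *	}
 */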

int ext4_ext_truncate(void *icb, struct inode *inode, unsigned long start)
{
	int ret = ext4_ext_remove_space(icb, inode, start);

	/* Save modifications on i_blocks field of the inode. */
	if (!ret)
		ret = ext4_mark_inode_dirty(icb, NULL, inode);

	return ret;
}

#ifdef _MSC_VER
#pragma warning(pop)
#endif