/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "ext2fs.h"
#ifdef __REACTOS__
#include "linux/ext4.h"
#else
#include "linux\ext4.h"
#endif

#if defined(_MSC_VER) && defined(__REACTOS__)
#pragma warning(push)
#pragma warning(disable: 4018)
#pragma warning(disable: 4242)
#pragma warning(disable: 4244)
#endif


/*
 * used by extent splitting.
 */
#define EXT4_EXT_MAY_ZEROOUT    0x1  /* safe to zeroout if split fails \
                                        due to ENOSPC */
#define EXT4_EXT_MARK_UNWRIT1   0x2  /* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT2   0x4  /* mark second half unwritten */

#define EXT4_EXT_DATA_VALID1    0x8  /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2    0x10 /* second half contains valid data */

#define CONFIG_EXTENT_TEST
#ifdef CONFIG_EXTENT_TEST

#define ext4_mark_inode_dirty(icb, handle, n) ext3_mark_inode_dirty(icb, n)
static inline ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
{
    PEXT2_VCB Vcb;
    Vcb = inode->i_sb->s_priv;
    return (inode->i_ino - 1) / BLOCKS_PER_GROUP;
}

static ext4_fsblk_t ext4_new_meta_blocks(void *icb, handle_t *handle, struct inode *inode,
        ext4_fsblk_t goal,
        unsigned int flags,
        unsigned long *count, int *errp)
{
    NTSTATUS status;
    ULONG blockcnt = (count) ? *count : 1;
    ULONG block = 0;

    status = Ext2NewBlock((PEXT2_IRP_CONTEXT)icb,
                          inode->i_sb->s_priv,
                          0, goal,
                          &block,
                          &blockcnt);
    if (count)
        *count = blockcnt;

    if (!NT_SUCCESS(status)) {
        *errp = Ext2LinuxError(status);
        return 0;
    }
    inode->i_blocks += (blockcnt * (inode->i_sb->s_blocksize >> 9));
    return block;
}

static void ext4_free_blocks(void *icb, handle_t *handle, struct inode *inode, void *fake,
        ext4_fsblk_t block, int count, int flags)
{
    Ext2FreeBlock((PEXT2_IRP_CONTEXT)icb, inode->i_sb->s_priv, block, count);
    inode->i_blocks -= count * (inode->i_sb->s_blocksize >> 9);
    return;
}

static inline void ext_debug(char *str, ...)
{
}

#if TRUE
#ifdef __REACTOS__
#define EXT4_ERROR_INODE(inode, str, ...) do {                      \
        DbgPrint("inode[%p]: " str "\n", inode, ##__VA_ARGS__);     \
    } while(0)
#else
#define EXT4_ERROR_INODE(inode, str, ...) do {                      \
        DbgPrint("inode[%p]: " str "\n", inode, __VA_ARGS__);       \
    } while(0)
#endif
#else
#define EXT4_ERROR_INODE
#endif

#define ext4_std_error(s, err)
#define assert ASSERT

#endif

/*
 * Return the right sibling of a tree node (either leaf or index node).
 */

#define EXT_MAX_BLOCKS 0xffffffff


static inline int ext4_ext_space_block(struct inode *inode, int check)
{
    int size;

    size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
           / sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 6)
        size = 6;
#endif
    return size;
}

static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
{
    int size;

    size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
           / sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 5)
        size = 5;
#endif
    return size;
}

static inline int ext4_ext_space_root(struct inode *inode, int check)
{
    int size;

    size = sizeof(EXT4_I(inode)->i_block);
    size -= sizeof(struct ext4_extent_header);
    size /= sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 3)
        size = 3;
#endif
    return size;
}

static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
{
    int size;

    size = sizeof(EXT4_I(inode)->i_block);
    size -= sizeof(struct ext4_extent_header);
    size /= sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 4)
        size = 4;
#endif
    return size;
}

static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
    int max;

    if (depth == ext_depth(inode)) {
        if (depth == 0)
            max = ext4_ext_space_root(inode, 1);
        else
            max = ext4_ext_space_root_idx(inode, 1);
    } else {
        if (depth == 0)
            max = ext4_ext_space_block(inode, 1);
        else
            max = ext4_ext_space_block_idx(inode, 1);
    }

    return max;
}
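/*
 * Worked example of the capacity math above (illustrative, not used by the
 * code): with a 4096-byte block, an on-disk tree node holds
 *     (4096 - sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent)
 *   = (4096 - 12) / 12 = 340
 * entries, for leaves and index nodes alike (both entry types are 12 bytes).
 * The in-inode root lives in the 60-byte i_block area, so it holds only
 * (60 - 12) / 12 = 4 entries. ext4_ext_max_entries() simply picks the right
 * variant for the node's position in the tree.
 */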
static int __ext4_ext_check(const char *function, unsigned int line,
        struct inode *inode,
        struct ext4_extent_header *eh, int depth,
        ext4_fsblk_t pblk);

/*
 * read_extent_tree_block:
 * Get a buffer_head by extents_bread, and read fresh data from the storage.
 */
static struct buffer_head *
__read_extent_tree_block(const char *function, unsigned int line,
        struct inode *inode, ext4_fsblk_t pblk, int depth,
        int flags)
{
    struct buffer_head *bh;
    int err;

    bh = extents_bread(inode->i_sb, pblk);
    if (!bh)
        return ERR_PTR(-ENOMEM);

    if (!buffer_uptodate(bh)) {
        err = -EIO;
        goto errout;
    }
    if (buffer_verified(bh))
        return bh;
    err = __ext4_ext_check(function, line, inode,
            ext_block_hdr(bh), depth, pblk);
    if (err)
        goto errout;
    set_buffer_verified(bh);
    return bh;
errout:
    extents_brelse(bh);
    return ERR_PTR(err);
}

#define read_extent_tree_block(inode, pblk, depth, flags)       \
    __read_extent_tree_block("", __LINE__, (inode), (pblk),     \
            (depth), (flags))

#define ext4_ext_check(inode, eh, depth, pblk)                  \
    __ext4_ext_check("", __LINE__, (inode), (eh), (depth), (pblk))

int ext4_ext_check_inode(struct inode *inode)
{
    return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
}

static uint32_t ext4_ext_block_csum(struct inode *inode,
        struct ext4_extent_header *eh)
{
    /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/
    return 0;
}

static void ext4_extent_block_csum_set(struct inode *inode,
        struct ext4_extent_header *eh)
{
    struct ext4_extent_tail *tail;

    tail = find_ext4_extent_tail(eh);
    tail->et_checksum = ext4_ext_block_csum(inode, eh);
}

static int ext4_split_extent_at(void *icb,
        handle_t *handle,
        struct inode *inode,
        struct ext4_ext_path **ppath,
        ext4_lblk_t split,
        int split_flag,
        int flags);

static inline int
ext4_force_split_extent_at(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_ext_path **ppath, ext4_lblk_t lblk,
        int nofail)
{
    struct ext4_ext_path *path = *ppath;
    int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);

    return ext4_split_extent_at(icb, handle, inode, ppath, lblk, unwritten ?
            EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
            EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
            (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL : 0));
}

/*
 * could return:
 * - EROFS
 * - ENOMEM
 */
static int ext4_ext_get_access(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_ext_path *path)
{
    if (path->p_bh) {
        /* path points to block */
        return ext4_journal_get_write_access(icb, handle, path->p_bh);
    }
    /* path points to leaf/index in inode body */
    /* we use in-core data, no need to protect them */
    return 0;
}
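/*
 * Typical edit sequence around ext4_ext_get_access() (a sketch, not a call
 * made at this spot):
 *
 *     err = ext4_ext_get_access(icb, handle, inode, path + depth);
 *     if (!err) {
 *             ... modify extents/indexes behind path[depth] in place ...
 *             err = ext4_ext_dirty(icb, handle, inode, path + depth);
 *     }
 *
 * get_access declares the intent to modify a journaled buffer before the
 * change; ext4_ext_dirty() records the result afterwards. For the root,
 * which lives in the inode body (p_bh == NULL), both halves reduce to
 * marking the inode itself dirty.
 */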
static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
        struct ext4_ext_path *path,
        ext4_lblk_t block)
{
    if (path) {
        int depth = path->p_depth;
        struct ext4_extent *ex;

        /*
         * Try to predict block placement assuming that we are
         * filling in a file which will eventually be
         * non-sparse --- i.e., in the case of libbfd writing
         * an ELF object sections out-of-order but in a way
         * that eventually results in a contiguous object or
         * executable file, or some database extending a table
         * space file.  However, this is actually somewhat
         * non-ideal if we are writing a sparse file such as
         * qemu or KVM writing a raw image file that is going
         * to stay fairly sparse, since it will end up
         * fragmenting the file system's free space.  Maybe we
         * should have some heuristics or some way to allow
         * userspace to pass a hint to the file system,
         * especially if the latter case turns out to be
         * common.
         */
        ex = path[depth].p_ext;
        if (ex) {
            ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
            ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);

            if (block > ext_block)
                return ext_pblk + (block - ext_block);
            else
                return ext_pblk - (ext_block - block);
        }

        /* it looks like index is empty;
         * try to find starting block from index itself */
        if (path[depth].p_bh)
            return path[depth].p_bh->b_blocknr;
    }

    /* OK. use inode's group */
    return ext4_inode_to_goal_block(inode);
}

/*
 * Allocation for a metadata block.
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_ext_path *path,
        struct ext4_extent *ex, int *err, unsigned int flags)
{
    ext4_fsblk_t goal, newblock;

    goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags,
            NULL, err);
    return newblock;
}

int __ext4_ext_dirty(const char *where, unsigned int line,
        void *icb, handle_t *handle,
        struct inode *inode,
        struct ext4_ext_path *path)
{
    int err;

    if (path->p_bh) {
        ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
        /* path points to block */
        err = __ext4_handle_dirty_metadata(where, line, icb, handle, inode, path->p_bh);
    } else {
        /* path points to leaf/index in inode body */
        err = ext4_mark_inode_dirty(icb, handle, inode);
    }
    return err;
}

void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
    int depth, i;

    if (!path)
        return;
    depth = path->p_depth;
    for (i = 0; i <= depth; i++, path++)
        if (path->p_bh) {
            extents_brelse(path->p_bh);
            path->p_bh = NULL;
        }
}
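/*
 * Path lifecycle, summarized (illustrative): lookups hand out an array of
 * ext4_ext_path slots, one per tree level, holding referenced buffers that
 * must be released when the caller is done:
 *
 *     struct ext4_ext_path *path = ext4_find_extent(inode, lblk, NULL, 0);
 *     if (!IS_ERR(path)) {
 *             ... use path[path->p_depth].p_ext ...
 *             ext4_ext_drop_refs(path);
 *             kfree(path);
 *     }
 */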
/*
 * Check whether the basic information inside the extent header
 * is correct or not.
 */
static int __ext4_ext_check(const char *function, unsigned int line,
        struct inode *inode,
        struct ext4_extent_header *eh, int depth,
        ext4_fsblk_t pblk)
{
    struct ext4_extent_tail *tail;
    const char *error_msg;
#ifndef __REACTOS__
    int max = 0;
#endif

    if (eh->eh_magic != EXT4_EXT_MAGIC) {
        error_msg = "invalid magic";
        goto corrupted;
    }
    if (le16_to_cpu(eh->eh_depth) != depth) {
        error_msg = "unexpected eh_depth";
        goto corrupted;
    }
    if (eh->eh_max == 0) {
        error_msg = "invalid eh_max";
        goto corrupted;
    }
    if (eh->eh_entries > eh->eh_max) {
        error_msg = "invalid eh_entries";
        goto corrupted;
    }

    tail = find_ext4_extent_tail(eh);
    if (tail->et_checksum != ext4_ext_block_csum(inode, eh)) {
        ext_debug("Warning: extent checksum damaged? tail->et_checksum = "
                "%lu, ext4_ext_block_csum = %lu\n",
                tail->et_checksum, ext4_ext_block_csum(inode, eh));
    }

    return 0;

corrupted:
    ext_debug("corrupted! %s\n", error_msg);
    return -EIO;
}

/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block;
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
        struct ext4_ext_path *path, ext4_lblk_t block)
{
    struct ext4_extent_header *eh = path->p_hdr;
    struct ext4_extent_idx *r, *l, *m;

    ext_debug("binsearch for %u(idx): ", block);

    l = EXT_FIRST_INDEX(eh) + 1;
    r = EXT_LAST_INDEX(eh);
    while (l <= r) {
        m = l + (r - l) / 2;
        if (block < (m->ei_block))
            r = m - 1;
        else
            l = m + 1;
        ext_debug("%p(%u):%p(%u):%p(%u) ", l, (l->ei_block),
                m, (m->ei_block),
                r, (r->ei_block));
    }

    path->p_idx = l - 1;
    ext_debug(" -> %u->%lld ", (path->p_idx->ei_block),
            ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
    {
        struct ext4_extent_idx *chix, *ix;
        int k;

        chix = ix = EXT_FIRST_INDEX(eh);
        for (k = 0; k < (eh->eh_entries); k++, ix++) {
            if (k != 0 &&
                    (ix->ei_block) <= (ix[-1].ei_block)) {
                printk(KERN_DEBUG "k=%d, ix=0x%p, "
                        "first=0x%p\n", k,
                        ix, EXT_FIRST_INDEX(eh));
                printk(KERN_DEBUG "%u <= %u\n",
                        (ix->ei_block),
                        (ix[-1].ei_block));
            }
            BUG_ON(k && (ix->ei_block)
                    <= (ix[-1].ei_block));
            if (block < (ix->ei_block))
                break;
            chix = ix;
        }
        BUG_ON(chix != path->p_idx);
    }
#endif
}

/*
 * ext4_ext_binsearch:
 * binary search for the closest extent of the given block;
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
        struct ext4_ext_path *path, ext4_lblk_t block)
{
    struct ext4_extent_header *eh = path->p_hdr;
    struct ext4_extent *r, *l, *m;

    if (eh->eh_entries == 0) {
        /*
         * this leaf is empty:
         * we get such a leaf in split/add case
         */
        return;
    }

    ext_debug("binsearch for %u: ", block);

    l = EXT_FIRST_EXTENT(eh) + 1;
    r = EXT_LAST_EXTENT(eh);

    while (l <= r) {
        m = l + (r - l) / 2;
        if (block < m->ee_block)
            r = m - 1;
        else
            l = m + 1;
        ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block,
                m, (m->ee_block),
                r, (r->ee_block));
    }

    path->p_ext = l - 1;
    ext_debug(" -> %d:%llu:[%d]%d ",
            (path->p_ext->ee_block),
            ext4_ext_pblock(path->p_ext),
            ext4_ext_is_unwritten(path->p_ext),
            ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
    {
        struct ext4_extent *chex, *ex;
        int k;

        chex = ex = EXT_FIRST_EXTENT(eh);
        for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
            BUG_ON(k && (ex->ee_block)
                    <= (ex[-1].ee_block));
            if (block < (ex->ee_block))
                break;
            chex = ex;
        }
        BUG_ON(chex != path->p_ext);
    }
#endif
}
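/*
 * Example of the binary-search contract (illustrative): for a leaf holding
 * extents that start at logical blocks {0, 8, 24}, a search for block 10
 * leaves p_ext on the extent starting at 8 -- the rightmost entry whose
 * ee_block is <= the searched block -- which does not guarantee that the
 * extent actually covers block 10. Callers still have to compare against
 * ext4_ext_get_actual_len().
 */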
#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
    int k, l = path->p_depth;

    ext_debug("path:");
    for (k = 0; k <= l; k++, path++) {
        if (path->p_idx) {
            ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
                    ext4_idx_pblock(path->p_idx));
        } else if (path->p_ext) {
            ext_debug(" %d:[%d]%d:%llu ",
                    le32_to_cpu(path->p_ext->ee_block),
                    ext4_ext_is_unwritten(path->p_ext),
                    ext4_ext_get_actual_len(path->p_ext),
                    ext4_ext_pblock(path->p_ext));
        } else
            ext_debug(" []");
    }
    ext_debug("\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
    int depth = ext_depth(inode);
    struct ext4_extent_header *eh;
    struct ext4_extent *ex;
    int i;

    if (!path)
        return;

    eh = path[depth].p_hdr;
    ex = EXT_FIRST_EXTENT(eh);

    ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);

    for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
        ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
                ext4_ext_is_unwritten(ex),
                ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
    }
    ext_debug("\n");
}

static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
        ext4_fsblk_t newblock, int level)
{
    int depth = ext_depth(inode);
    struct ext4_extent *ex;

    if (depth != level) {
        struct ext4_extent_idx *idx;
        idx = path[level].p_idx;
        while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
            ext_debug("%d: move %d:%llu in new index %llu\n", level,
                    le32_to_cpu(idx->ei_block),
                    ext4_idx_pblock(idx),
                    newblock);
            idx++;
        }

        return;
    }

    ex = path[depth].p_ext;
    while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
        ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
                le32_to_cpu(ex->ee_block),
                ext4_ext_pblock(ex),
                ext4_ext_is_unwritten(ex),
                ext4_ext_get_actual_len(ex),
                newblock);
        ex++;
    }
}

#else
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#define ext4_ext_show_move(inode, path, newblock, level)
#endif
struct ext4_ext_path *
ext4_find_extent(struct inode *inode, ext4_lblk_t block,
        struct ext4_ext_path **orig_path, int flags)
{
    struct ext4_extent_header *eh;
    struct buffer_head *bh;
    struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
    short int depth, i, ppos = 0;
    int ret;

    eh = ext_inode_hdr(inode);
    depth = ext_depth(inode);

    if (path) {
        ext4_ext_drop_refs(path);
        if (depth > path[0].p_maxdepth) {
            kfree(path);
            *orig_path = path = NULL;
        }
    }
    if (!path) {
        /* account possible depth increase */
        path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
                GFP_NOFS);
        if (unlikely(!path))
            return ERR_PTR(-ENOMEM);
        path[0].p_maxdepth = depth + 1;
    }
    path[0].p_hdr = eh;
    path[0].p_bh = NULL;

    i = depth;
    /* walk through the tree */
    while (i) {
        ext_debug("depth %d: num %d, max %d\n",
                ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

        ext4_ext_binsearch_idx(inode, path + ppos, block);
        path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
        path[ppos].p_depth = i;
        path[ppos].p_ext = NULL;

        bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
                flags);
        if (unlikely(IS_ERR(bh))) {
            ret = PTR_ERR(bh);
            goto err;
        }

        eh = ext_block_hdr(bh);
        ppos++;
        if (unlikely(ppos > depth)) {
            extents_brelse(bh);
            EXT4_ERROR_INODE(inode,
                    "ppos %d > depth %d", ppos, depth);
            ret = -EIO;
            goto err;
        }
        path[ppos].p_bh = bh;
        path[ppos].p_hdr = eh;
    }

    path[ppos].p_depth = i;
    path[ppos].p_ext = NULL;
    path[ppos].p_idx = NULL;

    /* find extent */
    ext4_ext_binsearch(inode, path + ppos, block);
    /* if not an empty leaf */
    if (path[ppos].p_ext)
        path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);

    ext4_ext_show_path(inode, path);

    return path;

err:
    ext4_ext_drop_refs(path);
    if (path) {
        kfree(path);
        if (orig_path)
            *orig_path = NULL;
    }
    return ERR_PTR(ret);
}
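/*
 * Shape of the walk performed by ext4_find_extent(), for a depth-1 tree
 * (illustrative):
 *
 *     inode->i_block (index root)      -> path[0].p_hdr, p_idx set by
 *          |                              ext4_ext_binsearch_idx()
 *          v  ext4_idx_pblock(p_idx)
 *     leaf block on disk               -> path[1].p_hdr, p_ext set by
 *                                         ext4_ext_binsearch()
 *
 * One path slot is filled per level; only the last one carries p_ext.
 */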
/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 */
static int ext4_ext_insert_index(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_ext_path *curp,
        int logical, ext4_fsblk_t ptr)
{
    struct ext4_extent_idx *ix;
    int len, err;

    err = ext4_ext_get_access(icb, handle, inode, curp);
    if (err)
        return err;

    if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
        EXT4_ERROR_INODE(inode,
                "logical %d == ei_block %d!",
                logical, le32_to_cpu(curp->p_idx->ei_block));
        return -EIO;
    }

    if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
                >= le16_to_cpu(curp->p_hdr->eh_max))) {
        EXT4_ERROR_INODE(inode,
                "eh_entries %d >= eh_max %d!",
                le16_to_cpu(curp->p_hdr->eh_entries),
                le16_to_cpu(curp->p_hdr->eh_max));
        return -EIO;
    }

    if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
        /* insert after */
        ext_debug("insert new index %d after: %llu\n", logical, ptr);
        ix = curp->p_idx + 1;
    } else {
        /* insert before */
        ext_debug("insert new index %d before: %llu\n", logical, ptr);
        ix = curp->p_idx;
    }

    len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
    BUG_ON(len < 0);
    if (len > 0) {
        ext_debug("insert new index %d: "
                "move %d indices from 0x%p to 0x%p\n",
                logical, len, ix, ix + 1);
        memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
    }

    if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
        EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
        return -EIO;
    }

    ix->ei_block = cpu_to_le32(logical);
    ext4_idx_store_pblock(ix, ptr);
    le16_add_cpu(&curp->p_hdr->eh_entries, 1);

    if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
        EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
        return -EIO;
    }

    err = ext4_ext_dirty(icb, handle, inode, curp);
    ext4_std_error(inode->i_sb, err);

    return err;
}
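/*
 * What the memmove in ext4_ext_insert_index() does, concretely
 * (illustrative): inserting a new index L that sorts between A and B,
 *
 *     before:  [A][B][C]        ix = &B, len = 2
 *     after:   [A][L][B][C]     B..C shifted one slot right, L written at ix
 *
 * eh_entries is only incremented after the new slot has been filled in.
 */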
/*
 * ext4_ext_split:
 * inserts new subtree into the path, using free index entry
 * at depth @at:
 * - allocates all needed blocks (new leaf and all intermediate index blocks)
 * - makes decision where to split
 * - moves remaining extents and index entries (right to the split point)
 *   into the newly allocated blocks
 * - initializes subtree
 */
static int ext4_ext_split(void *icb, handle_t *handle, struct inode *inode,
        unsigned int flags,
        struct ext4_ext_path *path,
        struct ext4_extent *newext, int at)
{
    struct buffer_head *bh = NULL;
    int depth = ext_depth(inode);
    struct ext4_extent_header *neh;
    struct ext4_extent_idx *fidx;
    int i = at, k, m, a;
    ext4_fsblk_t newblock, oldblock;
    __le32 border;
    ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
    int err = 0;

    /* make decision: where to split? */
    /* FIXME: now decision is simplest: at current extent */

    /* if current leaf will be split, then we should use
     * border from split point */
    if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
        EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
        return -EIO;
    }
    if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
        border = path[depth].p_ext[1].ee_block;
        ext_debug("leaf will be split."
                " next leaf starts at %d\n",
                le32_to_cpu(border));
    } else {
        border = newext->ee_block;
        ext_debug("leaf will be added."
                " next leaf starts at %d\n",
                le32_to_cpu(border));
    }

    /*
     * If an error occurs, we break processing
     * and mark the filesystem read-only. The index won't
     * be inserted and the tree will stay in a consistent
     * state. The next mount will repair buffers too.
     */

    /*
     * Get an array to track all allocated blocks.
     * We need this to handle errors and free blocks
     * upon them.
     */
    ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
    if (!ablocks)
        return -ENOMEM;

    /* allocate all needed blocks */
    ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
    for (a = 0; a < depth - at; a++) {
        newblock = ext4_ext_new_meta_block(icb, handle, inode, path,
                newext, &err, flags);
        if (newblock == 0)
            goto cleanup;
        ablocks[a] = newblock;
    }

    /* initialize new leaf */
    newblock = ablocks[--a];
    if (unlikely(newblock == 0)) {
        EXT4_ERROR_INODE(inode, "newblock == 0!");
        err = -EIO;
        goto cleanup;
    }
    bh = extents_bwrite(inode->i_sb, newblock);
    if (unlikely(!bh)) {
        err = -ENOMEM;
        goto cleanup;
    }

    err = ext4_journal_get_create_access(icb, handle, bh);
    if (err)
        goto cleanup;

    neh = ext_block_hdr(bh);
    neh->eh_entries = 0;
    neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
    neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
    neh->eh_depth = 0;

    /* move remainder of path[depth] to the new leaf */
    if (unlikely(path[depth].p_hdr->eh_entries !=
                path[depth].p_hdr->eh_max)) {
        EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
                path[depth].p_hdr->eh_entries,
                path[depth].p_hdr->eh_max);
        err = -EIO;
        goto cleanup;
    }
    /* start copy from next extent */
    m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
    ext4_ext_show_move(inode, path, newblock, depth);
    if (m) {
        struct ext4_extent *ex;
        ex = EXT_FIRST_EXTENT(neh);
        memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
        le16_add_cpu(&neh->eh_entries, m);
    }

    ext4_extent_block_csum_set(inode, neh);
    set_buffer_uptodate(bh);

    err = ext4_handle_dirty_metadata(icb, handle, inode, bh);
    if (err)
        goto cleanup;
    extents_brelse(bh);
    bh = NULL;

    /* correct old leaf */
    if (m) {
        err = ext4_ext_get_access(icb, handle, inode, path + depth);
        if (err)
            goto cleanup;
        le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
        err = ext4_ext_dirty(icb, handle, inode, path + depth);
        if (err)
            goto cleanup;
    }

    /* create intermediate indexes */
    k = depth - at - 1;
    if (unlikely(k < 0)) {
        EXT4_ERROR_INODE(inode, "k %d < 0!", k);
        err = -EIO;
        goto cleanup;
    }
    if (k)
        ext_debug("create %d intermediate indices\n", k);
    /* insert new index into current index block */
    /* current depth stored in i var */
    i = depth - 1;
    while (k--) {
        oldblock = newblock;
        newblock = ablocks[--a];
        bh = extents_bwrite(inode->i_sb, newblock);
        if (unlikely(!bh)) {
            err = -ENOMEM;
            goto cleanup;
        }

        err = ext4_journal_get_create_access(icb, handle, bh);
        if (err)
            goto cleanup;

        neh = ext_block_hdr(bh);
        neh->eh_entries = cpu_to_le16(1);
        neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
        neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
        neh->eh_depth = cpu_to_le16(depth - i);
        fidx = EXT_FIRST_INDEX(neh);
        fidx->ei_block = border;
        ext4_idx_store_pblock(fidx, oldblock);

        ext_debug("int.index at %d (block %llu): %u -> %llu\n",
                i, newblock, le32_to_cpu(border), oldblock);

        /* move remainder of path[i] to the new index block */
        if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
                    EXT_LAST_INDEX(path[i].p_hdr))) {
            EXT4_ERROR_INODE(inode,
                    "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
                    le32_to_cpu(path[i].p_ext->ee_block));
            err = -EIO;
            goto cleanup;
        }
        /* start copy indexes */
        m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
        ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
                EXT_MAX_INDEX(path[i].p_hdr));
        ext4_ext_show_move(inode, path, newblock, i);
        if (m) {
            memmove(++fidx, path[i].p_idx,
                    sizeof(struct ext4_extent_idx) * m);
            le16_add_cpu(&neh->eh_entries, m);
        }
        ext4_extent_block_csum_set(inode, neh);
        set_buffer_uptodate(bh);

        err = ext4_handle_dirty_metadata(icb, handle, inode, bh);
        if (err)
            goto cleanup;
        extents_brelse(bh);
        bh = NULL;

        /* correct old index */
        if (m) {
            err = ext4_ext_get_access(icb, handle, inode, path + i);
            if (err)
                goto cleanup;
            le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
            err = ext4_ext_dirty(icb, handle, inode, path + i);
            if (err)
                goto cleanup;
        }

        i--;
    }

    /* insert new index */
    err = ext4_ext_insert_index(icb, handle, inode, path + at,
            le32_to_cpu(border), newblock);

cleanup:
    if (bh)
        extents_brelse(bh);

    if (err) {
        /* free all allocated blocks in error case */
        for (i = 0; i < depth; i++) {
            if (!ablocks[i])
                continue;
            ext4_free_blocks(icb, handle, inode, NULL, ablocks[i], 1,
                    EXT4_FREE_BLOCKS_METADATA);
        }
    }
    kfree(ablocks);

    return err;
}
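/*
 * Net effect of ext4_ext_split(), sketched (illustrative): everything at or
 * to the right of path[depth].p_ext moves into freshly allocated blocks,
 * one per level below @at, and a single new index entry
 * (border -> topmost new block) is then recorded at level @at, the level
 * that still had a free slot. On failure every block noted in ablocks[] is
 * freed again, so the on-disk tree stays consistent.
 */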
/*
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
 */
static int ext4_ext_grow_indepth(void *icb, handle_t *handle, struct inode *inode,
        unsigned int flags)
{
    struct ext4_extent_header *neh;
    struct buffer_head *bh;
    ext4_fsblk_t newblock, goal = 0;
    int err = 0;

    /* Try to prepend new index to old one */
    if (ext_depth(inode))
        goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
    else
        goal = ext4_inode_to_goal_block(inode);
    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags,
            NULL, &err);
    if (newblock == 0)
        return err;

    bh = extents_bwrite(inode->i_sb, newblock);
    if (!bh)
        return -ENOMEM;

    err = ext4_journal_get_create_access(icb, handle, bh);
    if (err)
        goto out;

    /* move top-level index/leaf into new block */
    memmove(bh->b_data, EXT4_I(inode)->i_block,
            sizeof(EXT4_I(inode)->i_block));

    /* set size of new block */
    neh = ext_block_hdr(bh);
    /* old root could have indexes or leaves,
     * so calculate eh_max the right way */
    if (ext_depth(inode))
        neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
    else
        neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
    neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
    ext4_extent_block_csum_set(inode, neh);
    set_buffer_uptodate(bh);

    err = ext4_handle_dirty_metadata(icb, handle, inode, bh);
    if (err)
        goto out;

    /* Update top-level index: num,max,pointer */
    neh = ext_inode_hdr(inode);
    neh->eh_entries = cpu_to_le16(1);
    ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
    if (neh->eh_depth == 0) {
        /* Root extent block becomes index block */
        neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
        EXT_FIRST_INDEX(neh)->ei_block =
            EXT_FIRST_EXTENT(neh)->ee_block;
    }
    ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
            (neh->eh_entries), (neh->eh_max),
            (EXT_FIRST_INDEX(neh)->ei_block),
            ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

    le16_add_cpu(&neh->eh_depth, 1);
    ext4_mark_inode_dirty(icb, handle, inode);
out:
    extents_brelse(bh);

    return err;
}

/*
 * ext4_ext_create_new_leaf:
 * finds empty index and adds new leaf.
 * if no free index is found, then it requests in-depth growing.
 */
static int ext4_ext_create_new_leaf(void *icb, handle_t *handle, struct inode *inode,
        unsigned int mb_flags,
        unsigned int gb_flags,
        struct ext4_ext_path **ppath,
        struct ext4_extent *newext)
{
    struct ext4_ext_path *path = *ppath;
    struct ext4_ext_path *curp;
    int depth, i, err = 0;

repeat:
    i = depth = ext_depth(inode);

    /* walk up the tree looking for a free index entry */
    curp = path + depth;
    while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
        i--;
        curp--;
    }

    /* we use already allocated block for index block,
     * so subsequent data blocks should be contiguous */
    if (EXT_HAS_FREE_INDEX(curp)) {
        /* if we found index with free entry, then use that
         * entry: create all needed subtree and add new leaf */
        err = ext4_ext_split(icb, handle, inode, mb_flags, path, newext, i);
        if (err)
            goto out;

        /* refill path */
        path = ext4_find_extent(inode,
                (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                ppath, gb_flags);
        if (IS_ERR(path))
            err = PTR_ERR(path);
    } else {
        /* tree is full, time to grow in depth */
        err = ext4_ext_grow_indepth(icb, handle, inode, mb_flags);
        if (err)
            goto out;

        /* refill path */
        path = ext4_find_extent(inode,
                (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                ppath, gb_flags);
        if (IS_ERR(path)) {
            err = PTR_ERR(path);
            goto out;
        }

        /*
         * only first (depth 0 -> 1) produces free space;
         * in all other cases we have to split the grown tree
         */
        depth = ext_depth(inode);
        if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
            /* now we need to split */
            goto repeat;
        }
    }

out:
    return err;
}
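/*
 * Depth growth, before and after (illustrative):
 *
 *     depth 0:  i_block = [eh | ex ex ex ex]                (root is a leaf)
 *     depth 1:  i_block = [eh | idx] --> block = [eh | ex ... ex]
 *
 * ext4_ext_grow_indepth() copies the old root into a new block and leaves a
 * single index entry behind, so only the 0 -> 1 transition yields free room
 * in the root; deeper growth still needs a split afterwards, which is why
 * ext4_ext_create_new_leaf() loops back to its "repeat" label.
 */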
/*
 * search the closest allocated block to the left for *logical
 * and returns it at @logical + its physical address at @phys;
 * if *logical is the smallest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_left(struct inode *inode,
        struct ext4_ext_path *path,
        ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
    struct ext4_extent_idx *ix;
    struct ext4_extent *ex;
    int depth, ee_len;

    if (unlikely(path == NULL)) {
        EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
        return -EIO;
    }
    depth = path->p_depth;
    *phys = 0;

    if (depth == 0 && path->p_ext == NULL)
        return 0;

    /* usually the extent in the path covers blocks smaller
     * than *logical, but it can be that the extent is the
     * first one in the file */

    ex = path[depth].p_ext;
    ee_len = ext4_ext_get_actual_len(ex);
    if (*logical < le32_to_cpu(ex->ee_block)) {
        if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
            EXT4_ERROR_INODE(inode,
                    "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
                    *logical, le32_to_cpu(ex->ee_block));
            return -EIO;
        }
        while (--depth >= 0) {
            ix = path[depth].p_idx;
            if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
                EXT4_ERROR_INODE(inode,
                        "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
                        ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
                        EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
                        le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
                        depth);
                return -EIO;
            }
        }
        return 0;
    }

    if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
        EXT4_ERROR_INODE(inode,
                "logical %d < ee_block %d + ee_len %d!",
                *logical, le32_to_cpu(ex->ee_block), ee_len);
        return -EIO;
    }

    *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
    *phys = ext4_ext_pblock(ex) + ee_len - 1;
    return 0;
}

/*
 * search the closest allocated block to the right for *logical
 * and returns it at @logical + its physical address at @phys;
 * if *logical is the largest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_right(struct inode *inode,
        struct ext4_ext_path *path,
        ext4_lblk_t *logical, ext4_fsblk_t *phys,
        struct ext4_extent **ret_ex)
{
    struct buffer_head *bh = NULL;
    struct ext4_extent_header *eh;
    struct ext4_extent_idx *ix;
    struct ext4_extent *ex;
    ext4_fsblk_t block;
    int depth;  /* Note, NOT eh_depth; depth from top of tree */
    int ee_len;

    if (unlikely(path == NULL)) {
        EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
        return -EIO;
    }
    depth = path->p_depth;
    *phys = 0;

    if (depth == 0 && path->p_ext == NULL)
        return 0;

    /* usually the extent in the path covers blocks smaller
     * than *logical, but it can be that the extent is the
     * first one in the file */

    ex = path[depth].p_ext;
    ee_len = ext4_ext_get_actual_len(ex);
    /*if (*logical < le32_to_cpu(ex->ee_block)) {*/
    if (*logical < (ex->ee_block)) {
        if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
            EXT4_ERROR_INODE(inode,
                    "first_extent(path[%d].p_hdr) != ex",
                    depth);
            return -EIO;
        }
        while (--depth >= 0) {
            ix = path[depth].p_idx;
            if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
                EXT4_ERROR_INODE(inode,
                        "ix != EXT_FIRST_INDEX *logical %d!",
                        *logical);
                return -EIO;
            }
        }
        goto found_extent;
    }

    /*if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {*/
    if (unlikely(*logical < ((ex->ee_block) + ee_len))) {
        EXT4_ERROR_INODE(inode,
                "logical %d < ee_block %d + ee_len %d!",
                /**logical, le32_to_cpu(ex->ee_block), ee_len);*/
                *logical, (ex->ee_block), ee_len);
        return -EIO;
    }

    if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
        /* next allocated block in this leaf */
        ex++;
        goto found_extent;
    }

    /* go up and search for index to the right */
    while (--depth >= 0) {
        ix = path[depth].p_idx;
        if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
            goto got_index;
    }

    /* we've gone up to the root and found no index to the right */
    return 0;

got_index:
    /* we've found index to the right, let's
     * follow it and find the closest allocated
     * block to the right */
    ix++;
    block = ext4_idx_pblock(ix);
    while (++depth < path->p_depth) {
        /* subtract from p_depth to get proper eh_depth */
        bh = read_extent_tree_block(inode, block,
                path->p_depth - depth, 0);
        if (IS_ERR(bh))
            return PTR_ERR(bh);
        eh = ext_block_hdr(bh);
        ix = EXT_FIRST_INDEX(eh);
        block = ext4_idx_pblock(ix);
        extents_brelse(bh);
    }

    bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
    if (IS_ERR(bh))
        return PTR_ERR(bh);
    eh = ext_block_hdr(bh);
    ex = EXT_FIRST_EXTENT(eh);
found_extent:
    /**logical = le32_to_cpu(ex->ee_block);*/
    *logical = (ex->ee_block);
    *phys = ext4_ext_pblock(ex);
    *ret_ex = ex;
    if (bh)
        extents_brelse(bh);
    return 0;
}

/*
 * ext4_ext_next_allocated_block:
 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
 * NOTE: it considers the block number from an index entry as an
 * allocated block. Thus, index entries have to be consistent
 * with leaves.
 */
ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
    int depth;

    depth = path->p_depth;

    if (depth == 0 && path->p_ext == NULL)
        return EXT_MAX_BLOCKS;

    while (depth >= 0) {
        if (depth == path->p_depth) {
            /* leaf */
            if (path[depth].p_ext &&
                    path[depth].p_ext !=
                    EXT_LAST_EXTENT(path[depth].p_hdr))
                return le32_to_cpu(path[depth].p_ext[1].ee_block);
        } else {
            /* index */
            if (path[depth].p_idx !=
                    EXT_LAST_INDEX(path[depth].p_hdr))
                return le32_to_cpu(path[depth].p_idx[1].ei_block);
        }
        depth--;
    }

    return EXT_MAX_BLOCKS;
}
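/*
 * Example of the neighbour searches (illustrative): for a file with extents
 * covering logical blocks 0-7 and 16-23, a lookup of block 10 positions the
 * path on the 0-7 extent; ext4_ext_search_left() then reports logical 7
 * (and its physical twin), ext4_ext_search_right() reports logical 16, and
 * ext4_ext_next_allocated_block() also returns 16. Callers use these to
 * size holes and to bound new allocations.
 */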
/*
 * ext4_ext_next_leaf_block:
 * returns first allocated block from next leaf or EXT_MAX_BLOCKS
 */
static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
{
    int depth;

    BUG_ON(path == NULL);
    depth = path->p_depth;

    /* zero-depth tree has no leaf blocks at all */
    if (depth == 0)
        return EXT_MAX_BLOCKS;

    /* go to index block */
    depth--;

    while (depth >= 0) {
        if (path[depth].p_idx !=
                EXT_LAST_INDEX(path[depth].p_hdr))
            return (ext4_lblk_t)
                le32_to_cpu(path[depth].p_idx[1].ei_block);
        depth--;
    }

    return EXT_MAX_BLOCKS;
}

/*
 * ext4_ext_correct_indexes:
 * if the leaf gets modified and the modified extent is first in the leaf,
 * then we have to correct all indexes above.
 * TODO: do we need to correct the tree in all cases?
 */
static int ext4_ext_correct_indexes(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_ext_path *path)
{
    struct ext4_extent_header *eh;
    int depth = ext_depth(inode);
    struct ext4_extent *ex;
    __le32 border;
    int k, err = 0;

    eh = path[depth].p_hdr;
    ex = path[depth].p_ext;

    if (unlikely(ex == NULL || eh == NULL)) {
        EXT4_ERROR_INODE(inode,
                "ex %p == NULL or eh %p == NULL", ex, eh);
        return -EIO;
    }

    if (depth == 0) {
        /* there is no tree at all */
        return 0;
    }

    if (ex != EXT_FIRST_EXTENT(eh)) {
        /* we correct tree if first leaf got modified only */
        return 0;
    }

    /*
     * TODO: we need correction if border is smaller than current one
     */
    k = depth - 1;
    border = path[depth].p_ext->ee_block;
    err = ext4_ext_get_access(icb, handle, inode, path + k);
    if (err)
        return err;
    path[k].p_idx->ei_block = border;
    err = ext4_ext_dirty(icb, handle, inode, path + k);
    if (err)
        return err;

    while (k--) {
        /* change all left-side indexes */
        if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
            break;
        err = ext4_ext_get_access(icb, handle, inode, path + k);
        if (err)
            break;
        path[k].p_idx->ei_block = border;
        err = ext4_ext_dirty(icb, handle, inode, path + k);
        if (err)
            break;
    }

    return err;
}

int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
        struct ext4_extent *ex2)
{
    unsigned short ext1_ee_len, ext2_ee_len;

    /*
     * Make sure that both extents are initialized. We don't merge
     * unwritten extents so that we can be sure that end_io code has
     * the extent that was written properly split out and conversion to
     * initialized is trivial.
     */
    if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
        return 0;

    ext1_ee_len = ext4_ext_get_actual_len(ex1);
    ext2_ee_len = ext4_ext_get_actual_len(ex2);

    if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
            le32_to_cpu(ex2->ee_block))
        return 0;

    /*
     * To allow future support for preallocated extents to be added
     * as an RO_COMPAT feature, refuse to merge two extents if
     * this can result in the top bit of ee_len being set.
     */
    if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
        return 0;
    if (ext4_ext_is_unwritten(ex1) &&
            (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN))
        return 0;
#ifdef AGGRESSIVE_TEST
    if (ext1_ee_len >= 4)
        return 0;
#endif

    if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
        return 1;
    return 0;
}
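/*
 * The merge condition, concretely (illustrative): extents
 * (lblk 0, len 8, pblk 1000) and (lblk 8, len 8, pblk 1008) merge into
 * (lblk 0, len 16, pblk 1000): same written/unwritten state, logically
 * contiguous, physically contiguous, and the combined length stays within
 * EXT_INIT_MAX_LEN (32768 blocks), keeping the top bit of ee_len clear.
 */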
/*
 * This function tries to merge the "ex" extent to the next extent in the tree.
 * It always tries to merge towards right. If you want to merge towards
 * left, pass "ex - 1" as argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
static int ext4_ext_try_to_merge_right(struct inode *inode,
        struct ext4_ext_path *path,
        struct ext4_extent *ex)
{
    struct ext4_extent_header *eh;
    unsigned int depth, len;
    int merge_done = 0, unwritten;

    depth = ext_depth(inode);
    assert(path[depth].p_hdr != NULL);
    eh = path[depth].p_hdr;

    while (ex < EXT_LAST_EXTENT(eh)) {
        if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
            break;
        /* merge with next extent! */
        unwritten = ext4_ext_is_unwritten(ex);
        ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                + ext4_ext_get_actual_len(ex + 1));
        if (unwritten)
            ext4_ext_mark_unwritten(ex);

        if (ex + 1 < EXT_LAST_EXTENT(eh)) {
            len = (EXT_LAST_EXTENT(eh) - ex - 1)
                * sizeof(struct ext4_extent);
            memmove(ex + 1, ex + 2, len);
        }
        le16_add_cpu(&eh->eh_entries, -1);
        merge_done = 1;
        if (!eh->eh_entries)
            EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
    }

    return merge_done;
}

/*
 * This function does a very simple check to see if we can collapse
 * an extent tree with a single extent tree leaf block into the inode.
 */
static void ext4_ext_try_to_merge_up(void *icb, handle_t *handle,
        struct inode *inode,
        struct ext4_ext_path *path)
{
    size_t s;
    unsigned max_root = ext4_ext_space_root(inode, 0);
    ext4_fsblk_t blk;

    if ((path[0].p_depth != 1) ||
            (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
            (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
        return;

    /*
     * We need to modify the block allocation bitmap and the block
     * group descriptor to release the extent tree block.  If we
     * can't get the journal credits, give up.
     */
    if (ext4_journal_extend(icb, handle, 2))
        return;

    /*
     * Copy the extent data up to the inode
     */
    blk = ext4_idx_pblock(path[0].p_idx);
    s = le16_to_cpu(path[1].p_hdr->eh_entries) *
        sizeof(struct ext4_extent_idx);
    s += sizeof(struct ext4_extent_header);

    path[1].p_maxdepth = path[0].p_maxdepth;
    memcpy(path[0].p_hdr, path[1].p_hdr, s);
    path[0].p_depth = 0;
    path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
        (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
    path[0].p_hdr->eh_max = cpu_to_le16(max_root);

    extents_brelse(path[1].p_bh);
    ext4_free_blocks(icb, handle, inode, NULL, blk, 1,
            EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}

/*
 * This function tries to merge the @ex extent to its neighbours in the tree,
 * then tries to collapse the extent tree into the inode.
 */
static void ext4_ext_try_to_merge(void *icb, handle_t *handle,
        struct inode *inode,
        struct ext4_ext_path *path,
        struct ext4_extent *ex)
{
    struct ext4_extent_header *eh;
    unsigned int depth;
    int merge_done = 0;

    depth = ext_depth(inode);
    BUG_ON(path[depth].p_hdr == NULL);
    eh = path[depth].p_hdr;

    if (ex > EXT_FIRST_EXTENT(eh))
        merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);

    if (!merge_done)
        (void) ext4_ext_try_to_merge_right(inode, path, ex);

    ext4_ext_try_to_merge_up(icb, handle, inode, path);
}

/*
 * ext4_ext_insert_extent:
 * tries to merge the requested extent into an existing extent or
 * inserts the requested extent as a new one into the tree,
 * creating a new leaf in the no-space case.
 */
int ext4_ext_insert_extent(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_ext_path **ppath,
        struct ext4_extent *newext,
        int gb_flags)
{
    struct ext4_ext_path *path = *ppath;
    struct ext4_extent_header *eh;
    struct ext4_extent *ex, *fex;
    struct ext4_extent *nearex; /* nearest extent */
    struct ext4_ext_path *npath = NULL;
    int depth, len, err;
    ext4_lblk_t next;
    int mb_flags = 0, unwritten;

    if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
        mb_flags |= EXT4_MB_DELALLOC_RESERVED;
    if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
        EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
        return -EIO;
    }
    depth = ext_depth(inode);
    ex = path[depth].p_ext;
    eh = path[depth].p_hdr;
    if (unlikely(path[depth].p_hdr == NULL)) {
        EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
        return -EIO;
    }

    /* try to insert block into found extent and return */
    if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {

        /*
         * Try to see whether we should rather test the extent on
         * right from ex, or from the left of ex. This is because
         * ext4_find_extent() can return either extent on the
         * left, or on the right from the searched position. This
         * will make merging more effective.
         */
        if (ex < EXT_LAST_EXTENT(eh) &&
                (le32_to_cpu(ex->ee_block) +
                 ext4_ext_get_actual_len(ex) <
                 le32_to_cpu(newext->ee_block))) {
            ex += 1;
            goto prepend;
        } else if ((ex > EXT_FIRST_EXTENT(eh)) &&
                (le32_to_cpu(newext->ee_block) +
                 ext4_ext_get_actual_len(newext) <
                 le32_to_cpu(ex->ee_block)))
            ex -= 1;

        /* Try to append newex to the ex */
        if (ext4_can_extents_be_merged(inode, ex, newext)) {
            ext_debug("append [%d]%d block to %u:[%d]%d"
                    "(from %llu)\n",
                    ext4_ext_is_unwritten(newext),
                    ext4_ext_get_actual_len(newext),
                    le32_to_cpu(ex->ee_block),
                    ext4_ext_is_unwritten(ex),
                    ext4_ext_get_actual_len(ex),
                    ext4_ext_pblock(ex));
            err = ext4_ext_get_access(icb, handle, inode,
                    path + depth);
            if (err)
                return err;
            unwritten = ext4_ext_is_unwritten(ex);
            ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                    + ext4_ext_get_actual_len(newext));
            if (unwritten)
                ext4_ext_mark_unwritten(ex);
            eh = path[depth].p_hdr;
            nearex = ex;
            goto merge;
        }

prepend:
        /* Try to prepend newex to the ex */
        if (ext4_can_extents_be_merged(inode, newext, ex)) {
            ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
                    "(from %llu)\n",
                    le32_to_cpu(newext->ee_block),
                    ext4_ext_is_unwritten(newext),
                    ext4_ext_get_actual_len(newext),
                    le32_to_cpu(ex->ee_block),
                    ext4_ext_is_unwritten(ex),
                    ext4_ext_get_actual_len(ex),
                    ext4_ext_pblock(ex));
            err = ext4_ext_get_access(icb, handle, inode,
                    path + depth);
            if (err)
                return err;

            unwritten = ext4_ext_is_unwritten(ex);
            ex->ee_block = newext->ee_block;
            ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
            ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                    + ext4_ext_get_actual_len(newext));
            if (unwritten)
                ext4_ext_mark_unwritten(ex);
            eh = path[depth].p_hdr;
            nearex = ex;
            goto merge;
        }
    }

    depth = ext_depth(inode);
    eh = path[depth].p_hdr;
    if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
        goto has_space;

    /* probably next leaf has space for us? */
    fex = EXT_LAST_EXTENT(eh);
    next = EXT_MAX_BLOCKS;
    if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
        next = ext4_ext_next_leaf_block(path);
    if (next != EXT_MAX_BLOCKS) {
        ext_debug("next leaf block - %u\n", next);
        BUG_ON(npath != NULL);
        npath = ext4_find_extent(inode, next, NULL, 0);
        if (IS_ERR(npath))
            return PTR_ERR(npath);
        BUG_ON(npath->p_depth != path->p_depth);
        eh = npath[depth].p_hdr;
        if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
            ext_debug("next leaf isn't full(%d)\n",
                    le16_to_cpu(eh->eh_entries));
            path = npath;
            goto has_space;
        }
        ext_debug("next leaf has no free space(%d,%d)\n",
                le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
    }

    /*
     * There is no free space in the found leaf.
     * We're gonna add a new leaf in the tree.
     */
    if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
        mb_flags |= EXT4_MB_USE_RESERVED;
    err = ext4_ext_create_new_leaf(icb, handle, inode, mb_flags, gb_flags,
            ppath, newext);
    if (err)
        goto cleanup;
    depth = ext_depth(inode);
    eh = path[depth].p_hdr;

has_space:
    nearex = path[depth].p_ext;

    err = ext4_ext_get_access(icb, handle, inode, path + depth);
    if (err)
        goto cleanup;

    if (!nearex) {
        /* there is no extent in this leaf, create first one */
        ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
                le32_to_cpu(newext->ee_block),
                ext4_ext_pblock(newext),
                ext4_ext_is_unwritten(newext),
                ext4_ext_get_actual_len(newext));
        nearex = EXT_FIRST_EXTENT(eh);
    } else {
        if (le32_to_cpu(newext->ee_block)
                > le32_to_cpu(nearex->ee_block)) {
            /* Insert after */
            ext_debug("insert %u:%llu:[%d]%d after: "
                    "nearest %p\n",
                    le32_to_cpu(newext->ee_block),
                    ext4_ext_pblock(newext),
                    ext4_ext_is_unwritten(newext),
                    ext4_ext_get_actual_len(newext),
                    nearex);
            nearex++;
        } else {
            /* Insert before */
            BUG_ON(newext->ee_block == nearex->ee_block);
            ext_debug("insert %u:%llu:[%d]%d before: "
                    "nearest %p\n",
                    le32_to_cpu(newext->ee_block),
                    ext4_ext_pblock(newext),
                    ext4_ext_is_unwritten(newext),
                    ext4_ext_get_actual_len(newext),
                    nearex);
        }
        len = EXT_LAST_EXTENT(eh) - nearex + 1;
        if (len > 0) {
            ext_debug("insert %u:%llu:[%d]%d: "
                    "move %d extents from 0x%p to 0x%p\n",
                    le32_to_cpu(newext->ee_block),
                    ext4_ext_pblock(newext),
                    ext4_ext_is_unwritten(newext),
                    ext4_ext_get_actual_len(newext),
                    len, nearex, nearex + 1);
            memmove(nearex + 1, nearex,
                    len * sizeof(struct ext4_extent));
        }
    }

    le16_add_cpu(&eh->eh_entries, 1);
    path[depth].p_ext = nearex;
    nearex->ee_block = newext->ee_block;
    ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
    nearex->ee_len = newext->ee_len;

merge:
    /* try to merge extents */
    if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
        ext4_ext_try_to_merge(icb, handle, inode, path, nearex);

    /* time to correct all indexes above */
    err = ext4_ext_correct_indexes(icb, handle, inode, path);
    if (err)
        goto cleanup;

    err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);

cleanup:
    if (npath) {
        ext4_ext_drop_refs(npath);
        kfree(npath);
    }
    return err;
}
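/*
 * Overall flow of ext4_ext_insert_extent(), summarized (illustrative):
 *
 *   1. try to widen a neighbouring extent in place (append or prepend);
 *   2. if the target leaf is full, try the next leaf to the right;
 *   3. otherwise split or grow the tree via ext4_ext_create_new_leaf();
 *   4. slot the extent in (memmove as needed), merge, fix parent indexes.
 *
 * Only step 1 leaves the tree shape untouched; PRE_IO callers
 * (gb_flags & EXT4_GET_BLOCKS_PRE_IO) skip the merge attempts entirely.
 */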
static inline int get_default_free_blocks_flags(struct inode *inode)
{
    return 0;
}

/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
    ext4_fsblk_t ee_pblock;
    unsigned int ee_len;
    int ret;

    ee_len = ext4_ext_get_actual_len(ex);
    ee_pblock = ext4_ext_pblock(ex);

    /* zero-out is not implemented in this port; pretend it succeeded */
    ret = 0;

    return ret;
}

static int ext4_remove_blocks(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_extent *ex,
        unsigned long from, unsigned long to)
{
    struct buffer_head *bh;
    int i;

    if (from >= le32_to_cpu(ex->ee_block)
            && to == le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) {
        /* tail removal */
        unsigned long num, start;
        num = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - from;
        start = ext4_ext_pblock(ex) + ext4_ext_get_actual_len(ex) - num;
        ext4_free_blocks(icb, handle, inode, NULL, start, num, 0);
    } else if (from == le32_to_cpu(ex->ee_block)
            && to <= le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) {
        /* head removal: not handled here */
    } else {
        /* hole in the middle: not handled here */
    }
    return 0;
}

/*
 * This routine removes an index from the index block;
 * it's used in the truncate case only, thus all requests are for
 * the last index in the block only.
 */
int ext4_ext_rm_idx(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_ext_path *path)
{
    int err;
    ext4_fsblk_t leaf;

    /* free index block */
    path--;
    leaf = ext4_idx_pblock(path->p_idx);
    BUG_ON(path->p_hdr->eh_entries == 0);
    if ((err = ext4_ext_get_access(icb, handle, inode, path)))
        return err;
    path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries) - 1);
    if ((err = ext4_ext_dirty(icb, handle, inode, path)))
        return err;
    ext4_free_blocks(icb, handle, inode, NULL, leaf, 1, 0);
    return err;
}

static int
ext4_ext_rm_leaf(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_ext_path *path, unsigned long start)
{
    int err = 0, correct_index = 0;
    int depth = ext_depth(inode), credits;
    struct ext4_extent_header *eh;
    unsigned a, b, block, num;
    unsigned long ex_ee_block;
    unsigned short ex_ee_len;
    struct ext4_extent *ex;

    /* the header must be checked already in ext4_ext_remove_space() */
    if (!path[depth].p_hdr)
        path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
    eh = path[depth].p_hdr;
    BUG_ON(eh == NULL);

    /* find where to start removing */
    ex = EXT_LAST_EXTENT(eh);

    ex_ee_block = le32_to_cpu(ex->ee_block);
    ex_ee_len = ext4_ext_get_actual_len(ex);

    while (ex >= EXT_FIRST_EXTENT(eh) &&
            ex_ee_block + ex_ee_len > start) {
        path[depth].p_ext = ex;

        a = ex_ee_block > start ? ex_ee_block : start;
        b = (unsigned long long)ex_ee_block + ex_ee_len - 1 <
            EXT_MAX_BLOCKS ? ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCKS;


        if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) {
            block = 0;
            num = 0;
            BUG();
        } else if (a != ex_ee_block) {
            /* remove tail of the extent */
            block = ex_ee_block;
            num = a - block;
        } else if (b != ex_ee_block + ex_ee_len - 1) {
            /* remove head of the extent */
            block = a;
            num = b - a;
            /* there is no "make a hole" API yet */
            BUG();
        } else {
            /* remove whole extent: excellent! */
            block = ex_ee_block;
            num = 0;
            BUG_ON(a != ex_ee_block);
            BUG_ON(b != ex_ee_block + ex_ee_len - 1);
        }

        /* at present, extent can't cross block group */
        /* leaf + bitmap + group desc + sb + inode */
        credits = 5;
        if (ex == EXT_FIRST_EXTENT(eh)) {
            correct_index = 1;
            credits += (ext_depth(inode)) + 1;
        }

        /*handle = ext4_ext_journal_restart(icb, handle, credits);*/
        /*if (IS_ERR(icb, handle)) {*/
        /*    err = PTR_ERR(icb, handle);*/
        /*    goto out;*/
        /*}*/

        err = ext4_ext_get_access(icb, handle, inode, path + depth);
        if (err)
            goto out;

        err = ext4_remove_blocks(icb, handle, inode, ex, a, b);
        if (err)
            goto out;

        if (num == 0) {
            /* this extent is removed entirely; mark slot unused */
            ext4_ext_store_pblock(ex, 0);
            eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1);
        }

        ex->ee_block = cpu_to_le32(block);
        ex->ee_len = cpu_to_le16(num);

        err = ext4_ext_dirty(icb, handle, inode, path + depth);
        if (err)
            goto out;

        ex--;
        ex_ee_block = le32_to_cpu(ex->ee_block);
        ex_ee_len = ext4_ext_get_actual_len(ex);
    }

    if (correct_index && eh->eh_entries)
        err = ext4_ext_correct_indexes(icb, handle, inode, path);

    /* if this leaf is free, then we should
     * remove it from index block above */
    if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
        err = ext4_ext_rm_idx(icb, handle, inode, path + depth);

out:
    return err;
}
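/*
 * The three removal shapes handled above, for an extent covering
 * [ee_block, ee_block + len - 1] and a removal range starting at @start
 * (illustrative):
 *
 *     start <= ee_block                  -> whole extent removed (num = 0)
 *     ee_block < start <= last block     -> tail removal, extent shrinks
 *     range strictly inside the extent   -> hole punch, unsupported: BUG()
 *
 * Truncate only ever produces tail or whole-extent removals, so the head
 * and hole arms are guarded with BUG().
 */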
 */
static int ext4_split_extent_at(void *icb, handle_t *handle,
        struct inode *inode,
        struct ext4_ext_path **ppath,
        ext4_lblk_t split,
        int split_flag,
        int flags)
{
    struct ext4_ext_path *path = *ppath;
    ext4_fsblk_t newblock;
    ext4_lblk_t ee_block;
    struct ext4_extent *ex, newex, orig_ex, zero_ex;
    struct ext4_extent *ex2 = NULL;
    unsigned int ee_len, depth;
    int err = 0;

    ext4_ext_show_leaf(inode, path);

    depth = ext_depth(inode);
    ex = path[depth].p_ext;
    ee_block = le32_to_cpu(ex->ee_block);
    ee_len = ext4_ext_get_actual_len(ex);
    newblock = split - ee_block + ext4_ext_pblock(ex);

    BUG_ON(split < ee_block || split >= (ee_block + ee_len));

    err = ext4_ext_get_access(icb, handle, inode, path + depth);
    if (err)
        goto out;

    if (split == ee_block) {
        /*
         * case b: block @split is the block that the extent begins with,
         * so we just change the state of the extent, and splitting
         * is not needed.
         */
        if (split_flag & EXT4_EXT_MARK_UNWRIT2)
            ext4_ext_mark_unwritten(ex);
        else
            ext4_ext_mark_initialized(ex);

        if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
            ext4_ext_try_to_merge(icb, handle, inode, path, ex);

        err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
        goto out;
    }

    /* case a */
    memcpy(&orig_ex, ex, sizeof(orig_ex));
    ex->ee_len = cpu_to_le16(split - ee_block);
    if (split_flag & EXT4_EXT_MARK_UNWRIT1)
        ext4_ext_mark_unwritten(ex);

    /*
     * The path may lead to a new leaf, not to the original leaf any more,
     * after ext4_ext_insert_extent() returns.
     */
    err = ext4_ext_dirty(icb, handle, inode, path + depth);
    if (err)
        goto fix_extent_len;

    ex2 = &newex;
    ex2->ee_block = cpu_to_le32(split);
    ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
    ext4_ext_store_pblock(ex2, newblock);
    if (split_flag & EXT4_EXT_MARK_UNWRIT2)
        ext4_ext_mark_unwritten(ex2);

    err = ext4_ext_insert_extent(icb, handle, inode, ppath, &newex, flags);
    if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
        if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
            if (split_flag & EXT4_EXT_DATA_VALID1) {
                err = ext4_ext_zeroout(inode, ex2);
                zero_ex.ee_block = ex2->ee_block;
                zero_ex.ee_len = cpu_to_le16(
                        ext4_ext_get_actual_len(ex2));
                ext4_ext_store_pblock(&zero_ex,
                        ext4_ext_pblock(ex2));
            } else {
                err = ext4_ext_zeroout(inode, ex);
                zero_ex.ee_block = ex->ee_block;
                zero_ex.ee_len = cpu_to_le16(
                        ext4_ext_get_actual_len(ex));
                ext4_ext_store_pblock(&zero_ex,
                        ext4_ext_pblock(ex));
            }
        } else {
            err = ext4_ext_zeroout(inode, &orig_ex);
            zero_ex.ee_block = orig_ex.ee_block;
            zero_ex.ee_len = cpu_to_le16(
                    ext4_ext_get_actual_len(&orig_ex));
            ext4_ext_store_pblock(&zero_ex,
                    ext4_ext_pblock(&orig_ex));
        }

        if (err)
            goto fix_extent_len;
        /* update the extent length and mark as initialized */
        ex->ee_len = cpu_to_le16(ee_len);
        ext4_ext_try_to_merge(icb, handle, inode, path, ex);
        err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
        if (err)
            goto fix_extent_len;

        goto out;
    } else if (err)
        goto fix_extent_len;

out:
    ext4_ext_show_leaf(inode, path);
    return err;

fix_extent_len:
    ex->ee_len = orig_ex.ee_len;
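    /*
     * Best-effort rollback: restore the original length and try to write
     * the leaf back; the original error is returned regardless of whether
     * this dirtying succeeds.
     */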
    ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
    return err;
}

/*
 * returns 1 if the current index has to be freed (even partially)
 */
#ifdef __REACTOS__
inline int
#else
static inline int
#endif
ext4_ext_more_to_rm(struct ext4_ext_path *path)
{
    BUG_ON(path->p_idx == NULL);

    if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
        return 0;

    /*
     * if truncate on a deeper level happened, it wasn't partial,
     * so we have to consider the current index for truncation
     */
    if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
        return 0;
    return 1;
}

int ext4_ext_remove_space(void *icb, struct inode *inode, unsigned long start)
{
    int depth = ext_depth(inode);
    struct ext4_ext_path *path;
    handle_t *handle = NULL;
    int i = 0, err = 0;

    /* probably the first extent we're going to free will be the last in the block */
    /*handle = ext4_journal_start(inode, depth + 1);*/
    /*if (IS_ERR(icb, handle))*/
    /*return PTR_ERR(icb, handle);*/

    /*
     * we start scanning from the right side, freeing all the blocks
     * after i_size and walking into the depth
     */
    path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
    if (path == NULL) {
        ext4_journal_stop(icb, handle);
        return -ENOMEM;
    }
    memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
    path[0].p_hdr = ext_inode_hdr(inode);
    if (ext4_ext_check_inode(inode)) {
        err = -EIO;
        goto out;
    }
    path[0].p_depth = depth;

    while (i >= 0 && err == 0) {
        if (i == depth) {
            /* this is a leaf block */
            err = ext4_ext_rm_leaf(icb, handle, inode, path, start);
            /* the root level has p_bh == NULL, extents_brelse() eats this */
            extents_brelse(path[i].p_bh);
            path[i].p_bh = NULL;
            i--;
            continue;
        }

        /* this is an index block */
        if (!path[i].p_hdr) {
            path[i].p_hdr = ext_block_hdr(path[i].p_bh);
        }

        if (!path[i].p_idx) {
            /* this level hasn't been touched yet */
            path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
            path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries) + 1;
        } else {
            /* we've already been here, move to the next index */
            path[i].p_idx--;
        }

        if (ext4_ext_more_to_rm(path + i)) {
            struct buffer_head *bh;
            /* go to the next level */
            memset(path + i + 1, 0, sizeof(*path));
            bh = read_extent_tree_block(inode, ext4_idx_pblock(path[i].p_idx),
                    path[0].p_depth - (i + 1), 0);
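            /*
             * read_extent_tree_block() reports failure via ERR_PTR(),
             * so the result must be checked with IS_ERR() rather than
             * against NULL.
             */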
            if (IS_ERR(bh)) {
                /* should we reset i_size? */
                err = -EIO;
                break;
            }
            path[i + 1].p_bh = bh;

            /* record the actual number of indexes so that we can tell
             * whether this number changes at the next iteration */
            path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
            i++;
        } else {
            /* we finished processing this index, go up */
            if (path[i].p_hdr->eh_entries == 0 && i > 0) {
                /* the index is empty, remove it;
                 * the handle must already be prepared by the
                 * leaf removal above */
                err = ext4_ext_rm_idx(icb, handle, inode, path + i);
            }
            /* the root level has p_bh == NULL, extents_brelse() eats this */
            extents_brelse(path[i].p_bh);
            path[i].p_bh = NULL;
            i--;
        }
    }

    /* TODO: flexible tree reduction should be here */
    if (path->p_hdr->eh_entries == 0) {
        /*
         * truncate to zero freed the whole tree,
         * so we need to correct eh_depth
         */
        err = ext4_ext_get_access(icb, handle, inode, path);
        if (err == 0) {
            ext_inode_hdr(inode)->eh_depth = 0;
            ext_inode_hdr(inode)->eh_max =
                cpu_to_le16(ext4_ext_space_root(inode, 0));
            err = ext4_ext_dirty(icb, handle, inode, path);
        }
    }
out:
    if (path) {
        ext4_ext_drop_refs(path);
        kfree(path);
    }
    ext4_journal_stop(icb, handle);

    return err;
}

int ext4_ext_tree_init(void *icb, handle_t *handle, struct inode *inode)
{
    struct ext4_extent_header *eh;

    eh = ext_inode_hdr(inode);
    eh->eh_depth = 0;
    eh->eh_entries = 0;
    eh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
    eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
    ext4_mark_inode_dirty(icb, handle, inode);
    return 0;
}

/*
 * called at mount time
 */
void ext4_ext_init(struct super_block *sb)
{
    /*
     * possible initialization would be here
     */
}
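
/*
 * Convert the unwritten range [@split, @split + @blocks) of the extent at
 * the end of *@ppath to initialized, splitting the extent as needed.
 *
 * Worked example (illustrative, not from the original source): for an
 * unwritten extent covering logical blocks [100, 120), a request with
 * @split = 105 and @blocks = 5 takes the third branch below and yields
 * three extents: [100, 105) unwritten, [105, 110) initialized, and
 * [110, 120) unwritten.
 */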
static int ext4_ext_convert_to_initialized(
        void *icb,
        handle_t *handle,
        struct inode *inode,
        struct ext4_ext_path **ppath,
        ext4_lblk_t split,
        unsigned long blocks,
        int flags)
{
    int depth = ext_depth(inode), err;
    struct ext4_extent *ex = (*ppath)[depth].p_ext;

    assert(le32_to_cpu(ex->ee_block) <= split);

    if (split + blocks == le32_to_cpu(ex->ee_block) +
            ext4_ext_get_actual_len(ex)) {

        /* split and initialize the right part */
        err = ext4_split_extent_at(icb, handle, inode, ppath, split,
                EXT4_EXT_MARK_UNWRIT1, flags);

    } else if (le32_to_cpu(ex->ee_block) == split) {

        /* split and initialize the left part */
        err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks,
                EXT4_EXT_MARK_UNWRIT2, flags);

    } else {

        /* split one extent into three and initialize the second */
        err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks,
                EXT4_EXT_MARK_UNWRIT1 |
                EXT4_EXT_MARK_UNWRIT2, flags);
        if (0 == err) {
            err = ext4_split_extent_at(icb, handle, inode, ppath, split,
                    EXT4_EXT_MARK_UNWRIT1, flags);
        }
    }

    return err;
}

int ext4_ext_get_blocks(void *icb, handle_t *handle, struct inode *inode, ext4_fsblk_t iblock,
        unsigned long max_blocks, struct buffer_head *bh_result,
        int create, int flags)
{
    struct ext4_ext_path *path = NULL;
    struct ext4_extent newex, *ex;
    int goal, err = 0, depth;
    unsigned long allocated = 0;
    ext4_fsblk_t next, newblock;

    clear_buffer_new(bh_result);
    /*mutex_lock(&ext4_I(inode)->truncate_mutex);*/

    /* find the extent for this block */
    path = ext4_find_extent(inode, iblock, NULL, 0);
    if (IS_ERR(path)) {
        err = PTR_ERR(path);
        path = NULL;
        goto out2;
    }

    depth = ext_depth(inode);

    /*
     * a consistent leaf must not be empty;
     * this situation is possible, though, _during_ tree modification,
     * which is why the assert can't be put in ext4_find_extent()
     */
    BUG_ON(path[depth].p_ext == NULL && depth != 0);

    if ((ex = path[depth].p_ext)) {
        ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
        ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
        unsigned short ee_len = ext4_ext_get_actual_len(ex);
        /* if the found extent covers the block, simply return it */
        if (iblock >= ee_block && iblock < ee_block + ee_len) {

            /* number of remaining blocks in the extent */
            allocated = ee_len + ee_block - iblock;

            if (ext4_ext_is_unwritten(ex)) {
                if (create) {
                    newblock = iblock - ee_block + ee_start;
                    err = ext4_ext_convert_to_initialized(
                            icb, handle,
                            inode,
                            &path,
                            iblock,
                            allocated,
                            flags);
                    if (err)
                        goto out2;

                } else {
                    newblock = 0;
                }
            } else {
                newblock = iblock - ee_block + ee_start;
            }
            goto out;
        }
    }

    /*
     * the requested block isn't allocated yet;
     * we can't try to create a block if the create flag is zero
     */
    if (!create) {
        goto out2;
    }

    /* find the next allocated block so that we know how many
     * blocks we can allocate without overlapping the next extent */
    next = ext4_ext_next_allocated_block(path);
    BUG_ON(next <= iblock);
    allocated = next - iblock;
    if (flags & EXT4_GET_BLOCKS_PRE_IO && max_blocks > EXT_UNWRITTEN_MAX_LEN)
        max_blocks = EXT_UNWRITTEN_MAX_LEN;
    if (allocated > max_blocks)
        allocated = max_blocks;

    /* allocate a new block */
    goal = ext4_ext_find_goal(inode, path, iblock);

    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, 0,
            &allocated, &err);
    if (!newblock)
        goto out2;

    /* try to insert the new extent into the found leaf and return */
    newex.ee_block = cpu_to_le32(iblock);
    ext4_ext_store_pblock(&newex, newblock);
    newex.ee_len = cpu_to_le16(allocated);
    /* if it's fallocate, mark the extent as unwritten */
    if (flags & EXT4_GET_BLOCKS_PRE_IO) {
        ext4_ext_mark_unwritten(&newex);
    }
    err = ext4_ext_insert_extent(icb, handle, inode, &path, &newex,
            flags & EXT4_GET_BLOCKS_PRE_IO);

    if (err) {
        /* free the data blocks we just allocated; use the actual length
         * so the unwritten bit in ee_len is not counted as blocks */
        ext4_free_blocks(icb, handle, inode, NULL, ext4_ext_pblock(&newex),
                ext4_ext_get_actual_len(&newex), get_default_free_blocks_flags(inode));
        goto out2;
    }

    ext4_mark_inode_dirty(icb, handle, inode);

    /* the previous routine could use the block we allocated */
    if (ext4_ext_is_unwritten(&newex))
        newblock = 0;
    else
        newblock = ext4_ext_pblock(&newex);

    set_buffer_new(bh_result);

out:
    if (allocated > max_blocks)
        allocated = max_blocks;

    ext4_ext_show_leaf(inode, path);
    set_buffer_mapped(bh_result);
    bh_result->b_bdev = inode->i_sb->s_bdev;
    bh_result->b_blocknr = newblock;
out2:
    if (path) {
        ext4_ext_drop_refs(path);
        kfree(path);
    }
    /*mutex_unlock(&ext4_I(inode)->truncate_mutex);*/

    return err ? err : allocated;
}
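
/*
 * Usage sketch (illustrative only; "irp_context" is a hypothetical
 * PEXT2_IRP_CONTEXT supplied by the caller, not part of this file):
 *
 *	struct buffer_head bh;
 *	int n;
 *
 *	memset(&bh, 0, sizeof(bh));
 *	// map up to 16 blocks starting at logical block 0,
 *	// allocating them if they are absent (create != 0)
 *	n = ext4_ext_get_blocks(irp_context, NULL, inode, 0, 16, &bh, 1, 0);
 *	// n > 0: number of contiguous blocks mapped at bh.b_blocknr
 *	// n < 0: negative errno-style error code
 */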

int ext4_ext_truncate(void *icb, struct inode *inode, unsigned long start)
{
    int ret = ext4_ext_remove_space(icb, inode, start);

    /* Save modifications on the i_blocks field of the inode. */
    if (!ret)
        ret = ext4_mark_inode_dirty(icb, NULL, inode);

    return ret;
}

#if defined(_MSC_VER) && defined(__REACTOS__)
#pragma warning(pop)
#endif