1 /* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5 /* 6 * Now we have all buffers that must be used in balancing of the tree 7 * Further calculations can not cause schedule(), and thus the buffer 8 * tree will be stable until the balancing will be finished 9 * balance the tree according to the analysis made before, 10 * and using buffers obtained after all above. 11 */ 12 13 #include <asm/uaccess.h> 14 #include <linux/time.h> 15 #include "reiserfs.h" 16 #include <linux/buffer_head.h> 17 #include <linux/kernel.h> 18 19 static inline void buffer_info_init_left(struct tree_balance *tb, 20 struct buffer_info *bi) 21 { 22 bi->tb = tb; 23 bi->bi_bh = tb->L[0]; 24 bi->bi_parent = tb->FL[0]; 25 bi->bi_position = get_left_neighbor_position(tb, 0); 26 } 27 28 static inline void buffer_info_init_right(struct tree_balance *tb, 29 struct buffer_info *bi) 30 { 31 bi->tb = tb; 32 bi->bi_bh = tb->R[0]; 33 bi->bi_parent = tb->FR[0]; 34 bi->bi_position = get_right_neighbor_position(tb, 0); 35 } 36 37 static inline void buffer_info_init_tbS0(struct tree_balance *tb, 38 struct buffer_info *bi) 39 { 40 bi->tb = tb; 41 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); 42 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); 43 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1); 44 } 45 46 static inline void buffer_info_init_bh(struct tree_balance *tb, 47 struct buffer_info *bi, 48 struct buffer_head *bh) 49 { 50 bi->tb = tb; 51 bi->bi_bh = bh; 52 bi->bi_parent = NULL; 53 bi->bi_position = 0; 54 } 55 56 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, 57 struct buffer_head *bh, int flag) 58 { 59 journal_mark_dirty(tb->transaction_handle, bh); 60 } 61 62 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty 63 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty 64 65 /* 66 * summary: 67 * if deleting something ( tb->insert_size[0] < 0 ) 68 * return(balance_leaf_when_delete()); (flag d handled here) 69 * else 70 * if lnum is larger 
than 0 we put items into the left node 71 * if rnum is larger than 0 we put items into the right node 72 * if snum1 is larger than 0 we put items into the new node s1 73 * if snum2 is larger than 0 we put items into the new node s2 74 * Note that all *num* count new items being created. 75 * 76 * It would be easier to read balance_leaf() if each of these summary 77 * lines was a separate procedure rather than being inlined. I think 78 * that there are many passages here and in balance_leaf_when_delete() in 79 * which two calls to one procedure can replace two passages, and it 80 * might save cache space and improve software maintenance costs to do so. 81 * 82 * Vladimir made the perceptive comment that we should offload most of 83 * the decision making in this function into fix_nodes/check_balance, and 84 * then create some sort of structure in tb that says what actions should 85 * be performed by do_balance. 86 * 87 * -Hans 88 */ 89 90 /* 91 * Balance leaf node in case of delete or cut: insert_size[0] < 0 92 * 93 * lnum, rnum can have values >= -1 94 * -1 means that the neighbor must be joined with S 95 * 0 means that nothing should be done with the neighbor 96 * >0 means to shift entirely or partly the specified number of items 97 * to the neighbor 98 */ 99 static int balance_leaf_when_delete(struct tree_balance *tb, int flag) 100 { 101 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 102 int item_pos = PATH_LAST_POSITION(tb->tb_path); 103 int pos_in_item = tb->tb_path->pos_in_item; 104 struct buffer_info bi; 105 int n; 106 struct item_head *ih; 107 108 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, 109 "vs- 12000: level: wrong FR %z", tb->FR[0]); 110 RFALSE(tb->blknum[0] > 1, 111 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); 112 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), 113 "PAP-12010: tree can not be empty"); 114 115 ih = item_head(tbS0, item_pos); 116 buffer_info_init_tbS0(tb, &bi); 117 118 /* 
Delete or truncate the item */ 119 120 switch (flag) { 121 case M_DELETE: /* delete item in S[0] */ 122 123 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], 124 "vs-12013: mode Delete, insert size %d, ih to be deleted %h", 125 -tb->insert_size[0], ih); 126 127 leaf_delete_items(&bi, 0, item_pos, 1, -1); 128 129 if (!item_pos && tb->CFL[0]) { 130 if (B_NR_ITEMS(tbS0)) { 131 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 132 0); 133 } else { 134 if (!PATH_H_POSITION(tb->tb_path, 1)) 135 replace_key(tb, tb->CFL[0], tb->lkey[0], 136 PATH_H_PPARENT(tb->tb_path, 137 0), 0); 138 } 139 } 140 141 RFALSE(!item_pos && !tb->CFL[0], 142 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], 143 tb->L[0]); 144 145 break; 146 147 case M_CUT:{ /* cut item in S[0] */ 148 if (is_direntry_le_ih(ih)) { 149 150 /* 151 * UFS unlink semantics are such that you 152 * can only delete one directory entry at 153 * a time. 154 */ 155 156 /* 157 * when we cut a directory tb->insert_size[0] 158 * means number of entries to be cut (always 1) 159 */ 160 tb->insert_size[0] = -1; 161 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 162 -tb->insert_size[0]); 163 164 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], 165 "PAP-12030: can not change delimiting key. CFL[0]=%p", 166 tb->CFL[0]); 167 168 if (!item_pos && !pos_in_item && tb->CFL[0]) { 169 replace_key(tb, tb->CFL[0], tb->lkey[0], 170 tbS0, 0); 171 } 172 } else { 173 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 174 -tb->insert_size[0]); 175 176 RFALSE(!ih_item_len(ih), 177 "PAP-12035: cut must leave non-zero dynamic length of item"); 178 } 179 break; 180 } 181 182 default: 183 print_cur_tb("12040"); 184 reiserfs_panic(tb->tb_sb, "PAP-12040", 185 "unexpected mode: %s(%d)", 186 (flag == 187 M_PASTE) ? "PASTE" : ((flag == 188 M_INSERT) ? 
"INSERT" : 189 "UNKNOWN"), flag); 190 } 191 192 /* 193 * the rule is that no shifting occurs unless by shifting 194 * a node can be freed 195 */ 196 n = B_NR_ITEMS(tbS0); 197 /* L[0] takes part in balancing */ 198 if (tb->lnum[0]) { 199 /* L[0] must be joined with S[0] */ 200 if (tb->lnum[0] == -1) { 201 /* R[0] must be also joined with S[0] */ 202 if (tb->rnum[0] == -1) { 203 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { 204 /* 205 * all contents of all the 3 buffers 206 * will be in L[0] 207 */ 208 if (PATH_H_POSITION(tb->tb_path, 1) == 0 209 && 1 < B_NR_ITEMS(tb->FR[0])) 210 replace_key(tb, tb->CFL[0], 211 tb->lkey[0], 212 tb->FR[0], 1); 213 214 leaf_move_items(LEAF_FROM_S_TO_L, tb, n, 215 -1, NULL); 216 leaf_move_items(LEAF_FROM_R_TO_L, tb, 217 B_NR_ITEMS(tb->R[0]), 218 -1, NULL); 219 220 reiserfs_invalidate_buffer(tb, tbS0); 221 reiserfs_invalidate_buffer(tb, 222 tb->R[0]); 223 224 return 0; 225 } 226 /* 227 * all contents of all the 3 buffers will 228 * be in R[0] 229 */ 230 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, 231 NULL); 232 leaf_move_items(LEAF_FROM_L_TO_R, tb, 233 B_NR_ITEMS(tb->L[0]), -1, NULL); 234 235 /* right_delimiting_key is correct in R[0] */ 236 replace_key(tb, tb->CFR[0], tb->rkey[0], 237 tb->R[0], 0); 238 239 reiserfs_invalidate_buffer(tb, tbS0); 240 reiserfs_invalidate_buffer(tb, tb->L[0]); 241 242 return -1; 243 } 244 245 RFALSE(tb->rnum[0] != 0, 246 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); 247 /* all contents of L[0] and S[0] will be in L[0] */ 248 leaf_shift_left(tb, n, -1); 249 250 reiserfs_invalidate_buffer(tb, tbS0); 251 252 return 0; 253 } 254 255 /* 256 * a part of contents of S[0] will be in L[0] and the 257 * rest part of S[0] will be in R[0] 258 */ 259 260 RFALSE((tb->lnum[0] + tb->rnum[0] < n) || 261 (tb->lnum[0] + tb->rnum[0] > n + 1), 262 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent", 263 tb->rnum[0], tb->lnum[0], n); 264 RFALSE((tb->lnum[0] + tb->rnum[0] == n) && 265 
(tb->lbytes != -1 || tb->rbytes != -1), 266 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split", 267 tb->rbytes, tb->lbytes); 268 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && 269 (tb->lbytes < 1 || tb->rbytes != -1), 270 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split", 271 tb->rbytes, tb->lbytes); 272 273 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 274 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 275 276 reiserfs_invalidate_buffer(tb, tbS0); 277 278 return 0; 279 } 280 281 if (tb->rnum[0] == -1) { 282 /* all contents of R[0] and S[0] will be in R[0] */ 283 leaf_shift_right(tb, n, -1); 284 reiserfs_invalidate_buffer(tb, tbS0); 285 return 0; 286 } 287 288 RFALSE(tb->rnum[0], 289 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); 290 return 0; 291 } 292 293 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */ 294 const char *body, /* body of inserted item or bytes to paste */ 295 int flag, /* i - insert, d - delete, c - cut, p - paste 296 (see comment to do_balance) */ 297 struct item_head *insert_key, /* in our processing of one level we sometimes determine what 298 must be inserted into the next higher level. 
This insertion 299 consists of a key or two keys and their corresponding 300 pointers */ 301 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */ 302 ) 303 { 304 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 305 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] 306 of the affected item */ 307 struct buffer_info bi; 308 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ 309 int snum[2]; /* number of items that will be placed 310 into S_new (includes partially shifted 311 items) */ 312 int sbytes[2]; /* if an item is partially shifted into S_new then 313 if it is a directory item 314 it is the number of entries from the item that are shifted into S_new 315 else 316 it is the number of bytes from the item that are shifted into S_new 317 */ 318 int n, i; 319 int ret_val; 320 int pos_in_item; 321 int zeros_num; 322 323 PROC_INFO_INC(tb->tb_sb, balance_at[0]); 324 325 /* Make balance in case insert_size[0] < 0 */ 326 if (tb->insert_size[0] < 0) 327 return balance_leaf_when_delete(tb, flag); 328 329 zeros_num = 0; 330 if (flag == M_INSERT && !body) 331 zeros_num = ih_item_len(ih); 332 333 pos_in_item = tb->tb_path->pos_in_item; 334 /* for indirect item pos_in_item is measured in unformatted node 335 pointers. 
Recalculate to bytes */ 336 if (flag != M_INSERT 337 && is_indirect_le_ih(item_head(tbS0, item_pos))) 338 pos_in_item *= UNFM_P_SIZE; 339 340 if (tb->lnum[0] > 0) { 341 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ 342 if (item_pos < tb->lnum[0]) { 343 /* new item or it part falls to L[0], shift it too */ 344 n = B_NR_ITEMS(tb->L[0]); 345 346 switch (flag) { 347 case M_INSERT: /* insert item into L[0] */ 348 349 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { 350 /* part of new item falls into L[0] */ 351 int new_item_len; 352 int version; 353 354 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1); 355 356 /* Calculate item length to insert to S[0] */ 357 new_item_len = ih_item_len(ih) - tb->lbytes; 358 /* Calculate and check item length to insert to L[0] */ 359 put_ih_item_len(ih, ih_item_len(ih) - new_item_len); 360 361 RFALSE(ih_item_len(ih) <= 0, 362 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", 363 ih_item_len(ih)); 364 365 /* Insert new item into L[0] */ 366 buffer_info_init_left(tb, &bi); 367 leaf_insert_into_buf(&bi, 368 n + item_pos - ret_val, ih, body, 369 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num); 370 371 version = ih_version(ih); 372 373 /* Calculate key component, item length and body to insert into S[0] */ 374 set_le_ih_k_offset(ih, le_ih_k_offset(ih) + 375 (tb-> lbytes << (is_indirect_le_ih(ih) ? 
tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0))); 376 377 put_ih_item_len(ih, new_item_len); 378 if (tb->lbytes > zeros_num) { 379 body += (tb->lbytes - zeros_num); 380 zeros_num = 0; 381 } else 382 zeros_num -= tb->lbytes; 383 384 RFALSE(ih_item_len(ih) <= 0, 385 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", 386 ih_item_len(ih)); 387 } else { 388 /* new item in whole falls into L[0] */ 389 /* Shift lnum[0]-1 items to L[0] */ 390 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes); 391 /* Insert new item into L[0] */ 392 buffer_info_init_left(tb, &bi); 393 leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num); 394 tb->insert_size[0] = 0; 395 zeros_num = 0; 396 } 397 break; 398 399 case M_PASTE: /* append item in L[0] */ 400 401 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { 402 /* we must shift the part of the appended item */ 403 if (is_direntry_le_ih(item_head(tbS0, item_pos))) { 404 405 RFALSE(zeros_num, 406 "PAP-12090: invalid parameter in case of a directory"); 407 /* directory item */ 408 if (tb->lbytes > pos_in_item) { 409 /* new directory entry falls into L[0] */ 410 struct item_head *pasted; 411 int l_pos_in_item = pos_in_item; 412 413 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ 414 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1); 415 if (ret_val && !item_pos) { 416 pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1); 417 l_pos_in_item += ih_entry_count(pasted) - (tb->lbytes -1); 418 } 419 420 /* Append given directory entry to directory item */ 421 buffer_info_init_left(tb, &bi); 422 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num); 423 424 /* previous string prepared space for pasting new entry, following string pastes this entry */ 425 426 /* when we have merge directory item, pos_in_item has been changed too */ 427 428 /* paste new directory entry. 
1 is entry number */ 429 leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item, 430 1, (struct reiserfs_de_head *) body, 431 body + DEH_SIZE, tb->insert_size[0]); 432 tb->insert_size[0] = 0; 433 } else { 434 /* new directory item doesn't fall into L[0] */ 435 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ 436 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 437 } 438 /* Calculate new position to append in item body */ 439 pos_in_item -= tb->lbytes; 440 } else { 441 /* regular object */ 442 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes); 443 RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)), 444 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", 445 ih_item_len(item_head(tbS0, item_pos)),pos_in_item); 446 447 if (tb->lbytes >= pos_in_item) { 448 /* appended item will be in L[0] in whole */ 449 int l_n; 450 451 /* this bytes number must be appended to the last item of L[h] */ 452 l_n = tb->lbytes - pos_in_item; 453 454 /* Calculate new insert_size[0] */ 455 tb->insert_size[0] -= l_n; 456 457 RFALSE(tb->insert_size[0] <= 0, 458 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d", 459 tb->insert_size[0]); 460 ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len 461 (item_head(tbS0, item_pos))); 462 /* Append to body of item in L[0] */ 463 buffer_info_init_left(tb, &bi); 464 leaf_paste_in_buffer 465 (&bi, n + item_pos - ret_val, ih_item_len 466 (item_head(tb->L[0], n + item_pos - ret_val)), 467 l_n, body, 468 zeros_num > l_n ? 
l_n : zeros_num); 469 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */ 470 { 471 int version; 472 int temp_l = l_n; 473 474 RFALSE(ih_item_len(item_head(tbS0, 0)), 475 "PAP-12106: item length must be 0"); 476 RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key 477 (tb->L[0], n + item_pos - ret_val)), 478 "PAP-12107: items must be of the same file"); 479 if (is_indirect_le_ih(item_head(tb->L[0], n + item_pos - ret_val))) { 480 temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT); 481 } 482 /* update key of first item in S0 */ 483 version = ih_version(item_head(tbS0, 0)); 484 set_le_key_k_offset(version, leaf_key(tbS0, 0), 485 le_key_k_offset(version,leaf_key(tbS0, 0)) + temp_l); 486 /* update left delimiting key */ 487 set_le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0]), 488 le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0])) + temp_l); 489 } 490 491 /* Calculate new body, position in item and insert_size[0] */ 492 if (l_n > zeros_num) { 493 body += (l_n - zeros_num); 494 zeros_num = 0; 495 } else 496 zeros_num -= l_n; 497 pos_in_item = 0; 498 499 RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1)) 500 || !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size) 501 || !op_is_left_mergeable(internal_key(tb->CFL[0], tb->lkey[0]), tbS0->b_size), 502 "PAP-12120: item must be merge-able with left neighboring item"); 503 } else { /* only part of the appended item will be in L[0] */ 504 505 /* Calculate position in item for append in S[0] */ 506 pos_in_item -= tb->lbytes; 507 508 RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item); 509 510 /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 byte from item number lnum[0] */ 511 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 512 } 513 } 514 } else { /* appended item will be in L[0] in whole */ 515 516 struct item_head *pasted; 517 518 if (!item_pos && op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */ 519 /* then increment pos_in_item by the size of the last item in L[0] */ 520 pasted = item_head(tb->L[0], n - 1); 521 if (is_direntry_le_ih(pasted)) 522 pos_in_item += ih_entry_count(pasted); 523 else 524 pos_in_item += ih_item_len(pasted); 525 } 526 527 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 528 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 529 /* Append to body of item in L[0] */ 530 buffer_info_init_left(tb, &bi); 531 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, 532 pos_in_item, 533 tb->insert_size[0], 534 body, zeros_num); 535 536 /* if appended item is directory, paste entry */ 537 pasted = item_head(tb->L[0], n + item_pos - ret_val); 538 if (is_direntry_le_ih(pasted)) 539 leaf_paste_entries(&bi, n + item_pos - ret_val, 540 pos_in_item, 1, 541 (struct reiserfs_de_head *) body, 542 body + DEH_SIZE, 543 tb->insert_size[0]); 544 /* if appended item is indirect item, put unformatted node into un list */ 545 if (is_indirect_le_ih(pasted)) 546 set_ih_free_space(pasted, 0); 547 tb->insert_size[0] = 0; 548 zeros_num = 0; 549 } 550 break; 551 default: /* cases d and t */ 552 reiserfs_panic(tb->tb_sb, "PAP-12130", 553 "lnum > 0: unexpected mode: " 554 " %s(%d)", 555 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); 556 } 557 } else { 558 /* new item doesn't fall into L[0] */ 559 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 560 } 561 } 562 563 /* tb->lnum[0] > 0 */ 564 /* Calculate new item position */ 565 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 
1 : 0)); 566 567 if (tb->rnum[0] > 0) { 568 /* shift rnum[0] items from S[0] to the right neighbor R[0] */ 569 n = B_NR_ITEMS(tbS0); 570 switch (flag) { 571 572 case M_INSERT: /* insert item */ 573 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */ 574 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */ 575 loff_t old_key_comp, old_len, r_zeros_number; 576 const char *r_body; 577 int version; 578 loff_t offset; 579 580 leaf_shift_right(tb, tb->rnum[0] - 1, -1); 581 582 version = ih_version(ih); 583 /* Remember key component and item length */ 584 old_key_comp = le_ih_k_offset(ih); 585 old_len = ih_item_len(ih); 586 587 /* Calculate key component and item length to insert into R[0] */ 588 offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0)); 589 set_le_ih_k_offset(ih, offset); 590 put_ih_item_len(ih, tb->rbytes); 591 /* Insert part of the item into R[0] */ 592 buffer_info_init_right(tb, &bi); 593 if ((old_len - tb->rbytes) > zeros_num) { 594 r_zeros_number = 0; 595 r_body = body + (old_len - tb->rbytes) - zeros_num; 596 } else { 597 r_body = body; 598 r_zeros_number = zeros_num - (old_len - tb->rbytes); 599 zeros_num -= r_zeros_number; 600 } 601 602 leaf_insert_into_buf(&bi, 0, ih, r_body, 603 r_zeros_number); 604 605 /* Replace right delimiting key by first key in R[0] */ 606 replace_key(tb, tb->CFR[0], tb->rkey[0], 607 tb->R[0], 0); 608 609 /* Calculate key component and item length to insert into S[0] */ 610 set_le_ih_k_offset(ih, old_key_comp); 611 put_ih_item_len(ih, old_len - tb->rbytes); 612 613 tb->insert_size[0] -= tb->rbytes; 614 615 } else { /* whole new item falls into R[0] */ 616 617 /* Shift rnum[0]-1 items to R[0] */ 618 ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes); 619 /* Insert new item into R[0] */ 620 buffer_info_init_right(tb, &bi); 621 leaf_insert_into_buf(&bi, item_pos - n + 
tb->rnum[0] - 1, 622 ih, body, zeros_num); 623 624 if (item_pos - n + tb->rnum[0] - 1 == 0) { 625 replace_key(tb, tb->CFR[0], 626 tb->rkey[0], 627 tb->R[0], 0); 628 629 } 630 zeros_num = tb->insert_size[0] = 0; 631 } 632 } else { /* new item or part of it doesn't fall into R[0] */ 633 634 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 635 } 636 break; 637 638 case M_PASTE: /* append item */ 639 640 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */ 641 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */ 642 if (is_direntry_le_ih(item_head(tbS0, item_pos))) { /* we append to directory item */ 643 int entry_count; 644 645 RFALSE(zeros_num, 646 "PAP-12145: invalid parameter in case of a directory"); 647 entry_count = ih_entry_count(item_head 648 (tbS0, item_pos)); 649 if (entry_count - tb->rbytes < 650 pos_in_item) 651 /* new directory entry falls into R[0] */ 652 { 653 int paste_entry_position; 654 655 RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0], 656 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", 657 tb->rbytes, entry_count); 658 /* Shift rnum[0]-1 items in whole. 
Shift rbytes-1 directory entries from directory item number rnum[0] */ 659 leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1); 660 /* Paste given directory entry to directory item */ 661 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1; 662 buffer_info_init_right(tb, &bi); 663 leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num); 664 /* paste entry */ 665 leaf_paste_entries(&bi, 0, paste_entry_position, 1, 666 (struct reiserfs_de_head *) body, 667 body + DEH_SIZE, tb->insert_size[0]); 668 669 if (paste_entry_position == 0) { 670 /* change delimiting keys */ 671 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0); 672 } 673 674 tb->insert_size[0] = 0; 675 pos_in_item++; 676 } else { /* new directory entry doesn't fall into R[0] */ 677 678 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 679 } 680 } else { /* regular object */ 681 682 int n_shift, n_rem, r_zeros_number; 683 const char *r_body; 684 685 /* Calculate number of bytes which must be shifted from appended item */ 686 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0) 687 n_shift = 0; 688 689 RFALSE(pos_in_item != ih_item_len 690 (item_head(tbS0, item_pos)), 691 "PAP-12155: invalid position to paste. 
ih_item_len=%d, pos_in_item=%d", 692 pos_in_item, ih_item_len 693 (item_head(tbS0, item_pos))); 694 695 leaf_shift_right(tb, tb->rnum[0], n_shift); 696 /* Calculate number of bytes which must remain in body after appending to R[0] */ 697 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0) 698 n_rem = 0; 699 700 { 701 int version; 702 unsigned long temp_rem = n_rem; 703 704 version = ih_version(item_head(tb->R[0], 0)); 705 if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) { 706 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT); 707 } 708 set_le_key_k_offset(version, leaf_key(tb->R[0], 0), 709 le_key_k_offset(version, leaf_key(tb->R[0], 0)) + temp_rem); 710 set_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]), 711 le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0])) + temp_rem); 712 } 713 /* k_offset (leaf_key(tb->R[0],0)) += n_rem; 714 k_offset (internal_key(tb->CFR[0],tb->rkey[0])) += n_rem;*/ 715 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0); 716 717 /* Append part of body into R[0] */ 718 buffer_info_init_right(tb, &bi); 719 if (n_rem > zeros_num) { 720 r_zeros_number = 0; 721 r_body = body + n_rem - zeros_num; 722 } else { 723 r_body = body; 724 r_zeros_number = zeros_num - n_rem; 725 zeros_num -= r_zeros_number; 726 } 727 728 leaf_paste_in_buffer(&bi, 0, n_shift, 729 tb->insert_size[0] - n_rem, 730 r_body, r_zeros_number); 731 732 if (is_indirect_le_ih(item_head(tb->R[0], 0))) { 733 #if 0 734 RFALSE(n_rem, 735 "PAP-12160: paste more than one unformatted node pointer"); 736 #endif 737 set_ih_free_space(item_head(tb->R[0], 0), 0); 738 } 739 tb->insert_size[0] = n_rem; 740 if (!n_rem) 741 pos_in_item++; 742 } 743 } else { /* pasted item in whole falls into R[0] */ 744 745 struct item_head *pasted; 746 747 ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 748 /* append item in R[0] */ 749 if (pos_in_item >= 0) { 750 buffer_info_init_right(tb, &bi); 751 leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], 
pos_in_item, 752 tb->insert_size[0], body, zeros_num); 753 } 754 755 /* paste new entry, if item is directory item */ 756 pasted = item_head(tb->R[0], item_pos - n + tb->rnum[0]); 757 if (is_direntry_le_ih(pasted) && pos_in_item >= 0) { 758 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0], 759 pos_in_item, 1, 760 (struct reiserfs_de_head *) body, 761 body + DEH_SIZE, tb->insert_size[0]); 762 if (!pos_in_item) { 763 764 RFALSE(item_pos - n + tb->rnum[0], 765 "PAP-12165: directory item must be first item of node when pasting is in 0th position"); 766 767 /* update delimiting keys */ 768 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); 769 } 770 } 771 772 if (is_indirect_le_ih(pasted)) 773 set_ih_free_space(pasted, 0); 774 zeros_num = tb->insert_size[0] = 0; 775 } 776 } else { /* new item doesn't fall into R[0] */ 777 778 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 779 } 780 break; 781 default: /* cases d and t */ 782 reiserfs_panic(tb->tb_sb, "PAP-12175", 783 "rnum > 0: unexpected mode: %s(%d)", 784 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); 785 } 786 787 } 788 789 /* tb->rnum[0] > 0 */ 790 RFALSE(tb->blknum[0] > 3, 791 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]); 792 RFALSE(tb->blknum[0] < 0, 793 "PAP-12185: blknum can not be %d. 
It must be >= 0", tb->blknum[0]); 794 795 /* if while adding to a node we discover that it is possible to split 796 it in two, and merge the left part into the left neighbor and the 797 right part into the right neighbor, eliminating the node */ 798 if (tb->blknum[0] == 0) { /* node S[0] is empty now */ 799 800 RFALSE(!tb->lnum[0] || !tb->rnum[0], 801 "PAP-12190: lnum and rnum must not be zero"); 802 /* if insertion was done before 0-th position in R[0], right 803 delimiting key of the tb->L[0]'s and left delimiting key are 804 not set correctly */ 805 if (tb->CFL[0]) { 806 if (!tb->CFR[0]) 807 reiserfs_panic(tb->tb_sb, "vs-12195", 808 "CFR not initialized"); 809 copy_key(internal_key(tb->CFL[0], tb->lkey[0]), 810 internal_key(tb->CFR[0], tb->rkey[0])); 811 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); 812 } 813 814 reiserfs_invalidate_buffer(tb, tbS0); 815 return 0; 816 } 817 818 /* Fill new nodes that appear in place of S[0] */ 819 820 /* I am told that this copying is because we need an array to enable 821 the looping code. -Hans */ 822 snum[0] = tb->s1num, snum[1] = tb->s2num; 823 sbytes[0] = tb->s1bytes; 824 sbytes[1] = tb->s2bytes; 825 for (i = tb->blknum[0] - 2; i >= 0; i--) { 826 827 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. 
Must be > 0", i, 828 snum[i]); 829 830 /* here we shift from S to S_new nodes */ 831 832 S_new[i] = get_FEB(tb); 833 834 /* initialized block type and tree level */ 835 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL); 836 837 n = B_NR_ITEMS(tbS0); 838 839 switch (flag) { 840 case M_INSERT: /* insert item */ 841 842 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ 843 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ 844 int old_key_comp, old_len, r_zeros_number; 845 const char *r_body; 846 int version; 847 848 /* Move snum[i]-1 items from S[0] to S_new[i] */ 849 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 850 snum[i] - 1, -1, 851 S_new[i]); 852 /* Remember key component and item length */ 853 version = ih_version(ih); 854 old_key_comp = le_ih_k_offset(ih); 855 old_len = ih_item_len(ih); 856 857 /* Calculate key component and item length to insert into S_new[i] */ 858 set_le_ih_k_offset(ih, le_ih_k_offset(ih) + 859 ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? 
tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0))); 860 861 put_ih_item_len(ih, sbytes[i]); 862 863 /* Insert part of the item into S_new[i] before 0-th item */ 864 buffer_info_init_bh(tb, &bi, S_new[i]); 865 866 if ((old_len - sbytes[i]) > zeros_num) { 867 r_zeros_number = 0; 868 r_body = body + (old_len - sbytes[i]) - zeros_num; 869 } else { 870 r_body = body; 871 r_zeros_number = zeros_num - (old_len - sbytes[i]); 872 zeros_num -= r_zeros_number; 873 } 874 875 leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number); 876 877 /* Calculate key component and item length to insert into S[i] */ 878 set_le_ih_k_offset(ih, old_key_comp); 879 put_ih_item_len(ih, old_len - sbytes[i]); 880 tb->insert_size[0] -= sbytes[i]; 881 } else { /* whole new item falls into S_new[i] */ 882 883 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ 884 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 885 snum[i] - 1, sbytes[i], S_new[i]); 886 887 /* Insert new item into S_new[i] */ 888 buffer_info_init_bh(tb, &bi, S_new[i]); 889 leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1, 890 ih, body, zeros_num); 891 892 zeros_num = tb->insert_size[0] = 0; 893 } 894 } 895 896 else { /* new item or it part don't falls into S_new[i] */ 897 898 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 899 snum[i], sbytes[i], S_new[i]); 900 } 901 break; 902 903 case M_PASTE: /* append item */ 904 905 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */ 906 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */ 907 struct item_head *aux_ih; 908 909 RFALSE(ih, "PAP-12210: ih must be 0"); 910 911 aux_ih = item_head(tbS0, item_pos); 912 if (is_direntry_le_ih(aux_ih)) { 913 /* we append to directory item */ 914 915 int entry_count; 916 917 entry_count = ih_entry_count(aux_ih); 918 919 if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) { 920 /* new directory entry falls into S_new[i] */ 921 922 RFALSE(!tb->insert_size[0], 
"PAP-12215: insert_size is already 0"); 923 RFALSE(sbytes[i] - 1 >= entry_count, 924 "PAP-12220: there are no so much entries (%d), only %d", 925 sbytes[i] - 1, entry_count); 926 927 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ 928 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]); 929 /* Paste given directory entry to directory item */ 930 buffer_info_init_bh(tb, &bi, S_new[i]); 931 leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 932 tb->insert_size[0], body, zeros_num); 933 /* paste new directory entry */ 934 leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1, 935 (struct reiserfs_de_head *) body, 936 body + DEH_SIZE, tb->insert_size[0]); 937 tb->insert_size[0] = 0; 938 pos_in_item++; 939 } else { /* new directory entry doesn't fall into S_new[i] */ 940 leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]); 941 } 942 } else { /* regular object */ 943 944 int n_shift, n_rem, r_zeros_number; 945 const char *r_body; 946 947 RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)) || tb->insert_size[0] <= 0, 948 "PAP-12225: item too short or insert_size <= 0"); 949 950 /* Calculate number of bytes which must be shifted from appended item */ 951 n_shift = sbytes[i] - tb->insert_size[0]; 952 if (n_shift < 0) 953 n_shift = 0; 954 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]); 955 956 /* Calculate number of bytes which must remain in body after append to S_new[i] */ 957 n_rem = tb->insert_size[0] - sbytes[i]; 958 if (n_rem < 0) 959 n_rem = 0; 960 /* Append part of body into S_new[0] */ 961 buffer_info_init_bh(tb, &bi, S_new[i]); 962 if (n_rem > zeros_num) { 963 r_zeros_number = 0; 964 r_body = body + n_rem - zeros_num; 965 } else { 966 r_body = body; 967 r_zeros_number = zeros_num - n_rem; 968 zeros_num -= r_zeros_number; 969 } 970 971 leaf_paste_in_buffer(&bi, 0, n_shift, 972 tb->insert_size[0] - 
n_rem, 973 r_body, r_zeros_number); 974 { 975 struct item_head *tmp; 976 977 tmp = item_head(S_new[i], 0); 978 if (is_indirect_le_ih 979 (tmp)) { 980 set_ih_free_space(tmp, 0); 981 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT))); 982 } else { 983 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem); 984 } 985 } 986 987 tb->insert_size[0] = n_rem; 988 if (!n_rem) 989 pos_in_item++; 990 } 991 } else 992 /* item falls wholly into S_new[i] */ 993 { 994 int leaf_mi; 995 struct item_head *pasted; 996 997 #ifdef CONFIG_REISERFS_CHECK 998 struct item_head *ih_check = item_head(tbS0, item_pos); 999 1000 if (!is_direntry_le_ih(ih_check) 1001 && (pos_in_item != ih_item_len(ih_check) 1002 || tb->insert_size[0] <= 0)) 1003 reiserfs_panic(tb->tb_sb, 1004 "PAP-12235", 1005 "pos_in_item " 1006 "must be equal " 1007 "to ih_item_len"); 1008 #endif /* CONFIG_REISERFS_CHECK */ 1009 1010 leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW, 1011 tb, snum[i], 1012 sbytes[i], 1013 S_new[i]); 1014 1015 RFALSE(leaf_mi, 1016 "PAP-12240: unexpected value returned by leaf_move_items (%d)", 1017 leaf_mi); 1018 1019 /* paste into item */ 1020 buffer_info_init_bh(tb, &bi, S_new[i]); 1021 leaf_paste_in_buffer(&bi, 1022 item_pos - n + snum[i], 1023 pos_in_item, 1024 tb->insert_size[0], 1025 body, zeros_num); 1026 1027 pasted = item_head(S_new[i], item_pos - n + snum[i]); 1028 if (is_direntry_le_ih(pasted)) { 1029 leaf_paste_entries(&bi, 1030 item_pos - n + snum[i], 1031 pos_in_item, 1, 1032 (struct reiserfs_de_head *)body, 1033 body + DEH_SIZE, 1034 tb->insert_size[0] 1035 ); 1036 } 1037 1038 /* if we paste to indirect item update ih_free_space */ 1039 if (is_indirect_le_ih(pasted)) 1040 set_ih_free_space(pasted, 0); 1041 zeros_num = tb->insert_size[0] = 0; 1042 } 1043 } 1044 1045 else { /* pasted item doesn't fall into S_new[i] */ 1046 1047 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1048 snum[i], sbytes[i], S_new[i]); 1049 } 1050 break; 1051 default: 
/* cases d and t */ 1052 reiserfs_panic(tb->tb_sb, "PAP-12245", 1053 "blknum > 2: unexpected mode: %s(%d)", 1054 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); 1055 } 1056 1057 memcpy(insert_key + i, leaf_key(S_new[i], 0), KEY_SIZE); 1058 insert_ptr[i] = S_new[i]; 1059 1060 RFALSE(!buffer_journaled(S_new[i]) 1061 || buffer_journal_dirty(S_new[i]) 1062 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)", 1063 i, S_new[i]); 1064 } 1065 1066 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the 1067 affected item which remains in S */ 1068 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */ 1069 1070 switch (flag) { 1071 case M_INSERT: /* insert item into S[0] */ 1072 buffer_info_init_tbS0(tb, &bi); 1073 leaf_insert_into_buf(&bi, item_pos, ih, body, 1074 zeros_num); 1075 1076 /* If we insert the first key change the delimiting key */ 1077 if (item_pos == 0) { 1078 if (tb->CFL[0]) /* can be 0 in reiserfsck */ 1079 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); 1080 } 1081 break; 1082 1083 case M_PASTE:{ /* append item in S[0] */ 1084 struct item_head *pasted; 1085 1086 pasted = item_head(tbS0, item_pos); 1087 /* when directory, may be new entry already pasted */ 1088 if (is_direntry_le_ih(pasted)) { 1089 if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) { 1090 1091 RFALSE(!tb->insert_size[0], 1092 "PAP-12260: insert_size is 0 already"); 1093 1094 /* prepare space */ 1095 buffer_info_init_tbS0(tb, &bi); 1096 leaf_paste_in_buffer(&bi, item_pos, pos_in_item, 1097 tb->insert_size[0], body, 1098 zeros_num); 1099 1100 /* paste entry */ 1101 leaf_paste_entries(&bi, item_pos, pos_in_item, 1, 1102 (struct reiserfs_de_head *)body, 1103 body + DEH_SIZE, 1104 tb->insert_size[0]); 1105 if (!item_pos && !pos_in_item) { 1106 RFALSE(!tb->CFL[0] || !tb->L[0], 1107 "PAP-12270: CFL[0]/L[0] must be specified"); 1108 if (tb->CFL[0]) 
							replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
						}
						tb->insert_size[0] = 0;
					}
				} else {	/* regular object */
					if (pos_in_item == ih_item_len(pasted)) {

						RFALSE(tb->insert_size[0] <= 0,
						       "PAP-12275: insert size must not be %d",
						       tb->insert_size[0]);
						buffer_info_init_tbS0(tb, &bi);
						leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
								     tb->insert_size[0], body, zeros_num);

						/* if we paste to indirect item update ih_free_space */
						if (is_indirect_le_ih(pasted)) {
#if 0
							RFALSE(tb->insert_size[0] != UNFM_P_SIZE,
							       "PAP-12280: insert_size for indirect item must be %d, not %d",
							       UNFM_P_SIZE,
							       tb->insert_size[0]);
#endif
							set_ih_free_space(pasted, 0);
						}
						tb->insert_size[0] = 0;
					}
#ifdef CONFIG_REISERFS_CHECK
					else {
						if (tb->insert_size[0]) {
							print_cur_tb("12285");
							reiserfs_panic(tb->tb_sb,
								       "PAP-12285",
								       "insert_size "
								       "must be 0 "
								       "(%d)",
								       tb->insert_size[0]);
						}
					}
#endif				/* CONFIG_REISERFS_CHECK */

				}
			}	/* case M_PASTE: */
		}
	}
#ifdef CONFIG_REISERFS_CHECK
	/* by now the whole insert/paste payload must have been consumed */
	if (flag == M_PASTE && tb->insert_size[0]) {
		print_cur_tb("12290");
		reiserfs_panic(tb->tb_sb,
			       "PAP-12290", "insert_size is still not 0 (%d)",
			       tb->insert_size[0]);
	}
#endif				/* CONFIG_REISERFS_CHECK */
	return 0;
}				/* Leaf level of the tree is balanced (end of balance_leaf) */

/*
 * Make empty node: reset the block header of the node referenced by @bi
 * so that it holds zero items and all of its payload counts as free
 * space; if the node has a parent, zero the size recorded in the
 * parent's disk_child slot as well.
 */
void make_empty_node(struct buffer_info *bi)
{
	struct block_head *blkh;

	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");

	blkh = B_BLK_HEAD(bi->bi_bh);
	set_blkh_nr_item(blkh, 0);
	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));

	if (bi->bi_parent)
		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
}

/*
 * Get first empty buffer: take the first buffer pre-allocated in
 * tb->FEB[] (panics if none is left), format it as an empty node, and
 * move it to tb->used[] so unfix_nodes() can account for it later.
 */
struct buffer_head *get_FEB(struct tree_balance *tb)
{
	int i;
	struct buffer_info bi;

	for (i = 0; i < MAX_FEB_SIZE; i++)
		if (tb->FEB[i] != NULL)
			break;

	if (i == MAX_FEB_SIZE)
		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");

	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
	make_empty_node(&bi);
	set_buffer_uptodate(tb->FEB[i]);
	tb->used[i] = tb->FEB[i];
	tb->FEB[i] = NULL;

	return tb->used[i];
}

/*
 * This is now used because reiserfs_free_block has to be able to
 * schedule: instead of freeing the block during balancing, stash the
 * buffer on tb->thrown[] (taking an extra reference) and let
 * free_thrown() release it after balancing is complete.
 */
static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
{
	int i;

	if (buffer_dirty(bh))
		reiserfs_warning(tb->tb_sb, "reiserfs-12320",
				 "called with dirty buffer");
	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
		if (!tb->thrown[i]) {
			tb->thrown[i] = bh;
			get_bh(bh);	/* free_thrown puts this */
			return;
		}
	reiserfs_warning(tb->tb_sb, "reiserfs-12321",
			 "too many thrown buffers");
}

/*
 * Release every buffer queued by store_thrown() and return its block to
 * the free list. Safe to call once balancing is done and scheduling is
 * allowed again.
 */
static void free_thrown(struct tree_balance *tb)
{
	int i;
	b_blocknr_t blocknr;
	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
		if (tb->thrown[i]) {
			blocknr = tb->thrown[i]->b_blocknr;
			if (buffer_dirty(tb->thrown[i]))
				reiserfs_warning(tb->tb_sb, "reiserfs-12322",
						 "called with dirty buffer %d",
						 blocknr);
			brelse(tb->thrown[i]);	/* incremented in store_thrown */
			reiserfs_free_block(tb->transaction_handle, NULL,
					    blocknr, 0);
		}
	}
}

/*
 * Mark a node as removed from the tree: stamp its block header with
 * FREE_LEVEL and zero items, drop the dirty bit, and queue it for
 * deferred freeing via store_thrown().
 */
void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
{
	struct block_head *blkh;
	blkh = B_BLK_HEAD(bh);
	set_blkh_level(blkh, FREE_LEVEL);
	set_blkh_nr_item(blkh, 0);

	clear_buffer_dirty(bh);
	store_thrown(tb, bh);
}

/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
		 struct buffer_head *src, int n_src)
{

	RFALSE(dest == NULL || src == NULL,
	       "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
	       src, dest);
	RFALSE(!B_IS_KEYS_LEVEL(dest),
	       "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
	       dest);
	RFALSE(n_dest < 0 || n_src < 0,
	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));

	if (B_IS_ITEMS_LEVEL(src))
		/* source buffer contains leaf node */
		memcpy(internal_key(dest, n_dest), item_head(src, n_src),
		       KEY_SIZE);
	else
		memcpy(internal_key(dest, n_dest), internal_key(src, n_src),
		       KEY_SIZE);

	do_balance_mark_internal_dirty(tb, dest, 0);
}

/*
 * Position, within the common parent FL[h], of the pointer to S[h]'s
 * left neighbor. When S[h] is its parent's leftmost child the left
 * neighbor is the last child of FL[h].
 */
int get_left_neighbor_position(struct tree_balance *tb, int h)
{
	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);

	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
	       "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
	       h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));

	if (Sh_position == 0)
		return B_NR_ITEMS(tb->FL[h]);
	else
		return Sh_position - 1;
}

/*
 * Position, within the common parent FR[h], of the pointer to S[h]'s
 * right neighbor. When S[h] is its parent's rightmost child the right
 * neighbor is the first child of FR[h].
 */
int get_right_neighbor_position(struct tree_balance *tb, int h)
{
	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);

	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
	       "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
	       h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);

	if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
		return 0;
	else
		return Sh_position + 1;
}

#ifdef CONFIG_REISERFS_CHECK

int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);

/*
 * Debug check: an internal node touched by balancing must be dirty (or
 * journaled) and every one of its child block pointers must reference a
 * usable block; panics on a bad pointer.
 */
static void check_internal_node(struct super_block *s, struct buffer_head *bh,
				char *mes)
{
	struct disk_child *dc;
	int i;

	RFALSE(!bh, "PAP-12336: bh == 0");

	if (!bh || !B_IS_IN_TREE(bh))
		return;

	RFALSE(!buffer_dirty(bh) &&
	       !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
	       "PAP-12337: buffer (%b) must be dirty", bh);
	dc = B_N_CHILD(bh, 0);

	/* an internal node has B_NR_ITEMS keys and B_NR_ITEMS+1 children */
	for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
		if (!is_reusable(s, dc_block_number(dc), 1)) {
			print_cur_tb(mes);
			reiserfs_panic(s, "PAP-12338",
				       "invalid child pointer %y in %b",
				       dc, bh);
		}
	}
}

/*
 * Debug check: warn and return 1 if @bh is locked without having been
 * journal-prepared, or has fallen out of the tree; 0 if it is usable.
 */
static int locked_or_not_in_tree(struct tree_balance *tb,
				 struct buffer_head *bh, char *which)
{
	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
	    !B_IS_IN_TREE(bh)) {
		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
		return 1;
	}
	return 0;
}

/*
 * Debug check run before balancing starts: panic if another balance
 * appears to be in flight on this mount (cur_tb non-NULL), then verify
 * that every buffer we are about to modify is unlocked and in the tree.
 * Returns nonzero if any buffer failed the check.
 */
static int check_before_balancing(struct tree_balance *tb)
{
	int retval = 0;

	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
			       "occurred based on cur_tb not being null at "
			       "this point in code. do_balance cannot properly "
			       "handle concurrent tree accesses on a same "
			       "mount point.");
	}

	/*
	 * double check that buffers that we will modify are unlocked.
	 * (fix_nodes should already have prepped all of these for us).
	 */
	if (tb->lnum[0]) {
		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
		retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
		check_leaf(tb->L[0]);
	}
	if (tb->rnum[0]) {
		retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
		retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
		retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
		check_leaf(tb->R[0]);
	}
	retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
					"S[0]");
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));

	return retval;
}

/*
 * Debug check run after the leaf level is balanced: the free space of
 * each modified leaf (L[0], R[0], S[0]) must agree with the child size
 * recorded in its parent's disk_child entry; panic on any mismatch.
 */
static void check_after_balance_leaf(struct tree_balance *tb)
{
	if (tb->lnum[0]) {
		if (B_FREE_SPACE(tb->L[0]) !=
		    MAX_CHILD_SIZE(tb->L[0]) -
		    dc_size(B_N_CHILD
			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
			print_cur_tb("12221");
			reiserfs_panic(tb->tb_sb, "PAP-12355",
				       "shift to left was incorrect");
		}
	}
	if (tb->rnum[0]) {
		if (B_FREE_SPACE(tb->R[0]) !=
		    MAX_CHILD_SIZE(tb->R[0]) -
		    dc_size(B_N_CHILD
			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
			print_cur_tb("12222");
			reiserfs_panic(tb->tb_sb, "PAP-12360",
				       "shift to right was incorrect");
		}
	}
	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
				PATH_H_POSITION(tb->tb_path, 1)))))) {
		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
					       PATH_H_POSITION(tb->tb_path,
							       1))));
		print_cur_tb("12223");
		reiserfs_warning(tb->tb_sb, "reiserfs-12363",
				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
				 left,
				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
				 PATH_H_PBUFFER(tb->tb_path, 1),
				 PATH_H_POSITION(tb->tb_path, 1),
				 dc_size(B_N_CHILD
					 (PATH_H_PBUFFER(tb->tb_path, 1),
					  PATH_H_POSITION(tb->tb_path, 1))),
				 right);
		reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
	}
}

/* Debug check: validate the three leaves involved in the balance. */
static void check_leaf_level(struct tree_balance *tb)
{
	check_leaf(tb->L[0]);
	check_leaf(tb->R[0]);
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
}

/*
 * Debug check: validate every internal level that took part in the
 * balance (insert_size[h] != 0 marks a level as touched).
 */
static void check_internal_levels(struct tree_balance *tb)
{
	int h;

	/* check all internal nodes */
	for (h = 1; tb->insert_size[h]; h++) {
		check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
				    "BAD BUFFER ON PATH");
		if (tb->lnum[h])
			check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
		if (tb->rnum[h])
			check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
	}

}

#endif

/*
 * Now we have all of the buffers that must be used in balancing of
 * the tree. We rely on the assumption that schedule() will not occur
 * while do_balance works. ( Only interrupt handlers are acceptable.)
 * We balance the tree according to the analysis made before this,
 * using buffers already obtained. For SMP support it will someday be
 * necessary to add ordered locking of tb.
 */

/*
 * Some interesting rules of balancing:
 * we delete a maximum of two nodes per level per balancing: we never
 * delete R, when we delete two of three nodes L, S, R then we move
 * them into R.
 *
 * we only delete L if we are deleting two nodes, if we delete only
 * one node we delete S
 *
 * if we shift leaves then we shift as much as we can: this is a
 * deliberate policy of extremism in node packing which results in
 * higher average utilization after repeated random balance operations
 * at the cost of more memory copies and more balancing as a result of
 * small insertions to full nodes.
 *
 * if we shift internal nodes we try to evenly balance the node
 * utilization, with consequent less balancing at the cost of lower
 * utilization.
 *
 * one could argue that the policy for directories in leaves should be
 * that of internal nodes, but we will wait until another day to
 * evaluate this.... It would be nice to someday measure and prove
 * these assumptions as to what is optimal....
 */

/*
 * Pre-balance hook: run the debug sanity checks and, when
 * CONFIG_REISERFS_CHECK is enabled, record this tb as the one balance
 * in progress on the mount (cleared again in do_balance_completed).
 */
static inline void do_balance_starts(struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
	/*
	print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
		 tb->tb_path->pos_in_item, tb, "check");
	*/
	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
#endif
}

/*
 * Post-balance hook: run the debug checks on the result, bump the
 * per-mount balance counter, release all nodes held for the balance,
 * and only then free the blocks queued on the thrown list.
 */
static inline void do_balance_completed(struct tree_balance *tb)
{

#ifdef CONFIG_REISERFS_CHECK
	check_leaf_level(tb);
	check_internal_levels(tb);
	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
#endif

	/*
	 * reiserfs_free_block is no longer schedule safe. So, we need to
	 * put the buffers we want freed on the thrown list during do_balance,
	 * and then free them now
	 */

	REISERFS_SB(tb->tb_sb)->s_do_balance++;

	/* release all nodes hold to perform the balancing */
	unfix_nodes(tb);

	free_thrown(tb);
}

/*
 * do_balance - balance the tree
 *
 * @tb: tree_balance structure
 * @ih: item header of inserted item
 * @body: body of inserted item or bytes to paste
 * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste
 *
 * Cut means delete part of an item (includes removing an entry from a
 * directory).
 *
 * Delete means delete whole item.
 *
 * Insert means add a new item into the tree.
 *
 * Paste means to append to the end of an existing file or to
 * insert a directory entry.
 */
void do_balance(struct tree_balance *tb, struct item_head *ih,
		const char *body, int flag)
{
	int child_pos;		/* position of a child node in its parent */
	int h;			/* level of the tree being processed */

	/*
	 * in our processing of one level we sometimes determine what
	 * must be inserted into the next higher level. This insertion
	 * consists of a key or two keys and their corresponding
	 * pointers
	 */
	struct item_head insert_key[2];

	/* inserted node-ptrs for the next level */
	struct buffer_head *insert_ptr[2];

	tb->tb_mode = flag;
	tb->need_balance_dirty = 0;

	/* the analysis in fix_nodes() is stale if the fs changed since */
	if (FILESYSTEM_CHANGED_TB(tb)) {
		reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
			       "changed");
	}
	/* if we have no real work to do */
	if (!tb->insert_size[0]) {
		reiserfs_warning(tb->tb_sb, "PAP-12350",
				 "insert_size == 0, mode == %c", flag);
		unfix_nodes(tb);
		return;
	}

	atomic_inc(&fs_generation(tb->tb_sb));
	do_balance_starts(tb);

	/*
	 * balance_leaf returns 0 except if combining L R and S into
	 * one node. see balance_internal() for explanation of this
	 * line of code.
	 */
	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);

#ifdef CONFIG_REISERFS_CHECK
	check_after_balance_leaf(tb);
#endif

	/* Balance internal level of the tree. */
	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
		child_pos =
		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);

	do_balance_completed(tb);

}