xref: /linux/fs/reiserfs/do_balan.c (revision a228bf8f)
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
/*
 * Now we have all the buffers that must be used in balancing of the tree.
 * Further calculations cannot cause schedule(), and thus the buffer tree
 * will be stable until the balancing is finished.  Balance the tree
 * according to the analysis made before, using the buffers obtained
 * after all of the above.
 */
12 
13 #include <asm/uaccess.h>
14 #include <linux/time.h>
15 #include "reiserfs.h"
16 #include <linux/buffer_head.h>
17 #include <linux/kernel.h>
18 
19 static inline void buffer_info_init_left(struct tree_balance *tb,
20                                          struct buffer_info *bi)
21 {
22 	bi->tb          = tb;
23 	bi->bi_bh       = tb->L[0];
24 	bi->bi_parent   = tb->FL[0];
25 	bi->bi_position = get_left_neighbor_position(tb, 0);
26 }
27 
28 static inline void buffer_info_init_right(struct tree_balance *tb,
29                                           struct buffer_info *bi)
30 {
31 	bi->tb          = tb;
32 	bi->bi_bh       = tb->R[0];
33 	bi->bi_parent   = tb->FR[0];
34 	bi->bi_position = get_right_neighbor_position(tb, 0);
35 }
36 
37 static inline void buffer_info_init_tbS0(struct tree_balance *tb,
38                                          struct buffer_info *bi)
39 {
40 	bi->tb          = tb;
41 	bi->bi_bh        = PATH_PLAST_BUFFER(tb->tb_path);
42 	bi->bi_parent   = PATH_H_PPARENT(tb->tb_path, 0);
43 	bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
44 }
45 
46 static inline void buffer_info_init_bh(struct tree_balance *tb,
47                                        struct buffer_info *bi,
48                                        struct buffer_head *bh)
49 {
50 	bi->tb          = tb;
51 	bi->bi_bh       = bh;
52 	bi->bi_parent   = NULL;
53 	bi->bi_position = 0;
54 }
55 
/*
 * Mark a leaf buffer dirty within the balance's running transaction.
 * The @flag argument is unused here; presumably it is kept so that all
 * three do_balance_mark_*_dirty entry points (see the #define aliases
 * below) share one signature -- TODO confirm against callers.
 */
inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
				       struct buffer_head *bh, int flag)
{
	journal_mark_dirty(tb->transaction_handle, bh);
}
61 
/*
 * Internal nodes and the super block are journalled exactly like
 * leaves here, so the same helper serves under the names callers use.
 */
#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
64 
65 /*
66  * summary:
67  *  if deleting something ( tb->insert_size[0] < 0 )
68  *    return(balance_leaf_when_delete()); (flag d handled here)
69  *  else
70  *    if lnum is larger than 0 we put items into the left node
71  *    if rnum is larger than 0 we put items into the right node
72  *    if snum1 is larger than 0 we put items into the new node s1
73  *    if snum2 is larger than 0 we put items into the new node s2
74  * Note that all *num* count new items being created.
75  *
76  * It would be easier to read balance_leaf() if each of these summary
77  * lines was a separate procedure rather than being inlined.  I think
78  * that there are many passages here and in balance_leaf_when_delete() in
79  * which two calls to one procedure can replace two passages, and it
80  * might save cache space and improve software maintenance costs to do so.
81  *
82  * Vladimir made the perceptive comment that we should offload most of
83  * the decision making in this function into fix_nodes/check_balance, and
84  * then create some sort of structure in tb that says what actions should
85  * be performed by do_balance.
86  *
87  * -Hans
88  */
89 
90 /*
91  * Balance leaf node in case of delete or cut: insert_size[0] < 0
92  *
93  * lnum, rnum can have values >= -1
94  *	-1 means that the neighbor must be joined with S
95  *	 0 means that nothing should be done with the neighbor
96  *	>0 means to shift entirely or partly the specified number of items
97  *         to the neighbor
98  */
/*
 * NOTE(review): the return value is asymmetric -- every path returns 0
 * except the "all three buffers merged into R[0]" path, which returns
 * -1.  Confirm how the caller (balance_leaf) consumes this before
 * relying on the distinction.
 */
static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
{
	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
	int item_pos = PATH_LAST_POSITION(tb->tb_path);
	int pos_in_item = tb->tb_path->pos_in_item;
	struct buffer_info bi;
	int n;
	struct item_head *ih;

	RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
	       "vs- 12000: level: wrong FR %z", tb->FR[0]);
	RFALSE(tb->blknum[0] > 1,
	       "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
	       "PAP-12010: tree can not be empty");

	ih = item_head(tbS0, item_pos);
	buffer_info_init_tbS0(tb, &bi);

	/* Delete or truncate the item */

	switch (flag) {
	case M_DELETE:		/* delete item in S[0] */

		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
		       -tb->insert_size[0], ih);

		leaf_delete_items(&bi, 0, item_pos, 1, -1);

		/*
		 * If the 0-th item was deleted, the left delimiting key
		 * held in CFL[0] is stale and must be refreshed: from the
		 * new 0-th item of S[0] if any remain, otherwise from the
		 * parent (only when S[0] is its parent's 0-th child).
		 */
		if (!item_pos && tb->CFL[0]) {
			if (B_NR_ITEMS(tbS0)) {
				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
					    0);
			} else {
				if (!PATH_H_POSITION(tb->tb_path, 1))
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    PATH_H_PPARENT(tb->tb_path,
								   0), 0);
			}
		}

		RFALSE(!item_pos && !tb->CFL[0],
		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
		       tb->L[0]);

		break;

	case M_CUT:{		/* cut item in S[0] */
			if (is_direntry_le_ih(ih)) {

				/*
				 * UFS unlink semantics are such that you
				 * can only delete one directory entry at
				 * a time.
				 */

				/*
				 * when we cut a directory tb->insert_size[0]
				 * means number of entries to be cut (always 1)
				 */
				tb->insert_size[0] = -1;
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
				       tb->CFL[0]);

				/*
				 * cutting the very first entry of the very
				 * first item changes the left delimiting key
				 */
				if (!item_pos && !pos_in_item && tb->CFL[0]) {
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    tbS0, 0);
				}
			} else {
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!ih_item_len(ih),
				       "PAP-12035: cut must leave non-zero dynamic length of item");
			}
			break;
		}

	default:
		print_cur_tb("12040");
		reiserfs_panic(tb->tb_sb, "PAP-12040",
			       "unexpected mode: %s(%d)",
			       (flag ==
				M_PASTE) ? "PASTE" : ((flag ==
						       M_INSERT) ? "INSERT" :
						      "UNKNOWN"), flag);
	}

	/*
	 * the rule is that no shifting occurs unless by shifting
	 * a node can be freed
	 */
	n = B_NR_ITEMS(tbS0);
	/* L[0] takes part in balancing */
	if (tb->lnum[0]) {
		/* L[0] must be joined with S[0] */
		if (tb->lnum[0] == -1) {
			/* R[0] must be also joined with S[0] */
			if (tb->rnum[0] == -1) {
				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
					/*
					 * all contents of all the 3 buffers
					 * will be in L[0]
					 */
					if (PATH_H_POSITION(tb->tb_path, 1) == 0
					    && 1 < B_NR_ITEMS(tb->FR[0]))
						replace_key(tb, tb->CFL[0],
							    tb->lkey[0],
							    tb->FR[0], 1);

					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
							-1, NULL);
					leaf_move_items(LEAF_FROM_R_TO_L, tb,
							B_NR_ITEMS(tb->R[0]),
							-1, NULL);

					/* S[0] and R[0] are now empty */
					reiserfs_invalidate_buffer(tb, tbS0);
					reiserfs_invalidate_buffer(tb,
								   tb->R[0]);

					return 0;
				}
				/*
				 * all contents of all the 3 buffers will
				 * be in R[0]
				 */
				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
						NULL);
				leaf_move_items(LEAF_FROM_L_TO_R, tb,
						B_NR_ITEMS(tb->L[0]), -1, NULL);

				/* right_delimiting_key is correct in R[0] */
				replace_key(tb, tb->CFR[0], tb->rkey[0],
					    tb->R[0], 0);

				/* S[0] and L[0] are now empty */
				reiserfs_invalidate_buffer(tb, tbS0);
				reiserfs_invalidate_buffer(tb, tb->L[0]);

				return -1;
			}

			RFALSE(tb->rnum[0] != 0,
			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
			/* all contents of L[0] and S[0] will be in L[0] */
			leaf_shift_left(tb, n, -1);

			reiserfs_invalidate_buffer(tb, tbS0);

			return 0;
		}

		/*
		 * a part of contents of S[0] will be in L[0] and the
		 * rest part of S[0] will be in R[0]
		 */

		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
		       (tb->lnum[0] + tb->rnum[0] > n + 1),
		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
		       tb->rnum[0], tb->lnum[0], n);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
		       (tb->lbytes != -1 || tb->rbytes != -1),
		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
		       tb->rbytes, tb->lbytes);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
		       (tb->lbytes < 1 || tb->rbytes != -1),
		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
		       tb->rbytes, tb->lbytes);

		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);

		reiserfs_invalidate_buffer(tb, tbS0);

		return 0;
	}

	if (tb->rnum[0] == -1) {
		/* all contents of R[0] and S[0] will be in R[0] */
		leaf_shift_right(tb, n, -1);
		reiserfs_invalidate_buffer(tb, tbS0);
		return 0;
	}

	RFALSE(tb->rnum[0],
	       "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
	return 0;
}
292 
293 static int balance_leaf(struct tree_balance *tb, struct item_head *ih,	/* item header of inserted item (this is on little endian) */
294 			const char *body,	/* body  of inserted item or bytes to paste */
295 			int flag,	/* i - insert, d - delete, c - cut, p - paste
296 					   (see comment to do_balance) */
297 			struct item_head *insert_key,	/* in our processing of one level we sometimes determine what
298 							   must be inserted into the next higher level.  This insertion
299 							   consists of a key or two keys and their corresponding
300 							   pointers */
301 			struct buffer_head **insert_ptr	/* inserted node-ptrs for the next level */
302     )
303 {
304 	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
305 	int item_pos = PATH_LAST_POSITION(tb->tb_path);	/*  index into the array of item headers in S[0]
306 							   of the affected item */
307 	struct buffer_info bi;
308 	struct buffer_head *S_new[2];	/* new nodes allocated to hold what could not fit into S */
309 	int snum[2];		/* number of items that will be placed
310 				   into S_new (includes partially shifted
311 				   items) */
312 	int sbytes[2];		/* if an item is partially shifted into S_new then
313 				   if it is a directory item
314 				   it is the number of entries from the item that are shifted into S_new
315 				   else
316 				   it is the number of bytes from the item that are shifted into S_new
317 				 */
318 	int n, i;
319 	int ret_val;
320 	int pos_in_item;
321 	int zeros_num;
322 
323 	PROC_INFO_INC(tb->tb_sb, balance_at[0]);
324 
325 	/* Make balance in case insert_size[0] < 0 */
326 	if (tb->insert_size[0] < 0)
327 		return balance_leaf_when_delete(tb, flag);
328 
329 	zeros_num = 0;
330 	if (flag == M_INSERT && !body)
331 		zeros_num = ih_item_len(ih);
332 
333 	pos_in_item = tb->tb_path->pos_in_item;
334 	/* for indirect item pos_in_item is measured in unformatted node
335 	   pointers. Recalculate to bytes */
336 	if (flag != M_INSERT
337 	    && is_indirect_le_ih(item_head(tbS0, item_pos)))
338 		pos_in_item *= UNFM_P_SIZE;
339 
340 	if (tb->lnum[0] > 0) {
341 		/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
342 		if (item_pos < tb->lnum[0]) {
343 			/* new item or it part falls to L[0], shift it too */
344 			n = B_NR_ITEMS(tb->L[0]);
345 
346 			switch (flag) {
347 			case M_INSERT:	/* insert item into L[0] */
348 
349 				if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
350 					/* part of new item falls into L[0] */
351 					int new_item_len;
352 					int version;
353 
354 					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
355 
356 					/* Calculate item length to insert to S[0] */
357 					new_item_len = ih_item_len(ih) - tb->lbytes;
358 					/* Calculate and check item length to insert to L[0] */
359 					put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
360 
361 					RFALSE(ih_item_len(ih) <= 0,
362 					       "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
363 					       ih_item_len(ih));
364 
365 					/* Insert new item into L[0] */
366 					buffer_info_init_left(tb, &bi);
367 					leaf_insert_into_buf(&bi,
368 							n + item_pos - ret_val, ih, body,
369 							zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
370 
371 					version = ih_version(ih);
372 
373 					/* Calculate key component, item length and body to insert into S[0] */
374 					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
375 							(tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
376 
377 					put_ih_item_len(ih, new_item_len);
378 					if (tb->lbytes > zeros_num) {
379 						body += (tb->lbytes - zeros_num);
380 						zeros_num = 0;
381 					} else
382 						zeros_num -= tb->lbytes;
383 
384 					RFALSE(ih_item_len(ih) <= 0,
385 					       "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
386 					       ih_item_len(ih));
387 				} else {
388 					/* new item in whole falls into L[0] */
389 					/* Shift lnum[0]-1 items to L[0] */
390 					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
391 					/* Insert new item into L[0] */
392 					buffer_info_init_left(tb, &bi);
393 					leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
394 					tb->insert_size[0] = 0;
395 					zeros_num = 0;
396 				}
397 				break;
398 
399 			case M_PASTE:	/* append item in L[0] */
400 
401 				if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
402 					/* we must shift the part of the appended item */
403 					if (is_direntry_le_ih(item_head(tbS0, item_pos))) {
404 
405 						RFALSE(zeros_num,
406 						       "PAP-12090: invalid parameter in case of a directory");
407 						/* directory item */
408 						if (tb->lbytes > pos_in_item) {
409 							/* new directory entry falls into L[0] */
410 							struct item_head *pasted;
411 							int l_pos_in_item = pos_in_item;
412 
413 							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
414 							ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
415 							if (ret_val && !item_pos) {
416 								pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
417 								l_pos_in_item += ih_entry_count(pasted) - (tb->lbytes -1);
418 							}
419 
420 							/* Append given directory entry to directory item */
421 							buffer_info_init_left(tb, &bi);
422 							leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
423 
424 							/* previous string prepared space for pasting new entry, following string pastes this entry */
425 
426 							/* when we have merge directory item, pos_in_item has been changed too */
427 
428 							/* paste new directory entry. 1 is entry number */
429 							leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
430 									   1, (struct reiserfs_de_head *) body,
431 									   body + DEH_SIZE, tb->insert_size[0]);
432 							tb->insert_size[0] = 0;
433 						} else {
434 							/* new directory item doesn't fall into L[0] */
435 							/* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
436 							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
437 						}
438 						/* Calculate new position to append in item body */
439 						pos_in_item -= tb->lbytes;
440 					} else {
441 						/* regular object */
442 						RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
443 						RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)),
444 						       "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
445 						       ih_item_len(item_head(tbS0, item_pos)),pos_in_item);
446 
447 						if (tb->lbytes >= pos_in_item) {
448 							/* appended item will be in L[0] in whole */
449 							int l_n;
450 
451 							/* this bytes number must be appended to the last item of L[h] */
452 							l_n = tb->lbytes - pos_in_item;
453 
454 							/* Calculate new insert_size[0] */
455 							tb->insert_size[0] -= l_n;
456 
457 							RFALSE(tb->insert_size[0] <= 0,
458 							       "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
459 							       tb->insert_size[0]);
460 							ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
461 									    (item_head(tbS0, item_pos)));
462 							/* Append to body of item in L[0] */
463 							buffer_info_init_left(tb, &bi);
464 							leaf_paste_in_buffer
465 							    (&bi, n + item_pos - ret_val, ih_item_len
466 							     (item_head(tb->L[0], n + item_pos - ret_val)),
467 							     l_n, body,
468 							     zeros_num > l_n ? l_n : zeros_num);
469 							/* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
470 							{
471 								int version;
472 								int temp_l = l_n;
473 
474 								RFALSE(ih_item_len(item_head(tbS0, 0)),
475 								     "PAP-12106: item length must be 0");
476 								RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key
477 								      (tb->L[0], n + item_pos - ret_val)),
478 								     "PAP-12107: items must be of the same file");
479 								if (is_indirect_le_ih(item_head(tb->L[0], n + item_pos - ret_val))) {
480 									temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
481 								}
482 								/* update key of first item in S0 */
483 								version = ih_version(item_head(tbS0, 0));
484 								set_le_key_k_offset(version, leaf_key(tbS0, 0),
485 								     le_key_k_offset(version,leaf_key(tbS0, 0)) + temp_l);
486 								/* update left delimiting key */
487 								set_le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0]),
488 								     le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0])) + temp_l);
489 							}
490 
491 							/* Calculate new body, position in item and insert_size[0] */
492 							if (l_n > zeros_num) {
493 								body += (l_n - zeros_num);
494 								zeros_num = 0;
495 							} else
496 								zeros_num -= l_n;
497 							pos_in_item = 0;
498 
499 							RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
500 							     || !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)
501 							     || !op_is_left_mergeable(internal_key(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
502 							     "PAP-12120: item must be merge-able with left neighboring item");
503 						} else {	/* only part of the appended item will be in L[0] */
504 
505 							/* Calculate position in item for append in S[0] */
506 							pos_in_item -= tb->lbytes;
507 
508 							RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
509 
510 							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
511 							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
512 						}
513 					}
514 				} else {	/* appended item will be in L[0] in whole */
515 
516 					struct item_head *pasted;
517 
518 					if (!item_pos && op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) {	/* if we paste into first item of S[0] and it is left mergable */
519 						/* then increment pos_in_item by the size of the last item in L[0] */
520 						pasted = item_head(tb->L[0], n - 1);
521 						if (is_direntry_le_ih(pasted))
522 							pos_in_item += ih_entry_count(pasted);
523 						else
524 							pos_in_item += ih_item_len(pasted);
525 					}
526 
527 					/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
528 					ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
529 					/* Append to body of item in L[0] */
530 					buffer_info_init_left(tb, &bi);
531 					leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
532 							     pos_in_item,
533 							     tb->insert_size[0],
534 							     body, zeros_num);
535 
536 					/* if appended item is directory, paste entry */
537 					pasted = item_head(tb->L[0], n + item_pos - ret_val);
538 					if (is_direntry_le_ih(pasted))
539 						leaf_paste_entries(&bi, n + item_pos - ret_val,
540 								   pos_in_item, 1,
541 								   (struct reiserfs_de_head *) body,
542 								   body + DEH_SIZE,
543 								   tb->insert_size[0]);
544 					/* if appended item is indirect item, put unformatted node into un list */
545 					if (is_indirect_le_ih(pasted))
546 						set_ih_free_space(pasted, 0);
547 					tb->insert_size[0] = 0;
548 					zeros_num = 0;
549 				}
550 				break;
551 			default:	/* cases d and t */
552 				reiserfs_panic(tb->tb_sb, "PAP-12130",
553 					       "lnum > 0: unexpected mode: "
554 					       " %s(%d)",
555 					       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
556 			}
557 		} else {
558 			/* new item doesn't fall into L[0] */
559 			leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
560 		}
561 	}
562 
563 	/* tb->lnum[0] > 0 */
564 	/* Calculate new item position */
565 	item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
566 
567 	if (tb->rnum[0] > 0) {
568 		/* shift rnum[0] items from S[0] to the right neighbor R[0] */
569 		n = B_NR_ITEMS(tbS0);
570 		switch (flag) {
571 
572 		case M_INSERT:	/* insert item */
573 			if (n - tb->rnum[0] < item_pos) {	/* new item or its part falls to R[0] */
574 				if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {	/* part of new item falls into R[0] */
575 					loff_t old_key_comp, old_len, r_zeros_number;
576 					const char *r_body;
577 					int version;
578 					loff_t offset;
579 
580 					leaf_shift_right(tb, tb->rnum[0] - 1, -1);
581 
582 					version = ih_version(ih);
583 					/* Remember key component and item length */
584 					old_key_comp = le_ih_k_offset(ih);
585 					old_len = ih_item_len(ih);
586 
587 					/* Calculate key component and item length to insert into R[0] */
588 					offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
589 					set_le_ih_k_offset(ih, offset);
590 					put_ih_item_len(ih, tb->rbytes);
591 					/* Insert part of the item into R[0] */
592 					buffer_info_init_right(tb, &bi);
593 					if ((old_len - tb->rbytes) > zeros_num) {
594 						r_zeros_number = 0;
595 						r_body = body + (old_len - tb->rbytes) - zeros_num;
596 					} else {
597 						r_body = body;
598 						r_zeros_number = zeros_num - (old_len - tb->rbytes);
599 						zeros_num -= r_zeros_number;
600 					}
601 
602 					leaf_insert_into_buf(&bi, 0, ih, r_body,
603 							     r_zeros_number);
604 
605 					/* Replace right delimiting key by first key in R[0] */
606 					replace_key(tb, tb->CFR[0], tb->rkey[0],
607 						    tb->R[0], 0);
608 
609 					/* Calculate key component and item length to insert into S[0] */
610 					set_le_ih_k_offset(ih, old_key_comp);
611 					put_ih_item_len(ih, old_len - tb->rbytes);
612 
613 					tb->insert_size[0] -= tb->rbytes;
614 
615 				} else {	/* whole new item falls into R[0] */
616 
617 					/* Shift rnum[0]-1 items to R[0] */
618 					ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
619 					/* Insert new item into R[0] */
620 					buffer_info_init_right(tb, &bi);
621 					leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
622 							     ih, body, zeros_num);
623 
624 					if (item_pos - n + tb->rnum[0] - 1 == 0) {
625 						replace_key(tb, tb->CFR[0],
626 							    tb->rkey[0],
627 							    tb->R[0], 0);
628 
629 					}
630 					zeros_num = tb->insert_size[0] = 0;
631 				}
632 			} else {	/* new item or part of it doesn't fall into R[0] */
633 
634 				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
635 			}
636 			break;
637 
638 		case M_PASTE:	/* append item */
639 
640 			if (n - tb->rnum[0] <= item_pos) {	/* pasted item or part of it falls to R[0] */
641 				if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) {	/* we must shift the part of the appended item */
642 					if (is_direntry_le_ih(item_head(tbS0, item_pos))) {	/* we append to directory item */
643 						int entry_count;
644 
645 						RFALSE(zeros_num,
646 						       "PAP-12145: invalid parameter in case of a directory");
647 						entry_count = ih_entry_count(item_head
648 								  (tbS0, item_pos));
649 						if (entry_count - tb->rbytes <
650 						    pos_in_item)
651 							/* new directory entry falls into R[0] */
652 						{
653 							int paste_entry_position;
654 
655 							RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
656 							       "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
657 							       tb->rbytes, entry_count);
658 							/* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
659 							leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
660 							/* Paste given directory entry to directory item */
661 							paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
662 							buffer_info_init_right(tb, &bi);
663 							leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
664 							/* paste entry */
665 							leaf_paste_entries(&bi, 0, paste_entry_position, 1,
666 									   (struct reiserfs_de_head *) body,
667 									   body + DEH_SIZE, tb->insert_size[0]);
668 
669 							if (paste_entry_position == 0) {
670 								/* change delimiting keys */
671 								replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
672 							}
673 
674 							tb->insert_size[0] = 0;
675 							pos_in_item++;
676 						} else {	/* new directory entry doesn't fall into R[0] */
677 
678 							leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
679 						}
680 					} else {	/* regular object */
681 
682 						int n_shift, n_rem, r_zeros_number;
683 						const char *r_body;
684 
685 						/* Calculate number of bytes which must be shifted from appended item */
686 						if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
687 							n_shift = 0;
688 
689 						RFALSE(pos_in_item != ih_item_len
690 						       (item_head(tbS0, item_pos)),
691 						       "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
692 						       pos_in_item, ih_item_len
693 						       (item_head(tbS0, item_pos)));
694 
695 						leaf_shift_right(tb, tb->rnum[0], n_shift);
696 						/* Calculate number of bytes which must remain in body after appending to R[0] */
697 						if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
698 							n_rem = 0;
699 
700 						{
701 							int version;
702 							unsigned long temp_rem = n_rem;
703 
704 							version = ih_version(item_head(tb->R[0], 0));
705 							if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) {
706 								temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
707 							}
708 							set_le_key_k_offset(version, leaf_key(tb->R[0], 0),
709 							     le_key_k_offset(version, leaf_key(tb->R[0], 0)) + temp_rem);
710 							set_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]),
711 							     le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0])) + temp_rem);
712 						}
713 /*		  k_offset (leaf_key(tb->R[0],0)) += n_rem;
714 		  k_offset (internal_key(tb->CFR[0],tb->rkey[0])) += n_rem;*/
715 						do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
716 
717 						/* Append part of body into R[0] */
718 						buffer_info_init_right(tb, &bi);
719 						if (n_rem > zeros_num) {
720 							r_zeros_number = 0;
721 							r_body = body + n_rem - zeros_num;
722 						} else {
723 							r_body = body;
724 							r_zeros_number = zeros_num - n_rem;
725 							zeros_num -= r_zeros_number;
726 						}
727 
728 						leaf_paste_in_buffer(&bi, 0, n_shift,
729 								     tb->insert_size[0] - n_rem,
730 								     r_body, r_zeros_number);
731 
732 						if (is_indirect_le_ih(item_head(tb->R[0], 0))) {
733 #if 0
734 							RFALSE(n_rem,
735 							       "PAP-12160: paste more than one unformatted node pointer");
736 #endif
737 							set_ih_free_space(item_head(tb->R[0], 0), 0);
738 						}
739 						tb->insert_size[0] = n_rem;
740 						if (!n_rem)
741 							pos_in_item++;
742 					}
743 				} else {	/* pasted item in whole falls into R[0] */
744 
745 					struct item_head *pasted;
746 
747 					ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
748 					/* append item in R[0] */
749 					if (pos_in_item >= 0) {
750 						buffer_info_init_right(tb, &bi);
751 						leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
752 								     tb->insert_size[0], body, zeros_num);
753 					}
754 
755 					/* paste new entry, if item is directory item */
756 					pasted = item_head(tb->R[0], item_pos - n + tb->rnum[0]);
757 					if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
758 						leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
759 								   pos_in_item, 1,
760 								   (struct reiserfs_de_head *) body,
761 								   body + DEH_SIZE, tb->insert_size[0]);
762 						if (!pos_in_item) {
763 
764 							RFALSE(item_pos - n + tb->rnum[0],
765 							       "PAP-12165: directory item must be first item of node when pasting is in 0th position");
766 
767 							/* update delimiting keys */
768 							replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
769 						}
770 					}
771 
772 					if (is_indirect_le_ih(pasted))
773 						set_ih_free_space(pasted, 0);
774 					zeros_num = tb->insert_size[0] = 0;
775 				}
776 			} else {	/* new item doesn't fall into R[0] */
777 
778 				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
779 			}
780 			break;
781 		default:	/* cases d and t */
782 			reiserfs_panic(tb->tb_sb, "PAP-12175",
783 				       "rnum > 0: unexpected mode: %s(%d)",
784 				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
785 		}
786 
787 	}
788 
789 	/* tb->rnum[0] > 0 */
790 	RFALSE(tb->blknum[0] > 3,
791 	       "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
792 	RFALSE(tb->blknum[0] < 0,
793 	       "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
794 
795 	/* if while adding to a node we discover that it is possible to split
796 	   it in two, and merge the left part into the left neighbor and the
797 	   right part into the right neighbor, eliminating the node */
798 	if (tb->blknum[0] == 0) {	/* node S[0] is empty now */
799 
800 		RFALSE(!tb->lnum[0] || !tb->rnum[0],
801 		       "PAP-12190: lnum and rnum must not be zero");
802 		/* if insertion was done before 0-th position in R[0], right
803 		   delimiting key of the tb->L[0]'s and left delimiting key are
804 		   not set correctly */
805 		if (tb->CFL[0]) {
806 			if (!tb->CFR[0])
807 				reiserfs_panic(tb->tb_sb, "vs-12195",
808 					       "CFR not initialized");
809 			copy_key(internal_key(tb->CFL[0], tb->lkey[0]),
810 				 internal_key(tb->CFR[0], tb->rkey[0]));
811 			do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
812 		}
813 
814 		reiserfs_invalidate_buffer(tb, tbS0);
815 		return 0;
816 	}
817 
818 	/* Fill new nodes that appear in place of S[0] */
819 
820 	/* I am told that this copying is because we need an array to enable
821 	   the looping code. -Hans */
822 	snum[0] = tb->s1num, snum[1] = tb->s2num;
823 	sbytes[0] = tb->s1bytes;
824 	sbytes[1] = tb->s2bytes;
825 	for (i = tb->blknum[0] - 2; i >= 0; i--) {
826 
827 		RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
828 		       snum[i]);
829 
830 		/* here we shift from S to S_new nodes */
831 
832 		S_new[i] = get_FEB(tb);
833 
834 		/* initialized block type and tree level */
835 		set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
836 
837 		n = B_NR_ITEMS(tbS0);
838 
839 		switch (flag) {
840 		case M_INSERT:	/* insert item */
841 
842 			if (n - snum[i] < item_pos) {	/* new item or it's part falls to first new node S_new[i] */
843 				if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) {	/* part of new item falls into S_new[i] */
844 					int old_key_comp, old_len, r_zeros_number;
845 					const char *r_body;
846 					int version;
847 
848 					/* Move snum[i]-1 items from S[0] to S_new[i] */
849 					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
850 							snum[i] - 1, -1,
851 							S_new[i]);
852 					/* Remember key component and item length */
853 					version = ih_version(ih);
854 					old_key_comp = le_ih_k_offset(ih);
855 					old_len = ih_item_len(ih);
856 
857 					/* Calculate key component and item length to insert into S_new[i] */
858 					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
859 							   ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
860 
861 					put_ih_item_len(ih, sbytes[i]);
862 
863 					/* Insert part of the item into S_new[i] before 0-th item */
864 					buffer_info_init_bh(tb, &bi, S_new[i]);
865 
866 					if ((old_len - sbytes[i]) > zeros_num) {
867 						r_zeros_number = 0;
868 						r_body = body + (old_len - sbytes[i]) - zeros_num;
869 					} else {
870 						r_body = body;
871 						r_zeros_number = zeros_num - (old_len - sbytes[i]);
872 						zeros_num -= r_zeros_number;
873 					}
874 
875 					leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
876 
877 					/* Calculate key component and item length to insert into S[i] */
878 					set_le_ih_k_offset(ih, old_key_comp);
879 					put_ih_item_len(ih, old_len - sbytes[i]);
880 					tb->insert_size[0] -= sbytes[i];
881 				} else {	/* whole new item falls into S_new[i] */
882 
883 					/* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
884 					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
885 							snum[i] - 1, sbytes[i], S_new[i]);
886 
887 					/* Insert new item into S_new[i] */
888 					buffer_info_init_bh(tb, &bi, S_new[i]);
889 					leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
890 							     ih, body, zeros_num);
891 
892 					zeros_num = tb->insert_size[0] = 0;
893 				}
894 			}
895 
896 			else {	/* new item or it part don't falls into S_new[i] */
897 
898 				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
899 						snum[i], sbytes[i], S_new[i]);
900 			}
901 			break;
902 
903 		case M_PASTE:	/* append item */
904 
905 			if (n - snum[i] <= item_pos) {	/* pasted item or part if it falls to S_new[i] */
906 				if (item_pos == n - snum[i] && sbytes[i] != -1) {	/* we must shift part of the appended item */
907 					struct item_head *aux_ih;
908 
909 					RFALSE(ih, "PAP-12210: ih must be 0");
910 
911 					aux_ih = item_head(tbS0, item_pos);
912 					if (is_direntry_le_ih(aux_ih)) {
913 						/* we append to directory item */
914 
915 						int entry_count;
916 
917 						entry_count = ih_entry_count(aux_ih);
918 
919 						if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
920 							/* new directory entry falls into S_new[i] */
921 
922 							RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
923 							RFALSE(sbytes[i] - 1 >= entry_count,
924 							       "PAP-12220: there are no so much entries (%d), only %d",
925 							       sbytes[i] - 1, entry_count);
926 
927 							/* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
928 							leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
929 							/* Paste given directory entry to directory item */
930 							buffer_info_init_bh(tb, &bi, S_new[i]);
931 							leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
932 							     tb->insert_size[0], body, zeros_num);
933 							/* paste new directory entry */
934 							leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
935 									   (struct reiserfs_de_head *) body,
936 									   body + DEH_SIZE, tb->insert_size[0]);
937 							tb->insert_size[0] = 0;
938 							pos_in_item++;
939 						} else {	/* new directory entry doesn't fall into S_new[i] */
940 							leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
941 						}
942 					} else {	/* regular object */
943 
944 						int n_shift, n_rem, r_zeros_number;
945 						const char *r_body;
946 
947 						RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)) || tb->insert_size[0] <= 0,
948 						       "PAP-12225: item too short or insert_size <= 0");
949 
950 						/* Calculate number of bytes which must be shifted from appended item */
951 						n_shift = sbytes[i] - tb->insert_size[0];
952 						if (n_shift < 0)
953 							n_shift = 0;
954 						leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
955 
956 						/* Calculate number of bytes which must remain in body after append to S_new[i] */
957 						n_rem = tb->insert_size[0] - sbytes[i];
958 						if (n_rem < 0)
959 							n_rem = 0;
960 						/* Append part of body into S_new[0] */
961 						buffer_info_init_bh(tb, &bi, S_new[i]);
962 						if (n_rem > zeros_num) {
963 							r_zeros_number = 0;
964 							r_body = body + n_rem - zeros_num;
965 						} else {
966 							r_body = body;
967 							r_zeros_number = zeros_num - n_rem;
968 							zeros_num -= r_zeros_number;
969 						}
970 
971 						leaf_paste_in_buffer(&bi, 0, n_shift,
972 								     tb->insert_size[0] - n_rem,
973 								     r_body, r_zeros_number);
974 						{
975 							struct item_head *tmp;
976 
977 							tmp = item_head(S_new[i], 0);
978 							if (is_indirect_le_ih
979 							    (tmp)) {
980 								set_ih_free_space(tmp, 0);
981 								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
982 							} else {
983 								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
984 							}
985 						}
986 
987 						tb->insert_size[0] = n_rem;
988 						if (!n_rem)
989 							pos_in_item++;
990 					}
991 				} else
992 					/* item falls wholly into S_new[i] */
993 				{
994 					int leaf_mi;
995 					struct item_head *pasted;
996 
997 #ifdef CONFIG_REISERFS_CHECK
998 					struct item_head *ih_check = item_head(tbS0, item_pos);
999 
1000 					if (!is_direntry_le_ih(ih_check)
1001 					    && (pos_in_item != ih_item_len(ih_check)
1002 						|| tb->insert_size[0] <= 0))
1003 						reiserfs_panic(tb->tb_sb,
1004 							     "PAP-12235",
1005 							     "pos_in_item "
1006 							     "must be equal "
1007 							     "to ih_item_len");
1008 #endif				/* CONFIG_REISERFS_CHECK */
1009 
1010 					leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
1011 							    tb, snum[i],
1012 							    sbytes[i],
1013 							    S_new[i]);
1014 
1015 					RFALSE(leaf_mi,
1016 					       "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1017 					       leaf_mi);
1018 
1019 					/* paste into item */
1020 					buffer_info_init_bh(tb, &bi, S_new[i]);
1021 					leaf_paste_in_buffer(&bi,
1022 							     item_pos - n + snum[i],
1023 							     pos_in_item,
1024 							     tb->insert_size[0],
1025 							     body, zeros_num);
1026 
1027 					pasted = item_head(S_new[i], item_pos - n + snum[i]);
1028 					if (is_direntry_le_ih(pasted)) {
1029 						leaf_paste_entries(&bi,
1030 								   item_pos - n + snum[i],
1031 								   pos_in_item, 1,
1032 								   (struct reiserfs_de_head *)body,
1033 								   body + DEH_SIZE,
1034 								   tb->insert_size[0]
1035 						    );
1036 					}
1037 
1038 					/* if we paste to indirect item update ih_free_space */
1039 					if (is_indirect_le_ih(pasted))
1040 						set_ih_free_space(pasted, 0);
1041 					zeros_num = tb->insert_size[0] = 0;
1042 				}
1043 			}
1044 
1045 			else {	/* pasted item doesn't fall into S_new[i] */
1046 
1047 				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1048 						snum[i], sbytes[i], S_new[i]);
1049 			}
1050 			break;
1051 		default:	/* cases d and t */
1052 			reiserfs_panic(tb->tb_sb, "PAP-12245",
1053 				       "blknum > 2: unexpected mode: %s(%d)",
1054 				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1055 		}
1056 
1057 		memcpy(insert_key + i, leaf_key(S_new[i], 0), KEY_SIZE);
1058 		insert_ptr[i] = S_new[i];
1059 
1060 		RFALSE(!buffer_journaled(S_new[i])
1061 		       || buffer_journal_dirty(S_new[i])
1062 		       || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1063 		       i, S_new[i]);
1064 	}
1065 
1066 	/* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1067 	   affected item which remains in S */
1068 	if (0 <= item_pos && item_pos < tb->s0num) {	/* if we must insert or append into buffer S[0] */
1069 
1070 		switch (flag) {
1071 		case M_INSERT:	/* insert item into S[0] */
1072 			buffer_info_init_tbS0(tb, &bi);
1073 			leaf_insert_into_buf(&bi, item_pos, ih, body,
1074 					     zeros_num);
1075 
1076 			/* If we insert the first key change the delimiting key */
1077 			if (item_pos == 0) {
1078 				if (tb->CFL[0])	/* can be 0 in reiserfsck */
1079 					replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1080 			}
1081 			break;
1082 
1083 		case M_PASTE:{	/* append item in S[0] */
1084 				struct item_head *pasted;
1085 
1086 				pasted = item_head(tbS0, item_pos);
1087 				/* when directory, may be new entry already pasted */
1088 				if (is_direntry_le_ih(pasted)) {
1089 					if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1090 
1091 						RFALSE(!tb->insert_size[0],
1092 						       "PAP-12260: insert_size is 0 already");
1093 
1094 						/* prepare space */
1095 						buffer_info_init_tbS0(tb, &bi);
1096 						leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1097 								     tb->insert_size[0], body,
1098 								     zeros_num);
1099 
1100 						/* paste entry */
1101 						leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1102 								   (struct reiserfs_de_head *)body,
1103 								   body + DEH_SIZE,
1104 								   tb->insert_size[0]);
1105 						if (!item_pos && !pos_in_item) {
1106 							RFALSE(!tb->CFL[0] || !tb->L[0],
1107 							       "PAP-12270: CFL[0]/L[0] must be specified");
1108 							if (tb->CFL[0])
1109 								replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1110 						}
1111 						tb->insert_size[0] = 0;
1112 					}
1113 				} else {	/* regular object */
1114 					if (pos_in_item == ih_item_len(pasted)) {
1115 
1116 						RFALSE(tb->insert_size[0] <= 0,
1117 						       "PAP-12275: insert size must not be %d",
1118 						       tb->insert_size[0]);
1119 						buffer_info_init_tbS0(tb, &bi);
1120 						leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1121 								     tb->insert_size[0], body, zeros_num);
1122 
1123 						if (is_indirect_le_ih(pasted)) {
1124 #if 0
1125 							RFALSE(tb->
1126 							       insert_size[0] !=
1127 							       UNFM_P_SIZE,
1128 							       "PAP-12280: insert_size for indirect item must be %d, not %d",
1129 							       UNFM_P_SIZE,
1130 							       tb->
1131 							       insert_size[0]);
1132 #endif
1133 							set_ih_free_space(pasted, 0);
1134 						}
1135 						tb->insert_size[0] = 0;
1136 					}
1137 #ifdef CONFIG_REISERFS_CHECK
1138 					else {
1139 						if (tb->insert_size[0]) {
1140 							print_cur_tb("12285");
1141 							reiserfs_panic(tb->tb_sb,
1142 							    "PAP-12285",
1143 							    "insert_size "
1144 							    "must be 0 "
1145 							    "(%d)",
1146 							    tb->insert_size[0]);
1147 						}
1148 					}
1149 #endif				/* CONFIG_REISERFS_CHECK */
1150 
1151 				}
1152 			}	/* case M_PASTE: */
1153 		}
1154 	}
1155 #ifdef CONFIG_REISERFS_CHECK
1156 	if (flag == M_PASTE && tb->insert_size[0]) {
1157 		print_cur_tb("12290");
1158 		reiserfs_panic(tb->tb_sb,
1159 			       "PAP-12290", "insert_size is still not 0 (%d)",
1160 			       tb->insert_size[0]);
1161 	}
1162 #endif				/* CONFIG_REISERFS_CHECK */
1163 	return 0;
1164 }				/* Leaf level of the tree is balanced (end of balance_leaf) */
1165 
1166 /* Make empty node */
1167 void make_empty_node(struct buffer_info *bi)
1168 {
1169 	struct block_head *blkh;
1170 
1171 	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1172 
1173 	blkh = B_BLK_HEAD(bi->bi_bh);
1174 	set_blkh_nr_item(blkh, 0);
1175 	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1176 
1177 	if (bi->bi_parent)
1178 		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
1179 }
1180 
1181 /* Get first empty buffer */
1182 struct buffer_head *get_FEB(struct tree_balance *tb)
1183 {
1184 	int i;
1185 	struct buffer_info bi;
1186 
1187 	for (i = 0; i < MAX_FEB_SIZE; i++)
1188 		if (tb->FEB[i] != NULL)
1189 			break;
1190 
1191 	if (i == MAX_FEB_SIZE)
1192 		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1193 
1194 	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1195 	make_empty_node(&bi);
1196 	set_buffer_uptodate(tb->FEB[i]);
1197 	tb->used[i] = tb->FEB[i];
1198 	tb->FEB[i] = NULL;
1199 
1200 	return tb->used[i];
1201 }
1202 
1203 /* This is now used because reiserfs_free_block has to be able to schedule. */
1204 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1205 {
1206 	int i;
1207 
1208 	if (buffer_dirty(bh))
1209 		reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1210 				 "called with dirty buffer");
1211 	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1212 		if (!tb->thrown[i]) {
1213 			tb->thrown[i] = bh;
1214 			get_bh(bh);	/* free_thrown puts this */
1215 			return;
1216 		}
1217 	reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1218 			 "too many thrown buffers");
1219 }
1220 
1221 static void free_thrown(struct tree_balance *tb)
1222 {
1223 	int i;
1224 	b_blocknr_t blocknr;
1225 	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1226 		if (tb->thrown[i]) {
1227 			blocknr = tb->thrown[i]->b_blocknr;
1228 			if (buffer_dirty(tb->thrown[i]))
1229 				reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1230 						 "called with dirty buffer %d",
1231 						 blocknr);
1232 			brelse(tb->thrown[i]);	/* incremented in store_thrown */
1233 			reiserfs_free_block(tb->transaction_handle, NULL,
1234 					    blocknr, 0);
1235 		}
1236 	}
1237 }
1238 
1239 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1240 {
1241 	struct block_head *blkh;
1242 	blkh = B_BLK_HEAD(bh);
1243 	set_blkh_level(blkh, FREE_LEVEL);
1244 	set_blkh_nr_item(blkh, 0);
1245 
1246 	clear_buffer_dirty(bh);
1247 	store_thrown(tb, bh);
1248 }
1249 
1250 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1251 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1252 		 struct buffer_head *src, int n_src)
1253 {
1254 
1255 	RFALSE(dest == NULL || src == NULL,
1256 	       "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1257 	       src, dest);
1258 	RFALSE(!B_IS_KEYS_LEVEL(dest),
1259 	       "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1260 	       dest);
1261 	RFALSE(n_dest < 0 || n_src < 0,
1262 	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1263 	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1264 	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1265 	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1266 
1267 	if (B_IS_ITEMS_LEVEL(src))
1268 		/* source buffer contains leaf node */
1269 		memcpy(internal_key(dest, n_dest), item_head(src, n_src),
1270 		       KEY_SIZE);
1271 	else
1272 		memcpy(internal_key(dest, n_dest), internal_key(src, n_src),
1273 		       KEY_SIZE);
1274 
1275 	do_balance_mark_internal_dirty(tb, dest, 0);
1276 }
1277 
1278 int get_left_neighbor_position(struct tree_balance *tb, int h)
1279 {
1280 	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1281 
1282 	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1283 	       "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1284 	       h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1285 
1286 	if (Sh_position == 0)
1287 		return B_NR_ITEMS(tb->FL[h]);
1288 	else
1289 		return Sh_position - 1;
1290 }
1291 
1292 int get_right_neighbor_position(struct tree_balance *tb, int h)
1293 {
1294 	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1295 
1296 	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1297 	       "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1298 	       h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1299 
1300 	if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1301 		return 0;
1302 	else
1303 		return Sh_position + 1;
1304 }
1305 
1306 #ifdef CONFIG_REISERFS_CHECK
1307 
1308 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1309 static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1310 				char *mes)
1311 {
1312 	struct disk_child *dc;
1313 	int i;
1314 
1315 	RFALSE(!bh, "PAP-12336: bh == 0");
1316 
1317 	if (!bh || !B_IS_IN_TREE(bh))
1318 		return;
1319 
1320 	RFALSE(!buffer_dirty(bh) &&
1321 	       !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1322 	       "PAP-12337: buffer (%b) must be dirty", bh);
1323 	dc = B_N_CHILD(bh, 0);
1324 
1325 	for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1326 		if (!is_reusable(s, dc_block_number(dc), 1)) {
1327 			print_cur_tb(mes);
1328 			reiserfs_panic(s, "PAP-12338",
1329 				       "invalid child pointer %y in %b",
1330 				       dc, bh);
1331 		}
1332 	}
1333 }
1334 
1335 static int locked_or_not_in_tree(struct tree_balance *tb,
1336 				  struct buffer_head *bh, char *which)
1337 {
1338 	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1339 	    !B_IS_IN_TREE(bh)) {
1340 		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1341 		return 1;
1342 	}
1343 	return 0;
1344 }
1345 
1346 static int check_before_balancing(struct tree_balance *tb)
1347 {
1348 	int retval = 0;
1349 
1350 	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1351 		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1352 			       "occurred based on cur_tb not being null at "
1353 			       "this point in code. do_balance cannot properly "
1354 			       "handle concurrent tree accesses on a same "
1355 			       "mount point.");
1356 	}
1357 
1358 	/*
1359 	 * double check that buffers that we will modify are unlocked.
1360 	 * (fix_nodes should already have prepped all of these for us).
1361 	 */
1362 	if (tb->lnum[0]) {
1363 		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1364 		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1365 		retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
1366 		check_leaf(tb->L[0]);
1367 	}
1368 	if (tb->rnum[0]) {
1369 		retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1370 		retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1371 		retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
1372 		check_leaf(tb->R[0]);
1373 	}
1374 	retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1375 					"S[0]");
1376 	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1377 
1378 	return retval;
1379 }
1380 
/*
 * Debug check run after balance_leaf(): for each leaf modified by the
 * balance (left neighbor, right neighbor, S[0]), verify that the free
 * space recorded in the leaf agrees with the used size recorded in its
 * parent's child pointer.  Any mismatch means a shift was mis-accounted
 * and we panic.
 */
static void check_after_balance_leaf(struct tree_balance *tb)
{
	/* items were shifted into the left neighbor */
	if (tb->lnum[0]) {
		if (B_FREE_SPACE(tb->L[0]) !=
		    MAX_CHILD_SIZE(tb->L[0]) -
		    dc_size(B_N_CHILD
			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
			print_cur_tb("12221");
			reiserfs_panic(tb->tb_sb, "PAP-12355",
				       "shift to left was incorrect");
		}
	}
	/* items were shifted into the right neighbor */
	if (tb->rnum[0]) {
		if (B_FREE_SPACE(tb->R[0]) !=
		    MAX_CHILD_SIZE(tb->R[0]) -
		    dc_size(B_N_CHILD
			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
			print_cur_tb("12222");
			reiserfs_panic(tb->tb_sb, "PAP-12360",
				       "shift to right was incorrect");
		}
	}
	/*
	 * check S[0] against its own parent's accounting; skipped when
	 * S[0] has no parent on the path (it is the tree root)
	 */
	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
				PATH_H_POSITION(tb->tb_path, 1)))))) {
		/* recompute both sides for the diagnostic message */
		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
					       PATH_H_POSITION(tb->tb_path,
							       1))));
		print_cur_tb("12223");
		reiserfs_warning(tb->tb_sb, "reiserfs-12363",
				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
				 left,
				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
				 PATH_H_PBUFFER(tb->tb_path, 1),
				 PATH_H_POSITION(tb->tb_path, 1),
				 dc_size(B_N_CHILD
					 (PATH_H_PBUFFER(tb->tb_path, 1),
					  PATH_H_POSITION(tb->tb_path, 1))),
				 right);
		reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
	}
}
1428 
1429 static void check_leaf_level(struct tree_balance *tb)
1430 {
1431 	check_leaf(tb->L[0]);
1432 	check_leaf(tb->R[0]);
1433 	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1434 }
1435 
1436 static void check_internal_levels(struct tree_balance *tb)
1437 {
1438 	int h;
1439 
1440 	/* check all internal nodes */
1441 	for (h = 1; tb->insert_size[h]; h++) {
1442 		check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1443 				    "BAD BUFFER ON PATH");
1444 		if (tb->lnum[h])
1445 			check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1446 		if (tb->rnum[h])
1447 			check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1448 	}
1449 
1450 }
1451 
1452 #endif
1453 
1454 /*
1455  * Now we have all of the buffers that must be used in balancing of
1456  * the tree.  We rely on the assumption that schedule() will not occur
1457  * while do_balance works. ( Only interrupt handlers are acceptable.)
1458  * We balance the tree according to the analysis made before this,
1459  * using buffers already obtained.  For SMP support it will someday be
1460  * necessary to add ordered locking of tb.
1461  */
1462 
1463 /*
1464  * Some interesting rules of balancing:
1465  * we delete a maximum of two nodes per level per balancing: we never
1466  * delete R, when we delete two of three nodes L, S, R then we move
1467  * them into R.
1468  *
1469  * we only delete L if we are deleting two nodes, if we delete only
1470  * one node we delete S
1471  *
1472  * if we shift leaves then we shift as much as we can: this is a
1473  * deliberate policy of extremism in node packing which results in
1474  * higher average utilization after repeated random balance operations
1475  * at the cost of more memory copies and more balancing as a result of
1476  * small insertions to full nodes.
1477  *
1478  * if we shift internal nodes we try to evenly balance the node
1479  * utilization, with consequent less balancing at the cost of lower
1480  * utilization.
1481  *
1482  * one could argue that the policy for directories in leaves should be
1483  * that of internal nodes, but we will wait until another day to
1484  * evaluate this....  It would be nice to someday measure and prove
1485  * these assumptions as to what is optimal....
1486  */
1487 
/*
 * Pre-balance hook: assert (in debug builds) that every buffer we are
 * about to modify is unlocked and in the tree, then record that a
 * balance is in progress on this mount.
 */
static inline void do_balance_starts(struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
	/*
	print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
		 tb->tb_path->pos_in_item, tb, "check");
	*/
	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	/* lets checking code (e.g. print_cur_tb) see the active balance */
	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
#endif
}
1504 
/*
 * Post-balance hook: run debug consistency checks, bump the per-sb
 * balance counter, release the fixed nodes, and free the buffers thrown
 * away during balancing.
 */
static inline void do_balance_completed(struct tree_balance *tb)
{

#ifdef CONFIG_REISERFS_CHECK
	check_leaf_level(tb);
	check_internal_levels(tb);
	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
#endif

	/*
	 * reiserfs_free_block is no longer schedule safe.  So, we need to
	 * put the buffers we want freed on the thrown list during do_balance,
	 * and then free them now
	 */

	REISERFS_SB(tb->tb_sb)->s_do_balance++;

	/* release all nodes held to perform the balancing */
	unfix_nodes(tb);

	free_thrown(tb);
}
1527 
1528 /*
1529  * do_balance - balance the tree
1530  *
1531  * @tb: tree_balance structure
1532  * @ih: item header of inserted item
1533  * @body: body of inserted item or bytes to paste
 * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' - paste
1535  *
1536  * Cut means delete part of an item (includes removing an entry from a
1537  * directory).
1538  *
1539  * Delete means delete whole item.
1540  *
1541  * Insert means add a new item into the tree.
1542  *
1543  * Paste means to append to the end of an existing file or to
1544  * insert a directory entry.
1545  */
void do_balance(struct tree_balance *tb, struct item_head *ih,
		const char *body, int flag)
{
	int child_pos;		/* position of a child node in its parent */
	int h;			/* level of the tree being processed */

	/*
	 * in our processing of one level we sometimes determine what
	 * must be inserted into the next higher level.  This insertion
	 * consists of a key or two keys and their corresponding
	 * pointers
	 */
	struct item_head insert_key[2];

	/* inserted node-ptrs for the next level */
	struct buffer_head *insert_ptr[2];

	tb->tb_mode = flag;
	tb->need_balance_dirty = 0;

	/* fix_nodes() ran earlier; the tree must not have changed since */
	if (FILESYSTEM_CHANGED_TB(tb)) {
		reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
			       "changed");
	}
	/* if we have no real work to do  */
	if (!tb->insert_size[0]) {
		reiserfs_warning(tb->tb_sb, "PAP-12350",
				 "insert_size == 0, mode == %c", flag);
		unfix_nodes(tb);
		return;
	}

	/* bump the generation so concurrent lookups notice the change */
	atomic_inc(&fs_generation(tb->tb_sb));
	do_balance_starts(tb);

	/*
	 * balance_leaf returns 0 except if combining L R and S into
	 * one node.  see balance_internal() for explanation of this
	 * line of code.
	 */
	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);

#ifdef CONFIG_REISERFS_CHECK
	check_after_balance_leaf(tb);
#endif

	/* Balance internal level of the tree. */
	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
		child_pos =
		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);

	do_balance_completed(tb);

}
1601