1 /*
2  * COPYRIGHT:        See COPYRIGHT.TXT
3  * PROJECT:          Ext2 File System Driver for WinNT/2K/XP
4  * FILE:             lock.c
5  * PROGRAMMER:       Matt Wu <mattwu@163.com>
6  * HOMEPAGE:         http://www.ext2fsd.com
7  * UPDATE HISTORY:
8  *                   Copied from linux/lib/halfmd4.c
9  *                               linux/fs/ext3/hash.c
10  */
11 
12 /* INCLUDES *****************************************************************/
13 
14 #include "ext2fs.h"
15 
16 #ifdef EXT2_HTREE_INDEX
17 
18 #define DX_DEBUG 0
19 
20 #if DX_DEBUG
21 #define dxtrace(command) command
22 #else
23 #define dxtrace(command)
24 #endif
25 
26 #ifndef swap
27 #define swap(type, x, y) do { type z = x; x = y; y = z; } while (0)
28 #endif
29 
30 /* F, G and H are basic MD4 functions: selection, majority, parity */
31 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
32 #define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
33 #define H(x, y, z) ((x) ^ (y) ^ (z))
34 
35 /*
36  * The generic round function.  The application is so specific that
37  * we don't bother protecting all the arguments with parens, as is generally
38  * good macro practice, in favor of extra legibility.
39  * Rotation is separate from addition to prevent recomputation
40  */
41 #define ROUND(f, a, b, c, d, x, s)	\
42 	(a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s)))
43 #define K1 0
44 #define K2 013240474631
45 #define K3 015666365641
46 
47 /*
48  * Basic cut-down MD4 transform.  Returns only 32 bits of result.
49  */
50 __u32 half_md4_transform(__u32 buf[4], __u32 const in[8])
51 {
52     __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
53 
54     /* Round 1 */
55     ROUND(F, a, b, c, d, in[0] + K1,  3);
56     ROUND(F, d, a, b, c, in[1] + K1,  7);
57     ROUND(F, c, d, a, b, in[2] + K1, 11);
58     ROUND(F, b, c, d, a, in[3] + K1, 19);
59     ROUND(F, a, b, c, d, in[4] + K1,  3);
60     ROUND(F, d, a, b, c, in[5] + K1,  7);
61     ROUND(F, c, d, a, b, in[6] + K1, 11);
62     ROUND(F, b, c, d, a, in[7] + K1, 19);
63 
64     /* Round 2 */
65     ROUND(G, a, b, c, d, in[1] + K2,  3);
66     ROUND(G, d, a, b, c, in[3] + K2,  5);
67     ROUND(G, c, d, a, b, in[5] + K2,  9);
68     ROUND(G, b, c, d, a, in[7] + K2, 13);
69     ROUND(G, a, b, c, d, in[0] + K2,  3);
70     ROUND(G, d, a, b, c, in[2] + K2,  5);
71     ROUND(G, c, d, a, b, in[4] + K2,  9);
72     ROUND(G, b, c, d, a, in[6] + K2, 13);
73 
74     /* Round 3 */
75     ROUND(H, a, b, c, d, in[3] + K3,  3);
76     ROUND(H, d, a, b, c, in[7] + K3,  9);
77     ROUND(H, c, d, a, b, in[2] + K3, 11);
78     ROUND(H, b, c, d, a, in[6] + K3, 15);
79     ROUND(H, a, b, c, d, in[1] + K3,  3);
80     ROUND(H, d, a, b, c, in[5] + K3,  9);
81     ROUND(H, c, d, a, b, in[0] + K3, 11);
82     ROUND(H, b, c, d, a, in[4] + K3, 15);
83 
84     buf[0] += a;
85     buf[1] += b;
86     buf[2] += c;
87     buf[3] += d;
88 
89     return buf[1]; /* "most hashed" word */
90 }
91 
92 #define DELTA 0x9E3779B9
93 
94 static void TEA_transform(__u32 buf[4], __u32 const in[])
95 {
96     __u32	sum = 0;
97     __u32	b0 = buf[0], b1 = buf[1];
98     __u32	a = in[0], b = in[1], c = in[2], d = in[3];
99     int	n = 16;
100 
101     do {
102         sum += DELTA;
103         b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
104         b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
105     } while (--n);
106 
107     buf[0] += b0;
108     buf[1] += b1;
109 }
110 
111 
112 /* The old legacy hash */
113 static __u32 dx_hack_hash_unsigned(const char *name, int len)
114 {
115 	__u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
116 	const unsigned char *ucp = (const unsigned char *) name;
117 
118 	while (len--) {
119 		hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
120 
121 		if (hash & 0x80000000)
122 			hash -= 0x7fffffff;
123 		hash1 = hash0;
124 		hash0 = hash;
125 	}
126 	return hash0 << 1;
127 }
128 
129 static __u32 dx_hack_hash_signed(const char *name, int len)
130 {
131 	__u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
132 	const signed char *scp = (const signed char *) name;
133 
134 	while (len--) {
135 		hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
136 
137 		if (hash & 0x80000000)
138 			hash -= 0x7fffffff;
139 		hash1 = hash0;
140 		hash0 = hash;
141 	}
142 	return hash0 << 1;
143 }
144 
145 static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
146 {
147 	__u32	pad, val;
148 	int	i;
149 	const signed char *scp = (const signed char *) msg;
150 
151 	pad = (__u32)len | ((__u32)len << 8);
152 	pad |= pad << 16;
153 
154 	val = pad;
155 	if (len > num*4)
156 		len = num * 4;
157 	for (i = 0; i < len; i++) {
158 		if ((i % 4) == 0)
159 			val = pad;
160 		val = ((int) scp[i]) + (val << 8);
161 		if ((i % 4) == 3) {
162 			*buf++ = val;
163 			val = pad;
164 			num--;
165 		}
166 	}
167 	if (--num >= 0)
168 		*buf++ = val;
169 	while (--num >= 0)
170 		*buf++ = pad;
171 }
172 
173 static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
174 {
175 	__u32	pad, val;
176 	int	i;
177 	const unsigned char *ucp = (const unsigned char *) msg;
178 
179 	pad = (__u32)len | ((__u32)len << 8);
180 	pad |= pad << 16;
181 
182 	val = pad;
183 	if (len > num*4)
184 		len = num * 4;
185 	for (i = 0; i < len; i++) {
186 		if ((i % 4) == 0)
187 			val = pad;
188 		val = ((int) ucp[i]) + (val << 8);
189 		if ((i % 4) == 3) {
190 			*buf++ = val;
191 			val = pad;
192 			num--;
193 		}
194 	}
195 	if (--num >= 0)
196 		*buf++ = val;
197 	while (--num >= 0)
198 		*buf++ = pad;
199 }
200 
201 
202 #endif /* EXT2_HTREE_INDEX */
203 
204 __u32 ext3_current_time(struct inode *in)
205 {
206     LARGE_INTEGER   SysTime;
207     KeQuerySystemTime(&SysTime);
208 
209     return Ext2LinuxTime(SysTime);
210 }
211 
212 void ext3_warning (struct super_block * sb, const char * function,
213                    char * fmt, ...)
214 {
215 #if DX_DEBUG
216     va_list args;
217 
218     va_start(args, fmt);
219     printk("EXT3-fs warning (device %s): %s: ",
220            sb->s_id, function);
221     printk(fmt, args);
222     printk("\n");
223     va_end(args);
224 #endif
225 }
226 
227 
228 /* ext3_bread is safe for meta-data blocks. it's not safe to read file data,
229    since file data is managed by file cache, not volume cache */
230 struct buffer_head *ext3_bread(struct ext2_icb *icb, struct inode *inode,
231                                            unsigned long block, int *err)
232 {
233     struct buffer_head * bh = NULL;
234     NTSTATUS    status = STATUS_SUCCESS;
235     ULONG       lbn = 0, num = 0;
236 
237     PEXT2_MCB   Mcb = CONTAINING_RECORD(inode, EXT2_MCB, Inode);
238 
239     /* for symlink file, read it's target instead */
240     if (NULL != Mcb && IsMcbSymLink(Mcb))
241         Mcb = Mcb->Target;
242     if (NULL == Mcb) {
243         *err = -EINVAL;
244         return NULL;
245     }
246 
247     /* mapping file offset to ext2 block */
248     if (INODE_HAS_EXTENT(&Mcb->Inode)) {
249         status = Ext2MapExtent(icb, inode->i_sb->s_priv,
250                                Mcb, block, FALSE,
251                                &lbn, &num);
252     } else {
253         status = Ext2MapIndirect(icb, inode->i_sb->s_priv,
254                                  Mcb, block, FALSE,
255                                  &lbn, &num);
256     }
257 
258     if (!NT_SUCCESS(status)) {
259         *err = Ext2LinuxError(status);
260         return bh;
261     }
262 
263     bh = sb_getblk(inode->i_sb, lbn);
264     if (!bh) {
265         *err = -ENOMEM;
266         return bh;
267     }
268     if (buffer_uptodate(bh))
269         return bh;
270 
271     *err = bh_submit_read(bh);
272     if (*err) {
273 	    __brelse(bh);
274 	    return NULL;
275     }
276     return bh;
277 }
278 
279 struct buffer_head *ext3_append(struct ext2_icb *icb, struct inode *inode,
280                                             ext3_lblk_t *block, int *err)
281 {
282     PEXT2_MCB   mcb = CONTAINING_RECORD(inode, EXT2_MCB, Inode);
283     PEXT2_FCB   dcb = mcb->Fcb;
284     NTSTATUS    status;
285 
286     ASSERT(dcb);
287     ASSERT(inode == dcb->Inode);
288 
289     /* allocate new block since there's no space for us */
290     *block = (ext3_lblk_t)(inode->i_size >> inode->i_sb->s_blocksize_bits);
291     dcb->Header.AllocationSize.QuadPart += dcb->Vcb->BlockSize;
292     status = Ext2ExpandFile(icb, dcb->Vcb, mcb, &(dcb->Header.AllocationSize));
293     if (NT_SUCCESS(status)) {
294 
295         /* update Dcb */
296         dcb->Header.ValidDataLength = dcb->Header.FileSize = dcb->Header.AllocationSize;
297         mcb->Inode.i_size = dcb->Header.AllocationSize.QuadPart;
298 
299         /* save parent directory's inode */
300         Ext2SaveInode(icb, dcb->Vcb, inode);
301     }
302 
303     return ext3_bread(icb, inode, *block, err);
304 }
305 
306 
307 void ext3_inc_count(struct inode *inode)
308 {
309     inode->i_nlink++;
310 }
311 
312 void ext3_dec_count(struct inode *inode)
313 {
314     inode->i_nlink--;
315 }
316 
317 unsigned char ext3_type_by_mode(umode_t mode)
318 {
319     unsigned char type = 0;
320 
321     switch (mode & S_IFMT) {
322     case S_IFREG:
323         type = EXT3_FT_REG_FILE;
324         break;
325     case S_IFDIR:
326         type = EXT3_FT_DIR;
327         break;
328     case S_IFCHR:
329         type =  EXT3_FT_CHRDEV;
330         break;
331     case S_IFBLK:
332         type = EXT3_FT_BLKDEV;
333         break;
334     case S_IFIFO:
335         type = EXT3_FT_FIFO;
336         break;
337     case S_IFSOCK:
338         type = EXT3_FT_SOCK;
339         break;
340     case S_IFLNK:
341         type = EXT3_FT_SYMLINK;
342     }
343 
344     return type;
345 };
346 
347 void ext3_set_de_type(struct super_block *sb,
348                       struct ext3_dir_entry_2 *de,
349                       umode_t mode)
350 {
351     if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
352         de->file_type = ext3_type_by_mode(mode);
353 }
354 
355 /*
356  * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
357  * do not perform it in these functions.  We perform it at the call site,
358  * if it is needed.
359  */
360 int ext3_mark_inode_dirty(struct ext2_icb *icb, struct inode *in)
361 {
362     if (Ext2SaveInode(icb, in->i_sb->s_priv, in))
363         return 0;
364 
365     return -ENOMEM;
366 }
367 
368 void ext3_update_dx_flag(struct inode *inode)
369 {
370     if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
371                                  EXT3_FEATURE_COMPAT_DIR_INDEX))
372         EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
373 }
374 
375 /*
376  * Add a new entry into a directory (leaf) block.  If de is non-NULL,
377  * it points to a directory entry which is guaranteed to be large
378  * enough for new directory entry.  If de is NULL, then
379  * add_dirent_to_buf will attempt search the directory block for
380  * space.  It will return -ENOSPC if no space is available, and -EIO
381  * and -EEXIST if directory entry already exists.
382  *
383  * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
384  * all other cases bh is released.
385  */
386 int add_dirent_to_buf(struct ext2_icb *icb, struct dentry *dentry,
387                       struct inode *inode, struct ext3_dir_entry_2 *de,
388                       struct buffer_head *bh)
389 {
390     struct inode *dir = dentry->d_parent->d_inode;
391     const char	*name = dentry->d_name.name;
392     int		namelen = dentry->d_name.len;
393     unsigned int	offset = 0;
394     unsigned short	reclen;
395     int		nlen, rlen, err;
396     char		*top;
397 
398     reclen = EXT3_DIR_REC_LEN(namelen);
399     if (!de) {
400         de = (struct ext3_dir_entry_2 *)bh->b_data;
401         top = bh->b_data + dir->i_sb->s_blocksize - reclen;
402         while ((char *) de <= top) {
403             if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
404                                       bh, offset)) {
405                 __brelse(bh);
406                 return -EIO;
407             }
408             if (ext3_match(namelen, name, de)) {
409                 __brelse(bh);
410                 return -EEXIST;
411             }
412             nlen = EXT3_DIR_REC_LEN(de->name_len);
413             rlen = ext3_rec_len_from_disk(de->rec_len);
414             if ((de->inode? rlen - nlen: rlen) >= reclen)
415                 break;
416             de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
417             offset += rlen;
418         }
419         if ((char *) de > top)
420             return -ENOSPC;
421     }
422 
423     /* By now the buffer is marked for journaling */
424     nlen = EXT3_DIR_REC_LEN(de->name_len);
425     rlen = ext3_rec_len_from_disk(de->rec_len);
426     if (de->inode) {
427         struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
428         de1->rec_len = ext3_rec_len_to_disk(rlen - nlen);
429         de->rec_len = ext3_rec_len_to_disk(nlen);
430         de = de1;
431     }
432     de->file_type = EXT3_FT_UNKNOWN;
433     if (inode) {
434         de->inode = cpu_to_le32(inode->i_ino);
435         ext3_set_de_type(dir->i_sb, de, inode->i_mode);
436     } else
437         de->inode = 0;
438     de->name_len = (__u8)namelen;
439     memcpy(de->name, name, namelen);
440 
441     /*
442      * XXX shouldn't update any times until successful
443      * completion of syscall, but too many callers depend
444      * on this.
445      *
446      * XXX similarly, too many callers depend on
447      * ext4_new_inode() setting the times, but error
448      * recovery deletes the inode, so the worst that can
449      * happen is that the times are slightly out of date
450      * and/or different from the directory change time.
451      */
452     dir->i_mtime = dir->i_ctime = ext3_current_time(dir);
453     ext3_update_dx_flag(dir);
454     dir->i_version++;
455     ext3_mark_inode_dirty(icb, dir);
456     set_buffer_dirty(bh);
457     __brelse(bh);
458     return 0;
459 }
460 
461 #ifdef EXT2_HTREE_INDEX
462 
463 /*
464  * Returns the hash of a filename.  If len is 0 and name is NULL, then
465  * this function can be used to test whether or not a hash version is
466  * supported.
467  *
468  * The seed is an 4 longword (32 bits) "secret" which can be used to
469  * uniquify a hash.  If the seed is all zero's, then some default seed
470  * may be used.
471  *
472  * A particular hash version specifies whether or not the seed is
473  * represented, and whether or not the returned hash is 32 bits or 64
474  * bits.  32 bit hashes will return 0 for the minor hash.
475  */
476 int ext3_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
477 {
478     __u32	hash;
479     __u32	minor_hash = 0;
480     const char	*p;
481     int		i;
482     __u32 		in[8], buf[4];
483 
484 	void		(*str2hashbuf)(const char *, int, __u32 *, int) =
485 				str2hashbuf_signed;
486 
487     /* Initialize the default seed for the hash checksum functions */
488     buf[0] = 0x67452301;
489     buf[1] = 0xefcdab89;
490     buf[2] = 0x98badcfe;
491     buf[3] = 0x10325476;
492 
493 	/* Check to see if the seed is all zero's */
494 	if (hinfo->seed) {
495 		for (i = 0; i < 4; i++) {
496 			if (hinfo->seed[i])
497 				break;
498 		}
499 		if (i < 4)
500 			memcpy(buf, hinfo->seed, sizeof(buf));
501 	}
502 
503 	switch (hinfo->hash_version) {
504 	case DX_HASH_LEGACY_UNSIGNED:
505 		hash = dx_hack_hash_unsigned(name, len);
506 		break;
507 	case DX_HASH_LEGACY:
508 		hash = dx_hack_hash_signed(name, len);
509 		break;
510 	case DX_HASH_HALF_MD4_UNSIGNED:
511 		str2hashbuf = str2hashbuf_unsigned;
512 	case DX_HASH_HALF_MD4:
513 		p = name;
514 		while (len > 0) {
515 			(*str2hashbuf)(p, len, in, 8);
516 			half_md4_transform(buf, in);
517 			len -= 32;
518 			p += 32;
519 		}
520 		minor_hash = buf[2];
521 		hash = buf[1];
522 		break;
523 	case DX_HASH_TEA_UNSIGNED:
524 		str2hashbuf = str2hashbuf_unsigned;
525 	case DX_HASH_TEA:
526 		p = name;
527 		while (len > 0) {
528 			(*str2hashbuf)(p, len, in, 4);
529 			TEA_transform(buf, in);
530 			len -= 16;
531 			p += 16;
532 		}
533 		hash = buf[0];
534 		minor_hash = buf[1];
535 		break;
536 	default:
537 		hinfo->hash = 0;
538 		return -1;
539 	}
540 	hash = hash & ~1;
541 	if (hash == (EXT4_HTREE_EOF_32BIT << 1))
542 		hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
543 	hinfo->hash = hash;
544 	hinfo->minor_hash = minor_hash;
545 	return 0;
546 }
547 EXPORT_SYMBOL(ext3_dirhash);
548 
549 
550 /*
551  * These functions convert from the major/minor hash to an f_pos
552  * value.
553  *
554  * Currently we only use major hash numer.  This is unfortunate, but
555  * on 32-bit machines, the same VFS interface is used for lseek and
556  * llseek, so if we use the 64 bit offset, then the 32-bit versions of
557  * lseek/telldir/seekdir will blow out spectacularly, and from within
558  * the ext2 low-level routine, we don't know if we're being called by
559  * a 64-bit version of the system call or the 32-bit version of the
560  * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
561  * cookie.  Sigh.
562  */
563 #define hash2pos(major, minor)	(major >> 1)
564 #define pos2maj_hash(pos)	((pos << 1) & 0xffffffff)
565 #define pos2min_hash(pos)	(0)
566 
567 /*
568  * This structure holds the nodes of the red-black tree used to store
569  * the directory entry in hash order.
570  */
571 struct fname {
572     __u32		hash;
573     __u32		minor_hash;
574     struct rb_node	rb_hash;
575     struct fname	*next;
576     __u32		inode;
577     __u8		name_len;
578     __u8		file_type;
579     char		name[0];
580 };
581 
582 /*
583  * This functoin implements a non-recursive way of freeing all of the
584  * nodes in the red-black tree.
585  */
586 static void free_rb_tree_fname(struct rb_root *root)
587 {
588     struct rb_node	*n = root->rb_node;
589     struct rb_node	*parent;
590     struct fname	*fname;
591 
592     while (n) {
593         /* Do the node's children first */
594         if ((n)->rb_left) {
595             n = n->rb_left;
596             continue;
597         }
598         if (n->rb_right) {
599             n = n->rb_right;
600             continue;
601         }
602         /*
603          * The node has no children; free it, and then zero
604          * out parent's link to it.  Finally go to the
605          * beginning of the loop and try to free the parent
606          * node.
607          */
608         parent = rb_parent(n);
609         fname = rb_entry(n, struct fname, rb_hash);
610         while (fname) {
611             struct fname * old = fname;
612             fname = fname->next;
613             kfree (old);
614         }
615         if (!parent)
616             root->rb_node = NULL;
617         else if (parent->rb_left == n)
618             parent->rb_left = NULL;
619         else if (parent->rb_right == n)
620             parent->rb_right = NULL;
621         n = parent;
622     }
623     root->rb_node = NULL;
624 }
625 
626 
627 static struct dir_private_info *create_dir_info(loff_t pos)
628 {
629     struct dir_private_info *p;
630 
631     p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
632     if (!p)
633         return NULL;
634     p->root.rb_node = NULL;
635     p->curr_node = NULL;
636     p->extra_fname = NULL;
637     p->last_pos = 0;
638     p->curr_hash = (__u32)pos2maj_hash(pos);
639     p->curr_minor_hash = (__u32)pos2min_hash(pos);
640     p->next_hash = 0;
641     return p;
642 }
643 
644 void ext3_htree_free_dir_info(struct dir_private_info *p)
645 {
646     free_rb_tree_fname(&p->root);
647     kfree(p);
648 }
649 
650 /*
651  * Given a directory entry, enter it into the fname rb tree.
652  */
653 int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
654                             __u32 minor_hash,
655                             struct ext3_dir_entry_2 *dirent)
656 {
657     struct rb_node **p, *parent = NULL;
658     struct fname * fname, *new_fn;
659     struct dir_private_info *info;
660     int extra_data = 0;
661     int len;
662 
663     info = (struct dir_private_info *) dir_file->private_data;
664     p = &info->root.rb_node;
665 
666     /* Create and allocate the fname structure */
667     if (dirent->file_type & EXT3_DIRENT_LUFID)
668         extra_data = ext3_get_dirent_data_len(dirent);
669 
670     len = sizeof(struct fname) + dirent->name_len + extra_data;
671     new_fn = kmalloc(len, GFP_KERNEL);
672     if (!new_fn)
673         return -ENOMEM;
674     memset(new_fn, 0, len);
675     new_fn->hash = hash;
676     new_fn->minor_hash = minor_hash;
677     new_fn->inode = le32_to_cpu(dirent->inode);
678     new_fn->name_len = dirent->name_len;
679     new_fn->file_type = dirent->file_type;
680     memcpy(&new_fn->name[0], &dirent->name[0],
681            dirent->name_len + extra_data);
682     new_fn->name[dirent->name_len] = 0;
683 
684     while (*p) {
685         parent = *p;
686         fname = rb_entry(parent, struct fname, rb_hash);
687 
688         /*
689          * If the hash and minor hash match up, then we put
690          * them on a linked list.  This rarely happens...
691          */
692         if ((new_fn->hash == fname->hash) &&
693                 (new_fn->minor_hash == fname->minor_hash)) {
694             new_fn->next = fname->next;
695             fname->next = new_fn;
696             return 0;
697         }
698 
699         if (new_fn->hash < fname->hash)
700             p = &(*p)->rb_left;
701         else if (new_fn->hash > fname->hash)
702             p = &(*p)->rb_right;
703         else if (new_fn->minor_hash < fname->minor_hash)
704             p = &(*p)->rb_left;
705         else /* if (new_fn->minor_hash > fname->minor_hash) */
706             p = &(*p)->rb_right;
707     }
708 
709     rb_link_node(&new_fn->rb_hash, parent, p);
710     rb_insert_color(&new_fn->rb_hash, &info->root);
711     return 0;
712 }
713 
714 static unsigned char ext3_filetype_table[] = {
715     DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
716 };
717 
718 static unsigned char get_dtype(struct super_block *sb, int filetype)
719 {
720     if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
721             (filetype >= EXT3_FT_MAX))
722         return DT_UNKNOWN;
723 
724     return (ext3_filetype_table[filetype]);
725 }
726 
727 /*
728  * This is a helper function for ext3_dx_readdir.  It calls filldir
729  * for all entres on the fname linked list.  (Normally there is only
730  * one entry on the linked list, unless there are 62 bit hash collisions.)
731  */
732 static int call_filldir(struct file * filp, void * cookie,
733                         filldir_t filldir, struct fname *fname)
734 {
735     struct dir_private_info *info = filp->private_data;
736     loff_t	curr_pos;
737     struct inode *inode = filp->f_dentry->d_inode;
738     struct super_block * sb;
739     int error;
740 
741     sb = inode->i_sb;
742 
743     if (!fname) {
744         printk("call_filldir: called with null fname?!?\n");
745         return 0;
746     }
747     curr_pos = hash2pos(fname->hash, fname->minor_hash);
748     while (fname) {
749         error = filldir(cookie, fname->name,
750                         fname->name_len, (ULONG)curr_pos,
751                         fname->inode, get_dtype(sb, fname->file_type));
752         if (error) {
753             filp->f_pos = curr_pos;
754             info->extra_fname = fname;
755             return error;
756         }
757         fname = fname->next;
758     }
759     return 0;
760 }
761 
762 struct fake_dirent
763 {
764     __le32 inode;
765     __le16 rec_len;
766     __u8 name_len;
767     __u8 file_type;
768 };
769 
770 struct dx_countlimit
771 {
772     __le16 limit;
773     __le16 count;
774 };
775 
776 struct dx_entry
777 {
778     __le32 hash;
779     __le32 block;
780 };
781 
782 /*
783  * dx_root_info is laid out so that if it should somehow get overlaid by a
784  * dirent the two low bits of the hash version will be zero.  Therefore, the
785  * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
786  */
787 
788 struct dx_root
789 {
790     struct fake_dirent dot;
791     char dot_name[4];
792     struct fake_dirent dotdot;
793     char dotdot_name[4];
794     struct dx_root_info
795     {
796         __le32 reserved_zero;
797         __u8 hash_version;
798         __u8 info_length; /* 8 */
799         __u8 indirect_levels;
800         __u8 unused_flags;
801     }
802     info;
803     struct dx_entry	entries[0];
804 };
805 
806 struct dx_node
807 {
808     struct fake_dirent fake;
809     struct dx_entry	entries[0];
810 };
811 
812 
813 struct dx_frame
814 {
815     struct buffer_head *bh;
816     struct dx_entry *entries;
817     struct dx_entry *at;
818 };
819 
820 struct dx_map_entry
821 {
822     __u32 hash;
823     __u16 offs;
824     __u16 size;
825 };
826 
827 #if defined(__REACTOS__) && !defined(_MSC_VER)
828 struct ext3_dir_entry_2 *
829             do_split(struct ext2_icb *icb, struct inode *dir,
830                      struct buffer_head **bh,struct dx_frame *frame,
831                      struct dx_hash_info *hinfo, int *error);
832 #endif
833 
834 /*
835  * Future: use high four bits of block for coalesce-on-delete flags
836  * Mask them off for now.
837  */
838 
839 static inline unsigned dx_get_block (struct dx_entry *entry)
840 {
841     return le32_to_cpu(entry->block) & 0x00ffffff;
842 }
843 
844 static inline void dx_set_block (struct dx_entry *entry, unsigned value)
845 {
846     entry->block = cpu_to_le32(value);
847 }
848 
849 static inline unsigned dx_get_hash (struct dx_entry *entry)
850 {
851     return le32_to_cpu(entry->hash);
852 }
853 
854 static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
855 {
856     entry->hash = cpu_to_le32(value);
857 }
858 
859 static inline unsigned dx_get_count (struct dx_entry *entries)
860 {
861     return le16_to_cpu(((struct dx_countlimit *) entries)->count);
862 }
863 
864 static inline unsigned dx_get_limit (struct dx_entry *entries)
865 {
866     return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
867 }
868 
869 static inline void dx_set_count (struct dx_entry *entries, unsigned value)
870 {
871     ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
872 }
873 
874 static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
875 {
876     ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
877 }
878 
879 static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
880 {
881     unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
882                            EXT3_DIR_REC_LEN(2) - infosize;
883     return 0? 20: entry_space / sizeof(struct dx_entry);
884 }
885 
886 static inline unsigned dx_node_limit (struct inode *dir)
887 {
888     unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
889     return 0? 22: entry_space / sizeof(struct dx_entry);
890 }
891 
892 /*
893  * Debug
894  */
895 #if DX_DEBUG
896 static void dx_show_index (char * label, struct dx_entry *entries)
897 {
898     int i, n = dx_get_count (entries);
899     printk("%s index ", label);
900     for (i = 0; i < n; i++)
901     {
902         printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
903     }
904     printk("\n");
905 }
906 
907 struct stats
908 {
909     unsigned names;
910     unsigned space;
911     unsigned bcount;
912 };
913 
914 struct stats dx_show_leaf(struct ext2_icb *icb, struct dx_hash_info *hinfo,
915                                       struct ext3_dir_entry_2 *de, int size, int show_names)
916 {
917     struct stats rc;
918     unsigned names = 0, space = 0;
919     char *base = (char *) de;
920     struct dx_hash_info h = *hinfo;
921 
922     printk("names: ");
923     while ((char *) de < base + size)
924     {
925         if (de->inode)
926         {
927             if (show_names)
928             {
929                 int len = de->name_len;
930                 char *name = de->name;
931                 while (len--) printk("%c", *name++);
932                 ext3_dirhash(de->name, de->name_len, &h);
933                 printk(":%x.%u ", h.hash,
934                        ((char *) de - base));
935             }
936             space += EXT3_DIR_REC_LEN(de->name_len);
937             names++;
938         }
939         de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
940     }
941     printk("(%i)\n", names);
942 
943     rc.names = names;
944     rc.space = space;
945     rc.bcount = 1;
946 
947     return rc;
948 }
949 
950 struct stats dx_show_entries(struct ext2_icb *icb, struct dx_hash_info *hinfo,
951                                          struct inode *dir, struct dx_entry *entries, int levels)
952 {
953     unsigned blocksize = dir->i_sb->s_blocksize;
954     unsigned count = dx_get_count (entries), names = 0, space = 0, i;
955     unsigned bcount = 0;
956     struct buffer_head *bh;
957     struct stats rc;
958     int err;
959 
960     printk("%i indexed blocks...\n", count);
961     for (i = 0; i < count; i++, entries++)
962     {
963         u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
964         u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
965         struct stats stats;
966         printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
967         if (!(bh = ext3_bread (icb, dir, block, &err))) continue;
968         stats = levels?
969                 dx_show_entries(icb, hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
970                 dx_show_leaf(icb, hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
971         names += stats.names;
972         space += stats.space;
973         bcount += stats.bcount;
974         __brelse (bh);
975     }
976     if (bcount)
977         printk("%snames %u, fullness %u (%u%%)\n", levels?"":"   ",
978                names, space/bcount,(space/bcount)*100/blocksize);
979 
980     rc.names = names;
981     rc.space = space;
982     rc.bcount = 1;
983 
984     return rc;
985 }
986 #endif /* DX_DEBUG */
987 
988 
989 int ext3_save_inode ( struct ext2_icb *icb, struct inode *in)
990 {
991     return Ext2SaveInode(icb, in->i_sb->s_priv, in);
992 }
993 
994 /*
995  * Probe for a directory leaf block to search.
996  *
997  * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
998  * error in the directory index, and the caller should fall back to
999  * searching the directory normally.  The callers of dx_probe **MUST**
1000  * check for this error code, and make sure it never gets reflected
1001  * back to userspace.
1002  */
1003 static struct dx_frame *
1004             dx_probe(struct ext2_icb *icb, struct dentry *dentry, struct inode *dir,
1005                      struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
1006 {
1007     unsigned count, indirect;
1008     struct dx_entry *at, *entries, *p, *q, *m;
1009     struct dx_root *root;
1010     struct buffer_head *bh;
1011     struct dx_frame *frame = frame_in;
1012     u32 hash;
1013 
1014     frame->bh = NULL;
1015     if (dentry)
1016         dir = dentry->d_parent->d_inode;
1017     if (!(bh = ext3_bread (icb, dir, 0, err)))
1018         goto fail;
1019     root = (struct dx_root *) bh->b_data;
1020     if (root->info.hash_version != DX_HASH_TEA &&
1021             root->info.hash_version != DX_HASH_HALF_MD4 &&
1022             root->info.hash_version != DX_HASH_LEGACY) {
1023         ext3_warning(dir->i_sb, __FUNCTION__,
1024                      "Unrecognised inode hash code %d",
1025                      root->info.hash_version);
1026         __brelse(bh);
1027         *err = ERR_BAD_DX_DIR;
1028         goto fail;
1029     }
1030     hinfo->hash_version = root->info.hash_version;
1031     hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
1032     if (dentry)
1033         ext3_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
1034     hash = hinfo->hash;
1035 
1036     if (root->info.unused_flags & 1) {
1037         ext3_warning(dir->i_sb, __FUNCTION__,
1038                      "Unimplemented inode hash flags: %#06x",
1039                      root->info.unused_flags);
1040         brelse(bh);
1041         *err = ERR_BAD_DX_DIR;
1042         goto fail;
1043     }
1044 
1045     if ((indirect = root->info.indirect_levels) > 1) {
1046         ext3_warning(dir->i_sb, __FUNCTION__,
1047                      "Unimplemented inode hash depth: %#06x",
1048                      root->info.indirect_levels);
1049         brelse(bh);
1050         *err = ERR_BAD_DX_DIR;
1051         goto fail;
1052     }
1053 
1054     entries = (struct dx_entry *) (((char *)&root->info) +
1055                                    root->info.info_length);
1056 
1057     if (dx_get_limit(entries) != dx_root_limit(dir,
1058             root->info.info_length)) {
1059         ext3_warning(dir->i_sb, __FUNCTION__,
1060                      "dx entry: limit != root limit");
1061         brelse(bh);
1062         *err = ERR_BAD_DX_DIR;
1063         goto fail;
1064     }
1065 
1066     dxtrace(printk("Look up %x", hash));
1067     while (1)
1068     {
1069         count = dx_get_count(entries);
1070         if (!count || count > dx_get_limit(entries)) {
1071             ext3_warning(dir->i_sb, __FUNCTION__,
1072                          "dx entry: no count or count > limit");
1073             brelse(bh);
1074             *err = ERR_BAD_DX_DIR;
1075             goto fail2;
1076         }
1077 
1078         p = entries + 1;
1079         q = entries + count - 1;
1080         while (p <= q)
1081         {
1082             m = p + (q - p)/2;
1083             if (dx_get_hash(m) > hash)
1084                 q = m - 1;
1085             else
1086                 p = m + 1;
1087         }
1088 
1089         if (0) // linear search cross check
1090         {
1091             unsigned n = count - 1;
1092             at = entries;
1093             while (n--)
1094             {
1095                 if (dx_get_hash(++at) > hash)
1096                 {
1097                     at--;
1098                     break;
1099                 }
1100             }
1101             ASSERT(at == p - 1);
1102         }
1103 
1104         at = p - 1;
1105         frame->bh = bh;
1106         frame->entries = entries;
1107         frame->at = at;
1108         if (!indirect--) return frame;
1109         if (!(bh = ext3_bread(icb, dir, dx_get_block(at), err)))
1110             goto fail2;
1111         at = entries = ((struct dx_node *) bh->b_data)->entries;
1112         if (dx_get_limit(entries) != dx_node_limit (dir)) {
1113             ext3_warning(dir->i_sb, __FUNCTION__,
1114                          "dx entry: limit != node limit");
1115             brelse(bh);
1116             *err = ERR_BAD_DX_DIR;
1117             goto fail2;
1118         }
1119         frame++;
1120         frame->bh = NULL;
1121     }
1122 fail2:
1123     while (frame >= frame_in) {
1124         brelse(frame->bh);
1125         frame--;
1126     }
1127 fail:
1128     if (*err == ERR_BAD_DX_DIR)
1129         ext3_warning(dir->i_sb, __FUNCTION__,
1130                      "Corrupt dir inode %ld, running e2fsck is "
1131                      "recommended.", dir->i_ino);
1132     return NULL;
1133 }
1134 
1135 static void dx_release (struct dx_frame *frames)
1136 {
1137     if (frames[0].bh == NULL)
1138         return;
1139 
1140     if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
1141         brelse(frames[1].bh);
1142     brelse(frames[0].bh);
1143 }
1144 
1145 /*
1146  * This function increments the frame pointer to search the next leaf
1147  * block, and reads in the necessary intervening nodes if the search
1148  * should be necessary.  Whether or not the search is necessary is
1149  * controlled by the hash parameter.  If the hash value is even, then
1150  * the search is only continued if the next block starts with that
1151  * hash value.  This is used if we are searching for a specific file.
1152  *
1153  * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
1154  *
1155  * This function returns 1 if the caller should continue to search,
1156  * or 0 if it should not.  If there is an error reading one of the
1157  * index blocks, it will a negative error code.
1158  *
1159  * If start_hash is non-null, it will be filled in with the starting
1160  * hash of the next page.
1161  */
1162 int ext3_htree_next_block(struct ext2_icb *icb, struct inode *dir,
1163                           __u32 hash, struct dx_frame *frame,
1164                           struct dx_frame *frames, __u32 *start_hash)
1165 {
1166     struct dx_frame *p;
1167     struct buffer_head *bh;
1168     int err, num_frames = 0;
1169     __u32 bhash;
1170 
1171     p = frame;
1172     /*
1173      * Find the next leaf page by incrementing the frame pointer.
1174      * If we run out of entries in the interior node, loop around and
1175      * increment pointer in the parent node.  When we break out of
1176      * this loop, num_frames indicates the number of interior
1177      * nodes need to be read.
1178      */
1179     while (1) {
1180         if (++(p->at) < p->entries + dx_get_count(p->entries))
1181             break;
1182         if (p == frames)
1183             return 0;
1184         num_frames++;
1185         p--;
1186     }
1187 
1188     /*
1189      * If the hash is 1, then continue only if the next page has a
1190      * continuation hash of any value.  This is used for readdir
1191      * handling.  Otherwise, check to see if the hash matches the
1192      * desired contiuation hash.  If it doesn't, return since
1193      * there's no point to read in the successive index pages.
1194      */
1195     bhash = dx_get_hash(p->at);
1196     if (start_hash)
1197         *start_hash = bhash;
1198     if ((hash & 1) == 0) {
1199         if ((bhash & ~1) != hash)
1200             return 0;
1201     }
1202     /*
1203      * If the hash is HASH_NB_ALWAYS, we always go to the next
1204      * block so no check is necessary
1205      */
1206     while (num_frames--) {
1207         if (!(bh = ext3_bread(icb, dir, dx_get_block(p->at), &err)))
1208             return err; /* Failure */
1209         p++;
1210         brelse (p->bh);
1211         p->bh = bh;
1212         p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
1213     }
1214     return 1;
1215 }
1216 
1217 /*
1218  * This function fills a red-black tree with information from a
1219  * directory block.  It returns the number directory entries loaded
1220  * into the tree.  If there is an error it is returned in err.
1221  */
1222 int htree_dirblock_to_tree(struct ext2_icb *icb, struct file *dir_file,
1223                            struct inode *dir, int block,
1224                            struct dx_hash_info *hinfo,
1225                            __u32 start_hash, __u32 start_minor_hash)
1226 {
1227     struct buffer_head *bh;
1228     struct ext3_dir_entry_2 *de, *top;
1229     int err, count = 0;
1230 
1231     dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
1232     if (!(bh = ext3_bread (icb, dir, block, &err)))
1233         return err;
1234 
1235     de = (struct ext3_dir_entry_2 *) bh->b_data;
1236     top = (struct ext3_dir_entry_2 *) ((char *) de +
1237                                        dir->i_sb->s_blocksize -
1238                                        EXT3_DIR_REC_LEN(0));
1239     for (; de < top; de = ext3_next_entry(de)) {
1240         if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
1241                                   ((unsigned long)block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
1242                                   + (unsigned long)((char *)de - bh->b_data))) {
1243             /* On error, skip the f_pos to the next block. */
1244             dir_file->f_pos = (dir_file->f_pos |
1245                                (dir->i_sb->s_blocksize - 1)) + 1;
1246             brelse (bh);
1247             return count;
1248         }
1249         ext3_dirhash(de->name, de->name_len, hinfo);
1250         if ((hinfo->hash < start_hash) ||
1251                 ((hinfo->hash == start_hash) &&
1252                  (hinfo->minor_hash < start_minor_hash)))
1253             continue;
1254         if (de->inode == 0)
1255             continue;
1256         if ((err = ext3_htree_store_dirent(dir_file,
1257                                            hinfo->hash, hinfo->minor_hash, de)) != 0) {
1258             brelse(bh);
1259             return err;
1260         }
1261         count++;
1262     }
1263     brelse(bh);
1264     return count;
1265 }
1266 
1267 /*
1268  * This function fills a red-black tree with information from a
1269  * directory.  We start scanning the directory in hash order, starting
1270  * at start_hash and start_minor_hash.
1271  *
1272  * This function returns the number of entries inserted into the tree,
1273  * or a negative error code.
1274  */
1275 int ext3_htree_fill_tree(struct ext2_icb *icb, struct file *dir_file,
1276                          __u32 start_hash, __u32 start_minor_hash,
1277                          __u32 *next_hash)
1278 {
1279     struct dx_hash_info hinfo;
1280     struct ext3_dir_entry_2 *de;
1281     struct dx_frame frames[2], *frame;
1282     int block, err = 0;
1283     struct inode *dir;
1284     int count = 0;
1285     int ret;
1286     __u32 hashval;
1287 
1288     dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
1289                    start_minor_hash));
1290     dir = dir_file->f_dentry->d_inode;
1291     if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
1292         hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
1293         hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
1294         count = htree_dirblock_to_tree(icb, dir_file, dir, 0, &hinfo,
1295                                        start_hash, start_minor_hash);
1296         *next_hash = ~0;
1297         return count;
1298     }
1299 
1300     hinfo.hash = start_hash;
1301     hinfo.minor_hash = 0;
1302     frame = dx_probe(icb, NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
1303     if (!frame)
1304         return err;
1305 
1306     /* Add '.' and '..' from the htree header */
1307     if (!start_hash && !start_minor_hash) {
1308         de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
1309         if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
1310             goto errout;
1311         count++;
1312     }
1313     if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1314         de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
1315         de = ext3_next_entry(de);
1316         if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
1317             goto errout;
1318         count++;
1319     }
1320 
1321     while (1) {
1322         block = dx_get_block(frame->at);
1323         ret = htree_dirblock_to_tree(icb, dir_file, dir, block, &hinfo,
1324                                      start_hash, start_minor_hash);
1325         if (ret < 0) {
1326             err = ret;
1327             goto errout;
1328         }
1329         count += ret;
1330         hashval = ~0;
1331         ret = ext3_htree_next_block(icb, dir, HASH_NB_ALWAYS,
1332                                     frame, frames, &hashval);
1333         *next_hash = hashval;
1334         if (ret < 0) {
1335             err = ret;
1336             goto errout;
1337         }
1338         /*
1339          * Stop if:  (a) there are no more entries, or
1340          * (b) we have inserted at least one entry and the
1341          * next hash value is not a continuation
1342          */
1343         if ((ret == 0) ||
1344                 (count && ((hashval & 1) == 0)))
1345             break;
1346     }
1347     dx_release(frames);
1348     dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
1349                    count, *next_hash));
1350     return count;
1351 errout:
1352     dx_release(frames);
1353 
1354     return (err);
1355 }
1356 
1357 
1358 /*
1359  * Directory block splitting, compacting
1360  */
1361 
1362 /*
1363  * Create map of hash values, offsets, and sizes, stored at end of block.
1364  * Returns number of entries mapped.
1365  */
1366 static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
1367                         struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
1368 {
1369     int count = 0;
1370     char *base = (char *) de;
1371     struct dx_hash_info h = *hinfo;
1372 
1373     while ((char *) de < base + size)
1374     {
1375         if (de->name_len && de->inode) {
1376             ext3_dirhash(de->name, de->name_len, &h);
1377             map_tail--;
1378             map_tail->hash = h.hash;
1379             map_tail->offs = (u16) ((char *) de - base);
1380             map_tail->size = le16_to_cpu(de->rec_len);
1381             count++;
1382             cond_resched();
1383         }
1384         /* XXX: do we need to check rec_len == 0 case? -Chris */
1385         de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
1386     }
1387     return count;
1388 }
1389 
1390 /* Sort map by hash value */
1391 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1392 {
1393     struct dx_map_entry *p, *q, *top = map + count - 1;
1394     int more;
1395     /* Combsort until bubble sort doesn't suck */
1396     while (count > 2)
1397     {
1398         count = count*10/13;
1399         if (count - 9 < 2) /* 9, 10 -> 11 */
1400             count = 11;
1401         for (p = top, q = p - count; q >= map; p--, q--)
1402             if (p->hash < q->hash)
1403                 swap(struct dx_map_entry, *p, *q);
1404     }
1405     /* Garden variety bubble sort */
1406     do {
1407         more = 0;
1408         q = top;
1409         while (q-- > map)
1410         {
1411             if (q[1].hash >= q[0].hash)
1412                 continue;
1413             swap(struct dx_map_entry, *(q+1), *q);
1414             more = 1;
1415         }
1416     } while (more);
1417 }
1418 
1419 static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
1420 {
1421     struct dx_entry *entries = frame->entries;
1422     struct dx_entry *old = frame->at, *new = old + 1;
1423     unsigned int count = dx_get_count(entries);
1424 
1425     ASSERT(count < dx_get_limit(entries));
1426     ASSERT(old < entries + count);
1427     memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1428     dx_set_hash(new, hash);
1429     dx_set_block(new, block);
1430     dx_set_count(entries, count + 1);
1431 }
1432 
1433 struct buffer_head *
1434             ext3_dx_find_entry(struct ext2_icb *icb, struct dentry *dentry,
1435                                struct ext3_dir_entry_2 **res_dir, int *err)
1436 {
1437     struct super_block * sb;
1438     struct dx_hash_info	hinfo = {0};
1439     u32 hash;
1440     struct dx_frame frames[2], *frame;
1441     struct ext3_dir_entry_2 *de, *top;
1442     struct buffer_head *bh;
1443     unsigned long block;
1444     int retval;
1445     int namelen = dentry->d_name.len;
1446     const __u8 *name = dentry->d_name.name;
1447     struct inode *dir = dentry->d_parent->d_inode;
1448 
1449     sb = dir->i_sb;
1450     /* NFS may look up ".." - look at dx_root directory block */
1451     if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')) {
1452         if (!(frame = dx_probe(icb, dentry, NULL, &hinfo, frames, err)))
1453             return NULL;
1454     } else {
1455         frame = frames;
1456         frame->bh = NULL;			/* for dx_release() */
1457         frame->at = (struct dx_entry *)frames;	/* hack for zero entry*/
1458         dx_set_block(frame->at, 0);		/* dx_root block is 0 */
1459     }
1460     hash = hinfo.hash;
1461     do {
1462         block = dx_get_block(frame->at);
1463         if (!(bh = ext3_bread (icb, dir, block, err)))
1464             goto errout;
1465         de = (struct ext3_dir_entry_2 *) bh->b_data;
1466         top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
1467                                            EXT3_DIR_REC_LEN(0));
1468         for (; de < top; de = ext3_next_entry(de))
1469             if (ext3_match (namelen, name, de)) {
1470                 if (!ext3_check_dir_entry("ext3_find_entry",
1471                                           dir, de, bh,
1472                                           (block<<EXT3_BLOCK_SIZE_BITS(sb))
1473                                           + (unsigned long)((char *)de - bh->b_data))) {
1474                     brelse (bh);
1475                     goto errout;
1476                 }
1477                 *res_dir = de;
1478                 dx_release (frames);
1479                 return bh;
1480             }
1481         brelse (bh);
1482         /* Check to see if we should continue to search */
1483         retval = ext3_htree_next_block(icb, dir, hash, frame,
1484                                        frames, NULL);
1485         if (retval < 0) {
1486             ext3_warning(sb, __FUNCTION__,
1487                          "error reading index page in directory #%lu",
1488                          dir->i_ino);
1489             *err = retval;
1490             goto errout;
1491         }
1492     } while (retval == 1);
1493 
1494     *err = -ENOENT;
1495 errout:
1496     dxtrace(printk("%s not found\n", name));
1497     dx_release (frames);
1498     return NULL;
1499 }
1500 
1501 int ext3_dx_readdir(struct file *filp, filldir_t filldir,
1502                     void * context)
1503 {
1504     struct dir_private_info *info = filp->private_data;
1505     struct inode *inode = filp->f_dentry->d_inode;
1506     struct fname *fname;
1507     PEXT2_FILLDIR_CONTEXT fc = context;
1508     int	ret;
1509 
1510     if (!info) {
1511         info = create_dir_info(filp->f_pos);
1512         if (!info)
1513             return -ENOMEM;
1514         filp->private_data = info;
1515     }
1516 
1517     if (filp->f_pos == EXT3_HTREE_EOF)
1518         return 0;	/* EOF */
1519 
1520     /* Some one has messed with f_pos; reset the world */
1521     if (info->last_pos != filp->f_pos) {
1522         free_rb_tree_fname(&info->root);
1523         info->curr_node = NULL;
1524         info->extra_fname = NULL;
1525         info->curr_hash = (__u32)pos2maj_hash(filp->f_pos);
1526         info->curr_minor_hash = (__u32)pos2min_hash(filp->f_pos);
1527     }
1528 
1529     /*
1530      * If there are any leftover names on the hash collision
1531      * chain, return them first.
1532      */
1533     if (info->extra_fname) {
1534         if (call_filldir(filp, context, filldir, info->extra_fname))
1535             goto finished;
1536         info->extra_fname = NULL;
1537         goto next_node;
1538     } else if (!info->curr_node)
1539         info->curr_node = rb_first(&info->root);
1540 
1541     while (1) {
1542         /*
1543          * Fill the rbtree if we have no more entries,
1544          * or the inode has changed since we last read in the
1545          * cached entries.
1546          */
1547         if ((!info->curr_node) ||
1548                 (filp->f_version != inode->i_version)) {
1549             info->curr_node = NULL;
1550             free_rb_tree_fname(&info->root);
1551             filp->f_version = inode->i_version;
1552             ret = ext3_htree_fill_tree(fc->efc_irp, filp, info->curr_hash,
1553                                        info->curr_minor_hash, &info->next_hash);
1554             if (ret < 0)
1555                 return ret;
1556             if (ret == 0) {
1557                 filp->f_pos = EXT3_HTREE_EOF;
1558                 break;
1559             }
1560             info->curr_node = rb_first(&info->root);
1561         }
1562 
1563         fname = rb_entry(info->curr_node, struct fname, rb_hash);
1564         info->curr_hash = fname->hash;
1565         info->curr_minor_hash = fname->minor_hash;
1566         if (call_filldir(filp, context, filldir, fname))
1567             break;
1568 next_node:
1569         info->curr_node = rb_next(info->curr_node);
1570         if (info->curr_node) {
1571             fname = rb_entry(info->curr_node, struct fname,
1572                              rb_hash);
1573             info->curr_hash = fname->hash;
1574             info->curr_minor_hash = fname->minor_hash;
1575         } else {
1576             if (info->next_hash == ~0) {
1577                 filp->f_pos = EXT3_HTREE_EOF;
1578                 break;
1579             }
1580             info->curr_hash = info->next_hash;
1581             info->curr_minor_hash = 0;
1582         }
1583     }
1584 finished:
1585     info->last_pos = filp->f_pos;
1586     return 0;
1587 }
1588 
1589 int ext3_release_dir (struct inode * inode, struct file * filp)
1590 {
1591     if (filp->private_data) {
1592         ext3_htree_free_dir_info(filp->private_data);
1593         filp->private_data = NULL;
1594     }
1595 
1596     return 0;
1597 }
1598 
1599 /* FIXME: Inspect the clang-cl code path */
1600 #if defined(__REACTOS__) && defined(__clang__)
1601 struct ext3_dir_entry_2* do_split(struct ext2_icb *icb, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, struct dx_hash_info *hinfo, int *error);
1602 #endif
1603 
1604 /*
1605  * Returns 0 for success, or a negative error value
1606  */
1607 int ext3_dx_add_entry(struct ext2_icb *icb, struct dentry *dentry,
1608                       struct inode *inode)
1609 {
1610     struct dx_frame frames[2], *frame;
1611     struct dx_entry *entries, *at;
1612     struct dx_hash_info hinfo;
1613     struct buffer_head * bh;
1614     struct inode *dir = dentry->d_parent->d_inode;
1615     struct super_block * sb = dir->i_sb;
1616     struct ext3_dir_entry_2 *de;
1617     int err;
1618 
1619     frame = dx_probe(icb, dentry, NULL, &hinfo, frames, &err);
1620     if (!frame)
1621         return err;
1622     entries = frame->entries;
1623     at = frame->at;
1624 
1625     if (!(bh = ext3_bread(icb, dir, dx_get_block(frame->at), &err)))
1626         goto cleanup;
1627 
1628     err = add_dirent_to_buf(icb, dentry, inode, NULL, bh);
1629     if (err != -ENOSPC) {
1630         bh = NULL;
1631         goto cleanup;
1632     }
1633 
1634     /* Block full, should compress but for now just split */
1635     dxtrace(printk("using %u of %u node entries\n",
1636                    dx_get_count(entries), dx_get_limit(entries)));
1637     /* Need to split index? */
1638     if (dx_get_count(entries) == dx_get_limit(entries)) {
1639         u32 newblock;
1640         unsigned icount = dx_get_count(entries);
1641         int levels = (int)(frame - frames);
1642         struct dx_entry *entries2;
1643         struct dx_node *node2;
1644         struct buffer_head *bh2;
1645 
1646         if (levels && (dx_get_count(frames->entries) ==
1647                        dx_get_limit(frames->entries))) {
1648             ext3_warning(sb, __FUNCTION__,
1649                          "Directory index full!");
1650             err = -ENOSPC;
1651             goto cleanup;
1652         }
1653         bh2 = ext3_append (icb, dir, &newblock, &err);
1654         if (!(bh2))
1655             goto cleanup;
1656         node2 = (struct dx_node *)(bh2->b_data);
1657         entries2 = node2->entries;
1658         node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
1659         node2->fake.inode = 0;
1660 
1661         if (levels) {
1662             unsigned icount1 = icount/2, icount2 = icount - icount1;
1663             unsigned hash2 = dx_get_hash(entries + icount1);
1664             dxtrace(printk("Split index %i/%i\n", icount1, icount2));
1665 
1666             memcpy ((char *) entries2, (char *) (entries + icount1),
1667                     icount2 * sizeof(struct dx_entry));
1668             dx_set_count (entries, icount1);
1669             dx_set_count (entries2, icount2);
1670             dx_set_limit (entries2, dx_node_limit(dir));
1671 
1672             /* Which index block gets the new entry? */
1673             if ((unsigned int)(at - entries) >= icount1) {
1674                 frame->at = at = at - entries - icount1 + entries2;
1675                 frame->entries = entries = entries2;
1676                 swap(struct buffer_head *, frame->bh, bh2);
1677             }
1678             dx_insert_block (frames + 0, hash2, newblock);
1679             dxtrace(dx_show_index ("node", frames[1].entries));
1680             dxtrace(dx_show_index ("node",
1681                                    ((struct dx_node *) bh2->b_data)->entries));
1682             set_buffer_dirty(bh2);
1683             brelse (bh2);
1684         } else {
1685             dxtrace(printk("Creating second level index...\n"));
1686             memcpy((char *) entries2, (char *) entries,
1687                    icount * sizeof(struct dx_entry));
1688             dx_set_limit(entries2, dx_node_limit(dir));
1689 
1690             /* Set up root */
1691             dx_set_count(entries, 1);
1692             dx_set_block(entries + 0, newblock);
1693             ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
1694 
1695             /* Add new access path frame */
1696             frame = frames + 1;
1697             frame->at = at = at - entries + entries2;
1698             frame->entries = entries = entries2;
1699             frame->bh = bh2;
1700         }
1701         // ext3_journal_dirty_metadata(handle, frames[0].bh);
1702         set_buffer_dirty(frames[0].bh);
1703     }
1704     de = do_split(icb, dir, &bh, frame, &hinfo, &err);
1705     if (!de)
1706         goto cleanup;
1707     err = add_dirent_to_buf(icb, dentry, inode, de, bh);
1708     bh = NULL;
1709     goto cleanup;
1710 
1711 cleanup:
1712     if (bh)
1713         brelse(bh);
1714     dx_release(frames);
1715     return err;
1716 }
1717 
1718 /*
1719  * Move count entries from end of map between two memory locations.
1720  * Returns pointer to last entry moved.
1721  */
1722 struct ext3_dir_entry_2 *
1723             dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1724 {
1725     unsigned rec_len = 0;
1726 
1727     while (count--) {
1728         struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
1729         rec_len = EXT3_DIR_REC_LEN(de->name_len);
1730         memcpy (to, de, rec_len);
1731         ((struct ext3_dir_entry_2 *) to)->rec_len =
1732             cpu_to_le16(rec_len);
1733         de->inode = 0;
1734         map++;
1735         to += rec_len;
1736     }
1737     return (struct ext3_dir_entry_2 *) (to - rec_len);
1738 }
1739 
1740 /*
1741  * Compact each dir entry in the range to the minimal rec_len.
1742  * Returns pointer to last entry in range.
1743  */
1744 struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
1745 {
1746     struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
1747     unsigned rec_len = 0;
1748 
1749     prev = to = de;
1750     while ((char*)de < base + size) {
1751         next = (struct ext3_dir_entry_2 *) ((char *) de +
1752                                             le16_to_cpu(de->rec_len));
1753         if (de->inode && de->name_len) {
1754             rec_len = EXT3_DIR_REC_LEN(de->name_len);
1755             if (de > to)
1756                 memmove(to, de, rec_len);
1757             to->rec_len = cpu_to_le16(rec_len);
1758             prev = to;
1759             to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
1760         }
1761         de = next;
1762     }
1763     return prev;
1764 }
1765 
1766 /*
1767  * Split a full leaf block to make room for a new dir entry.
1768  * Allocate a new block, and move entries so that they are approx. equally full.
1769  * Returns pointer to de in block into which the new entry will be inserted.
1770  */
1771 struct ext3_dir_entry_2 *
1772             do_split(struct ext2_icb *icb, struct inode *dir,
1773                      struct buffer_head **bh,struct dx_frame *frame,
1774                      struct dx_hash_info *hinfo, int *error)
1775 {
1776     unsigned blocksize = dir->i_sb->s_blocksize;
1777     unsigned count, continued;
1778     struct buffer_head *bh2;
1779     u32 newblock;
1780     u32 hash2;
1781     struct dx_map_entry *map;
1782     char *data1 = (*bh)->b_data, *data2;
1783     unsigned split, move, size;
1784     struct ext3_dir_entry_2 *de = NULL, *de2;
1785     int	err, i;
1786 
1787     bh2 = ext3_append (icb, dir, &newblock, error);
1788     if (!(bh2)) {
1789         brelse(*bh);
1790         *bh = NULL;
1791         goto errout;
1792     }
1793 
1794     data2 = bh2->b_data;
1795 
1796     /* create map in the end of data2 block */
1797     map = (struct dx_map_entry *) (data2 + blocksize);
1798     count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
1799                          blocksize, hinfo, map);
1800     map -= count;
1801     dx_sort_map (map, count);
1802     /* Split the existing block in the middle, size-wise */
1803     size = 0;
1804     move = 0;
1805     for (i = count-1; i >= 0; i--) {
1806         /* is more than half of this entry in 2nd half of the block? */
1807         if (size + map[i].size/2 > blocksize/2)
1808             break;
1809         size += map[i].size;
1810         move++;
1811     }
1812     /* map index at which we will split */
1813     split = count - move;
1814     hash2 = map[split].hash;
1815     continued = hash2 == map[split - 1].hash;
1816     dxtrace(printk("Split block %i at %x, %i/%i\n",
1817                    dx_get_block(frame->at), hash2, split, count-split));
1818 
1819     /* Fancy dance to stay within two buffers */
1820     de2 = dx_move_dirents(data1, data2, map + split, count - split);
1821     de = dx_pack_dirents(data1,blocksize);
1822     de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
1823     de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
1824     dxtrace(dx_show_leaf (icb, hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
1825     dxtrace(dx_show_leaf (icb, hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
1826 
1827     /* Which block gets the new entry? */
1828     if (hinfo->hash >= hash2)
1829     {
1830         swap(struct buffer_head *, *bh, bh2);
1831         de = de2;
1832     }
1833     dx_insert_block (frame, hash2 + continued, newblock);
1834     set_buffer_dirty(bh2);
1835     set_buffer_dirty(frame->bh);
1836 
1837     brelse (bh2);
1838     dxtrace(dx_show_index ("frame", frame->entries));
1839 errout:
1840     return de;
1841 }
1842 
1843 /*
1844  * This converts a one block unindexed directory to a 3 block indexed
1845  * directory, and adds the dentry to the indexed directory.
1846  */
1847 int make_indexed_dir(struct ext2_icb *icb, struct dentry *dentry,
1848                      struct inode *inode, struct buffer_head *bh)
1849 {
1850     struct inode	*dir = dentry->d_parent->d_inode;
1851     const char	*name = dentry->d_name.name;
1852     int		namelen = dentry->d_name.len;
1853     struct buffer_head *bh2;
1854     struct dx_root	*root;
1855     struct dx_frame	frames[2], *frame;
1856     struct dx_entry *entries;
1857     struct ext3_dir_entry_2	*de, *de2;
1858     char		*data1, *top;
1859     unsigned	len;
1860     int		retval;
1861     unsigned	blocksize;
1862     struct dx_hash_info hinfo;
1863     ext3_lblk_t  block;
1864     struct fake_dirent *fde;
1865 
1866     blocksize =  dir->i_sb->s_blocksize;
1867     dxtrace(printk("Creating index: inode %lu\n", dir->i_ino));
1868 
1869     root = (struct dx_root *) bh->b_data;
1870 
1871     /* The 0th block becomes the root, move the dirents out */
1872     fde = &root->dotdot;
1873     de = (struct ext3_dir_entry_2 *)((char *)fde +
1874                                      ext3_rec_len_from_disk(fde->rec_len));
1875     if ((char *) de >= (((char *) root) + blocksize)) {
1876         DEBUG(DL_ERR, ( "%s: invalid rec_len for '..' in inode %lu", __FUNCTION__,
1877                        dir->i_ino));
1878         brelse(bh);
1879         return -EIO;
1880     }
1881     len = (unsigned int)((char *) root + blocksize - (char *) de);
1882 
1883     /* Allocate new block for the 0th block's dirents */
1884     bh2 = ext3_append(icb, dir, &block, &retval);
1885     if (!(bh2)) {
1886         brelse(bh);
1887         return retval;
1888     }
1889     EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
1890     data1 = bh2->b_data;
1891 
1892     memcpy (data1, de, len);
1893     de = (struct ext3_dir_entry_2 *) data1;
1894     top = data1 + len;
1895     while ((char *)(de2 = ext3_next_entry(de)) < top)
1896         de = de2;
1897     de->rec_len = ext3_rec_len_to_disk(blocksize + (__u32)(data1 - (char *)de));
1898     /* Initialize the root; the dot dirents already exist */
1899     de = (struct ext3_dir_entry_2 *) (&root->dotdot);
1900     de->rec_len = ext3_rec_len_to_disk(blocksize - EXT3_DIR_REC_LEN(2));
1901     memset (&root->info, 0, sizeof(root->info));
1902     root->info.info_length = sizeof(root->info);
1903     root->info.hash_version = (__u8)(EXT3_SB(dir->i_sb)->s_def_hash_version);
1904     entries = root->entries;
1905     dx_set_block(entries, 1);
1906     dx_set_count(entries, 1);
1907     dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
1908 
1909     /* Initialize as for dx_probe */
1910     hinfo.hash_version = root->info.hash_version;
1911     hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
1912     ext3_dirhash(name, namelen, &hinfo);
1913     frame = frames;
1914     frame->entries = entries;
1915     frame->at = entries;
1916     frame->bh = bh;
1917     bh = bh2;
1918     /* bh and bh2 are to be marked as dirty in do_split */
1919     de = do_split(icb, dir, &bh, frame, &hinfo, &retval);
1920     dx_release (frames);
1921     if (!(de))
1922         return retval;
1923 
1924     return add_dirent_to_buf(icb, dentry, inode, de, bh);
1925 }
1926 
1927 #else /* EXT2_HTREE_INDEX */
1928 
1929 int ext3_release_dir (struct inode * inode, struct file * filp)
1930 {
1931     return 0;
1932 }
1933 
1934 #endif /* !EXT2_HTREE_INDEX */
1935 
1936 /*
1937  *	ext3_add_entry()
1938  *
1939  * adds a file entry to the specified directory, using the same
1940  * semantics as ext3_find_entry(). It returns NULL if it failed.
1941  *
1942  * NOTE!! The inode part of 'de' is left at 0 - which means you
1943  * may not sleep between calling this and putting something into
1944  * the entry, as someone else might have used it while you slept.
1945  */
1946 int ext3_add_entry(struct ext2_icb *icb, struct dentry *dentry, struct inode *inode)
1947 {
1948     struct inode *dir = dentry->d_parent->d_inode;
1949     struct buffer_head *bh;
1950     struct ext3_dir_entry_2 *de;
1951     struct super_block *sb;
1952     int	retval;
1953 #ifdef EXT2_HTREE_INDEX
1954     int	dx_fallback=0;
1955 #endif
1956     unsigned blocksize;
1957     ext3_lblk_t block, blocks;
1958 
1959     sb = dir->i_sb;
1960     blocksize = sb->s_blocksize;
1961     if (!dentry->d_name.len)
1962         return -EINVAL;
1963 
1964 #ifdef EXT2_HTREE_INDEX
1965     if (is_dx(dir)) {
1966         retval = ext3_dx_add_entry(icb, dentry, inode);
1967         if (!retval || (retval != ERR_BAD_DX_DIR))
1968             return retval;
1969         EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
1970         dx_fallback++;
1971         ext3_save_inode(icb, dir);
1972     }
1973 #endif
1974 
1975     blocks = (ext3_lblk_t)(dir->i_size >> sb->s_blocksize_bits);
1976     for (block = 0; block < blocks; block++) {
1977         bh = ext3_bread(icb, dir, block, &retval);
1978         if (!bh)
1979             return retval;
1980         retval = add_dirent_to_buf(icb, dentry, inode, NULL, bh);
1981         if (retval != -ENOSPC)
1982             return retval;
1983 
1984 #ifdef EXT2_HTREE_INDEX
1985         if (blocks == 1 && !dx_fallback &&
1986                 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
1987             return make_indexed_dir(icb, dentry, inode, bh);
1988 #endif
1989 
1990         brelse(bh);
1991     }
1992     bh = ext3_append(icb, dir, &block, &retval);
1993     if (!bh)
1994         return retval;
1995     de = (struct ext3_dir_entry_2 *) bh->b_data;
1996     de->inode = 0;
1997     de->rec_len = ext3_rec_len_to_disk(blocksize);
1998     return add_dirent_to_buf(icb, dentry, inode, de, bh);
1999 }
2000 
2001 /*
2002  * ext3_delete_entry deletes a directory entry by merging it with the
2003  * previous entry
2004  */
2005 int ext3_delete_entry(struct ext2_icb *icb, struct inode *dir,
2006                       struct ext3_dir_entry_2 *de_del,
2007                       struct buffer_head *bh)
2008 {
2009     struct ext3_dir_entry_2 *de, *pde = NULL;
2010     size_t i = 0;
2011 
2012     de = (struct ext3_dir_entry_2 *) bh->b_data;
2013     while (i < bh->b_size) {
2014         if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
2015             return -EIO;
2016         if (de == de_del)  {
2017             if (pde)
2018                 pde->rec_len = ext3_rec_len_to_disk(
2019                                    ext3_rec_len_from_disk(pde->rec_len) +
2020                                    ext3_rec_len_from_disk(de->rec_len));
2021             else
2022                 de->inode = 0;
2023             dir->i_version++;
2024             /* ext3_journal_dirty_metadata(handle, bh); */
2025             set_buffer_dirty(bh);
2026             return 0;
2027         }
2028         i += ext3_rec_len_from_disk(de->rec_len);
2029         pde = de;
2030         de = ext3_next_entry(de);
2031     }
2032     return -ENOENT;
2033 }
2034 
2035 /*
2036  * routine to check that the specified directory is empty (for rmdir)
2037  */
2038 int ext3_is_dir_empty(struct ext2_icb *icb, struct inode *inode)
2039 {
2040     unsigned int offset;
2041     struct buffer_head *bh;
2042     struct ext3_dir_entry_2 *de, *de1;
2043     struct super_block *sb;
2044     int err = 0;
2045 
2046     sb = inode->i_sb;
2047     if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
2048             !(bh = ext3_bread(icb, inode, 0, &err))) {
2049         if (err)
2050             ext3_error(inode->i_sb, __FUNCTION__,
2051                        "error %d reading directory #%lu offset 0",
2052                        err, inode->i_ino);
2053         else
2054             ext3_warning(inode->i_sb, __FUNCTION__,
2055                          "bad directory (dir #%lu) - no data block",
2056                          inode->i_ino);
2057         return 1;
2058     }
2059     de = (struct ext3_dir_entry_2 *) bh->b_data;
2060     de1 = ext3_next_entry(de);
2061     if (le32_to_cpu(de->inode) != inode->i_ino ||
2062             !le32_to_cpu(de1->inode) ||
2063             strcmp(".", de->name) ||
2064             strcmp("..", de1->name)) {
2065         ext3_warning(inode->i_sb, "empty_dir",
2066                      "bad directory (dir #%lu) - no `.' or `..'",
2067                      inode->i_ino);
2068         brelse(bh);
2069         return 1;
2070     }
2071     offset = ext3_rec_len_from_disk(de->rec_len) +
2072              ext3_rec_len_from_disk(de1->rec_len);
2073     de = ext3_next_entry(de1);
2074     while (offset < inode->i_size) {
2075         if (!bh ||
2076                 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
2077             err = 0;
2078             brelse(bh);
2079             bh = ext3_bread(icb, inode, offset >> EXT3_BLOCK_SIZE_BITS(sb), &err);
2080             if (!bh) {
2081                 if (err)
2082                     ext3_error(sb, __FUNCTION__, "error %d reading directory"
2083                                " #%lu offset %u", err, inode->i_ino, offset);
2084                 offset += sb->s_blocksize;
2085                 continue;
2086             }
2087             de = (struct ext3_dir_entry_2 *) bh->b_data;
2088         }
2089         if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
2090             de = (struct ext3_dir_entry_2 *)(bh->b_data +
2091                                              sb->s_blocksize);
2092             offset = (offset | (sb->s_blocksize - 1)) + 1;
2093             continue;
2094         }
2095         if (le32_to_cpu(de->inode)) {
2096             brelse(bh);
2097             return 0;
2098         }
2099         offset += ext3_rec_len_from_disk(de->rec_len);
2100         de = ext3_next_entry(de);
2101     }
2102     brelse(bh);
2103     return 1;
2104 }
2105 
2106 /*
2107  * Returns 0 if not found, -1 on failure, and 1 on success
2108  */
2109 static inline int search_dirblock(struct buffer_head * bh,
2110                                   struct inode *dir,
2111                                   struct dentry *dentry,
2112                                   unsigned long offset,
2113                                   struct ext3_dir_entry_2 ** res_dir)
2114 {
2115     struct ext3_dir_entry_2 * de;
2116     char * dlimit;
2117     int de_len;
2118     const char *name = dentry->d_name.name;
2119     int namelen = dentry->d_name.len;
2120 
2121     de = (struct ext3_dir_entry_2 *) bh->b_data;
2122     dlimit = bh->b_data + dir->i_sb->s_blocksize;
2123     while ((char *) de < dlimit) {
2124         /* this code is executed quadratically often */
2125         /* do minimal checking `by hand' */
2126 
2127         if ((char *) de + namelen <= dlimit &&
2128                 ext3_match (namelen, name, de)) {
2129             /* found a match - just to be sure, do a full check */
2130             if (!ext3_check_dir_entry("ext3_find_entry",
2131                                       dir, de, bh, offset))
2132                 return -1;
2133             *res_dir = de;
2134             return 1;
2135         }
2136         /* prevent looping on a bad block */
2137         de_len = ext3_rec_len_from_disk(de->rec_len);
2138 
2139         if (de_len <= 0)
2140             return -1;
2141         offset += de_len;
2142         de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
2143     }
2144     return 0;
2145 }
2146 
2147 /*
2148  * define how far ahead to read directories while searching them.
2149  */
2150 #define NAMEI_RA_CHUNKS  2
2151 #define NAMEI_RA_BLOCKS  4
2152 #define NAMEI_RA_SIZE	     (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
2153 #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
2154 
2155 /*
2156  *	ext4_find_entry()
2157  *
2158  * finds an entry in the specified directory with the wanted name. It
2159  * returns the cache buffer in which the entry was found, and the entry
2160  * itself (as a parameter - res_dir). It does NOT read the inode of the
2161  * entry - you'll have to do that yourself if you want to.
2162  *
2163  * The returned buffer_head has ->b_count elevated.  The caller is expected
2164  * to brelse() it when appropriate.
2165  */
2166 struct buffer_head * ext3_find_entry (struct ext2_icb *icb,
2167                                                   struct dentry *dentry,
2168                                                   struct ext3_dir_entry_2 ** res_dir)
2169 {
2170     struct inode *dir = dentry->d_parent->d_inode;
2171     struct super_block *sb = dir->i_sb;
2172     struct buffer_head *bh_use[NAMEI_RA_SIZE];
2173     struct buffer_head *bh, *ret = NULL;
2174     ext3_lblk_t start, block, b;
2175     int ra_max = 0;		/* Number of bh's in the readahead
2176 				   buffer, bh_use[] */
2177     int ra_ptr = 0;		/* Current index into readahead
2178 				   buffer */
2179     int num = 0;
2180     ext3_lblk_t  nblocks;
2181     int i, err;
2182     int namelen = dentry->d_name.len;
2183 
2184     *res_dir = NULL;
2185     if (namelen > EXT3_NAME_LEN)
2186         return NULL;
2187 
2188 #ifdef EXT2_HTREE_INDEX
2189     if (icb->MajorFunction != IRP_MJ_CREATE && is_dx(dir)) {
2190         bh = ext3_dx_find_entry(icb, dentry, res_dir, &err);
2191         /*
2192          * On success, or if the error was file not found,
2193          * return.  Otherwise, fall back to doing a search the
2194          * old fashioned way.
2195          */
2196         if (bh || (err != ERR_BAD_DX_DIR))
2197             return bh;
2198         dxtrace(printk("ext4_find_entry: dx failed, "
2199                        "falling back\n"));
2200     }
2201 #endif
2202 
2203     nblocks = (ext3_lblk_t)(dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb));
2204     start = 0;
2205     block = start;
2206 restart:
2207     do {
2208         /*
2209          * We deal with the read-ahead logic here.
2210          */
2211         if (ra_ptr >= ra_max) {
2212             /* Refill the readahead buffer */
2213             ra_ptr = 0;
2214             b = block;
2215             for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
2216                 /*
2217                  * Terminate if we reach the end of the
2218                  * directory and must wrap, or if our
2219                  * search has finished at this block.
2220                  */
2221                 if (b >= nblocks || (num && block == start)) {
2222                     bh_use[ra_max] = NULL;
2223                     break;
2224                 }
2225                 num++;
2226                 bh = ext3_bread(icb, dir, b++, &err);
2227                 bh_use[ra_max] = bh;
2228             }
2229         }
2230         if ((bh = bh_use[ra_ptr++]) == NULL)
2231             goto next;
2232         wait_on_buffer(bh);
2233         if (!buffer_uptodate(bh)) {
2234             /* read error, skip block & hope for the best */
2235             ext3_error(sb, __FUNCTION__, "reading directory #%lu "
2236                        "offset %lu", dir->i_ino,
2237                        (unsigned long)block);
2238             brelse(bh);
2239             goto next;
2240         }
2241         i = search_dirblock(bh, dir, dentry,
2242                             block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
2243         if (i == 1) {
2244             ret = bh;
2245             goto cleanup_and_exit;
2246         } else {
2247             brelse(bh);
2248             if (i < 0)
2249                 goto cleanup_and_exit;
2250         }
2251 next:
2252         if (++block >= nblocks)
2253             block = 0;
2254     } while (block != start);
2255 
2256     /*
2257      * If the directory has grown while we were searching, then
2258      * search the last part of the directory before giving up.
2259      */
2260     block = nblocks;
2261     nblocks = (ext3_lblk_t)(dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb));
2262     if (block < nblocks) {
2263         start = 0;
2264         goto restart;
2265     }
2266 
2267 cleanup_and_exit:
2268     /* Clean up the read-ahead blocks */
2269     for (; ra_ptr < ra_max; ra_ptr++)
2270         brelse(bh_use[ra_ptr]);
2271     return ret;
2272 }
2273