1 /*
2  * rehash.c --- rebuild hash tree directories
3  *
4  * Copyright (C) 2002 Theodore Ts'o
5  *
6  * %Begin-Header%
7  * This file may be redistributed under the terms of the GNU Public
8  * License.
9  * %End-Header%
10  *
11  * This algorithm is designed for simplicity of implementation and to
12  * pack the directory as much as possible.  It however requires twice
13  * as much memory as the size of the directory.  The maximum size
14  * directory supported using a 4k blocksize is roughly a gigabyte, and
15  * so there may very well be problems with machines that don't have
16  * virtual memory, and obscenely large directories.
17  *
18  * An alternate algorithm which is much more disk intensive could be
19  * written, and probably will need to be written in the future.  The
20  * design goals of such an algorithm are: (a) use (roughly) constant
21  * amounts of memory, no matter how large the directory, (b) the
22  * directory must be safe at all times, even if e2fsck is interrupted
23  * in the middle, (c) we must use minimal amounts of extra disk
24  * blocks.  This pretty much requires an incremental approach, where
25  * we are reading from one part of the directory, and inserting into
26  * the front half.  So the algorithm will have to keep track of a
27  * moving block boundary between the new tree and the old tree, and
28  * files will need to be moved from the old directory and inserted
29  * into the new tree.  If the new directory requires space which isn't
30  * yet available, blocks from the beginning part of the old directory
31  * may need to be moved to the end of the directory to make room for
32  * the new tree:
33  *
34  *    --------------------------------------------------------
35  *    |  new tree   |        | old tree                      |
36  *    --------------------------------------------------------
37  *                  ^ ptr    ^ptr
38  *                tail new   head old
39  *
40  * This is going to be a pain in the tuckus to implement, and will
41  * require a lot more disk accesses.  So I'm going to skip it for now;
42  * it's only really going to be an issue for really, really big
43  * filesystems (when we reach the level of tens of millions of files
44  * in a single directory).  It will probably be easier to simply
45  * require that e2fsck use VM first.
46  */
47 
48 #include "config.h"
49 #include <string.h>
50 #include <ctype.h>
51 #include <errno.h>
52 #include "e2fsck.h"
53 #include "problem.h"
54 #include "support/sort_r.h"
55 
56 /* Schedule a dir to be rebuilt during pass 3A. */
e2fsck_rehash_dir_later(e2fsck_t ctx,ext2_ino_t ino)57 void e2fsck_rehash_dir_later(e2fsck_t ctx, ext2_ino_t ino)
58 {
59 	if (!ctx->dirs_to_hash)
60 		ext2fs_u32_list_create(&ctx->dirs_to_hash, 50);
61 	if (ctx->dirs_to_hash)
62 		ext2fs_u32_list_add(ctx->dirs_to_hash, ino);
63 }
64 
65 /* Ask if a dir will be rebuilt during pass 3A. */
e2fsck_dir_will_be_rehashed(e2fsck_t ctx,ext2_ino_t ino)66 int e2fsck_dir_will_be_rehashed(e2fsck_t ctx, ext2_ino_t ino)
67 {
68 	if (ctx->options & E2F_OPT_COMPRESS_DIRS)
69 		return 1;
70 	if (!ctx->dirs_to_hash)
71 		return 0;
72 	return ext2fs_u32_list_test(ctx->dirs_to_hash, ino);
73 }
74 
75 #undef REHASH_DEBUG
76 
77 struct fill_dir_struct {
78 	char *buf;
79 	struct ext2_inode *inode;
80 	ext2_ino_t ino;
81 	errcode_t err;
82 	e2fsck_t ctx;
83 	struct hash_entry *harray;
84 	blk_t max_array, num_array;
85 	ext2_off64_t dir_size;
86 	int compress;
87 	ext2_ino_t parent;
88 	ext2_ino_t dir;
89 };
90 
91 struct hash_entry {
92 	ext2_dirhash_t	hash;
93 	ext2_dirhash_t	minor_hash;
94 	ino_t		ino;
95 	struct ext2_dir_entry	*dir;
96 };
97 
98 struct out_dir {
99 	blk_t		num;
100 	blk_t		max;
101 	char		*buf;
102 	ext2_dirhash_t	*hashes;
103 };
104 
105 #define DOTDOT_OFFSET 12
106 
is_fake_entry(ext2_filsys fs,int lblk,unsigned int offset)107 static int is_fake_entry(ext2_filsys fs, int lblk, unsigned int offset)
108 {
109 	/* Entries in the first block before this value refer to . or .. */
110 	if (lblk == 0 && offset <= DOTDOT_OFFSET)
111 		return 1;
112 	/* Check if this is likely the csum entry */
113 	if (ext2fs_has_feature_metadata_csum(fs->super) &&
114 	    (offset & (fs->blocksize - 1)) ==
115 			    fs->blocksize - sizeof(struct ext2_dir_entry_tail))
116 		return 1;
117 	return 0;
118 }
119 
fill_dir_block(ext2_filsys fs,blk64_t * block_nr,e2_blkcnt_t blockcnt,blk64_t ref_block EXT2FS_ATTR ((unused)),int ref_offset EXT2FS_ATTR ((unused)),void * priv_data)120 static int fill_dir_block(ext2_filsys fs,
121 			  blk64_t *block_nr,
122 			  e2_blkcnt_t blockcnt,
123 			  blk64_t ref_block EXT2FS_ATTR((unused)),
124 			  int ref_offset EXT2FS_ATTR((unused)),
125 			  void *priv_data)
126 {
127 	struct fill_dir_struct	*fd = (struct fill_dir_struct *) priv_data;
128 	struct hash_entry 	*ent;
129 	struct ext2_dir_entry 	*dirent;
130 	char			*dir;
131 	unsigned int		offset, dir_offset, rec_len, name_len;
132 	int			hash_alg, hash_flags, hash_in_entry;
133 
134 	if (blockcnt < 0)
135 		return 0;
136 
137 	offset = blockcnt * fs->blocksize;
138 	if (offset + fs->blocksize > fd->inode->i_size) {
139 		fd->err = EXT2_ET_DIR_CORRUPTED;
140 		return BLOCK_ABORT;
141 	}
142 
143 	dir = (fd->buf+offset);
144 	if (*block_nr == 0) {
145 		memset(dir, 0, fs->blocksize);
146 		dirent = (struct ext2_dir_entry *) dir;
147 		(void) ext2fs_set_rec_len(fs, fs->blocksize, dirent);
148 	} else {
149 		int flags = fs->flags;
150 		fs->flags |= EXT2_FLAG_IGNORE_CSUM_ERRORS;
151 		fd->err = ext2fs_read_dir_block4(fs, *block_nr, dir, 0,
152 						 fd->dir);
153 		fs->flags = (flags & EXT2_FLAG_IGNORE_CSUM_ERRORS) |
154 			    (fs->flags & ~EXT2_FLAG_IGNORE_CSUM_ERRORS);
155 		if (fd->err)
156 			return BLOCK_ABORT;
157 	}
158 	hash_flags = fd->inode->i_flags & EXT4_CASEFOLD_FL;
159 	hash_in_entry = ext4_hash_in_dirent(fd->inode);
160 	hash_alg = fs->super->s_def_hash_version;
161 	if ((hash_alg <= EXT2_HASH_TEA) &&
162 	    (fs->super->s_flags & EXT2_FLAGS_UNSIGNED_HASH))
163 		hash_alg += 3;
164 	/* While the directory block is "hot", index it. */
165 	dir_offset = 0;
166 	while (dir_offset < fs->blocksize) {
167 		unsigned int min_rec = EXT2_DIR_ENTRY_HEADER_LEN;
168 		int extended = hash_in_entry && !is_fake_entry(fs, blockcnt, dir_offset);
169 
170 		if (extended)
171 			min_rec += EXT2_DIR_ENTRY_HASH_LEN;
172 		dirent = (struct ext2_dir_entry *) (dir + dir_offset);
173 		(void) ext2fs_get_rec_len(fs, dirent, &rec_len);
174 		name_len = ext2fs_dirent_name_len(dirent);
175 		if (((dir_offset + rec_len) > fs->blocksize) ||
176 		    (rec_len < min_rec) ||
177 		    ((rec_len % 4) != 0) ||
178 		    (name_len + min_rec > rec_len)) {
179 			fd->err = EXT2_ET_DIR_CORRUPTED;
180 			return BLOCK_ABORT;
181 		}
182 		dir_offset += rec_len;
183 		if (dirent->inode == 0)
184 			continue;
185 		if ((name_len) == 0) {
186 			fd->err = EXT2_ET_DIR_CORRUPTED;
187 			return BLOCK_ABORT;
188 		}
189 		if (!fd->compress && (name_len == 1) &&
190 		    (dirent->name[0] == '.'))
191 			continue;
192 		if (!fd->compress && (name_len == 2) &&
193 		    (dirent->name[0] == '.') && (dirent->name[1] == '.')) {
194 			fd->parent = dirent->inode;
195 			continue;
196 		}
197 		if (fd->num_array >= fd->max_array) {
198 			errcode_t retval;
199 
200 			retval = ext2fs_resize_array(sizeof(struct hash_entry),
201 						     fd->max_array,
202 						     fd->max_array + 500,
203 						     &fd->harray);
204 			if (retval) {
205 				fd->err = retval;
206 				return BLOCK_ABORT;
207 			}
208 			fd->max_array += 500;
209 		}
210 		ent = fd->harray + fd->num_array++;
211 		ent->dir = dirent;
212 		fd->dir_size += ext2fs_dir_rec_len(name_len, extended);
213 		ent->ino = dirent->inode;
214 		if (extended) {
215 			ent->hash = EXT2_DIRENT_HASH(dirent);
216 			ent->minor_hash = EXT2_DIRENT_MINOR_HASH(dirent);
217 		} else if (fd->compress) {
218 			ent->hash = ent->minor_hash = 0;
219 		} else {
220 			fd->err = ext2fs_dirhash2(hash_alg,
221 						  dirent->name, name_len,
222 						  fs->encoding, hash_flags,
223 						  fs->super->s_hash_seed,
224 						  &ent->hash, &ent->minor_hash);
225 			if (fd->err)
226 				return BLOCK_ABORT;
227 		}
228 	}
229 
230 	return 0;
231 }
232 
233 /* Used for sorting the hash entry */
ino_cmp(const void * a,const void * b)234 static EXT2_QSORT_TYPE ino_cmp(const void *a, const void *b)
235 {
236 	const struct hash_entry *he_a = (const struct hash_entry *) a;
237 	const struct hash_entry *he_b = (const struct hash_entry *) b;
238 
239 	return (he_a->ino - he_b->ino);
240 }
241 
242 struct name_cmp_ctx
243 {
244 	int casefold;
245 	const struct ext2fs_nls_table *tbl;
246 };
247 
same_name(const struct name_cmp_ctx * cmp_ctx,char * s1,int len1,char * s2,int len2)248 static int same_name(const struct name_cmp_ctx *cmp_ctx, char *s1,
249 		     int len1, char *s2, int len2)
250 {
251 	if (!cmp_ctx->casefold)
252 		return (len1 == len2 &&	!memcmp(s1, s2, len1));
253 	else
254 		return !ext2fs_casefold_cmp(cmp_ctx->tbl,
255 					    (unsigned char *) s1, len1,
256 					    (unsigned char *) s2, len2);
257 }
258 
259 /* Used for sorting the hash entry */
name_cmp(const void * a,const void * b)260 static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
261 {
262 	const struct hash_entry *he_a = (const struct hash_entry *) a;
263 	const struct hash_entry *he_b = (const struct hash_entry *) b;
264 	unsigned int he_a_len, he_b_len, min_len;
265 	int	ret;
266 
267 	he_a_len = ext2fs_dirent_name_len(he_a->dir);
268 	he_b_len = ext2fs_dirent_name_len(he_b->dir);
269 	min_len = he_a_len;
270 	if (min_len > he_b_len)
271 		min_len = he_b_len;
272 
273 	ret = memcmp(he_a->dir->name, he_b->dir->name, min_len);
274 	if (ret == 0) {
275 		if (he_a_len > he_b_len)
276 			ret = 1;
277 		else if (he_a_len < he_b_len)
278 			ret = -1;
279 		else
280 			ret = he_b->dir->inode - he_a->dir->inode;
281 	}
282 	return ret;
283 }
284 
name_cf_cmp(const struct name_cmp_ctx * ctx,const void * a,const void * b)285 static EXT2_QSORT_TYPE name_cf_cmp(const struct name_cmp_ctx *ctx,
286 				   const void *a, const void *b)
287 {
288 	const struct hash_entry *he_a = (const struct hash_entry *) a;
289 	const struct hash_entry *he_b = (const struct hash_entry *) b;
290 	unsigned int he_a_len, he_b_len;
291 	int ret;
292 
293 	he_a_len = ext2fs_dirent_name_len(he_a->dir);
294 	he_b_len = ext2fs_dirent_name_len(he_b->dir);
295 
296 	ret = ext2fs_casefold_cmp(ctx->tbl,
297 				  (unsigned char *) he_a->dir->name, he_a_len,
298 				  (unsigned char *) he_b->dir->name, he_b_len);
299 	if (ret == 0) {
300 		if (he_a_len > he_b_len)
301 			ret = 1;
302 		else if (he_a_len < he_b_len)
303 			ret = -1;
304 		else
305 			ret = he_b->dir->inode - he_a->dir->inode;
306 	}
307 	return ret;
308 }
309 
310 /* Used for sorting the hash entry */
hash_cmp(const void * a,const void * b,void * arg)311 static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b, void *arg)
312 {
313 	const struct name_cmp_ctx *ctx = (struct name_cmp_ctx *) arg;
314 	const struct hash_entry *he_a = (const struct hash_entry *) a;
315 	const struct hash_entry *he_b = (const struct hash_entry *) b;
316 	int	ret;
317 
318 	if (he_a->hash > he_b->hash)
319 		ret = 1;
320 	else if (he_a->hash < he_b->hash)
321 		ret = -1;
322 	else {
323 		if (he_a->minor_hash > he_b->minor_hash)
324 			ret = 1;
325 		else if (he_a->minor_hash < he_b->minor_hash)
326 			ret = -1;
327 		else {
328 			if (ctx->casefold)
329 				ret = name_cf_cmp(ctx, a, b);
330 			else
331 				ret = name_cmp(a, b);
332 		}
333 	}
334 	return ret;
335 }
336 
alloc_size_dir(ext2_filsys fs,struct out_dir * outdir,blk_t blocks)337 static errcode_t alloc_size_dir(ext2_filsys fs, struct out_dir *outdir,
338 				blk_t blocks)
339 {
340 	errcode_t retval;
341 
342 	if (outdir->max) {
343 		retval = ext2fs_resize_array(fs->blocksize, outdir->max, blocks,
344 					     &outdir->buf);
345 		if (retval)
346 			return retval;
347 		retval = ext2fs_resize_array(sizeof(ext2_dirhash_t),
348 					     outdir->max, blocks,
349 					     &outdir->hashes);
350 		if (retval)
351 			return retval;
352 	} else {
353 		retval = ext2fs_get_array(fs->blocksize, blocks, &outdir->buf);
354 		if (retval)
355 			return retval;
356 		retval = ext2fs_get_array(sizeof(ext2_dirhash_t), blocks,
357 					  &outdir->hashes);
358 		if (retval)
359 			return retval;
360 		outdir->num = 0;
361 	}
362 	outdir->max = blocks;
363 	return 0;
364 }
365 
free_out_dir(struct out_dir * outdir)366 static void free_out_dir(struct out_dir *outdir)
367 {
368 	free(outdir->buf);
369 	free(outdir->hashes);
370 	outdir->max = 0;
371 	outdir->num =0;
372 }
373 
get_next_block(ext2_filsys fs,struct out_dir * outdir,char ** ret)374 static errcode_t get_next_block(ext2_filsys fs, struct out_dir *outdir,
375 			 char ** ret)
376 {
377 	errcode_t	retval;
378 
379 	if (outdir->num >= outdir->max) {
380 		int increment = outdir->max / 10;
381 
382 		if (increment < 50)
383 			increment = 50;
384 		retval = alloc_size_dir(fs, outdir, outdir->max + increment);
385 		if (retval)
386 			return retval;
387 	}
388 	*ret = outdir->buf + (size_t)outdir->num++ * fs->blocksize;
389 	memset(*ret, 0, fs->blocksize);
390 	return 0;
391 }
392 
393 /*
394  * This function is used to make a unique filename.  We do this by
395  * appending ~0, and then incrementing the number.  However, we cannot
396  * expand the length of the filename beyond the padding available in
397  * the directory entry.
398  */
mutate_name(char * str,unsigned int * len)399 static void mutate_name(char *str, unsigned int *len)
400 {
401 	int i;
402 	unsigned int l = *len;
403 
404 	/*
405 	 * First check to see if it looks the name has been mutated
406 	 * already
407 	 */
408 	for (i = l-1; i > 0; i--) {
409 		if (!isdigit(str[i]))
410 			break;
411 	}
412 	if ((i == (int)l - 1) || (str[i] != '~')) {
413 		if (((l-1) & 3) < 2)
414 			l += 2;
415 		else
416 			l = (l+3) & ~3;
417 		str[l-2] = '~';
418 		str[l-1] = '0';
419 		*len = l;
420 		return;
421 	}
422 	for (i = l-1; i >= 0; i--) {
423 		if (isdigit(str[i])) {
424 			if (str[i] == '9')
425 				str[i] = '0';
426 			else {
427 				str[i]++;
428 				return;
429 			}
430 			continue;
431 		}
432 		if (i == 1) {
433 			if (str[0] == 'z')
434 				str[0] = 'A';
435 			else if (str[0] == 'Z') {
436 				str[0] = '~';
437 				str[1] = '0';
438 			} else
439 				str[0]++;
440 		} else if (i > 0) {
441 			str[i] = '1';
442 			str[i-1] = '~';
443 		} else {
444 			if (str[0] == '~')
445 				str[0] = 'a';
446 			else
447 				str[0]++;
448 		}
449 		break;
450 	}
451 }
452 
duplicate_search_and_fix(e2fsck_t ctx,ext2_filsys fs,ext2_ino_t ino,struct fill_dir_struct * fd,const struct name_cmp_ctx * cmp_ctx)453 static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
454 				    ext2_ino_t ino,
455 				    struct fill_dir_struct *fd,
456 				    const struct name_cmp_ctx *cmp_ctx)
457 {
458 	struct problem_context	pctx;
459 	struct hash_entry	*ent, *prev;
460 	blk_t			i, j;
461 	int			fixed = 0;
462 	char			new_name[256];
463 	unsigned int		new_len;
464 	int			hash_alg;
465 	int hash_flags = fd->inode->i_flags & EXT4_CASEFOLD_FL;
466 
467 	clear_problem_context(&pctx);
468 	pctx.ino = ino;
469 
470 	hash_alg = fs->super->s_def_hash_version;
471 	if ((hash_alg <= EXT2_HASH_TEA) &&
472 	    (fs->super->s_flags & EXT2_FLAGS_UNSIGNED_HASH))
473 		hash_alg += 3;
474 
475 	for (i=1; i < fd->num_array; i++) {
476 		ent = fd->harray + i;
477 		prev = ent - 1;
478 		if (!ent->dir->inode ||
479 		    !same_name(cmp_ctx, ent->dir->name,
480 			       ext2fs_dirent_name_len(ent->dir),
481 			       prev->dir->name,
482 			       ext2fs_dirent_name_len(prev->dir)))
483 			continue;
484 		pctx.dirent = ent->dir;
485 		if ((ent->dir->inode == prev->dir->inode) &&
486 		    fix_problem(ctx, PR_2_DUPLICATE_DIRENT, &pctx)) {
487 			e2fsck_adjust_inode_count(ctx, ent->dir->inode, -1);
488 			ent->dir->inode = 0;
489 			fixed++;
490 			continue;
491 		}
492 		/* Can't alter encrypted name without key, so just drop it */
493 		if (fd->inode->i_flags & EXT4_ENCRYPT_FL) {
494 			if (fix_problem(ctx, PR_2_NON_UNIQUE_FILE_NO_RENAME, &pctx)) {
495 				e2fsck_adjust_inode_count(ctx, ent->dir->inode, -1);
496 				ent->dir->inode = 0;
497 				fixed++;
498 				continue;
499 			}
500 		}
501 		new_len = ext2fs_dirent_name_len(ent->dir);
502 		if (new_len == 0) {
503 			 /* should never happen */
504 			ext2fs_unmark_valid(fs);
505 			continue;
506 		}
507 		memcpy(new_name, ent->dir->name, new_len);
508 		mutate_name(new_name, &new_len);
509 		for (j=0; j < fd->num_array; j++) {
510 			if ((i==j) ||
511 			    !same_name(cmp_ctx, new_name, new_len,
512 				       fd->harray[j].dir->name,
513 				       ext2fs_dirent_name_len(fd->harray[j].dir))) {
514 				continue;
515 			}
516 			mutate_name(new_name, &new_len);
517 
518 			j = -1;
519 		}
520 		new_name[new_len] = 0;
521 		pctx.str = new_name;
522 		if (fix_problem(ctx, PR_2_NON_UNIQUE_FILE, &pctx)) {
523 			memcpy(ent->dir->name, new_name, new_len);
524 			ext2fs_dirent_set_name_len(ent->dir, new_len);
525 			ext2fs_dirhash2(hash_alg, new_name, new_len,
526 					fs->encoding, hash_flags,
527 					fs->super->s_hash_seed,
528 					&ent->hash, &ent->minor_hash);
529 			fixed++;
530 		}
531 	}
532 	return fixed;
533 }
534 
535 
copy_dir_entries(e2fsck_t ctx,struct fill_dir_struct * fd,struct out_dir * outdir)536 static errcode_t copy_dir_entries(e2fsck_t ctx,
537 				  struct fill_dir_struct *fd,
538 				  struct out_dir *outdir)
539 {
540 	ext2_filsys 		fs = ctx->fs;
541 	errcode_t		retval;
542 	char			*block_start;
543 	struct hash_entry 	*ent;
544 	struct ext2_dir_entry	*dirent;
545 	unsigned int		rec_len, prev_rec_len, left, slack, offset;
546 	blk_t			i;
547 	ext2_dirhash_t		prev_hash;
548 	int			csum_size = 0;
549 	struct			ext2_dir_entry_tail *t;
550 	int hash_in_entry = ext4_hash_in_dirent(fd->inode);
551 	unsigned int min_rec_len = ext2fs_dir_rec_len(1, hash_in_entry);
552 
553 	if (ctx->htree_slack_percentage == 255) {
554 		profile_get_uint(ctx->profile, "options",
555 				 "indexed_dir_slack_percentage",
556 				 0, 20,
557 				 &ctx->htree_slack_percentage);
558 		if (ctx->htree_slack_percentage > 100)
559 			ctx->htree_slack_percentage = 20;
560 	}
561 
562 	if (ext2fs_has_feature_metadata_csum(fs->super))
563 		csum_size = sizeof(struct ext2_dir_entry_tail);
564 
565 	outdir->max = 0;
566 	retval = alloc_size_dir(fs, outdir,
567 				(fd->dir_size / fs->blocksize) + 2);
568 	if (retval)
569 		return retval;
570 	outdir->num = fd->compress ? 0 : 1;
571 	offset = 0;
572 	outdir->hashes[0] = 0;
573 	prev_hash = 1;
574 	if ((retval = get_next_block(fs, outdir, &block_start)))
575 		return retval;
576 	dirent = (struct ext2_dir_entry *) block_start;
577 	prev_rec_len = 0;
578 	rec_len = 0;
579 	left = fs->blocksize - csum_size;
580 	slack = fd->compress ? min_rec_len :
581 		((fs->blocksize - csum_size) * ctx->htree_slack_percentage)/100;
582 	if (slack < min_rec_len)
583 		slack = min_rec_len;
584 	for (i = 0; i < fd->num_array; i++) {
585 		ent = fd->harray + i;
586 		if (ent->dir->inode == 0)
587 			continue;
588 		rec_len = ext2fs_dir_rec_len(ext2fs_dirent_name_len(ent->dir),
589 					     hash_in_entry);
590 		if (rec_len > left) {
591 			if (left) {
592 				left += prev_rec_len;
593 				retval = ext2fs_set_rec_len(fs, left, dirent);
594 				if (retval)
595 					return retval;
596 			}
597 			if (csum_size) {
598 				t = EXT2_DIRENT_TAIL(block_start,
599 						     fs->blocksize);
600 				ext2fs_initialize_dirent_tail(fs, t);
601 			}
602 			if ((retval = get_next_block(fs, outdir,
603 						      &block_start)))
604 				return retval;
605 			offset = 0;
606 		}
607 		left = (fs->blocksize - csum_size) - offset;
608 		dirent = (struct ext2_dir_entry *) (block_start + offset);
609 		if (offset == 0) {
610 			if (ent->hash == prev_hash)
611 				outdir->hashes[outdir->num-1] = ent->hash | 1;
612 			else
613 				outdir->hashes[outdir->num-1] = ent->hash;
614 		}
615 		dirent->inode = ent->dir->inode;
616 		ext2fs_dirent_set_name_len(dirent,
617 					   ext2fs_dirent_name_len(ent->dir));
618 		ext2fs_dirent_set_file_type(dirent,
619 					    ext2fs_dirent_file_type(ent->dir));
620 		retval = ext2fs_set_rec_len(fs, rec_len, dirent);
621 		if (retval)
622 			return retval;
623 		prev_rec_len = rec_len;
624 		memcpy(dirent->name, ent->dir->name,
625 		       ext2fs_dirent_name_len(dirent));
626 		if (hash_in_entry) {
627 			EXT2_DIRENT_HASHES(dirent)->hash = ext2fs_cpu_to_le32(ent->hash);
628 			EXT2_DIRENT_HASHES(dirent)->minor_hash =
629 							ext2fs_cpu_to_le32(ent->minor_hash);
630 		}
631 		offset += rec_len;
632 		left -= rec_len;
633 		if (left < slack) {
634 			prev_rec_len += left;
635 			retval = ext2fs_set_rec_len(fs, prev_rec_len, dirent);
636 			if (retval)
637 				return retval;
638 			offset += left;
639 			left = 0;
640 		}
641 		prev_hash = ent->hash;
642 	}
643 	if (left)
644 		retval = ext2fs_set_rec_len(fs, rec_len + left, dirent);
645 	if (csum_size) {
646 		t = EXT2_DIRENT_TAIL(block_start, fs->blocksize);
647 		ext2fs_initialize_dirent_tail(fs, t);
648 	}
649 
650 	return retval;
651 }
652 
653 
set_root_node(ext2_filsys fs,char * buf,ext2_ino_t ino,ext2_ino_t parent,struct ext2_inode * inode)654 static struct ext2_dx_root_info *set_root_node(ext2_filsys fs, char *buf,
655 				    ext2_ino_t ino, ext2_ino_t parent,
656 				    struct ext2_inode *inode)
657 {
658 	struct ext2_dir_entry 		*dir;
659 	struct ext2_dx_root_info  	*root;
660 	struct ext2_dx_countlimit	*limits;
661 	int				filetype = 0;
662 	int				csum_size = 0;
663 
664 	if (ext2fs_has_feature_filetype(fs->super))
665 		filetype = EXT2_FT_DIR;
666 
667 	memset(buf, 0, fs->blocksize);
668 	dir = (struct ext2_dir_entry *) buf;
669 	dir->inode = ino;
670 	dir->name[0] = '.';
671 	ext2fs_dirent_set_name_len(dir, 1);
672 	ext2fs_dirent_set_file_type(dir, filetype);
673 	dir->rec_len = 12;
674 	dir = (struct ext2_dir_entry *) (buf + 12);
675 	dir->inode = parent;
676 	dir->name[0] = '.';
677 	dir->name[1] = '.';
678 	ext2fs_dirent_set_name_len(dir, 2);
679 	ext2fs_dirent_set_file_type(dir, filetype);
680 	dir->rec_len = fs->blocksize - 12;
681 
682 	root = (struct ext2_dx_root_info *) (buf+24);
683 	root->reserved_zero = 0;
684 	if (ext4_hash_in_dirent(inode))
685 		root->hash_version = EXT2_HASH_SIPHASH;
686 	else
687 		root->hash_version = fs->super->s_def_hash_version;
688 	root->info_length = 8;
689 	root->indirect_levels = 0;
690 	root->unused_flags = 0;
691 
692 	if (ext2fs_has_feature_metadata_csum(fs->super))
693 		csum_size = sizeof(struct ext2_dx_tail);
694 
695 	limits = (struct ext2_dx_countlimit *) (buf+32);
696 	limits->limit = (fs->blocksize - (32 + csum_size)) /
697 			sizeof(struct ext2_dx_entry);
698 	limits->count = 0;
699 
700 	return root;
701 }
702 
703 
set_int_node(ext2_filsys fs,char * buf)704 static struct ext2_dx_entry *set_int_node(ext2_filsys fs, char *buf)
705 {
706 	struct ext2_dir_entry 		*dir;
707 	struct ext2_dx_countlimit	*limits;
708 	int				csum_size = 0;
709 
710 	memset(buf, 0, fs->blocksize);
711 	dir = (struct ext2_dir_entry *) buf;
712 	dir->inode = 0;
713 	(void) ext2fs_set_rec_len(fs, fs->blocksize, dir);
714 
715 	if (ext2fs_has_feature_metadata_csum(fs->super))
716 		csum_size = sizeof(struct ext2_dx_tail);
717 
718 	limits = (struct ext2_dx_countlimit *) (buf+8);
719 	limits->limit = (fs->blocksize - (8 + csum_size)) /
720 			sizeof(struct ext2_dx_entry);
721 	limits->count = 0;
722 
723 	return (struct ext2_dx_entry *) limits;
724 }
725 
alloc_blocks(ext2_filsys fs,struct ext2_dx_countlimit ** limit,struct ext2_dx_entry ** prev_ent,struct ext2_dx_entry ** next_ent,int * prev_offset,int * next_offset,struct out_dir * outdir,int i,int * prev_count,int * next_count)726 static int alloc_blocks(ext2_filsys fs,
727 			struct ext2_dx_countlimit **limit,
728 			struct ext2_dx_entry **prev_ent,
729 			struct ext2_dx_entry **next_ent,
730 			int *prev_offset, int *next_offset,
731 			struct out_dir *outdir, int i,
732 			int *prev_count, int *next_count)
733 {
734 	errcode_t	retval;
735 	char		*block_start;
736 
737 	if (*limit)
738 		(*limit)->limit = (*limit)->count =
739 			ext2fs_cpu_to_le16((*limit)->limit);
740 	*prev_ent = (struct ext2_dx_entry *) (outdir->buf + *prev_offset);
741 	(*prev_ent)->block = ext2fs_cpu_to_le32(outdir->num);
742 
743 	if (i != 1)
744 		(*prev_ent)->hash =
745 			ext2fs_cpu_to_le32(outdir->hashes[i]);
746 
747 	retval = get_next_block(fs, outdir, &block_start);
748 	if (retval)
749 		return retval;
750 
751 	/* outdir->buf might be reallocated */
752 	*prev_ent = (struct ext2_dx_entry *) (outdir->buf + *prev_offset);
753 
754 	*next_ent = set_int_node(fs, block_start);
755 	*limit = (struct ext2_dx_countlimit *)(*next_ent);
756 	if (next_offset)
757 		*next_offset = ((char *) *next_ent - outdir->buf);
758 
759 	*next_count = (*limit)->limit;
760 	(*prev_offset) += sizeof(struct ext2_dx_entry);
761 	(*prev_count)--;
762 
763 	return 0;
764 }
765 
766 /*
767  * This function takes the leaf nodes which have been written in
768  * outdir, and populates the root node and any necessary interior nodes.
769  */
calculate_tree(ext2_filsys fs,struct out_dir * outdir,ext2_ino_t ino,ext2_ino_t parent,struct ext2_inode * inode)770 static errcode_t calculate_tree(ext2_filsys fs,
771 				struct out_dir *outdir,
772 				ext2_ino_t ino,
773 				ext2_ino_t parent,
774 				struct ext2_inode *inode)
775 {
776 	struct ext2_dx_root_info	*root_info;
777 	struct ext2_dx_entry		*root, *int_ent, *dx_ent = 0;
778 	struct ext2_dx_countlimit	*root_limit, *int_limit, *limit;
779 	errcode_t			retval;
780 	int				i, c1, c2, c3, nblks;
781 	int				limit_offset, int_offset, root_offset;
782 
783 	root_info = set_root_node(fs, outdir->buf, ino, parent, inode);
784 	root_offset = limit_offset = ((char *) root_info - outdir->buf) +
785 		root_info->info_length;
786 	root_limit = (struct ext2_dx_countlimit *) (outdir->buf + limit_offset);
787 	c1 = root_limit->limit;
788 	nblks = outdir->num;
789 
790 	/* Write out the pointer blocks */
791 	if (nblks - 1 <= c1) {
792 		/* Just write out the root block, and we're done */
793 		root = (struct ext2_dx_entry *) (outdir->buf + root_offset);
794 		for (i=1; i < nblks; i++) {
795 			root->block = ext2fs_cpu_to_le32(i);
796 			if (i != 1)
797 				root->hash =
798 					ext2fs_cpu_to_le32(outdir->hashes[i]);
799 			root++;
800 			c1--;
801 		}
802 	} else if (nblks - 1 <= ext2fs_htree_intnode_maxrecs(fs, c1)) {
803 		c2 = 0;
804 		limit = NULL;
805 		root_info->indirect_levels = 1;
806 		for (i=1; i < nblks; i++) {
807 			if (c2 == 0 && c1 == 0)
808 				return ENOSPC;
809 			if (c2 == 0) {
810 				retval = alloc_blocks(fs, &limit, &root,
811 						      &dx_ent, &root_offset,
812 						      NULL, outdir, i, &c1,
813 						      &c2);
814 				if (retval)
815 					return retval;
816 			}
817 			dx_ent->block = ext2fs_cpu_to_le32(i);
818 			if (c2 != limit->limit)
819 				dx_ent->hash =
820 					ext2fs_cpu_to_le32(outdir->hashes[i]);
821 			dx_ent++;
822 			c2--;
823 		}
824 		limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
825 		limit->limit = ext2fs_cpu_to_le16(limit->limit);
826 	} else {
827 		c2 = 0;
828 		c3 = 0;
829 		limit = NULL;
830 		int_limit = 0;
831 		root_info->indirect_levels = 2;
832 		for (i = 1; i < nblks; i++) {
833 			if (c3 == 0 && c2 == 0 && c1 == 0)
834 				return ENOSPC;
835 			if (c3 == 0 && c2 == 0) {
836 				retval = alloc_blocks(fs, &int_limit, &root,
837 						      &int_ent, &root_offset,
838 						      &int_offset, outdir, i,
839 						      &c1, &c2);
840 				if (retval)
841 					return retval;
842 			}
843 			if (c3 == 0) {
844 				int delta1 = (char *)int_limit - outdir->buf;
845 				int delta2 = (char *)root - outdir->buf;
846 
847 				retval = alloc_blocks(fs, &limit, &int_ent,
848 						      &dx_ent, &int_offset,
849 						      NULL, outdir, i, &c2,
850 						      &c3);
851 				if (retval)
852 					return retval;
853 
854 				/* outdir->buf might be reallocated */
855 				int_limit = (struct ext2_dx_countlimit *)
856 					(outdir->buf + delta1);
857 				root = (struct ext2_dx_entry *)
858 					(outdir->buf + delta2);
859 			}
860 			dx_ent->block = ext2fs_cpu_to_le32(i);
861 			if (c3 != limit->limit)
862 				dx_ent->hash =
863 					ext2fs_cpu_to_le32(outdir->hashes[i]);
864 			dx_ent++;
865 			c3--;
866 		}
867 		int_limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
868 		int_limit->limit = ext2fs_cpu_to_le16(limit->limit);
869 
870 		limit->count = ext2fs_cpu_to_le16(limit->limit - c3);
871 		limit->limit = ext2fs_cpu_to_le16(limit->limit);
872 
873 	}
874 	root_limit = (struct ext2_dx_countlimit *) (outdir->buf + limit_offset);
875 	root_limit->count = ext2fs_cpu_to_le16(root_limit->limit - c1);
876 	root_limit->limit = ext2fs_cpu_to_le16(root_limit->limit);
877 
878 	return 0;
879 }
880 
881 struct write_dir_struct {
882 	struct out_dir *outdir;
883 	errcode_t	err;
884 	ext2_ino_t	ino;
885 	e2fsck_t	ctx;
886 	ext2_ino_t	dir;
887 };
888 
889 /*
890  * Helper function which writes out a directory block.
891  */
write_dir_block(ext2_filsys fs,blk64_t * block_nr,e2_blkcnt_t blockcnt,blk64_t ref_block EXT2FS_ATTR ((unused)),int ref_offset EXT2FS_ATTR ((unused)),void * priv_data)892 static int write_dir_block(ext2_filsys fs,
893 			   blk64_t *block_nr,
894 			   e2_blkcnt_t blockcnt,
895 			   blk64_t ref_block EXT2FS_ATTR((unused)),
896 			   int ref_offset EXT2FS_ATTR((unused)),
897 			   void *priv_data)
898 {
899 	struct write_dir_struct	*wd = (struct write_dir_struct *) priv_data;
900 	char	*dir, *buf = 0;
901 
902 #ifdef REHASH_DEBUG
903 	printf("%u: write_dir_block %lld:%lld", wd->ino, blockcnt, *block_nr);
904 #endif
905 	if ((*block_nr == 0) || (blockcnt < 0)) {
906 #ifdef REHASH_DEBUG
907 		printf(" - skip\n");
908 #endif
909 		return 0;
910 	}
911 	if (blockcnt < wd->outdir->num)
912 		dir = wd->outdir->buf + (blockcnt * fs->blocksize);
913 	else if (wd->ctx->lost_and_found == wd->dir) {
914 		/* Don't release any extra directory blocks for lost+found */
915 		wd->err = ext2fs_new_dir_block(fs, 0, 0, &buf);
916 		if (wd->err)
917 			return BLOCK_ABORT;
918 		dir = buf;
919 		wd->outdir->num++;
920 	} else {
921 		/* Don't free blocks at the end of the directory, they
922 		 * will be truncated by the caller. */
923 #ifdef REHASH_DEBUG
924 		printf(" - not freed\n");
925 #endif
926 		return 0;
927 	}
928 	wd->err = ext2fs_write_dir_block4(fs, *block_nr, dir, 0, wd->dir);
929 	if (buf)
930 		ext2fs_free_mem(&buf);
931 
932 #ifdef REHASH_DEBUG
933 	printf(" - write (%d)\n", wd->err);
934 #endif
935 	if (wd->err)
936 		return BLOCK_ABORT;
937 	return 0;
938 }
939 
write_directory(e2fsck_t ctx,ext2_filsys fs,struct out_dir * outdir,ext2_ino_t ino,struct ext2_inode * inode,int compress)940 static errcode_t write_directory(e2fsck_t ctx, ext2_filsys fs,
941 				 struct out_dir *outdir,
942 				 ext2_ino_t ino, struct ext2_inode *inode,
943 				 int compress)
944 {
945 	struct write_dir_struct wd;
946 	errcode_t	retval;
947 
948 	retval = e2fsck_expand_directory(ctx, ino, -1, outdir->num);
949 	if (retval)
950 		return retval;
951 
952 	wd.outdir = outdir;
953 	wd.err = 0;
954 	wd.ino = ino;
955 	wd.ctx = ctx;
956 	wd.dir = ino;
957 
958 	retval = ext2fs_block_iterate3(fs, ino, 0, NULL,
959 				       write_dir_block, &wd);
960 	if (retval)
961 		return retval;
962 	if (wd.err)
963 		return wd.err;
964 
965 	e2fsck_read_inode(ctx, ino, inode, "rehash_dir");
966 	if (compress)
967 		inode->i_flags &= ~EXT2_INDEX_FL;
968 	else
969 		inode->i_flags |= EXT2_INDEX_FL;
970 #ifdef REHASH_DEBUG
971 	printf("%u: set inode size to %u blocks = %u bytes\n",
972 	       ino, outdir->num, outdir->num * fs->blocksize);
973 #endif
974 	retval = ext2fs_inode_size_set(fs, inode, (ext2_off64_t)outdir->num *
975 						   fs->blocksize);
976 	if (retval)
977 		return retval;
978 
979 	/* ext2fs_punch() calls ext2fs_write_inode() which writes the size */
980 	return ext2fs_punch(fs, ino, inode, NULL, outdir->num, ~0ULL);
981 }
982 
e2fsck_rehash_dir(e2fsck_t ctx,ext2_ino_t ino,struct problem_context * pctx)983 errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino,
984 			    struct problem_context *pctx)
985 {
986 	ext2_filsys 		fs = ctx->fs;
987 	errcode_t		retval;
988 	struct ext2_inode 	inode;
989 	char			*dir_buf = 0;
990 	struct fill_dir_struct	fd = { NULL, NULL, 0, 0, 0, NULL,
991 				       0, 0, 0, 0, 0, 0 };
992 	struct out_dir		outdir = { 0, 0, 0, 0 };
993 	struct name_cmp_ctx name_cmp_ctx = {0, NULL};
994 
995 	e2fsck_read_inode(ctx, ino, &inode, "rehash_dir");
996 
997 	if (ext2fs_has_feature_inline_data(fs->super) &&
998 	   (inode.i_flags & EXT4_INLINE_DATA_FL))
999 		return 0;
1000 
1001 	retval = ext2fs_get_mem(inode.i_size, &dir_buf);
1002 	if (retval)
1003 		goto errout;
1004 
1005 	fd.max_array = inode.i_size / 32;
1006 	retval = ext2fs_get_array(sizeof(struct hash_entry),
1007 				  fd.max_array, &fd.harray);
1008 	if (retval)
1009 		goto errout;
1010 
1011 	fd.ino = ino;
1012 	fd.ctx = ctx;
1013 	fd.buf = dir_buf;
1014 	fd.inode = &inode;
1015 	fd.dir = ino;
1016 	if (!ext2fs_has_feature_dir_index(fs->super) ||
1017 	    (inode.i_size / fs->blocksize) < 2)
1018 		fd.compress = 1;
1019 	fd.parent = 0;
1020 
1021 	if (fs->encoding && (inode.i_flags & EXT4_CASEFOLD_FL)) {
1022 		name_cmp_ctx.casefold = 1;
1023 		name_cmp_ctx.tbl = fs->encoding;
1024 	}
1025 
1026 retry_nohash:
1027 	/* Read in the entire directory into memory */
1028 	retval = ext2fs_block_iterate3(fs, ino, 0, 0,
1029 				       fill_dir_block, &fd);
1030 	if (fd.err) {
1031 		retval = fd.err;
1032 		goto errout;
1033 	}
1034 
1035 	/*
1036 	 * If the entries read are less than a block, then don't index
1037 	 * the directory
1038 	 */
1039 	if (!fd.compress && (fd.dir_size < (fs->blocksize - 24))) {
1040 		fd.compress = 1;
1041 		fd.dir_size = 0;
1042 		fd.num_array = 0;
1043 		goto retry_nohash;
1044 	}
1045 
1046 #if 0
1047 	printf("%d entries (%d bytes) found in inode %d\n",
1048 	       fd.num_array, fd.dir_size, ino);
1049 #endif
1050 
1051 	/* Sort the list */
1052 resort:
1053 	if (fd.compress && fd.num_array > 1)
1054 		sort_r_simple(fd.harray+2, fd.num_array-2,
1055 			      sizeof(struct hash_entry),
1056 			      hash_cmp, &name_cmp_ctx);
1057 	else
1058 		sort_r_simple(fd.harray, fd.num_array,
1059 			      sizeof(struct hash_entry),
1060 			      hash_cmp, &name_cmp_ctx);
1061 
1062 	/*
1063 	 * Look for duplicates
1064 	 */
1065 	if (duplicate_search_and_fix(ctx, fs, ino, &fd, &name_cmp_ctx))
1066 		goto resort;
1067 
1068 	if (ctx->options & E2F_OPT_NO) {
1069 		retval = 0;
1070 		goto errout;
1071 	}
1072 
1073 	/* Sort non-hashed directories by inode number */
1074 	if (fd.compress && fd.num_array > 1)
1075 		qsort(fd.harray+2, fd.num_array-2,
1076 		      sizeof(struct hash_entry), ino_cmp);
1077 
1078 	/*
1079 	 * Copy the directory entries.  In a htree directory these
1080 	 * will become the leaf nodes.
1081 	 */
1082 	retval = copy_dir_entries(ctx, &fd, &outdir);
1083 	if (retval)
1084 		goto errout;
1085 
1086 	free(dir_buf); dir_buf = 0;
1087 
1088 	if (!fd.compress) {
1089 		/* Calculate the interior nodes */
1090 		retval = calculate_tree(fs, &outdir, ino, fd.parent, fd.inode);
1091 		if (retval)
1092 			goto errout;
1093 	}
1094 
1095 	retval = write_directory(ctx, fs, &outdir, ino, &inode, fd.compress);
1096 	if (retval)
1097 		goto errout;
1098 
1099 	if (ctx->options & E2F_OPT_CONVERT_BMAP)
1100 		retval = e2fsck_rebuild_extents_later(ctx, ino);
1101 	else
1102 		retval = e2fsck_check_rebuild_extents(ctx, ino, &inode, pctx);
1103 errout:
1104 	ext2fs_free_mem(&dir_buf);
1105 	ext2fs_free_mem(&fd.harray);
1106 
1107 	free_out_dir(&outdir);
1108 	return retval;
1109 }
1110 
e2fsck_rehash_directories(e2fsck_t ctx)1111 void e2fsck_rehash_directories(e2fsck_t ctx)
1112 {
1113 	struct problem_context	pctx;
1114 #ifdef RESOURCE_TRACK
1115 	struct resource_track	rtrack;
1116 #endif
1117 	struct dir_info		*dir;
1118 	ext2_u32_iterate 	iter;
1119 	struct dir_info_iter *	dirinfo_iter = 0;
1120 	ext2_ino_t		ino;
1121 	errcode_t		retval;
1122 	int			cur, max, all_dirs, first = 1;
1123 
1124 	init_resource_track(&rtrack, ctx->fs->io);
1125 	all_dirs = ctx->options & E2F_OPT_COMPRESS_DIRS;
1126 
1127 	if (!ctx->dirs_to_hash && !all_dirs)
1128 		return;
1129 
1130 	(void) e2fsck_get_lost_and_found(ctx, 0);
1131 
1132 	clear_problem_context(&pctx);
1133 
1134 	cur = 0;
1135 	if (all_dirs) {
1136 		dirinfo_iter = e2fsck_dir_info_iter_begin(ctx);
1137 		max = e2fsck_get_num_dirinfo(ctx);
1138 	} else {
1139 		retval = ext2fs_u32_list_iterate_begin(ctx->dirs_to_hash,
1140 						       &iter);
1141 		if (retval) {
1142 			pctx.errcode = retval;
1143 			fix_problem(ctx, PR_3A_OPTIMIZE_ITER, &pctx);
1144 			return;
1145 		}
1146 		max = ext2fs_u32_list_count(ctx->dirs_to_hash);
1147 	}
1148 	while (1) {
1149 		if (all_dirs) {
1150 			if ((dir = e2fsck_dir_info_iter(ctx,
1151 							dirinfo_iter)) == 0)
1152 				break;
1153 			ino = dir->ino;
1154 		} else {
1155 			if (!ext2fs_u32_list_iterate(iter, &ino))
1156 				break;
1157 		}
1158 		if (!ext2fs_test_inode_bitmap2(ctx->inode_dir_map, ino))
1159 			continue;
1160 
1161 		pctx.dir = ino;
1162 		if (first) {
1163 			fix_problem(ctx, PR_3A_PASS_HEADER, &pctx);
1164 			first = 0;
1165 		}
1166 #if 0
1167 		fix_problem(ctx, PR_3A_OPTIMIZE_DIR, &pctx);
1168 #endif
1169 		pctx.errcode = e2fsck_rehash_dir(ctx, ino, &pctx);
1170 		if (pctx.errcode) {
1171 			end_problem_latch(ctx, PR_LATCH_OPTIMIZE_DIR);
1172 			fix_problem(ctx, PR_3A_OPTIMIZE_DIR_ERR, &pctx);
1173 		}
1174 		if (ctx->progress && !ctx->progress_fd)
1175 			e2fsck_simple_progress(ctx, "Rebuilding directory",
1176 			       100.0 * (float) (++cur) / (float) max, ino);
1177 	}
1178 	end_problem_latch(ctx, PR_LATCH_OPTIMIZE_DIR);
1179 	if (all_dirs)
1180 		e2fsck_dir_info_iter_end(ctx, dirinfo_iter);
1181 	else
1182 		ext2fs_u32_list_iterate_end(iter);
1183 
1184 	if (ctx->dirs_to_hash)
1185 		ext2fs_u32_list_free(ctx->dirs_to_hash);
1186 	ctx->dirs_to_hash = 0;
1187 
1188 	print_resource_track(ctx, "Pass 3A", &rtrack, ctx->fs->io);
1189 }
1190