1 /*
2  * readahead.c -- Prefetch filesystem metadata to speed up fsck.
3  *
4  * Copyright (C) 2014 Oracle.
5  *
6  * %Begin-Header%
7  * This file may be redistributed under the terms of the GNU Library
8  * General Public License, version 2.
9  * %End-Header%
10  */
11 
12 #include "config.h"
13 #include <string.h>
14 
15 #include "e2fsck.h"
16 
17 #undef DEBUG
18 
19 #ifdef DEBUG
20 # define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
21 #else
22 # define dbg_printf(f, a...)
23 #endif
24 
25 struct read_dblist {
26 	errcode_t err;
27 	blk64_t run_start;
28 	blk64_t run_len;
29 	int flags;
30 };
31 
readahead_dir_block(ext2_filsys fs,struct ext2_db_entry2 * db,void * priv_data)32 static int readahead_dir_block(ext2_filsys fs, struct ext2_db_entry2 *db,
33 			       void *priv_data)
34 {
35 	struct read_dblist *pr = priv_data;
36 	e2_blkcnt_t count = (pr->flags & E2FSCK_RA_DBLIST_IGNORE_BLOCKCNT ?
37 			     1 : db->blockcnt);
38 
39 	if (!pr->run_len || db->blk != pr->run_start + pr->run_len) {
40 		if (pr->run_len) {
41 			pr->err = io_channel_cache_readahead(fs->io,
42 							     pr->run_start,
43 							     pr->run_len);
44 			dbg_printf("readahead start=%llu len=%llu err=%d\n",
45 				   pr->run_start, pr->run_len,
46 				   (int)pr->err);
47 		}
48 		pr->run_start = db->blk;
49 		pr->run_len = 0;
50 	}
51 	pr->run_len += count;
52 
53 	return pr->err ? DBLIST_ABORT : 0;
54 }
55 
e2fsck_readahead_dblist(ext2_filsys fs,int flags,ext2_dblist dblist,unsigned long long start,unsigned long long count)56 errcode_t e2fsck_readahead_dblist(ext2_filsys fs, int flags,
57 				  ext2_dblist dblist,
58 				  unsigned long long start,
59 				  unsigned long long count)
60 {
61 	errcode_t err;
62 	struct read_dblist pr;
63 
64 	dbg_printf("%s: flags=0x%x\n", __func__, flags);
65 	if (flags & ~E2FSCK_RA_DBLIST_ALL_FLAGS)
66 		return EXT2_ET_INVALID_ARGUMENT;
67 
68 	memset(&pr, 0, sizeof(pr));
69 	pr.flags = flags;
70 	err = ext2fs_dblist_iterate3(dblist, readahead_dir_block, start,
71 				     count, &pr);
72 	if (pr.err)
73 		return pr.err;
74 	if (err)
75 		return err;
76 
77 	if (pr.run_len)
78 		err = io_channel_cache_readahead(fs->io, pr.run_start,
79 						 pr.run_len);
80 
81 	return err;
82 }
83 
e2fsck_readahead_bitmap(ext2_filsys fs,ext2fs_block_bitmap ra_map)84 static errcode_t e2fsck_readahead_bitmap(ext2_filsys fs,
85 					 ext2fs_block_bitmap ra_map)
86 {
87 	blk64_t start, end, out;
88 	errcode_t err;
89 
90 	start = 1;
91 	end = ext2fs_blocks_count(fs->super) - 1;
92 
93 	err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, &out);
94 	while (err == 0) {
95 		start = out;
96 		err = ext2fs_find_first_zero_block_bitmap2(ra_map, start, end,
97 							   &out);
98 		if (err == ENOENT) {
99 			out = end;
100 			err = 0;
101 			if (out == start)
102 				break;
103 		} else if (err)
104 			break;
105 
106 		err = io_channel_cache_readahead(fs->io, start, out - start);
107 		if (err)
108 			break;
109 		start = out;
110 		err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end,
111 							  &out);
112 	}
113 
114 	if (err == ENOENT)
115 		err = 0;
116 
117 	return err;
118 }
119 
120 /* Try not to spew bitmap range errors for readahead */
mark_bmap_range(ext2fs_block_bitmap map,blk64_t blk,unsigned int num)121 static errcode_t mark_bmap_range(ext2fs_block_bitmap map,
122 				 blk64_t blk, unsigned int num)
123 {
124 	if (blk >= ext2fs_get_generic_bmap_start(map) &&
125 	    blk + num <= ext2fs_get_generic_bmap_end(map))
126 		ext2fs_mark_block_bitmap_range2(map, blk, num);
127 	else
128 		return EXT2_ET_INVALID_ARGUMENT;
129 	return 0;
130 }
131 
mark_bmap(ext2fs_block_bitmap map,blk64_t blk)132 static errcode_t mark_bmap(ext2fs_block_bitmap map, blk64_t blk)
133 {
134 	if (blk >= ext2fs_get_generic_bmap_start(map) &&
135 	    blk <= ext2fs_get_generic_bmap_end(map))
136 		ext2fs_mark_block_bitmap2(map, blk);
137 	else
138 		return EXT2_ET_INVALID_ARGUMENT;
139 	return 0;
140 }
141 
e2fsck_readahead(ext2_filsys fs,int flags,dgrp_t start,dgrp_t ngroups)142 errcode_t e2fsck_readahead(ext2_filsys fs, int flags, dgrp_t start,
143 			   dgrp_t ngroups)
144 {
145 	blk64_t		super, old_gdt, new_gdt;
146 	blk_t		blocks;
147 	dgrp_t		i;
148 	ext2fs_block_bitmap		ra_map = NULL;
149 	dgrp_t		end = start + ngroups;
150 	errcode_t	err = 0;
151 
152 	dbg_printf("%s: flags=0x%x start=%d groups=%d\n", __func__, flags,
153 		   start, ngroups);
154 	if (flags & ~E2FSCK_READA_ALL_FLAGS)
155 		return EXT2_ET_INVALID_ARGUMENT;
156 
157 	if (end > fs->group_desc_count)
158 		end = fs->group_desc_count;
159 
160 	if (flags == 0)
161 		return 0;
162 
163 	err = ext2fs_allocate_block_bitmap(fs, "readahead bitmap",
164 					   &ra_map);
165 	if (err)
166 		return err;
167 
168 	for (i = start; i < end; i++) {
169 		err = ext2fs_super_and_bgd_loc2(fs, i, &super, &old_gdt,
170 						&new_gdt, &blocks);
171 		if (err)
172 			break;
173 
174 		if (flags & E2FSCK_READA_SUPER) {
175 			err = mark_bmap(ra_map, super);
176 			if (err)
177 				break;
178 		}
179 
180 		if (flags & E2FSCK_READA_GDT) {
181 			err = mark_bmap_range(ra_map,
182 					      old_gdt ? old_gdt : new_gdt,
183 					      blocks);
184 			if (err)
185 				break;
186 		}
187 
188 		if ((flags & E2FSCK_READA_BBITMAP) &&
189 		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_BLOCK_UNINIT) &&
190 		    ext2fs_bg_free_blocks_count(fs, i) <
191 				fs->super->s_blocks_per_group) {
192 			super = ext2fs_block_bitmap_loc(fs, i);
193 			err = mark_bmap(ra_map, super);
194 			if (err)
195 				break;
196 		}
197 
198 		if ((flags & E2FSCK_READA_IBITMAP) &&
199 		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_INODE_UNINIT) &&
200 		    ext2fs_bg_free_inodes_count(fs, i) <
201 				fs->super->s_inodes_per_group) {
202 			super = ext2fs_inode_bitmap_loc(fs, i);
203 			err = mark_bmap(ra_map, super);
204 			if (err)
205 				break;
206 		}
207 
208 		if ((flags & E2FSCK_READA_ITABLE) &&
209 		    ext2fs_bg_free_inodes_count(fs, i) <
210 				fs->super->s_inodes_per_group) {
211 			super = ext2fs_inode_table_loc(fs, i);
212 			blocks = fs->inode_blocks_per_group -
213 				 (ext2fs_bg_itable_unused(fs, i) *
214 				  EXT2_INODE_SIZE(fs->super) / fs->blocksize);
215 			err = mark_bmap_range(ra_map, super, blocks);
216 			if (err)
217 				break;
218 		}
219 	}
220 
221 	if (!err)
222 		err = e2fsck_readahead_bitmap(fs, ra_map);
223 
224 	ext2fs_free_block_bitmap(ra_map);
225 	return err;
226 }
227 
e2fsck_can_readahead(ext2_filsys fs)228 int e2fsck_can_readahead(ext2_filsys fs)
229 {
230 	errcode_t err;
231 
232 	err = io_channel_cache_readahead(fs->io, 0, 1);
233 	dbg_printf("%s: supp=%d\n", __func__, err != EXT2_ET_OP_NOT_SUPPORTED);
234 	return err != EXT2_ET_OP_NOT_SUPPORTED;
235 }
236 
e2fsck_guess_readahead(ext2_filsys fs)237 unsigned long long e2fsck_guess_readahead(ext2_filsys fs)
238 {
239 	unsigned long long guess;
240 
241 	/*
242 	 * The optimal readahead sizes were experimentally determined by
243 	 * djwong in August 2014.  Setting the RA size to two block groups'
244 	 * worth of inode table blocks seems to yield the largest reductions
245 	 * in e2fsck runtime.
246 	 */
247 	guess = 2ULL * fs->blocksize * fs->inode_blocks_per_group;
248 
249 	/* Disable RA if it'd use more 1/50th of RAM. */
250 	if (get_memory_size() > (guess * 50))
251 		return guess / 1024;
252 
253 	return 0;
254 }
255