1 /*
 * undo_io.c --- This is the undo io manager that copies the old data
 * being overwritten into a tdb database
4  *
5  * Copyright IBM Corporation, 2007
6  * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
7  *
8  * %Begin-Header%
9  * This file may be redistributed under the terms of the GNU Library
10  * General Public License, version 2.
11  * %End-Header%
12  */
13 
14 #ifndef _LARGEFILE_SOURCE
15 #define _LARGEFILE_SOURCE
16 #endif
17 #ifndef _LARGEFILE64_SOURCE
18 #define _LARGEFILE64_SOURCE
19 #endif
20 
21 #include "config.h"
22 #include <stdio.h>
23 #include <string.h>
24 #if HAVE_UNISTD_H
25 #include <unistd.h>
26 #endif
27 #if HAVE_ERRNO_H
28 #include <errno.h>
29 #endif
30 #include <fcntl.h>
31 #include <time.h>
32 #ifdef __linux__
33 #include <sys/utsname.h>
34 #endif
35 #if HAVE_SYS_STAT_H
36 #include <sys/stat.h>
37 #endif
38 #if HAVE_SYS_TYPES_H
39 #include <sys/types.h>
40 #endif
41 #if HAVE_SYS_RESOURCE_H
42 #include <sys/resource.h>
43 #endif
44 #include <limits.h>
45 
46 #include "ext2_fs.h"
47 #include "ext2fs.h"
48 #include "ext2fsP.h"
49 
/*
 * ATTR(x) expands to a GCC-style __attribute__ when __GNUC__ is defined
 * and to nothing otherwise, so annotations such as ATTR((unused)) can be
 * written unconditionally.
 */
#ifdef __GNUC__
#define ATTR(x) __attribute__(x)
#else
#define ATTR(x)
#endif

/*
 * Debug tracing.  DEBUG is explicitly undefined here, so dbg_printf()
 * expands to nothing.  When DEBUG is defined instead, dbg_printf()
 * prints to stdout and flushes after every message.
 */
#undef DEBUG

#ifdef DEBUG
# define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif
63 
/*
 * For checking structure magic numbers...
 *
 * If the object's `magic' field differs from `code', the *calling*
 * function returns `code'.  The expansion is wrapped in
 * do { } while (0) so that the macro behaves like a single statement
 * and cannot capture an `else' that follows it.
 */

#define EXT2_CHECK_MAGIC(obj, code) \
	  do { if ((obj)->magic != (code)) return (code); } while (0)
70 /*
71  * Undo file format: The file is cut up into undo_header.block_size blocks.
72  * The first block contains the header.
73  * The second block contains the superblock.
74  * There is then a repeating series of blocks as follows:
75  *   A key block, which contains undo_keys to map the following data blocks.
76  *   Data blocks
77  * (Note that there are pointers to the first key block and the sb, so this
78  * order isn't strictly necessary.)
79  */
/* Eight-character signature copied into undo_header.magic. */
#define E2UNDO_MAGIC "E2UNDO02"
/* Value stored in undo_key_block.magic for every key block. */
#define KEYBLOCK_MAGIC 0xCADECADE

/* Bit in undo_header.state. */
#define E2UNDO_STATE_FINISHED	0x1	/* undo file is complete */

/* Accepted range for the undo block size (checked on set and on reopen). */
#define E2UNDO_MIN_BLOCK_SIZE	1024	/* undo blocks are no less than 1KB */
#define E2UNDO_MAX_BLOCK_SIZE	1048576	/* undo blocks are no more than 1MB */
87 
/*
 * Header of the undo file, written at the start of block 0 by
 * write_undo_indexes() and validated by try_reopen_undo_file().
 *
 * All multi-byte fields are little-endian.  The declared fields add up
 * to 512 bytes (8 + 3*8 + 8*4 + 8 + 436 + 4).  header_crc covers every
 * byte of the structure except header_crc itself.
 */
struct undo_header {
	char magic[8];		/* "E2UNDO02" */
	__le64 num_keys;	/* how many keys? */
	__le64 super_offset;	/* where in the file is the superblock copy? */
	__le64 key_offset;	/* where do the key/data block chunks start? */
	__le32 block_size;	/* block size of the undo file */
	__le32 fs_block_size;	/* block size of the target device */
	__le32 sb_crc;		/* crc32c of the superblock */
	__le32 state;		/* e2undo state flags */
	__le32 f_compat;	/* compatible features */
	__le32 f_incompat;	/* incompatible features (none so far) */
	__le32 f_rocompat;	/* ro compatible features (none so far) */
	__le32 pad32;		/* padding for fs_offset */
	__le64 fs_offset;	/* filesystem offset */
	__u8 padding[436];	/* padding */
	__le32 header_crc;	/* crc32c of this header (but not this field) */
};
105 
/*
 * Upper bound used by undo_write_tdb() when deciding whether an existing
 * key may be extended: a key is only grown while its size stays below
 * this many undo blocks.
 */
#define E2UNDO_MAX_EXTENT_BLOCKS	512	/* max extent size, in blocks */

/*
 * One index entry in a key block.  It describes a run of saved data that
 * follows the key block in the undo file.  Fields are little-endian.
 */
struct undo_key {
	__le64 fsblk;		/* where in the fs does the block go */
	__le32 blk_crc;		/* crc32c of the block */
	__le32 size;		/* how many bytes in this block? */
};
113 
/*
 * On-disk layout of a key block: a 16-byte header followed by an array
 * of undo_key entries filling the rest of the undo block.  The crc is
 * computed over the whole block with the crc field itself set to zero.
 */
struct undo_key_block {
	__le32 magic;		/* KEYBLOCK_MAGIC number */
	__le32 crc;		/* block checksum */
	__le64 reserved;	/* zero */

	/* C99 flexible array member when available, GNU zero-length array otherwise */
#if __STDC_VERSION__ >= 199901L
	struct undo_key keys[];		/* keys, which come immediately after */
#else
	struct undo_key keys[0];	/* keys, which come immediately after */
#endif
};
125 
/*
 * Per-channel state of the undo I/O manager, hung off
 * io_channel->private_data by undo_open().
 */
struct undo_private_data {
	int	magic;		/* EXT2_ET_MAGIC_UNIX_IO_CHANNEL, checked on entry */

	/* the undo file io channel */
	io_channel undo_file;
	blk64_t undo_blk_num;			/* next free block */
	blk64_t key_blk_num;			/* current key block location */
	blk64_t super_blk_num;			/* superblock location */
	blk64_t first_key_blk;			/* first key block location */
	struct undo_key_block *keyb;		/* in-memory copy of the current key block */
	size_t num_keys, keys_in_block;	/* total keys / keys used in keyb */

	/* The backing io channel */
	io_channel real;

	/* undo block size in bytes (name is historical) */
	unsigned long long tdb_data_size;
	/*
	 * 0 initially; set to -1 by the "tdb_data_size" option handler and
	 * to 1 by undo_setup_tdb().  Once non-zero, undo_set_blksize() no
	 * longer changes tdb_data_size.
	 */
	int tdb_written;

	/* to support offset in unix I/O manager */
	ext2_loff_t offset;

	/* one bit per undo-block-sized region already saved */
	ext2fs_block_bitmap written_block_map;
	/* zeroed filesystem handle passed to the bitmap allocator */
	struct struct_ext2_filsys fake_fs;
	/* private copy of the undo file path */
	char *tdb_file;
	/* in-memory undo header, written out by write_undo_indexes() */
	struct undo_header hdr;
};
/*
 * Number of undo_key entries that fit in one key block: the block size
 * divided by the key size, minus one slot.  The subtracted slot matches
 * the key block header, whose declared fields (4 + 4 + 8 bytes) occupy
 * the same 16 bytes as one undo_key (8 + 4 + 4 bytes).
 */
#define KEYS_PER_BLOCK(d) (((d)->tdb_data_size / sizeof(struct undo_key)) - 1)

/* Bit in undo_header.f_compat; set when the filesystem offset is non-zero. */
#define E2UNDO_FEATURE_COMPAT_FS_OFFSET 0x1	/* the filesystem offset */
155 
156 static inline void e2undo_set_feature_fs_offset(struct undo_header *header) {
157 	header->f_compat |= ext2fs_le32_to_cpu(E2UNDO_FEATURE_COMPAT_FS_OFFSET);
158 }
159 
160 static inline void e2undo_clear_feature_fs_offset(struct undo_header *header) {
161 	header->f_compat &= ~ext2fs_le32_to_cpu(E2UNDO_FEATURE_COMPAT_FS_OFFSET);
162 }
163 
/* I/O manager used by undo_open() for both the device and the undo file. */
static io_manager undo_io_backing_manager;
/* Path of the undo file; set by set_undo_io_backup_file(). */
static char *tdb_file;
/*
 * Byte count reported by the most recent read-error callback; used by
 * undo_write_tdb() to size a record after a short read.
 */
static int actual_size;
167 
168 errcode_t set_undo_io_backing_manager(io_manager manager)
169 {
170 	/*
171 	 * We may want to do some validation later
172 	 */
173 	undo_io_backing_manager = manager;
174 	return 0;
175 }
176 
177 errcode_t set_undo_io_backup_file(char *file_name)
178 {
179 	tdb_file = strdup(file_name);
180 
181 	if (tdb_file == NULL) {
182 		return EXT2_ET_NO_MEMORY;
183 	}
184 
185 	return 0;
186 }
187 
188 static errcode_t write_undo_indexes(struct undo_private_data *data, int flush)
189 {
190 	errcode_t retval;
191 	struct ext2_super_block super;
192 	io_channel channel;
193 	int block_size;
194 	__u32 sb_crc, hdr_crc;
195 
196 	/* Spit out a key block, if there's any data */
197 	if (data->keys_in_block) {
198 		data->keyb->magic = ext2fs_cpu_to_le32(KEYBLOCK_MAGIC);
199 		data->keyb->crc = 0;
200 		data->keyb->crc = ext2fs_cpu_to_le32(
201 					 ext2fs_crc32c_le(~0,
202 					 (unsigned char *)data->keyb,
203 					 data->tdb_data_size));
204 		dbg_printf("Writing keyblock to blk %llu\n", data->key_blk_num);
205 		retval = io_channel_write_blk64(data->undo_file,
206 						data->key_blk_num,
207 						1, data->keyb);
208 		if (retval)
209 			return retval;
210 		/* Move on to the next key block if it's full. */
211 		if (data->keys_in_block == KEYS_PER_BLOCK(data)) {
212 			memset(data->keyb, 0, data->tdb_data_size);
213 			data->keys_in_block = 0;
214 			data->key_blk_num = data->undo_blk_num;
215 			data->undo_blk_num++;
216 		}
217 	}
218 
219 	/* Prepare superblock for write */
220 	channel = data->real;
221 	block_size = channel->block_size;
222 
223 	io_channel_set_blksize(channel, SUPERBLOCK_OFFSET);
224 	retval = io_channel_read_blk64(channel, 1, -SUPERBLOCK_SIZE, &super);
225 	if (retval)
226 		goto err_out;
227 	sb_crc = ext2fs_crc32c_le(~0, (unsigned char *)&super, SUPERBLOCK_SIZE);
228 	super.s_magic = ~super.s_magic;
229 
230 	/* Write the undo header to disk. */
231 	memcpy(data->hdr.magic, E2UNDO_MAGIC, sizeof(data->hdr.magic));
232 	data->hdr.num_keys = ext2fs_cpu_to_le64(data->num_keys);
233 	data->hdr.super_offset = ext2fs_cpu_to_le64(data->super_blk_num);
234 	data->hdr.key_offset = ext2fs_cpu_to_le64(data->first_key_blk);
235 	data->hdr.fs_block_size = ext2fs_cpu_to_le32(block_size);
236 	data->hdr.sb_crc = ext2fs_cpu_to_le32(sb_crc);
237 	data->hdr.fs_offset = ext2fs_cpu_to_le64(data->offset);
238 	if (data->offset)
239 		e2undo_set_feature_fs_offset(&data->hdr);
240 	else
241 		e2undo_clear_feature_fs_offset(&data->hdr);
242 	hdr_crc = ext2fs_crc32c_le(~0, (unsigned char *)&data->hdr,
243 				   sizeof(data->hdr) -
244 				   sizeof(data->hdr.header_crc));
245 	data->hdr.header_crc = ext2fs_cpu_to_le32(hdr_crc);
246 	retval = io_channel_write_blk64(data->undo_file, 0,
247 					-(int)sizeof(data->hdr),
248 					&data->hdr);
249 	if (retval)
250 		goto err_out;
251 
252 	/*
253 	 * Record the entire superblock (in FS byte order) so that we can't
254 	 * apply e2undo files to the wrong FS or out of order.
255 	 */
256 	dbg_printf("Writing superblock to block %llu\n", data->super_blk_num);
257 	retval = io_channel_write_blk64(data->undo_file, data->super_blk_num,
258 					-SUPERBLOCK_SIZE, &super);
259 	if (retval)
260 		goto err_out;
261 
262 	if (flush)
263 		retval = io_channel_flush(data->undo_file);
264 err_out:
265 	io_channel_set_blksize(channel, block_size);
266 	return retval;
267 }
268 
269 static errcode_t undo_setup_tdb(struct undo_private_data *data)
270 {
271 	int i;
272 	errcode_t retval;
273 
274 	if (data->tdb_written == 1)
275 		return 0;
276 
277 	data->tdb_written = 1;
278 
279 	/* Make a bitmap to track what we've written */
280 	memset(&data->fake_fs, 0, sizeof(data->fake_fs));
281 	data->fake_fs.blocksize = data->tdb_data_size;
282 	retval = ext2fs_alloc_generic_bmap(&data->fake_fs,
283 				EXT2_ET_MAGIC_BLOCK_BITMAP64,
284 				EXT2FS_BMAP64_RBTREE,
285 				0, ~1ULL, ~1ULL,
286 				"undo block map", &data->written_block_map);
287 	if (retval)
288 		return retval;
289 
290 	/* Allocate key block */
291 	retval = ext2fs_get_mem(data->tdb_data_size, &data->keyb);
292 	if (retval)
293 		return retval;
294 	data->key_blk_num = data->first_key_blk;
295 
296 	/* Record block size */
297 	dbg_printf("Undo block size %llu\n", data->tdb_data_size);
298 	dbg_printf("Keys per block %llu\n", KEYS_PER_BLOCK(data));
299 	data->hdr.block_size = ext2fs_cpu_to_le32(data->tdb_data_size);
300 	io_channel_set_blksize(data->undo_file, data->tdb_data_size);
301 
302 	/* Ensure that we have space for header blocks */
303 	for (i = 0; i <= 2; i++) {
304 		retval = io_channel_read_blk64(data->undo_file, i, 1,
305 					       data->keyb);
306 		if (retval)
307 			memset(data->keyb, 0, data->tdb_data_size);
308 		retval = io_channel_write_blk64(data->undo_file, i, 1,
309 						data->keyb);
310 		if (retval)
311 			return retval;
312 		retval = io_channel_flush(data->undo_file);
313 		if (retval)
314 			return retval;
315 	}
316 	memset(data->keyb, 0, data->tdb_data_size);
317 	return 0;
318 }
319 
320 static errcode_t undo_write_tdb(io_channel channel,
321 				unsigned long long block, int count)
322 
323 {
324 	int size, sz;
325 	unsigned long long block_num, backing_blk_num;
326 	errcode_t retval = 0;
327 	ext2_loff_t offset;
328 	struct undo_private_data *data;
329 	unsigned char *read_ptr;
330 	unsigned long long end_block;
331 	unsigned long long data_size;
332 	struct undo_key *key;
333 	__u32 blk_crc;
334 
335 	data = (struct undo_private_data *) channel->private_data;
336 
337 	if (data->undo_file == NULL) {
338 		/*
339 		 * Transaction database not initialized
340 		 */
341 		return 0;
342 	}
343 
344 	if (count == 1)
345 		size = channel->block_size;
346 	else {
347 		if (count < 0)
348 			size = -count;
349 		else
350 			size = count * channel->block_size;
351 	}
352 
353 	retval = undo_setup_tdb(data);
354 	if (retval)
355 		return retval;
356 	/*
357 	 * Data is stored in tdb database as blocks of tdb_data_size size
358 	 * This helps in efficient lookup further.
359 	 *
360 	 * We divide the disk to blocks of tdb_data_size.
361 	 */
362 	offset = (block * channel->block_size) + data->offset ;
363 	block_num = offset / data->tdb_data_size;
364 	end_block = (offset + size - 1) / data->tdb_data_size;
365 
366 	while (block_num <= end_block) {
367 		__u32 keysz;
368 
369 		/*
370 		 * Check if we have the record already
371 		 */
372 		if (ext2fs_test_block_bitmap2(data->written_block_map,
373 						   block_num)) {
374 			/* Try the next block */
375 			block_num++;
376 			continue;
377 		}
378 		ext2fs_mark_block_bitmap2(data->written_block_map, block_num);
379 
380 		/*
381 		 * Read one block using the backing I/O manager
382 		 * The backing I/O manager block size may be
383 		 * different from the tdb_data_size.
384 		 * Also we need to recalculate the block number with respect
385 		 * to the backing I/O manager.
386 		 */
387 		offset = block_num * data->tdb_data_size +
388 				(data->offset % data->tdb_data_size);
389 		backing_blk_num = (offset - data->offset) / channel->block_size;
390 
391 		retval = ext2fs_get_mem(data->tdb_data_size, &read_ptr);
392 		if (retval) {
393 			return retval;
394 		}
395 
396 		memset(read_ptr, 0, data->tdb_data_size);
397 		actual_size = 0;
398 		if ((data->tdb_data_size % channel->block_size) == 0)
399 			sz = data->tdb_data_size / channel->block_size;
400 		else
401 			sz = -data->tdb_data_size;
402 		retval = io_channel_read_blk64(data->real, backing_blk_num,
403 					     sz, read_ptr);
404 		if (retval) {
405 			if (retval != EXT2_ET_SHORT_READ) {
406 				free(read_ptr);
407 				return retval;
408 			}
409 			/*
410 			 * short read so update the record size
411 			 * accordingly
412 			 */
413 			data_size = actual_size;
414 		} else {
415 			data_size = data->tdb_data_size;
416 		}
417 		if (data_size == 0) {
418 			free(read_ptr);
419 			block_num++;
420 			continue;
421 		}
422 		dbg_printf("Read %llu bytes from FS block %llu (blk=%llu cnt=%llu)\n",
423 		       data_size, backing_blk_num, block, data->tdb_data_size);
424 		if ((data_size % data->undo_file->block_size) == 0)
425 			sz = data_size / data->undo_file->block_size;
426 		else
427 			sz = -data_size;;
428 		/* extend this key? */
429 		if (data->keys_in_block) {
430 			key = data->keyb->keys + data->keys_in_block - 1;
431 			keysz = ext2fs_le32_to_cpu(key->size);
432 		} else {
433 			key = NULL;
434 			keysz = 0;
435 		}
436 		if (key != NULL &&
437 		    (ext2fs_le64_to_cpu(key->fsblk) * channel->block_size +
438 		     channel->block_size - 1 +
439 		     keysz) / channel->block_size == backing_blk_num &&
440 		    E2UNDO_MAX_EXTENT_BLOCKS * data->tdb_data_size >
441 		    keysz + data_size) {
442 			blk_crc = ext2fs_le32_to_cpu(key->blk_crc);
443 			blk_crc = ext2fs_crc32c_le(blk_crc, read_ptr, data_size);
444 			key->blk_crc = ext2fs_cpu_to_le32(blk_crc);
445 			key->size = ext2fs_cpu_to_le32(keysz + data_size);
446 		} else {
447 			data->num_keys++;
448 			key = data->keyb->keys + data->keys_in_block;
449 			data->keys_in_block++;
450 			key->fsblk = ext2fs_cpu_to_le64(backing_blk_num);
451 			blk_crc = ext2fs_crc32c_le(~0, read_ptr, data_size);
452 			key->blk_crc = ext2fs_cpu_to_le32(blk_crc);
453 			key->size = ext2fs_cpu_to_le32(data_size);
454 		}
455 		dbg_printf("Writing block %llu to offset %llu size %d key %zu\n",
456 		       block_num,
457 		       data->undo_blk_num,
458 		       sz, data->num_keys - 1);
459 		retval = io_channel_write_blk64(data->undo_file,
460 					data->undo_blk_num, sz, read_ptr);
461 		if (retval) {
462 			free(read_ptr);
463 			return retval;
464 		}
465 		data->undo_blk_num++;
466 		free(read_ptr);
467 
468 		/* Write out the key block */
469 		retval = write_undo_indexes(data, 0);
470 		if (retval)
471 			return retval;
472 
473 		/* Next block */
474 		block_num++;
475 	}
476 
477 	return retval;
478 }
479 
480 static errcode_t undo_io_read_error(io_channel channel ATTR((unused)),
481 				    unsigned long block ATTR((unused)),
482 				    int count ATTR((unused)),
483 				    void *data ATTR((unused)),
484 				    size_t size ATTR((unused)),
485 				    int actual,
486 				    errcode_t error ATTR((unused)))
487 {
488 	actual_size = actual;
489 	return error;
490 }
491 
492 static void undo_err_handler_init(io_channel channel)
493 {
494 	channel->read_error = undo_io_read_error;
495 }
496 
497 static int check_filesystem(struct undo_header *hdr, io_channel undo_file,
498 			    unsigned int blocksize, blk64_t super_block,
499 			    io_channel channel)
500 {
501 	struct ext2_super_block super, *sb;
502 	char *buf;
503 	__u32 sb_crc;
504 	errcode_t retval;
505 
506 	io_channel_set_blksize(channel, SUPERBLOCK_OFFSET);
507 	retval = io_channel_read_blk64(channel, 1, -SUPERBLOCK_SIZE, &super);
508 	if (retval)
509 		return retval;
510 
511 	/*
512 	 * Compare the FS and the undo file superblock so that we don't
513 	 * append to something that doesn't match this FS.
514 	 */
515 	retval = ext2fs_get_mem(blocksize, &buf);
516 	if (retval)
517 		return retval;
518 	retval = io_channel_read_blk64(undo_file, super_block,
519 				       -SUPERBLOCK_SIZE, buf);
520 	if (retval)
521 		goto out;
522 	sb = (struct ext2_super_block *)buf;
523 	sb->s_magic = ~sb->s_magic;
524 	if (memcmp(&super, buf, sizeof(super))) {
525 		retval = -1;
526 		goto out;
527 	}
528 	sb_crc = ext2fs_crc32c_le(~0, (unsigned char *)buf, SUPERBLOCK_SIZE);
529 	if (ext2fs_le32_to_cpu(hdr->sb_crc) != sb_crc) {
530 		retval = -1;
531 		goto out;
532 	}
533 
534 out:
535 	ext2fs_free_mem(&buf);
536 	return retval;
537 }
538 
539 /*
540  * Try to re-open the undo file, so that we can resume where we left off.
541  * That way, the user can pass the same undo file to various programs as
542  * part of an FS upgrade instead of having to create multiple files and
543  * then apply them in correct order.
544  */
545 static errcode_t try_reopen_undo_file(int undo_fd,
546 				      struct undo_private_data *data)
547 {
548 	struct undo_header hdr;
549 	struct undo_key *dkey;
550 	ext2fs_struct_stat statbuf;
551 	unsigned int blocksize, fs_blocksize;
552 	blk64_t super_block, lblk;
553 	size_t num_keys, keys_per_block, i;
554 	__u32 hdr_crc, key_crc;
555 	errcode_t retval;
556 
557 	/* Zero size already? */
558 	retval = ext2fs_fstat(undo_fd, &statbuf);
559 	if (retval)
560 		goto bad_file;
561 	if (statbuf.st_size == 0)
562 		goto out;
563 
564 	/* check the file header */
565 	retval = io_channel_read_blk64(data->undo_file, 0, -(int)sizeof(hdr),
566 				       &hdr);
567 	if (retval)
568 		goto bad_file;
569 
570 	if (memcmp(hdr.magic, E2UNDO_MAGIC,
571 		    sizeof(hdr.magic)))
572 		goto bad_file;
573 	hdr_crc = ext2fs_crc32c_le(~0, (unsigned char *)&hdr,
574 				   sizeof(struct undo_header) -
575 				   sizeof(__u32));
576 	if (ext2fs_le32_to_cpu(hdr.header_crc) != hdr_crc)
577 		goto bad_file;
578 	blocksize = ext2fs_le32_to_cpu(hdr.block_size);
579 	fs_blocksize = ext2fs_le32_to_cpu(hdr.fs_block_size);
580 	if (blocksize > E2UNDO_MAX_BLOCK_SIZE ||
581 	    blocksize < E2UNDO_MIN_BLOCK_SIZE ||
582 	    !blocksize || !fs_blocksize)
583 		goto bad_file;
584 	super_block = ext2fs_le64_to_cpu(hdr.super_offset);
585 	num_keys = ext2fs_le64_to_cpu(hdr.num_keys);
586 	io_channel_set_blksize(data->undo_file, blocksize);
587 	/*
588 	 * Do not compare hdr.f_compat with the available compatible
589 	 * features set, because a "missing" compatible feature should
590 	 * not cause any problems.
591 	 */
592 	if (hdr.f_incompat || hdr.f_rocompat)
593 		goto bad_file;
594 
595 	/* Superblock matches this FS? */
596 	if (check_filesystem(&hdr, data->undo_file, blocksize, super_block,
597 			     data->real) != 0) {
598 		retval = EXT2_ET_UNDO_FILE_WRONG;
599 		goto out;
600 	}
601 
602 	/* Try to set ourselves up */
603 	data->tdb_data_size = blocksize;
604 	retval = undo_setup_tdb(data);
605 	if (retval)
606 		goto bad_file;
607 	data->num_keys = num_keys;
608 	data->super_blk_num = super_block;
609 	data->first_key_blk = ext2fs_le64_to_cpu(hdr.key_offset);
610 
611 	/* load the written block map */
612 	keys_per_block = KEYS_PER_BLOCK(data);
613 	lblk = data->first_key_blk;
614 	dbg_printf("nr_keys=%lu, kpb=%zu, blksz=%u\n",
615 		   num_keys, keys_per_block, blocksize);
616 	for (i = 0; i < num_keys; i += keys_per_block) {
617 		size_t j, max_j;
618 		__le32 crc;
619 
620 		data->key_blk_num = lblk;
621 		retval = io_channel_read_blk64(data->undo_file,
622 					       lblk, 1, data->keyb);
623 		if (retval)
624 			goto bad_key_replay;
625 
626 		/* check keys */
627 		if (ext2fs_le32_to_cpu(data->keyb->magic) != KEYBLOCK_MAGIC) {
628 			retval = EXT2_ET_UNDO_FILE_CORRUPT;
629 			goto bad_key_replay;
630 		}
631 		crc = data->keyb->crc;
632 		data->keyb->crc = 0;
633 		key_crc = ext2fs_crc32c_le(~0, (unsigned char *)data->keyb,
634 					   blocksize);
635 		if (ext2fs_le32_to_cpu(crc) != key_crc) {
636 			retval = EXT2_ET_UNDO_FILE_CORRUPT;
637 			goto bad_key_replay;
638 		}
639 
640 		/* load keys from key block */
641 		lblk++;
642 		max_j = data->num_keys - i;
643 		if (max_j > keys_per_block)
644 			max_j = keys_per_block;
645 		for (j = 0, dkey = data->keyb->keys;
646 		     j < max_j;
647 		     j++, dkey++) {
648 			blk64_t fsblk = ext2fs_le64_to_cpu(dkey->fsblk);
649 			blk64_t undo_blk = fsblk * fs_blocksize / blocksize;
650 			size_t size = ext2fs_le32_to_cpu(dkey->size);
651 
652 			ext2fs_mark_block_bitmap_range2(data->written_block_map,
653 					 undo_blk,
654 					(size + blocksize - 1) / blocksize);
655 			lblk += (size + blocksize - 1) / blocksize;
656 			data->undo_blk_num = lblk;
657 			data->keys_in_block = j + 1;
658 		}
659 	}
660 	dbg_printf("Reopen undo, keyblk=%llu undoblk=%llu nrkeys=%zu kib=%zu\n",
661 		   data->key_blk_num, data->undo_blk_num, data->num_keys,
662 		   data->keys_in_block);
663 
664 	data->hdr.state = hdr.state & ~E2UNDO_STATE_FINISHED;
665 	data->hdr.f_compat = hdr.f_compat;
666 	data->hdr.f_incompat = hdr.f_incompat;
667 	data->hdr.f_rocompat = hdr.f_rocompat;
668 	return retval;
669 
670 bad_key_replay:
671 	data->key_blk_num = data->undo_blk_num = 0;
672 	data->keys_in_block = 0;
673 	ext2fs_free_mem(&data->keyb);
674 	ext2fs_free_generic_bitmap(data->written_block_map);
675 	data->tdb_written = 0;
676 	goto out;
677 bad_file:
678 	retval = EXT2_ET_UNDO_FILE_CORRUPT;
679 out:
680 	return retval;
681 }
682 
683 static void undo_atexit(void *p)
684 {
685 	struct undo_private_data *data = p;
686 	errcode_t err;
687 
688 	err = write_undo_indexes(data, 1);
689 	io_channel_close(data->undo_file);
690 
691 	com_err(data->tdb_file, err, "while force-closing undo file");
692 }
693 
694 static errcode_t undo_open(const char *name, int flags, io_channel *channel)
695 {
696 	io_channel	io = NULL;
697 	struct undo_private_data *data = NULL;
698 	int		undo_fd = -1;
699 	errcode_t	retval;
700 
701 	/* We don't support multi-threading, at least for now */
702 	flags &= ~IO_FLAG_THREADS;
703 	if (name == 0)
704 		return EXT2_ET_BAD_DEVICE_NAME;
705 	retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
706 	if (retval)
707 		goto cleanup;
708 	memset(io, 0, sizeof(struct struct_io_channel));
709 	io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
710 	retval = ext2fs_get_mem(sizeof(struct undo_private_data), &data);
711 	if (retval)
712 		goto cleanup;
713 
714 	io->manager = undo_io_manager;
715 	retval = ext2fs_get_mem(strlen(name)+1, &io->name);
716 	if (retval)
717 		goto cleanup;
718 
719 	strcpy(io->name, name);
720 	io->private_data = data;
721 	io->block_size = 1024;
722 	io->read_error = 0;
723 	io->write_error = 0;
724 	io->refcount = 1;
725 
726 	memset(data, 0, sizeof(struct undo_private_data));
727 	data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
728 	data->super_blk_num = 1;
729 	data->first_key_blk = 2;
730 	data->undo_blk_num = 3;
731 
732 	if (undo_io_backing_manager) {
733 		retval = undo_io_backing_manager->open(name, flags,
734 						       &data->real);
735 		if (retval)
736 			goto cleanup;
737 
738 		data->tdb_file = strdup(tdb_file);
739 		if (data->tdb_file == NULL)
740 			goto cleanup;
741 		undo_fd = ext2fs_open_file(data->tdb_file, O_RDWR | O_CREAT,
742 					   0600);
743 		if (undo_fd < 0)
744 			goto cleanup;
745 
746 		retval = undo_io_backing_manager->open(data->tdb_file,
747 						       IO_FLAG_RW,
748 						       &data->undo_file);
749 		if (retval)
750 			goto cleanup;
751 	} else {
752 		data->real = NULL;
753 		data->undo_file = NULL;
754 	}
755 
756 	if (data->real)
757 		io->flags = (io->flags & ~CHANNEL_FLAGS_DISCARD_ZEROES) |
758 			    (data->real->flags & CHANNEL_FLAGS_DISCARD_ZEROES);
759 
760 	/*
761 	 * setup err handler for read so that we know
762 	 * when the backing manager fails do short read
763 	 */
764 	if (data->real)
765 		undo_err_handler_init(data->real);
766 
767 	if (data->undo_file) {
768 		retval = try_reopen_undo_file(undo_fd, data);
769 		if (retval)
770 			goto cleanup;
771 	}
772 	retval = ext2fs_add_exit_fn(undo_atexit, data);
773 	if (retval)
774 		goto cleanup;
775 
776 	*channel = io;
777 	if (undo_fd >= 0)
778 		close(undo_fd);
779 	return retval;
780 
781 cleanup:
782 	ext2fs_remove_exit_fn(undo_atexit, data);
783 	if (undo_fd >= 0)
784 		close(undo_fd);
785 	if (data && data->undo_file)
786 		io_channel_close(data->undo_file);
787 	if (data && data->tdb_file)
788 		free(data->tdb_file);
789 	if (data && data->real)
790 		io_channel_close(data->real);
791 	if (data)
792 		ext2fs_free_mem(&data);
793 	if (io)
794 		ext2fs_free_mem(&io);
795 	return retval;
796 }
797 
798 static errcode_t undo_close(io_channel channel)
799 {
800 	struct undo_private_data *data;
801 	errcode_t	err, retval = 0;
802 
803 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
804 	data = (struct undo_private_data *) channel->private_data;
805 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
806 
807 	if (--channel->refcount > 0)
808 		return 0;
809 	/* Before closing write the file system identity */
810 	if (!getenv("UNDO_IO_SIMULATE_UNFINISHED"))
811 		data->hdr.state = ext2fs_cpu_to_le32(E2UNDO_STATE_FINISHED);
812 	err = write_undo_indexes(data, 1);
813 	ext2fs_remove_exit_fn(undo_atexit, data);
814 	if (data->real)
815 		retval = io_channel_close(data->real);
816 	if (data->tdb_file)
817 		free(data->tdb_file);
818 	if (data->undo_file)
819 		io_channel_close(data->undo_file);
820 	ext2fs_free_mem(&data->keyb);
821 	if (data->written_block_map)
822 		ext2fs_free_generic_bitmap(data->written_block_map);
823 	ext2fs_free_mem(&channel->private_data);
824 	if (channel->name)
825 		ext2fs_free_mem(&channel->name);
826 	ext2fs_free_mem(&channel);
827 
828 	if (err)
829 		return err;
830 	return retval;
831 }
832 
833 static errcode_t undo_set_blksize(io_channel channel, int blksize)
834 {
835 	struct undo_private_data *data;
836 	errcode_t		retval = 0;
837 
838 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
839 	data = (struct undo_private_data *) channel->private_data;
840 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
841 
842 	if (blksize > E2UNDO_MAX_BLOCK_SIZE || blksize < E2UNDO_MIN_BLOCK_SIZE)
843 		return EXT2_ET_INVALID_ARGUMENT;
844 
845 	if (data->real)
846 		retval = io_channel_set_blksize(data->real, blksize);
847 	/*
848 	 * Set the block size used for tdb
849 	 */
850 	if (!data->tdb_data_size || !data->tdb_written)
851 		data->tdb_data_size = blksize;
852 	channel->block_size = blksize;
853 	return retval;
854 }
855 
856 static errcode_t undo_read_blk64(io_channel channel, unsigned long long block,
857 			       int count, void *buf)
858 {
859 	errcode_t	retval = 0;
860 	struct undo_private_data *data;
861 
862 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
863 	data = (struct undo_private_data *) channel->private_data;
864 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
865 
866 	if (data->real)
867 		retval = io_channel_read_blk64(data->real, block, count, buf);
868 
869 	return retval;
870 }
871 
872 static errcode_t undo_read_blk(io_channel channel, unsigned long block,
873 			       int count, void *buf)
874 {
875 	return undo_read_blk64(channel, block, count, buf);
876 }
877 
878 static errcode_t undo_write_blk64(io_channel channel, unsigned long long block,
879 				int count, const void *buf)
880 {
881 	struct undo_private_data *data;
882 	errcode_t	retval = 0;
883 
884 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
885 	data = (struct undo_private_data *) channel->private_data;
886 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
887 	/*
888 	 * First write the existing content into database
889 	 */
890 	retval = undo_write_tdb(channel, block, count);
891 	if (retval)
892 		 return retval;
893 	if (data->real)
894 		retval = io_channel_write_blk64(data->real, block, count, buf);
895 
896 	return retval;
897 }
898 
899 static errcode_t undo_write_blk(io_channel channel, unsigned long block,
900 				int count, const void *buf)
901 {
902 	return undo_write_blk64(channel, block, count, buf);
903 }
904 
905 static errcode_t undo_write_byte(io_channel channel, unsigned long offset,
906 				 int size, const void *buf)
907 {
908 	struct undo_private_data *data;
909 	errcode_t	retval = 0;
910 	ext2_loff_t	location;
911 	unsigned long blk_num, count;;
912 
913 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
914 	data = (struct undo_private_data *) channel->private_data;
915 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
916 
917 	location = offset + data->offset;
918 	blk_num = location/channel->block_size;
919 	/*
920 	 * the size specified may spread across multiple blocks
921 	 * also make sure we account for the fact that block start
922 	 * offset for tdb is different from the backing I/O manager
923 	 * due to possible different block size
924 	 */
925 	count = (size + (location % channel->block_size) +
926 			channel->block_size  -1)/channel->block_size;
927 	retval = undo_write_tdb(channel, blk_num, count);
928 	if (retval)
929 		return retval;
930 	if (data->real && data->real->manager->write_byte)
931 		retval = io_channel_write_byte(data->real, offset, size, buf);
932 
933 	return retval;
934 }
935 
936 static errcode_t undo_discard(io_channel channel, unsigned long long block,
937 			      unsigned long long count)
938 {
939 	struct undo_private_data *data;
940 	errcode_t	retval = 0;
941 	int icount;
942 
943 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
944 	data = (struct undo_private_data *) channel->private_data;
945 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
946 
947 	if (count > INT_MAX)
948 		return EXT2_ET_UNIMPLEMENTED;
949 	icount = count;
950 
951 	/*
952 	 * First write the existing content into database
953 	 */
954 	retval = undo_write_tdb(channel, block, icount);
955 	if (retval)
956 		return retval;
957 	if (data->real)
958 		retval = io_channel_discard(data->real, block, count);
959 
960 	return retval;
961 }
962 
963 static errcode_t undo_zeroout(io_channel channel, unsigned long long block,
964 			      unsigned long long count)
965 {
966 	struct undo_private_data *data;
967 	errcode_t	retval = 0;
968 	int icount;
969 
970 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
971 	data = (struct undo_private_data *) channel->private_data;
972 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
973 
974 	if (count > INT_MAX)
975 		return EXT2_ET_UNIMPLEMENTED;
976 	icount = count;
977 
978 	/*
979 	 * First write the existing content into database
980 	 */
981 	retval = undo_write_tdb(channel, block, icount);
982 	if (retval)
983 		return retval;
984 	if (data->real)
985 		retval = io_channel_zeroout(data->real, block, count);
986 
987 	return retval;
988 }
989 
990 static errcode_t undo_cache_readahead(io_channel channel,
991 				      unsigned long long block,
992 				      unsigned long long count)
993 {
994 	struct undo_private_data *data;
995 	errcode_t	retval = 0;
996 
997 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
998 	data = (struct undo_private_data *) channel->private_data;
999 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1000 
1001 	if (data->real)
1002 		retval = io_channel_cache_readahead(data->real, block, count);
1003 
1004 	return retval;
1005 }
1006 
1007 /*
1008  * Flush data buffers to disk.
1009  */
1010 static errcode_t undo_flush(io_channel channel)
1011 {
1012 	errcode_t	retval = 0;
1013 	struct undo_private_data *data;
1014 
1015 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1016 	data = (struct undo_private_data *) channel->private_data;
1017 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1018 
1019 	if (data->real)
1020 		retval = io_channel_flush(data->real);
1021 
1022 	return retval;
1023 }
1024 
1025 static errcode_t undo_set_option(io_channel channel, const char *option,
1026 				 const char *arg)
1027 {
1028 	errcode_t	retval = 0;
1029 	struct undo_private_data *data;
1030 	unsigned long tmp;
1031 	char *end;
1032 
1033 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1034 	data = (struct undo_private_data *) channel->private_data;
1035 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1036 
1037 	if (!strcmp(option, "tdb_data_size")) {
1038 		if (!arg)
1039 			return EXT2_ET_INVALID_ARGUMENT;
1040 
1041 		tmp = strtoul(arg, &end, 0);
1042 		if (*end)
1043 			return EXT2_ET_INVALID_ARGUMENT;
1044 		if (tmp > E2UNDO_MAX_BLOCK_SIZE || tmp < E2UNDO_MIN_BLOCK_SIZE)
1045 			return EXT2_ET_INVALID_ARGUMENT;
1046 		if (!data->tdb_data_size || !data->tdb_written) {
1047 			data->tdb_written = -1;
1048 			data->tdb_data_size = tmp;
1049 		}
1050 		return 0;
1051 	}
1052 	/*
1053 	 * Need to support offset option to work with
1054 	 * Unix I/O manager
1055 	 */
1056 	if (data->real && data->real->manager->set_option) {
1057 		retval = data->real->manager->set_option(data->real,
1058 							option, arg);
1059 	}
1060 	if (!retval && !strcmp(option, "offset")) {
1061 		if (!arg)
1062 			return EXT2_ET_INVALID_ARGUMENT;
1063 
1064 		tmp = strtoul(arg, &end, 0);
1065 		if (*end)
1066 			return EXT2_ET_INVALID_ARGUMENT;
1067 		data->offset = tmp;
1068 	}
1069 	return retval;
1070 }
1071 
1072 static errcode_t undo_get_stats(io_channel channel, io_stats *stats)
1073 {
1074 	errcode_t	retval = 0;
1075 	struct undo_private_data *data;
1076 
1077 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1078 	data = (struct undo_private_data *) channel->private_data;
1079 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1080 
1081 	if (data->real)
1082 		retval = (data->real->manager->get_stats)(data->real, stats);
1083 
1084 	return retval;
1085 }
1086 
/*
 * Method table of the undo I/O manager.  Each entry points at the
 * corresponding undo_* function defined in this file.
 */
static struct struct_io_manager struct_undo_manager = {
	.magic		= EXT2_ET_MAGIC_IO_MANAGER,
	.name		= "Undo I/O Manager",
	.open		= undo_open,
	.close		= undo_close,
	.set_blksize	= undo_set_blksize,
	.read_blk	= undo_read_blk,
	.write_blk	= undo_write_blk,
	.flush		= undo_flush,
	.write_byte	= undo_write_byte,
	.set_option	= undo_set_option,
	.get_stats	= undo_get_stats,
	.read_blk64	= undo_read_blk64,
	.write_blk64	= undo_write_blk64,
	.discard	= undo_discard,
	.zeroout	= undo_zeroout,
	.cache_readahead	= undo_cache_readahead,
};

/* Public handle to the undo I/O manager. */
io_manager undo_io_manager = &struct_undo_manager;
1107