xref: /freebsd/sbin/fsck_msdosfs/fat.c (revision a3557ef0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2019 Google LLC
5  * Copyright (C) 1995, 1996, 1997 Wolfgang Solfrank
6  * Copyright (c) 1995 Martin Husemann
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 
30 #include <sys/cdefs.h>
31 #ifndef lint
32 __RCSID("$NetBSD: fat.c,v 1.18 2006/06/05 16:51:18 christos Exp $");
33 static const char rcsid[] =
34   "$FreeBSD$";
35 #endif /* not lint */
36 
37 #include <sys/endian.h>
38 #include <sys/queue.h>
39 #include <sys/limits.h>
40 #include <sys/mman.h>
41 #include <sys/param.h>
42 
43 #include <assert.h>
44 #include <stdbool.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <stdio.h>
49 #include <unistd.h>
50 
51 #include "ext.h"
52 #include "fsutil.h"
53 
/* Forward declarations for file-local helpers defined further below. */
static int _readfat(struct fat_descriptor *);
static inline struct bootblock* boot_of_(struct fat_descriptor *);
static inline int fd_of_(struct fat_descriptor *);
static inline bool valid_cl(struct fat_descriptor *, cl_t);
58 
59 
60 /*
61  * Head bitmap for FAT scanning.
62  *
 * FAT32 has up to 2^28 = 256M entries, while FAT16/12 have far fewer.
64  * For each cluster, we use 1 bit to represent if it's a head cluster
65  * (the first cluster of a cluster chain).
66  *
67  * Head bitmap
68  * ===========
69  * Initially, we set all bits to 1.  In readfat(), we traverse the
70  * whole FAT and mark each cluster identified as "next" cluster as
71  * 0.  After the scan, we have a bitmap with 1's to indicate the
72  * corresponding cluster was a "head" cluster.
73  *
74  * We use head bitmap to identify lost chains: a head cluster that was
75  * not being claimed by any file or directories is the head cluster of
76  * a lost chain.
77  *
 * Handling of lost chains
79  * =====================
80  * At the end of scanning, we can easily find all lost chain's heads
81  * by finding out the 1's in the head bitmap.
82  */
83 
/* Bitmap packed into an array of unsigned longs, LONG_BIT bits each. */
typedef struct long_bitmap {
	unsigned long	*map;		/* bit array, 1 bit per cluster */
	size_t		 count;		/* Total set bits in the map */
} long_bitmap_t;
88 
89 static inline void
90 bitmap_clear(long_bitmap_t *lbp, cl_t cl)
91 {
92 	cl_t i = cl / LONG_BIT;
93 	unsigned long clearmask = ~(1UL << (cl % LONG_BIT));
94 
95 	assert((lbp->map[i] & ~clearmask) != 0);
96 	lbp->map[i] &= clearmask;
97 	lbp->count--;
98 }
99 
100 static inline bool
101 bitmap_get(long_bitmap_t *lbp, cl_t cl)
102 {
103 	cl_t i = cl / LONG_BIT;
104 	unsigned long usedbit = 1UL << (cl % LONG_BIT);
105 
106 	return ((lbp->map[i] & usedbit) == usedbit);
107 }
108 
/*
 * Return true when the whole LONG_BIT-sized word containing cluster
 * 'cl' has no bits set.
 *
 * NOTE(review): despite the name, this tests only the aligned word
 * that contains 'cl', not an arbitrary range — presumably callers use
 * it to skip LONG_BIT clusters at a time; confirm at the call sites.
 */
static inline bool
bitmap_none_in_range(long_bitmap_t *lbp, cl_t cl)
{
	cl_t i = cl / LONG_BIT;

	return (lbp->map[i] == 0);
}
116 
/*
 * Return the number of bits currently set in the bitmap.
 */
static inline size_t
bitmap_count(long_bitmap_t *lbp)
{
	return (lbp->count);
}
122 
123 static int
124 bitmap_ctor(long_bitmap_t *lbp, size_t bits, bool allone)
125 {
126 	size_t bitmap_size = roundup2(bits, LONG_BIT) / (LONG_BIT / 8);
127 
128 	free(lbp->map);
129 	lbp->map = calloc(1, bitmap_size);
130 	if (lbp->map == NULL)
131 		return FSFATAL;
132 
133 	if (allone) {
134 		memset(lbp->map, 0xff, bitmap_size);
135 		lbp->count = bits;
136 	} else {
137 		lbp->count = 0;
138 	}
139 	return FSOK;
140 }
141 
/*
 * Release the bit array.  Safe to call on an already-destroyed
 * bitmap because the pointer is reset to NULL.
 */
static void
bitmap_dtor(long_bitmap_t *lbp)
{
	free(lbp->map);
	lbp->map = NULL;
}
148 
149 /*
150  * FAT32 can be as big as 256MiB (2^26 entries * 4 bytes), when we
151  * can not ask the kernel to manage the access, use a simple LRU
152  * cache with chunk size of 128 KiB to manage it.
153  */
/* One LRU cache slot covering fat32_cache_chunk_size bytes of FAT. */
struct fat32_cache_entry {
	TAILQ_ENTRY(fat32_cache_entry)	entries;
	uint8_t			*chunk;		/* pointer to chunk */
	off_t			 addr;		/* offset (chunk-aligned, relative to FAT start) */
	bool			 dirty;		/* dirty bit */
};
160 
/* Cache geometry: 32 chunks of 128 KiB give a 4 MiB FAT32 cache. */
static const size_t	fat32_cache_chunk_size = 131072; /* MAXPHYS */
static const size_t	fat32_cache_size = 4194304;
static const size_t	fat32_cache_entries = 32; /* XXXgcc: cache_size / cache_chunk_size */
164 
165 /*
166  * FAT table descriptor, represents a FAT table that is already loaded
167  * into memory.
168  */
struct fat_descriptor {
	struct bootblock	*boot;		/* backing boot block */
	uint8_t			*fatbuf;	/* FAT buffer or mmap()ed region */
	cl_t			(*get)(struct fat_descriptor *, cl_t);	/* per-FAT-type "next" reader */
	int			(*set)(struct fat_descriptor *, cl_t, cl_t);	/* per-FAT-type "next" writer */
	long_bitmap_t		 headbitmap;	/* 1 bit per cluster: possible chain head */
	int			 fd;		/* device file descriptor */
	bool			 is_mmapped;	/* fatbuf is an mmap() mapping */
	bool			 use_cache;	/* FAT32 LRU chunk cache in use */
	size_t		  	 fatsize;	/* FAT size in bytes */

	size_t			 fat32_cached_chunks;
	TAILQ_HEAD(cachehead, fat32_cache_entry)	fat32_cache_head;	/* LRU list, head = most recent */
	off_t			 fat32_offset;	/* byte offset of FAT on disk */
	struct fat32_cache_entry	*fat32_cache_allentries;	/* backing array of cache slots */
	off_t			 fat32_lastaddr;	/* address of last (partial) chunk */
};
186 
/*
 * Mark cluster 'cl' as no longer being a chain head.
 */
void
fat_clear_cl_head(struct fat_descriptor *fat, cl_t cl)
{
	bitmap_clear(&fat->headbitmap, cl);
}
192 
/*
 * Return whether cluster 'cl' is still considered a chain head.
 */
bool
fat_is_cl_head(struct fat_descriptor *fat, cl_t cl)
{
	return (bitmap_get(&fat->headbitmap, cl));
}
198 
/*
 * Return whether any cluster in the LONG_BIT-aligned word containing
 * 'cl' is still marked as a head (see bitmap_none_in_range()).
 */
static inline bool
fat_is_cl_head_in_range(struct fat_descriptor *fat, cl_t cl)
{
	return (!(bitmap_none_in_range(&fat->headbitmap, cl)));
}
204 
/*
 * Number of clusters still marked as chain heads.
 */
static size_t
fat_get_head_count(struct fat_descriptor *fat)
{
	return (bitmap_count(&fat->headbitmap));
}
210 
211 /*
212  * FAT12 accessors.
213  *
 * FAT12 FATs are small enough that we expect them to always fit in RAM.
215  */
/*
 * Return a pointer to the first byte of the 16-bit window holding
 * FAT12 entry 'cl'.  FAT12 packs two 12-bit entries into every
 * 3 bytes, so entry 'cl' starts at byte offset cl * 1.5.
 */
static inline uint8_t *
fat_get_fat12_ptr(struct fat_descriptor *fat, cl_t cl)
{
	return (fat->fatbuf + ((cl + (cl >> 1))));
}
221 
222 static cl_t
223 fat_get_fat12_next(struct fat_descriptor *fat, cl_t cl)
224 {
225 	const uint8_t	*p;
226 	cl_t	retval;
227 
228 	p = fat_get_fat12_ptr(fat, cl);
229 	retval = le16dec(p);
230 	/* Odd cluster: lower 4 bits belongs to the subsequent cluster */
231 	if ((cl & 1) == 1)
232 		retval >>= 4;
233 	retval &= CLUST12_MASK;
234 
235 	if (retval >= (CLUST_BAD & CLUST12_MASK))
236 		retval |= ~CLUST12_MASK;
237 
238 	return (retval);
239 }
240 
/*
 * Encode 'nextcl' as the "next cluster" value of FAT12 entry 'cl',
 * preserving the 4 bits of the neighboring entry that shares the
 * 16-bit window.  Always returns 0.
 */
static int
fat_set_fat12_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{
	uint8_t	*p;

	/* Truncate 'nextcl' value, if needed */
	nextcl &= CLUST12_MASK;

	p = fat_get_fat12_ptr(fat, cl);

	/*
	 * Read in the 4 bits from the subsequent (for even clusters)
	 * or the preceding (for odd clusters) cluster and combine
	 * it to the nextcl value for encoding
	 */
	if ((cl & 1) == 0) {
		nextcl |= ((p[1] & 0xf0) << 8);
	} else {
		nextcl <<= 4;
		nextcl |= (p[0] & 0x0f);
	}

	le16enc(p, (uint16_t)nextcl);

	return (0);
}
267 
268 /*
269  * FAT16 accessors.
270  *
 * FAT16 FATs are small enough that we expect them to always fit in RAM.
272  */
273 static inline uint8_t *
274 fat_get_fat16_ptr(struct fat_descriptor *fat, cl_t cl)
275 {
276 	return (fat->fatbuf + (cl << 1));
277 }
278 
279 static cl_t
280 fat_get_fat16_next(struct fat_descriptor *fat, cl_t cl)
281 {
282 	const uint8_t	*p;
283 	cl_t	retval;
284 
285 	p = fat_get_fat16_ptr(fat, cl);
286 	retval = le16dec(p) & CLUST16_MASK;
287 
288 	if (retval >= (CLUST_BAD & CLUST16_MASK))
289 		retval |= ~CLUST16_MASK;
290 
291 	return (retval);
292 }
293 
294 static int
295 fat_set_fat16_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
296 {
297 	uint8_t	*p;
298 
299 	/* Truncate 'nextcl' value, if needed */
300 	nextcl &= CLUST16_MASK;
301 
302 	p = fat_get_fat16_ptr(fat, cl);
303 
304 	le16enc(p, (uint16_t)nextcl);
305 
306 	return (0);
307 }
308 
309 /*
310  * FAT32 accessors.
311  */
312 static inline uint8_t *
313 fat_get_fat32_ptr(struct fat_descriptor *fat, cl_t cl)
314 {
315 	return (fat->fatbuf + (cl << 2));
316 }
317 
318 static cl_t
319 fat_get_fat32_next(struct fat_descriptor *fat, cl_t cl)
320 {
321 	const uint8_t	*p;
322 	cl_t	retval;
323 
324 	p = fat_get_fat32_ptr(fat, cl);
325 	retval = le32dec(p) & CLUST32_MASK;
326 
327 	if (retval >= (CLUST_BAD & CLUST32_MASK))
328 		retval |= ~CLUST32_MASK;
329 
330 	return (retval);
331 }
332 
333 static int
334 fat_set_fat32_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
335 {
336 	uint8_t	*p;
337 
338 	/* Truncate 'nextcl' value, if needed */
339 	nextcl &= CLUST32_MASK;
340 
341 	p = fat_get_fat32_ptr(fat, cl);
342 
343 	le32enc(p, (uint32_t)nextcl);
344 
345 	return (0);
346 }
347 
/*
 * I/O size of the FAT32 cache chunk that starts at 'address': a full
 * chunk, except for the chunk at fat32_lastaddr, which covers only
 * the remainder of the FAT.
 *
 * NOTE(review): when fatsize is an exact multiple of the chunk size
 * the remainder computed here is 0; this relies on fat32_lastaddr
 * never matching a real chunk address in that case — verify against
 * the fat32_lastaddr computation in _readfat().
 */
static inline size_t
fat_get_iosize(struct fat_descriptor *fat, off_t address)
{

	if (address == fat->fat32_lastaddr) {
		return (fat->fatsize & ((off_t)fat32_cache_chunk_size - 1));
	} else {
		return (fat32_cache_chunk_size);
	}
}
358 
/*
 * Write a dirty cache chunk back to disk and clear its dirty bit.
 * Clean entries are a no-op.  Returns FSOK on success, FSFATAL when
 * the seek or write fails.
 */
static int
fat_flush_fat32_cache_entry(struct fat_descriptor *fat,
    struct fat32_cache_entry *entry)
{
	int fd;
	off_t fat_addr;
	size_t writesize;

	fd = fd_of_(fat);

	if (!entry->dirty)
		return (FSOK);

	writesize = fat_get_iosize(fat, entry->addr);

	/* entry->addr is relative to the FAT; rebase onto the device. */
	fat_addr = fat->fat32_offset + entry->addr;
	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
	    (size_t)write(fd, entry->chunk, writesize) != writesize) {
			pfatal("Unable to write FAT");
			return (FSFATAL);
	}

	entry->dirty = false;
	return (FSOK);
}
384 
/*
 * Return the cache entry for the chunk containing FAT byte offset
 * 'addr' (rounded down to a chunk boundary), loading it from disk on
 * a miss.  The returned entry is moved to the head of the LRU list;
 * when 'writing' is set its dirty bit is raised.  Returns NULL when
 * flushing the evicted entry or reading the new chunk fails.
 *
 * NOTE(review): on the failure paths the evicted entry has already
 * been unlinked from the LRU list and is not reinserted, shrinking
 * the cache — appears tolerable because these paths are fatal for
 * the check; confirm.
 */
static struct fat32_cache_entry *
fat_get_fat32_cache_entry(struct fat_descriptor *fat, off_t addr,
    bool writing)
{
	int fd;
	struct fat32_cache_entry *entry, *first;
	off_t	fat_addr;
	size_t	rwsize;

	/* Round down to the start of the containing chunk. */
	addr &= ~(fat32_cache_chunk_size - 1);

	first = TAILQ_FIRST(&fat->fat32_cache_head);

	/*
	 * Cache hit: if we already have the chunk, move it to list head
	 */
	TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
		if (entry->addr == addr) {
			if (writing) {
				entry->dirty = true;
			}
			if (entry != first) {

				TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
				TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);
			}
			return (entry);
		}
	}

	/*
	 * Cache miss: detach the chunk at tail of list, overwrite with
	 * the located chunk, and populate with data from disk.
	 */
	entry = TAILQ_LAST(&fat->fat32_cache_head, cachehead);
	TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
	if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
		return (NULL);
	}

	rwsize = fat_get_iosize(fat, addr);
	fat_addr = fat->fat32_offset + addr;
	entry->addr = addr;
	fd = fd_of_(fat);
	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
		(size_t)read(fd, entry->chunk, rwsize) != rwsize) {
		pfatal("Unable to read FAT");
		return (NULL);
	}
	if (writing) {
		entry->dirty = true;
	}
	TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);

	return (entry);
}
441 
442 static inline uint8_t *
443 fat_get_fat32_cached_ptr(struct fat_descriptor *fat, cl_t cl, bool writing)
444 {
445 	off_t addr, off;
446 	struct fat32_cache_entry *entry;
447 
448 	addr = cl << 2;
449 	entry = fat_get_fat32_cache_entry(fat, addr, writing);
450 
451 	if (entry != NULL) {
452 		off = addr & (fat32_cache_chunk_size - 1);
453 		return (entry->chunk + off);
454 	} else {
455 		return (NULL);
456 	}
457 }
458 
459 
460 static cl_t
461 fat_get_fat32_cached_next(struct fat_descriptor *fat, cl_t cl)
462 {
463 	const uint8_t	*p;
464 	cl_t	retval;
465 
466 	p = fat_get_fat32_cached_ptr(fat, cl, false);
467 	if (p != NULL) {
468 		retval = le32dec(p) & CLUST32_MASK;
469 		if (retval >= (CLUST_BAD & CLUST32_MASK))
470 			retval |= ~CLUST32_MASK;
471 	} else {
472 		retval = CLUST_DEAD;
473 	}
474 
475 	return (retval);
476 }
477 
478 static int
479 fat_set_fat32_cached_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
480 {
481 	uint8_t	*p;
482 
483 	/* Truncate 'nextcl' value, if needed */
484 	nextcl &= CLUST32_MASK;
485 
486 	p = fat_get_fat32_cached_ptr(fat, cl, true);
487 	if (p != NULL) {
488 		le32enc(p, (uint32_t)nextcl);
489 		return FSOK;
490 	} else {
491 		return FSFATAL;
492 	}
493 }
494 
495 cl_t fat_get_cl_next(struct fat_descriptor *fat, cl_t cl)
496 {
497 
498 	if (!valid_cl(fat, cl)) {
499 		pfatal("Invalid cluster: %ud", cl);
500 		return CLUST_DEAD;
501 	}
502 
503 	return (fat->get(fat, cl));
504 }
505 
506 int fat_set_cl_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
507 {
508 
509 	if (rdonly) {
510 		pwarn(" (NO WRITE)\n");
511 		return FSFATAL;
512 	}
513 
514 	if (!valid_cl(fat, cl)) {
515 		pfatal("Invalid cluster: %ud", cl);
516 		return FSFATAL;
517 	}
518 
519 	return (fat->set(fat, cl, nextcl));
520 }
521 
/*
 * Internal accessor for the boot block backing this FAT.
 */
static inline struct bootblock*
boot_of_(struct fat_descriptor *fat) {

	return (fat->boot);
}
527 
528 struct bootblock*
529 fat_get_boot(struct fat_descriptor *fat) {
530 
531 	return (boot_of_(fat));
532 }
533 
/*
 * Internal accessor for the device file descriptor of this FAT.
 */
static inline int
fd_of_(struct fat_descriptor *fat)
{
	return (fat->fd);
}
539 
540 int
541 fat_get_fd(struct fat_descriptor * fat)
542 {
543 	return (fd_of_(fat));
544 }
545 
546 /*
547  * Whether a cl is in valid data range.
548  */
/*
 * Whether a cl is in valid data range.
 */
bool
fat_is_valid_cl(struct fat_descriptor *fat, cl_t cl)
{

	return (valid_cl(fat, cl));
}
555 
556 static inline bool
557 valid_cl(struct fat_descriptor *fat, cl_t cl)
558 {
559 	const struct bootblock *boot = boot_of_(fat);
560 
561 	return (cl >= CLUST_FIRST && cl < boot->NumClusters);
562 }
563 
564 /*
565  * The first 2 FAT entries contain pseudo-cluster numbers with the following
566  * layout:
567  *
568  * 31...... ........ ........ .......0
569  * rrrr1111 11111111 11111111 mmmmmmmm         FAT32 entry 0
570  * rrrrsh11 11111111 11111111 11111xxx         FAT32 entry 1
571  *
572  *                   11111111 mmmmmmmm         FAT16 entry 0
573  *                   sh111111 11111xxx         FAT16 entry 1
574  *
575  * r = reserved
576  * m = BPB media ID byte
577  * s = clean flag (1 = dismounted; 0 = still mounted)
578  * h = hard error flag (1 = ok; 0 = I/O error)
579  * x = any value ok
580  */
/*
 * Inspect the clean/no-error flags in FAT entry 1 (layout above).
 * Returns 1 when the recognized flags say the file system was
 * dismounted cleanly and without I/O errors, 0 otherwise (including
 * an unrecognized FAT signature).  FAT12 has no such flags and
 * always yields 0.
 *
 * NOTE(review): the malloc-failure path also returns 1 — presumably
 * deliberate so OOM does not force a full check; confirm with the
 * caller's use of this value.
 */
int
checkdirty(int fs, struct bootblock *boot)
{
	off_t off;
	u_char *buffer;
	int ret = 0;
	size_t len;

	if (boot->ClustMask != CLUST16_MASK && boot->ClustMask != CLUST32_MASK)
		return 0;

	/* Byte offset of the first FAT (right after reserved sectors). */
	off = boot->bpbResSectors;
	off *= boot->bpbBytesPerSec;

	buffer = malloc(len = boot->bpbBytesPerSec);
	if (buffer == NULL) {
		perr("No space for FAT sectors (%zu)", len);
		return 1;
	}

	if (lseek(fs, off, SEEK_SET) != off) {
		perr("Unable to read FAT");
		goto err;
	}

	if ((size_t)read(fs, buffer, boot->bpbBytesPerSec) !=
	    boot->bpbBytesPerSec) {
		perr("Unable to read FAT");
		goto err;
	}

	/*
	 * If we don't understand the FAT, then the file system must be
	 * assumed to be unclean.
	 */
	if (buffer[0] != boot->bpbMedia || buffer[1] != 0xff)
		goto err;
	if (boot->ClustMask == CLUST16_MASK) {
		if ((buffer[2] & 0xf8) != 0xf8 || (buffer[3] & 0x3f) != 0x3f)
			goto err;
	} else {
		if (buffer[2] != 0xff || (buffer[3] & 0x0f) != 0x0f
		    || (buffer[4] & 0xf8) != 0xf8 || buffer[5] != 0xff
		    || buffer[6] != 0xff || (buffer[7] & 0x03) != 0x03)
			goto err;
	}

	/*
	 * Now check the actual clean flag (and the no-error flag).
	 */
	if (boot->ClustMask == CLUST16_MASK) {
		if ((buffer[3] & 0xc0) == 0xc0)
			ret = 1;
	} else {
		if ((buffer[7] & 0x0c) == 0x0c)
			ret = 1;
	}

err:
	free(buffer);
	return ret;
}
643 
/*
 * Set the "clean" flag in FAT entry 1 on disk (bit 15 of the FAT16
 * entry, bit 27 of the FAT32 entry — see the layout comment above
 * checkdirty()).  Returns FSOK on success, FSERROR on I/O failure,
 * 0 for FAT12 (which has no flag), and 1 on malloc failure.
 */
int
cleardirty(struct fat_descriptor *fat)
{
	int fd, ret = FSERROR;
	struct bootblock *boot;
	u_char *buffer;
	size_t len;
	off_t off;

	boot = boot_of_(fat);
	fd = fd_of_(fat);

	if (boot->ClustMask != CLUST16_MASK && boot->ClustMask != CLUST32_MASK)
		return 0;

	/* Byte offset of the first FAT (right after reserved sectors). */
	off = boot->bpbResSectors;
	off *= boot->bpbBytesPerSec;

	buffer = malloc(len = boot->bpbBytesPerSec);
	if (buffer == NULL) {
		perr("No memory for FAT sectors (%zu)", len);
		return 1;
	}

	if ((size_t)pread(fd, buffer, len, off) != len) {
		perr("Unable to read FAT");
		goto err;
	}

	/* Raise the clean bit in FAT entry 1. */
	if (boot->ClustMask == CLUST16_MASK) {
		buffer[3] |= 0x80;
	} else {
		buffer[7] |= 0x08;
	}

	if ((size_t)pwrite(fd, buffer, len, off) != len) {
		perr("Unable to write FAT");
		goto err;
	}

	ret = FSOK;

err:
	free(buffer);
	return ret;
}
690 
691 /*
692  * Read a FAT from disk. Returns 1 if successful, 0 otherwise.
693  */
694 static int
695 _readfat(struct fat_descriptor *fat)
696 {
697 	int fd;
698 	size_t i;
699 	off_t off;
700 	size_t readsize;
701 	struct bootblock *boot;
702 	struct fat32_cache_entry *entry;
703 
704 	boot = boot_of_(fat);
705 	fd = fd_of_(fat);
706 	fat->fatsize = boot->FATsecs * boot->bpbBytesPerSec;
707 
708 	off = boot->bpbResSectors;
709 	off *= boot->bpbBytesPerSec;
710 
711 	fat->is_mmapped = false;
712 	fat->use_cache = false;
713 
714 	/* Attempt to mmap() first */
715 	if (allow_mmap) {
716 		fat->fatbuf = mmap(NULL, fat->fatsize,
717 				PROT_READ | (rdonly ? 0 : PROT_WRITE),
718 				MAP_SHARED, fd_of_(fat), off);
719 		if (fat->fatbuf != MAP_FAILED) {
720 			fat->is_mmapped = true;
721 			return 1;
722 		}
723 	}
724 
725 	/*
726 	 * Unfortunately, we were unable to mmap().
727 	 *
728 	 * Only use the cache manager when it's necessary, that is,
729 	 * when the FAT is sufficiently large; in that case, only
730 	 * read in the first 4 MiB of FAT into memory, and split the
731 	 * buffer into chunks and insert to the LRU queue to populate
732 	 * the cache with data.
733 	 */
734 	if (boot->ClustMask == CLUST32_MASK &&
735 	    fat->fatsize >= fat32_cache_size) {
736 		readsize = fat32_cache_size;
737 		fat->use_cache = true;
738 
739 		fat->fat32_offset = boot->bpbResSectors * boot->bpbBytesPerSec;
740 		fat->fat32_lastaddr = fat->fatsize & ~(fat32_cache_chunk_size);
741 	} else {
742 		readsize = fat->fatsize;
743 	}
744 	fat->fatbuf = malloc(readsize);
745 	if (fat->fatbuf == NULL) {
746 		perr("No space for FAT (%zu)", readsize);
747 		return 0;
748 	}
749 
750 	if (lseek(fd, off, SEEK_SET) != off) {
751 		perr("Unable to read FAT");
752 		goto err;
753 	}
754 	if ((size_t)read(fd, fat->fatbuf, readsize) != readsize) {
755 		perr("Unable to read FAT");
756 		goto err;
757 	}
758 
759 	/*
760 	 * When cache is used, split the buffer into chunks, and
761 	 * connect the buffer into the cache.
762 	 */
763 	if (fat->use_cache) {
764 		TAILQ_INIT(&fat->fat32_cache_head);
765 		entry = calloc(fat32_cache_entries, sizeof(*entry));
766 		if (entry == NULL) {
767 			perr("No space for FAT cache (%zu of %zu)",
768 			    fat32_cache_entries, sizeof(entry));
769 			goto err;
770 		}
771 		for (i = 0; i < fat32_cache_entries; i++) {
772 			entry[i].addr = fat32_cache_chunk_size * i;
773 			entry[i].chunk = &fat->fatbuf[entry[i].addr];
774 			TAILQ_INSERT_TAIL(&fat->fat32_cache_head,
775 			    &entry[i], entries);
776 		}
777 		fat->fat32_cache_allentries = entry;
778 	}
779 
780 	return 1;
781 
782 err:
783 	free(fat->fatbuf);
784 	fat->fatbuf = NULL;
785 	return 0;
786 }
787 
/*
 * Release everything backing the FAT: the mmap() mapping or the
 * malloc()ed buffer (plus the FAT32 cache entry array when the
 * cache was in use), and the head bitmap.
 */
static void
releasefat(struct fat_descriptor *fat)
{
	if (fat->is_mmapped) {
		munmap(fat->fatbuf, fat->fatsize);
	} else {
		if (fat->use_cache) {
			free(fat->fat32_cache_allentries);
			fat->fat32_cache_allentries = NULL;
		}
		free(fat->fatbuf);
	}
	fat->fatbuf = NULL;
	bitmap_dtor(&fat->headbitmap);
}
803 
804 /*
805  * Read or map a FAT and populate head bitmap
806  */
/*
 * Load the FAT via _readfat(), install the FAT12/16/32 accessors,
 * validate/repair the signature in FAT entries 0 and 1, then walk
 * every entry once to count free/bad clusters, truncate obviously
 * broken chains, and build the head bitmap.  On success *fp receives
 * the new descriptor (owned by the caller); on FSFATAL it is NULL.
 */
int
readfat(int fs, struct bootblock *boot, struct fat_descriptor **fp)
{
	struct fat_descriptor *fat;
	u_char *buffer, *p;
	cl_t cl, nextcl;
	int ret = FSOK;

	boot->NumFree = boot->NumBad = 0;

	fat = calloc(1, sizeof(struct fat_descriptor));
	if (fat == NULL) {
		perr("No space for FAT descriptor");
		return FSFATAL;
	}

	fat->fd = fs;
	fat->boot = boot;

	if (!_readfat(fat)) {
		free(fat);
		return FSFATAL;
	}
	buffer = fat->fatbuf;

	/* Populate accessors */
	switch(boot->ClustMask) {
	case CLUST12_MASK:
		fat->get = fat_get_fat12_next;
		fat->set = fat_set_fat12_next;
		break;
	case CLUST16_MASK:
		fat->get = fat_get_fat16_next;
		fat->set = fat_set_fat16_next;
		break;
	case CLUST32_MASK:
		/* Cached accessors only when mmap failed and the FAT is large. */
		if (fat->is_mmapped || !fat->use_cache) {
			fat->get = fat_get_fat32_next;
			fat->set = fat_set_fat32_next;
		} else {
			fat->get = fat_get_fat32_cached_next;
			fat->set = fat_set_fat32_cached_next;
		}
		break;
	default:
		pfatal("Invalid ClustMask: %d", boot->ClustMask);
		releasefat(fat);
		free(fat);
		return FSFATAL;
	}

	/* Start with every cluster marked as a possible chain head. */
	if (bitmap_ctor(&fat->headbitmap, boot->NumClusters,
	    true) != FSOK) {
		perr("No space for head bitmap for FAT clusters (%zu)",
		    (size_t)boot->NumClusters);
		releasefat(fat);
		free(fat);
		return FSFATAL;
	}

	if (buffer[0] != boot->bpbMedia
	    || buffer[1] != 0xff || buffer[2] != 0xff
	    || (boot->ClustMask == CLUST16_MASK && buffer[3] != 0xff)
	    || (boot->ClustMask == CLUST32_MASK
		&& ((buffer[3]&0x0f) != 0x0f
		    || buffer[4] != 0xff || buffer[5] != 0xff
		    || buffer[6] != 0xff || (buffer[7]&0x0f) != 0x0f))) {

		/* Windows 95 OSR2 (and possibly any later) changes
		 * the FAT signature to 0xXXffff7f for FAT16 and to
		 * 0xXXffff0fffffff07 for FAT32 upon boot, to know that the
		 * file system is dirty if it doesn't reboot cleanly.
		 * Check this special condition before erroring out.
		 */
		if (buffer[0] == boot->bpbMedia && buffer[1] == 0xff
		    && buffer[2] == 0xff
		    && ((boot->ClustMask == CLUST16_MASK && buffer[3] == 0x7f)
			|| (boot->ClustMask == CLUST32_MASK
			    && buffer[3] == 0x0f && buffer[4] == 0xff
			    && buffer[5] == 0xff && buffer[6] == 0xff
			    && buffer[7] == 0x07)))
			ret |= FSDIRTY;
		else {
			/* just some odd byte sequence in FAT */

			switch (boot->ClustMask) {
			case CLUST32_MASK:
				pwarn("%s (%02x%02x%02x%02x%02x%02x%02x%02x)\n",
				      "FAT starts with odd byte sequence",
				      buffer[0], buffer[1], buffer[2], buffer[3],
				      buffer[4], buffer[5], buffer[6], buffer[7]);
				break;
			case CLUST16_MASK:
				pwarn("%s (%02x%02x%02x%02x)\n",
				    "FAT starts with odd byte sequence",
				    buffer[0], buffer[1], buffer[2], buffer[3]);
				break;
			default:
				pwarn("%s (%02x%02x%02x)\n",
				    "FAT starts with odd byte sequence",
				    buffer[0], buffer[1], buffer[2]);
				break;
			}

			if (ask(1, "Correct")) {
				ret |= FSFATMOD;
				p = buffer;

				/* Rewrite the canonical signature bytes. */
				*p++ = (u_char)boot->bpbMedia;
				*p++ = 0xff;
				*p++ = 0xff;
				switch (boot->ClustMask) {
				case CLUST16_MASK:
					*p++ = 0xff;
					break;
				case CLUST32_MASK:
					*p++ = 0x0f;
					*p++ = 0xff;
					*p++ = 0xff;
					*p++ = 0xff;
					*p++ = 0x0f;
					break;
				default:
					break;
				}
			}
		}
	}

	/*
	 * Traverse the FAT table and populate head map.  Initially, we
	 * consider all clusters as possible head cluster (beginning of
	 * a file or directory), and traverse the whole allocation table
	 * by marking every non-head nodes as such (detailed below) and
	 * fix obvious issues while we walk.
	 *
	 * For each "next" cluster, the possible values are:
	 *
	 * a) CLUST_FREE or CLUST_BAD.  The *current* cluster can't be a
	 *    head node.
	 * b) An out-of-range value. The only fix would be to truncate at
	 *    the cluster.
	 * c) A valid cluster.  It means that cluster (nextcl) is not a
	 *    head cluster.  Note that during the scan, every cluster is
	 *    expected to be seen for at most once, and when we saw them
	 *    twice, it means a cross-linked chain which should be
	 *    truncated at the current cluster.
	 *
	 * After scan, the remaining set bits indicates all possible
	 * head nodes, because they were never claimed by any other
	 * node as the next node, but we do not know if these chains
	 * would end with a valid EOF marker.  We will check that in
	 * checkchain() at a later time when checking directories,
	 * where these head nodes would be marked as non-head.
	 *
	 * In the final pass, all head nodes should be cleared, and if
	 * there is still head nodes, these would be leaders of lost
	 * chain.
	 */
	for (cl = CLUST_FIRST; cl < boot->NumClusters; cl++) {
		nextcl = fat_get_cl_next(fat, cl);

		/* Check if the next cluster number is valid */
		if (nextcl == CLUST_FREE) {
			/* Save a hint for next free cluster */
			if (boot->FSNext == 0) {
				boot->FSNext = cl;
			}
			if (fat_is_cl_head(fat, cl)) {
				fat_clear_cl_head(fat, cl);
			}
			boot->NumFree++;
		} else if (nextcl == CLUST_BAD) {
			if (fat_is_cl_head(fat, cl)) {
				fat_clear_cl_head(fat, cl);
			}
			boot->NumBad++;
		} else if (!valid_cl(fat, nextcl) && nextcl < CLUST_RSRVD) {
			pwarn("Cluster %u continues with out of range "
			    "cluster number %u\n",
			    cl,
			    nextcl & boot->ClustMask);
			if (ask(0, "Truncate")) {
				ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
				ret |= FSFATMOD;
			}
		} else if (valid_cl(fat, nextcl)) {
			if (fat_is_cl_head(fat, nextcl)) {
				fat_clear_cl_head(fat, nextcl);
			} else {
				pwarn("Cluster %u crossed another chain at %u\n",
				    cl, nextcl);
				if (ask(0, "Truncate")) {
					ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
					ret |= FSFATMOD;
				}
			}
		}

	}

	if (ret & FSFATAL) {
		releasefat(fat);
		free(fat);
		*fp = NULL;
	} else
		*fp = fat;
	return ret;
}
1016 
1017 /*
1018  * Get type of reserved cluster
1019  */
1020 const char *
1021 rsrvdcltype(cl_t cl)
1022 {
1023 	if (cl == CLUST_FREE)
1024 		return "free";
1025 	if (cl < CLUST_BAD)
1026 		return "reserved";
1027 	if (cl > CLUST_BAD)
1028 		return "as EOF";
1029 	return "bad";
1030 }
1031 
1032 /*
1033  * Examine a cluster chain for errors and count its size.
1034  */
/*
 * Walk the chain starting at 'head', storing its length (in
 * clusters) in *chainsize.  A chain ending in an EOF marker yields
 * FSOK; otherwise the user is offered a truncate/clear fix and the
 * result is the fat_set_cl_next() status ORed with FSFATMOD, or
 * FSERROR if the fix is declined.
 */
int
checkchain(struct fat_descriptor *fat, cl_t head, size_t *chainsize)
{
	cl_t prev_cl, current_cl, next_cl;
	const char *op;

	/*
	 * We expect the caller to give us a real, unvisited 'head'
	 * cluster, and it must be a valid cluster.  While scanning the
	 * FAT table, we already excluded all clusters that were claimed
	 * as a "next" cluster.  Assert these conditions.
	 */
	assert(valid_cl(fat, head));
	assert(fat_is_cl_head(fat, head));

	/*
	 * Immediately mark the 'head' cluster that we are about to visit.
	 */
	fat_clear_cl_head(fat, head);

	/*
	 * The allocation of a non-zero sized file or directory is
	 * represented as a singly linked list, and the tail node
	 * would be the EOF marker (>=CLUST_EOFS).
	 *
	 * With a valid head node at hand, we expect all subsequent
	 * cluster to be either a not yet seen and valid cluster (we
	 * would continue counting), or the EOF marker (we conclude
	 * the scan of this chain).
	 *
	 * For all other cases, the chain is invalid, and the only
	 * viable fix would be to truncate at the current node (mark
	 * it as EOF) when the next node violates that.
	 */
	*chainsize = 0;
	prev_cl = current_cl = head;
	for (next_cl = fat_get_cl_next(fat, current_cl);
	    valid_cl(fat, next_cl);
	    prev_cl = current_cl, current_cl = next_cl, next_cl = fat_get_cl_next(fat, current_cl))
		(*chainsize)++;

	/* A natural end */
	if (next_cl >= CLUST_EOFS) {
		(*chainsize)++;
		return FSOK;
	}

	/*
	 * The chain ended with an out-of-range cluster number.
	 *
	 * If the current node is e.g. CLUST_FREE, CLUST_BAD, etc.,
	 * it should not be present in a chain and we have to truncate
	 * at the previous node.
	 *
	 * If the current cluster points to an invalid cluster, the
	 * current cluster might have useful data and we truncate at
	 * the current cluster instead.
	 */
	if (next_cl == CLUST_FREE || next_cl >= CLUST_RSRVD) {
		pwarn("Cluster chain starting at %u ends with cluster marked %s\n",
		    head, rsrvdcltype(next_cl));
		current_cl = prev_cl;
	} else {
		pwarn("Cluster chain starting at %u ends with cluster out of range (%u)\n",
		    head,
		    next_cl & boot_of_(fat)->ClustMask);
		(*chainsize)++;
	}

	/* A zero-length chain has nothing to keep: clear it instead. */
	if (*chainsize > 0) {
		op = "Truncate";
		next_cl = CLUST_EOF;
	} else {
		op = "Clear";
		next_cl = CLUST_FREE;
	}
	if (ask(0, "%s", op)) {
		return (fat_set_cl_next(fat, current_cl, next_cl) | FSFATMOD);
	} else {
		return (FSERROR);
	}
}
1117 
1118 /*
1119  * Clear cluster chain from head.
1120  */
1121 void
1122 clearchain(struct fat_descriptor *fat, cl_t head)
1123 {
1124 	cl_t current_cl, next_cl;
1125 	struct bootblock *boot = boot_of_(fat);
1126 
1127 	current_cl = head;
1128 
1129 	while (valid_cl(fat, current_cl)) {
1130 		next_cl = fat_get_cl_next(fat, current_cl);
1131 		(void)fat_set_cl_next(fat, current_cl, CLUST_FREE);
1132 		boot->NumFree++;
1133 		current_cl = next_cl;
1134 	}
1135 
1136 }
1137 
1138 /*
1139  * Overwrite the n-th FAT with FAT0
1140  */
/*
 * Overwrite the n-th FAT with FAT0, copying fat32_cache_size bytes
 * at a time through fat->fatbuf (the last blob may be shorter).
 * Returns FSOK, FSFATAL when FAT0 cannot be read, or FSERROR when
 * the copy cannot be written; only the first failure is reported.
 */
static int
copyfat(struct fat_descriptor *fat, int n)
{
	size_t	rwsize, tailsize, blobs, i;
	off_t	dst_off, src_off;
	struct bootblock *boot;
	int ret, fd;

	ret = FSOK;
	fd = fd_of_(fat);
	boot = boot_of_(fat);

	/* Number of copy blobs and the size of the final, partial one. */
	blobs = howmany(fat->fatsize, fat32_cache_size);
	tailsize = fat->fatsize % fat32_cache_size;
	if (tailsize == 0) {
		tailsize = fat32_cache_size;
	}
	rwsize = fat32_cache_size;

	src_off = fat->fat32_offset;
	dst_off = boot->bpbResSectors + n * boot->FATsecs;
	dst_off *= boot->bpbBytesPerSec;

	for (i = 0; i < blobs;
	    i++, src_off += fat32_cache_size, dst_off += fat32_cache_size) {
		if (i == blobs - 1) {
			rwsize = tailsize;
		}
		if ((lseek(fd, src_off, SEEK_SET) != src_off ||
		    (size_t)read(fd, fat->fatbuf, rwsize) != rwsize) &&
		    ret == FSOK) {
			perr("Unable to read FAT0");
			ret = FSFATAL;
			continue;
		}
		if ((lseek(fd, dst_off, SEEK_SET) != dst_off ||
			(size_t)write(fd, fat->fatbuf, rwsize) != rwsize) &&
			ret == FSOK) {
			perr("Unable to write FAT %d", n);
			ret = FSERROR;
		}
	}
	return (ret);
}
1185 
1186 /*
1187  * Write out FAT
1188  */
1189 int
1190 writefat(struct fat_descriptor *fat)
1191 {
1192 	u_int i;
1193 	size_t writesz;
1194 	off_t dst_base;
1195 	int ret = FSOK, fd;
1196 	struct bootblock *boot;
1197 	struct fat32_cache_entry *entry;
1198 
1199 	boot = boot_of_(fat);
1200 	fd = fd_of_(fat);
1201 
1202 	if (fat->use_cache) {
1203 		/*
1204 		 * Attempt to flush all in-flight cache, and bail out
1205 		 * if we encountered an error (but only emit error
1206 		 * message once).  Stop proceeding with copyfat()
1207 		 * if any flush failed.
1208 		 */
1209 		TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
1210 			if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
1211 				if (ret == FSOK) {
1212 					perr("Unable to write FAT");
1213 					ret = FSFATAL;
1214 				}
1215 			}
1216 		}
1217 		if (ret != FSOK)
1218 			return (ret);
1219 
1220 		/* Update backup copies of FAT, error is not fatal */
1221 		for (i = 1; i < boot->bpbFATs; i++) {
1222 			if (copyfat(fat, i) != FSOK)
1223 				ret = FSERROR;
1224 		}
1225 	} else {
1226 		writesz = fat->fatsize;
1227 
1228 		for (i = fat->is_mmapped ? 1 : 0; i < boot->bpbFATs; i++) {
1229 			dst_base = boot->bpbResSectors + i * boot->FATsecs;
1230 			dst_base *= boot->bpbBytesPerSec;
1231 			if ((lseek(fd, dst_base, SEEK_SET) != dst_base ||
1232 			    (size_t)write(fd, fat->fatbuf, writesz) != writesz) &&
1233 			    ret == FSOK) {
1234 				perr("Unable to write FAT %d", i);
1235 				ret = ((i == 0) ? FSFATAL : FSERROR);
1236 			}
1237 		}
1238 	}
1239 
1240 	return ret;
1241 }
1242 
1243 /*
1244  * Check a complete in-memory FAT for lost cluster chains
1245  */
1246 int
1247 checklost(struct fat_descriptor *fat)
1248 {
1249 	cl_t head;
1250 	int mod = FSOK;
1251 	int dosfs, ret;
1252 	size_t chains, chainlength;
1253 	struct bootblock *boot;
1254 
1255 	dosfs = fd_of_(fat);
1256 	boot = boot_of_(fat);
1257 
1258 	/*
1259 	 * At this point, we have already traversed all directories.
1260 	 * All remaining chain heads in the bitmap are heads of lost
1261 	 * chains.
1262 	 */
1263 	chains = fat_get_head_count(fat);
1264 	for (head = CLUST_FIRST;
1265 	    chains > 0 && head < boot->NumClusters;
1266 	    ) {
1267 		/*
1268 		 * We expect the bitmap to be very sparse, so skip if
1269 		 * the range is full of 0's
1270 		 */
1271 		if (head % LONG_BIT == 0 &&
1272 		    !fat_is_cl_head_in_range(fat, head)) {
1273 			head += LONG_BIT;
1274 			continue;
1275 		}
1276 		if (fat_is_cl_head(fat, head)) {
1277 			ret = checkchain(fat, head, &chainlength);
1278 			if (ret != FSERROR && chainlength > 0) {
1279 				pwarn("Lost cluster chain at cluster %u\n"
1280 				    "%zd Cluster(s) lost\n",
1281 				    head, chainlength);
1282 				mod |= ret = reconnect(fat, head,
1283 				    chainlength);
1284 			}
1285 			if (mod & FSFATAL)
1286 				break;
1287 			if (ret == FSERROR && ask(0, "Clear")) {
1288 				clearchain(fat, head);
1289 				mod |= FSFATMOD;
1290 			}
1291 			chains--;
1292 		}
1293 		head++;
1294 	}
1295 
1296 	finishlf();
1297 
1298 	if (boot->bpbFSInfo) {
1299 		ret = 0;
1300 		if (boot->FSFree != 0xffffffffU &&
1301 		    boot->FSFree != boot->NumFree) {
1302 			pwarn("Free space in FSInfo block (%u) not correct (%u)\n",
1303 			      boot->FSFree, boot->NumFree);
1304 			if (ask(1, "Fix")) {
1305 				boot->FSFree = boot->NumFree;
1306 				ret = 1;
1307 			}
1308 		}
1309 		if (boot->FSNext != 0xffffffffU &&
1310 		    (boot->FSNext >= boot->NumClusters ||
1311 		    (boot->NumFree && fat_get_cl_next(fat, boot->FSNext) != CLUST_FREE))) {
1312 			pwarn("Next free cluster in FSInfo block (%u) %s\n",
1313 			      boot->FSNext,
1314 			      (boot->FSNext >= boot->NumClusters) ? "invalid" : "not free");
1315 			if (ask(1, "Fix"))
1316 				for (head = CLUST_FIRST; head < boot->NumClusters; head++)
1317 					if (fat_get_cl_next(fat, head) == CLUST_FREE) {
1318 						boot->FSNext = head;
1319 						ret = 1;
1320 						break;
1321 					}
1322 		}
1323 		if (ret)
1324 			mod |= writefsinfo(dosfs, boot);
1325 	}
1326 
1327 	return mod;
1328 }
1329