xref: /freebsd/sbin/fsck_msdosfs/fat.c (revision e0c4386e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2019 Google LLC
5  * Copyright (C) 1995, 1996, 1997 Wolfgang Solfrank
6  * Copyright (c) 1995 Martin Husemann
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 
30 #include <sys/cdefs.h>
31 #ifndef lint
32 __RCSID("$NetBSD: fat.c,v 1.18 2006/06/05 16:51:18 christos Exp $");
33 #endif /* not lint */
34 
35 #include <sys/endian.h>
36 #include <sys/queue.h>
37 #include <sys/limits.h>
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 
41 #include <assert.h>
42 #include <stdbool.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <ctype.h>
46 #include <stdio.h>
47 #include <unistd.h>
48 
49 #include "ext.h"
50 #include "fsutil.h"
51 
52 static int _readfat(struct fat_descriptor *);
53 static inline struct bootblock* boot_of_(struct fat_descriptor *);
54 static inline int fd_of_(struct fat_descriptor *);
55 static inline bool valid_cl(struct fat_descriptor *, cl_t);
56 
57 
58 /*
59  * Head bitmap for FAT scanning.
60  *
61  * FAT32 have up to 2^28 = 256M entries, and FAT16/12 have much less.
62  * For each cluster, we use 1 bit to represent if it's a head cluster
63  * (the first cluster of a cluster chain).
64  *
65  * Head bitmap
66  * ===========
67  * Initially, we set all bits to 1.  In readfat(), we traverse the
68  * whole FAT and mark each cluster identified as "next" cluster as
69  * 0.  After the scan, we have a bitmap with 1's to indicate the
70  * corresponding cluster was a "head" cluster.
71  *
72  * We use head bitmap to identify lost chains: a head cluster that was
73  * not being claimed by any file or directories is the head cluster of
74  * a lost chain.
75  *
76  * Handle of lost chains
77  * =====================
78  * At the end of scanning, we can easily find all lost chain's heads
79  * by finding out the 1's in the head bitmap.
80  */
81 
82 typedef struct long_bitmap {
83 	unsigned long	*map;
84 	size_t		 count;		/* Total set bits in the map */
85 } long_bitmap_t;
86 
87 static inline void
88 bitmap_clear(long_bitmap_t *lbp, cl_t cl)
89 {
90 	cl_t i = cl / LONG_BIT;
91 	unsigned long clearmask = ~(1UL << (cl % LONG_BIT));
92 
93 	assert((lbp->map[i] & ~clearmask) != 0);
94 	lbp->map[i] &= clearmask;
95 	lbp->count--;
96 }
97 
98 static inline bool
99 bitmap_get(long_bitmap_t *lbp, cl_t cl)
100 {
101 	cl_t i = cl / LONG_BIT;
102 	unsigned long usedbit = 1UL << (cl % LONG_BIT);
103 
104 	return ((lbp->map[i] & usedbit) == usedbit);
105 }
106 
107 static inline bool
108 bitmap_none_in_range(long_bitmap_t *lbp, cl_t cl)
109 {
110 	cl_t i = cl / LONG_BIT;
111 
112 	return (lbp->map[i] == 0);
113 }
114 
115 static inline size_t
116 bitmap_count(long_bitmap_t *lbp)
117 {
118 	return (lbp->count);
119 }
120 
121 static int
122 bitmap_ctor(long_bitmap_t *lbp, size_t bits, bool allone)
123 {
124 	size_t bitmap_size = roundup2(bits, LONG_BIT) / (LONG_BIT / 8);
125 
126 	free(lbp->map);
127 	lbp->map = calloc(1, bitmap_size);
128 	if (lbp->map == NULL)
129 		return FSFATAL;
130 
131 	if (allone) {
132 		memset(lbp->map, 0xff, bitmap_size);
133 		lbp->count = bits;
134 	} else {
135 		lbp->count = 0;
136 	}
137 	return FSOK;
138 }
139 
140 static void
141 bitmap_dtor(long_bitmap_t *lbp)
142 {
143 	free(lbp->map);
144 	lbp->map = NULL;
145 }
146 
147 /*
148  * FAT32 can be as big as 256MiB (2^26 entries * 4 bytes), when we
149  * can not ask the kernel to manage the access, use a simple LRU
150  * cache with chunk size of 128 KiB to manage it.
151  */
152 struct fat32_cache_entry {
153 	TAILQ_ENTRY(fat32_cache_entry)	entries;
154 	uint8_t			*chunk;		/* pointer to chunk */
155 	off_t			 addr;		/* offset */
156 	bool			 dirty;		/* dirty bit */
157 };
158 
159 static const size_t	fat32_cache_chunk_size = 131072; /* MAXPHYS */
160 static const size_t	fat32_cache_size = 4194304;
161 static const size_t	fat32_cache_entries = 32; /* XXXgcc: cache_size / cache_chunk_size */
162 
163 /*
164  * FAT table descriptor, represents a FAT table that is already loaded
165  * into memory.
166  */
167 struct fat_descriptor {
168 	struct bootblock	*boot;
169 	uint8_t			*fatbuf;
170 	cl_t			(*get)(struct fat_descriptor *, cl_t);
171 	int			(*set)(struct fat_descriptor *, cl_t, cl_t);
172 	long_bitmap_t		 headbitmap;
173 	int			 fd;
174 	bool			 is_mmapped;
175 	bool			 use_cache;
176 	size_t		  	 fatsize;
177 
178 	size_t			 fat32_cached_chunks;
179 	TAILQ_HEAD(cachehead, fat32_cache_entry)	fat32_cache_head;
180 	struct fat32_cache_entry	*fat32_cache_allentries;
181 	off_t			 fat32_offset;
182 	off_t			 fat32_lastaddr;
183 };
184 
185 void
186 fat_clear_cl_head(struct fat_descriptor *fat, cl_t cl)
187 {
188 	bitmap_clear(&fat->headbitmap, cl);
189 }
190 
191 bool
192 fat_is_cl_head(struct fat_descriptor *fat, cl_t cl)
193 {
194 	return (bitmap_get(&fat->headbitmap, cl));
195 }
196 
197 static inline bool
198 fat_is_cl_head_in_range(struct fat_descriptor *fat, cl_t cl)
199 {
200 	return (!(bitmap_none_in_range(&fat->headbitmap, cl)));
201 }
202 
203 static size_t
204 fat_get_head_count(struct fat_descriptor *fat)
205 {
206 	return (bitmap_count(&fat->headbitmap));
207 }
208 
209 /*
210  * FAT12 accessors.
211  *
212  * FAT12s are sufficiently small, expect it to always fit in the RAM.
213  */
214 static inline uint8_t *
215 fat_get_fat12_ptr(struct fat_descriptor *fat, cl_t cl)
216 {
217 	return (fat->fatbuf + ((cl + (cl >> 1))));
218 }
219 
220 static cl_t
221 fat_get_fat12_next(struct fat_descriptor *fat, cl_t cl)
222 {
223 	const uint8_t	*p;
224 	cl_t	retval;
225 
226 	p = fat_get_fat12_ptr(fat, cl);
227 	retval = le16dec(p);
228 	/* Odd cluster: lower 4 bits belongs to the subsequent cluster */
229 	if ((cl & 1) == 1)
230 		retval >>= 4;
231 	retval &= CLUST12_MASK;
232 
233 	if (retval >= (CLUST_BAD & CLUST12_MASK))
234 		retval |= ~CLUST12_MASK;
235 
236 	return (retval);
237 }
238 
239 static int
240 fat_set_fat12_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
241 {
242 	uint8_t	*p;
243 
244 	/* Truncate 'nextcl' value, if needed */
245 	nextcl &= CLUST12_MASK;
246 
247 	p = fat_get_fat12_ptr(fat, cl);
248 
249 	/*
250 	 * Read in the 4 bits from the subsequent (for even clusters)
251 	 * or the preceding (for odd clusters) cluster and combine
252 	 * it to the nextcl value for encoding
253 	 */
254 	if ((cl & 1) == 0) {
255 		nextcl |= ((p[1] & 0xf0) << 8);
256 	} else {
257 		nextcl <<= 4;
258 		nextcl |= (p[0] & 0x0f);
259 	}
260 
261 	le16enc(p, (uint16_t)nextcl);
262 
263 	return (0);
264 }
265 
266 /*
267  * FAT16 accessors.
268  *
269  * FAT16s are sufficiently small, expect it to always fit in the RAM.
270  */
271 static inline uint8_t *
272 fat_get_fat16_ptr(struct fat_descriptor *fat, cl_t cl)
273 {
274 	return (fat->fatbuf + (cl << 1));
275 }
276 
277 static cl_t
278 fat_get_fat16_next(struct fat_descriptor *fat, cl_t cl)
279 {
280 	const uint8_t	*p;
281 	cl_t	retval;
282 
283 	p = fat_get_fat16_ptr(fat, cl);
284 	retval = le16dec(p) & CLUST16_MASK;
285 
286 	if (retval >= (CLUST_BAD & CLUST16_MASK))
287 		retval |= ~CLUST16_MASK;
288 
289 	return (retval);
290 }
291 
292 static int
293 fat_set_fat16_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
294 {
295 	uint8_t	*p;
296 
297 	/* Truncate 'nextcl' value, if needed */
298 	nextcl &= CLUST16_MASK;
299 
300 	p = fat_get_fat16_ptr(fat, cl);
301 
302 	le16enc(p, (uint16_t)nextcl);
303 
304 	return (0);
305 }
306 
307 /*
308  * FAT32 accessors.
309  */
310 static inline uint8_t *
311 fat_get_fat32_ptr(struct fat_descriptor *fat, cl_t cl)
312 {
313 	return (fat->fatbuf + (cl << 2));
314 }
315 
316 static cl_t
317 fat_get_fat32_next(struct fat_descriptor *fat, cl_t cl)
318 {
319 	const uint8_t	*p;
320 	cl_t	retval;
321 
322 	p = fat_get_fat32_ptr(fat, cl);
323 	retval = le32dec(p) & CLUST32_MASK;
324 
325 	if (retval >= (CLUST_BAD & CLUST32_MASK))
326 		retval |= ~CLUST32_MASK;
327 
328 	return (retval);
329 }
330 
331 static int
332 fat_set_fat32_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
333 {
334 	uint8_t	*p;
335 
336 	/* Truncate 'nextcl' value, if needed */
337 	nextcl &= CLUST32_MASK;
338 
339 	p = fat_get_fat32_ptr(fat, cl);
340 
341 	le32enc(p, (uint32_t)nextcl);
342 
343 	return (0);
344 }
345 
346 static inline size_t
347 fat_get_iosize(struct fat_descriptor *fat, off_t address)
348 {
349 
350 	if (address == fat->fat32_lastaddr) {
351 		return (fat->fatsize & ((off_t)fat32_cache_chunk_size - 1));
352 	} else {
353 		return (fat32_cache_chunk_size);
354 	}
355 }
356 
357 static int
358 fat_flush_fat32_cache_entry(struct fat_descriptor *fat,
359     struct fat32_cache_entry *entry)
360 {
361 	int fd;
362 	off_t fat_addr;
363 	size_t writesize;
364 
365 	fd = fd_of_(fat);
366 
367 	if (!entry->dirty)
368 		return (FSOK);
369 
370 	writesize = fat_get_iosize(fat, entry->addr);
371 
372 	fat_addr = fat->fat32_offset + entry->addr;
373 	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
374 	    (size_t)write(fd, entry->chunk, writesize) != writesize) {
375 			pfatal("Unable to write FAT");
376 			return (FSFATAL);
377 	}
378 
379 	entry->dirty = false;
380 	return (FSOK);
381 }
382 
383 static struct fat32_cache_entry *
384 fat_get_fat32_cache_entry(struct fat_descriptor *fat, off_t addr,
385     bool writing)
386 {
387 	int fd;
388 	struct fat32_cache_entry *entry, *first;
389 	off_t	fat_addr;
390 	size_t	rwsize;
391 
392 	addr &= ~(fat32_cache_chunk_size - 1);
393 
394 	first = TAILQ_FIRST(&fat->fat32_cache_head);
395 
396 	/*
397 	 * Cache hit: if we already have the chunk, move it to list head
398 	 */
399 	TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
400 		if (entry->addr == addr) {
401 			if (writing) {
402 				entry->dirty = true;
403 			}
404 			if (entry != first) {
405 
406 				TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
407 				TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);
408 			}
409 			return (entry);
410 		}
411 	}
412 
413 	/*
414 	 * Cache miss: detach the chunk at tail of list, overwrite with
415 	 * the located chunk, and populate with data from disk.
416 	 */
417 	entry = TAILQ_LAST(&fat->fat32_cache_head, cachehead);
418 	TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
419 	if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
420 		return (NULL);
421 	}
422 
423 	rwsize = fat_get_iosize(fat, addr);
424 	fat_addr = fat->fat32_offset + addr;
425 	entry->addr = addr;
426 	fd = fd_of_(fat);
427 	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
428 		(size_t)read(fd, entry->chunk, rwsize) != rwsize) {
429 		pfatal("Unable to read FAT");
430 		return (NULL);
431 	}
432 	if (writing) {
433 		entry->dirty = true;
434 	}
435 	TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);
436 
437 	return (entry);
438 }
439 
440 static inline uint8_t *
441 fat_get_fat32_cached_ptr(struct fat_descriptor *fat, cl_t cl, bool writing)
442 {
443 	off_t addr, off;
444 	struct fat32_cache_entry *entry;
445 
446 	addr = cl << 2;
447 	entry = fat_get_fat32_cache_entry(fat, addr, writing);
448 
449 	if (entry != NULL) {
450 		off = addr & (fat32_cache_chunk_size - 1);
451 		return (entry->chunk + off);
452 	} else {
453 		return (NULL);
454 	}
455 }
456 
457 
458 static cl_t
459 fat_get_fat32_cached_next(struct fat_descriptor *fat, cl_t cl)
460 {
461 	const uint8_t	*p;
462 	cl_t	retval;
463 
464 	p = fat_get_fat32_cached_ptr(fat, cl, false);
465 	if (p != NULL) {
466 		retval = le32dec(p) & CLUST32_MASK;
467 		if (retval >= (CLUST_BAD & CLUST32_MASK))
468 			retval |= ~CLUST32_MASK;
469 	} else {
470 		retval = CLUST_DEAD;
471 	}
472 
473 	return (retval);
474 }
475 
476 static int
477 fat_set_fat32_cached_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
478 {
479 	uint8_t	*p;
480 
481 	/* Truncate 'nextcl' value, if needed */
482 	nextcl &= CLUST32_MASK;
483 
484 	p = fat_get_fat32_cached_ptr(fat, cl, true);
485 	if (p != NULL) {
486 		le32enc(p, (uint32_t)nextcl);
487 		return FSOK;
488 	} else {
489 		return FSFATAL;
490 	}
491 }
492 
493 cl_t fat_get_cl_next(struct fat_descriptor *fat, cl_t cl)
494 {
495 
496 	if (!valid_cl(fat, cl)) {
497 		pfatal("Invalid cluster: %ud", cl);
498 		return CLUST_DEAD;
499 	}
500 
501 	return (fat->get(fat, cl));
502 }
503 
504 int fat_set_cl_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
505 {
506 
507 	if (rdonly) {
508 		pwarn(" (NO WRITE)\n");
509 		return FSFATAL;
510 	}
511 
512 	if (!valid_cl(fat, cl)) {
513 		pfatal("Invalid cluster: %ud", cl);
514 		return FSFATAL;
515 	}
516 
517 	return (fat->set(fat, cl, nextcl));
518 }
519 
520 static inline struct bootblock*
521 boot_of_(struct fat_descriptor *fat) {
522 
523 	return (fat->boot);
524 }
525 
526 struct bootblock*
527 fat_get_boot(struct fat_descriptor *fat) {
528 
529 	return (boot_of_(fat));
530 }
531 
532 static inline int
533 fd_of_(struct fat_descriptor *fat)
534 {
535 	return (fat->fd);
536 }
537 
538 int
539 fat_get_fd(struct fat_descriptor * fat)
540 {
541 	return (fd_of_(fat));
542 }
543 
544 /*
545  * Whether a cl is in valid data range.
546  */
547 bool
548 fat_is_valid_cl(struct fat_descriptor *fat, cl_t cl)
549 {
550 
551 	return (valid_cl(fat, cl));
552 }
553 
554 static inline bool
555 valid_cl(struct fat_descriptor *fat, cl_t cl)
556 {
557 	const struct bootblock *boot = boot_of_(fat);
558 
559 	return (cl >= CLUST_FIRST && cl < boot->NumClusters);
560 }
561 
562 /*
563  * The first 2 FAT entries contain pseudo-cluster numbers with the following
564  * layout:
565  *
566  * 31...... ........ ........ .......0
567  * rrrr1111 11111111 11111111 mmmmmmmm         FAT32 entry 0
568  * rrrrsh11 11111111 11111111 11111xxx         FAT32 entry 1
569  *
570  *                   11111111 mmmmmmmm         FAT16 entry 0
571  *                   sh111111 11111xxx         FAT16 entry 1
572  *
573  * r = reserved
574  * m = BPB media ID byte
575  * s = clean flag (1 = dismounted; 0 = still mounted)
576  * h = hard error flag (1 = ok; 0 = I/O error)
577  * x = any value ok
578  */
579 int
580 checkdirty(int fs, struct bootblock *boot)
581 {
582 	off_t off;
583 	u_char *buffer;
584 	int ret = 0;
585 	size_t len;
586 
587 	if (boot->ClustMask != CLUST16_MASK && boot->ClustMask != CLUST32_MASK)
588 		return 0;
589 
590 	off = boot->bpbResSectors;
591 	off *= boot->bpbBytesPerSec;
592 
593 	buffer = malloc(len = boot->bpbBytesPerSec);
594 	if (buffer == NULL) {
595 		perr("No space for FAT sectors (%zu)", len);
596 		return 1;
597 	}
598 
599 	if (lseek(fs, off, SEEK_SET) != off) {
600 		perr("Unable to read FAT");
601 		goto err;
602 	}
603 
604 	if ((size_t)read(fs, buffer, boot->bpbBytesPerSec) !=
605 	    boot->bpbBytesPerSec) {
606 		perr("Unable to read FAT");
607 		goto err;
608 	}
609 
610 	/*
611 	 * If we don't understand the FAT, then the file system must be
612 	 * assumed to be unclean.
613 	 */
614 	if (buffer[0] != boot->bpbMedia || buffer[1] != 0xff)
615 		goto err;
616 	if (boot->ClustMask == CLUST16_MASK) {
617 		if ((buffer[2] & 0xf8) != 0xf8 || (buffer[3] & 0x3f) != 0x3f)
618 			goto err;
619 	} else {
620 		if (buffer[2] != 0xff || (buffer[3] & 0x0f) != 0x0f
621 		    || (buffer[4] & 0xf8) != 0xf8 || buffer[5] != 0xff
622 		    || buffer[6] != 0xff || (buffer[7] & 0x03) != 0x03)
623 			goto err;
624 	}
625 
626 	/*
627 	 * Now check the actual clean flag (and the no-error flag).
628 	 */
629 	if (boot->ClustMask == CLUST16_MASK) {
630 		if ((buffer[3] & 0xc0) == 0xc0)
631 			ret = 1;
632 	} else {
633 		if ((buffer[7] & 0x0c) == 0x0c)
634 			ret = 1;
635 	}
636 
637 err:
638 	free(buffer);
639 	return ret;
640 }
641 
642 int
643 cleardirty(struct fat_descriptor *fat)
644 {
645 	int fd, ret = FSERROR;
646 	struct bootblock *boot;
647 	u_char *buffer;
648 	size_t len;
649 	off_t off;
650 
651 	boot = boot_of_(fat);
652 	fd = fd_of_(fat);
653 
654 	if (boot->ClustMask != CLUST16_MASK && boot->ClustMask != CLUST32_MASK)
655 		return 0;
656 
657 	off = boot->bpbResSectors;
658 	off *= boot->bpbBytesPerSec;
659 
660 	buffer = malloc(len = boot->bpbBytesPerSec);
661 	if (buffer == NULL) {
662 		perr("No memory for FAT sectors (%zu)", len);
663 		return 1;
664 	}
665 
666 	if ((size_t)pread(fd, buffer, len, off) != len) {
667 		perr("Unable to read FAT");
668 		goto err;
669 	}
670 
671 	if (boot->ClustMask == CLUST16_MASK) {
672 		buffer[3] |= 0x80;
673 	} else {
674 		buffer[7] |= 0x08;
675 	}
676 
677 	if ((size_t)pwrite(fd, buffer, len, off) != len) {
678 		perr("Unable to write FAT");
679 		goto err;
680 	}
681 
682 	ret = FSOK;
683 
684 err:
685 	free(buffer);
686 	return ret;
687 }
688 
689 /*
690  * Read a FAT from disk. Returns 1 if successful, 0 otherwise.
691  */
692 static int
693 _readfat(struct fat_descriptor *fat)
694 {
695 	int fd;
696 	size_t i;
697 	off_t off;
698 	size_t readsize;
699 	struct bootblock *boot;
700 	struct fat32_cache_entry *entry;
701 
702 	boot = boot_of_(fat);
703 	fd = fd_of_(fat);
704 	fat->fatsize = boot->FATsecs * boot->bpbBytesPerSec;
705 
706 	off = boot->bpbResSectors;
707 	off *= boot->bpbBytesPerSec;
708 
709 	fat->is_mmapped = false;
710 	fat->use_cache = false;
711 
712 	/* Attempt to mmap() first */
713 	if (allow_mmap) {
714 		fat->fatbuf = mmap(NULL, fat->fatsize,
715 				PROT_READ | (rdonly ? 0 : PROT_WRITE),
716 				MAP_SHARED, fd_of_(fat), off);
717 		if (fat->fatbuf != MAP_FAILED) {
718 			fat->is_mmapped = true;
719 			return 1;
720 		}
721 	}
722 
723 	/*
724 	 * Unfortunately, we were unable to mmap().
725 	 *
726 	 * Only use the cache manager when it's necessary, that is,
727 	 * when the FAT is sufficiently large; in that case, only
728 	 * read in the first 4 MiB of FAT into memory, and split the
729 	 * buffer into chunks and insert to the LRU queue to populate
730 	 * the cache with data.
731 	 */
732 	if (boot->ClustMask == CLUST32_MASK &&
733 	    fat->fatsize >= fat32_cache_size) {
734 		readsize = fat32_cache_size;
735 		fat->use_cache = true;
736 
737 		fat->fat32_offset = boot->bpbResSectors * boot->bpbBytesPerSec;
738 		fat->fat32_lastaddr = fat->fatsize & ~(fat32_cache_chunk_size);
739 	} else {
740 		readsize = fat->fatsize;
741 	}
742 	fat->fatbuf = malloc(readsize);
743 	if (fat->fatbuf == NULL) {
744 		perr("No space for FAT (%zu)", readsize);
745 		return 0;
746 	}
747 
748 	if (lseek(fd, off, SEEK_SET) != off) {
749 		perr("Unable to read FAT");
750 		goto err;
751 	}
752 	if ((size_t)read(fd, fat->fatbuf, readsize) != readsize) {
753 		perr("Unable to read FAT");
754 		goto err;
755 	}
756 
757 	/*
758 	 * When cache is used, split the buffer into chunks, and
759 	 * connect the buffer into the cache.
760 	 */
761 	if (fat->use_cache) {
762 		TAILQ_INIT(&fat->fat32_cache_head);
763 		entry = calloc(fat32_cache_entries, sizeof(*entry));
764 		if (entry == NULL) {
765 			perr("No space for FAT cache (%zu of %zu)",
766 			    fat32_cache_entries, sizeof(entry));
767 			goto err;
768 		}
769 		for (i = 0; i < fat32_cache_entries; i++) {
770 			entry[i].addr = fat32_cache_chunk_size * i;
771 			entry[i].chunk = &fat->fatbuf[entry[i].addr];
772 			TAILQ_INSERT_TAIL(&fat->fat32_cache_head,
773 			    &entry[i], entries);
774 		}
775 		fat->fat32_cache_allentries = entry;
776 	}
777 
778 	return 1;
779 
780 err:
781 	free(fat->fatbuf);
782 	fat->fatbuf = NULL;
783 	return 0;
784 }
785 
786 static void
787 releasefat(struct fat_descriptor *fat)
788 {
789 	if (fat->is_mmapped) {
790 		munmap(fat->fatbuf, fat->fatsize);
791 	} else {
792 		if (fat->use_cache) {
793 			free(fat->fat32_cache_allentries);
794 			fat->fat32_cache_allentries = NULL;
795 		}
796 		free(fat->fatbuf);
797 	}
798 	fat->fatbuf = NULL;
799 	bitmap_dtor(&fat->headbitmap);
800 }
801 
802 /*
803  * Read or map a FAT and populate head bitmap
804  */
805 int
806 readfat(int fs, struct bootblock *boot, struct fat_descriptor **fp)
807 {
808 	struct fat_descriptor *fat;
809 	u_char *buffer, *p;
810 	cl_t cl, nextcl;
811 	int ret = FSOK;
812 
813 	boot->NumFree = boot->NumBad = 0;
814 
815 	fat = calloc(1, sizeof(struct fat_descriptor));
816 	if (fat == NULL) {
817 		perr("No space for FAT descriptor");
818 		return FSFATAL;
819 	}
820 
821 	fat->fd = fs;
822 	fat->boot = boot;
823 
824 	if (!_readfat(fat)) {
825 		free(fat);
826 		return FSFATAL;
827 	}
828 	buffer = fat->fatbuf;
829 
830 	/* Populate accessors */
831 	switch(boot->ClustMask) {
832 	case CLUST12_MASK:
833 		fat->get = fat_get_fat12_next;
834 		fat->set = fat_set_fat12_next;
835 		break;
836 	case CLUST16_MASK:
837 		fat->get = fat_get_fat16_next;
838 		fat->set = fat_set_fat16_next;
839 		break;
840 	case CLUST32_MASK:
841 		if (fat->is_mmapped || !fat->use_cache) {
842 			fat->get = fat_get_fat32_next;
843 			fat->set = fat_set_fat32_next;
844 		} else {
845 			fat->get = fat_get_fat32_cached_next;
846 			fat->set = fat_set_fat32_cached_next;
847 		}
848 		break;
849 	default:
850 		pfatal("Invalid ClustMask: %d", boot->ClustMask);
851 		releasefat(fat);
852 		free(fat);
853 		return FSFATAL;
854 	}
855 
856 	if (bitmap_ctor(&fat->headbitmap, boot->NumClusters,
857 	    true) != FSOK) {
858 		perr("No space for head bitmap for FAT clusters (%zu)",
859 		    (size_t)boot->NumClusters);
860 		releasefat(fat);
861 		free(fat);
862 		return FSFATAL;
863 	}
864 
865 	if (buffer[0] != boot->bpbMedia
866 	    || buffer[1] != 0xff || buffer[2] != 0xff
867 	    || (boot->ClustMask == CLUST16_MASK && buffer[3] != 0xff)
868 	    || (boot->ClustMask == CLUST32_MASK
869 		&& ((buffer[3]&0x0f) != 0x0f
870 		    || buffer[4] != 0xff || buffer[5] != 0xff
871 		    || buffer[6] != 0xff || (buffer[7]&0x0f) != 0x0f))) {
872 
873 		/* Windows 95 OSR2 (and possibly any later) changes
874 		 * the FAT signature to 0xXXffff7f for FAT16 and to
875 		 * 0xXXffff0fffffff07 for FAT32 upon boot, to know that the
876 		 * file system is dirty if it doesn't reboot cleanly.
877 		 * Check this special condition before errorring out.
878 		 */
879 		if (buffer[0] == boot->bpbMedia && buffer[1] == 0xff
880 		    && buffer[2] == 0xff
881 		    && ((boot->ClustMask == CLUST16_MASK && buffer[3] == 0x7f)
882 			|| (boot->ClustMask == CLUST32_MASK
883 			    && buffer[3] == 0x0f && buffer[4] == 0xff
884 			    && buffer[5] == 0xff && buffer[6] == 0xff
885 			    && buffer[7] == 0x07)))
886 			ret |= FSDIRTY;
887 		else {
888 			/* just some odd byte sequence in FAT */
889 
890 			switch (boot->ClustMask) {
891 			case CLUST32_MASK:
892 				pwarn("%s (%02x%02x%02x%02x%02x%02x%02x%02x)\n",
893 				      "FAT starts with odd byte sequence",
894 				      buffer[0], buffer[1], buffer[2], buffer[3],
895 				      buffer[4], buffer[5], buffer[6], buffer[7]);
896 				break;
897 			case CLUST16_MASK:
898 				pwarn("%s (%02x%02x%02x%02x)\n",
899 				    "FAT starts with odd byte sequence",
900 				    buffer[0], buffer[1], buffer[2], buffer[3]);
901 				break;
902 			default:
903 				pwarn("%s (%02x%02x%02x)\n",
904 				    "FAT starts with odd byte sequence",
905 				    buffer[0], buffer[1], buffer[2]);
906 				break;
907 			}
908 
909 			if (ask(1, "Correct")) {
910 				ret |= FSFATMOD;
911 				p = buffer;
912 
913 				*p++ = (u_char)boot->bpbMedia;
914 				*p++ = 0xff;
915 				*p++ = 0xff;
916 				switch (boot->ClustMask) {
917 				case CLUST16_MASK:
918 					*p++ = 0xff;
919 					break;
920 				case CLUST32_MASK:
921 					*p++ = 0x0f;
922 					*p++ = 0xff;
923 					*p++ = 0xff;
924 					*p++ = 0xff;
925 					*p++ = 0x0f;
926 					break;
927 				default:
928 					break;
929 				}
930 			}
931 		}
932 	}
933 
934 	/*
935 	 * Traverse the FAT table and populate head map.  Initially, we
936 	 * consider all clusters as possible head cluster (beginning of
937 	 * a file or directory), and traverse the whole allocation table
938 	 * by marking every non-head nodes as such (detailed below) and
939 	 * fix obvious issues while we walk.
940 	 *
941 	 * For each "next" cluster, the possible values are:
942 	 *
943 	 * a) CLUST_FREE or CLUST_BAD.  The *current* cluster can't be a
944 	 *    head node.
945 	 * b) An out-of-range value. The only fix would be to truncate at
946 	 *    the cluster.
947 	 * c) A valid cluster.  It means that cluster (nextcl) is not a
948 	 *    head cluster.  Note that during the scan, every cluster is
949 	 *    expected to be seen for at most once, and when we saw them
950 	 *    twice, it means a cross-linked chain which should be
951 	 *    truncated at the current cluster.
952 	 *
953 	 * After scan, the remaining set bits indicates all possible
954 	 * head nodes, because they were never claimed by any other
955 	 * node as the next node, but we do not know if these chains
956 	 * would end with a valid EOF marker.  We will check that in
957 	 * checkchain() at a later time when checking directories,
958 	 * where these head nodes would be marked as non-head.
959 	 *
960 	 * In the final pass, all head nodes should be cleared, and if
961 	 * there is still head nodes, these would be leaders of lost
962 	 * chain.
963 	 */
964 	for (cl = CLUST_FIRST; cl < boot->NumClusters; cl++) {
965 		nextcl = fat_get_cl_next(fat, cl);
966 
967 		/* Check if the next cluster number is valid */
968 		if (nextcl == CLUST_FREE) {
969 			/* Save a hint for next free cluster */
970 			if (boot->FSNext == 0) {
971 				boot->FSNext = cl;
972 			}
973 			if (fat_is_cl_head(fat, cl)) {
974 				fat_clear_cl_head(fat, cl);
975 			}
976 			boot->NumFree++;
977 		} else if (nextcl == CLUST_BAD) {
978 			if (fat_is_cl_head(fat, cl)) {
979 				fat_clear_cl_head(fat, cl);
980 			}
981 			boot->NumBad++;
982 		} else if (!valid_cl(fat, nextcl) && nextcl < CLUST_RSRVD) {
983 			pwarn("Cluster %u continues with out of range "
984 			    "cluster number %u\n",
985 			    cl,
986 			    nextcl & boot->ClustMask);
987 			if (ask(0, "Truncate")) {
988 				ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
989 				ret |= FSFATMOD;
990 			}
991 		} else if (valid_cl(fat, nextcl)) {
992 			if (fat_is_cl_head(fat, nextcl)) {
993 				fat_clear_cl_head(fat, nextcl);
994 			} else {
995 				pwarn("Cluster %u crossed another chain at %u\n",
996 				    cl, nextcl);
997 				if (ask(0, "Truncate")) {
998 					ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
999 					ret |= FSFATMOD;
1000 				}
1001 			}
1002 		}
1003 
1004 	}
1005 
1006 	if (ret & FSFATAL) {
1007 		releasefat(fat);
1008 		free(fat);
1009 		*fp = NULL;
1010 	} else
1011 		*fp = fat;
1012 	return ret;
1013 }
1014 
1015 /*
1016  * Get type of reserved cluster
1017  */
1018 const char *
1019 rsrvdcltype(cl_t cl)
1020 {
1021 	if (cl == CLUST_FREE)
1022 		return "free";
1023 	if (cl < CLUST_BAD)
1024 		return "reserved";
1025 	if (cl > CLUST_BAD)
1026 		return "as EOF";
1027 	return "bad";
1028 }
1029 
1030 /*
1031  * Examine a cluster chain for errors and count its size.
1032  */
1033 int
1034 checkchain(struct fat_descriptor *fat, cl_t head, size_t *chainsize)
1035 {
1036 	cl_t prev_cl, current_cl, next_cl;
1037 	const char *op;
1038 
1039 	/*
1040 	 * We expect that the caller to give us a real, unvisited 'head'
1041 	 * cluster, and it must be a valid cluster.  While scanning the
1042 	 * FAT table, we already excluded all clusters that was claimed
1043 	 * as a "next" cluster.  Assert all the three conditions.
1044 	 */
1045 	assert(valid_cl(fat, head));
1046 	assert(fat_is_cl_head(fat, head));
1047 
1048 	/*
1049 	 * Immediately mark the 'head' cluster that we are about to visit.
1050 	 */
1051 	fat_clear_cl_head(fat, head);
1052 
1053 	/*
1054 	 * The allocation of a non-zero sized file or directory is
1055 	 * represented as a singly linked list, and the tail node
1056 	 * would be the EOF marker (>=CLUST_EOFS).
1057 	 *
1058 	 * With a valid head node at hand, we expect all subsequent
1059 	 * cluster to be either a not yet seen and valid cluster (we
1060 	 * would continue counting), or the EOF marker (we conclude
1061 	 * the scan of this chain).
1062 	 *
1063 	 * For all other cases, the chain is invalid, and the only
1064 	 * viable fix would be to truncate at the current node (mark
1065 	 * it as EOF) when the next node violates that.
1066 	 */
1067 	*chainsize = 0;
1068 	prev_cl = current_cl = head;
1069 	for (next_cl = fat_get_cl_next(fat, current_cl);
1070 	    valid_cl(fat, next_cl);
1071 	    prev_cl = current_cl, current_cl = next_cl, next_cl = fat_get_cl_next(fat, current_cl))
1072 		(*chainsize)++;
1073 
1074 	/* A natural end */
1075 	if (next_cl >= CLUST_EOFS) {
1076 		(*chainsize)++;
1077 		return FSOK;
1078 	}
1079 
1080 	/*
1081 	 * The chain ended with an out-of-range cluster number.
1082 	 *
1083 	 * If the current node is e.g. CLUST_FREE, CLUST_BAD, etc.,
1084 	 * it should not be present in a chain and we has to truncate
1085 	 * at the previous node.
1086 	 *
1087 	 * If the current cluster points to an invalid cluster, the
1088 	 * current cluster might have useful data and we truncate at
1089 	 * the current cluster instead.
1090 	 */
1091 	if (next_cl == CLUST_FREE || next_cl >= CLUST_RSRVD) {
1092 		pwarn("Cluster chain starting at %u ends with cluster marked %s\n",
1093 		    head, rsrvdcltype(next_cl));
1094 		current_cl = prev_cl;
1095 	} else {
1096 		pwarn("Cluster chain starting at %u ends with cluster out of range (%u)\n",
1097 		    head,
1098 		    next_cl & boot_of_(fat)->ClustMask);
1099 		(*chainsize)++;
1100 	}
1101 
1102 	if (*chainsize > 0) {
1103 		op = "Truncate";
1104 		next_cl = CLUST_EOF;
1105 	} else {
1106 		op = "Clear";
1107 		next_cl = CLUST_FREE;
1108 	}
1109 	if (ask(0, "%s", op)) {
1110 		return (fat_set_cl_next(fat, current_cl, next_cl) | FSFATMOD);
1111 	} else {
1112 		return (FSERROR);
1113 	}
1114 }
1115 
1116 /*
1117  * Clear cluster chain from head.
1118  */
1119 void
1120 clearchain(struct fat_descriptor *fat, cl_t head)
1121 {
1122 	cl_t current_cl, next_cl;
1123 	struct bootblock *boot = boot_of_(fat);
1124 
1125 	current_cl = head;
1126 
1127 	while (valid_cl(fat, current_cl)) {
1128 		next_cl = fat_get_cl_next(fat, current_cl);
1129 		(void)fat_set_cl_next(fat, current_cl, CLUST_FREE);
1130 		boot->NumFree++;
1131 		current_cl = next_cl;
1132 	}
1133 
1134 }
1135 
1136 /*
1137  * Overwrite the n-th FAT with FAT0
1138  */
1139 static int
1140 copyfat(struct fat_descriptor *fat, int n)
1141 {
1142 	size_t	rwsize, tailsize, blobs, i;
1143 	off_t	dst_off, src_off;
1144 	struct bootblock *boot;
1145 	int ret, fd;
1146 
1147 	ret = FSOK;
1148 	fd = fd_of_(fat);
1149 	boot = boot_of_(fat);
1150 
1151 	blobs = howmany(fat->fatsize, fat32_cache_size);
1152 	tailsize = fat->fatsize % fat32_cache_size;
1153 	if (tailsize == 0) {
1154 		tailsize = fat32_cache_size;
1155 	}
1156 	rwsize = fat32_cache_size;
1157 
1158 	src_off = fat->fat32_offset;
1159 	dst_off = boot->bpbResSectors + n * boot->FATsecs;
1160 	dst_off *= boot->bpbBytesPerSec;
1161 
1162 	for (i = 0; i < blobs;
1163 	    i++, src_off += fat32_cache_size, dst_off += fat32_cache_size) {
1164 		if (i == blobs - 1) {
1165 			rwsize = tailsize;
1166 		}
1167 		if ((lseek(fd, src_off, SEEK_SET) != src_off ||
1168 		    (size_t)read(fd, fat->fatbuf, rwsize) != rwsize) &&
1169 		    ret == FSOK) {
1170 			perr("Unable to read FAT0");
1171 			ret = FSFATAL;
1172 			continue;
1173 		}
1174 		if ((lseek(fd, dst_off, SEEK_SET) != dst_off ||
1175 			(size_t)write(fd, fat->fatbuf, rwsize) != rwsize) &&
1176 			ret == FSOK) {
1177 			perr("Unable to write FAT %d", n);
1178 			ret = FSERROR;
1179 		}
1180 	}
1181 	return (ret);
1182 }
1183 
1184 /*
1185  * Write out FAT
1186  */
1187 int
1188 writefat(struct fat_descriptor *fat)
1189 {
1190 	u_int i;
1191 	size_t writesz;
1192 	off_t dst_base;
1193 	int ret = FSOK, fd;
1194 	struct bootblock *boot;
1195 	struct fat32_cache_entry *entry;
1196 
1197 	boot = boot_of_(fat);
1198 	fd = fd_of_(fat);
1199 
1200 	if (fat->use_cache) {
1201 		/*
1202 		 * Attempt to flush all in-flight cache, and bail out
1203 		 * if we encountered an error (but only emit error
1204 		 * message once).  Stop proceeding with copyfat()
1205 		 * if any flush failed.
1206 		 */
1207 		TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
1208 			if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
1209 				if (ret == FSOK) {
1210 					perr("Unable to write FAT");
1211 					ret = FSFATAL;
1212 				}
1213 			}
1214 		}
1215 		if (ret != FSOK)
1216 			return (ret);
1217 
1218 		/* Update backup copies of FAT, error is not fatal */
1219 		for (i = 1; i < boot->bpbFATs; i++) {
1220 			if (copyfat(fat, i) != FSOK)
1221 				ret = FSERROR;
1222 		}
1223 	} else {
1224 		writesz = fat->fatsize;
1225 
1226 		for (i = fat->is_mmapped ? 1 : 0; i < boot->bpbFATs; i++) {
1227 			dst_base = boot->bpbResSectors + i * boot->FATsecs;
1228 			dst_base *= boot->bpbBytesPerSec;
1229 			if ((lseek(fd, dst_base, SEEK_SET) != dst_base ||
1230 			    (size_t)write(fd, fat->fatbuf, writesz) != writesz) &&
1231 			    ret == FSOK) {
1232 				perr("Unable to write FAT %d", i);
1233 				ret = ((i == 0) ? FSFATAL : FSERROR);
1234 			}
1235 		}
1236 	}
1237 
1238 	return ret;
1239 }
1240 
1241 /*
1242  * Check a complete in-memory FAT for lost cluster chains
1243  */
1244 int
1245 checklost(struct fat_descriptor *fat)
1246 {
1247 	cl_t head;
1248 	int mod = FSOK;
1249 	int dosfs, ret;
1250 	size_t chains, chainlength;
1251 	struct bootblock *boot;
1252 
1253 	dosfs = fd_of_(fat);
1254 	boot = boot_of_(fat);
1255 
1256 	/*
1257 	 * At this point, we have already traversed all directories.
1258 	 * All remaining chain heads in the bitmap are heads of lost
1259 	 * chains.
1260 	 */
1261 	chains = fat_get_head_count(fat);
1262 	for (head = CLUST_FIRST;
1263 	    chains > 0 && head < boot->NumClusters;
1264 	    ) {
1265 		/*
1266 		 * We expect the bitmap to be very sparse, so skip if
1267 		 * the range is full of 0's
1268 		 */
1269 		if (head % LONG_BIT == 0 &&
1270 		    !fat_is_cl_head_in_range(fat, head)) {
1271 			head += LONG_BIT;
1272 			continue;
1273 		}
1274 		if (fat_is_cl_head(fat, head)) {
1275 			ret = checkchain(fat, head, &chainlength);
1276 			if (ret != FSERROR && chainlength > 0) {
1277 				pwarn("Lost cluster chain at cluster %u\n"
1278 				    "%zd Cluster(s) lost\n",
1279 				    head, chainlength);
1280 				mod |= ret = reconnect(fat, head,
1281 				    chainlength);
1282 			}
1283 			if (mod & FSFATAL)
1284 				break;
1285 			if (ret == FSERROR && ask(0, "Clear")) {
1286 				clearchain(fat, head);
1287 				mod |= FSFATMOD;
1288 			}
1289 			chains--;
1290 		}
1291 		head++;
1292 	}
1293 
1294 	finishlf();
1295 
1296 	if (boot->bpbFSInfo) {
1297 		ret = 0;
1298 		if (boot->FSFree != 0xffffffffU &&
1299 		    boot->FSFree != boot->NumFree) {
1300 			pwarn("Free space in FSInfo block (%u) not correct (%u)\n",
1301 			      boot->FSFree, boot->NumFree);
1302 			if (ask(1, "Fix")) {
1303 				boot->FSFree = boot->NumFree;
1304 				ret = 1;
1305 			}
1306 		}
1307 		if (boot->FSNext != 0xffffffffU &&
1308 		    (boot->FSNext >= boot->NumClusters ||
1309 		    (boot->NumFree && fat_get_cl_next(fat, boot->FSNext) != CLUST_FREE))) {
1310 			pwarn("Next free cluster in FSInfo block (%u) %s\n",
1311 			      boot->FSNext,
1312 			      (boot->FSNext >= boot->NumClusters) ? "invalid" : "not free");
1313 			if (ask(1, "Fix"))
1314 				for (head = CLUST_FIRST; head < boot->NumClusters; head++)
1315 					if (fat_get_cl_next(fat, head) == CLUST_FREE) {
1316 						boot->FSNext = head;
1317 						ret = 1;
1318 						break;
1319 					}
1320 		}
1321 		if (ret)
1322 			mod |= writefsinfo(dosfs, boot);
1323 	}
1324 
1325 	return mod;
1326 }
1327