xref: /dragonfly/sbin/fsck_msdosfs/fat.c (revision 029e6489)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2019 Google LLC
 * Copyright (C) 1995, 1996, 1997 Wolfgang Solfrank
 * Copyright (c) 1995 Martin Husemann
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/endian.h>
#include <sys/queue.h>
#include <sys/limits.h>
#include <sys/mman.h>
#include <sys/param.h>

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <unistd.h>

#include "ext.h"
#include "fsutil.h"

static int _readfat(struct fat_descriptor *);
static inline struct bootblock* boot_of_(struct fat_descriptor *);
static inline int fd_of_(struct fat_descriptor *);
static inline bool valid_cl(struct fat_descriptor *, cl_t);


/*
 * Head bitmap for FAT scanning.
 *
 * FAT32 has up to 2^28 = 256M entries, and FAT16/12 have far fewer.
 * For each cluster, we use 1 bit to represent whether it's a head
 * cluster (the first cluster of a cluster chain).
 *
 * Head bitmap
 * ===========
 * Initially, we set all bits to 1.  In readfat(), we traverse the
 * whole FAT and clear the bit of every cluster that is identified
 * as some other cluster's "next" cluster.  After the scan, the
 * remaining 1 bits indicate clusters that were "head" clusters.
 *
 * We use the head bitmap to identify lost chains: a head cluster
 * that is not claimed by any file or directory is the head cluster
 * of a lost chain.
 *
 * Handling of lost chains
 * =======================
 * At the end of scanning, we can easily find all lost chains' heads
 * by looking for the remaining 1's in the head bitmap.
 */
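/*
 * Illustrative example: if the FAT records the chains 2 -> 3 -> EOF
 * and 5 -> EOF, the scan clears the bit of cluster 3 (it is claimed
 * as cluster 2's "next"), leaving the bits of clusters 2 and 5 set:
 * both are chain heads.
 */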

typedef struct long_bitmap {
	unsigned long	*map;
	size_t		 count;		/* Total set bits in the map */
} long_bitmap_t;

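/*
 * Clear the bit of cluster 'cl'; the bit must currently be set.
 */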
static inline void
bitmap_clear(long_bitmap_t *lbp, cl_t cl)
{
	cl_t i = cl / LONG_BIT;
	unsigned long clearmask = ~(1UL << (cl % LONG_BIT));

	assert((lbp->map[i] & ~clearmask) != 0);
	lbp->map[i] &= clearmask;
	lbp->count--;
}

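/*
 * Return whether the bit of cluster 'cl' is set.
 */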
static inline bool
bitmap_get(long_bitmap_t *lbp, cl_t cl)
{
	cl_t i = cl / LONG_BIT;
	unsigned long usedbit = 1UL << (cl % LONG_BIT);

	return ((lbp->map[i] & usedbit) == usedbit);
}

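/*
 * Return true if none of the bits in the LONG_BIT-aligned word that
 * contains cluster 'cl' is set.
 */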
static inline bool
bitmap_none_in_range(long_bitmap_t *lbp, cl_t cl)
{
	cl_t i = cl / LONG_BIT;

	return (lbp->map[i] == 0);
}

static inline size_t
bitmap_count(long_bitmap_t *lbp)
{
	return (lbp->count);
}

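/*
 * Allocate a bitmap of 'bits' bits; if 'allone' is true, start with
 * every bit set.
 */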
static int
bitmap_ctor(long_bitmap_t *lbp, size_t bits, bool allone)
{
	size_t bitmap_size = roundup2(bits, LONG_BIT) / (LONG_BIT / 8);

	free(lbp->map);
	lbp->map = calloc(1, bitmap_size);
	if (lbp->map == NULL)
		return FSFATAL;

	if (allone) {
		memset(lbp->map, 0xff, bitmap_size);
		lbp->count = bits;
	} else {
		lbp->count = 0;
	}
	return FSOK;
}

static void
bitmap_dtor(long_bitmap_t *lbp)
{
	free(lbp->map);
	lbp->map = NULL;
}

/*
 * A FAT32 FAT can be as large as 256MiB (2^26 entries * 4 bytes).
 * When we cannot ask the kernel to manage the access (i.e. mmap()
 * fails), use a simple LRU cache with a chunk size of 128 KiB to
 * manage it.
 */
struct fat32_cache_entry {
	TAILQ_ENTRY(fat32_cache_entry)	entries;
	uint8_t			*chunk;		/* pointer to chunk */
	off_t			 addr;		/* offset */
	bool			 dirty;		/* dirty bit */
};

static const size_t	fat32_cache_chunk_size = 131072; /* MAXPHYS */
static const size_t	fat32_cache_size = 4194304;
static const size_t	fat32_cache_entries = 32; /* XXXgcc: cache_size / cache_chunk_size */

/*
 * FAT table descriptor, represents a FAT table that is already loaded
 * into memory.
 */
struct fat_descriptor {
	struct bootblock	*boot;
	uint8_t			*fatbuf;
	cl_t			(*get)(struct fat_descriptor *, cl_t);
	int			(*set)(struct fat_descriptor *, cl_t, cl_t);
	long_bitmap_t		 headbitmap;
	int			 fd;
	bool			 is_mmapped;
	bool			 use_cache;
	size_t			 fatsize;

	size_t			 fat32_cached_chunks;
	TAILQ_HEAD(cachehead, fat32_cache_entry)	fat32_cache_head;
	struct fat32_cache_entry	*fat32_cache_allentries;
	off_t			 fat32_offset;
	off_t			 fat32_lastaddr;
};

void
fat_clear_cl_head(struct fat_descriptor *fat, cl_t cl)
{
	bitmap_clear(&fat->headbitmap, cl);
}

bool
fat_is_cl_head(struct fat_descriptor *fat, cl_t cl)
{
	return (bitmap_get(&fat->headbitmap, cl));
}

static inline bool
fat_is_cl_head_in_range(struct fat_descriptor *fat, cl_t cl)
{
	return (!(bitmap_none_in_range(&fat->headbitmap, cl)));
}

static size_t
fat_get_head_count(struct fat_descriptor *fat)
{
	return (bitmap_count(&fat->headbitmap));
}

/*
 * FAT12 accessors.
 *
 * FAT12 FATs are small enough that we expect them to always fit
 * in RAM.
 */
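/*
 * Illustrative example of the packing: the entries for clusters 2k
 * and 2k+1 share the three bytes b0 b1 b2 at offset 3k, where
 * entry(2k) = b0 | ((b1 & 0x0f) << 8) and
 * entry(2k+1) = (b1 >> 4) | (b2 << 4).
 */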
static inline uint8_t *
fat_get_fat12_ptr(struct fat_descriptor *fat, cl_t cl)
{
	return (fat->fatbuf + (cl + (cl >> 1)));
}

static cl_t
fat_get_fat12_next(struct fat_descriptor *fat, cl_t cl)
{
	const uint8_t	*p;
	cl_t	retval;

	p = fat_get_fat12_ptr(fat, cl);
	retval = le16dec(p);
	/* Odd cluster: the lower 4 bits belong to the subsequent cluster */
	if ((cl & 1) == 1)
		retval >>= 4;
	retval &= CLUST12_MASK;

	if (retval >= (CLUST_BAD & CLUST12_MASK))
		retval |= ~CLUST12_MASK;

	return (retval);
}

static int
fat_set_fat12_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{
	uint8_t	*p;

	/* Truncate 'nextcl' value, if needed */
	nextcl &= CLUST12_MASK;

	p = fat_get_fat12_ptr(fat, cl);

	/*
	 * Read in the 4 bits that belong to the subsequent (for even
	 * clusters) or the preceding (for odd clusters) cluster and
	 * combine them with the nextcl value for encoding
	 */
	if ((cl & 1) == 0) {
		nextcl |= ((p[1] & 0xf0) << 8);
	} else {
		nextcl <<= 4;
		nextcl |= (p[0] & 0x0f);
	}

	le16enc(p, (uint16_t)nextcl);

	return (0);
}

/*
 * FAT16 accessors.
 *
 * FAT16 FATs are small enough that we expect them to always fit
 * in RAM.
 */
static inline uint8_t *
fat_get_fat16_ptr(struct fat_descriptor *fat, cl_t cl)
{
	return (fat->fatbuf + (cl << 1));
}

static cl_t
fat_get_fat16_next(struct fat_descriptor *fat, cl_t cl)
{
	const uint8_t	*p;
	cl_t	retval;

	p = fat_get_fat16_ptr(fat, cl);
	retval = le16dec(p) & CLUST16_MASK;

	if (retval >= (CLUST_BAD & CLUST16_MASK))
		retval |= ~CLUST16_MASK;

	return (retval);
}

static int
fat_set_fat16_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{
	uint8_t	*p;

	/* Truncate 'nextcl' value, if needed */
	nextcl &= CLUST16_MASK;

	p = fat_get_fat16_ptr(fat, cl);

	le16enc(p, (uint16_t)nextcl);

	return (0);
}

/*
 * FAT32 accessors.
 */
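/*
 * Note that only the low 28 bits of a FAT32 entry (CLUST32_MASK)
 * carry the cluster number; the top 4 bits are reserved.
 */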
static inline uint8_t *
fat_get_fat32_ptr(struct fat_descriptor *fat, cl_t cl)
{
	return (fat->fatbuf + (cl << 2));
}

static cl_t
fat_get_fat32_next(struct fat_descriptor *fat, cl_t cl)
{
	const uint8_t	*p;
	cl_t	retval;

	p = fat_get_fat32_ptr(fat, cl);
	retval = le32dec(p) & CLUST32_MASK;

	if (retval >= (CLUST_BAD & CLUST32_MASK))
		retval |= ~CLUST32_MASK;

	return (retval);
}

static int
fat_set_fat32_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{
	uint8_t	*p;

	/* Truncate 'nextcl' value, if needed */
	nextcl &= CLUST32_MASK;

	p = fat_get_fat32_ptr(fat, cl);

	le32enc(p, (uint32_t)nextcl);

	return (0);
}

static inline size_t
fat_get_iosize(struct fat_descriptor *fat, off_t address)
{

	if (address == fat->fat32_lastaddr) {
		return (fat->fatsize & ((off_t)fat32_cache_chunk_size - 1));
	} else {
		return (fat32_cache_chunk_size);
	}
}

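/*
 * Write a dirty cache chunk back to disk and clear its dirty bit.
 */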
static int
fat_flush_fat32_cache_entry(struct fat_descriptor *fat,
    struct fat32_cache_entry *entry)
{
	int fd;
	off_t fat_addr;
	size_t writesize;

	fd = fd_of_(fat);

	if (!entry->dirty)
		return (FSOK);

	writesize = fat_get_iosize(fat, entry->addr);

	fat_addr = fat->fat32_offset + entry->addr;
	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
	    (size_t)write(fd, entry->chunk, writesize) != writesize) {
		pfatal("Unable to write FAT");
		return (FSFATAL);
	}

	entry->dirty = false;
	return (FSOK);
}

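/*
 * Return the cache entry that covers FAT offset 'addr', loading it
 * from disk (and evicting the least recently used entry) on a miss.
 */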
static struct fat32_cache_entry *
fat_get_fat32_cache_entry(struct fat_descriptor *fat, off_t addr,
    bool writing)
{
	int fd;
	struct fat32_cache_entry *entry, *first;
	off_t	fat_addr;
	size_t	rwsize;

	addr &= ~(fat32_cache_chunk_size - 1);

	first = TAILQ_FIRST(&fat->fat32_cache_head);

	/*
	 * Cache hit: if we already have the chunk, move it to the
	 * head of the list
	 */
	TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
		if (entry->addr == addr) {
			if (writing) {
				entry->dirty = true;
			}
			if (entry != first) {
				TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
				TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);
			}
			return (entry);
		}
	}

	/*
	 * Cache miss: detach the chunk at the tail of the list, flush
	 * it if it is dirty, then repopulate it with data read from
	 * disk at the requested address.
	 */
	entry = TAILQ_LAST(&fat->fat32_cache_head, cachehead);
	TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
	if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
		return (NULL);
	}

	rwsize = fat_get_iosize(fat, addr);
	fat_addr = fat->fat32_offset + addr;
	entry->addr = addr;
	fd = fd_of_(fat);
	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
	    (size_t)read(fd, entry->chunk, rwsize) != rwsize) {
		pfatal("Unable to read FAT");
		return (NULL);
	}
	if (writing) {
		entry->dirty = true;
	}
	TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);

	return (entry);
}

static inline uint8_t *
fat_get_fat32_cached_ptr(struct fat_descriptor *fat, cl_t cl, bool writing)
{
	off_t addr, off;
	struct fat32_cache_entry *entry;

	addr = cl << 2;
	entry = fat_get_fat32_cache_entry(fat, addr, writing);

	if (entry != NULL) {
		off = addr & (fat32_cache_chunk_size - 1);
		return (entry->chunk + off);
	} else {
		return (NULL);
	}
}

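/*
 * Cache-backed FAT32 accessors.  A CLUST_DEAD return from the getter
 * indicates an I/O failure in the cache layer.
 */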
static cl_t
fat_get_fat32_cached_next(struct fat_descriptor *fat, cl_t cl)
{
	const uint8_t	*p;
	cl_t	retval;

	p = fat_get_fat32_cached_ptr(fat, cl, false);
	if (p != NULL) {
		retval = le32dec(p) & CLUST32_MASK;
		if (retval >= (CLUST_BAD & CLUST32_MASK))
			retval |= ~CLUST32_MASK;
	} else {
		retval = CLUST_DEAD;
	}

	return (retval);
}

static int
fat_set_fat32_cached_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{
	uint8_t	*p;

	/* Truncate 'nextcl' value, if needed */
	nextcl &= CLUST32_MASK;

	p = fat_get_fat32_cached_ptr(fat, cl, true);
	if (p != NULL) {
		le32enc(p, (uint32_t)nextcl);
		return FSOK;
	} else {
		return FSFATAL;
	}
}

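/*
 * Generic accessors: validate the cluster number, then dispatch to
 * the FAT-width-specific implementation selected in readfat().
 */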
cl_t
fat_get_cl_next(struct fat_descriptor *fat, cl_t cl)
{

	if (!valid_cl(fat, cl)) {
		pfatal("Invalid cluster: %u", cl);
		return CLUST_DEAD;
	}

	return (fat->get(fat, cl));
}

int
fat_set_cl_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{

	if (rdonly) {
		pwarn(" (NO WRITE)\n");
		return FSFATAL;
	}

	if (!valid_cl(fat, cl)) {
		pfatal("Invalid cluster: %u", cl);
		return FSFATAL;
	}

	return (fat->set(fat, cl, nextcl));
}

static inline struct bootblock*
boot_of_(struct fat_descriptor *fat)
{

	return (fat->boot);
}

struct bootblock*
fat_get_boot(struct fat_descriptor *fat)
{

	return (boot_of_(fat));
}

static inline int
fd_of_(struct fat_descriptor *fat)
{
	return (fat->fd);
}

int
fat_get_fd(struct fat_descriptor *fat)
{
	return (fd_of_(fat));
}

/*
 * Whether cluster 'cl' is within the valid data cluster range.
 */
bool
fat_is_valid_cl(struct fat_descriptor *fat, cl_t cl)
{

	return (valid_cl(fat, cl));
}

static inline bool
valid_cl(struct fat_descriptor *fat, cl_t cl)
{
	const struct bootblock *boot = boot_of_(fat);

	return (cl >= CLUST_FIRST && cl < boot->NumClusters);
}

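/*
 * Mark the file system as cleanly unmounted by setting the "clean
 * shutdown" bit in the reserved FAT entry FAT[1] (FAT16/32 only).
 */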
int
cleardirty(struct fat_descriptor *fat)
{
	int fd, ret = FSERROR;
	struct bootblock *boot;
	u_char *buffer;
	size_t len;
	off_t off;

	boot = boot_of_(fat);
	fd = fd_of_(fat);

	if (boot->ClustMask != CLUST16_MASK && boot->ClustMask != CLUST32_MASK)
		return 0;

	off = boot->bpbResSectors;
	off *= boot->bpbBytesPerSec;

	buffer = malloc(len = boot->bpbBytesPerSec);
	if (buffer == NULL) {
		perr("No memory for FAT sectors (%zu)", len);
		return 1;
	}

	if ((size_t)pread(fd, buffer, len, off) != len) {
		perr("Unable to read FAT");
		goto err;
	}

	if (boot->ClustMask == CLUST16_MASK) {
		buffer[3] |= 0x80;
	} else {
		buffer[7] |= 0x08;
	}

	if ((size_t)pwrite(fd, buffer, len, off) != len) {
		perr("Unable to write FAT");
		goto err;
	}

	ret = FSOK;

err:
	free(buffer);
	return ret;
}

/*
 * Read a FAT from disk. Returns 1 if successful, 0 otherwise.
 */
static int
_readfat(struct fat_descriptor *fat)
{
	int fd;
	size_t i;
	off_t off;
	size_t readsize;
	struct bootblock *boot;
	struct fat32_cache_entry *entry;

	boot = boot_of_(fat);
	fd = fd_of_(fat);
	fat->fatsize = boot->FATsecs * boot->bpbBytesPerSec;

	off = boot->bpbResSectors;
	off *= boot->bpbBytesPerSec;

	fat->is_mmapped = false;
	fat->use_cache = false;

	/* Attempt to mmap() first */
	if (allow_mmap) {
		fat->fatbuf = mmap(NULL, fat->fatsize,
				PROT_READ | (rdonly ? 0 : PROT_WRITE),
				MAP_SHARED, fd_of_(fat), off);
		if (fat->fatbuf != MAP_FAILED) {
			fat->is_mmapped = true;
			return 1;
		}
	}

	/*
	 * Unfortunately, we were unable to mmap().
	 *
	 * Only use the cache manager when necessary, that is, when
	 * the FAT is sufficiently large; in that case, read only the
	 * first 4 MiB of the FAT into memory, then split the buffer
	 * into chunks and insert them into the LRU queue to populate
	 * the cache with data.
	 */
	if (boot->ClustMask == CLUST32_MASK &&
	    fat->fatsize >= fat32_cache_size) {
		readsize = fat32_cache_size;
		fat->use_cache = true;

		fat->fat32_offset = boot->bpbResSectors * boot->bpbBytesPerSec;
		fat->fat32_lastaddr = fat->fatsize & ~(fat32_cache_chunk_size - 1);
	} else {
		readsize = fat->fatsize;
	}
	fat->fatbuf = malloc(readsize);
	if (fat->fatbuf == NULL) {
		perr("No space for FAT (%zu)", readsize);
		return 0;
	}

	if (lseek(fd, off, SEEK_SET) != off) {
		perr("Unable to read FAT");
		goto err;
	}
	if ((size_t)read(fd, fat->fatbuf, readsize) != readsize) {
		perr("Unable to read FAT");
		goto err;
	}

	/*
	 * When the cache is used, split the buffer into chunks and
	 * connect the chunks to the cache.
	 */
	if (fat->use_cache) {
		TAILQ_INIT(&fat->fat32_cache_head);
		entry = calloc(fat32_cache_entries, sizeof(*entry));
		if (entry == NULL) {
			perr("No space for FAT cache (%zu of %zu)",
			    fat32_cache_entries, sizeof(*entry));
			goto err;
		}
		for (i = 0; i < fat32_cache_entries; i++) {
			entry[i].addr = fat32_cache_chunk_size * i;
			entry[i].chunk = &fat->fatbuf[entry[i].addr];
			TAILQ_INSERT_TAIL(&fat->fat32_cache_head,
			    &entry[i], entries);
		}
		fat->fat32_cache_allentries = entry;
	}

	return 1;

err:
	free(fat->fatbuf);
	fat->fatbuf = NULL;
	return 0;
}

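/*
 * Release resources acquired by _readfat(): the FAT buffer or
 * mapping, the cache bookkeeping, and the head bitmap.
 */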
static void
releasefat(struct fat_descriptor *fat)
{
	if (fat->is_mmapped) {
		munmap(fat->fatbuf, fat->fatsize);
	} else {
		if (fat->use_cache) {
			free(fat->fat32_cache_allentries);
			fat->fat32_cache_allentries = NULL;
		}
		free(fat->fatbuf);
	}
	fat->fatbuf = NULL;
	bitmap_dtor(&fat->headbitmap);
}

/*
 * Read or map a FAT and populate head bitmap
 */
int
readfat(int fs, struct bootblock *boot, struct fat_descriptor **fp)
{
	struct fat_descriptor *fat;
	u_char *buffer, *p;
	cl_t cl, nextcl;
	int ret = FSOK;

	boot->NumFree = boot->NumBad = 0;

	fat = calloc(1, sizeof(struct fat_descriptor));
	if (fat == NULL) {
		perr("No space for FAT descriptor");
		return FSFATAL;
	}

	fat->fd = fs;
	fat->boot = boot;

	if (!_readfat(fat)) {
		free(fat);
		return FSFATAL;
	}
	buffer = fat->fatbuf;

	/* Populate accessors */
	switch (boot->ClustMask) {
	case CLUST12_MASK:
		fat->get = fat_get_fat12_next;
		fat->set = fat_set_fat12_next;
		break;
	case CLUST16_MASK:
		fat->get = fat_get_fat16_next;
		fat->set = fat_set_fat16_next;
		break;
	case CLUST32_MASK:
		if (fat->is_mmapped || !fat->use_cache) {
			fat->get = fat_get_fat32_next;
			fat->set = fat_set_fat32_next;
		} else {
			fat->get = fat_get_fat32_cached_next;
			fat->set = fat_set_fat32_cached_next;
		}
		break;
	default:
		pfatal("Invalid ClustMask: %d", boot->ClustMask);
		releasefat(fat);
		free(fat);
		return FSFATAL;
	}

	if (bitmap_ctor(&fat->headbitmap, boot->NumClusters,
	    true) != FSOK) {
		perr("No space for head bitmap for FAT clusters (%zu)",
		    (size_t)boot->NumClusters);
		releasefat(fat);
		free(fat);
		return FSFATAL;
	}

	if (buffer[0] != boot->bpbMedia
	    || buffer[1] != 0xff || buffer[2] != 0xff
	    || (boot->ClustMask == CLUST16_MASK && buffer[3] != 0xff)
	    || (boot->ClustMask == CLUST32_MASK
		&& ((buffer[3]&0x0f) != 0x0f
		    || buffer[4] != 0xff || buffer[5] != 0xff
		    || buffer[6] != 0xff || (buffer[7]&0x0f) != 0x0f))) {

		/*
		 * Windows 95 OSR2 (and possibly any later version)
		 * changes the FAT signature to 0xXXffff7f for FAT16
		 * and to 0xXXffff0fffffff07 for FAT32 upon boot, to
		 * know that the file system is dirty if it doesn't
		 * reboot cleanly.  Check this special condition
		 * before erroring out.
		 */
		if (buffer[0] == boot->bpbMedia && buffer[1] == 0xff
		    && buffer[2] == 0xff
		    && ((boot->ClustMask == CLUST16_MASK && buffer[3] == 0x7f)
			|| (boot->ClustMask == CLUST32_MASK
			    && buffer[3] == 0x0f && buffer[4] == 0xff
			    && buffer[5] == 0xff && buffer[6] == 0xff
			    && buffer[7] == 0x07)))
			ret |= FSDIRTY;
		else {
			/* just some odd byte sequence in FAT */
			switch (boot->ClustMask) {
			case CLUST32_MASK:
				pwarn("%s (%02x%02x%02x%02x%02x%02x%02x%02x)\n",
				      "FAT starts with odd byte sequence",
				      buffer[0], buffer[1], buffer[2], buffer[3],
				      buffer[4], buffer[5], buffer[6], buffer[7]);
				break;
			case CLUST16_MASK:
				pwarn("%s (%02x%02x%02x%02x)\n",
				    "FAT starts with odd byte sequence",
				    buffer[0], buffer[1], buffer[2], buffer[3]);
				break;
			default:
				pwarn("%s (%02x%02x%02x)\n",
				    "FAT starts with odd byte sequence",
				    buffer[0], buffer[1], buffer[2]);
				break;
			}

			if (ask(1, "Correct")) {
				ret |= FSFATMOD;
				p = buffer;

				*p++ = (u_char)boot->bpbMedia;
				*p++ = 0xff;
				*p++ = 0xff;
				switch (boot->ClustMask) {
				case CLUST16_MASK:
					*p++ = 0xff;
					break;
				case CLUST32_MASK:
					*p++ = 0x0f;
					*p++ = 0xff;
					*p++ = 0xff;
					*p++ = 0xff;
					*p++ = 0x0f;
					break;
				default:
					break;
				}
			}
		}
	}
	/*
	 * Traverse the FAT table and populate the head map.  Initially,
	 * we consider all clusters as possible head clusters (the
	 * beginning of a file or directory), then traverse the whole
	 * allocation table, marking every non-head node as such
	 * (detailed below) and fixing obvious issues as we walk.
	 *
	 * For each "next" cluster, the possible values are:
	 *
	 * a) CLUST_FREE or CLUST_BAD.  The *current* cluster can't be a
	 *    head node.
	 * b) An out-of-range value.  The only fix would be to truncate
	 *    at the cluster.
	 * c) A valid cluster.  It means that cluster (nextcl) is not a
	 *    head cluster.  Note that during the scan, every cluster is
	 *    expected to be seen at most once; seeing a cluster twice
	 *    means a cross-linked chain, which should be truncated at
	 *    the current cluster.
	 *
	 * After the scan, the remaining set bits indicate all possible
	 * head nodes, because they were never claimed by any other
	 * node as the next node, but we do not know whether these
	 * chains end with a valid EOF marker.  We will check that in
	 * checkchain() at a later time, when checking directories,
	 * where these head nodes will be marked as non-head.
	 *
	 * In the final pass, all head nodes should have been cleared;
	 * any head node that remains is the head of a lost chain.
	 */
	for (cl = CLUST_FIRST; cl < boot->NumClusters; cl++) {
		nextcl = fat_get_cl_next(fat, cl);

		/* Check if the next cluster number is valid */
		if (nextcl == CLUST_FREE) {
			/* Save a hint for next free cluster */
			if (boot->FSNext == 0) {
				boot->FSNext = cl;
			}
			if (fat_is_cl_head(fat, cl)) {
				fat_clear_cl_head(fat, cl);
			}
			boot->NumFree++;
		} else if (nextcl == CLUST_BAD) {
			if (fat_is_cl_head(fat, cl)) {
				fat_clear_cl_head(fat, cl);
			}
			boot->NumBad++;
		} else if (!valid_cl(fat, nextcl) && nextcl < CLUST_RSRVD) {
			pwarn("Cluster %u continues with out of range "
			    "cluster number %u\n",
			    cl,
			    nextcl & boot->ClustMask);
			if (ask(0, "Truncate")) {
				ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
				ret |= FSFATMOD;
			}
		} else if (valid_cl(fat, nextcl)) {
			if (fat_is_cl_head(fat, nextcl)) {
				fat_clear_cl_head(fat, nextcl);
			} else {
				pwarn("Cluster %u crossed another chain at %u\n",
				    cl, nextcl);
				if (ask(0, "Truncate")) {
					ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
					ret |= FSFATMOD;
				}
			}
		}
	}

	if (ret & FSFATAL) {
		releasefat(fat);
		free(fat);
		*fp = NULL;
	} else
		*fp = fat;
	return ret;
}

/*
 * Get type of reserved cluster
 */
const char *
rsrvdcltype(cl_t cl)
{
	if (cl == CLUST_FREE)
		return "free";
	if (cl < CLUST_BAD)
		return "reserved";
	if (cl > CLUST_BAD)
		return "as EOF";
	return "bad";
}

/*
 * Examine a cluster chain for errors and count its size.
 */
int
checkchain(struct fat_descriptor *fat, cl_t head, size_t *chainsize)
{
	cl_t prev_cl, current_cl, next_cl;
	const char *op;

	/*
	 * We expect the caller to give us a real, unvisited 'head'
	 * cluster, and it must be a valid cluster.  While scanning
	 * the FAT table, we already excluded all clusters that were
	 * claimed as a "next" cluster.  Assert both conditions.
	 */
	assert(valid_cl(fat, head));
	assert(fat_is_cl_head(fat, head));

	/*
	 * Immediately mark the 'head' cluster that we are about to visit.
	 */
	fat_clear_cl_head(fat, head);

	/*
	 * The allocation of a non-zero sized file or directory is
	 * represented as a singly linked list, whose tail node is
	 * the EOF marker (>=CLUST_EOFS).
	 *
	 * With a valid head node at hand, we expect every subsequent
	 * cluster to be either a not-yet-seen, valid cluster (we
	 * continue counting), or the EOF marker (we conclude the
	 * scan of this chain).
	 *
	 * In all other cases the chain is invalid, and the only
	 * viable fix is to truncate at the current node (mark it as
	 * EOF) when the next node violates that.
	 */
	*chainsize = 0;
	prev_cl = current_cl = head;
	for (next_cl = fat_get_cl_next(fat, current_cl);
	    valid_cl(fat, next_cl);
	    prev_cl = current_cl, current_cl = next_cl, next_cl = fat_get_cl_next(fat, current_cl))
		(*chainsize)++;

	/* A natural end */
	if (next_cl >= CLUST_EOFS) {
		(*chainsize)++;
		return FSOK;
	}

	/*
	 * The chain ended with an out-of-range cluster number.
	 *
	 * If the current node is e.g. CLUST_FREE, CLUST_BAD, etc.,
	 * it should not be present in a chain and we have to
	 * truncate at the previous node.
	 *
	 * If the current cluster points to an invalid cluster, the
	 * current cluster might have useful data and we truncate at
	 * the current cluster instead.
	 */
	if (next_cl == CLUST_FREE || next_cl >= CLUST_RSRVD) {
		pwarn("Cluster chain starting at %u ends with cluster marked %s\n",
		    head, rsrvdcltype(next_cl));
		current_cl = prev_cl;
	} else {
		pwarn("Cluster chain starting at %u ends with cluster out of range (%u)\n",
		    head,
		    next_cl & boot_of_(fat)->ClustMask);
		(*chainsize)++;
	}

	if (*chainsize > 0) {
		op = "Truncate";
		next_cl = CLUST_EOF;
	} else {
		op = "Clear";
		next_cl = CLUST_FREE;
	}
	if (ask(0, "%s", op)) {
		return (fat_set_cl_next(fat, current_cl, next_cl) | FSFATMOD);
	} else {
		return (FSERROR);
	}
}

/*
 * Clear cluster chain from head.
 */
void
clearchain(struct fat_descriptor *fat, cl_t head)
{
	cl_t current_cl, next_cl;
	struct bootblock *boot = boot_of_(fat);

	current_cl = head;

	while (valid_cl(fat, current_cl)) {
		next_cl = fat_get_cl_next(fat, current_cl);
		(void)fat_set_cl_next(fat, current_cl, CLUST_FREE);
		boot->NumFree++;
		current_cl = next_cl;
	}
}

/*
 * Overwrite the n-th FAT with FAT0
 */
static int
copyfat(struct fat_descriptor *fat, int n)
{
	size_t	rwsize, tailsize, blobs, i;
	off_t	dst_off, src_off;
	struct bootblock *boot;
	int ret, fd;

	ret = FSOK;
	fd = fd_of_(fat);
	boot = boot_of_(fat);

	blobs = howmany(fat->fatsize, fat32_cache_size);
	tailsize = fat->fatsize % fat32_cache_size;
	if (tailsize == 0) {
		tailsize = fat32_cache_size;
	}
	rwsize = fat32_cache_size;

	src_off = fat->fat32_offset;
	dst_off = boot->bpbResSectors + n * boot->FATsecs;
	dst_off *= boot->bpbBytesPerSec;

	for (i = 0; i < blobs;
	    i++, src_off += fat32_cache_size, dst_off += fat32_cache_size) {
		if (i == blobs - 1) {
			rwsize = tailsize;
		}
		if ((lseek(fd, src_off, SEEK_SET) != src_off ||
		    (size_t)read(fd, fat->fatbuf, rwsize) != rwsize) &&
		    ret == FSOK) {
			perr("Unable to read FAT0");
			ret = FSFATAL;
			continue;
		}
		if ((lseek(fd, dst_off, SEEK_SET) != dst_off ||
		    (size_t)write(fd, fat->fatbuf, rwsize) != rwsize) &&
		    ret == FSOK) {
			perr("Unable to write FAT %d", n);
			ret = FSERROR;
		}
	}
	return (ret);
}

/*
 * Write out FAT
 */
int
writefat(struct fat_descriptor *fat)
{
	u_int i;
	size_t writesz;
	off_t dst_base;
	int ret = FSOK, fd;
	struct bootblock *boot;
	struct fat32_cache_entry *entry;

	boot = boot_of_(fat);
	fd = fd_of_(fat);

	if (fat->use_cache) {
		/*
		 * Attempt to flush all dirty cache entries, and bail
		 * out if we encounter an error (but only emit the
		 * error message once).  Do not proceed to copyfat()
		 * if any flush failed.
		 */
		TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
			if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
				if (ret == FSOK) {
					perr("Unable to write FAT");
					ret = FSFATAL;
				}
			}
		}
		if (ret != FSOK)
			return (ret);

		/* Update backup copies of the FAT; errors are not fatal */
		for (i = 1; i < boot->bpbFATs; i++) {
			if (copyfat(fat, i) != FSOK)
				ret = FSERROR;
		}
	} else {
		writesz = fat->fatsize;

		for (i = fat->is_mmapped ? 1 : 0; i < boot->bpbFATs; i++) {
			dst_base = boot->bpbResSectors + i * boot->FATsecs;
			dst_base *= boot->bpbBytesPerSec;
			if ((lseek(fd, dst_base, SEEK_SET) != dst_base ||
			    (size_t)write(fd, fat->fatbuf, writesz) != writesz) &&
			    ret == FSOK) {
				perr("Unable to write FAT %d", i);
				ret = ((i == 0) ? FSFATAL : FSERROR);
			}
		}
	}

	return ret;
}

/*
 * Check a complete in-memory FAT for lost cluster chains
 */
int
checklost(struct fat_descriptor *fat)
{
	cl_t head;
	int mod = FSOK;
	int dosfs, ret;
	size_t chains, chainlength;
	struct bootblock *boot;

	dosfs = fd_of_(fat);
	boot = boot_of_(fat);

	/*
	 * At this point, we have already traversed all directories.
	 * All remaining chain heads in the bitmap are heads of lost
	 * chains.
	 */
	chains = fat_get_head_count(fat);
	for (head = CLUST_FIRST;
	    chains > 0 && head < boot->NumClusters;
	    ) {
		/*
		 * We expect the bitmap to be very sparse, so skip if
		 * the range is full of 0's
		 */
		if (head % LONG_BIT == 0 &&
		    !fat_is_cl_head_in_range(fat, head)) {
			head += LONG_BIT;
			continue;
		}
		if (fat_is_cl_head(fat, head)) {
			ret = checkchain(fat, head, &chainlength);
			if (ret != FSERROR && chainlength > 0) {
				pwarn("Lost cluster chain at cluster %u\n"
				    "%zu Cluster(s) lost\n",
				    head, chainlength);
				mod |= ret = reconnect(fat, head,
				    chainlength);
			}
			if (mod & FSFATAL)
				break;
			if (ret == FSERROR && ask(0, "Clear")) {
				clearchain(fat, head);
				mod |= FSFATMOD;
			}
			chains--;
		}
		head++;
	}

	finishlf();

	if (boot->bpbFSInfo) {
		ret = 0;
		if (boot->FSFree != 0xffffffffU &&
		    boot->FSFree != boot->NumFree) {
			pwarn("Free space in FSInfo block (%u) not correct (%u)\n",
			      boot->FSFree, boot->NumFree);
			if (ask(1, "Fix")) {
				boot->FSFree = boot->NumFree;
				ret = 1;
			}
		}
		if (boot->FSNext != 0xffffffffU &&
		    (boot->FSNext >= boot->NumClusters ||
		    (boot->NumFree && fat_get_cl_next(fat, boot->FSNext) != CLUST_FREE))) {
			pwarn("Next free cluster in FSInfo block (%u) %s\n",
			      boot->FSNext,
			      (boot->FSNext >= boot->NumClusters) ? "invalid" : "not free");
			if (ask(1, "Fix"))
				for (head = CLUST_FIRST; head < boot->NumClusters; head++)
					if (fat_get_cl_next(fat, head) == CLUST_FREE) {
						boot->FSNext = head;
						ret = 1;
						break;
					}
		}
		if (ret)
			mod |= writefsinfo(dosfs, boot);
	}

	return mod;
}
1239