/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2019 Google LLC
 * Copyright (C) 1995, 1996, 1997 Wolfgang Solfrank
 * Copyright (c) 1995 Martin Husemann
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/endian.h>
#include <sys/queue.h>
#include <sys/limits.h>
#include <sys/mman.h>
#include <sys/param.h>

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <unistd.h>

#include "ext.h"
#include "fsutil.h"

static int _readfat(struct fat_descriptor *);
static inline struct bootblock *boot_of_(struct fat_descriptor *);
static inline int fd_of_(struct fat_descriptor *);
static inline bool valid_cl(struct fat_descriptor *, cl_t);

/*
 * Head bitmap for FAT scanning.
 *
 * A FAT32 FAT can have up to 2^28 = 256M entries, while FAT16/12 FATs
 * have far fewer.  For each cluster, we use 1 bit to represent
 * whether it is a head cluster (the first cluster of a cluster
 * chain).
 *
 * Head bitmap
 * ===========
 * Initially, we set all bits to 1.  In readfat(), we traverse the
 * whole FAT and clear the bit of every cluster that is referenced
 * as a "next" cluster.  After the scan, the remaining 1 bits
 * indicate that the corresponding clusters are "head" clusters.
 *
 * We use the head bitmap to identify lost chains: a head cluster
 * that is not claimed by any file or directory is the head cluster
 * of a lost chain.
 *
 * Handling of lost chains
 * =======================
 * At the end of scanning, we can easily find the heads of all lost
 * chains by locating the 1 bits that remain in the head bitmap.
 */
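
/*
 * Illustrative example: for a FAT whose only chain is
 * 2 -> 5 -> 9 -> EOF, readfat() clears the bits of clusters 5 and 9
 * (each appears as some cluster's "next" value), leaving only bit 2
 * set: cluster 2 is the head of the chain.
 */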

typedef struct long_bitmap {
	unsigned long	*map;
	size_t		 count;		/* Total set bits in the map */
} long_bitmap_t;

static inline void
bitmap_clear(long_bitmap_t *lbp, cl_t cl)
{
	cl_t i = cl / LONG_BIT;
	unsigned long clearmask = ~(1UL << (cl % LONG_BIT));

	assert((lbp->map[i] & ~clearmask) != 0);
	lbp->map[i] &= clearmask;
	lbp->count--;
}

static inline bool
bitmap_get(long_bitmap_t *lbp, cl_t cl)
{
	cl_t i = cl / LONG_BIT;
	unsigned long usedbit = 1UL << (cl % LONG_BIT);

	return ((lbp->map[i] & usedbit) == usedbit);
}

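/*
 * Check whether the LONG_BIT-sized word of the map that contains
 * cluster 'cl' is all zero, i.e., whether no cluster in that range is
 * a head cluster.  The caller passes a 'cl' aligned to LONG_BIT.
 */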
static inline bool
bitmap_none_in_range(long_bitmap_t *lbp, cl_t cl)
{
	cl_t i = cl / LONG_BIT;

	return (lbp->map[i] == 0);
}

static inline size_t
bitmap_count(long_bitmap_t *lbp)
{
	return (lbp->count);
}

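/*
 * Initialize the bitmap: the allocation size is 'bits' rounded up to
 * a whole number of longs, expressed in bytes (LONG_BIT / 8 bytes per
 * long).  As used here, passing allone = true marks every cluster as
 * a head cluster to start with.
 */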
static int
bitmap_ctor(long_bitmap_t *lbp, size_t bits, bool allone)
{
	size_t bitmap_size = roundup2(bits, LONG_BIT) / (LONG_BIT / 8);

	free(lbp->map);
	lbp->map = calloc(1, bitmap_size);
	if (lbp->map == NULL)
		return FSFATAL;

	if (allone) {
		memset(lbp->map, 0xff, bitmap_size);
		lbp->count = bits;
	} else {
		lbp->count = 0;
	}
	return FSOK;
}

static void
bitmap_dtor(long_bitmap_t *lbp)
{
	free(lbp->map);
	lbp->map = NULL;
}

/*
 * A FAT32 FAT can be as big as 256MiB (2^26 entries * 4 bytes).  When
 * we cannot ask the kernel to manage the access (i.e., when we cannot
 * mmap() it), use a simple LRU cache with a chunk size of 128 KiB to
 * manage it.
 */
struct fat32_cache_entry {
	TAILQ_ENTRY(fat32_cache_entry)	entries;
	uint8_t			*chunk;		/* pointer to chunk */
	off_t			 addr;		/* offset */
	bool			 dirty;		/* dirty bit */
};

static const size_t	fat32_cache_chunk_size = 131072; /* MAXPHYS */
static const size_t	fat32_cache_size = 4194304;
static const size_t	fat32_cache_entries = 32; /* XXXgcc: cache_size / cache_chunk_size */

/*
 * FAT table descriptor, represents a FAT table that is already loaded
 * into memory.
 */
struct fat_descriptor {
	struct bootblock	*boot;
	uint8_t			*fatbuf;
	cl_t			(*get)(struct fat_descriptor *, cl_t);
	int			(*set)(struct fat_descriptor *, cl_t, cl_t);
	long_bitmap_t		 headbitmap;
	int			 fd;
	bool			 is_mmapped;
	bool			 use_cache;
	size_t			 fatsize;

	size_t			 fat32_cached_chunks;
	TAILQ_HEAD(cachehead, fat32_cache_entry)	fat32_cache_head;
	struct fat32_cache_entry	*fat32_cache_allentries;
	off_t			 fat32_offset;
	off_t			 fat32_lastaddr;
};
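
/*
 * The get/set accessors above are selected in readfat() based on the
 * FAT type (boot->ClustMask) and on whether the FAT ended up being
 * memory-mapped, read into the heap, or managed by the FAT32 LRU
 * cache.
 */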

void
fat_clear_cl_head(struct fat_descriptor *fat, cl_t cl)
{
	bitmap_clear(&fat->headbitmap, cl);
}

bool
fat_is_cl_head(struct fat_descriptor *fat, cl_t cl)
{
	return (bitmap_get(&fat->headbitmap, cl));
}

static inline bool
fat_is_cl_head_in_range(struct fat_descriptor *fat, cl_t cl)
{
	return (!(bitmap_none_in_range(&fat->headbitmap, cl)));
}

static size_t
fat_get_head_count(struct fat_descriptor *fat)
{
	return (bitmap_count(&fat->headbitmap));
}

/*
 * FAT12 accessors.
 *
 * FAT12 FATs are small enough that we expect them to always fit in
 * RAM.
 */
static inline uint8_t *
fat_get_fat12_ptr(struct fat_descriptor *fat, cl_t cl)
{
	return (fat->fatbuf + ((cl + (cl >> 1))));
}
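
/*
 * Illustrative example of the FAT12 packing handled by the accessors:
 * two consecutive 12-bit entries occupy three bytes.  For an even
 * cluster 2 holding value 0x345 and an odd cluster 3 holding value
 * 0x678, the bytes starting at offset 3 (= 2 + 2/2) are:
 *
 *	p[0] = 0x45	(low 8 bits of entry 2)
 *	p[1] = 0x83	(high 4 bits of entry 2, low 4 bits of entry 3)
 *	p[2] = 0x67	(high 8 bits of entry 3)
 */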

static cl_t
fat_get_fat12_next(struct fat_descriptor *fat, cl_t cl)
{
	const uint8_t	*p;
	cl_t	retval;

	p = fat_get_fat12_ptr(fat, cl);
	retval = le16dec(p);
	/* Odd cluster: the lower 4 bits belong to the preceding cluster */
	if ((cl & 1) == 1)
		retval >>= 4;
	retval &= CLUST12_MASK;

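	/*
	 * Sign-extend reserved values (>= CLUST_BAD) into the canonical
	 * 32-bit CLUST_* constants used throughout fsck_msdosfs; the
	 * FAT16/32 accessors below do the same with their own masks.
	 */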
	if (retval >= (CLUST_BAD & CLUST12_MASK))
		retval |= ~CLUST12_MASK;

	return (retval);
}

static int
fat_set_fat12_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{
	uint8_t	*p;

	/* Truncate 'nextcl' value, if needed */
	nextcl &= CLUST12_MASK;

	p = fat_get_fat12_ptr(fat, cl);

	/*
	 * Read in the 4 bits that belong to the subsequent (for even
	 * clusters) or the preceding (for odd clusters) cluster and
	 * combine them with the nextcl value for encoding
	 */
	if ((cl & 1) == 0) {
		nextcl |= ((p[1] & 0xf0) << 8);
	} else {
		nextcl <<= 4;
		nextcl |= (p[0] & 0x0f);
	}

	le16enc(p, (uint16_t)nextcl);

	return (0);
}

/*
 * FAT16 accessors.
 *
 * FAT16 FATs are small enough that we expect them to always fit in
 * RAM.
 */
static inline uint8_t *
fat_get_fat16_ptr(struct fat_descriptor *fat, cl_t cl)
{
	return (fat->fatbuf + (cl << 1));
}

static cl_t
fat_get_fat16_next(struct fat_descriptor *fat, cl_t cl)
{
	const uint8_t	*p;
	cl_t	retval;

	p = fat_get_fat16_ptr(fat, cl);
	retval = le16dec(p) & CLUST16_MASK;

	if (retval >= (CLUST_BAD & CLUST16_MASK))
		retval |= ~CLUST16_MASK;

	return (retval);
}

static int
fat_set_fat16_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{
	uint8_t	*p;

	/* Truncate 'nextcl' value, if needed */
	nextcl &= CLUST16_MASK;

	p = fat_get_fat16_ptr(fat, cl);

	le16enc(p, (uint16_t)nextcl);

	return (0);
}

/*
 * FAT32 accessors.
 */
static inline uint8_t *
fat_get_fat32_ptr(struct fat_descriptor *fat, cl_t cl)
{
	return (fat->fatbuf + (cl << 2));
}

static cl_t
fat_get_fat32_next(struct fat_descriptor *fat, cl_t cl)
{
	const uint8_t	*p;
	cl_t	retval;

	p = fat_get_fat32_ptr(fat, cl);
	retval = le32dec(p) & CLUST32_MASK;

	if (retval >= (CLUST_BAD & CLUST32_MASK))
		retval |= ~CLUST32_MASK;

	return (retval);
}

static int
fat_set_fat32_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{
	uint8_t	*p;

	/* Truncate 'nextcl' value, if needed */
	nextcl &= CLUST32_MASK;

	p = fat_get_fat32_ptr(fat, cl);

	le32enc(p, (uint32_t)nextcl);

	return (0);
}

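/*
 * I/O size for the chunk at 'address': the last, possibly partial
 * chunk gets the remainder of the FAT size; every other chunk is a
 * full fat32_cache_chunk_size.
 */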
static inline size_t
fat_get_iosize(struct fat_descriptor *fat, off_t address)
{

	if (address == fat->fat32_lastaddr) {
		return (fat->fatsize & ((off_t)fat32_cache_chunk_size - 1));
	} else {
		return (fat32_cache_chunk_size);
	}
}

static int
fat_flush_fat32_cache_entry(struct fat_descriptor *fat,
    struct fat32_cache_entry *entry)
{
	int fd;
	off_t fat_addr;
	size_t writesize;

	fd = fd_of_(fat);

	if (!entry->dirty)
		return (FSOK);

	writesize = fat_get_iosize(fat, entry->addr);

	fat_addr = fat->fat32_offset + entry->addr;
	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
	    (size_t)write(fd, entry->chunk, writesize) != writesize) {
		pfatal("Unable to write FAT");
		return (FSFATAL);
	}

	entry->dirty = false;
	return (FSOK);
}

static struct fat32_cache_entry *
fat_get_fat32_cache_entry(struct fat_descriptor *fat, off_t addr,
    bool writing)
{
	int fd;
	struct fat32_cache_entry *entry, *first;
	off_t	fat_addr;
	size_t	rwsize;

	addr &= ~(fat32_cache_chunk_size - 1);

	first = TAILQ_FIRST(&fat->fat32_cache_head);

	/*
	 * Cache hit: if we already have the chunk, move it to the head
	 * of the list
	 */
	TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
		if (entry->addr == addr) {
			if (writing) {
				entry->dirty = true;
			}
			if (entry != first) {
				TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
				TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);
			}
			return (entry);
		}
	}

	/*
	 * Cache miss: detach the entry at the tail of the list, reuse
	 * it for the requested chunk, and populate it with data from
	 * disk.
	 */
	entry = TAILQ_LAST(&fat->fat32_cache_head, cachehead);
	TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
	if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
		return (NULL);
	}

	rwsize = fat_get_iosize(fat, addr);
	fat_addr = fat->fat32_offset + addr;
	entry->addr = addr;
	fd = fd_of_(fat);
	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
	    (size_t)read(fd, entry->chunk, rwsize) != rwsize) {
		pfatal("Unable to read FAT");
		return (NULL);
	}
	if (writing) {
		entry->dirty = true;
	}
	TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);

	return (entry);
}

static inline uint8_t *
fat_get_fat32_cached_ptr(struct fat_descriptor *fat, cl_t cl, bool writing)
{
	off_t addr, off;
	struct fat32_cache_entry *entry;

	addr = cl << 2;
	entry = fat_get_fat32_cache_entry(fat, addr, writing);

	if (entry != NULL) {
		off = addr & (fat32_cache_chunk_size - 1);
		return (entry->chunk + off);
	} else {
		return (NULL);
	}
}

static cl_t
fat_get_fat32_cached_next(struct fat_descriptor *fat, cl_t cl)
{
	const uint8_t	*p;
	cl_t	retval;

	p = fat_get_fat32_cached_ptr(fat, cl, false);
	if (p != NULL) {
		retval = le32dec(p) & CLUST32_MASK;
		if (retval >= (CLUST_BAD & CLUST32_MASK))
			retval |= ~CLUST32_MASK;
	} else {
		retval = CLUST_DEAD;
	}

	return (retval);
}

static int
fat_set_fat32_cached_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{
	uint8_t	*p;

	/* Truncate 'nextcl' value, if needed */
	nextcl &= CLUST32_MASK;

	p = fat_get_fat32_cached_ptr(fat, cl, true);
	if (p != NULL) {
		le32enc(p, (uint32_t)nextcl);
		return FSOK;
	} else {
		return FSFATAL;
	}
}

cl_t
fat_get_cl_next(struct fat_descriptor *fat, cl_t cl)
{

	if (!valid_cl(fat, cl)) {
		pfatal("Invalid cluster: %u", cl);
		return CLUST_DEAD;
	}

	return (fat->get(fat, cl));
}

int
fat_set_cl_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
{

	if (rdonly) {
		pwarn(" (NO WRITE)\n");
		return FSFATAL;
	}

	if (!valid_cl(fat, cl)) {
		pfatal("Invalid cluster: %u", cl);
		return FSFATAL;
	}

	return (fat->set(fat, cl, nextcl));
}

static inline struct bootblock *
boot_of_(struct fat_descriptor *fat)
{

	return (fat->boot);
}

struct bootblock *
fat_get_boot(struct fat_descriptor *fat)
{

	return (boot_of_(fat));
}

static inline int
fd_of_(struct fat_descriptor *fat)
{
	return (fat->fd);
}

int
fat_get_fd(struct fat_descriptor *fat)
{
	return (fd_of_(fat));
}

/*
 * Whether a cluster is within the valid data cluster range.
 */
bool
fat_is_valid_cl(struct fat_descriptor *fat, cl_t cl)
{

	return (valid_cl(fat, cl));
}

static inline bool
valid_cl(struct fat_descriptor *fat, cl_t cl)
{
	const struct bootblock *boot = boot_of_(fat);

	return (cl >= CLUST_FIRST && cl < boot->NumClusters);
}

/*
 * Read a FAT from disk.  Returns 1 if successful, 0 otherwise.
 */
static int
_readfat(struct fat_descriptor *fat)
{
	int fd;
	size_t i;
	off_t off;
	size_t readsize;
	struct bootblock *boot;
	struct fat32_cache_entry *entry;

	boot = boot_of_(fat);
	fd = fd_of_(fat);
	fat->fatsize = boot->FATsecs * boot->bpbBytesPerSec;

	off = boot->bpbResSectors;
	off *= boot->bpbBytesPerSec;

	fat->is_mmapped = false;
	fat->use_cache = false;

	/* Attempt to mmap() first */
	if (allow_mmap) {
		fat->fatbuf = mmap(NULL, fat->fatsize,
				PROT_READ | (rdonly ? 0 : PROT_WRITE),
				MAP_SHARED, fd_of_(fat), off);
		if (fat->fatbuf != MAP_FAILED) {
			fat->is_mmapped = true;
			return 1;
		}
	}

	/*
	 * Unfortunately, we were unable to mmap().
	 *
	 * Only use the cache manager when it is necessary, that is,
	 * when the FAT is sufficiently large; in that case, read only
	 * the first fat32_cache_size bytes of the FAT into memory,
	 * then split the buffer into chunks and insert them into the
	 * LRU queue to populate the cache.
	 */
	if (boot->ClustMask == CLUST32_MASK &&
	    fat->fatsize >= fat32_cache_size) {
		readsize = fat32_cache_size;
		fat->use_cache = true;

		fat->fat32_offset = boot->bpbResSectors * boot->bpbBytesPerSec;
		fat->fat32_lastaddr = fat->fatsize & ~(fat32_cache_chunk_size - 1);
	} else {
		readsize = fat->fatsize;
	}
	fat->fatbuf = malloc(readsize);
	if (fat->fatbuf == NULL) {
		perr("No space for FAT (%zu)", readsize);
		return 0;
	}

	if (lseek(fd, off, SEEK_SET) != off) {
		perr("Unable to read FAT");
		goto err;
	}
	if ((size_t)read(fd, fat->fatbuf, readsize) != readsize) {
		perr("Unable to read FAT");
		goto err;
	}

	/*
	 * When the cache is used, split the buffer into chunks, and
	 * link the chunks into the cache.
	 */
	if (fat->use_cache) {
		TAILQ_INIT(&fat->fat32_cache_head);
		entry = calloc(fat32_cache_entries, sizeof(*entry));
		if (entry == NULL) {
			perr("No space for FAT cache (%zu of %zu)",
			    fat32_cache_entries, sizeof(*entry));
			goto err;
		}
		for (i = 0; i < fat32_cache_entries; i++) {
			entry[i].addr = fat32_cache_chunk_size * i;
			entry[i].chunk = &fat->fatbuf[entry[i].addr];
			TAILQ_INSERT_TAIL(&fat->fat32_cache_head,
			    &entry[i], entries);
		}
		fat->fat32_cache_allentries = entry;
	}

	return 1;

err:
	free(fat->fatbuf);
	fat->fatbuf = NULL;
	return 0;
}

static void
releasefat(struct fat_descriptor *fat)
{
	if (fat->is_mmapped) {
		munmap(fat->fatbuf, fat->fatsize);
	} else {
		if (fat->use_cache) {
			free(fat->fat32_cache_allentries);
			fat->fat32_cache_allentries = NULL;
		}
		free(fat->fatbuf);
	}
	fat->fatbuf = NULL;
	bitmap_dtor(&fat->headbitmap);
}

/*
 * Read or map a FAT and populate the head bitmap.
 */
int
readfat(int fs, struct bootblock *boot, struct fat_descriptor **fp)
{
	struct fat_descriptor *fat;
	u_char *buffer, *p;
	cl_t cl, nextcl;
	int ret = FSOK;

	boot->NumFree = boot->NumBad = 0;

	fat = calloc(1, sizeof(struct fat_descriptor));
	if (fat == NULL) {
		perr("No space for FAT descriptor");
		return FSFATAL;
	}

	fat->fd = fs;
	fat->boot = boot;

	if (!_readfat(fat)) {
		free(fat);
		return FSFATAL;
	}
	buffer = fat->fatbuf;

	/* Populate accessors */
	switch (boot->ClustMask) {
	case CLUST12_MASK:
		fat->get = fat_get_fat12_next;
		fat->set = fat_set_fat12_next;
		break;
	case CLUST16_MASK:
		fat->get = fat_get_fat16_next;
		fat->set = fat_set_fat16_next;
		break;
	case CLUST32_MASK:
		if (fat->is_mmapped || !fat->use_cache) {
			fat->get = fat_get_fat32_next;
			fat->set = fat_set_fat32_next;
		} else {
			fat->get = fat_get_fat32_cached_next;
			fat->set = fat_set_fat32_cached_next;
		}
		break;
	default:
		pfatal("Invalid ClustMask: %u", boot->ClustMask);
		releasefat(fat);
		free(fat);
		return FSFATAL;
	}

	if (bitmap_ctor(&fat->headbitmap, boot->NumClusters,
	    true) != FSOK) {
		perr("No space for head bitmap for FAT clusters (%zu)",
		    (size_t)boot->NumClusters);
		releasefat(fat);
		free(fat);
		return FSFATAL;
	}

	if (buffer[0] != boot->bpbMedia
	    || buffer[1] != 0xff || buffer[2] != 0xff
	    || (boot->ClustMask == CLUST16_MASK && buffer[3] != 0xff)
	    || (boot->ClustMask == CLUST32_MASK
		&& ((buffer[3]&0x0f) != 0x0f
		    || buffer[4] != 0xff || buffer[5] != 0xff
		    || buffer[6] != 0xff || (buffer[7]&0x0f) != 0x0f))) {

		/*
		 * Windows 95 OSR2 (and possibly any later version)
		 * changes the FAT signature to 0xXXffff7f for FAT16 and
		 * to 0xXXffff0fffffff07 for FAT32 upon boot, so that it
		 * can tell that the file system is dirty if it did not
		 * shut down cleanly.  Check for this special condition
		 * before erroring out.
		 */
		if (buffer[0] == boot->bpbMedia && buffer[1] == 0xff
		    && buffer[2] == 0xff
		    && ((boot->ClustMask == CLUST16_MASK && buffer[3] == 0x7f)
			|| (boot->ClustMask == CLUST32_MASK
			    && buffer[3] == 0x0f && buffer[4] == 0xff
			    && buffer[5] == 0xff && buffer[6] == 0xff
			    && buffer[7] == 0x07)))
			ret |= FSDIRTY;
		else {
			/* just some odd byte sequence in FAT */
			switch (boot->ClustMask) {
			case CLUST32_MASK:
				pwarn("%s (%02x%02x%02x%02x%02x%02x%02x%02x)\n",
				      "FAT starts with odd byte sequence",
				      buffer[0], buffer[1], buffer[2], buffer[3],
				      buffer[4], buffer[5], buffer[6], buffer[7]);
				break;
			case CLUST16_MASK:
				pwarn("%s (%02x%02x%02x%02x)\n",
				    "FAT starts with odd byte sequence",
				    buffer[0], buffer[1], buffer[2], buffer[3]);
				break;
			default:
				pwarn("%s (%02x%02x%02x)\n",
				    "FAT starts with odd byte sequence",
				    buffer[0], buffer[1], buffer[2]);
				break;
			}

			if (ask(1, "Correct")) {
				ret |= FSFATMOD;
				p = buffer;

				*p++ = (u_char)boot->bpbMedia;
				*p++ = 0xff;
				*p++ = 0xff;
				switch (boot->ClustMask) {
				case CLUST16_MASK:
					*p++ = 0xff;
					break;
				case CLUST32_MASK:
					*p++ = 0x0f;
					*p++ = 0xff;
					*p++ = 0xff;
					*p++ = 0xff;
					*p++ = 0x0f;
					break;
				default:
					break;
				}
			}
		}
	}

	/*
	 * Traverse the FAT and populate the head map.  Initially, we
	 * consider every cluster a possible head cluster (the beginning
	 * of a file or directory), then walk the whole allocation
	 * table, marking every non-head node as such (detailed below)
	 * and fixing obvious issues as we go.
	 *
	 * For each "next" cluster, the possible values are:
	 *
	 * a) CLUST_FREE or CLUST_BAD.  The *current* cluster can't be a
	 *    head node.
	 * b) An out-of-range value.  The only fix is to truncate at
	 *    the current cluster.
	 * c) A valid cluster.  It means that cluster (nextcl) is not a
	 *    head cluster.  Note that during the scan, every cluster is
	 *    expected to be seen at most once as a "next" value; seeing
	 *    one twice indicates a cross-linked chain, which should be
	 *    truncated at the current cluster.
	 *
	 * After the scan, the remaining set bits indicate all possible
	 * head nodes: they were never claimed by any other node as the
	 * next node.  We do not yet know whether these chains end with
	 * a valid EOF marker; that is checked in checkchain() later,
	 * when directories are examined and their head nodes are marked
	 * as non-head.
	 *
	 * In the final pass, all remaining head nodes are the heads of
	 * lost chains.
	 */
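	/*
	 * Illustrative example: with FAT entries [2] = 3, [3] = EOF,
	 * [4] = 3, the scan first sees 2 -> 3 and clears the head bit
	 * of cluster 3; when it later sees 4 -> 3, that bit is already
	 * clear, so cluster 4 is reported as crossing another chain and
	 * is truncated (its entry rewritten to CLUST_EOF) if the user
	 * agrees.
	 */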
	for (cl = CLUST_FIRST; cl < boot->NumClusters; cl++) {
		nextcl = fat_get_cl_next(fat, cl);

		/* Check if the next cluster number is valid */
		if (nextcl == CLUST_FREE) {
			/* Save a hint for next free cluster */
			if (boot->FSNext == 0) {
				boot->FSNext = cl;
			}
			if (fat_is_cl_head(fat, cl)) {
				fat_clear_cl_head(fat, cl);
			}
			boot->NumFree++;
		} else if (nextcl == CLUST_BAD) {
			if (fat_is_cl_head(fat, cl)) {
				fat_clear_cl_head(fat, cl);
			}
			boot->NumBad++;
		} else if (!valid_cl(fat, nextcl) && nextcl < CLUST_RSRVD) {
			pwarn("Cluster %u continues with out of range "
			    "cluster number %u\n",
			    cl,
			    nextcl & boot->ClustMask);
			if (ask(0, "Truncate")) {
				ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
				ret |= FSFATMOD;
			}
		} else if (valid_cl(fat, nextcl)) {
			if (fat_is_cl_head(fat, nextcl)) {
				fat_clear_cl_head(fat, nextcl);
			} else {
				pwarn("Cluster %u crossed another chain at %u\n",
				    cl, nextcl);
				if (ask(0, "Truncate")) {
					ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
					ret |= FSFATMOD;
				}
			}
		}
	}

	if (ret & FSFATAL) {
		releasefat(fat);
		free(fat);
		*fp = NULL;
	} else
		*fp = fat;
	return ret;
}

/*
 * Get type of reserved cluster
 */
const char *
rsrvdcltype(cl_t cl)
{
	if (cl == CLUST_FREE)
		return "free";
	if (cl < CLUST_BAD)
		return "reserved";
	if (cl > CLUST_BAD)
		return "as EOF";
	return "bad";
}

/*
 * Examine a cluster chain for errors and count its size.
 */
int
checkchain(struct fat_descriptor *fat, cl_t head, size_t *chainsize)
{
	cl_t prev_cl, current_cl, next_cl;
	const char *op;

	/*
	 * We expect the caller to give us a real, unvisited 'head'
	 * cluster, and it must be a valid cluster.  While scanning the
	 * FAT, we already excluded all clusters that were claimed as a
	 * "next" cluster.  Assert these invariants.
	 */
	assert(valid_cl(fat, head));
	assert(fat_is_cl_head(fat, head));

	/*
	 * Immediately mark the 'head' cluster that we are about to visit.
	 */
	fat_clear_cl_head(fat, head);

	/*
	 * The allocation of a non-zero sized file or directory is
	 * represented as a singly linked list, whose tail node is the
	 * EOF marker (>= CLUST_EOFS).
	 *
	 * With a valid head node at hand, we expect every subsequent
	 * cluster to be either a valid, not-yet-seen cluster (we
	 * continue counting) or the EOF marker (we conclude the scan
	 * of this chain).
	 *
	 * In all other cases, the chain is invalid, and the only
	 * viable fix is to truncate at the current node (mark it as
	 * EOF) when the next node violates that expectation.
	 */
	*chainsize = 0;
	prev_cl = current_cl = head;
	for (next_cl = fat_get_cl_next(fat, current_cl);
	    valid_cl(fat, next_cl);
	    prev_cl = current_cl, current_cl = next_cl,
	    next_cl = fat_get_cl_next(fat, current_cl))
		(*chainsize)++;

	/* A natural end */
	if (next_cl >= CLUST_EOFS) {
		(*chainsize)++;
		return FSOK;
	}

	/*
	 * The chain ended with an out-of-range cluster number.
	 *
	 * If the current node's FAT entry is a reserved marker, e.g.
	 * CLUST_FREE or CLUST_BAD, the current node itself should not
	 * be part of a chain, and we have to truncate at the previous
	 * node.
	 *
	 * If the current node points to an invalid cluster, the
	 * current node might still contain useful data, so we truncate
	 * at the current node instead.
	 */
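	/*
	 * Illustrative example: for the chain 7 -> 8 where the FAT
	 * entry of cluster 8 turns out to be CLUST_FREE (cluster 8 is
	 * actually free), the loop above stops with prev_cl == 7,
	 * current_cl == 8, and next_cl == CLUST_FREE; we then truncate
	 * at cluster 7 by rewriting its entry to CLUST_EOF below.
	 */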
	if (next_cl == CLUST_FREE || next_cl >= CLUST_RSRVD) {
		pwarn("Cluster chain starting at %u ends with cluster marked %s\n",
		    head, rsrvdcltype(next_cl));
		current_cl = prev_cl;
	} else {
		pwarn("Cluster chain starting at %u ends with cluster out of range (%u)\n",
		    head,
		    next_cl & boot_of_(fat)->ClustMask);
		(*chainsize)++;
	}

	if (*chainsize > 0) {
		op = "Truncate";
		next_cl = CLUST_EOF;
	} else {
		op = "Clear";
		next_cl = CLUST_FREE;
	}
	if (ask(0, "%s", op)) {
		return (fat_set_cl_next(fat, current_cl, next_cl) | FSFATMOD);
	} else {
		return (FSERROR);
	}
}

/*
 * Clear cluster chain from head.
 */
void
clearchain(struct fat_descriptor *fat, cl_t head)
{
	cl_t current_cl, next_cl;
	struct bootblock *boot = boot_of_(fat);

	current_cl = head;

	while (valid_cl(fat, current_cl)) {
		next_cl = fat_get_cl_next(fat, current_cl);
		(void)fat_set_cl_next(fat, current_cl, CLUST_FREE);
		boot->NumFree++;
		current_cl = next_cl;
	}
}

/*
 * Overwrite the n-th FAT with FAT0
 */
static int
copyfat(struct fat_descriptor *fat, int n)
{
	size_t	rwsize, tailsize, blobs, i;
	off_t	dst_off, src_off;
	struct bootblock *boot;
	int ret, fd;

	ret = FSOK;
	fd = fd_of_(fat);
	boot = boot_of_(fat);

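	/*
	 * Note: copyfat() is only used in cache mode, where fat->fatbuf
	 * holds the first fat32_cache_size bytes of the FAT; it is
	 * reused here as a bounce buffer while copying FAT0 to the n-th
	 * FAT, fat32_cache_size bytes at a time.
	 */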
	blobs = howmany(fat->fatsize, fat32_cache_size);
	tailsize = fat->fatsize % fat32_cache_size;
	if (tailsize == 0) {
		tailsize = fat32_cache_size;
	}
	rwsize = fat32_cache_size;

	src_off = fat->fat32_offset;
	dst_off = boot->bpbResSectors + n * boot->FATsecs;
	dst_off *= boot->bpbBytesPerSec;

	for (i = 0; i < blobs;
	    i++, src_off += fat32_cache_size, dst_off += fat32_cache_size) {
		if (i == blobs - 1) {
			rwsize = tailsize;
		}
		if ((lseek(fd, src_off, SEEK_SET) != src_off ||
		    (size_t)read(fd, fat->fatbuf, rwsize) != rwsize) &&
		    ret == FSOK) {
			perr("Unable to read FAT0");
			ret = FSFATAL;
			continue;
		}
		if ((lseek(fd, dst_off, SEEK_SET) != dst_off ||
		    (size_t)write(fd, fat->fatbuf, rwsize) != rwsize) &&
		    ret == FSOK) {
			perr("Unable to write FAT %d", n);
			ret = FSERROR;
		}
	}
	return (ret);
}

/*
 * Write out FAT
 */
int
writefat(struct fat_descriptor *fat)
{
	u_int i;
	size_t writesz;
	off_t dst_base;
	int ret = FSOK, fd;
	struct bootblock *boot;
	struct fat32_cache_entry *entry;

	boot = boot_of_(fat);
	fd = fd_of_(fat);

	if (fat->use_cache) {
		/*
		 * Attempt to flush all dirty cache entries, and bail
		 * out if we encounter an error (but emit the error
		 * message only once).  Do not proceed with copyfat()
		 * if any flush failed.
		 */
		TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
			if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
				if (ret == FSOK) {
					perr("Unable to write FAT");
					ret = FSFATAL;
				}
			}
		}
		if (ret != FSOK)
			return (ret);

		/* Update backup copies of FAT; an error here is not fatal */
		for (i = 1; i < boot->bpbFATs; i++) {
			if (copyfat(fat, i) != FSOK)
				ret = FSERROR;
		}
	} else {
		writesz = fat->fatsize;

		for (i = fat->is_mmapped ? 1 : 0; i < boot->bpbFATs; i++) {
			dst_base = boot->bpbResSectors + i * boot->FATsecs;
			dst_base *= boot->bpbBytesPerSec;
			if ((lseek(fd, dst_base, SEEK_SET) != dst_base ||
			    (size_t)write(fd, fat->fatbuf, writesz) != writesz) &&
			    ret == FSOK) {
				perr("Unable to write FAT %d", i);
				ret = ((i == 0) ? FSFATAL : FSERROR);
			}
		}
	}

	return ret;
}

/*
 * Check a complete in-memory FAT for lost cluster chains
 */
int
checklost(struct fat_descriptor *fat)
{
	cl_t head;
	int mod = FSOK;
	int dosfs, ret;
	size_t chains, chainlength;
	struct bootblock *boot;

	dosfs = fd_of_(fat);
	boot = boot_of_(fat);

	/*
	 * At this point, we have already traversed all directories.
	 * All remaining chain heads in the bitmap are heads of lost
	 * chains.
	 */
	chains = fat_get_head_count(fat);
	for (head = CLUST_FIRST;
	    chains > 0 && head < boot->NumClusters;
	    ) {
		/*
		 * We expect the bitmap to be very sparse, so skip if
		 * the range is full of 0's
		 */
		if (head % LONG_BIT == 0 &&
		    !fat_is_cl_head_in_range(fat, head)) {
			head += LONG_BIT;
			continue;
		}
		if (fat_is_cl_head(fat, head)) {
			ret = checkchain(fat, head, &chainlength);
			if (ret != FSERROR && chainlength > 0) {
				pwarn("Lost cluster chain at cluster %u\n"
				    "%zu Cluster(s) lost\n",
				    head, chainlength);
				mod |= ret = reconnect(fat, head,
				    chainlength);
			}
			if (mod & FSFATAL)
				break;
			if (ret == FSERROR && ask(0, "Clear")) {
				clearchain(fat, head);
				mod |= FSFATMOD;
			}
			chains--;
		}
		head++;
	}

	finishlf();

	if (boot->bpbFSInfo) {
		ret = 0;
		if (boot->FSFree != 0xffffffffU &&
		    boot->FSFree != boot->NumFree) {
			pwarn("Free space in FSInfo block (%u) not correct (%u)\n",
			      boot->FSFree, boot->NumFree);
			if (ask(1, "Fix")) {
				boot->FSFree = boot->NumFree;
				ret = 1;
			}
		}
		if (boot->FSNext != 0xffffffffU &&
		    (boot->FSNext >= boot->NumClusters ||
		    (boot->NumFree && fat_get_cl_next(fat, boot->FSNext) != CLUST_FREE))) {
			pwarn("Next free cluster in FSInfo block (%u) %s\n",
			      boot->FSNext,
			      (boot->FSNext >= boot->NumClusters) ? "invalid" : "not free");
			if (ask(1, "Fix"))
				for (head = CLUST_FIRST; head < boot->NumClusters; head++)
					if (fat_get_cl_next(fat, head) == CLUST_FREE) {
						boot->FSNext = head;
						ret = 1;
						break;
					}
		}
		if (ret)
			mod |= writefsinfo(dosfs, boot);
	}

	return mod;
}
1192