xref: /reactos/sdk/lib/fslib/vfatlib/check/lfn.c (revision 8786e12d)
1 /* lfn.c - Functions for handling VFAT long filenames
2 
3    Copyright (C) 1998 Roman Hodek <Roman.Hodek@informatik.uni-erlangen.de>
4    Copyright (C) 2008-2014 Daniel Baumann <mail@daniel-baumann.ch>
5    Copyright (C) 2015 Andreas Bombe <aeb@debian.org>
6 
7    This program is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation, either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program. If not, see <http://www.gnu.org/licenses/>.
19 
20    The complete text of the GNU General Public License
21    can be found in /usr/share/common-licenses/GPL-3 file.
22 */
23 
24 #include "vfatlib.h"
25 
26 #define NDEBUG
27 #include <debug.h>
28 
/* Layout of one VFAT long-filename directory slot as it is read from and
 * written to the filesystem. Each slot carries CHARS_PER_LFN name characters
 * of 2 bytes each, split over three separate byte arrays. */
typedef struct {
    uint8_t id;			/* sequence number for slot */
    uint8_t name0_4[10];	/* first 5 characters in name */
    uint8_t attr;		/* attribute byte */
    uint8_t reserved;		/* always 0 */
    uint8_t alias_checksum;	/* checksum for 8.3 alias */
    uint8_t name5_10[12];	/* 6 more characters in name */
    uint16_t start;		/* starting cluster number, 0 in long slots */
    uint8_t name11_12[4];	/* last 2 characters in name */
} LFN_ENT;
39 
/* Bit in LFN_ENT.id that marks the slot which starts a long-name sequence */
#define LFN_ID_START	0x40
/* Mask that extracts the slot sequence number from LFN_ENT.id */
#define LFN_ID_SLOTMASK	0x1f

/* Number of name characters stored in one LFN slot (5 + 6 + 2) */
#define CHARS_PER_LFN	13
44 
/* These module-global vars represent the state of the LFN parser */
/* collected name, 2 bytes per character; part for slot n is stored at
 * byte offset (n - 1) * CHARS_PER_LFN * 2 */
unsigned char *lfn_unicode = NULL;
/* alias checksum taken from the slot that started the current sequence */
unsigned char lfn_checksum;
/* sequence number expected in the next slot; -1 means no long name is in
 * progress, 0 means all slots of the current name have been collected */
int lfn_slot = -1;
/* directory offsets of the slots collected so far, in the order seen */
off_t *lfn_offsets = NULL;
/* number of slots collected so far (valid entries in lfn_offsets) */
int lfn_parts = 0;
51 
/* Alphabet used by cnv_unicode() to print one 6-bit group of a character
 * code in the ":xxx" escape notation. The table is only ever read, so it is
 * declared const. */
static const unsigned char fat_uni2esc[64] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
    'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
    'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
    'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
    'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
    'u', 'v', 'w', 'x', 'y', 'z', '+', '-'
};
62 
/* This defines which unicode chars are directly convertible to ISO-8859-1:
 * the high byte 'ch' must be 0 and the low byte 'cl' must lie outside the
 * range 0x80..0x9f. Both arguments are parenthesized so that expressions
 * with low-precedence operators can be passed safely; note that 'cl' may be
 * evaluated twice. */
#define UNICODE_CONVERTABLE(cl,ch)	((ch) == 0 && ((cl) < 0x80 || (cl) >= 0xa0))

/* for maxlen param: convert until the terminating 0 character is found */
#define UNTIL_0		INT_MAX
68 
#ifdef __REACTOS__
/* Forward declarations: in this configuration CNV_THIS_PART() is an inline
 * function that calls both helpers before their definitions appear. */
static void copy_lfn_part(unsigned char *dst, LFN_ENT * lfn);
static char *cnv_unicode(const unsigned char *uni, int maxlen, int use_q);
#endif
73 
/* Convert name part in 'lfn' from unicode to ASCII.
 * The CHARS_PER_LFN characters of the slot are gathered into a temporary
 * buffer with copy_lfn_part() and converted with cnv_unicode() using
 * use_q = 0, i.e. the result comes from alloc(); the callers in this file
 * release it with free().
 * Without __REACTOS__ this is a statement-expression macro; with __REACTOS__
 * the same steps are provided as an inline function. */
#ifndef __REACTOS__
#define CNV_THIS_PART(lfn)				\
    ({							\
	unsigned char __part_uni[CHARS_PER_LFN*2];		\
	copy_lfn_part( __part_uni, lfn );		\
	cnv_unicode( __part_uni, CHARS_PER_LFN, 0 );	\
    })
#else
static __inline char* CNV_THIS_PART(LFN_ENT * lfn)
{
    unsigned char __part_uni[CHARS_PER_LFN*2];
    copy_lfn_part(__part_uni, lfn);
    return cnv_unicode(__part_uni, CHARS_PER_LFN, 0);
}
#endif
90 
/* Convert name parts collected so far (from previous slots) from unicode to
 * ASCII. Conversion starts at the part belonging to slot number lfn_slot + 1
 * inside lfn_unicode and covers at most lfn_parts * CHARS_PER_LFN
 * characters. The result is obtained with use_q = 0, i.e. from alloc(); the
 * callers in this file release it with free(). */
#define CNV_PARTS_SO_FAR()					\
	(cnv_unicode( lfn_unicode+(lfn_slot*CHARS_PER_LFN*2),	\
		      lfn_parts*CHARS_PER_LFN, 0 ))
96 
/* Build one wide character from its low byte 'cl' and high byte 'ch' */
#define BYTES_TO_WCHAR(cl,ch) ((wchar_t)((unsigned)(cl) + ((unsigned)(ch) << 8)))

/* Return the number of bytes the single wide character 'x' occupies in the
 * multibyte encoding produced by wcstombs(), or (size_t)-1 if wcstombs()
 * cannot convert it. */
static size_t mbslen(wchar_t x)
{
    wchar_t one_char[2];

    one_char[0] = x;
    one_char[1] = 0;

    /* a NULL destination only measures the required length */
    return wcstombs(NULL, one_char, 0);
}
103 
/* Store the multibyte form of the single wide character 'x', followed by a
 * terminating NUL, at 'dest'. Returns the number of bytes written without
 * counting the NUL, or (size_t)-1 if 'x' cannot be converted, in which case
 * 'dest' is not written to. */
static size_t wctombs(char *dest, wchar_t x)
{
    wchar_t one_char[2];
    size_t needed;

    one_char[0] = x;
    one_char[1] = 0;

    /* measure first so that an unconvertible character never touches dest */
    needed = wcstombs(NULL, one_char, 0);
    if (needed == (size_t) - 1)
	return needed;

    return wcstombs(dest, one_char, needed + 1);
}
112 
113 /* This function converts an unicode string to a normal ASCII string, assuming
114  * ISO-8859-1 charset. Characters not in 8859-1 are converted to the same
115  * escape notation as used by the kernel, i.e. the uuencode-like ":xxx" */
116 static char *cnv_unicode(const unsigned char *uni, int maxlen, int use_q)
117 {
118     const unsigned char *up;
119     unsigned char *out, *cp;
120     int len, val;
121     size_t x;
122 
123     for (len = 0, up = uni; (up - uni) / 2 < maxlen && (up[0] || up[1]);
124 	 up += 2) {
125 	if ((x = mbslen(BYTES_TO_WCHAR(up[0], up[1]))) != (size_t) - 1)
126 	    len += x;
127 	else if (UNICODE_CONVERTABLE(up[0], up[1]))
128 	    ++len;
129 	else
130 	    len += 4;
131     }
132     cp = out = use_q ? qalloc(&mem_queue, len + 1) : alloc(len + 1);
133 
134     for (up = uni; (up - uni) / 2 < maxlen && (up[0] || up[1]); up += 2) {
135 	if ((x =
136 	     wctombs((char *)cp, BYTES_TO_WCHAR(up[0], up[1]))) != (size_t) - 1)
137 	    cp += x;
138 	else if (UNICODE_CONVERTABLE(up[0], up[1]))
139 	    *cp++ = up[0];
140 	else {
141 	    /* here the same escape notation is used as in the Linux kernel */
142 	    *cp++ = ':';
143 	    val = (up[1] << 8) + up[0];
144 	    cp[2] = fat_uni2esc[val & 0x3f];
145 	    val >>= 6;
146 	    cp[1] = fat_uni2esc[val & 0x3f];
147 	    val >>= 6;
148 	    cp[0] = fat_uni2esc[val & 0x3f];
149 	    cp += 3;
150 	}
151     }
152     *cp = 0;
153 
154     return (char *)out;
155 }
156 
157 static void copy_lfn_part(unsigned char *dst, LFN_ENT * lfn)
158 {
159     memcpy(dst, lfn->name0_4, 10);
160     memcpy(dst + 10, lfn->name5_10, 12);
161     memcpy(dst + 22, lfn->name11_12, 4);
162 }
163 
164 static void clear_lfn_slots(int start, int end)
165 {
166     int i;
167     LFN_ENT empty;
168 
169     /* New dir entry is zeroed except first byte, which is set to 0xe5.
170      * This is to avoid that some FAT-reading OSes (not Linux! ;) stop reading
171      * a directory at the first zero entry...
172      */
173     memset(&empty, 0, sizeof(empty));
174     empty.id = DELETED_FLAG;
175 
176     for (i = start; i <= end; ++i) {
177 	fs_write(lfn_offsets[i], sizeof(LFN_ENT), &empty);
178     }
179 }
180 
181 void lfn_fix_checksum(off_t from, off_t to, const char *short_name)
182 {
183     int i;
184     uint8_t sum;
185     for (sum = 0, i = 0; i < 11; i++)
186 	sum = (((sum & 1) << 7) | ((sum & 0xfe) >> 1)) + short_name[i];
187 
188     for (; from < to; from += sizeof(LFN_ENT)) {
189 	fs_write(from + offsetof(LFN_ENT, alias_checksum), sizeof(sum), &sum);
190     }
191 }
192 
193 void lfn_reset(void)
194 {
195     if (lfn_unicode)
196 	free(lfn_unicode);
197     lfn_unicode = NULL;
198     if (lfn_offsets)
199 	free(lfn_offsets);
200     lfn_offsets = NULL;
201     lfn_slot = -1;
202 }
203 
204 /* This function is only called with de->attr == VFAT_LN_ATTR. It stores part
205  * of the long name. */
206 void lfn_add_slot(DIR_ENT * de, off_t dir_offset)
207 {
208     LFN_ENT *lfn = (LFN_ENT *) de;
209     int slot = lfn->id & LFN_ID_SLOTMASK;
210     unsigned offset;
211 
212     if (lfn_slot == 0)
213 	lfn_check_orphaned();
214 
215     if (de->attr != VFAT_LN_ATTR)
216 	die("lfn_add_slot called with non-LFN directory entry");
217 
218     if (lfn->id & LFN_ID_START && slot != 0) {
219 	if (lfn_slot != -1) {
220 	    int can_clear = 0;
221 	    /* There is already a LFN "in progess", so it is an error that a
222 	     * new start entry is here. */
223 	    /* Causes: 1) if slot# == expected: start bit set mysteriously, 2)
224 	     *         old LFN overwritten by new one */
225 	    /* Fixes: 1) delete previous LFN 2) if slot# == expected and
226 	     *        checksum ok: clear start bit */
227 	    /* XXX: Should delay that until next LFN known (then can better
228 	     * display the name) */
229 	    printf("A new long file name starts within an old one.\n");
230 	    if (slot == lfn_slot && lfn->alias_checksum == lfn_checksum) {
231 		char *part1 = CNV_THIS_PART(lfn);
232 		char *part2 = CNV_PARTS_SO_FAR();
233 		printf("  It could be that the LFN start bit is wrong here\n"
234 		       "  if \"%s\" seems to match \"%s\".\n", part1, part2);
235 		free(part1);
236 		free(part2);
237 		can_clear = 1;
238 	    }
239 	    if (interactive) {
240 		printf("1: Delete previous LFN\n2: Leave it as it is.\n");
241 		if (can_clear)
242 		    printf("3: Clear start bit and concatenate LFNs\n");
243 	    } else
244 		printf("  Not auto-correcting this.\n");
245 	    if (interactive) {
246 		switch (get_key(can_clear ? "123" : "12", "?")) {
247 		case '1':
248 		    clear_lfn_slots(0, lfn_parts - 1);
249 		    lfn_reset();
250 		    break;
251 		case '2':
252 		    break;
253 		case '3':
254 		    lfn->id &= ~LFN_ID_START;
255 		    fs_write(dir_offset + offsetof(LFN_ENT, id),
256 			     sizeof(lfn->id), &lfn->id);
257 		    break;
258 		}
259 	    }
260 	}
261 	lfn_slot = slot;
262 	lfn_checksum = lfn->alias_checksum;
263 	lfn_unicode = alloc((lfn_slot * CHARS_PER_LFN + 1) * 2);
264 	lfn_offsets = alloc(lfn_slot * sizeof(off_t));
265 	lfn_parts = 0;
266     } else if (lfn_slot == -1 && slot != 0) {
267 	/* No LFN in progress, but slot found; start bit missing */
268 	/* Causes: 1) start bit got lost, 2) Previous slot with start bit got
269 	 *         lost */
270 	/* Fixes: 1) delete LFN, 2) set start bit */
271 	char *part = CNV_THIS_PART(lfn);
272 	printf("Long filename fragment \"%s\" found outside a LFN "
273 	       "sequence.\n  (Maybe the start bit is missing on the "
274 	       "last fragment)\n", part);
275 	free(part);
276 	if (interactive) {
277 	    printf("1: Delete fragment\n2: Leave it as it is.\n"
278 		   "3: Set start bit\n");
279 	} else
280 	    printf("  Not auto-correcting this.\n");
281 	switch (interactive ? get_key("123", "?") : '2') {
282 	case '1':
283 	    if (!lfn_offsets)
284 		lfn_offsets = alloc(sizeof(off_t));
285 	    lfn_offsets[0] = dir_offset;
286 	    clear_lfn_slots(0, 0);
287 	    lfn_reset();
288 	    return;
289 	case '2':
290 	    lfn_reset();
291 	    return;
292 	case '3':
293 	    lfn->id |= LFN_ID_START;
294 	    fs_write(dir_offset + offsetof(LFN_ENT, id),
295 		     sizeof(lfn->id), &lfn->id);
296 	    lfn_slot = slot;
297 	    lfn_checksum = lfn->alias_checksum;
298 	    lfn_unicode = alloc((lfn_slot * CHARS_PER_LFN + 1) * 2);
299 	    lfn_offsets = alloc(lfn_slot * sizeof(off_t));
300 	    lfn_parts = 0;
301 	    break;
302 	}
303     } else if (slot != lfn_slot) {
304 	/* wrong sequence number */
305 	/* Causes: 1) seq-no destroyed */
306 	/* Fixes: 1) delete LFN, 2) fix number (maybe only if following parts
307 	 *        are ok?, maybe only if checksum is ok?) (Attention: space
308 	 *        for name was allocated before!) */
309 	int can_fix = 0;
310 	printf("Unexpected long filename sequence number "
311 	       "(%d vs. expected %d).\n", slot, lfn_slot);
312 	if (lfn->alias_checksum == lfn_checksum && lfn_slot > 0) {
313 	    char *part1 = CNV_THIS_PART(lfn);
314 	    char *part2 = CNV_PARTS_SO_FAR();
315 	    printf("  It could be that just the number is wrong\n"
316 		   "  if \"%s\" seems to match \"%s\".\n", part1, part2);
317 	    free(part1);
318 	    free(part2);
319 	    can_fix = 1;
320 	}
321 	if (interactive) {
322 	    printf
323 		("1: Delete LFN\n2: Leave it as it is (and ignore LFN so far)\n");
324 	    if (can_fix)
325 		printf("3: Correct sequence number\n");
326 	} else
327 	    printf("  Not auto-correcting this.\n");
328 	switch (interactive ? get_key(can_fix ? "123" : "12", "?") : '2') {
329 	case '1':
330 	    if (!lfn_offsets) {
331 		lfn_offsets = alloc(sizeof(off_t));
332 		lfn_parts = 0;
333 	    }
334 	    lfn_offsets[lfn_parts++] = dir_offset;
335 	    clear_lfn_slots(0, lfn_parts - 1);
336 	    lfn_reset();
337 	    return;
338 	case '2':
339 	    lfn_reset();
340 	    return;
341 	case '3':
342 	    lfn->id = (lfn->id & ~LFN_ID_SLOTMASK) | lfn_slot;
343 	    fs_write(dir_offset + offsetof(LFN_ENT, id),
344 		     sizeof(lfn->id), &lfn->id);
345 	    break;
346 	}
347     }
348 
349     if (lfn->alias_checksum != lfn_checksum) {
350 	/* checksum mismatch */
351 	/* Causes: 1) checksum field here destroyed */
352 	/* Fixes: 1) delete LFN, 2) fix checksum */
353 	printf("Checksum in long filename part wrong "
354 	       "(%02x vs. expected %02x).\n",
355 	       lfn->alias_checksum, lfn_checksum);
356 	if (interactive) {
357 	    printf("1: Delete LFN\n2: Leave it as it is.\n"
358 		   "3: Correct checksum\n");
359 	} else
360 	    printf("  Not auto-correcting this.\n");
361 	if (interactive) {
362 	    switch (get_key("123", "?")) {
363 	    case '1':
364 		lfn_offsets[lfn_parts++] = dir_offset;
365 		clear_lfn_slots(0, lfn_parts - 1);
366 		lfn_reset();
367 		return;
368 	    case '2':
369 		break;
370 	    case '3':
371 		lfn->alias_checksum = lfn_checksum;
372 		fs_write(dir_offset + offsetof(LFN_ENT, alias_checksum),
373 			 sizeof(lfn->alias_checksum), &lfn->alias_checksum);
374 		break;
375 	    }
376 	}
377     }
378 
379     if (lfn_slot != -1) {
380 	lfn_slot--;
381 	offset = lfn_slot * CHARS_PER_LFN * 2;
382 	copy_lfn_part(lfn_unicode + offset, lfn);
383 	if (lfn->id & LFN_ID_START)
384 	    lfn_unicode[offset + 26] = lfn_unicode[offset + 27] = 0;
385 	lfn_offsets[lfn_parts++] = dir_offset;
386     }
387 
388     if (lfn->reserved != 0) {
389 	printf("Reserved field in VFAT long filename slot is not 0 "
390 	       "(but 0x%02x).\n", lfn->reserved);
391 	if (interactive)
392 	    printf("1: Fix.\n2: Leave it.\n");
393 	else
394 	    printf("Auto-setting to 0.\n");
395 	if (!interactive || get_key("12", "?") == '1') {
396 	    lfn->reserved = 0;
397 	    fs_write(dir_offset + offsetof(LFN_ENT, reserved),
398 		     sizeof(lfn->reserved), &lfn->reserved);
399 	}
400     }
401     if (lfn->start != htole16(0)) {
402 	printf("Start cluster field in VFAT long filename slot is not 0 "
403 	       "(but 0x%04x).\n", lfn->start);
404 	if (interactive)
405 	    printf("1: Fix.\n2: Leave it.\n");
406 	else
407 	    printf("Auto-setting to 0.\n");
408 	if (!interactive || get_key("12", "?") == '1') {
409 	    lfn->start = htole16(0);
410 	    fs_write(dir_offset + offsetof(LFN_ENT, start),
411 		     sizeof(lfn->start), &lfn->start);
412 	}
413     }
414 }
415 
416 /* This function is always called when de->attr != VFAT_LN_ATTR is found, to
417  * retrieve the previously constructed LFN. */
418 char *lfn_get(DIR_ENT * de, off_t * lfn_offset)
419 {
420     char *lfn;
421     uint8_t sum;
422     int i;
423 
424     *lfn_offset = 0;
425     if (de->attr == VFAT_LN_ATTR)
426 	die("lfn_get called with LFN directory entry");
427 
428 #if 0
429     if (de->lcase)
430 	printf("lcase=%02x\n", de->lcase);
431 #endif
432 
433     if (lfn_slot == -1)
434 	/* no long name for this file */
435 	return NULL;
436 
437     if (lfn_slot != 0) {
438 	/* The long name isn't finished yet. */
439 	/* Causes: 1) LFN slot overwritten by non-VFAT aware tool */
440 	/* Fixes: 1) delete LFN 2) move overwriting entry to somewhere else
441 	 * and let user enter missing part of LFN (hard to do :-()
442 	 * 3) renumber entries and truncate name */
443 	char *long_name = CNV_PARTS_SO_FAR();
444 	char *short_name = file_name(de->name);
445 	printf("Unfinished long file name \"%s\".\n"
446 	       "  (Start may have been overwritten by %s)\n",
447 	       long_name, short_name);
448 	free(long_name);
449 	if (interactive) {
450 	    printf("1: Delete LFN\n2: Leave it as it is.\n"
451 		   "3: Fix numbering (truncates long name and attaches "
452 		   "it to short name %s)\n", short_name);
453 	} else
454 	    printf("  Not auto-correcting this.\n");
455 	switch (interactive ? get_key("123", "?") : '2') {
456 	case '1':
457 	    clear_lfn_slots(0, lfn_parts - 1);
458 	    lfn_reset();
459 	    return NULL;
460 	case '2':
461 	    lfn_reset();
462 	    return NULL;
463 	case '3':
464 	    for (i = 0; i < lfn_parts; ++i) {
465 		uint8_t id = (lfn_parts - i) | (i == 0 ? LFN_ID_START : 0);
466 		fs_write(lfn_offsets[i] + offsetof(LFN_ENT, id),
467 			 sizeof(id), &id);
468 	    }
469 	    memmove(lfn_unicode, lfn_unicode + lfn_slot * CHARS_PER_LFN * 2,
470 		    lfn_parts * CHARS_PER_LFN * 2);
471 	    break;
472 	}
473     }
474 
475     for (sum = 0, i = 0; i < MSDOS_NAME; i++)
476 	sum = (((sum & 1) << 7) | ((sum & 0xfe) >> 1)) + de->name[i];
477     if (sum != lfn_checksum) {
478 	/* checksum doesn't match, long name doesn't apply to this alias */
479 	/* Causes: 1) alias renamed */
480 	/* Fixes: 1) Fix checksum in LFN entries */
481 	char *long_name = CNV_PARTS_SO_FAR();
482 	char *short_name = file_name(de->name);
483 	printf("Wrong checksum for long file name \"%s\".\n"
484 	       "  (Short name %s may have changed without updating the long name)\n",
485 	       long_name, short_name);
486 	free(long_name);
487 	if (interactive) {
488 	    printf("1: Delete LFN\n2: Leave it as it is.\n"
489 		   "3: Fix checksum (attaches to short name %s)\n", short_name);
490 	} else
491 	    printf("  Not auto-correcting this.\n");
492 	if (interactive) {
493 	    switch (get_key("123", "?")) {
494 	    case '1':
495 		clear_lfn_slots(0, lfn_parts - 1);
496 		lfn_reset();
497 		return NULL;
498 	    case '2':
499 		lfn_reset();
500 		return NULL;
501 	    case '3':
502 		for (i = 0; i < lfn_parts; ++i) {
503 		    fs_write(lfn_offsets[i] + offsetof(LFN_ENT, alias_checksum),
504 			     sizeof(sum), &sum);
505 		}
506 		break;
507 	    }
508 	}
509     }
510 
511     *lfn_offset = lfn_offsets[0];
512     lfn = cnv_unicode(lfn_unicode, UNTIL_0, 1);
513     lfn_reset();
514     return (lfn);
515 }
516 
517 void lfn_check_orphaned(void)
518 {
519     char *long_name;
520 
521     if (lfn_slot == -1)
522 	return;
523 
524     long_name = CNV_PARTS_SO_FAR();
525     printf("Orphaned long file name part \"%s\"\n", long_name);
526     free(long_name);
527     if (interactive)
528 	printf("1: Delete.\n2: Leave it.\n");
529     else
530 	printf("  Auto-deleting.\n");
531     if (!interactive || get_key("12", "?") == '1') {
532 	clear_lfn_slots(0, lfn_parts - 1);
533     }
534     lfn_reset();
535 }
536