1 /******************************* LICENCE **************************************
2 * Any code in this file may be redistributed or modified under the terms of
3 * the GNU General Public Licence as published by the Free Software
4 * Foundation; version 2 of the licence.
5 ****************************** END LICENCE ***********************************/
6 
7 /******************************************************************************
8 * Author:
9 * Andrew Smith, http://littlesvr.ca/misc/contactandrew.php
10 *
11 * Contributors:
12 * The Samba Project - http://samba.org/
13 * - most of the filename mangling code
14 ******************************************************************************/
15 
16 #include <strings.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <ctype.h>
20 #include <sys/types.h>
21 
22 #include "bk.h"
23 #include "bkInternal.h"
24 #include "bkMangle.h"
25 #include "bkError.h"
26 
27 /* length of aaa in aaa~xxxx.bbb */
28 #define NCHARS_9660_BASE 3
29 
30 /*
31 * note that some unsigned ints in mangling functions are
32 * required to be 32 bits long for the hashing to work
33 * see the samba code for details
34 */
35 
/******************************************************************************
* charIsValid9660()
* Returns true if theChar is accepted in the 9660 names built by this file:
* an ascii digit, an ascii letter of either case or one of . _ - $ ~
* The string terminator '\0' is never accepted.
* */
bool charIsValid9660(char theChar)
{
    /* strchr() treats the terminating null of the set as part of it, so
    *  without this check '\0' would be reported as a valid character */
    if(theChar == '\0')
        return false;

    if( (theChar >= '0' && theChar <= '9') ||
        (theChar >= 'a' && theChar <= 'z') ||
        (theChar >= 'A' && theChar <= 'Z') )
    {
        return true;
    }

    return strchr("._-$~", theChar) != NULL;
}
52 
/******************************************************************************
* charIsValidJoliet()
* Returns true if theChar is accepted in the joliet names built by this file:
* any ascii character from decimal 32 to 126 with the exception of
* '*' (42), '/' (47), ':' (58), ';' (59), '?' (63) and '\' (92).
* */
bool charIsValidJoliet(char theChar)
{
    /* control characters and everything past '~' are rejected */
    if(theChar < 32 || theChar > 126)
        return false;

    switch(theChar)
    {
        case 42: /* '*' */
        case 47: /* '/' */
        case 58: /* ':' */
        case 59: /* ';' */
        case 63: /* '?' */
        case 92: /* '\' */
            return false;

        default:
            return true;
    }
}
68 
/*
   hashString()
   Hashes the first length chars of str. The string does not need to be
   null terminated, only length chars are read.

   Every char is folded in with a multiply by the fnv1 prime followed by
   an xor, starting from a fixed value (fnv1 of the string:
   idra@samba.org 2002). The top bit of the result is always cleared.
*/
unsigned hashString(const char *str, unsigned int length)
{
    static const unsigned fnv1Prime = 0x01000193;

    unsigned hash = 0xa6b93095; /* fixed starting value */
    const char *current = str;
    unsigned remaining;

    for (remaining = length; remaining > 0; remaining--)
    {
        hash = hash * (unsigned)fnv1Prime;
        hash = hash ^ (unsigned)(*current);

        current++;
    }

    /* forced to a 31 bit hash, to keep within the limits
       of the 36^6 mangle space */
    return hash & ~0x80000000;
}
95 
96 /******************************************************************************
97 * mangleDir()
98 * Mangles the filenames from origDir and puts the results into newDir, whcich
99 * it also creates.
100 * filenameTypes is all types required in the end
101 * */
mangleDir(const BkDir * origDir,DirToWrite * newDir,int filenameTypes)102 int mangleDir(const BkDir* origDir, DirToWrite* newDir, int filenameTypes)
103 {
104     int rc;
105     bool haveCollisions;
106     int numTimesTried;
107     int num9660Collisions;
108     const int name9660len = 13;
109     char newName9660[name9660len]; /* for remangling */
110     int numJolietCollisions;
111     char newNameJoliet[NCHARS_FILE_ID_MAX_JOLIET]; /* for remangling */
112 
113     BkFileBase* currentOrigChild;
114     BaseToWrite** currentNewChild;
115 
116     /* for counting collisions */
117     BaseToWrite* currentChild;
118     BaseToWrite* currentChildToCompare;
119 
120     /* MANGLE all names, create new children list */
121     currentOrigChild = origDir->children;
122     currentNewChild = &(newDir->children);
123     while(currentOrigChild != NULL)
124     {
125         if( IS_DIR(currentOrigChild->posixFileMode) )
126         {
127             *currentNewChild = malloc(sizeof(DirToWrite));
128             if(*currentNewChild == NULL)
129                 return BKERROR_OUT_OF_MEMORY;
130 
131             memset(*currentNewChild, 0, sizeof(DirToWrite));
132         }
133         else if( IS_REG_FILE(currentOrigChild->posixFileMode) )
134         {
135             *currentNewChild = malloc(sizeof(FileToWrite));
136             if(*currentNewChild == NULL)
137                 return BKERROR_OUT_OF_MEMORY;
138 
139             memset(*currentNewChild, 0, sizeof(FileToWrite));
140         }
141         else if( IS_SYMLINK(currentOrigChild->posixFileMode) )
142         {
143             *currentNewChild = malloc(sizeof(SymLinkToWrite));
144             if(*currentNewChild == NULL)
145                 return BKERROR_OUT_OF_MEMORY;
146 
147             memset(*currentNewChild, 0, sizeof(SymLinkToWrite));
148         }
149         else
150             return BKERROR_NO_SPECIAL_FILES;
151 
152         if(currentOrigChild->original9660name[0] != '\0')
153             strncpy((*currentNewChild)->name9660, currentOrigChild->original9660name, NBYTES_FILE_ID_MAX_9660);
154         else
155             shortenNameFor9660(currentOrigChild->name, (*currentNewChild)->name9660);
156 
157         if(filenameTypes | FNTYPE_ROCKRIDGE)
158             strncpy((*currentNewChild)->nameRock, currentOrigChild->name, NCHARS_FILE_ID_MAX_STORE);
159         else
160             (*currentNewChild)->nameRock[0] = '\0';
161 
162         if(filenameTypes | FNTYPE_JOLIET)
163             mangleNameForJoliet(currentOrigChild->name, (*currentNewChild)->nameJoliet, false);
164         else
165             (*currentNewChild)->nameJoliet[0] = '\0';
166 
167         (*currentNewChild)->posixFileMode = currentOrigChild->posixFileMode;
168 
169         if( IS_DIR(currentOrigChild->posixFileMode) )
170         {
171             rc = mangleDir(BK_DIR_PTR(currentOrigChild), DIRTW_PTR(*currentNewChild),
172                            filenameTypes);
173             if(rc < 0)
174             {
175                 free(*currentNewChild);
176                 *currentNewChild = NULL;
177                 return rc;
178             }
179         }
180         else if( IS_REG_FILE(currentOrigChild->posixFileMode) )
181         {
182             BkFile* origFile = BK_FILE_PTR(currentOrigChild);
183             FileToWrite* newFile = FILETW_PTR(*currentNewChild);
184 
185             newFile->size = origFile->size;
186 
187             newFile->location = origFile->location;
188 
189             newFile->onImage = origFile->onImage;
190 
191             newFile->offset = origFile->position;
192 
193             if( !origFile->onImage )
194             {
195                 newFile->pathAndName = malloc(strlen(origFile->pathAndName) + 1);
196                 if( newFile->pathAndName == NULL )
197                 {
198                     free(*currentNewChild);
199                     *currentNewChild = NULL;
200                     return BKERROR_OUT_OF_MEMORY;
201                 }
202 
203                 strcpy(newFile->pathAndName, origFile->pathAndName);
204             }
205 
206             newFile->origFile = origFile;
207         }
208         else /* if( IS_SYMLINK(currentOrigChild->posixFileMode) ) */
209         {
210             strncpy(SYMLINKTW_PTR(*currentNewChild)->target,
211                     BK_SYMLINK_PTR(currentOrigChild)->target, NCHARS_SYMLINK_TARGET_MAX);
212         }
213 
214         currentOrigChild = currentOrigChild->next;
215         currentNewChild = &((*currentNewChild)->next);
216     }
217     /* END MANGLE all names, create new children list */
218 
219     haveCollisions = true;
220     numTimesTried = 0;
221     while(haveCollisions && numTimesTried < 50000) /* random big number */
222     {
223         haveCollisions = false;
224 
225         currentChild = newDir->children;
226         while(currentChild != NULL)
227         {
228             num9660Collisions = 0;
229             numJolietCollisions = 0;
230 
231             currentChildToCompare = newDir->children;
232             while(currentChildToCompare != NULL)
233             {
234                 if(strcmp(currentChild->name9660,
235                           currentChildToCompare->name9660) == 0)
236                 {
237                     num9660Collisions++;
238                 }
239 
240                 if(strcmp(currentChild->nameJoliet,
241                           currentChildToCompare->nameJoliet) == 0)
242                 {
243                     numJolietCollisions++;
244                 }
245 
246                 currentChildToCompare = currentChildToCompare->next;
247             }
248 
249             if(num9660Collisions != 1)
250             {
251                 haveCollisions = true;
252 
253                 if( IS_DIR(currentChild->posixFileMode) )
254                     mangleNameFor9660(currentChild->name9660, newName9660, true);
255                 else
256                     mangleNameFor9660(currentChild->name9660, newName9660, false);
257 
258                 strcpy(currentChild->name9660, newName9660);
259             }
260 
261             if(numJolietCollisions != 1)
262             {
263                 haveCollisions = true;
264 
265                 mangleNameForJoliet(currentChild->nameJoliet, newNameJoliet, true);
266 
267                 strcpy(currentChild->nameJoliet, newNameJoliet);
268             }
269 
270 
271             currentChild = currentChild->next;
272         }
273 
274         numTimesTried++;
275     }
276 
277     if(haveCollisions)
278         return BKERROR_MANGLE_TOO_MANY_COL;
279 
280     return 1;
281 }
282 
283 /******************************************************************************
284 * mangleNameFor9660()
285 * Convert a long filename into an ISO9660 acceptable form:
286 * see charIsValid9660(), 8 chars max for directories and 8.3 chars
287 * for files. Extension is kept if it's shorter then 4 chars.
288 * 3 chars from the original name are kept, the rest is filled with ~XXXX where
289 * the XXXX is a random string (but still with valid characters).
290 * */
mangleNameFor9660(const char * origName,char * newName,bool isADir)291 void mangleNameFor9660(const char* origName, char* newName, bool isADir)
292 {
293     char* dot_p;
294     int i;
295     char base[7]; /* max 6 chars */
296     char extension[4]; /* max 3 chars */
297     int extensionLen;
298     unsigned hash;
299     unsigned v;
300     /* these are the characters we use in the 8.3 hash. Must be 36 chars long */
301     static const char* baseChars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
302 
303     /* FIND extension */
304     if(isADir)
305     {
306         dot_p = NULL;
307     }
308     else
309     {
310         dot_p = strrchr(origName, '.');
311 
312         if(dot_p)
313         {
314             /* if the extension contains any illegal characters or
315                is too long (> 3) or zero length then we treat it as part
316                of the prefix */
317             for(i = 0; i < 4 && dot_p[i + 1] != '\0'; i++)
318             {
319                 if( !charIsValid9660(dot_p[i + 1]) )
320                 {
321                     dot_p = NULL;
322                     break;
323                 }
324             }
325 
326             if(i == 0 || i == 4 || dot_p == origName)
327                 dot_p = NULL;
328         }
329     }
330     /* END FIND extension */
331 
332     /* GET base */
333     /* the leading characters in the mangled name is taken from
334     *  the first characters of the name, if they are ascii otherwise
335     *  '_' is used */
336     for(i = 0; i < NCHARS_9660_BASE && origName[i] != '\0'; i++)
337     {
338         base[i] = origName[i];
339 
340         if ( !charIsValid9660(origName[i]) )
341             base[i] = '_';
342 
343         base[i] = toupper(base[i]);
344     }
345 
346     /* make sure base doesn't contain part of the extension */
347     if(dot_p != NULL)
348     {
349         if(i > dot_p - origName)
350             i = dot_p - origName;
351     }
352 
353     /* fixed length */
354     while(i < NCHARS_9660_BASE)
355     {
356         base[i] = '_';
357 
358         i++;
359     }
360 
361     base[NCHARS_9660_BASE] = '\0';
362     /* END GET base */
363 
364     /* GET extension */
365     /* the extension of the mangled name is taken from the first 3
366     *  ascii chars after the dot */
367     extensionLen = 0;
368     if(dot_p)
369     {
370         for(i = 1; extensionLen < 3 && dot_p[i] != '\0'; i++)
371         {
372             extension[extensionLen] = toupper(dot_p[i]);
373 
374             extensionLen++;
375         }
376     }
377 
378     extension[extensionLen] = '\0';
379     /* END GET extension */
380 
381     /* find the hash for this prefix */
382     hash = hashString(origName, strlen(origName));
383 
384     /* now form the mangled name. */
385     for(i = 0; i < NCHARS_9660_BASE; i++)
386     {
387         newName[i] = base[i];
388     }
389 
390     newName[NCHARS_9660_BASE] = '~';
391 
392     v = hash;
393     newName[7] = baseChars[v % 36];
394     for(i = 6; i > NCHARS_9660_BASE; i--)
395     {
396         v = v / 36;
397         newName[i] = baseChars[v % 36];
398     }
399 
400     /* add the extension and terminate string */
401     if(extensionLen > 0)
402     {
403         newName[8] = '.';
404 
405         strcpy(newName + 9, extension);
406     }
407     else
408     {
409         newName[8] = '\0';
410     }
411 
412     printf("remangled '%s' -> '%s'\n", origName, newName);
413 }
414 
mangleNameForJoliet(const char * origName,char * newName,bool appendHash)415 void mangleNameForJoliet(const char* origName, char* newName, bool appendHash)
416 {
417     char* dot_p;
418     int i;
419     char base[NCHARS_FILE_ID_MAX_JOLIET]; /* '\0' terminated */
420     char extension[6]; /* max 3 chars */
421     int extensionLen;
422     unsigned hash;
423     unsigned v;
424     char hashStr[5]; /* '\0' terminated */
425     /* these are the characters we use in the 8.3 hash. Must be 36 chars long */
426     static const char* baseChars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
427 
428     /* FIND extension candidate */
429     dot_p = strrchr(origName, '.');
430 
431     if(dot_p)
432     {
433         /* if the extension contains any illegal characters or
434            is too long (> 5) or zero length then we treat it as part
435            of the prefix */
436         for(i = 0; i < 6 && dot_p[i + 1] != '\0'; i++)
437         {
438             if( !charIsValidJoliet(dot_p[i + 1]) )
439             {
440                 dot_p = NULL;
441                 break;
442             }
443         }
444 
445         if(i == 0 || i == 6 || dot_p == origName)
446             dot_p = NULL;
447     }
448     /* END FIND extension candidate */
449 
450     /* GET base */
451     /* The leading characters in the mangled name are taken from
452     *  the first characters of the name if they are allowed, otherwise
453     *  '_' is used */
454     for(i = 0; i < NCHARS_FILE_ID_MAX_JOLIET - 1 && origName[i] != '\0'; i++)
455     {
456         base[i] = origName[i];
457 
458         if ( !charIsValidJoliet(origName[i]) )
459             base[i] = '_';
460     }
461 
462     /* make sure base doesn't contain part of the extension */
463     if(dot_p != NULL)
464     {
465         if(i > dot_p - origName)
466             i = dot_p - origName;
467     }
468 
469     base[i] = '\0';
470     /* END GET base */
471 
472     /* GET extension */
473     /* the extension of the mangled name is taken from the first 3
474        ascii chars after the dot */
475     extensionLen = 0;
476     if(dot_p)
477     {
478         for(i = 1; extensionLen < 5 && dot_p[i] != '\0'; i++)
479         {
480             extension[extensionLen] = dot_p[i];
481 
482             extensionLen++;
483         }
484     }
485 
486     extension[extensionLen] = '\0';
487     /* END GET extension */
488 
489     /* FIND the hash for this prefix */
490     hash = hashString(origName, strlen(origName));
491 
492     hashStr[4] = '\0';
493     v = hash;
494     hashStr[3] = baseChars[v % 36];
495     for(i = 2; i >= 0; i--)
496     {
497         v = v / 36;
498         hashStr[i] = baseChars[v % 36];
499     }
500     /* END FIND the hash for this prefix */
501 
502     /* ASSEMBLE name */
503     strcpy(newName, base);
504 
505     if(appendHash)
506     {
507         /* max name len - '~' - hash - '.' - extension */
508         if(strlen(newName) >= NCHARS_FILE_ID_MAX_JOLIET - 1 - 1 - 4 - 1 - 5)
509             newName[NCHARS_FILE_ID_MAX_JOLIET - 1 - 1 - 4 - 1 - 5] = '\0';
510 
511         strcat(newName, "~");
512         strcat(newName, hashStr);
513     }
514     if(extensionLen > 0)
515     {
516         strcat(newName, ".");
517         strcat(newName, extension);
518     }
519     /* END ASSEMBLE name */
520 
521     if(appendHash)
522         printf("joliet mangle '%s' -> '%s'\n", origName, newName);
523 }
524 
/******************************************************************************
* shortenNameFor9660()
* Same as mangleNameFor9660() but without the ~XXXX: newName becomes up to 8
* chars from the start of origName, followed by '.' and an extension of up to
* 3 chars if origName has a usable one. Everything is converted to upper case
* and chars refused by charIsValid9660() in the first part are replaced
* with '_'.
* newName needs room for 13 bytes.
* */
void shortenNameFor9660(const char* origName, char* newName)
{
    const char* extDot = NULL; /* the dot in front of a usable extension */
    const char* lastDot;
    char stem[9]; /* max 8 chars */
    char ext[4]; /* max 3 chars */
    int stemLen = 0;
    int extLen = 0;
    char* out;

    /* FIND extension */
    /* ISO9660 requires that directories have no dots ('.') but some isolinux
    *  cds have the kernel in a directory with a dot so dots are allowed in
    *  directories too, files and directories are treated the same here */
    lastDot = strrchr(origName, '.');

    if(lastDot != NULL)
    {
        bool allValid = true;
        int numChecked = 0;

        /* up to 4 chars are looked at, reaching the 4th one only proves
        *  that the extension is too long */
        while(numChecked < 4 && lastDot[numChecked + 1] != '\0')
        {
            if( !charIsValid9660(lastDot[numChecked + 1]) )
            {
                allValid = false;
                break;
            }

            numChecked++;
        }

        /* an empty, too long or invalid extension, as well as a name that
        *  starts with the dot, means there is no extension */
        if(allValid && numChecked >= 1 && numChecked <= 3 &&
           lastDot != origName)
        {
            extDot = lastDot;
        }
    }
    /* END FIND extension */

    /* GET base */
    while(stemLen < 8 && origName[stemLen] != '\0')
    {
        char current = origName[stemLen];

        if( !charIsValid9660(current) )
            current = '_';

        stem[stemLen] = toupper(current);

        stemLen++;
    }

    /* whatever belongs to the extension is not part of the base */
    if(extDot != NULL && stemLen > extDot - origName)
        stemLen = extDot - origName;

    stem[stemLen] = '\0';
    /* END GET base */

    /* GET extension */
    if(extDot != NULL)
    {
        const char* source = extDot + 1;

        while(extLen < 3 && *source != '\0')
        {
            ext[extLen] = toupper(*source);

            extLen++;
            source++;
        }
    }

    ext[extLen] = '\0';
    /* END GET extension */

    /* ASSEMBLE name */
    memcpy(newName, stem, stemLen);
    out = newName + stemLen;

    if(extLen > 0)
    {
        *out = '.';
        out++;

        memcpy(out, ext, extLen);
        out += extLen;
    }

    *out = '\0';
    /* END ASSEMBLE name */
}
617