1 /* @(#)hash.c 1.30 16/12/13 joerg */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static UConst char sccsid[] =
5 "@(#)hash.c 1.30 16/12/13 joerg";
6
7 #endif
8 /*
9 * File hash.c - generate hash tables for iso9660 filesystem.
10 *
11 * Written by Eric Youngdale (1993).
12 *
13 * Copyright 1993 Yggdrasil Computing, Incorporated
14 * Copyright (c) 1999,2000-2016 J. Schilling
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2, or (at your option)
19 * any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
29 */
30
31 /* APPLE_HYB James Pearson j.pearson@ge.ucl.ac.uk 23/2/2000 */
32
33 /*
34 * From jb@danware.dk:
35 *
36 * Cygwin fakes inodes by hashing file info, actual collisions observed!
37 * This is documented in the cygwin source, look at winsup/cygwin/path.cc
38 * and search for the word 'Hash'. On NT, cygwin ORs together the
39 * high and low 32 bits of the 64 bit genuine inode, look at fhandler.cc.
40 *
41 * Note: Other operating systems which support the FAT filesystem may
42 * have the same problem because FAT does not use the inode
43 * concept. For NTFS, genuine inode numbers exist, but they are
44 * 64 bits and available only through an open file handle.
45 *
46 * The solution is the new options -no-cache-inodes/-cache-inodes that
47 * allow to disable the mkisofs inode cache.
48 */
49
50 /* DUPLICATES_ONCE Alex Kopylov cdrtools@bootcd.ru 19.06.2004 */
51
52 #include "mkisofs.h"
53 #include <schily/schily.h>
54 #include <schily/sha3.h>
55
56 #define NR_HASH (16*1024)
57
58 #define HASH_FN(DEV, INO) ((DEV + INO + (INO >> 8) + (INO << 16)) % NR_HASH)
59
60 static struct file_hash *hash_table[NR_HASH];
61
62 #ifdef DUPLICATES_ONCE
63
64 #define DIGEST_FAST_SIZE 65536
65 #define UNIQUE_FILES_HASH_FN(SIZE) ((SIZE) % NR_HASH)
66
67 #ifndef DIGEST_Init
68 #define DIGEST_Init SHA3_512_Init
69 #define DIGEST_Update SHA3_Update
70 #define DIGEST_Final SHA3_Final
71 #define DIGEST_CTX SHA3_CTX
72 #define DIGEST_LENGTH SHA3_512_DIGEST_LENGTH
73
74 #ifdef SHORT_DIGEST
75 #undef DIGEST_Init
76 #undef DIGEST_LENGTH
77 #define DIGEST_Init SHA3_256_Init
78 #define DIGEST_LENGTH SHA3_256_DIGEST_LENGTH
79 #endif /* SHORT_DIGEST */
80 #endif /* DIGEST_Init */
81 #endif
82
83 #ifdef HASH_DEBUG
84 EXPORT void debug_hash __PR((void));
85 #endif
86 EXPORT void add_hash __PR((struct directory_entry *spnt));
87 EXPORT struct file_hash *find_hash __PR((struct directory_entry *spnt));
88 EXPORT void flush_hash __PR((void));
89 EXPORT void add_directory_hash __PR((dev_t dev, ino_t inode));
90 EXPORT struct file_hash *find_directory_hash __PR((dev_t dev, ino_t inode));
91 LOCAL unsigned int name_hash __PR((const char *name));
92 EXPORT void add_file_hash __PR((struct directory_entry *de));
93 EXPORT struct directory_entry *find_file_hash __PR((char *name));
94 LOCAL BOOL isoname_endsok __PR((char *name));
95 EXPORT int delete_file_hash __PR((struct directory_entry *de));
96 EXPORT void flush_file_hash __PR((void));
97 #ifdef DUPLICATES_ONCE
98 LOCAL struct directory_entry *compare_files __PR((
99 struct directory_entry *spnt1,
100 struct directory_entry *spnt2));
101 LOCAL unsigned char *DIGEST_File __PR((char *name, size_t size));
102 #endif
103
104 #ifdef HASH_DEBUG
105 EXPORT void
debug_hash()106 debug_hash()
107 {
108 struct file_hash **p = hash_table;
109 int i;
110 int j = 0;
111 struct file_hash *p2;
112 int k;
113 int maxlen = 0;
114 int minlen = 100000000;
115 int tot = 0;
116
117 for (i = 0; i < NR_HASH; i++, p++) {
118 if (*p == 0)
119 j++;
120 else {
121 p2 = *p;
122 k = 0;
123 while (p2) {
124 tot++;
125 k++;
126 p2 = p2->next;
127 }
128 if (k > maxlen)
129 maxlen = k;
130 if (k < minlen)
131 minlen = k;
132 }
133 }
134 error("Unbenutzt: %d von %d Eintr�gen maxlen %d minlen %d total %d optmittel %d\n",
135 j, NR_HASH, maxlen, minlen, tot, tot/NR_HASH);
136 }
137 #endif
138
139 EXPORT void
add_hash(spnt)140 add_hash(spnt)
141 struct directory_entry *spnt;
142 {
143 struct file_hash *s_hash;
144 unsigned int hash_number = 0;
145
146 if (spnt->size == 0 || spnt->starting_block == 0)
147 if (spnt->size != 0 && spnt->starting_block == 0) {
148 comerrno(EX_BAD,
149 _("Non zero-length file '%s' assigned zero extent.\n"),
150 spnt->name);
151 };
152
153 #ifdef DUPLICATES_ONCE
154 if (!cache_inodes && !duplicates_once)
155 #else
156 if (!cache_inodes)
157 #endif
158 return;
159 if (spnt->dev == UNCACHED_DEVICE &&
160 (spnt->inode == TABLE_INODE || spnt->inode == UNCACHED_INODE)) {
161 return;
162 }
163 #ifdef DUPLICATES_ONCE
164 if (cache_inodes)
165 #endif
166 hash_number = HASH_FN((unsigned int) spnt->dev,
167 (unsigned int) spnt->inode);
168 #ifdef DUPLICATES_ONCE
169 else if (duplicates_once &&
170 spnt->size && !(spnt->isorec.flags[0] & ISO_DIRECTORY))
171 hash_number = UNIQUE_FILES_HASH_FN((unsigned int) spnt->size);
172 #endif
173
174 #if 0
175 if (verbose > 1)
176 fprintf(stderr, "%s ", spnt->name);
177 #endif
178 s_hash = (struct file_hash *)e_malloc(sizeof (struct file_hash));
179 s_hash->next = hash_table[hash_number];
180 s_hash->inode = spnt->inode;
181 s_hash->dev = spnt->dev;
182 s_hash->nlink = 0;
183 s_hash->starting_block = spnt->starting_block;
184 s_hash->size = spnt->size;
185 #if defined(SORTING) || defined(DUPLICATES_ONCE)
186 s_hash->de = spnt;
187 #endif /* defined(SORTING) || defined(DUPLICATES_ONCE) */
188 hash_table[hash_number] = s_hash;
189 }
190
191 EXPORT struct file_hash *
find_hash(spnt)192 find_hash(spnt)
193 struct directory_entry *spnt;
194 {
195 unsigned int hash_number;
196 struct file_hash *s_hash;
197
198 #ifdef DUPLICATES_ONCE
199 if (!cache_inodes && !duplicates_once)
200 #else
201 if (!cache_inodes)
202 #endif
203 return (NULL);
204 if (spnt->dev == UNCACHED_DEVICE &&
205 (spnt->inode == TABLE_INODE || spnt->inode == UNCACHED_INODE))
206 return (NULL);
207
208 #ifdef DUPLICATES_ONCE
209 if (cache_inodes) {
210 #endif
211 hash_number = HASH_FN((unsigned int) spnt->dev,
212 (unsigned int) spnt->inode);
213 s_hash = hash_table[hash_number];
214 while (s_hash) {
215 if (s_hash->inode == spnt->inode &&
216 s_hash->dev == spnt->dev)
217 return (s_hash);
218 s_hash = s_hash->next;
219 }
220 #ifdef DUPLICATES_ONCE
221 } else if (duplicates_once &&
222 spnt->size && !(spnt->isorec.flags[0] & ISO_DIRECTORY)) {
223 hash_number = UNIQUE_FILES_HASH_FN((unsigned int) spnt->size);
224 s_hash = hash_table[hash_number];
225 while (s_hash) {
226 if (compare_files(spnt, s_hash->de))
227 return (s_hash);
228 s_hash = s_hash->next;
229 }
230 }
231 #endif
232 return (NULL);
233 }
234
235 /*
236 * based on flush_file_hash() below - needed as we want to re-use the
237 * file hash table.
238 */
239 EXPORT void
flush_hash()240 flush_hash()
241 {
242 struct file_hash *fh;
243 struct file_hash *fh1;
244 int i;
245
246 for (i = 0; i < NR_HASH; i++) {
247 fh = hash_table[i];
248 while (fh) {
249 fh1 = fh->next;
250 free(fh);
251 fh = fh1;
252 }
253 hash_table[i] = NULL;
254 }
255 }
256
257 #ifdef DUPLICATES_ONCE
258 LOCAL struct directory_entry *
compare_files(spnt1,spnt2)259 compare_files(spnt1, spnt2)
260 struct directory_entry *spnt1;
261 struct directory_entry *spnt2;
262 {
263 if (spnt1->size != spnt2->size)
264 return (NULL);
265
266 if (!spnt1->digest_fast)
267 if (!(spnt1->digest_fast = DIGEST_File(spnt1->whole_name,
268 (spnt1->size > DIGEST_FAST_SIZE) ?
269 DIGEST_FAST_SIZE : spnt1->size)))
270 return (NULL);
271
272 if (spnt1->size <= DIGEST_FAST_SIZE)
273 spnt1->digest_full = spnt1->digest_fast;
274
275 if (!spnt2->digest_fast)
276 if (!(spnt2->digest_fast = DIGEST_File(spnt2->whole_name,
277 (spnt2->size > DIGEST_FAST_SIZE) ?
278 DIGEST_FAST_SIZE : spnt2->size)))
279 return (NULL);
280
281 if (spnt2->size <= DIGEST_FAST_SIZE)
282 spnt2->digest_full = spnt2->digest_fast;
283
284 if (memcmp(spnt1->digest_fast, spnt2->digest_fast,
285 DIGEST_LENGTH * sizeof (unsigned char)))
286 return (NULL);
287
288 if (!spnt1->digest_full)
289 if (!(spnt1->digest_full = DIGEST_File(spnt1->whole_name,
290 spnt1->size)))
291 return (NULL);
292
293 if (!spnt2->digest_full)
294 if (!(spnt2->digest_full = DIGEST_File(spnt2->whole_name,
295 spnt2->size)))
296 return (NULL);
297
298 if (memcmp(spnt1->digest_full, spnt2->digest_full,
299 DIGEST_LENGTH * sizeof (unsigned char)))
300 return (NULL);
301
302 return (spnt2);
303 }
304
305 LOCAL unsigned char *
DIGEST_File(name,size)306 DIGEST_File(name, size)
307 char *name;
308 size_t size;
309 {
310 DIGEST_CTX digest_ctx;
311 FILE *infile;
312 unsigned char buf[32768];
313 unsigned char *digest_hash;
314 size_t cnt;
315
316 DIGEST_Init(&digest_ctx);
317
318 if ((infile = fopen(name, "rb")) == NULL)
319 return (NULL);
320
321 while (size) {
322 cnt = (size > sizeof (buf)) ? sizeof (buf) : size;
323 if ((cnt = fread(buf, 1, cnt, infile)) <= 0) {
324 fclose(infile);
325 return (NULL);
326 }
327 DIGEST_Update(&digest_ctx, buf, cnt);
328 size -= cnt;
329 }
330
331 fclose(infile);
332
333 digest_hash = e_malloc(sizeof (unsigned char) * DIGEST_LENGTH);
334 DIGEST_Final(digest_hash, &digest_ctx);
335 return (digest_hash);
336 }
337 #endif
338
339 static struct file_hash *directory_hash_table[NR_HASH];
340
341 #ifdef PROTOTYPES
342 EXPORT void
add_directory_hash(dev_t dev,ino_t inode)343 add_directory_hash(dev_t dev, ino_t inode)
344 #else
345 EXPORT void
346 add_directory_hash(dev, inode)
347 dev_t dev;
348 ino_t inode;
349 #endif
350 {
351 struct file_hash *s_hash;
352 unsigned int hash_number;
353
354 if (!cache_inodes)
355 return;
356 if (dev == UNCACHED_DEVICE &&
357 (inode == TABLE_INODE || inode == UNCACHED_INODE))
358 return;
359
360 hash_number = HASH_FN((unsigned int) dev, (unsigned int) inode);
361
362 s_hash = (struct file_hash *)e_malloc(sizeof (struct file_hash));
363 s_hash->next = directory_hash_table[hash_number];
364 s_hash->inode = inode;
365 s_hash->dev = dev;
366 s_hash->nlink = 0;
367 directory_hash_table[hash_number] = s_hash;
368 }
369
370 #ifdef PROTOTYPES
371 EXPORT struct file_hash *
find_directory_hash(dev_t dev,ino_t inode)372 find_directory_hash(dev_t dev, ino_t inode)
373 #else
374 EXPORT struct file_hash *
375 find_directory_hash(dev, inode)
376 dev_t dev;
377 ino_t inode;
378 #endif
379 {
380 unsigned int hash_number;
381 struct file_hash *spnt;
382
383 if (!cache_inodes)
384 return (NULL);
385 if (dev == UNCACHED_DEVICE &&
386 (inode == TABLE_INODE || inode == UNCACHED_INODE))
387 return (NULL);
388
389 hash_number = HASH_FN((unsigned int) dev, (unsigned int) inode);
390 spnt = directory_hash_table[hash_number];
391 while (spnt) {
392 if (spnt->inode == inode && spnt->dev == dev)
393 return (spnt);
394 spnt = spnt->next;
395 };
396 return (NULL);
397 }
398
399 struct name_hash {
400 struct name_hash *next;
401 struct directory_entry *de;
402 int sum;
403 };
404
405 #define NR_NAME_HASH 128
406
407 static struct name_hash *name_hash_table[NR_NAME_HASH] = {0, };
408
409 /*
410 * Find the hash bucket for this name.
411 */
412 LOCAL unsigned int
name_hash(name)413 name_hash(name)
414 const char *name;
415 {
416 unsigned int hash = 0;
417 const char *p;
418
419 p = name;
420
421 while (*p) {
422 /*
423 * Don't hash the iso9660 version number.
424 * This way we can detect duplicates in cases where we have
425 * directories (i.e. foo) and non-directories (i.e. foo;1).
426 */
427 if (*p == ';') {
428 break;
429 }
430 hash = (hash << 15) + (hash << 3) + (hash >> 3) + (*p++ & 0xFF);
431 }
432 return (hash % NR_NAME_HASH);
433 }
434
435 EXPORT void
add_file_hash(de)436 add_file_hash(de)
437 struct directory_entry *de;
438 {
439 struct name_hash *new;
440 int hash;
441 Uchar *p;
442 int sum = 0;
443
444 new = (struct name_hash *)e_malloc(sizeof (struct name_hash));
445 new->de = de;
446 new->next = NULL;
447 for (p = (Uchar *)de->isorec.name; *p; p++) {
448 if (*p == ';')
449 break;
450 sum += *p & 0xFF;
451 }
452 new->sum = sum;
453 hash = name_hash(de->isorec.name);
454
455 /* Now insert into the hash table */
456 new->next = name_hash_table[hash];
457 name_hash_table[hash] = new;
458 }
459
460 EXPORT struct directory_entry *
find_file_hash(name)461 find_file_hash(name)
462 register char *name;
463 {
464 register char *p1;
465 register char *p2;
466 register struct name_hash *nh;
467 register int sum = 0;
468
469 if (debug > 1)
470 error("find_hash('%s')\n", name);
471
472 for (p1 = name; *p1; p1++) {
473 if (*p1 == ';')
474 break;
475 sum += *p1 & 0xFF;
476 }
477
478 for (nh = name_hash_table[name_hash(name)]; nh; nh = nh->next) {
479 if (nh->sum != sum)
480 continue;
481
482 p1 = name;
483 p2 = nh->de->isorec.name;
484 if (debug > 1)
485 error(_("Checking name '%s' isorec.name '%s'\n"), p1, p2);
486
487 /* Look for end of string, or a mismatch. */
488 while (1 == 1) {
489 if ((*p1 == '\0' || *p1 == ';') ||
490 (*p2 == '\0' || *p2 == ';') ||
491 (*p1 != *p2)) {
492 break;
493 }
494 p1++;
495 p2++;
496 }
497 if (!isoname_endsok(p1) || !isoname_endsok(p2)) {
498 if (debug > 1) {
499 if (!isoname_endsok(p1))
500 error(_("'%s' does NOT END OK\n"), p1);
501 if (!isoname_endsok(p2))
502 error(_("'%s' does NOT END OK\n"), p2);
503 }
504 /*
505 * If one file does not end with a valid version number
506 * and the other name ends here, we found a miss match.
507 */
508 if (*p1 == '\0' || *p2 == '\0')
509 continue;
510
511 if (*p1 == ';' && *p2 == ';') {
512 p1++;
513 p2++;
514 continue;
515 }
516 }
517
518 /*
519 * If we are at the end of both strings, then we have a match.
520 */
521 if ((*p1 == '\0' || *p1 == ';') &&
522 (*p2 == '\0' || *p2 == ';')) {
523 return (nh->de);
524 }
525 }
526 return (NULL);
527 }
528
529 /*
530 * The macro 'eo' is just an idea on how one might speed up isoname_endsok()
531 */
532 #define eo(p) (((p)[0] == '\0') || \
533 ((p)[0] == ';' && (p)[1] == '1' && (p)[2] == '\0') || \
534 isoname_endsok(p))
535
536 LOCAL BOOL
isoname_endsok(name)537 isoname_endsok(name)
538 char *name;
539 {
540 int i;
541 char *p;
542
543 if (*name == '\0')
544 return (TRUE);
545 if (*name != ';')
546 return (FALSE);
547
548 for (p = ++name, i = 0; *p && i < 5; p++, i++) {
549 if (*p < '0' || *p > '9')
550 return (FALSE);
551 }
552 i = atoi(name);
553 if (i < 1 || i > 32767)
554 return (FALSE);
555 return (TRUE);
556 }
557
558 EXPORT int
delete_file_hash(de)559 delete_file_hash(de)
560 struct directory_entry *de;
561 {
562 struct name_hash *nh;
563 struct name_hash *prev;
564 int hash;
565
566 prev = NULL;
567 hash = name_hash(de->isorec.name);
568 for (nh = name_hash_table[hash]; nh; nh = nh->next) {
569 if (nh->de == de)
570 break;
571 prev = nh;
572 }
573 if (!nh)
574 return (1);
575 if (!prev)
576 name_hash_table[hash] = nh->next;
577 else
578 prev->next = nh->next;
579 free(nh);
580 return (0);
581 }
582
583 EXPORT void
flush_file_hash()584 flush_file_hash()
585 {
586 struct name_hash *nh;
587 struct name_hash *nh1;
588 int i;
589
590 for (i = 0; i < NR_NAME_HASH; i++) {
591 nh = name_hash_table[i];
592 while (nh) {
593 nh1 = nh->next;
594 free(nh);
595 nh = nh1;
596 }
597 name_hash_table[i] = NULL;
598
599 }
600 }
601