1 /* @(#)hash.c	1.30 16/12/13 joerg */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 	"@(#)hash.c	1.30 16/12/13 joerg";
6 
7 #endif
8 /*
9  * File hash.c - generate hash tables for iso9660 filesystem.
10  *
11  * Written by Eric Youngdale (1993).
12  *
13  * Copyright 1993 Yggdrasil Computing, Incorporated
14  * Copyright (c) 1999,2000-2016 J. Schilling
15  *
16  * This program is free software; you can redistribute it and/or modify
17  * it under the terms of the GNU General Public License as published by
18  * the Free Software Foundation; either version 2, or (at your option)
19  * any later version.
20  *
21  * This program is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  * GNU General Public License for more details.
25  *
26  * You should have received a copy of the GNU General Public License
27  * along with this program; if not, write to the Free Software
28  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
29  */
30 
31 /* APPLE_HYB James Pearson j.pearson@ge.ucl.ac.uk 23/2/2000 */
32 
33 /*
34  * From jb@danware.dk:
35  *
36  * Cygwin fakes inodes by hashing file info, actual collisions observed!
37  * This is documented in the cygwin source, look at winsup/cygwin/path.cc
38  * and search for the word 'Hash'.  On NT, cygwin ORs together the
39  * high and low 32 bits of the 64 bit genuine inode, look at fhandler.cc.
40  *
41  * Note:	Other operating systems which support the FAT filesystem may
42  *		have the same problem because FAT does not use the inode
43  *		concept.  For NTFS, genuine inode numbers exist, but they are
44  *		64 bits and available only through an open file handle.
45  *
46  * The solution is the new options -no-cache-inodes/-cache-inodes that
47  * allow to disable the mkisofs inode cache.
48  */
49 
50 /* DUPLICATES_ONCE Alex Kopylov cdrtools@bootcd.ru 19.06.2004 */
51 
52 #include "mkisofs.h"
53 #include <schily/schily.h>
54 #include <schily/sha3.h>
55 
56 #define	NR_HASH	(16*1024)
57 
58 #define	HASH_FN(DEV, INO)	((DEV + INO  + (INO >> 8) + (INO << 16)) % NR_HASH)
59 
60 static struct file_hash *hash_table[NR_HASH];
61 
62 #ifdef	DUPLICATES_ONCE
63 
64 #define	DIGEST_FAST_SIZE		65536
65 #define	UNIQUE_FILES_HASH_FN(SIZE)	((SIZE) % NR_HASH)
66 
67 #ifndef	DIGEST_Init
68 #define	DIGEST_Init			SHA3_512_Init
69 #define	DIGEST_Update			SHA3_Update
70 #define	DIGEST_Final			SHA3_Final
71 #define	DIGEST_CTX			SHA3_CTX
72 #define	DIGEST_LENGTH			SHA3_512_DIGEST_LENGTH
73 
74 #ifdef	SHORT_DIGEST
75 #undef	DIGEST_Init
76 #undef	DIGEST_LENGTH
77 #define	DIGEST_Init			SHA3_256_Init
78 #define	DIGEST_LENGTH			SHA3_256_DIGEST_LENGTH
79 #endif	/* SHORT_DIGEST */
80 #endif	/* DIGEST_Init */
81 #endif
82 
83 #ifdef	HASH_DEBUG
84 EXPORT	void		debug_hash	__PR((void));
85 #endif
86 EXPORT	void		add_hash	__PR((struct directory_entry *spnt));
87 EXPORT	struct file_hash *find_hash	__PR((struct directory_entry *spnt));
88 EXPORT	void		flush_hash	__PR((void));
89 EXPORT	void		add_directory_hash __PR((dev_t dev, ino_t inode));
90 EXPORT	struct file_hash *find_directory_hash __PR((dev_t dev, ino_t inode));
91 LOCAL	unsigned int	name_hash	__PR((const char *name));
92 EXPORT	void		add_file_hash	__PR((struct directory_entry *de));
93 EXPORT	struct directory_entry *find_file_hash __PR((char *name));
94 LOCAL	BOOL		isoname_endsok	__PR((char *name));
95 EXPORT	int		delete_file_hash __PR((struct directory_entry *de));
96 EXPORT	void		flush_file_hash	__PR((void));
97 #ifdef	DUPLICATES_ONCE
98 LOCAL	struct directory_entry *compare_files __PR((
99 					struct directory_entry *spnt1,
100 					struct directory_entry *spnt2));
101 LOCAL	unsigned char	*DIGEST_File	__PR((char *name, size_t size));
102 #endif
103 
104 #ifdef	HASH_DEBUG
105 EXPORT void
debug_hash()106 debug_hash()
107 {
108 	struct file_hash **p = hash_table;
109 	int	i;
110 	int	j = 0;
111 	struct file_hash *p2;
112 	int	k;
113 	int	maxlen = 0;
114 	int	minlen = 100000000;
115 	int	tot = 0;
116 
117 	for (i = 0; i < NR_HASH; i++, p++) {
118 		if (*p == 0)
119 			j++;
120 		else {
121 			p2 = *p;
122 			k = 0;
123 			while (p2) {
124 				tot++;
125 				k++;
126 				p2 = p2->next;
127 			}
128 			if (k > maxlen)
129 				maxlen = k;
130 			if (k < minlen)
131 				minlen = k;
132 		}
133 	}
134 	error("Unbenutzt: %d von %d Eintr�gen maxlen %d minlen %d total %d optmittel %d\n",
135 		j, NR_HASH, maxlen, minlen, tot, tot/NR_HASH);
136 }
137 #endif
138 
139 EXPORT void
add_hash(spnt)140 add_hash(spnt)
141 	struct directory_entry	*spnt;
142 {
143 	struct file_hash *s_hash;
144 	unsigned int    hash_number = 0;
145 
146 	if (spnt->size == 0 || spnt->starting_block == 0)
147 		if (spnt->size != 0 && spnt->starting_block == 0) {
148 			comerrno(EX_BAD,
149 			_("Non zero-length file '%s' assigned zero extent.\n"),
150 							spnt->name);
151 		};
152 
153 #ifdef	DUPLICATES_ONCE
154 	if (!cache_inodes && !duplicates_once)
155 #else
156 	if (!cache_inodes)
157 #endif
158 		return;
159 	if (spnt->dev == UNCACHED_DEVICE &&
160 	    (spnt->inode == TABLE_INODE || spnt->inode == UNCACHED_INODE)) {
161 		return;
162 	}
163 #ifdef	DUPLICATES_ONCE
164 	if (cache_inodes)
165 #endif
166 		hash_number = HASH_FN((unsigned int) spnt->dev,
167 						(unsigned int) spnt->inode);
168 #ifdef	DUPLICATES_ONCE
169 	else if (duplicates_once &&
170 		    spnt->size && !(spnt->isorec.flags[0] & ISO_DIRECTORY))
171 		hash_number = UNIQUE_FILES_HASH_FN((unsigned int) spnt->size);
172 #endif
173 
174 #if 0
175 	if (verbose > 1)
176 		fprintf(stderr, "%s ", spnt->name);
177 #endif
178 	s_hash = (struct file_hash *)e_malloc(sizeof (struct file_hash));
179 	s_hash->next = hash_table[hash_number];
180 	s_hash->inode = spnt->inode;
181 	s_hash->dev = spnt->dev;
182 	s_hash->nlink = 0;
183 	s_hash->starting_block = spnt->starting_block;
184 	s_hash->size = spnt->size;
185 #if	defined(SORTING) || defined(DUPLICATES_ONCE)
186 	s_hash->de = spnt;
187 #endif /* defined(SORTING) || defined(DUPLICATES_ONCE) */
188 	hash_table[hash_number] = s_hash;
189 }
190 
191 EXPORT struct file_hash *
find_hash(spnt)192 find_hash(spnt)
193 	struct directory_entry	*spnt;
194 {
195 	unsigned int    hash_number;
196 	struct file_hash *s_hash;
197 
198 #ifdef	DUPLICATES_ONCE
199 	if (!cache_inodes && !duplicates_once)
200 #else
201 	if (!cache_inodes)
202 #endif
203 		return (NULL);
204 	if (spnt->dev == UNCACHED_DEVICE &&
205 	    (spnt->inode == TABLE_INODE || spnt->inode == UNCACHED_INODE))
206 		return (NULL);
207 
208 #ifdef	DUPLICATES_ONCE
209 	if (cache_inodes) {
210 #endif
211 		hash_number = HASH_FN((unsigned int) spnt->dev,
212 				    (unsigned int) spnt->inode);
213 		s_hash = hash_table[hash_number];
214 		while (s_hash) {
215 			if (s_hash->inode == spnt->inode &&
216 			    s_hash->dev == spnt->dev)
217 				return (s_hash);
218 			s_hash = s_hash->next;
219 		}
220 #ifdef	DUPLICATES_ONCE
221 	} else if (duplicates_once &&
222 		    spnt->size && !(spnt->isorec.flags[0] & ISO_DIRECTORY)) {
223 		hash_number = UNIQUE_FILES_HASH_FN((unsigned int) spnt->size);
224 		s_hash = hash_table[hash_number];
225 		while (s_hash) {
226 			if (compare_files(spnt, s_hash->de))
227 				return (s_hash);
228 			s_hash = s_hash->next;
229 		}
230 	}
231 #endif
232 	return (NULL);
233 }
234 
235 /*
236  * based on flush_file_hash() below - needed as we want to re-use the
237  * file hash table.
238  */
239 EXPORT void
flush_hash()240 flush_hash()
241 {
242 	struct file_hash	*fh;
243 	struct file_hash	*fh1;
244 	int			i;
245 
246 	for (i = 0; i < NR_HASH; i++) {
247 		fh = hash_table[i];
248 		while (fh) {
249 			fh1 = fh->next;
250 			free(fh);
251 			fh = fh1;
252 		}
253 		hash_table[i] = NULL;
254 	}
255 }
256 
257 #ifdef	DUPLICATES_ONCE
258 LOCAL struct directory_entry *
compare_files(spnt1,spnt2)259 compare_files(spnt1, spnt2)
260 	struct directory_entry	*spnt1;
261 	struct directory_entry	*spnt2;
262 {
263 	if (spnt1->size != spnt2->size)
264 		return (NULL);
265 
266 	if (!spnt1->digest_fast)
267 		if (!(spnt1->digest_fast = DIGEST_File(spnt1->whole_name,
268 				(spnt1->size > DIGEST_FAST_SIZE) ?
269 				DIGEST_FAST_SIZE : spnt1->size)))
270 			return (NULL);
271 
272 	if (spnt1->size <= DIGEST_FAST_SIZE)
273 		spnt1->digest_full = spnt1->digest_fast;
274 
275 	if (!spnt2->digest_fast)
276 		if (!(spnt2->digest_fast = DIGEST_File(spnt2->whole_name,
277 				(spnt2->size > DIGEST_FAST_SIZE) ?
278 				DIGEST_FAST_SIZE : spnt2->size)))
279 			return (NULL);
280 
281 	if (spnt2->size <= DIGEST_FAST_SIZE)
282 		spnt2->digest_full = spnt2->digest_fast;
283 
284 	if (memcmp(spnt1->digest_fast, spnt2->digest_fast,
285 			DIGEST_LENGTH * sizeof (unsigned char)))
286 		return (NULL);
287 
288 	if (!spnt1->digest_full)
289 		if (!(spnt1->digest_full = DIGEST_File(spnt1->whole_name,
290 						spnt1->size)))
291 			return (NULL);
292 
293 	if (!spnt2->digest_full)
294 		if (!(spnt2->digest_full = DIGEST_File(spnt2->whole_name,
295 						spnt2->size)))
296 			return (NULL);
297 
298 	if (memcmp(spnt1->digest_full, spnt2->digest_full,
299 			DIGEST_LENGTH * sizeof (unsigned char)))
300 		return (NULL);
301 
302 	return (spnt2);
303 }
304 
305 LOCAL unsigned char *
DIGEST_File(name,size)306 DIGEST_File(name, size)
307 	char		*name;
308 	size_t		size;
309 {
310 	DIGEST_CTX	digest_ctx;
311 	FILE		*infile;
312 	unsigned char	buf[32768];
313 	unsigned char	*digest_hash;
314 	size_t		cnt;
315 
316 	DIGEST_Init(&digest_ctx);
317 
318 	if ((infile = fopen(name, "rb")) == NULL)
319 		return (NULL);
320 
321 	while (size) {
322 		cnt = (size > sizeof (buf)) ? sizeof (buf) : size;
323 		if ((cnt = fread(buf, 1, cnt, infile)) <= 0) {
324 			fclose(infile);
325 			return (NULL);
326 		}
327 		DIGEST_Update(&digest_ctx, buf, cnt);
328 		size -= cnt;
329 	}
330 
331 	fclose(infile);
332 
333 	digest_hash = e_malloc(sizeof (unsigned char) * DIGEST_LENGTH);
334 	DIGEST_Final(digest_hash, &digest_ctx);
335 	return (digest_hash);
336 }
337 #endif
338 
339 static struct file_hash *directory_hash_table[NR_HASH];
340 
341 #ifdef	PROTOTYPES
342 EXPORT void
add_directory_hash(dev_t dev,ino_t inode)343 add_directory_hash(dev_t dev, ino_t inode)
344 #else
345 EXPORT void
346 add_directory_hash(dev, inode)
347 	dev_t	dev;
348 	ino_t	inode;
349 #endif
350 {
351 	struct file_hash *s_hash;
352 	unsigned int    hash_number;
353 
354 	if (!cache_inodes)
355 		return;
356 	if (dev == UNCACHED_DEVICE &&
357 	    (inode == TABLE_INODE || inode == UNCACHED_INODE))
358 		return;
359 
360 	hash_number = HASH_FN((unsigned int) dev, (unsigned int) inode);
361 
362 	s_hash = (struct file_hash *)e_malloc(sizeof (struct file_hash));
363 	s_hash->next = directory_hash_table[hash_number];
364 	s_hash->inode = inode;
365 	s_hash->dev = dev;
366 	s_hash->nlink = 0;
367 	directory_hash_table[hash_number] = s_hash;
368 }
369 
370 #ifdef	PROTOTYPES
371 EXPORT struct file_hash *
find_directory_hash(dev_t dev,ino_t inode)372 find_directory_hash(dev_t dev, ino_t inode)
373 #else
374 EXPORT struct file_hash *
375 find_directory_hash(dev, inode)
376 	dev_t	dev;
377 	ino_t	inode;
378 #endif
379 {
380 	unsigned int    hash_number;
381 	struct file_hash *spnt;
382 
383 	if (!cache_inodes)
384 		return (NULL);
385 	if (dev == UNCACHED_DEVICE &&
386 	    (inode == TABLE_INODE || inode == UNCACHED_INODE))
387 		return (NULL);
388 
389 	hash_number = HASH_FN((unsigned int) dev, (unsigned int) inode);
390 	spnt = directory_hash_table[hash_number];
391 	while (spnt) {
392 		if (spnt->inode == inode && spnt->dev == dev)
393 			return (spnt);
394 		spnt = spnt->next;
395 	};
396 	return (NULL);
397 }
398 
399 struct name_hash {
400 	struct name_hash *next;
401 	struct directory_entry *de;
402 	int	sum;
403 };
404 
405 #define	NR_NAME_HASH	128
406 
407 static struct name_hash *name_hash_table[NR_NAME_HASH] = {0, };
408 
409 /*
410  * Find the hash bucket for this name.
411  */
412 LOCAL unsigned int
name_hash(name)413 name_hash(name)
414 	const char	*name;
415 {
416 	unsigned int	hash = 0;
417 	const char	*p;
418 
419 	p = name;
420 
421 	while (*p) {
422 		/*
423 		 * Don't hash the  iso9660 version number.
424 		 * This way we can detect duplicates in cases where we have
425 		 * directories (i.e. foo) and non-directories (i.e. foo;1).
426 		 */
427 		if (*p == ';') {
428 			break;
429 		}
430 		hash = (hash << 15) + (hash << 3) + (hash >> 3) + (*p++ & 0xFF);
431 	}
432 	return (hash % NR_NAME_HASH);
433 }
434 
435 EXPORT void
add_file_hash(de)436 add_file_hash(de)
437 	struct directory_entry	*de;
438 {
439 	struct name_hash	*new;
440 	int			hash;
441 	Uchar			*p;
442 	int			sum = 0;
443 
444 	new = (struct name_hash *)e_malloc(sizeof (struct name_hash));
445 	new->de = de;
446 	new->next = NULL;
447 	for (p = (Uchar *)de->isorec.name; *p; p++) {
448 		if (*p == ';')
449 			break;
450 		sum += *p & 0xFF;
451 	}
452 	new->sum = sum;
453 	hash = name_hash(de->isorec.name);
454 
455 	/* Now insert into the hash table */
456 	new->next = name_hash_table[hash];
457 	name_hash_table[hash] = new;
458 }
459 
460 EXPORT struct directory_entry *
find_file_hash(name)461 find_file_hash(name)
462 	register char			*name;
463 {
464 	register char			*p1;
465 	register char			*p2;
466 	register struct name_hash	*nh;
467 	register int			sum = 0;
468 
469 	if (debug > 1)
470 		error("find_hash('%s')\n", name);
471 
472 	for (p1 = name; *p1; p1++) {
473 		if (*p1 == ';')
474 			break;
475 		sum += *p1 & 0xFF;
476 	}
477 
478 	for (nh = name_hash_table[name_hash(name)]; nh; nh = nh->next) {
479 		if (nh->sum != sum)
480 			continue;
481 
482 		p1 = name;
483 		p2 = nh->de->isorec.name;
484 		if (debug > 1)
485 			error(_("Checking name '%s' isorec.name '%s'\n"), p1, p2);
486 
487 		/* Look for end of string, or a mismatch. */
488 		while (1 == 1) {
489 			if ((*p1 == '\0' || *p1 == ';') ||
490 			    (*p2 == '\0' || *p2 == ';') ||
491 			    (*p1 != *p2)) {
492 				break;
493 			}
494 			p1++;
495 			p2++;
496 		}
497 		if (!isoname_endsok(p1) || !isoname_endsok(p2)) {
498 			if (debug > 1) {
499 				if (!isoname_endsok(p1))
500 					error(_("'%s' does NOT END OK\n"), p1);
501 				if (!isoname_endsok(p2))
502 					error(_("'%s' does NOT END OK\n"), p2);
503 			}
504 			/*
505 			 * If one file does not end with a valid version number
506 			 * and the other name ends here, we found a miss match.
507 			 */
508 			if (*p1 == '\0' || *p2 == '\0')
509 				continue;
510 
511 			if (*p1 == ';' && *p2 == ';') {
512 				p1++;
513 				p2++;
514 				continue;
515 			}
516 		}
517 
518 		/*
519 		 * If we are at the end of both strings, then we have a match.
520 		 */
521 		if ((*p1 == '\0' || *p1 == ';') &&
522 		    (*p2 == '\0' || *p2 == ';')) {
523 			return (nh->de);
524 		}
525 	}
526 	return (NULL);
527 }
528 
529 /*
530  * The macro 'eo' is just an idea on how one might speed up isoname_endsok()
531  */
532 #define	eo(p)	(((p)[0] == '\0') || \
533 		((p)[0] == ';' && (p)[1] == '1' && (p)[2] == '\0') || \
534 		isoname_endsok(p))
535 
536 LOCAL BOOL
isoname_endsok(name)537 isoname_endsok(name)
538 	char	*name;
539 {
540 	int	i;
541 	char	*p;
542 
543 	if (*name == '\0')
544 		return (TRUE);
545 	if (*name != ';')
546 		return (FALSE);
547 
548 	for (p = ++name, i = 0; *p && i < 5; p++, i++) {
549 		if (*p < '0' || *p > '9')
550 			return (FALSE);
551 	}
552 	i = atoi(name);
553 	if (i < 1 || i > 32767)
554 		return (FALSE);
555 	return (TRUE);
556 }
557 
558 EXPORT int
delete_file_hash(de)559 delete_file_hash(de)
560 	struct directory_entry	*de;
561 {
562 	struct name_hash	*nh;
563 	struct name_hash	*prev;
564 	int			hash;
565 
566 	prev = NULL;
567 	hash = name_hash(de->isorec.name);
568 	for (nh = name_hash_table[hash]; nh; nh = nh->next) {
569 		if (nh->de == de)
570 			break;
571 		prev = nh;
572 	}
573 	if (!nh)
574 		return (1);
575 	if (!prev)
576 		name_hash_table[hash] = nh->next;
577 	else
578 		prev->next = nh->next;
579 	free(nh);
580 	return (0);
581 }
582 
583 EXPORT void
flush_file_hash()584 flush_file_hash()
585 {
586 	struct name_hash	*nh;
587 	struct name_hash	*nh1;
588 	int			i;
589 
590 	for (i = 0; i < NR_NAME_HASH; i++) {
591 		nh = name_hash_table[i];
592 		while (nh) {
593 			nh1 = nh->next;
594 			free(nh);
595 			nh = nh1;
596 		}
597 		name_hash_table[i] = NULL;
598 
599 	}
600 }
601