1 /*********************************************************************
2 
3 	hash.c
4 
5 	Function to handle hash functions (checksums)
6 
7 	Started by Farfetch'd
8 
9 *********************************************************************/
10 
11 /*
12  * Changelog:
13  *
14  * 20030314:  Farfetch'd
15  *    First release
16  *
17  */
18 
19 /*
20  * DONE:
21  *
22  * hash.c/h: New files, implement the new hashing engine with flexible
23  *    support for more functions (for now, CRC, SHA1 and MD5).
24  *
 * common.h: transparently support the new RomModule structure through
 *    ROM_* macros, so that the old legacy code still works
27  *
28  * common.c: updated ROM loading engine to support the new hash engine,
29  *    using it to verify ROM integrity at load-time. Updated printromlist()
30  *    (-listroms) to dump all the available checksums, and if a ROM is
31  *    known to be bad or not.
32  *
33  * info.c: -listinfo now supports any hashing function correctly
34  *    (both text and XML mode). Notice that XML header should be
35  *    rewritten to automatically define the new tags when new
36  *    functions are added, but I couldn't be bothered for now.
 *    It also displays information about baddump/nodump
38  *
39  * audit.c/h: Updated audit engine (-verifyroms) to use the new
40  *    hash functions.
41  *
42  * fileio.c/h: Updated file engine to use the new hash functions.
43  *    It is now possible to load by any specified checksum (in case
44  *    later we support other archivers with SHA1 signatures or
45  *    equivalent). If the file is open with flag VERIFY_ONLY and
46  *    the file is within an archive (zip), only the checksums
47  *    available in the archive header are used.
48  *
49  * windows/fronthlp.c:  Updated -identrom to the new hash engine, now
50  *    support any hash function available.
51  *    Added -crconly to disable advanced integrity checks.
52  *    This should be needed for people with very slow computers
53  *    whose loading time is affected too much by the new hashing
54  *    calculations (hello, stephh).
55  *    This also means that for -identrom MAME will not have to
56  *    decompress the ROM from the ZIP to calculate checksum
57  *    informations, since the CRC will be extracted from the header.
58  *    Added -listsha1 and -listmd5. It would be possible to add
59  *    also a -listbad now, to list bad dumps (ROMS we need a
60  *    redump for)
61  *    Updated -listdupcrc to check for all the available checksums.
62  *    The output is also a bit more useful and readable, even if it
63  *    is still not optimal.
64  *    Update -listwrongmerge to check for all the available checksums.
65  *
66  * mame.h: Added new field crc_only to struct GameOptions.
67  *
68  * windows/config.c: Added new option -crconly
69  *
70  * windows/fileio.c: Removed check for FILE_TYPE_NOCRC (does not exist
71  *    anymore).
72  *
73  *
74  *
75  * Technical details:
76  *
 * Checksum information is now stored inside a string. It is
 * stored in "printable hex format", which means that it uses
 * more memory than before (since a CRC needs 8 characters to
 * be printed, instead of 4 bytes of raw information). In the
 * driver, checksums are defined with handy macros which rely on
 * automatic string pasting.
83  *
84  * Additional flags can also be stored in the string: for now we
85  * support NO_DUMP and BAD_DUMP, which replace, respectively,
86  * a CRC of 0 and a bit-inverted CRC.
87  *
88  * All the code that handles hash data is in hash.c. The rest of
89  * the core treats the data as an 'opaque type', so that the
90  * pointers are just passed along through functions but no
91  * operation is performed on the data outside hash.c. This
92  * is important in case we want to change the string
93  * representation later in the future.
94  *
95  * When loading a ROM, MAME will calculate and compare the
96  * checksum using any function for which the driver has declared
97  * an expected checksum. This happens because it would be useless
98  * to calculate a checksum if we cannot verify its correctness.
99  * For developers, it also means that MAME will not compute the
100  * SHA1 for you unless you specify a bogus one in the driver
101  * (like SHA1(0)).
102  *
103  * When verifying a ROM, MAME will use only the checksums available
104  * in the archive header (if zip, CRC). This is by design because
105  * -verifyroms has always been very fast. It is feasible to add
106  * a -fullverifyroms at a later moment, which will decompress the
107  * files and compute every checksum that has been declared in the
108  * driver.
109  *
110  * I have also prepared a little tool (SHA1Merger) which takes care
111  * of the following tasks:
112  *
113  * - Given an existing driver in old syntax (0.66 compatible), it will
114  *   convert all the existing ROM_LOAD entries in the new format, and
115  *   it will automatically compute and add SHA1 checksum for you if
116  *   it can find the romset.
117  *
118  * - Given a romset (ZIP file), it will prepare a ROM definition
119  *   skeleton for a driver, containing already rom names, lengths, and
120  *   checksums (both CRC and SHA1).
121  *
122  * The tool is available on www.mame.net as platform-independent source code
123  * (in Python), or win32 standalone executable.
124  *
125  */
126 
127 #include <stddef.h>
128 #include <ctype.h>
129 #include <string.h>
130 #include <stdlib.h>
131 #include <compat/zlib.h>
132 #include "hash.h"
133 #include <utils/md5.h>
134 #include "mame_sha1.h"
135 #include "osd_cpu.h"
136 #include "mame.h"
137 #include "common.h"
138 
/* Assertions are compiled out in this file: ASSERT(x) expands to nothing,
   so the argument expression is never evaluated. */
#define ASSERT(x)
140 
141 typedef struct
142 {
143 	const char* name;           /* human-readable name*/
144 	char code;                  /* single-char code used within the hash string*/
145 	unsigned int size;          /* checksum size in bytes*/
146 
147 	/* Functions used to calculate the hash of a memory block*/
148 	void (*calculate_begin)(void);
149 	void (*calculate_buffer)(const void* mem, unsigned long len);
150 	void (*calculate_end)(UINT8* bin_chksum);
151 
152 } hash_function_desc;
153 
154 static void h_crc_begin(void);
155 static void h_crc_buffer(const void* mem, unsigned long len);
156 static void h_crc_end(UINT8* chksum);
157 
158 static void h_sha1_begin(void);
159 static void h_sha1_buffer(const void* mem, unsigned long len);
160 static void h_sha1_end(UINT8* chksum);
161 
162 static void h_md5_begin(void);
163 static void h_md5_buffer(const void* mem, unsigned long len);
164 static void h_md5_end(UINT8* chksum);
165 
166 static hash_function_desc hash_descs[HASH_NUM_FUNCTIONS] =
167 {
168 	{
169 		"crc", 'c', 4,
170 		h_crc_begin,
171 		h_crc_buffer,
172 		h_crc_end
173 	},
174 
175 	{
176 		"sha1", 's', 20,
177 		h_sha1_begin,
178 		h_sha1_buffer,
179 		h_sha1_end
180 	},
181 
182 	{
183 		"md5", 'm', 16,
184 		h_md5_begin,
185 		h_md5_buffer,
186 		h_md5_end
187 	},
188 };
189 
/*
 * Marker strings for the additional flags that can be embedded in a hash
 * string. The array is indexed by the 'info' code given to
 * hash_data_has_info().
 */
const char* info_strings[] =
{
	"$ND$",       /* index 0: no dump */
	"$BD$"        /* index 1: bad dump */
};
195 
/* Lookup table mapping a nibble value (0..15) to its lowercase hex digit */
static const char* binToStr = "0123456789" "abcdef";
197 
198 
hash_get_function_desc(unsigned int function)199 static hash_function_desc* hash_get_function_desc(unsigned int function)
200 {
201 	unsigned int idx = 0;
202 
203 	/* Calling with zero in here is mostly an internal error*/
204 	ASSERT(function != 0);
205 
206 	/* Compute the index of only one function*/
207 	while (!(function & 1))
208 	{
209 		idx++;
210 		function >>= 1;
211 	}
212 
213 	/* Specify only one bit or die*/
214 	ASSERT(function == 1);
215 
216 	return &hash_descs[idx];
217 }
218 
hash_function_name(unsigned int function)219 const char* hash_function_name(unsigned int function)
220 {
221 	hash_function_desc* info = hash_get_function_desc(function);
222 
223 	return info->name;
224 }
225 
hash_data_has_checksum(const char * data,unsigned int function)226 int hash_data_has_checksum(const char* data, unsigned int function)
227 {
228 	hash_function_desc* info = hash_get_function_desc(function);
229 	char str[3];
230 	const char* res;
231 
232 	str[0] = info->code;
233 	str[1] = ':';
234 	str[2] = '\0';
235 
236 	/* Check if the specified hash function is used within this data*/
237 	res = strstr(data, str);
238 
239 	if (!res)
240 		return 0;
241 
242 	/* Return the offset within the string where the checksum begins*/
243 	return (res - data + 2);
244 }
245 
/*
 * Write one checksum signature, "<code>:<hex digits>#", at 'd'.
 *
 * d:        destination; receives 2 + size*2 + 1 characters. No string
 *           terminator is written, that is left to the caller.
 * function: single-bit mask selecting the hash function.
 * checksum: binary checksum, 'size' bytes as given by the descriptor.
 *
 * Returns the number of characters written.
 */
static int hash_data_add_binary_checksum(char* d, unsigned int function, UINT8* checksum)
{
	hash_function_desc* desc = hash_get_function_desc(function);
	unsigned int pos = 0;
	unsigned int n;

	d[pos++] = desc->code;
	d[pos++] = ':';

	/* Two lowercase hex digits per byte, high nibble first */
	for (n = 0; n < desc->size; n++)
	{
		UINT8 byte = checksum[n];

		d[pos++] = binToStr[(byte >> 4) & 0xF];
		d[pos++] = binToStr[(byte >> 0) & 0xF];
	}

	d[pos++] = '#';

	return (int)pos;
}
268 
269 
/*
 * Case-insensitive comparison of two printable checksums.
 *
 * chk1, chk2: pointers to the first hex digit of each checksum.
 * length:     checksum size in bytes; twice as many characters are
 *             compared because each byte is printed as two hex digits.
 *
 * Returns 1 when all the characters match, 0 on the first mismatch or
 * when a string terminator is reached before the full length has been
 * compared.
 */
static int hash_compare_checksum(const char* chk1, const char* chk2, int length)
{
	/* The printable format is twice as long as the binary one */
	length *= 2;

	while (length--)
	{
		/* <ctype.h> functions require values in the unsigned char range;
		   passing a negative plain char is undefined behavior */
		unsigned char c1 = (unsigned char)*chk1++;
		unsigned char c2 = (unsigned char)*chk2++;

		if (tolower(c1) != tolower(c2))
			return 0;

		/* Both characters are equal here, so one check covers both
		   strings ending prematurely */
		if (c1 == '\0')
			return 0;
	}

	return 1;
}
291 
292 
293 /* Compare two hashdata*/
hash_data_is_equal(const char * d1,const char * d2,unsigned int functions)294 int hash_data_is_equal(const char* d1, const char* d2, unsigned int functions)
295 {
296 	int i;
297 	char incomplete = 0;
298 	char ok = 0;
299 
300 	/* If no function is specified, it means we need to check for all*/
301 	/*  of them*/
302 	if (!functions)
303 		functions = ~functions;
304 
305 	for (i=1; i != (1<<HASH_NUM_FUNCTIONS); i<<=1)
306 		if (functions & i)
307 		{
308 			int offs1, offs2;
309 
310 			/* Check if both hashdata contain the current function's checksum*/
311 			offs1 = hash_data_has_checksum(d1, i);
312 			offs2 = hash_data_has_checksum(d2, i);
313 
314 			if (offs1 && offs2)
315 			{
316 				hash_function_desc* info = hash_get_function_desc(i);
317 
318 				if (!hash_compare_checksum(d1+offs1, d2+offs2, info->size))
319 					return 0;
320 
321 				ok = 1;
322 			}
323 			/* If the function was contained only in one, remember that our comparison*/
324 			/*  is incomplete*/
325 			else if (offs1 || offs2)
326 			{
327 				incomplete = 1;
328 			}
329 		}
330 
331 	/* If we could not compare any function, return error*/
332 	if (!ok)
333 		return 0;
334 
335 	/* Return success code*/
336 	return (incomplete ? 2 : 1);
337 }
338 
339 
hash_data_extract_printable_checksum(const char * data,unsigned int function,char * checksum)340 int hash_data_extract_printable_checksum(const char* data, unsigned int function, char* checksum)
341 {
342 	unsigned int i;
343 	hash_function_desc* info;
344 	int offs;
345 
346 	/* Check if the hashdata contains the requested function*/
347 	offs = hash_data_has_checksum(data, function);
348 
349 	if (!offs)
350 		return 0;
351 
352 	/* Move to the beginning of the checksum*/
353 	data += offs;
354 
355 	info = hash_get_function_desc(function);
356 
357 	/* Return the number of required bytes*/
358 	if (!checksum)
359 		return info->size*2+1;
360 
361 	/* If the terminator is not found at the right position,*/
362 	/*  return a full-zero checksum and warn about it. This is mainly*/
363 	/*  for developers putting checksums of '0' or '1' to ask MAME*/
364 	/*  to compute the correct values for them.*/
365 	if (data[info->size*2] != '#')
366 	{
367 		memset(checksum, '0', info->size*2);
368 		checksum[info->size*2] = '\0';
369 		return 2;
370 	}
371 
372 	/* If it contains invalid hexadecimal characters,*/
373 	/*  treat the checksum as zero and return warning*/
374 	for (i=0;i<info->size*2;i++)
375 		if (!(data[i]>='0' && data[i]<='9') &&
376 			!(data[i]>='a' && data[i]<='f') &&
377 			!(data[i]>='A' && data[i]<='F'))
378 		{
379 			memset(checksum, '0', info->size*2);
380 			checksum[info->size*2] = '\0';
381 			return 2;
382 		}
383 
384 	/* Copy the checksum (and make it lowercase)*/
385 	for (i=0;i<info->size*2;i++)
386 		checksum[i] = tolower(data[i]);
387 
388 	checksum[info->size*2] = '\0';
389 
390 	return 1;
391 }
392 
hash_data_extract_binary_checksum(const char * data,unsigned int function,unsigned char * checksum)393 int hash_data_extract_binary_checksum(const char* data, unsigned int function, unsigned char* checksum)
394 {
395 	unsigned int i;
396 	hash_function_desc* info;
397 	int offs;
398 
399 	/* Check if the hashdata contains the requested function*/
400 	offs = hash_data_has_checksum(data, function);
401 
402 	if (!offs)
403 		return 0;
404 
405 	/* Move to the beginning of the checksum*/
406 	data += offs;
407 
408 	info = hash_get_function_desc(function);
409 
410 	/* Return the number of required bytes*/
411 	if (!checksum)
412 		return info->size;
413 
414 	/* Clear the checksum array*/
415 	memset(checksum, 0, info->size);
416 
417 	/* If the terminator is not found at the right position,*/
418 	/*  return a full-zero checksum and warn about it. This is mainly*/
419 	/*  for developers putting checksums of '0' or '1' to ask MAME*/
420 	/*  to compute the correct values for them.*/
421 	if (data[info->size*2] != '#')
422 	{
423 		memset(checksum, '\0', info->size);
424 		return 2;
425 	}
426 
427 	/* Convert hex string into binary*/
428 	for (i=0;i<info->size*2;i++)
429 	{
430 		char c = tolower(*data++);
431 
432 		if (c >= '0' && c <= '9')
433 			c -= '0';
434 		else if (c >= 'a' && c <= 'f')
435 			c -= 'a' - 10;
436 		else if (c >= 'A' && c <= 'F')
437 			c -= 'A' - 10;
438 		else
439 		{
440 			/* Invalid character: the checksum is treated as zero,*/
441 			/*  and a warning is returned*/
442 			memset(checksum, '\0', info->size);
443 			return 2;
444 		}
445 
446 		if (i % 2 == 0)
447 			checksum[i / 2] = c * 16;
448 		else
449 			checksum[i / 2] += c;
450 	}
451 
452 	return 1;
453 }
454 
hash_data_has_info(const char * data,unsigned int info)455 int hash_data_has_info(const char* data, unsigned int info)
456 {
457 	char* res = strstr(data, info_strings[info]);
458 
459 	if (!res)
460 		return 0;
461 
462 	return 1;
463 }
464 
/*
 * Copy the hash string 'src', terminator included, into 'dst'.
 * The destination size is not checked.
 */
void hash_data_copy(char* dst, const char* src)
{
	/* Hash data is a plain C string: copy its characters plus the NUL */
	memcpy(dst, src, strlen(src) + 1);
}
470 
hash_data_clear(char * dst)471 void hash_data_clear(char* dst)
472 {
473 	/* Clear the buffer*/
474 	memset(dst, 0, HASH_BUF_SIZE);
475 }
476 
hash_data_used_functions(const char * data)477 unsigned int hash_data_used_functions(const char* data)
478 {
479 	int i;
480 	unsigned int res = 0;
481 
482 	if (!data)
483 		return 0;
484 
485 	for (i=0;i<HASH_NUM_FUNCTIONS;i++)
486 		if (hash_data_has_checksum(data, 1<<i))
487 			res |= 1<<i;
488 
489 	return res;
490 }
491 
/*
 * Store a binary checksum into the hash string 'd'.
 *
 * d:        hash string to modify in place.
 * function: single-bit mask selecting the hash function.
 * checksum: binary checksum for that function.
 *
 * Returns 1 when the function was not present yet and its signature was
 * appended to the string, 2 when an existing signature was overwritten.
 */
int hash_data_insert_binary_checksum(char* d, unsigned int function, UINT8* checksum)
{
	int offset = hash_data_has_checksum(d, function);

	if (offset)
	{
		/* The offset points at the digits; step back over the two tag
		   characters so the whole signature is rewritten in place */
		hash_data_add_binary_checksum(d + offset - 2, function, checksum);

		return 2;
	}

	/* Not present: append the signature at the end and re-terminate */
	d += strlen(d);
	d += hash_data_add_binary_checksum(d, function, checksum);
	*d = '\0';

	return 1;
}
518 
hash_compute(char * dst,const unsigned char * data,unsigned long length,unsigned int functions)519 void hash_compute(char* dst, const unsigned char* data, unsigned long length, unsigned int functions)
520 {
521 	int i;
522 
523 	hash_data_clear(dst);
524 
525 	/* Zero means use all the functions*/
526 	if (functions == 0)
527 		functions = ~functions;
528 
529 	for (i=0;i<HASH_NUM_FUNCTIONS;i++)
530 	{
531 		unsigned func = 1 << i;
532 
533 		if (functions & func)
534 		{
535 			hash_function_desc* desc = hash_get_function_desc(func);
536 			UINT8 chksum[256];
537 
538 			desc->calculate_begin();
539 			desc->calculate_buffer(data, length);
540 			desc->calculate_end(chksum);
541 
542 			dst += hash_data_add_binary_checksum(dst, func, chksum);
543 		}
544 	}
545 
546 	*dst = '\0';
547 }
548 
hash_data_print(const char * data,unsigned int functions,char * buffer)549 void hash_data_print(const char* data, unsigned int functions, char* buffer)
550 {
551 	int i, j;
552 	char first = 1;
553 
554 	if (functions == 0)
555 		functions = ~functions;
556 
557 	buffer[0] = '\0';
558 
559 	for (i=0;i<HASH_NUM_FUNCTIONS;i++)
560 	{
561 		unsigned func = 1 << i;
562 
563 		if ((functions & func) && hash_data_has_checksum(data, func))
564 		{
565 			char temp[256];
566 
567 			if (!first)
568 				strcat(buffer, " ");
569 			first = 0;
570 
571 			strcpy(temp, hash_function_name(func));
572 			for (j = 0; temp[j]; j++)
573 				temp[j] = toupper(temp[j]);
574 			strcat(buffer, temp);
575 			strcat(buffer, "(");
576 
577 			hash_data_extract_printable_checksum(data, func, temp);
578 			strcat(buffer, temp);
579 			strcat(buffer, ")");
580 		}
581 	}
582 }
583 
hash_verify_string(const char * hash)584 int hash_verify_string(const char *hash)
585 {
586 	int len, i;
587 
588 	if (!hash)
589 		return 0;
590 
591 	while(*hash)
592 	{
593 		if (*hash == '$')
594 		{
595 			if (memcmp(hash, NO_DUMP, 4) && memcmp(hash, BAD_DUMP, 4))
596 				return 0;
597 			hash += 4;
598 		}
599 		else
600 		{
601 			/* first make sure that the next char is a colon */
602 			if (hash[1] != ':')
603 				return 0;
604 
605 			/* search for a hash function for this code */
606 			for (i = 0; i < sizeof(hash_descs) / sizeof(hash_descs[0]); i++)
607 			{
608 				if (*hash == hash_descs[i].code)
609 					break;
610 			}
611 			if (i >= sizeof(hash_descs) / sizeof(hash_descs[0]))
612 				return 0;
613 
614 			/* we have a proper code */
615 			len = hash_descs[i].size * 2;
616 			hash += 2;
617 
618 			for (i = 0; (hash[i] != '#') && (i < len); i++)
619 			{
620 				if (!isxdigit(hash[i]))
621 					return 0;
622 			}
623 			if (hash[i] != '#')
624 				return 0;
625 
626 			hash += i+1;
627 		}
628 	}
629 
630 	return 1;
631 }
632 
633 
634 
635 /*********************************************************************
636 	Hash functions - Wrappers
637  *********************************************************************/
638 
639 static UINT32 crc;
640 
h_crc_begin(void)641 static void h_crc_begin(void)
642 {
643 	crc = 0;
644 }
645 
h_crc_buffer(const void * mem,unsigned long len)646 static void h_crc_buffer(const void* mem, unsigned long len)
647 {
648 	crc = crc32(crc, (UINT8*)mem, len);
649 }
650 
/*
 * Finish the CRC computation: store the 32-bit value into bin_chksum as
 * four bytes, most significant byte first.
 */
static void h_crc_end(UINT8* bin_chksum)
{
	int i;

	for (i = 0; i < 4; i++)
		bin_chksum[i] = (UINT8)(crc >> (24 - 8 * i));
}
658 
659 
660 struct sha1_ctx sha1ctx;
661 
h_sha1_begin(void)662 static void h_sha1_begin(void)
663 {
664 	sha1_init(&sha1ctx);
665 }
666 
h_sha1_buffer(const void * mem,unsigned long len)667 static void h_sha1_buffer(const void* mem, unsigned long len)
668 {
669 	sha1_update(&sha1ctx, len, (UINT8*)mem);
670 }
671 
/* Finish the SHA1 computation and store the 20-byte digest into bin_chksum */
static void h_sha1_end(UINT8* bin_chksum)
{
	enum { SHA1_DIGEST_BYTES = 20 };

	sha1_final(&sha1ctx);
	sha1_digest(&sha1ctx, SHA1_DIGEST_BYTES, bin_chksum);
}
677 
678 
679 static MD5_CTX md5;
680 
h_md5_begin(void)681 static void h_md5_begin(void)
682 {
683 #ifndef HAVE_LIBNX // Add hw crypto later, works without
684 	MD5_Init(&md5);
685 #endif
686 }
687 
h_md5_buffer(const void * mem,unsigned long len)688 static void h_md5_buffer(const void* mem, unsigned long len)
689 {
690 #ifndef HAVE_LIBNX // Add hw crypto later, works without
691 	MD5_Update(&md5, (md5byte*)mem, len);
692 #endif
693 }
694 
/*
 * Finish the MD5 computation and store the 16-byte digest into
 * bin_chksum.
 *
 * When HAVE_LIBNX is defined the MD5 code is compiled out (hardware
 * crypto may be added later). In that configuration the digest is set to
 * all zeros, so that the caller, which formats these 16 bytes into the
 * hash string, never reads uninitialized memory.
 */
static void h_md5_end(UINT8* bin_chksum)
{
#ifndef HAVE_LIBNX
	MD5_Final(bin_chksum, &md5);
#else
	/* 16 is the MD5 digest size declared in hash_descs */
	memset(bin_chksum, 0, 16);
#endif
}
701