1 /*********************************************************************
2
3 hash.c
4
  Functions to compute and handle hashes (checksums)
6
7 Started by Farfetch'd
8
9 *********************************************************************/
10
11 /*
12 * Changelog:
13 *
14 * 20030314: Farfetch'd
15 * First release
16 *
17 */
18
19 /*
20 * DONE:
21 *
22 * hash.c/h: New files, implement the new hashing engine with flexible
23 * support for more functions (for now, CRC, SHA1 and MD5).
24 *
25 * common.h: transparently support the new RomModule structure through
 * ROM_* macros, so that the old legacy code still works
27 *
28 * common.c: updated ROM loading engine to support the new hash engine,
29 * using it to verify ROM integrity at load-time. Updated printromlist()
30 * (-listroms) to dump all the available checksums, and if a ROM is
31 * known to be bad or not.
32 *
33 * info.c: -listinfo now supports any hashing function correctly
34 * (both text and XML mode). Notice that XML header should be
35 * rewritten to automatically define the new tags when new
36 * functions are added, but I couldn't be bothered for now.
 * It also displays information about baddump/nodump
38 *
39 * audit.c/h: Updated audit engine (-verifyroms) to use the new
40 * hash functions.
41 *
42 * fileio.c/h: Updated file engine to use the new hash functions.
43 * It is now possible to load by any specified checksum (in case
44 * later we support other archivers with SHA1 signatures or
45 * equivalent). If the file is open with flag VERIFY_ONLY and
46 * the file is within an archive (zip), only the checksums
47 * available in the archive header are used.
48 *
 * windows/fronthlp.c: Updated -identrom to the new hash engine; it now
 * supports any available hash function.
51 * Added -crconly to disable advanced integrity checks.
52 * This should be needed for people with very slow computers
53 * whose loading time is affected too much by the new hashing
54 * calculations (hello, stephh).
55 * This also means that for -identrom MAME will not have to
56 * decompress the ROM from the ZIP to calculate checksum
 * information, since the CRC will be extracted from the header.
58 * Added -listsha1 and -listmd5. It would be possible to add
59 * also a -listbad now, to list bad dumps (ROMS we need a
60 * redump for)
61 * Updated -listdupcrc to check for all the available checksums.
62 * The output is also a bit more useful and readable, even if it
63 * is still not optimal.
 * Updated -listwrongmerge to check for all the available checksums.
65 *
66 * mame.h: Added new field crc_only to struct GameOptions.
67 *
68 * windows/config.c: Added new option -crconly
69 *
70 * windows/fileio.c: Removed check for FILE_TYPE_NOCRC (does not exist
71 * anymore).
72 *
73 *
74 *
75 * Technical details:
76 *
 * Checksum information is now stored inside a string. It is
 * stored in "printable hex format", which means that it uses
79 * more memory than before (since a CRC needs 8 characters to
80 * be printed, instead of 4 bytes of raw information). In the
81 * driver, they are defined with handy macros which rely on
82 * automatic string pasting.
83 *
84 * Additional flags can also be stored in the string: for now we
85 * support NO_DUMP and BAD_DUMP, which replace, respectively,
86 * a CRC of 0 and a bit-inverted CRC.
87 *
88 * All the code that handles hash data is in hash.c. The rest of
89 * the core treats the data as an 'opaque type', so that the
90 * pointers are just passed along through functions but no
91 * operation is performed on the data outside hash.c. This
92 * is important in case we want to change the string
93 * representation later in the future.
94 *
95 * When loading a ROM, MAME will calculate and compare the
96 * checksum using any function for which the driver has declared
97 * an expected checksum. This happens because it would be useless
98 * to calculate a checksum if we cannot verify its correctness.
99 * For developers, it also means that MAME will not compute the
100 * SHA1 for you unless you specify a bogus one in the driver
101 * (like SHA1(0)).
102 *
103 * When verifying a ROM, MAME will use only the checksums available
104 * in the archive header (if zip, CRC). This is by design because
105 * -verifyroms has always been very fast. It is feasible to add
106 * a -fullverifyroms at a later moment, which will decompress the
107 * files and compute every checksum that has been declared in the
108 * driver.
109 *
110 * I have also prepared a little tool (SHA1Merger) which takes care
111 * of the following tasks:
112 *
113 * - Given an existing driver in old syntax (0.66 compatible), it will
114 * convert all the existing ROM_LOAD entries in the new format, and
115 * it will automatically compute and add SHA1 checksum for you if
116 * it can find the romset.
117 *
118 * - Given a romset (ZIP file), it will prepare a ROM definition
119 * skeleton for a driver, containing already rom names, lengths, and
120 * checksums (both CRC and SHA1).
121 *
122 * The tool is available on www.mame.net as platform-independent source code
123 * (in Python), or win32 standalone executable.
124 *
125 */
126
127 #include <stddef.h>
128 #include <ctype.h>
129 #include <string.h>
130 #include <stdlib.h>
131 #include <compat/zlib.h>
132 #include "hash.h"
133 #include <utils/md5.h>
134 #include "mame_sha1.h"
135 #include "osd_cpu.h"
136 #include "mame.h"
137 #include "common.h"
138
/* Assertion hook used by this file. It expands to nothing, so the
   conditions passed to ASSERT() are neither evaluated nor enforced. */
#define ASSERT(x)
140
141 typedef struct
142 {
143 const char* name; /* human-readable name*/
144 char code; /* single-char code used within the hash string*/
145 unsigned int size; /* checksum size in bytes*/
146
147 /* Functions used to calculate the hash of a memory block*/
148 void (*calculate_begin)(void);
149 void (*calculate_buffer)(const void* mem, unsigned long len);
150 void (*calculate_end)(UINT8* bin_chksum);
151
152 } hash_function_desc;
153
154 static void h_crc_begin(void);
155 static void h_crc_buffer(const void* mem, unsigned long len);
156 static void h_crc_end(UINT8* chksum);
157
158 static void h_sha1_begin(void);
159 static void h_sha1_buffer(const void* mem, unsigned long len);
160 static void h_sha1_end(UINT8* chksum);
161
162 static void h_md5_begin(void);
163 static void h_md5_buffer(const void* mem, unsigned long len);
164 static void h_md5_end(UINT8* chksum);
165
166 static hash_function_desc hash_descs[HASH_NUM_FUNCTIONS] =
167 {
168 {
169 "crc", 'c', 4,
170 h_crc_begin,
171 h_crc_buffer,
172 h_crc_end
173 },
174
175 {
176 "sha1", 's', 20,
177 h_sha1_begin,
178 h_sha1_buffer,
179 h_sha1_end
180 },
181
182 {
183 "md5", 'm', 16,
184 h_md5_begin,
185 h_md5_buffer,
186 h_md5_end
187 },
188 };
189
/* Marker substrings that flag special dumps inside a hash string.
   hash_data_has_info() indexes this table with its info argument. */
const char *info_strings[] =
{
    "$ND$",     /* No dump */
    "$BD$"      /* Bad dump */
};

/* Maps a nibble value (0-15) to its lowercase hexadecimal digit */
static const char *binToStr = "0123456789abcdef";
197
198
hash_get_function_desc(unsigned int function)199 static hash_function_desc* hash_get_function_desc(unsigned int function)
200 {
201 unsigned int idx = 0;
202
203 /* Calling with zero in here is mostly an internal error*/
204 ASSERT(function != 0);
205
206 /* Compute the index of only one function*/
207 while (!(function & 1))
208 {
209 idx++;
210 function >>= 1;
211 }
212
213 /* Specify only one bit or die*/
214 ASSERT(function == 1);
215
216 return &hash_descs[idx];
217 }
218
hash_function_name(unsigned int function)219 const char* hash_function_name(unsigned int function)
220 {
221 hash_function_desc* info = hash_get_function_desc(function);
222
223 return info->name;
224 }
225
hash_data_has_checksum(const char * data,unsigned int function)226 int hash_data_has_checksum(const char* data, unsigned int function)
227 {
228 hash_function_desc* info = hash_get_function_desc(function);
229 char str[3];
230 const char* res;
231
232 str[0] = info->code;
233 str[1] = ':';
234 str[2] = '\0';
235
236 /* Check if the specified hash function is used within this data*/
237 res = strstr(data, str);
238
239 if (!res)
240 return 0;
241
242 /* Return the offset within the string where the checksum begins*/
243 return (res - data + 2);
244 }
245
/*
 * Write the printable form of a binary checksum at 'd':
 * "<code>:<lowercase hex digits>#".
 *
 * d:        destination; must have room for 2 * size + 3 characters.
 * function: single function bit selecting the descriptor (code and size).
 * checksum: binary checksum, 'size' bytes long.
 *
 * No terminating NUL is written. Returns the number of characters written.
 */
static int hash_data_add_binary_checksum(char* d, unsigned int function, UINT8* checksum)
{
    hash_function_desc* desc = hash_get_function_desc(function);
    const UINT8* in = checksum;
    const UINT8* in_end = checksum + desc->size;
    char* out = d;

    /* Tag */
    *out++ = desc->code;
    *out++ = ':';

    /* Two hex digits per byte, high nibble first */
    while (in != in_end)
    {
        UINT8 byte = *in++;

        *out++ = binToStr[(byte >> 4) & 0xF];
        *out++ = binToStr[byte & 0xF];
    }

    /* Terminator of the checksum */
    *out++ = '#';

    return (int)(out - d);
}
268
269
/*
 * Case-insensitive comparison of two printable checksums.
 *
 * chk1, chk2: strings holding the hexadecimal digits to compare.
 * length:     checksum size in bytes; twice as many characters are
 *             compared because each byte is printed as two digits.
 *
 * Returns 1 when the first 2 * length characters match (ignoring case),
 * 0 when they differ or when either string ends before that many
 * characters. A length of zero or less compares nothing and returns 1.
 */
static int hash_compare_checksum(const char* chk1, const char* chk2, int length)
{
    /* The printable format is twice as long as the binary one */
    int remaining = length * 2;

    while (remaining-- > 0)
    {
        /* <ctype.h> functions require values representable as
           unsigned char, so never hand them a plain (maybe negative) char */
        unsigned char c1 = (unsigned char)*chk1++;
        unsigned char c2 = (unsigned char)*chk2++;

        if (tolower(c1) != tolower(c2))
            return 0;

        /* Both strings ended before the full checksum was seen */
        if (c1 == '\0')
            return 0;
    }

    return 1;
}
291
292
293 /* Compare two hashdata*/
hash_data_is_equal(const char * d1,const char * d2,unsigned int functions)294 int hash_data_is_equal(const char* d1, const char* d2, unsigned int functions)
295 {
296 int i;
297 char incomplete = 0;
298 char ok = 0;
299
300 /* If no function is specified, it means we need to check for all*/
301 /* of them*/
302 if (!functions)
303 functions = ~functions;
304
305 for (i=1; i != (1<<HASH_NUM_FUNCTIONS); i<<=1)
306 if (functions & i)
307 {
308 int offs1, offs2;
309
310 /* Check if both hashdata contain the current function's checksum*/
311 offs1 = hash_data_has_checksum(d1, i);
312 offs2 = hash_data_has_checksum(d2, i);
313
314 if (offs1 && offs2)
315 {
316 hash_function_desc* info = hash_get_function_desc(i);
317
318 if (!hash_compare_checksum(d1+offs1, d2+offs2, info->size))
319 return 0;
320
321 ok = 1;
322 }
323 /* If the function was contained only in one, remember that our comparison*/
324 /* is incomplete*/
325 else if (offs1 || offs2)
326 {
327 incomplete = 1;
328 }
329 }
330
331 /* If we could not compare any function, return error*/
332 if (!ok)
333 return 0;
334
335 /* Return success code*/
336 return (incomplete ? 2 : 1);
337 }
338
339
hash_data_extract_printable_checksum(const char * data,unsigned int function,char * checksum)340 int hash_data_extract_printable_checksum(const char* data, unsigned int function, char* checksum)
341 {
342 unsigned int i;
343 hash_function_desc* info;
344 int offs;
345
346 /* Check if the hashdata contains the requested function*/
347 offs = hash_data_has_checksum(data, function);
348
349 if (!offs)
350 return 0;
351
352 /* Move to the beginning of the checksum*/
353 data += offs;
354
355 info = hash_get_function_desc(function);
356
357 /* Return the number of required bytes*/
358 if (!checksum)
359 return info->size*2+1;
360
361 /* If the terminator is not found at the right position,*/
362 /* return a full-zero checksum and warn about it. This is mainly*/
363 /* for developers putting checksums of '0' or '1' to ask MAME*/
364 /* to compute the correct values for them.*/
365 if (data[info->size*2] != '#')
366 {
367 memset(checksum, '0', info->size*2);
368 checksum[info->size*2] = '\0';
369 return 2;
370 }
371
372 /* If it contains invalid hexadecimal characters,*/
373 /* treat the checksum as zero and return warning*/
374 for (i=0;i<info->size*2;i++)
375 if (!(data[i]>='0' && data[i]<='9') &&
376 !(data[i]>='a' && data[i]<='f') &&
377 !(data[i]>='A' && data[i]<='F'))
378 {
379 memset(checksum, '0', info->size*2);
380 checksum[info->size*2] = '\0';
381 return 2;
382 }
383
384 /* Copy the checksum (and make it lowercase)*/
385 for (i=0;i<info->size*2;i++)
386 checksum[i] = tolower(data[i]);
387
388 checksum[info->size*2] = '\0';
389
390 return 1;
391 }
392
hash_data_extract_binary_checksum(const char * data,unsigned int function,unsigned char * checksum)393 int hash_data_extract_binary_checksum(const char* data, unsigned int function, unsigned char* checksum)
394 {
395 unsigned int i;
396 hash_function_desc* info;
397 int offs;
398
399 /* Check if the hashdata contains the requested function*/
400 offs = hash_data_has_checksum(data, function);
401
402 if (!offs)
403 return 0;
404
405 /* Move to the beginning of the checksum*/
406 data += offs;
407
408 info = hash_get_function_desc(function);
409
410 /* Return the number of required bytes*/
411 if (!checksum)
412 return info->size;
413
414 /* Clear the checksum array*/
415 memset(checksum, 0, info->size);
416
417 /* If the terminator is not found at the right position,*/
418 /* return a full-zero checksum and warn about it. This is mainly*/
419 /* for developers putting checksums of '0' or '1' to ask MAME*/
420 /* to compute the correct values for them.*/
421 if (data[info->size*2] != '#')
422 {
423 memset(checksum, '\0', info->size);
424 return 2;
425 }
426
427 /* Convert hex string into binary*/
428 for (i=0;i<info->size*2;i++)
429 {
430 char c = tolower(*data++);
431
432 if (c >= '0' && c <= '9')
433 c -= '0';
434 else if (c >= 'a' && c <= 'f')
435 c -= 'a' - 10;
436 else if (c >= 'A' && c <= 'F')
437 c -= 'A' - 10;
438 else
439 {
440 /* Invalid character: the checksum is treated as zero,*/
441 /* and a warning is returned*/
442 memset(checksum, '\0', info->size);
443 return 2;
444 }
445
446 if (i % 2 == 0)
447 checksum[i / 2] = c * 16;
448 else
449 checksum[i / 2] += c;
450 }
451
452 return 1;
453 }
454
hash_data_has_info(const char * data,unsigned int info)455 int hash_data_has_info(const char* data, unsigned int info)
456 {
457 char* res = strstr(data, info_strings[info]);
458
459 if (!res)
460 return 0;
461
462 return 1;
463 }
464
/*
 * Copy hash string 'src' into 'dst'. Hash data is a plain NUL-terminated
 * string, so copying its characters and the terminator is enough. 'dst'
 * must be large enough to hold it.
 */
void hash_data_copy(char* dst, const char* src)
{
    memcpy(dst, src, strlen(src) + 1);
}
470
hash_data_clear(char * dst)471 void hash_data_clear(char* dst)
472 {
473 /* Clear the buffer*/
474 memset(dst, 0, HASH_BUF_SIZE);
475 }
476
hash_data_used_functions(const char * data)477 unsigned int hash_data_used_functions(const char* data)
478 {
479 int i;
480 unsigned int res = 0;
481
482 if (!data)
483 return 0;
484
485 for (i=0;i<HASH_NUM_FUNCTIONS;i++)
486 if (hash_data_has_checksum(data, 1<<i))
487 res |= 1<<i;
488
489 return res;
490 }
491
/*
 * Store a binary checksum for 'function' into hash string 'd'.
 *
 * If 'd' already holds a checksum for this function it is overwritten in
 * place and 2 is returned. Otherwise the new checksum is appended at the
 * end of the string, the string is re-terminated, and 1 is returned.
 */
int hash_data_insert_binary_checksum(char* d, unsigned int function, UINT8* checksum)
{
    int offset = hash_data_has_checksum(d, function);

    if (offset)
    {
        /* The offset points just past the "<code>:" tag; step back over
           those 2 characters so that the whole signature is rewritten */
        hash_data_add_binary_checksum(d + offset - 2, function, checksum);

        return 2;
    }

    /* Not present yet: append at the end and terminate the string again */
    d += strlen(d);
    d += hash_data_add_binary_checksum(d, function, checksum);
    *d = '\0';

    return 1;
}
518
hash_compute(char * dst,const unsigned char * data,unsigned long length,unsigned int functions)519 void hash_compute(char* dst, const unsigned char* data, unsigned long length, unsigned int functions)
520 {
521 int i;
522
523 hash_data_clear(dst);
524
525 /* Zero means use all the functions*/
526 if (functions == 0)
527 functions = ~functions;
528
529 for (i=0;i<HASH_NUM_FUNCTIONS;i++)
530 {
531 unsigned func = 1 << i;
532
533 if (functions & func)
534 {
535 hash_function_desc* desc = hash_get_function_desc(func);
536 UINT8 chksum[256];
537
538 desc->calculate_begin();
539 desc->calculate_buffer(data, length);
540 desc->calculate_end(chksum);
541
542 dst += hash_data_add_binary_checksum(dst, func, chksum);
543 }
544 }
545
546 *dst = '\0';
547 }
548
hash_data_print(const char * data,unsigned int functions,char * buffer)549 void hash_data_print(const char* data, unsigned int functions, char* buffer)
550 {
551 int i, j;
552 char first = 1;
553
554 if (functions == 0)
555 functions = ~functions;
556
557 buffer[0] = '\0';
558
559 for (i=0;i<HASH_NUM_FUNCTIONS;i++)
560 {
561 unsigned func = 1 << i;
562
563 if ((functions & func) && hash_data_has_checksum(data, func))
564 {
565 char temp[256];
566
567 if (!first)
568 strcat(buffer, " ");
569 first = 0;
570
571 strcpy(temp, hash_function_name(func));
572 for (j = 0; temp[j]; j++)
573 temp[j] = toupper(temp[j]);
574 strcat(buffer, temp);
575 strcat(buffer, "(");
576
577 hash_data_extract_printable_checksum(data, func, temp);
578 strcat(buffer, temp);
579 strcat(buffer, ")");
580 }
581 }
582 }
583
hash_verify_string(const char * hash)584 int hash_verify_string(const char *hash)
585 {
586 int len, i;
587
588 if (!hash)
589 return 0;
590
591 while(*hash)
592 {
593 if (*hash == '$')
594 {
595 if (memcmp(hash, NO_DUMP, 4) && memcmp(hash, BAD_DUMP, 4))
596 return 0;
597 hash += 4;
598 }
599 else
600 {
601 /* first make sure that the next char is a colon */
602 if (hash[1] != ':')
603 return 0;
604
605 /* search for a hash function for this code */
606 for (i = 0; i < sizeof(hash_descs) / sizeof(hash_descs[0]); i++)
607 {
608 if (*hash == hash_descs[i].code)
609 break;
610 }
611 if (i >= sizeof(hash_descs) / sizeof(hash_descs[0]))
612 return 0;
613
614 /* we have a proper code */
615 len = hash_descs[i].size * 2;
616 hash += 2;
617
618 for (i = 0; (hash[i] != '#') && (i < len); i++)
619 {
620 if (!isxdigit(hash[i]))
621 return 0;
622 }
623 if (hash[i] != '#')
624 return 0;
625
626 hash += i+1;
627 }
628 }
629
630 return 1;
631 }
632
633
634
635 /*********************************************************************
636 Hash functions - Wrappers
637 *********************************************************************/
638
639 static UINT32 crc;
640
h_crc_begin(void)641 static void h_crc_begin(void)
642 {
643 crc = 0;
644 }
645
h_crc_buffer(const void * mem,unsigned long len)646 static void h_crc_buffer(const void* mem, unsigned long len)
647 {
648 crc = crc32(crc, (UINT8*)mem, len);
649 }
650
/*
 * Store the running CRC value into 'bin_chksum' as 4 bytes, most
 * significant byte first.
 */
static void h_crc_end(UINT8* bin_chksum)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8)
        *bin_chksum++ = (UINT8)(crc >> shift);
}
658
659
660 struct sha1_ctx sha1ctx;
661
h_sha1_begin(void)662 static void h_sha1_begin(void)
663 {
664 sha1_init(&sha1ctx);
665 }
666
h_sha1_buffer(const void * mem,unsigned long len)667 static void h_sha1_buffer(const void* mem, unsigned long len)
668 {
669 sha1_update(&sha1ctx, len, (UINT8*)mem);
670 }
671
/*
 * Finish the SHA1 computation and store the 20-byte digest into
 * 'bin_chksum'.
 */
static void h_sha1_end(UINT8* bin_chksum)
{
    struct sha1_ctx* ctx = &sha1ctx;

    sha1_final(ctx);
    sha1_digest(ctx, 20, bin_chksum);
}
677
678
679 static MD5_CTX md5;
680
h_md5_begin(void)681 static void h_md5_begin(void)
682 {
683 #ifndef HAVE_LIBNX // Add hw crypto later, works without
684 MD5_Init(&md5);
685 #endif
686 }
687
h_md5_buffer(const void * mem,unsigned long len)688 static void h_md5_buffer(const void* mem, unsigned long len)
689 {
690 #ifndef HAVE_LIBNX // Add hw crypto later, works without
691 MD5_Update(&md5, (md5byte*)mem, len);
692 #endif
693 }
694
/*
 * Finish the MD5 computation and store the 16-byte digest into
 * 'bin_chksum'.
 *
 * When HAVE_LIBNX is defined no MD5 is computed; the digest is then set
 * to all zeros so that the caller never formats the uninitialized
 * contents of its buffer.
 */
static void h_md5_end(UINT8* bin_chksum)
{
#ifndef HAVE_LIBNX /* Add hw crypto later, works without */
    MD5_Final(bin_chksum, &md5);
#else
    memset(bin_chksum, 0, 16);
#endif
}
701