1 /*
2 * The Sleuth Kit
3 *
4 * Brian Carrier [carrier <at> sleuthkit [dot] org]
5 * Copyright (c) 2003-2014 Brian Carrier.  All rights reserved
6 *
7 *
8 * This software is distributed under the Common Public License 1.0
9 */
10 
11 /**
12 * \file md5sum.c
13 * Contains the MD5sum hash database specific extraction and printing routines.
14 */
15 
16 #include "tsk_hashdb_i.h"
17 
18 #define STR_EMPTY ""
19 
20 /**
21 * Test the file to see if it is a md5sum database
22 *
23 * @param hFile File handle to hash database
24 *
25 * @return 1 if md5sum and 0 if not
26 */
27 uint8_t
md5sum_test(FILE * hFile)28     md5sum_test(FILE * hFile)
29 {
30     char buf[TSK_HDB_MAXLEN];
31 
32     fseeko(hFile, 0, SEEK_SET);
33     if (NULL == fgets(buf, TSK_HDB_MAXLEN, hFile))
34         return 0;
35 
36     if (strlen(buf) < TSK_HDB_HTYPE_MD5_LEN)
37         return 0;
38 
39     if ((buf[0] == 'M') && (buf[1] == 'D') &&
40         (buf[2] == '5') && (buf[3] == ' ') && (buf[4] == '(')) {
41             return 1;
42     }
43 
44     if ((isxdigit((int) buf[0]))
45         && (isxdigit((int) buf[TSK_HDB_HTYPE_MD5_LEN - 1]))
46         && (isspace((int) buf[TSK_HDB_HTYPE_MD5_LEN]))) {
47             return 1;
48     }
49 
50     return 0;
51 }
52 
md5sum_open(FILE * hDb,const TSK_TCHAR * db_path)53 TSK_HDB_INFO *md5sum_open(FILE *hDb, const TSK_TCHAR *db_path)
54 {
55     TSK_HDB_BINSRCH_INFO *hdb_binsrch_info = NULL;
56 
57     // get the basic binary-search info struct
58     hdb_binsrch_info = hdb_binsrch_open(hDb, db_path);
59     if (NULL == hdb_binsrch_info) {
60         return NULL;
61     }
62 
63     // overwrite with more specific methods
64     hdb_binsrch_info->base.db_type = TSK_HDB_DBTYPE_MD5SUM_ID;
65     hdb_binsrch_info->get_entry = md5sum_getentry;
66     hdb_binsrch_info->base.make_index = md5sum_makeindex;
67 
68     return (TSK_HDB_INFO*)hdb_binsrch_info;
69 }
70 
71 /**
72 * Given a line of text from an MD5sum database, return pointers
73 * to the start start of the name and MD5 hash values (original
74 * string will have NULL values in it).
75 *
76 * @param [in]Input string from database -- THIS WILL BE MODIFIED
77 * @param [out] Will contain a pointer to MD5 value in input string
78 * @param [out] Will contain a pointer to name value in input string (input could be NULL)
79 *
80 * @return 1 on error and 0 on success
81 */
82 static uint8_t
md5sum_parse_md5(char * str,char ** md5,char ** name)83     md5sum_parse_md5(char *str, char **md5, char **name)
84 {
85     char *ptr;
86 
87     if (strlen(str) < TSK_HDB_HTYPE_MD5_LEN + 1) {
88         tsk_error_reset();
89         tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
90         tsk_error_set_errstr(
91             "md5sum_parse_md5: String is too short: %s", str);
92         return 1;
93     }
94 
95     /* Format of: MD5      NAME  or even just the MD5 value */
96     if ((isxdigit((int) str[0]))
97         && (isxdigit((int) str[TSK_HDB_HTYPE_MD5_LEN - 1]))
98         && (isspace((int) str[TSK_HDB_HTYPE_MD5_LEN]))) {
99             unsigned int i;
100             size_t len = strlen(str);
101 
102             if (md5 != NULL) {
103                 *md5 = &str[0];
104             }
105             i = TSK_HDB_HTYPE_MD5_LEN;
106             str[i++] = '\0';
107 
108             /* Just the MD5 values */
109             if (i >= len) {
110                 if (name != NULL) {
111                     *name = STR_EMPTY;
112                 }
113                 return 0;
114             }
115 
116             while ((i < len) && ((str[i] == ' ') || (str[i] == '\t'))) {
117                 i++;
118             }
119 
120             if ((len == i) || (str[i] == '\n')) {
121                 return 0;
122             }
123 
124             if (str[i] == '*') {
125                 i++;
126             }
127 
128             if (name != NULL) {
129                 *name = &str[i];
130             }
131             ptr = &str[i];
132 
133             if (ptr[strlen(ptr) - 1] == '\n')
134                 ptr[strlen(ptr) - 1] = '\0';
135     }
136 
137     /* Format of: MD5 (NAME) = MD5 */
138     else if ((str[0] == 'M') && (str[1] == 'D') &&
139         (str[2] == '5') && (str[3] == ' ') && (str[4] == '(')) {
140 
141             ptr = &str[5];
142 
143             if (name != NULL) {
144                 *name = ptr;
145             }
146 
147             if (NULL == (ptr = strchr(ptr, ')'))) {
148                 tsk_error_reset();
149                 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
150                 tsk_error_set_errstr(
151                     "md5sum_parse_md5: Missing ) in name: %s", str);
152                 return 1;
153             }
154             *ptr = '\0';
155             ptr++;
156 
157 
158             if (4 + TSK_HDB_HTYPE_MD5_LEN > strlen(ptr)) {
159                 tsk_error_reset();
160                 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
161                 tsk_error_set_errstr(
162                     "md5sum_parse_md5: Invalid MD5 value: %s", ptr);
163                 return 1;
164             }
165 
166             if ((*(ptr) != ' ') || (*(++ptr) != '=') ||
167                 (*(++ptr) != ' ') || (!isxdigit((int) *(++ptr))) ||
168                 (ptr[TSK_HDB_HTYPE_MD5_LEN] != '\n')) {
169                     tsk_error_reset();
170                     tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
171                     tsk_error_set_errstr(
172                         "md5sum_parse_md5: Invalid hash value %s", ptr);
173                     return 1;
174             }
175 
176             *md5 = ptr;
177             ptr[TSK_HDB_HTYPE_MD5_LEN] = '\0';
178     }
179 
180     else {
181         tsk_error_reset();
182         tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
183         tsk_error_set_errstr(
184             "md5sum_parse_md5: Invalid md5sum format in file: %s\n",
185             str);
186         return 1;
187     }
188 
189     return 0;
190 }
191 
192 /**
193 * Process the database to create a sorted index of it. Consecutive
194 * entries with the same hash value are not added to the index, but
195 * will be found during lookup.
196 *
197 * @param hdb_info_base Hash database to make index of.
198 * @param dbtype Type of hash database (should always be TSK_HDB_DBTYPE_MD5SUM_STR)
199 *
200 * @return 1 on error and 0 on success.
201 */
202 uint8_t
md5sum_makeindex(TSK_HDB_INFO * hdb_info_base,TSK_TCHAR * dbtype)203     md5sum_makeindex(TSK_HDB_INFO *hdb_info_base, TSK_TCHAR * dbtype)
204 {
205     TSK_HDB_BINSRCH_INFO *hdb_info = (TSK_HDB_BINSRCH_INFO*)hdb_info_base;
206     int i;
207     char buf[TSK_HDB_MAXLEN];
208     char *hash = NULL, phash[TSK_HDB_HTYPE_MD5_LEN + 1];
209     TSK_OFF_T offset = 0;
210     int db_cnt = 0, idx_cnt = 0, ig_cnt = 0;
211     size_t len;
212 
213     /* Initialize the TSK index file */
214     if (hdb_binsrch_idx_initialize(hdb_info, dbtype)) {
215         tsk_error_set_errstr2( "md5sum_makeindex");
216         return 1;
217     }
218 
219     /* Status */
220     if (tsk_verbose)
221         TFPRINTF(stderr, _TSK_T("Extracting Data from Database (%s)\n"),
222         hdb_info->base.db_fname);
223 
224     /* Allocate a buffer for the previous hash value */
225     memset(phash, '0', TSK_HDB_HTYPE_MD5_LEN + 1);
226 
227     /* read the file and add to the index */
228     fseek(hdb_info->hDb, 0, SEEK_SET);
229     for (i = 0; NULL != fgets(buf, TSK_HDB_MAXLEN, hdb_info->hDb);
230         offset += (TSK_OFF_T) len, i++) {
231 
232             len = strlen(buf);
233 
234             /* Parse each line */
235             if (md5sum_parse_md5(buf, &hash, NULL)) {
236                 ig_cnt++;
237                 continue;
238             }
239             db_cnt++;
240 
241             /* We only want to add one of each hash to the index */
242             if (memcmp(hash, phash, TSK_HDB_HTYPE_MD5_LEN) == 0) {
243                 continue;
244             }
245 
246             /* Add the entry to the index */
247             if (hdb_binsrch_idx_add_entry_str(hdb_info, hash, offset)) {
248                 tsk_error_set_errstr2( "md5sum_makeindex");
249                 return 1;
250             }
251 
252             idx_cnt++;
253 
254             /* Set the previous has value */
255             strncpy(phash, hash, TSK_HDB_HTYPE_MD5_LEN + 1);
256     }
257 
258     if (idx_cnt > 0) {
259 
260         if (tsk_verbose) {
261             fprintf(stderr, "  Valid Database Entries: %d\n", db_cnt);
262             fprintf(stderr,
263                 "  Invalid Database Entries (headers or errors): %d\n",
264                 ig_cnt);
265             fprintf(stderr, "  Index File Entries %s: %d\n",
266                 (idx_cnt == db_cnt) ? "" : "(optimized)", idx_cnt);
267         }
268 
269         /* Close and sort the index */
270         if (hdb_binsrch_idx_finalize(hdb_info)) {
271             tsk_error_set_errstr2( "md5sum_makeindex");
272             return 1;
273         }
274     }
275     else {
276         tsk_error_reset();
277         tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
278         tsk_error_set_errstr(
279             "md5sum_makeindex: No valid entries found in database");
280         return 1;
281     }
282 
283     return 0;
284 }
285 
286 /**
287 * Find the corresponding name at a
288 * given offset.  The offset was likely determined from the index.
289 * The entries in the DB following the one specified are also processed
290 * if they have the same hash value and their name is different.
291 * The callback is called for each entry.
292 *
293 * @param hdb_info Hash database to get data from
294 * @param hash MD5 hash value that was searched for
295 * @param offset Byte offset where hash value should be located in db_file
296 * @param flags (not used)
297 * @param action Callback used for each entry found in lookup
298 * @param cb_ptr Pointer to data passed to callback
299 *
300 * @return 1 on error and 0 on succuss
301 */
302 uint8_t
md5sum_getentry(TSK_HDB_INFO * hdb_info,const char * hash,TSK_OFF_T offset,TSK_HDB_FLAG_ENUM flags,TSK_HDB_LOOKUP_FN action,void * cb_ptr)303     md5sum_getentry(TSK_HDB_INFO * hdb_info, const char *hash,
304     TSK_OFF_T offset, TSK_HDB_FLAG_ENUM flags,
305     TSK_HDB_LOOKUP_FN action, void *cb_ptr)
306 {
307     TSK_HDB_BINSRCH_INFO *hdb_binsrch_info = (TSK_HDB_BINSRCH_INFO*)hdb_info;
308     char buf[TSK_HDB_MAXLEN], *name, *ptr = NULL, pname[TSK_HDB_MAXLEN];
309     int found = 0;
310 
311     if (tsk_verbose)
312         fprintf(stderr,
313         "md5sum_getentry: Lookup up hash %s at offset %" PRIdOFF
314         "\n", hash, offset);
315 
316     if (strlen(hash) != TSK_HDB_HTYPE_MD5_LEN) {
317         tsk_error_reset();
318         tsk_error_set_errno(TSK_ERR_HDB_ARG);
319         tsk_error_set_errstr(
320             "md5sum_getentry: Invalid hash value: %s", hash);
321         return 1;
322     }
323 
324     memset(pname, '0', TSK_HDB_MAXLEN);
325 
326     /* Loop so that we can find multiple occurrences of the same hash */
327     while (1) {
328         size_t len;
329 
330         if (0 != fseeko(hdb_binsrch_info->hDb, offset, SEEK_SET)) {
331             tsk_error_reset();
332             tsk_error_set_errno(TSK_ERR_HDB_READDB);
333             tsk_error_set_errstr(
334                 "md5sum_getentry: Error seeking to get file name: %lu",
335                 (unsigned long) offset);
336             return 1;
337         }
338 
339         if (NULL == fgets(buf, TSK_HDB_MAXLEN, hdb_binsrch_info->hDb)) {
340             if (feof(hdb_binsrch_info->hDb)) {
341                 break;
342             }
343             tsk_error_reset();
344             tsk_error_set_errno(TSK_ERR_HDB_READDB);
345             tsk_error_set_errstr(
346                 "md5sum_getentry: Error reading database");
347             return 1;
348         }
349 
350         len = strlen(buf);
351         if (len < TSK_HDB_HTYPE_MD5_LEN) {
352             tsk_error_reset();
353             tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
354             tsk_error_set_errstr(
355                 "md5sum_getentry: Invalid entry in database (too short): %s",
356                 buf);
357             return 1;
358         }
359 
360         if (md5sum_parse_md5(buf, &ptr, &name)) {
361             tsk_error_reset();
362             tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
363             tsk_error_set_errstr(
364                 "md5sum_getentry: Invalid entry in database: %s",
365                 buf);
366             return 1;
367         }
368 
369         /* Is this the one that we want? */
370         if (0 != strcasecmp(ptr, hash)) {
371             break;
372         }
373 
374         if (strcmp(name, pname) != 0) {
375             int retval;
376             retval = action(hdb_info, hash, name, cb_ptr);
377             if (retval == TSK_WALK_ERROR) {
378                 return 1;
379             }
380             else if (retval == TSK_WALK_STOP) {
381                 return 0;
382             }
383             found = 1;
384             strncpy(pname, name, TSK_HDB_MAXLEN);
385         }
386 
387         /* Advance to the next row */
388         offset += len;
389     }
390 
391     if (found == 0) {
392         tsk_error_reset();
393         tsk_error_set_errno(TSK_ERR_HDB_ARG);
394         tsk_error_set_errstr(
395             "md5sum_getentry: Hash not found in file at offset: %lu",
396             (unsigned long) offset);
397         return 1;
398     }
399 
400     return 0;
401 }
402