1 /*
2 * The Sleuth Kit
3 *
4 * Brian Carrier [carrier <at> sleuthkit [dot] org]
5 * Copyright (c) 2003-2014 Brian Carrier. All rights reserved
6 *
7 *
8 * This software is distributed under the Common Public License 1.0
9 */
10
11 /**
12 * \file md5sum.c
13 * Contains the MD5sum hash database specific extraction and printing routines.
14 */
15
16 #include "tsk_hashdb_i.h"
17
18 #define STR_EMPTY ""
19
20 /**
21 * Test the file to see if it is a md5sum database
22 *
23 * @param hFile File handle to hash database
24 *
25 * @return 1 if md5sum and 0 if not
26 */
27 uint8_t
md5sum_test(FILE * hFile)28 md5sum_test(FILE * hFile)
29 {
30 char buf[TSK_HDB_MAXLEN];
31
32 fseeko(hFile, 0, SEEK_SET);
33 if (NULL == fgets(buf, TSK_HDB_MAXLEN, hFile))
34 return 0;
35
36 if (strlen(buf) < TSK_HDB_HTYPE_MD5_LEN)
37 return 0;
38
39 if ((buf[0] == 'M') && (buf[1] == 'D') &&
40 (buf[2] == '5') && (buf[3] == ' ') && (buf[4] == '(')) {
41 return 1;
42 }
43
44 if ((isxdigit((int) buf[0]))
45 && (isxdigit((int) buf[TSK_HDB_HTYPE_MD5_LEN - 1]))
46 && (isspace((int) buf[TSK_HDB_HTYPE_MD5_LEN]))) {
47 return 1;
48 }
49
50 return 0;
51 }
52
md5sum_open(FILE * hDb,const TSK_TCHAR * db_path)53 TSK_HDB_INFO *md5sum_open(FILE *hDb, const TSK_TCHAR *db_path)
54 {
55 TSK_HDB_BINSRCH_INFO *hdb_binsrch_info = NULL;
56
57 // get the basic binary-search info struct
58 hdb_binsrch_info = hdb_binsrch_open(hDb, db_path);
59 if (NULL == hdb_binsrch_info) {
60 return NULL;
61 }
62
63 // overwrite with more specific methods
64 hdb_binsrch_info->base.db_type = TSK_HDB_DBTYPE_MD5SUM_ID;
65 hdb_binsrch_info->get_entry = md5sum_getentry;
66 hdb_binsrch_info->base.make_index = md5sum_makeindex;
67
68 return (TSK_HDB_INFO*)hdb_binsrch_info;
69 }
70
71 /**
72 * Given a line of text from an MD5sum database, return pointers
73 * to the start start of the name and MD5 hash values (original
74 * string will have NULL values in it).
75 *
76 * @param [in]Input string from database -- THIS WILL BE MODIFIED
77 * @param [out] Will contain a pointer to MD5 value in input string
78 * @param [out] Will contain a pointer to name value in input string (input could be NULL)
79 *
80 * @return 1 on error and 0 on success
81 */
82 static uint8_t
md5sum_parse_md5(char * str,char ** md5,char ** name)83 md5sum_parse_md5(char *str, char **md5, char **name)
84 {
85 char *ptr;
86
87 if (strlen(str) < TSK_HDB_HTYPE_MD5_LEN + 1) {
88 tsk_error_reset();
89 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
90 tsk_error_set_errstr(
91 "md5sum_parse_md5: String is too short: %s", str);
92 return 1;
93 }
94
95 /* Format of: MD5 NAME or even just the MD5 value */
96 if ((isxdigit((int) str[0]))
97 && (isxdigit((int) str[TSK_HDB_HTYPE_MD5_LEN - 1]))
98 && (isspace((int) str[TSK_HDB_HTYPE_MD5_LEN]))) {
99 unsigned int i;
100 size_t len = strlen(str);
101
102 if (md5 != NULL) {
103 *md5 = &str[0];
104 }
105 i = TSK_HDB_HTYPE_MD5_LEN;
106 str[i++] = '\0';
107
108 /* Just the MD5 values */
109 if (i >= len) {
110 if (name != NULL) {
111 *name = STR_EMPTY;
112 }
113 return 0;
114 }
115
116 while ((i < len) && ((str[i] == ' ') || (str[i] == '\t'))) {
117 i++;
118 }
119
120 if ((len == i) || (str[i] == '\n')) {
121 return 0;
122 }
123
124 if (str[i] == '*') {
125 i++;
126 }
127
128 if (name != NULL) {
129 *name = &str[i];
130 }
131 ptr = &str[i];
132
133 if (ptr[strlen(ptr) - 1] == '\n')
134 ptr[strlen(ptr) - 1] = '\0';
135 }
136
137 /* Format of: MD5 (NAME) = MD5 */
138 else if ((str[0] == 'M') && (str[1] == 'D') &&
139 (str[2] == '5') && (str[3] == ' ') && (str[4] == '(')) {
140
141 ptr = &str[5];
142
143 if (name != NULL) {
144 *name = ptr;
145 }
146
147 if (NULL == (ptr = strchr(ptr, ')'))) {
148 tsk_error_reset();
149 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
150 tsk_error_set_errstr(
151 "md5sum_parse_md5: Missing ) in name: %s", str);
152 return 1;
153 }
154 *ptr = '\0';
155 ptr++;
156
157
158 if (4 + TSK_HDB_HTYPE_MD5_LEN > strlen(ptr)) {
159 tsk_error_reset();
160 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
161 tsk_error_set_errstr(
162 "md5sum_parse_md5: Invalid MD5 value: %s", ptr);
163 return 1;
164 }
165
166 if ((*(ptr) != ' ') || (*(++ptr) != '=') ||
167 (*(++ptr) != ' ') || (!isxdigit((int) *(++ptr))) ||
168 (ptr[TSK_HDB_HTYPE_MD5_LEN] != '\n')) {
169 tsk_error_reset();
170 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
171 tsk_error_set_errstr(
172 "md5sum_parse_md5: Invalid hash value %s", ptr);
173 return 1;
174 }
175
176 *md5 = ptr;
177 ptr[TSK_HDB_HTYPE_MD5_LEN] = '\0';
178 }
179
180 else {
181 tsk_error_reset();
182 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
183 tsk_error_set_errstr(
184 "md5sum_parse_md5: Invalid md5sum format in file: %s\n",
185 str);
186 return 1;
187 }
188
189 return 0;
190 }
191
192 /**
193 * Process the database to create a sorted index of it. Consecutive
194 * entries with the same hash value are not added to the index, but
195 * will be found during lookup.
196 *
197 * @param hdb_info_base Hash database to make index of.
198 * @param dbtype Type of hash database (should always be TSK_HDB_DBTYPE_MD5SUM_STR)
199 *
200 * @return 1 on error and 0 on success.
201 */
202 uint8_t
md5sum_makeindex(TSK_HDB_INFO * hdb_info_base,TSK_TCHAR * dbtype)203 md5sum_makeindex(TSK_HDB_INFO *hdb_info_base, TSK_TCHAR * dbtype)
204 {
205 TSK_HDB_BINSRCH_INFO *hdb_info = (TSK_HDB_BINSRCH_INFO*)hdb_info_base;
206 int i;
207 char buf[TSK_HDB_MAXLEN];
208 char *hash = NULL, phash[TSK_HDB_HTYPE_MD5_LEN + 1];
209 TSK_OFF_T offset = 0;
210 int db_cnt = 0, idx_cnt = 0, ig_cnt = 0;
211 size_t len;
212
213 /* Initialize the TSK index file */
214 if (hdb_binsrch_idx_initialize(hdb_info, dbtype)) {
215 tsk_error_set_errstr2( "md5sum_makeindex");
216 return 1;
217 }
218
219 /* Status */
220 if (tsk_verbose)
221 TFPRINTF(stderr, _TSK_T("Extracting Data from Database (%s)\n"),
222 hdb_info->base.db_fname);
223
224 /* Allocate a buffer for the previous hash value */
225 memset(phash, '0', TSK_HDB_HTYPE_MD5_LEN + 1);
226
227 /* read the file and add to the index */
228 fseek(hdb_info->hDb, 0, SEEK_SET);
229 for (i = 0; NULL != fgets(buf, TSK_HDB_MAXLEN, hdb_info->hDb);
230 offset += (TSK_OFF_T) len, i++) {
231
232 len = strlen(buf);
233
234 /* Parse each line */
235 if (md5sum_parse_md5(buf, &hash, NULL)) {
236 ig_cnt++;
237 continue;
238 }
239 db_cnt++;
240
241 /* We only want to add one of each hash to the index */
242 if (memcmp(hash, phash, TSK_HDB_HTYPE_MD5_LEN) == 0) {
243 continue;
244 }
245
246 /* Add the entry to the index */
247 if (hdb_binsrch_idx_add_entry_str(hdb_info, hash, offset)) {
248 tsk_error_set_errstr2( "md5sum_makeindex");
249 return 1;
250 }
251
252 idx_cnt++;
253
254 /* Set the previous has value */
255 strncpy(phash, hash, TSK_HDB_HTYPE_MD5_LEN + 1);
256 }
257
258 if (idx_cnt > 0) {
259
260 if (tsk_verbose) {
261 fprintf(stderr, " Valid Database Entries: %d\n", db_cnt);
262 fprintf(stderr,
263 " Invalid Database Entries (headers or errors): %d\n",
264 ig_cnt);
265 fprintf(stderr, " Index File Entries %s: %d\n",
266 (idx_cnt == db_cnt) ? "" : "(optimized)", idx_cnt);
267 }
268
269 /* Close and sort the index */
270 if (hdb_binsrch_idx_finalize(hdb_info)) {
271 tsk_error_set_errstr2( "md5sum_makeindex");
272 return 1;
273 }
274 }
275 else {
276 tsk_error_reset();
277 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
278 tsk_error_set_errstr(
279 "md5sum_makeindex: No valid entries found in database");
280 return 1;
281 }
282
283 return 0;
284 }
285
286 /**
287 * Find the corresponding name at a
288 * given offset. The offset was likely determined from the index.
289 * The entries in the DB following the one specified are also processed
290 * if they have the same hash value and their name is different.
291 * The callback is called for each entry.
292 *
293 * @param hdb_info Hash database to get data from
294 * @param hash MD5 hash value that was searched for
295 * @param offset Byte offset where hash value should be located in db_file
296 * @param flags (not used)
297 * @param action Callback used for each entry found in lookup
298 * @param cb_ptr Pointer to data passed to callback
299 *
300 * @return 1 on error and 0 on succuss
301 */
302 uint8_t
md5sum_getentry(TSK_HDB_INFO * hdb_info,const char * hash,TSK_OFF_T offset,TSK_HDB_FLAG_ENUM flags,TSK_HDB_LOOKUP_FN action,void * cb_ptr)303 md5sum_getentry(TSK_HDB_INFO * hdb_info, const char *hash,
304 TSK_OFF_T offset, TSK_HDB_FLAG_ENUM flags,
305 TSK_HDB_LOOKUP_FN action, void *cb_ptr)
306 {
307 TSK_HDB_BINSRCH_INFO *hdb_binsrch_info = (TSK_HDB_BINSRCH_INFO*)hdb_info;
308 char buf[TSK_HDB_MAXLEN], *name, *ptr = NULL, pname[TSK_HDB_MAXLEN];
309 int found = 0;
310
311 if (tsk_verbose)
312 fprintf(stderr,
313 "md5sum_getentry: Lookup up hash %s at offset %" PRIdOFF
314 "\n", hash, offset);
315
316 if (strlen(hash) != TSK_HDB_HTYPE_MD5_LEN) {
317 tsk_error_reset();
318 tsk_error_set_errno(TSK_ERR_HDB_ARG);
319 tsk_error_set_errstr(
320 "md5sum_getentry: Invalid hash value: %s", hash);
321 return 1;
322 }
323
324 memset(pname, '0', TSK_HDB_MAXLEN);
325
326 /* Loop so that we can find multiple occurrences of the same hash */
327 while (1) {
328 size_t len;
329
330 if (0 != fseeko(hdb_binsrch_info->hDb, offset, SEEK_SET)) {
331 tsk_error_reset();
332 tsk_error_set_errno(TSK_ERR_HDB_READDB);
333 tsk_error_set_errstr(
334 "md5sum_getentry: Error seeking to get file name: %lu",
335 (unsigned long) offset);
336 return 1;
337 }
338
339 if (NULL == fgets(buf, TSK_HDB_MAXLEN, hdb_binsrch_info->hDb)) {
340 if (feof(hdb_binsrch_info->hDb)) {
341 break;
342 }
343 tsk_error_reset();
344 tsk_error_set_errno(TSK_ERR_HDB_READDB);
345 tsk_error_set_errstr(
346 "md5sum_getentry: Error reading database");
347 return 1;
348 }
349
350 len = strlen(buf);
351 if (len < TSK_HDB_HTYPE_MD5_LEN) {
352 tsk_error_reset();
353 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
354 tsk_error_set_errstr(
355 "md5sum_getentry: Invalid entry in database (too short): %s",
356 buf);
357 return 1;
358 }
359
360 if (md5sum_parse_md5(buf, &ptr, &name)) {
361 tsk_error_reset();
362 tsk_error_set_errno(TSK_ERR_HDB_CORRUPT);
363 tsk_error_set_errstr(
364 "md5sum_getentry: Invalid entry in database: %s",
365 buf);
366 return 1;
367 }
368
369 /* Is this the one that we want? */
370 if (0 != strcasecmp(ptr, hash)) {
371 break;
372 }
373
374 if (strcmp(name, pname) != 0) {
375 int retval;
376 retval = action(hdb_info, hash, name, cb_ptr);
377 if (retval == TSK_WALK_ERROR) {
378 return 1;
379 }
380 else if (retval == TSK_WALK_STOP) {
381 return 0;
382 }
383 found = 1;
384 strncpy(pname, name, TSK_HDB_MAXLEN);
385 }
386
387 /* Advance to the next row */
388 offset += len;
389 }
390
391 if (found == 0) {
392 tsk_error_reset();
393 tsk_error_set_errno(TSK_ERR_HDB_ARG);
394 tsk_error_set_errstr(
395 "md5sum_getentry: Hash not found in file at offset: %lu",
396 (unsigned long) offset);
397 return 1;
398 }
399
400 return 0;
401 }
402