1 /* radare - LGPL - Copyright 2008-2016 pancake */
2 
3 #include <r_search.h>
4 #include <r_list.h>
5 #include <ctype.h>
6 
7 // Experimental search engine (fails, because stops at first hit of every block read
8 #define USE_BMH 0
9 
10 R_LIB_VERSION (r_search);
11 
12 typedef struct {
13 	ut64 end;
14 	int len;
15 	ut8 data[];
16 } RSearchLeftover;
17 
r_search_new(int mode)18 R_API RSearch *r_search_new(int mode) {
19 	RSearch *s = R_NEW0 (RSearch);
20 	if (!s) {
21 		return NULL;
22 	}
23 	if (!r_search_set_mode (s, mode)) {
24 		free (s);
25 		eprintf ("Cannot init search for mode %d\n", mode);
26 		return false;
27 	}
28 	s->inverse = false;
29 	s->data = NULL;
30 	s->user = NULL;
31 	s->callback = NULL;
32 	s->align = 0;
33 	s->distance = 0;
34 	s->contiguous = 0;
35 	s->overlap = false;
36 	s->pattern_size = 0;
37 	s->string_max = 255;
38 	s->string_min = 3;
39 	s->hits = r_list_newf (free);
40 	s->maxhits = 0;
41 	// TODO: review those mempool sizes. ensure never gets NULL
42 	s->kws = r_list_newf (free);
43 	if (!s->kws) {
44 		r_search_free (s);
45 		return NULL;
46 	}
47 	s->kws->free = (RListFree) r_search_keyword_free;
48 	return s;
49 }
50 
r_search_free(RSearch * s)51 R_API RSearch *r_search_free(RSearch *s) {
52 	if (!s) {
53 		return NULL;
54 	}
55 	r_list_free (s->hits);
56 	r_list_free (s->kws);
57 	//r_io_free(s->iob.io); this is supposed to be a weak reference
58 	free (s->data);
59 	free (s);
60 	return NULL;
61 }
62 
r_search_set_string_limits(RSearch * s,ut32 min,ut32 max)63 R_API int r_search_set_string_limits(RSearch *s, ut32 min, ut32 max) {
64 	if (max < min) {
65 		return false;
66 	}
67 	s->string_min = min;
68 	s->string_max = max;
69 	return true;
70 }
71 
r_search_magic_update(RSearch * s,ut64 from,const ut8 * buf,int len)72 R_API int r_search_magic_update(RSearch *s, ut64 from, const ut8 *buf, int len) {
73 	eprintf ("TODO: import libr/core/cmd_search.c /m implementation into rsearch\n");
74 	return false;
75 }
76 
r_search_set_mode(RSearch * s,int mode)77 R_API int r_search_set_mode(RSearch *s, int mode) {
78 	s->update = NULL;
79 	switch (mode) {
80 	case R_SEARCH_KEYWORD: s->update = r_search_mybinparse_update; break;
81 	case R_SEARCH_REGEXP: s->update = r_search_regexp_update; break;
82 	case R_SEARCH_AES: s->update = r_search_aes_update; break;
83 	case R_SEARCH_PRIV_KEY: s->update = r_search_privkey_update; break;
84 	case R_SEARCH_STRING: s->update = r_search_strings_update; break;
85 	case R_SEARCH_DELTAKEY: s->update = r_search_deltakey_update; break;
86 	case R_SEARCH_MAGIC: s->update = r_search_magic_update; break;
87 	}
88 	if (s->update || mode == R_SEARCH_PATTERN) {
89 		s->mode = mode;
90 		return true;
91 	}
92 	return false;
93 }
94 
r_search_begin(RSearch * s)95 R_API int r_search_begin(RSearch *s) {
96 	RListIter *iter;
97 	RSearchKeyword *kw;
98 	r_list_foreach (s->kws, iter, kw) {
99 		kw->count = 0;
100 		kw->last = 0;
101 	}
102 	return true;
103 }
104 
105 // Returns 2 if search.maxhits is reached, 0 on error, otherwise 1
r_search_hit_new(RSearch * s,RSearchKeyword * kw,ut64 addr)106 R_API int r_search_hit_new(RSearch *s, RSearchKeyword *kw, ut64 addr) {
107 	if (s->align && (addr%s->align)) {
108 		eprintf ("0x%08"PFMT64x" unaligned\n", addr);
109 		return 1;
110 	}
111 	if (!s->contiguous) {
112 		if (kw->last && addr == kw->last) {
113 			kw->count--;
114 			kw->last = s->bckwrds? addr: addr + kw->keyword_length;
115 			eprintf ("0x%08"PFMT64x" Sequential hit ignored.\n", addr);
116 			return 1;
117 		}
118 	}
119 	// kw->last is used by string search, the right endpoint of last match (forward search), to honor search.overlap
120 	kw->last = s->bckwrds ? addr : addr + kw->keyword_length;
121 
122 	if (s->callback) {
123 		int ret = s->callback (kw, s->user, addr);
124 		kw->count++;
125 		s->nhits++;
126 		// If callback returns 0 or larger than 1, forwards it; otherwise returns 2 if search.maxhits is reached
127 		return !ret || ret > 1 ? ret : s->maxhits && s->nhits >= s->maxhits ? 2 : 1;
128 	}
129 	kw->count++;
130 	s->nhits++;
131 	RSearchHit* hit = R_NEW0 (RSearchHit);
132 	if (hit) {
133 		hit->kw = kw;
134 		hit->addr = addr;
135 		r_list_append (s->hits, hit);
136 	}
137 	return s->maxhits && s->nhits >= s->maxhits ? 2 : 1;
138 }
139 
140 // TODO support search across block boundaries
141 // Supported search variants: backward, overlap
r_search_deltakey_update(RSearch * s,ut64 from,const ut8 * buf,int len)142 R_API int r_search_deltakey_update(RSearch *s, ut64 from, const ut8 *buf, int len) {
143 	RListIter *iter;
144 	int longest = 0, i, j;
145 	RSearchKeyword *kw;
146 	RSearchLeftover *left;
147 	const int old_nhits = s->nhits;
148 	r_list_foreach (s->kws, iter, kw) {
149 		longest = R_MAX (longest, kw->keyword_length + 1);
150 	}
151 	if (!longest) {
152 		return 0;
153 	}
154 	if (s->data) {
155 		left = s->data;
156 		if (left->end != from) {
157 			left->len = 0;
158 		}
159 	} else {
160 		left = malloc (sizeof(RSearchLeftover) + (size_t)2 * (longest - 1));
161 		if (!left) {
162 			return -1;
163 		}
164 		s->data = left;
165 		left->len = 0;
166 		if (s->bckwrds) {
167 			r_list_foreach (s->kws, iter, kw) {
168 				ut8 *i = kw->bin_keyword, *j = kw->bin_keyword + kw->keyword_length;
169 				for (; i < j; i++) {
170 					*i = -*i;
171 				}
172 			}
173 		}
174 	}
175 	if (s->bckwrds) {
176 		// XXX Change function signature from const ut8 * to ut8 *
177 		ut8 *i = (ut8 *)buf, *j = i + len;
178 		while (i < j) {
179 			ut8 t = *i;
180 			*i++ = *--j;
181 			*j = t;
182 		}
183 	}
184 
185 	ut64 len1 = left->len + R_MIN (longest - 1, len);
186 	memcpy (left->data + left->len, buf, len1 - left->len);
187 	r_list_foreach (s->kws, iter, kw) {
188 		ut8 *a = kw->bin_keyword;
189 		i = s->overlap || !kw->count ? 0 :
190 				s->bckwrds
191 				? kw->last - from < left->len ? from + left->len - kw->last : 0
192 				: from - kw->last < left->len ? kw->last + left->len - from : 0;
193 		for (; i + kw->keyword_length < len1 && i < left->len; i++) {
194 			if ((ut8)(left->data[i+1] - left->data[i]) == a[0]) {
195 				j = 1;
196 				while (j < kw->keyword_length && (ut8)(left->data[i+j+1] - left->data[i+j]) == a[j]) {
197 					j++;
198 				}
199 				if (j == kw->keyword_length) {
200 					int t = r_search_hit_new (s, kw, s->bckwrds ? from - kw->keyword_length - 1 - i + left->len : from + i - left->len);
201 					kw->last += s->bckwrds ? 0 : 1;
202 					if (!t) {
203 						return -1;
204 					}
205 					if (t > 1) {
206 						return s->nhits - old_nhits;
207 					}
208 					if (!s->overlap) {
209 						i += kw->keyword_length;
210 					}
211 				}
212 			}
213 		}
214 		i = s->overlap || !kw->count ? 0 :
215 				s->bckwrds
216 				? from > kw->last ? from - kw->last : 0
217 				: from < kw->last ? kw->last - from : 0;
218 		for (; i + kw->keyword_length < len; i++) {
219 			if ((ut8)(buf[i+1] - buf[i]) == a[0]) {
220 				j = 1;
221 				while (j < kw->keyword_length && (ut8)(buf[i+j+1] - buf[i+j]) == a[j]) {
222 					j++;
223 				}
224 				if (j == kw->keyword_length) {
225 					int t = r_search_hit_new (s, kw, s->bckwrds ? from - kw->keyword_length - 1 - i : from + i);
226 					kw->last += s->bckwrds ? 0 : 1;
227 					if (!t) {
228 						return -1;
229 					}
230 					if (t > 1) {
231 						return s->nhits - old_nhits;
232 					}
233 					if (!s->overlap) {
234 						i += kw->keyword_length;
235 					}
236 				}
237 			}
238 		}
239 	}
240 	if (len < longest - 1) {
241 		if (len1 < longest) {
242 			left->len = len1;
243 		} else {
244 			left->len = longest - 1;
245 			memmove (left->data, left->data + len1 - longest + 1, longest - 1);
246 		}
247 	} else {
248 		left->len = longest - 1;
249 		memcpy (left->data, buf + len - longest + 1, longest - 1);
250 	}
251 	left->end = s->bckwrds ? from - len : from + len;
252 
253 	return s->nhits - old_nhits;
254 }
255 
256 #if 0
257 // Boyer-Moore-Horspool pattern matching
258 // Supported search variants: icase, overlap
259 static int r_search_horspool(RSearch *s, RSearchKeyword *kw, ut64 from, const ut8 *buf, int len) {
260 	ut64 bad_char_shift[UT8_MAX + 1];
261 	int i, j, m = kw->keyword_length - 1, count = 0;
262 	ut8 ch;
263 
264 	for (i = 0; i < R_ARRAY_SIZE (bad_char_shift); i++) {
265 		bad_char_shift[i] = kw->keyword_length;
266 	}
267 	for (i = 0; i < m; i++) {
268 		ch = kw->bin_keyword[i];
269 		bad_char_shift[kw->icase ? tolower (ch) : ch] = m - i;
270 	}
271 
272 	for (i = 0; i + m < len; ) {
273 	next:
274 		for (j = m; ; j--) {
275 			ut8 a = buf[i + j], b = kw->bin_keyword[j];
276 			if (kw->icase) {
277 				a = tolower (a);
278 				b = tolower (b);
279 			}
280 			if (a != b) break;
281 			if (i == 0) {
282 				if (!r_search_hit_new (s, kw, from + i)) {
283 					return -1;
284 				}
285 				kw->count++;
286 				count++;
287 				if (!s->overlap) {
288 					i += kw->keyword_length;
289 					goto next;
290 				}
291 			}
292 		}
293 		ch = buf[i + m];
294 		i += bad_char_shift[kw->icase ? tolower (ch) : ch];
295 	}
296 
297 	return false;
298 }
299 #endif
300 
brute_force_match(RSearch * s,RSearchKeyword * kw,const ut8 * buf,int i)301 static bool brute_force_match(RSearch *s, RSearchKeyword *kw, const ut8 *buf, int i) {
302 	int j = 0;
303 	if (s->distance) { // slow path, more work in the loop
304 		int dist = 0;
305 		if (kw->binmask_length > 0) {
306 			for (; j < kw->keyword_length; j++) {
307 				int k = j % kw->binmask_length;
308 				ut8 a = buf[i + j], b = kw->bin_keyword[j];
309 				if (kw->icase) {
310 					a = tolower (a);
311 					b = tolower (b);
312 				}
313 				if ((a & kw->bin_binmask[k]) != (b & kw->bin_binmask[k])) {
314 					dist++;
315 				}
316 			}
317 		} else if (kw->icase) {
318 			for (; j < kw->keyword_length; j++) {
319 				if (tolower (buf[i + j]) != tolower (kw->bin_keyword[j])) {
320 					dist++;
321 				}
322 			}
323 		} else {
324 			for (; j < kw->keyword_length; j++) {
325 				if (buf[i + j] != kw->bin_keyword[j]) {
326 					dist++;
327 				}
328 			}
329 		}
330 		return dist <= s->distance;
331 	}
332 
333 	if (kw->binmask_length > 0) {
334 		for (; j < kw->keyword_length; j++) {
335 			int k = j % kw->binmask_length;
336 			ut8 a = buf[i + j], b = kw->bin_keyword[j];
337 			if (kw->icase) {
338 				a = tolower (a);
339 				b = tolower (b);
340 			}
341 			if ((a & kw->bin_binmask[k]) != (b & kw->bin_binmask[k])) {
342 				break;
343 			}
344 		}
345 	} else if (kw->icase) {
346 		while (j < kw->keyword_length &&
347 			tolower (buf[i + j]) == tolower (kw->bin_keyword[j])) {
348 			j++;
349 		}
350 	} else {
351 		while (j < kw->keyword_length && buf[i + j] == kw->bin_keyword[j]) {
352 			j++;
353 		}
354 	}
355 	return j == kw->keyword_length;
356 }
357 
358 // Supported search variants: backward, binmask, icase, inverse, overlap
r_search_mybinparse_update(RSearch * s,ut64 from,const ut8 * buf,int len)359 R_API int r_search_mybinparse_update(RSearch *s, ut64 from, const ut8 *buf, int len) {
360 	RSearchKeyword *kw;
361 	RListIter *iter;
362 	RSearchLeftover *left;
363 	int longest = 0, i;
364 	const int old_nhits = s->nhits;
365 
366 	r_list_foreach (s->kws, iter, kw) {
367 		longest = R_MAX (longest, kw->keyword_length);
368 	}
369 	if (!longest) {
370 		return 0;
371 	}
372 	if (s->data) {
373 		left = s->data;
374 		if (left->end != from) {
375 			left->len = 0;
376 		}
377 	} else {
378 		left = malloc (sizeof(RSearchLeftover) + (size_t)2 * (longest - 1));
379 		if (!left) {
380 			return -1;
381 		}
382 		s->data = left;
383 		left->len = 0;
384 	}
385 	if (s->bckwrds) {
386 		// XXX Change function signature from const ut8 * to ut8 *
387 		ut8 *i = (ut8 *)buf, *j = i + len;
388 		while (i < j) {
389 			ut8 t = *i;
390 			*i++ = *--j;
391 			*j = t;
392 		}
393 	}
394 
395 	ut64 len1 = left->len + R_MIN (longest - 1, len);
396 	memcpy (left->data + left->len, buf, len1 - left->len);
397 	r_list_foreach (s->kws, iter, kw) {
398 		i = s->overlap || !kw->count ? 0 :
399 				s->bckwrds
400 				? kw->last - from < left->len ? from + left->len - kw->last : 0
401 				: from - kw->last < left->len ? kw->last + left->len - from : 0;
402 		for (; i + kw->keyword_length <= len1 && i < left->len; i++) {
403 			if (brute_force_match (s, kw, left->data, i) != s->inverse) {
404 				int t = r_search_hit_new (s, kw, s->bckwrds ? from - kw->keyword_length - i + left->len : from + i - left->len);
405 				if (!t) {
406 					return -1;
407 				}
408 				if (t > 1) {
409 					return s->nhits - old_nhits;
410 				}
411 				if (!s->overlap) {
412 					i += kw->keyword_length - 1;
413 				}
414 			}
415 		}
416 		i = s->overlap || !kw->count ? 0 :
417 				s->bckwrds
418 				? from > kw->last ? from - kw->last : 0
419 				: from < kw->last ? kw->last - from : 0;
420 		for (; i + kw->keyword_length <= len; i++) {
421 			if (brute_force_match (s, kw, buf, i) != s->inverse) {
422 				int t = r_search_hit_new (s, kw, s->bckwrds ? from - kw->keyword_length - i : from + i);
423 				if (!t) {
424 					return -1;
425 				}
426 				if (t > 1) {
427 					return s->nhits - old_nhits;
428 				}
429 				if (!s->overlap) {
430 					i += kw->keyword_length - 1;
431 				}
432 			}
433 		}
434 	}
435 	if (len < longest - 1) {
436 		if (len1 < longest) {
437 			left->len = len1;
438 		} else {
439 			left->len = longest - 1;
440 			memmove (left->data, left->data + len1 - longest + 1, longest - 1);
441 		}
442 	} else {
443 		left->len = longest - 1;
444 		memcpy (left->data, buf + len - longest + 1, longest - 1);
445 	}
446 	left->end = s->bckwrds ? from - len : from + len;
447 
448 	return s->nhits - old_nhits;
449 }
450 
r_search_set_distance(RSearch * s,int dist)451 R_API void r_search_set_distance(RSearch *s, int dist) {
452 	if (dist>=R_SEARCH_DISTANCE_MAX) {
453 		eprintf ("Invalid distance\n");
454 		s->distance = 0;
455 	} else {
456 		s->distance = (dist>0)?dist:0;
457 	}
458 }
459 
460 // deprecate? or standarize with ->align ??
r_search_pattern_size(RSearch * s,int size)461 R_API void r_search_pattern_size(RSearch *s, int size) {
462 	s->pattern_size = size;
463 }
464 
r_search_set_callback(RSearch * s,RSearchCallback (callback),void * user)465 R_API void r_search_set_callback(RSearch *s, RSearchCallback(callback), void *user) {
466 	s->callback = callback;
467 	s->user = user;
468 }
469 
470 // backward search: from points to the right endpoint
471 // forward search: from points to the left endpoint
r_search_update(RSearch * s,ut64 from,const ut8 * buf,long len)472 R_API int r_search_update(RSearch *s, ut64 from, const ut8 *buf, long len) {
473 	int ret = -1;
474 	if (s->update) {
475 		if (s->maxhits && s->nhits >= s->maxhits) {
476 			return 0;
477 		}
478 		ret = s->update (s, from, buf, len);
479 	} else {
480 		eprintf ("r_search_update: No search method defined\n");
481 	}
482 	return ret;
483 }
484 
r_search_update_i(RSearch * s,ut64 from,const ut8 * buf,long len)485 R_API int r_search_update_i(RSearch *s, ut64 from, const ut8 *buf, long len) {
486 	return r_search_update (s, from, buf, len);
487 }
488 
listcb(RSearchKeyword * k,void * user,ut64 addr)489 static int listcb(RSearchKeyword *k, void *user, ut64 addr) {
490 	RSearchHit *hit = R_NEW0 (RSearchHit);
491 	if (!hit) {
492 		return 0;
493 	}
494 	hit->kw = k;
495 	hit->addr = addr;
496 	r_list_append (user, hit);
497 	return 1;
498 }
499 
r_search_find(RSearch * s,ut64 addr,const ut8 * buf,int len)500 R_API RList *r_search_find(RSearch *s, ut64 addr, const ut8 *buf, int len) {
501 	RList *ret = r_list_new ();
502 	r_search_set_callback (s, listcb, ret);
503 	r_search_update (s, addr, buf, len);
504 	return ret;
505 }
506 
507 /* --- keywords --- */
r_search_kw_add(RSearch * s,RSearchKeyword * kw)508 R_API int r_search_kw_add(RSearch *s, RSearchKeyword *kw) {
509 	if (!kw || !kw->keyword_length) {
510 		return false;
511 	}
512 	kw->kwidx = s->n_kws++;
513 	r_list_append (s->kws, kw);
514 	return true;
515 }
516 
517 // Reverse bin_keyword & bin_binmask for backward search
r_search_string_prepare_backward(RSearch * s)518 R_API void r_search_string_prepare_backward(RSearch *s) {
519 	RListIter *iter;
520 	RSearchKeyword *kw;
521 	// Precondition: !kw->binmask_length || kw->keyword_length % kw->binmask_length == 0
522 	r_list_foreach (s->kws, iter, kw) {
523 		ut8 *i = kw->bin_keyword, *j = kw->bin_keyword + kw->keyword_length;
524 		while (i < j) {
525 			ut8 t = *i;
526 			*i++ = *--j;
527 			*j = t;
528 		}
529 		i = kw->bin_binmask;
530 		j = kw->bin_binmask + kw->binmask_length;
531 		while (i < j) {
532 			ut8 t = *i;
533 			*i++ = *--j;
534 			*j = t;
535 		}
536 	}
537 }
538 
r_search_reset(RSearch * s,int mode)539 R_API void r_search_reset(RSearch *s, int mode) {
540 	s->nhits = 0;
541 	if (!r_search_set_mode (s, mode)) {
542 		eprintf ("Cannot init search for mode %d\n", mode);
543 	}
544 }
545 
r_search_kw_reset(RSearch * s)546 R_API void r_search_kw_reset(RSearch *s) {
547 	r_list_purge (s->kws);
548 	r_list_purge (s->hits);
549 	R_FREE (s->data);
550 }
551