1 /*
2 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3 * Copyright (C) 2010-2013 Sourcefire, Inc.
4 *
5 * Authors: aCaB <acab@clamav.net>, Török Edvin <edwin@clamav.net>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #if HAVE_CONFIG_H
23 #include "clamav-config.h"
24 #endif
25
26 #include <string.h>
27 #include <stdlib.h>
28 #include <pthread.h>
29 #include <assert.h>
30
31 #include "mpool.h"
32 #include "clamav.h"
33 #include "cache.h"
34 #include "fmap.h"
35
36 // #define USE_LRUHASHCACHE /* The replacement policy algorithm to use */
37 #define USE_SPLAY
38
39 #if defined(CL_THREAD_SAFE) && defined(USE_LRUHASHCACHE)
40 static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;
41 #endif
42
43 /* The number of root trees and the chooser function
44 Each tree is protected by a mutex against concurrent access */
45 /* #define TREES 1 */
46 /* static inline unsigned int getkey(uint8_t *hash) { return 0; } */
47 #define TREES 256
/* Maps a 16-byte digest to its tree index.  With TREES == 256 the first
 * byte of the hash selects one of the 256 independent trees. */
static inline unsigned int getkey(uint8_t *hash)
{
    return hash[0];
}
52 /* #define TREES 4096 */
53 /* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */
54 /* #define TREES 65536 */
55 /* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */
56
57 /* The number of nodes in each tree */
58 #define NODES 256
59
60 /* LRUHASHCACHE --------------------------------------------------------------------- */
61 #ifdef USE_LRUHASHCACHE
/* One hash-table slot: the 16-byte MD5 digest stored as two int64s, the
 * file size, and the links threading this slot into the LRU list. */
struct cache_key {
    int64_t digest[2];
    uint32_t size; /* 0 is used to mark an empty hash slot! */
    struct cache_key *lru_next, *lru_prev;
};

/* An open-addressing hash set of cache_key slots, with an LRU eviction
 * list threaded through the occupied slots. */
struct cache_set {
    struct cache_key *data;
    size_t maxelements; /* considering load factor */
    size_t maxdeleted;
    size_t elements;
    size_t deleted;
    struct cache_key *lru_head, *lru_tail;
};

/* Sentinel values for cache_key.size: a slot is "empty" if never used, or a
 * "deleted" tombstone so that probing still finds keys inserted past it. */
#define CACHE_KEY_DELETED ~0u
#define CACHE_KEY_EMPTY 0
79
/* Evicts `howmany` keys from the head (least recently used end) of the LRU
 * list, turning their slots into deleted tombstones.
 * NOTE(review): the new head's lru_prev is left pointing at the evicted key,
 * and lru_tail is only reset when the evicted key was also the tail —
 * presumably callers re-link before relying on those fields; verify. */
static void cacheset_lru_remove(struct cache_set *map, size_t howmany)
{
    while (howmany--) {
        struct cache_key *old;
        assert(map->lru_head);
        /* Remove a key from the head of the list */
        old = map->lru_head;
        assert(!old->lru_prev);
        map->lru_head = old->lru_next;
        old->size = CACHE_KEY_DELETED;
        /* This slot is now deleted, it is not empty,
         * because previously we could have inserted a key that has seen this
         * slot as occupied, to find that key we need to ensure that all keys
         * that were occupied when the key was inserted, are seen as occupied
         * when searching too.
         * Of course when inserting a new value, we treat deleted slots as
         * empty.
         * We only replace old values with new values, but there is no guarantee
         * that the newly inserted value would hash to same place as the value
         * we remove due to LRU! */
        if (old == map->lru_tail)
            map->lru_tail = 0;
        map->elements--;
        map->deleted++;
    }
}
106
/* Probes the hash table for the key (md5, size).
 * Returns 1 and stores the slot index in *insert_pos when the key is found;
 * returns 0 otherwise, with *insert_pos set to a candidate insertion slot.
 * When `deletedok` is nonzero, a deleted (tombstone) slot stops the probe
 * and is offered as the insertion position; when zero, tombstones are
 * skipped over so keys inserted past them can still be found. */
static inline int cacheset_lookup_internal(struct cache_set *map,
                                           const char *md5, size_t size,
                                           uint32_t *insert_pos, int deletedok)
{
    const struct cache_key *data = map->data;
    uint32_t capmask = NODES - 1;
    const struct cache_key *k;
    uint32_t idx, tries = 0;
    uint64_t md5_0, md5_1;
    uint64_t md5a[2];

    /* copy to avoid unaligned/aliasing access to the caller's digest */
    memcpy(&md5a, md5, 16);
    md5_0 = md5a[0];
    md5_1 = md5a[1];
    idx = md5_1 & capmask;
    k = &data[idx];
    while (k->size != CACHE_KEY_EMPTY && tries <= capmask) {
        if (k->digest[0] == md5_0 &&
            k->digest[1] == md5_1 &&
            k->size == size) {
            /* found key */
            *insert_pos = idx;
            return 1;
        }
        if (deletedok && k->size == CACHE_KEY_DELETED) {
            /* treat deleted slot as empty */
            *insert_pos = idx;
            return 0;
        }
        /* probe with an increasing step (triangular numbers) */
        idx = (idx + tries++) & capmask;
        k = &data[idx];
    }
    /* found empty pos */
    *insert_pos = idx;
    return 0;
}
143
lru_remove(struct cache_set * map,struct cache_key * newkey)144 static inline void lru_remove(struct cache_set *map, struct cache_key *newkey)
145 {
146 if (newkey->lru_next)
147 newkey->lru_next->lru_prev = newkey->lru_prev;
148 if (newkey->lru_prev)
149 newkey->lru_prev->lru_next = newkey->lru_next;
150 if (newkey == map->lru_head)
151 map->lru_head = newkey->lru_next;
152 }
153
/* Appends `newkey` at the tail (most recently used end) of the LRU list.
 * The caller must ensure the key is not currently linked in the list. */
static inline void lru_addtail(struct cache_set *map, struct cache_key *newkey)
{
    if (!map->lru_head)
        map->lru_head = newkey;
    if (map->lru_tail)
        map->lru_tail->lru_next = newkey;
    newkey->lru_next = NULL;
    newkey->lru_prev = map->lru_tail;
    map->lru_tail = newkey;
}
164
165 static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool);
166 static int cacheset_init(struct cache_set *map, mpool_t *mempool);
167
/* Rebuilds the set when too many tombstones have accumulated: allocates a
 * fresh set, re-inserts up to half-capacity of keys starting from the LRU
 * head, then frees the old slot array and adopts the new set in place.
 * The pool mutex only guards the mpool alloc/free, not the set itself. */
static void cacheset_rehash(struct cache_set *map, mpool_t *mempool)
{
    unsigned i;
    int ret;
    struct cache_set tmp_set;
    struct cache_key *key;
#ifdef CL_THREAD_SAFE
    pthread_mutex_lock(&pool_mutex);
#endif
    ret = cacheset_init(&tmp_set, mempool);
#ifdef CL_THREAD_SAFE
    pthread_mutex_unlock(&pool_mutex);
#endif
    if (ret)
        return;

    /* copy from the LRU head (oldest first) so at most maxelements/2 keys
     * survive the rehash */
    key = map->lru_head;
    for (i = 0; key && i < tmp_set.maxelements / 2; i++) {
        cacheset_add(&tmp_set, (unsigned char *)&key->digest, key->size, mempool);
        key = key->lru_next;
    }
#ifdef CL_THREAD_SAFE
    pthread_mutex_lock(&pool_mutex);
#endif
    MPOOL_FREE(mempool, map->data);
#ifdef CL_THREAD_SAFE
    pthread_mutex_unlock(&pool_mutex);
#endif
    memcpy(map, &tmp_set, sizeof(tmp_set));
}
198
/* Inserts (md5, size) into the set, evicting the LRU entry when full and
 * rehashing when tombstones pile up.  An existing entry is refreshed by
 * moving it to the MRU end of the list.
 * NOTE(review): map->elements is incremented even when the key was already
 * present (ret != 0), which looks like it over-counts on replace — verify. */
static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool)
{
    int ret;
    uint32_t pos;
    struct cache_key *newkey;

    if (map->elements >= map->maxelements) {
        cacheset_lru_remove(map, 1);
        if (map->deleted >= map->maxdeleted) {
            cacheset_rehash(map, mempool);
        }
    }
    assert(map->elements < map->maxelements);

    ret = cacheset_lookup_internal(map, md5, size, &pos, 1);
    newkey = &map->data[pos];
    if (newkey->size == CACHE_KEY_DELETED)
        map->deleted--;
    if (ret) {
        /* was already added, remove from LRU list */
        lru_remove(map, newkey);
    }
    /* add new key to tail of LRU list */
    memcpy(&map->data[pos].digest, md5, sizeof(map->data[pos].digest));
    map->data[pos].size = size;
    lru_addtail(map, newkey);

    map->elements++;

    assert(pos < map->maxelements);
}
230
/* Removes (md5, size) from the set if present: unlinks it from the LRU
 * list and leaves a tombstone, rehashing when tombstones exceed the limit.
 * A miss or an already-deleted slot is a no-op. */
static void cacheset_remove(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool)
{
    int ret;
    uint32_t pos;
    struct cache_key *newkey;
    ret = cacheset_lookup_internal(map, md5, size, &pos, 1);
    newkey = &map->data[pos];
    if (!ret || (newkey->size == CACHE_KEY_DELETED)) {
        /* already deleted */
        return;
    }
    /* remove from list */
    lru_remove(map, newkey);
    newkey->size = CACHE_KEY_DELETED;
    map->deleted++;
    map->elements--;
    if (map->deleted >= map->maxdeleted) {
        cacheset_rehash(map, mempool);
    }
}
251
/* Looks up (md5, size); on a hit, refreshes the entry by moving it to the
 * MRU end of the list.  Returns 1 on hit, 0 on miss. */
static int cacheset_lookup(struct cache_set *map, unsigned char *md5, size_t size)
{
    struct cache_key *newkey;
    int ret;
    uint32_t pos;

    ret = cacheset_lookup_internal(map, md5, size, &pos, 0);
    if (!ret)
        return 0;
    newkey = &map->data[pos];
    /* update LRU position: move to tail */
    lru_remove(map, newkey);
    lru_addtail(map, newkey);
    return 1;
}
267
/* Initializes an LRU hash cache set: allocates a zeroed slot array (so all
 * slots start as CACHE_KEY_EMPTY) and resets counters and list pointers.
 * Returns 0 on success, CL_EMEM if the allocation fails.
 * Bug fix: map->deleted was never initialized, so the tombstone counter
 * started from garbage (notably in the stack-allocated tmp_set used by
 * cacheset_rehash), which could trigger spurious or missed rehashes. */
static int cacheset_init(struct cache_set *map, mpool_t *mempool)
{
    map->data = MPOOL_CALLOC(mempool, NODES, sizeof(*map->data));
    if (!map->data)
        return CL_EMEM;
    map->maxelements = 80 * NODES / 100; /* 80% load factor */
    map->maxdeleted  = NODES - map->maxelements - 1;
    map->elements    = 0;
    map->deleted     = 0;
    map->lru_head = map->lru_tail = NULL;
    return 0;
}
279
/* Frees the slot array and clears the pointer to guard against reuse. */
static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool)
{
    MPOOL_FREE(mempool, cs->data);
    cs->data = NULL;
}
285
286 #endif /* USE_LRUHASHCACHE */
287
288 /* SPLAY --------------------------------------------------------------------- */
289 #ifdef USE_SPLAY
290
struct node { /* a node */
    int64_t digest[2];       /* 16-byte hash, key part 1 */
    struct node *left;       /* splay tree links */
    struct node *right;
    struct node *up;
    struct node *next;       /* replacement-chain links (LRU order) */
    struct node *prev;
    uint32_t size;           /* file size, key part 2 */
    uint32_t minrec;         /* lowest recursion level this hash was cached at */
};

struct cache_set { /* a tree */
    struct node *data;  /* fixed pool of NODES nodes */
    struct node *root;  /* splay tree root */
    struct node *first; /* replacement chain: first = next to be recycled */
    struct node *last;  /* replacement chain: last = most recently used */
};
308
309 /* Allocates all the nodes and sets up the replacement chain */
/* Allocates the node pool for one tree and threads every node into the
 * replacement chain (first = next to be recycled, last = most recent).
 * Returns 0 on success, 1 if the pool allocation fails. */
static int cacheset_init(struct cache_set *cs, mpool_t *mempool)
{
    unsigned int i;

#ifndef USE_MPOOL
    UNUSEDPARAM(mempool);
#endif

    cs->root = NULL;
    cs->data = MPOOL_CALLOC(mempool, NODES, sizeof(*cs->data));

    if (NULL == cs->data)
        return 1;

    /* Link the doubly-linked replacement chain through the pool. */
    for (i = 0; i + 1 < NODES; i++) {
        cs->data[i].next     = &cs->data[i + 1];
        cs->data[i + 1].prev = &cs->data[i];
    }

    cs->first = &cs->data[0];
    cs->last  = &cs->data[NODES - 1];

    return 0;
}
334
335 /* Frees all the nodes */
/* Frees the tree's node pool and clears the pointer to guard against reuse. */
static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool)
{
#ifndef USE_MPOOL
    UNUSEDPARAM(mempool);
#endif

    MPOOL_FREE(mempool, cs->data);
    cs->data = NULL;
}
345
346 /* The left/right cooser for the splay tree */
/* Three-way comparator ordering (digest, size) keys in the splay tree:
 * compares digest[1] first, then digest[0], then the size.
 * Returns -1, 0 or 1. */
static inline int cmp(int64_t *a, ssize_t sa, int64_t *b, ssize_t sb)
{
    if (a[1] != b[1])
        return (a[1] < b[1]) ? -1 : 1;
    if (a[0] != b[0])
        return (a[0] < b[0]) ? -1 : 1;
    if (sa != sb)
        return (sa < sb) ? -1 : 1;
    return 0;
}
357
358 /* #define PRINT_TREE */
359 #ifdef PRINT_TREE
360 #define ptree printf
361 #else
362 #define ptree(...)
363 #endif
364
365 /* Debug function to print the tree and check its consistency */
366 /* #define CHECK_TREE */
367 #ifdef CHECK_TREE
/* Recursively prints the tree (right subtree first) and checks ordering and
 * parent-pointer consistency.  Returns nonzero if any inconsistency found. */
static int printtree(struct cache_set *cs, struct node *n, int d)
{
    int i;
    int ab = 0;
    if ((n == NULL) || (cs == NULL) || (cs->data == NULL)) return 0;
    if (n == cs->root) {
        ptree("--------------------------\n");
    }
    ab |= printtree(cs, n->right, d + 1);
    if (n->right) {
        /* right child must compare strictly greater than this node */
        if (cmp(n->digest, n->size, n->right->digest, n->right->size) >= 0) {
            for (i = 0; i < d; i++) ptree(" ");
            ptree("^^^^ %lld >= %lld\n", n->digest[1], n->right->digest[1]);
            ab = 1;
        }
    }
    for (i = 0; i < d; i++) ptree(" ");
    ptree("%08x(%02u)\n", n->digest[1] >> 48, n - cs->data);
    if (n->left) {
        /* left child must compare strictly less than this node */
        if (cmp(n->digest, n->size, n->left->digest, n->left->size) <= 0) {
            for (i = 0; i < d; i++) ptree(" ");
            ptree("vvvv %lld <= %lld\n", n->digest[1], n->left->digest[1]);
            ab = 1;
        }
    }
    if (d) {
        /* non-root nodes must have a parent that links back to them */
        if (!n->up) {
            printf("no parent, [node %02u]!\n", n - cs->data);
            ab = 1;
        } else {
            if (n->up->left != n && n->up->right != n) {
                printf("broken parent [node %02u, parent node %02u]\n", n - cs->data, n->up - cs->data);
                ab = 1;
            }
        }
    } else {
        if (n->up) {
            printf("root with a parent, [node %02u]!\n", n - cs->data);
            ab = 1;
        }
    }
    ab |= printtree(cs, n->left, d + 1);
    return ab;
}
412 #else
413 #define printtree(a, b, c) (0)
414 #endif
415
416 /* For troubleshooting only; prints out one specific node */
417 /* #define PRINT_NODE */
418 #ifdef PRINT_NODE
/* Dumps one node's key, tree links, and chain links (debug builds only). */
static void printnode(const char *prefix, struct cache_set *cs, struct node *n)
{
    if (!prefix || !cs || !cs->data) {
        printf("bad args!\n");
        return;
    }
    if (!n) {
        printf("no node!\n");
        return;
    }
    printf("%s node [%02u]:", prefix, n - cs->data);
    printf(" size=%lu digest=%llx,%llx\n", (unsigned long)(n->size), n->digest[0], n->digest[1]);
    printf("\tleft=");
    if (n->left)
        printf("%02u ", n->left - cs->data);
    else
        printf("NULL ");
    printf("right=");
    if (n->right)
        printf("%02u ", n->right - cs->data);
    else
        printf("NULL ");
    printf("up=");
    if (n->up)
        printf("%02u ", n->up - cs->data);
    else
        printf("NULL ");

    printf("\tprev=");
    if (n->prev)
        printf("%02u ", n->prev - cs->data);
    else
        printf("NULL ");
    printf("next=");
    if (n->next)
        printf("%02u\n", n->next - cs->data);
    else
        printf("NULL\n");
}
458 #else
459 #define printnode(a, b, c)
460 #endif
461
462 /* #define PRINT_CHAINS */
463 #ifdef PRINT_CHAINS
464 /* For troubleshooting only, print the chain forwards and back */
/* Walks the replacement chain forward (via next) and backward (via prev),
 * printing node indices and counts so asymmetries are easy to spot. */
static inline void printchain(const char *prefix, struct cache_set *cs)
{
    if (!cs || !cs->data) return;
    if (prefix) printf("%s: ", prefix);
    printf("chain by next: ");
    {
        unsigned int i = 0;
        struct node *x = cs->first;
        while (x) {
            printf("%02d,", x - cs->data);
            x = x->next;
            i++;
        }
        printf(" [count=%u]\nchain by prev: ", i);
        x = cs->last;
        i = 0;
        while (x) {
            printf("%02d,", x - cs->data);
            x = x->prev;
            i++;
        }
        printf(" [count=%u]\n", i);
    }
}
489 #else
490 #define printchain(a, b) (0)
491 #endif
492
493 /* Looks up a node and splays it up to the root of the tree */
/* Top-down splay: searches for key (md5, len) and restructures the tree so
 * the closest node visited becomes the root.
 * Returns 1 if the exact key was found (now at cs->root), 0 otherwise (the
 * last node compared becomes the root).  `next` is a stack dummy whose
 * left/right fields collect the subtrees split off during the descent. */
static int splay(int64_t *md5, size_t len, struct cache_set *cs)
{
    struct node next = {{0, 0}, NULL, NULL, NULL, NULL, NULL, 0, 0}, *right = &next, *left = &next, *temp, *root = cs->root;
    int comp, found = 0;

    if (!root)
        return 0;

    while (1) {
        comp = cmp(md5, len, root->digest, root->size);
        if (comp < 0) {
            if (!root->left) break;
            /* zig-zig (left-left): rotate right before linking */
            if (cmp(md5, len, root->left->digest, root->left->size) < 0) {
                temp = root->left;
                root->left = temp->right;
                if (temp->right) temp->right->up = root;
                temp->right = root;
                root->up = temp;
                root = temp;
                if (!root->left) break;
            }
            /* link root into the right split tree, descend left */
            right->left = root;
            root->up = right;
            right = root;
            root = root->left;
        } else if (comp > 0) {
            if (!root->right) break;
            /* zig-zig (right-right): rotate left before linking */
            if (cmp(md5, len, root->right->digest, root->right->size) > 0) {
                temp = root->right;
                root->right = temp->left;
                if (temp->left) temp->left->up = root;
                temp->left = root;
                root->up = temp;
                root = temp;
                if (!root->right) break;
            }
            /* link root into the left split tree, descend right */
            left->right = root;
            root->up = left;
            left = root;
            root = root->right;
        } else {
            found = 1;
            break;
        }
    }

    /* reassemble: hang the split trees back under the new root */
    left->right = root->left;
    if (root->left) root->left->up = left;
    right->left = root->right;
    if (root->right) root->right->up = right;
    root->left = next.right;
    if (next.right) next.right->up = root;
    root->right = next.left;
    if (next.left) next.left->up = root;
    root->up = NULL;
    cs->root = root;
    return found;
}
552
553 /* Looks up an hash in the tree and maintains the replacement chain */
/* Looks up a hash in the tree; on a hit, splays the node to the root and
 * promotes it to the end of the replacement chain (most recently used).
 * Returns 1 only when the cached entry's minrec allows a hit at this
 * recursion level, 0 otherwise. */
static inline int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size, uint32_t recursion_level)
{
    int64_t hash[2];

    memcpy(hash, md5, 16);
    if (splay(hash, size, cs)) {
        /* o/p/q: predecessor, hit node, successor in the chain */
        struct node *o = cs->root->prev, *p = cs->root, *q = cs->root->next;
#ifdef PRINT_CHAINS
        printf("promoting %02d\n", p - cs->data);
        printchain("before", cs);
#endif
        /* if p is not already last (q != NULL), move it to the tail */
        if (q) {
            if (o)
                o->next = q;
            else
                cs->first = q;
            q->prev = o;
            cs->last->next = p;
            p->prev = cs->last;
            p->next = NULL;
            cs->last = p;
        }
#ifdef PRINT_CHAINS
        printchain("after", cs);
#endif

        // The recursion_level check here to prevent a "clean" result from exceeding max recursion from
        // causing a false negative if the same file is scanned where the recursion depth is lower.
        // e.g. if max-rec set to 4 and "file5" is malicious, a scan of file1 should not cause a scan of file3 to be "clean"
        // root
        // ├── file1 -> file2 -> file3 -> file4 -> file5
        // └── file3 -> file4 -> file5
        // See: https://bugzilla.clamav.net/show_bug.cgi?id=1856
        if (recursion_level >= p->minrec)
            return 1;
    }
    return 0;
}
592
593 /* If the hash is present nothing happens.
594 Otherwise a new node is created for the hash picking one from the begin of the chain.
595 Used nodes are moved to the end of the chain */
/* Adds (md5, size) to the tree.  If already present, only minrec may be
 * lowered.  Otherwise a leaf node is recycled from the front of the
 * replacement chain, moved to the back of the chain, and installed as the
 * new root with the old root split beneath it. */
static inline void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size, uint32_t recursion_level)
{
    struct node *newnode;
    int64_t hash[2];

    memcpy(hash, md5, 16);
    if (splay(hash, size, cs)) {
        /* entry exists: remember the lowest recursion level seen */
        if (cs->root->minrec > recursion_level)
            cs->root->minrec = recursion_level;
        return; /* Already there */
    }

    ptree("1:\n");
    if (printtree(cs, cs->root, 0)) {
        cli_errmsg("cacheset_add: inconsistent tree before choosing newnode, good luck\n");
        return;
    }

    /* walk the chain from the recycle end, looking for a leaf (a node with
     * children cannot be removed from the tree cheaply) */
    newnode = cs->first;
    while (newnode) {
        if (!newnode->right && !newnode->left)
            break;
        if (newnode->next) {
            if (newnode == newnode->next) {
                cli_errmsg("cacheset_add: cache chain in a bad state\n");
                return;
            }
            newnode = newnode->next;
        } else {
            cli_warnmsg("cacheset_add: end of chain reached\n");
            return;
        }
    }
    if (!newnode) {
        cli_errmsg("cacheset_add: tree has got no end nodes\n");
        return;
    }
    /* detach the chosen leaf from its parent in the tree */
    if (newnode->up) {
        if (newnode->up->left == newnode)
            newnode->up->left = NULL;
        else
            newnode->up->right = NULL;
    }
    /* unlink from the chain and re-append at the MRU end */
    if (newnode->prev)
        newnode->prev->next = newnode->next;
    if (newnode->next)
        newnode->next->prev = newnode->prev;
    if (cs->first == newnode)
        cs->first = newnode->next;

    newnode->prev = cs->last;
    newnode->next = NULL;
    cs->last->next = newnode;
    cs->last = newnode;

    ptree("2:\n");
    if (printtree(cs, cs->root, 0)) {
        cli_errmsg("cacheset_add: inconsistent tree before adding newnode, good luck\n");
        return;
    }

    /* install newnode as root; the previous root (already splayed next to
     * the key) is split to one side */
    if (!cs->root) {
        newnode->left = NULL;
        newnode->right = NULL;
    } else {
        if (cmp(hash, size, cs->root->digest, cs->root->size) < 0) {
            newnode->left = cs->root->left;
            newnode->right = cs->root;
            cs->root->left = NULL;
        } else {
            newnode->right = cs->root->right;
            newnode->left = cs->root;
            cs->root->right = NULL;
        }
        if (newnode->left) newnode->left->up = newnode;
        if (newnode->right) newnode->right->up = newnode;
    }
    newnode->digest[0] = hash[0];
    newnode->digest[1] = hash[1];
    newnode->up = NULL;
    newnode->size = size;
    newnode->minrec = recursion_level;
    cs->root = newnode;

    ptree("3: %lld\n", hash[1]);
    if (printtree(cs, cs->root, 0)) {
        cli_errmsg("cacheset_add: inconsistent tree after adding newnode, good luck\n");
        return;
    }
    printnode("newnode", cs, newnode);
}
687
688 /* If the hash is not present nothing happens other than splaying the tree.
689 Otherwise the identified node is removed from the tree and then placed back at
690 the front of the chain. */
/* Removes (md5, size) from the tree if present: the node is splayed to the
 * root, spliced out (joining its subtrees), zeroed, and placed at the front
 * of the replacement chain so it is recycled first. */
static inline void cacheset_remove(struct cache_set *cs, unsigned char *md5, size_t size)
{
    struct node *targetnode;
    struct node *reattachnode;
    int64_t hash[2];

    memcpy(hash, md5, 16);
    if (splay(hash, size, cs) != 1) {
        cli_dbgmsg("cacheset_remove: node not found in tree\n");
        return; /* No op */
    }

    ptree("cacheset_remove: node found and splayed to root\n");
    targetnode = cs->root;
    printnode("targetnode", cs, targetnode);

    /* First fix the tree */
    if (targetnode->left == NULL) {
        /* At left edge so prune */
        cs->root = targetnode->right;
        if (cs->root)
            cs->root->up = NULL;
    } else {
        /* new root will come from leftside tree */
        cs->root = targetnode->left;
        cs->root->up = NULL;
        /* splay tree, expecting not found, bringing rightmost member to root */
        splay(hash, size, cs);

        if (targetnode->right) {
            /* reattach right tree to clean right-side attach point */
            reattachnode = cs->root;
            while (reattachnode->right)
                reattachnode = reattachnode->right; /* shouldn't happen, but safer in case of dupe */
            reattachnode->right = targetnode->right;
            targetnode->right->up = reattachnode;
        }
    }
    /* wipe the removed node so it reads as unused */
    targetnode->size = (size_t)0;
    targetnode->digest[0] = 0;
    targetnode->digest[1] = 0;
    targetnode->up = NULL;
    targetnode->left = NULL;
    targetnode->right = NULL;

    /* Tree is fixed, so now fix chain around targetnode */
    if (targetnode->prev)
        targetnode->prev->next = targetnode->next;
    if (targetnode->next)
        targetnode->next->prev = targetnode->prev;
    if (cs->last == targetnode)
        cs->last = targetnode->prev;

    /* Put targetnode at front of chain, if not there already */
    if (cs->first != targetnode) {
        targetnode->next = cs->first;
        if (cs->first)
            cs->first->prev = targetnode;
        cs->first = targetnode;
    }
    targetnode->prev = NULL;

    printnode("root", cs, cs->root);
    printnode("first", cs, cs->first);
    printnode("last", cs, cs->last);

    printchain("remove (after)", cs);
}
759 #endif /* USE_SPLAY */
760
761 /* COMMON STUFF --------------------------------------------------------------------- */
762
/* One cache tree plus (in thread-safe builds) the mutex protecting it.
 * The engine cache is an array of TREES of these, indexed via getkey(). */
struct CACHE {
    struct cache_set cacheset;
#ifdef CL_THREAD_SAFE
    pthread_mutex_t mutex;
#endif
};
769
770 /* Allocates the trees for the engine cache */
cli_cache_init(struct cl_engine * engine)771 int cli_cache_init(struct cl_engine *engine)
772 {
773 struct CACHE *cache;
774 unsigned int i, j;
775
776 if (!engine) {
777 cli_errmsg("cli_cache_init: mpool malloc fail\n");
778 return 1;
779 }
780
781 if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
782 cli_dbgmsg("cli_cache_init: Caching disabled.\n");
783 return 0;
784 }
785
786 if (!(cache = MPOOL_MALLOC(engine->mempool, sizeof(struct CACHE) * TREES))) {
787 cli_errmsg("cli_cache_init: mpool malloc fail\n");
788 return 1;
789 }
790
791 for (i = 0; i < TREES; i++) {
792 #ifdef CL_THREAD_SAFE
793 if (pthread_mutex_init(&cache[i].mutex, NULL)) {
794 cli_errmsg("cli_cache_init: mutex init fail\n");
795 for (j = 0; j < i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
796 for (j = 0; j < i; j++) pthread_mutex_destroy(&cache[j].mutex);
797 MPOOL_FREE(engine->mempool, cache);
798 return 1;
799 }
800 #endif
801 if (cacheset_init(&cache[i].cacheset, engine->mempool)) {
802 for (j = 0; j < i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
803 #ifdef CL_THREAD_SAFE
804 for (j = 0; j <= i; j++) pthread_mutex_destroy(&cache[j].mutex);
805 #endif
806 MPOOL_FREE(engine->mempool, cache);
807 return 1;
808 }
809 }
810 engine->cache = cache;
811 return 0;
812 }
813
814 /* Frees the engine cache */
cli_cache_destroy(struct cl_engine * engine)815 void cli_cache_destroy(struct cl_engine *engine)
816 {
817 struct CACHE *cache;
818 unsigned int i;
819
820 if (!engine || !(cache = engine->cache))
821 return;
822
823 if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
824 return;
825 }
826
827 for (i = 0; i < TREES; i++) {
828 cacheset_destroy(&cache[i].cacheset, engine->mempool);
829 #ifdef CL_THREAD_SAFE
830 pthread_mutex_destroy(&cache[i].mutex);
831 #endif
832 }
833 MPOOL_FREE(engine->mempool, cache);
834 }
835
836 /* Looks up an hash in the proper tree */
/* Looks up a hash in the tree selected by its first byte, taking that
 * tree's lock in thread-safe builds.
 * Returns CL_CLEAN on a cache hit, CL_VIRUS ("not cached") on a miss or on
 * lock failure.
 * Fix: the lock-failure message previously repeated the function-name
 * prefix twice. */
static int cache_lookup_hash(unsigned char *md5, size_t len, struct CACHE *cache, uint32_t recursion_level)
{
    unsigned int key = getkey(md5);
    int ret = CL_VIRUS;
    struct CACHE *c;

    c = &cache[key];
#ifdef CL_THREAD_SAFE
    if (pthread_mutex_lock(&c->mutex)) {
        cli_errmsg("cache_lookup_hash: mutex lock fail\n");
        return ret;
    }
#endif

    /* cli_warnmsg("cache_lookup_hash: key is %u\n", key); */

    ret = (cacheset_lookup(&c->cacheset, md5, len, recursion_level)) ? CL_CLEAN : CL_VIRUS;
#ifdef CL_THREAD_SAFE
    pthread_mutex_unlock(&c->mutex);
    // if(ret == CL_CLEAN) cli_warnmsg("cached\n");
#endif
    return ret;
}
860
861 /* Adds an hash to the cache */
cache_add(unsigned char * md5,size_t size,cli_ctx * ctx)862 void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx)
863 {
864 unsigned int key = getkey(md5);
865 uint32_t level;
866 struct CACHE *c;
867
868 if (!ctx || !ctx->engine || !ctx->engine->cache)
869 return;
870
871 if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
872 cli_dbgmsg("cache_add: Caching disabled. Not adding sample to cache.\n");
873 return;
874 }
875
876 level = (ctx->fmap && ctx->fmap->dont_cache_flag) ? ctx->recursion_level : 0;
877 if (ctx->found_possibly_unwanted && (level || 0 == ctx->recursion_level))
878 return;
879 if (SCAN_ALLMATCHES && (ctx->num_viruses > 0)) {
880 cli_dbgmsg("cache_add: alert found within same topfile, skipping cache\n");
881 return;
882 }
883 c = &ctx->engine->cache[key];
884 #ifdef CL_THREAD_SAFE
885 if (pthread_mutex_lock(&c->mutex)) {
886 cli_errmsg("cli_add: mutex lock fail\n");
887 return;
888 }
889 #endif
890
891 /* cli_warnmsg("cache_add: key is %u\n", key); */
892
893 #ifdef USE_LRUHASHCACHE
894 cacheset_add(&c->cacheset, md5, size, ctx->engine->mempool);
895 #else
896 #ifdef USE_SPLAY
897 cacheset_add(&c->cacheset, md5, size, level);
898 #else
899 #error #define USE_SPLAY or USE_LRUHASHCACHE
900 #endif
901 #endif
902
903 #ifdef CL_THREAD_SAFE
904 pthread_mutex_unlock(&c->mutex);
905 #endif
906 cli_dbgmsg("cache_add: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (level %u)\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15], level);
907 return;
908 }
909
910 /* Removes a hash from the cache */
cache_remove(unsigned char * md5,size_t size,const struct cl_engine * engine)911 void cache_remove(unsigned char *md5, size_t size, const struct cl_engine *engine)
912 {
913 unsigned int key = getkey(md5);
914 struct CACHE *c;
915
916 if (!engine || !engine->cache)
917 return;
918
919 if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
920 cli_dbgmsg("cache_remove: Caching disabled.\n");
921 return;
922 }
923
924 /* cli_warnmsg("cache_remove: key is %u\n", key); */
925
926 c = &engine->cache[key];
927 #ifdef CL_THREAD_SAFE
928 if (pthread_mutex_lock(&c->mutex)) {
929 cli_errmsg("cli_add: mutex lock fail\n");
930 return;
931 }
932 #endif
933
934 #ifdef USE_LRUHASHCACHE
935 cacheset_remove(&c->cacheset, md5, size, engine->mempool);
936 #else
937 #ifdef USE_SPLAY
938 cacheset_remove(&c->cacheset, md5, size);
939 #else
940 #error #define USE_SPLAY or USE_LRUHASHCACHE
941 #endif
942 #endif
943
944 #ifdef CL_THREAD_SAFE
945 pthread_mutex_unlock(&c->mutex);
946 #endif
947 cli_dbgmsg("cache_remove: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15]);
948 return;
949 }
950
951 /* Hashes a file onto the provided buffer and looks it up the cache.
952 Returns CL_VIRUS if found, CL_CLEAN if not FIXME or a recoverable error,
953 and returns CL_EREAD if unrecoverable */
/* Looks up `hash` (with the current fmap's length) in the engine cache.
 * Returns CL_CLEAN on a cache hit, CL_VIRUS when not cached or when caching
 * is unavailable/disabled.
 * NOTE(review): ctx->fmap is dereferenced without a NULL check — presumably
 * callers guarantee a current map here; verify. */
cl_error_t cache_check(unsigned char *hash, cli_ctx *ctx)
{
    fmap_t *map;
    int ret;

    if (!ctx || !ctx->engine || !ctx->engine->cache)
        return CL_VIRUS;

    if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
        cli_dbgmsg("cache_check: Caching disabled. Returning CL_VIRUS.\n");
        return CL_VIRUS;
    }

    map = ctx->fmap;
    ret = cache_lookup_hash(hash, map->len, ctx->engine->cache, ctx->recursion_level);
    cli_dbgmsg("cache_check: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x is %s\n", hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7], hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15], (ret == CL_VIRUS) ? "negative" : "positive");
    return ret;
}
972