1 /*
2  *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3  *  Copyright (C) 2010-2013 Sourcefire, Inc.
4  *
5  *  Authors: aCaB <acab@clamav.net>, Török Edvin <edwin@clamav.net>
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License version 2 as
9  *  published by the Free Software Foundation.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  *  MA 02110-1301, USA.
20  */
21 
22 #if HAVE_CONFIG_H
23 #include "clamav-config.h"
24 #endif
25 
26 #include <string.h>
27 #include <stdlib.h>
28 #include <pthread.h>
29 #include <assert.h>
30 
31 #include "mpool.h"
32 #include "clamav.h"
33 #include "cache.h"
34 #include "fmap.h"
35 
36 // #define USE_LRUHASHCACHE /* The replacement policy algorithm to use */
37 #define USE_SPLAY
38 
39 #if defined(CL_THREAD_SAFE) && defined(USE_LRUHASHCACHE)
40 static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;
41 #endif
42 
43 /* The number of root trees and the chooser function
44    Each tree is protected by a mutex against concurrent access */
45 /* #define TREES 1 */
46 /* static inline unsigned int getkey(uint8_t *hash) { return 0; } */
47 #define TREES 256
getkey(uint8_t * hash)48 static inline unsigned int getkey(uint8_t *hash)
49 {
50     return *hash;
51 }
52 /* #define TREES 4096 */
53 /* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */
54 /* #define TREES 65536 */
55 /* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */
56 
57 /* The number of nodes in each tree */
58 #define NODES 256
59 
60 /* LRUHASHCACHE --------------------------------------------------------------------- */
61 #ifdef USE_LRUHASHCACHE
62 struct cache_key {
63     int64_t digest[2];
64     uint32_t size; /* 0 is used to mark an empty hash slot! */
65     struct cache_key *lru_next, *lru_prev;
66 };
67 
68 struct cache_set {
69     struct cache_key *data;
70     size_t maxelements; /* considering load factor */
71     size_t maxdeleted;
72     size_t elements;
73     size_t deleted;
74     struct cache_key *lru_head, *lru_tail;
75 };
76 
77 #define CACHE_KEY_DELETED ~0u
78 #define CACHE_KEY_EMPTY 0
79 
cacheset_lru_remove(struct cache_set * map,size_t howmany)80 static void cacheset_lru_remove(struct cache_set *map, size_t howmany)
81 {
82     while (howmany--) {
83         struct cache_key *old;
84         assert(map->lru_head);
85         /* Remove a key from the head of the list */
86         old = map->lru_head;
87         assert(!old->lru_prev);
88         map->lru_head = old->lru_next;
89         old->size     = CACHE_KEY_DELETED;
90         /* This slot is now deleted, it is not empty,
91 	 * because previously we could have inserted a key that has seen this
92 	 * slot as occupied, to find that key we need to ensure that all keys
93 	 * that were occupied when the key was inserted, are seen as occupied
94 	 * when searching too.
95 	 * Of course when inserting a new value, we treat deleted slots as
96 	 * empty.
97 	 * We only replace old values with new values, but there is no guarantee
98 	 * that the newly inserted value would hash to same place as the value
99 	 * we remove due to LRU! */
100         if (old == map->lru_tail)
101             map->lru_tail = 0;
102         map->elements--;
103         map->deleted++;
104     }
105 }
106 
cacheset_lookup_internal(struct cache_set * map,const char * md5,size_t size,uint32_t * insert_pos,int deletedok)107 static inline int cacheset_lookup_internal(struct cache_set *map,
108                                            const char *md5, size_t size,
109                                            uint32_t *insert_pos, int deletedok)
110 {
111     const struct cache_key *data = map->data;
112     uint32_t capmask             = NODES - 1;
113     const struct cache_key *k;
114     uint32_t idx, tries = 0;
115     uint64_t md5_0, md5_1;
116     uint64_t md5a[2];
117 
118     memcpy(&md5a, md5, 16);
119     md5_0 = md5a[0];
120     md5_1 = md5a[1];
121     idx   = md5_1 & capmask;
122     k     = &data[idx];
123     while (k->size != CACHE_KEY_EMPTY && tries <= capmask) {
124         if (k->digest[0] == md5_0 &&
125             k->digest[1] == md5_1 &&
126             k->size == size) {
127             /* found key */
128             *insert_pos = idx;
129             return 1;
130         }
131         if (deletedok && k->size == CACHE_KEY_DELETED) {
132             /* treat deleted slot as empty */
133             *insert_pos = idx;
134             return 0;
135         }
136         idx = (idx + tries++) & capmask;
137         k   = &data[idx];
138     }
139     /* found empty pos */
140     *insert_pos = idx;
141     return 0;
142 }
143 
lru_remove(struct cache_set * map,struct cache_key * newkey)144 static inline void lru_remove(struct cache_set *map, struct cache_key *newkey)
145 {
146     if (newkey->lru_next)
147         newkey->lru_next->lru_prev = newkey->lru_prev;
148     if (newkey->lru_prev)
149         newkey->lru_prev->lru_next = newkey->lru_next;
150     if (newkey == map->lru_head)
151         map->lru_head = newkey->lru_next;
152 }
153 
lru_addtail(struct cache_set * map,struct cache_key * newkey)154 static inline void lru_addtail(struct cache_set *map, struct cache_key *newkey)
155 {
156     if (!map->lru_head)
157         map->lru_head = newkey;
158     if (map->lru_tail)
159         map->lru_tail->lru_next = newkey;
160     newkey->lru_next = NULL;
161     newkey->lru_prev = map->lru_tail;
162     map->lru_tail    = newkey;
163 }
164 
165 static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool);
166 static int cacheset_init(struct cache_set *map, mpool_t *mempool);
167 
cacheset_rehash(struct cache_set * map,mpool_t * mempool)168 static void cacheset_rehash(struct cache_set *map, mpool_t *mempool)
169 {
170     unsigned i;
171     int ret;
172     struct cache_set tmp_set;
173     struct cache_key *key;
174 #ifdef CL_THREAD_SAFE
175     pthread_mutex_lock(&pool_mutex);
176 #endif
177     ret = cacheset_init(&tmp_set, mempool);
178 #ifdef CL_THREAD_SAFE
179     pthread_mutex_unlock(&pool_mutex);
180 #endif
181     if (ret)
182         return;
183 
184     key = map->lru_head;
185     for (i = 0; key && i < tmp_set.maxelements / 2; i++) {
186         cacheset_add(&tmp_set, (unsigned char *)&key->digest, key->size, mempool);
187         key = key->lru_next;
188     }
189 #ifdef CL_THREAD_SAFE
190     pthread_mutex_lock(&pool_mutex);
191 #endif
192     MPOOL_FREE(mempool, map->data);
193 #ifdef CL_THREAD_SAFE
194     pthread_mutex_unlock(&pool_mutex);
195 #endif
196     memcpy(map, &tmp_set, sizeof(tmp_set));
197 }
198 
cacheset_add(struct cache_set * map,unsigned char * md5,size_t size,mpool_t * mempool)199 static void cacheset_add(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool)
200 {
201     int ret;
202     uint32_t pos;
203     struct cache_key *newkey;
204 
205     if (map->elements >= map->maxelements) {
206         cacheset_lru_remove(map, 1);
207         if (map->deleted >= map->maxdeleted) {
208             cacheset_rehash(map, mempool);
209         }
210     }
211     assert(map->elements < map->maxelements);
212 
213     ret    = cacheset_lookup_internal(map, md5, size, &pos, 1);
214     newkey = &map->data[pos];
215     if (newkey->size == CACHE_KEY_DELETED)
216         map->deleted--;
217     if (ret) {
218         /* was already added, remove from LRU list */
219         lru_remove(map, newkey);
220     }
221     /* add new key to tail of LRU list */
222     memcpy(&map->data[pos].digest, md5, sizeof(map->data[pos].digest));
223     map->data[pos].size = size;
224     lru_addtail(map, newkey);
225 
226     map->elements++;
227 
228     assert(pos < map->maxelements);
229 }
230 
cacheset_remove(struct cache_set * map,unsigned char * md5,size_t size,mpool_t * mempool)231 static void cacheset_remove(struct cache_set *map, unsigned char *md5, size_t size, mpool_t *mempool)
232 {
233     int ret;
234     uint32_t pos;
235     struct cache_key *newkey;
236     ret    = cacheset_lookup_internal(map, md5, size, &pos, 1);
237     newkey = &map->data[pos];
238     if (!ret || (newkey->size == CACHE_KEY_DELETED)) {
239         /* already deleted */
240         return;
241     }
242     /* remove from list */
243     lru_remove(map, newkey);
244     newkey->size = CACHE_KEY_DELETED;
245     map->deleted++;
246     map->elements--;
247     if (map->deleted >= map->maxdeleted) {
248         cacheset_rehash(map, mempool);
249     }
250 }
251 
cacheset_lookup(struct cache_set * map,unsigned char * md5,size_t size)252 static int cacheset_lookup(struct cache_set *map, unsigned char *md5, size_t size)
253 {
254     struct cache_key *newkey;
255     int ret;
256     uint32_t pos;
257 
258     ret = cacheset_lookup_internal(map, md5, size, &pos, 0);
259     if (!ret)
260         return 0;
261     newkey = &map->data[pos];
262     /* update LRU position: move to tail */
263     lru_remove(map, newkey);
264     lru_addtail(map, newkey);
265     return 1;
266 }
267 
cacheset_init(struct cache_set * map,mpool_t * mempool)268 static int cacheset_init(struct cache_set *map, mpool_t *mempool)
269 {
270     map->data = MPOOL_CALLOC(mempool, NODES, sizeof(*map->data));
271     if (!map->data)
272         return CL_EMEM;
273     map->maxelements = 80 * NODES / 100;
274     map->maxdeleted  = NODES - map->maxelements - 1;
275     map->elements    = 0;
276     map->lru_head = map->lru_tail = NULL;
277     return 0;
278 }
279 
cacheset_destroy(struct cache_set * cs,mpool_t * mempool)280 static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool)
281 {
282     MPOOL_FREE(mempool, cs->data);
283     cs->data = NULL;
284 }
285 
286 #endif /* USE_LRUHASHCACHE */
287 
288 /* SPLAY --------------------------------------------------------------------- */
289 #ifdef USE_SPLAY
290 
291 struct node { /* a node */
292     int64_t digest[2];
293     struct node *left;
294     struct node *right;
295     struct node *up;
296     struct node *next;
297     struct node *prev;
298     uint32_t size;
299     uint32_t minrec;
300 };
301 
302 struct cache_set { /* a tree */
303     struct node *data;
304     struct node *root;
305     struct node *first;
306     struct node *last;
307 };
308 
309 /* Allocates all the nodes and sets up the replacement chain */
cacheset_init(struct cache_set * cs,mpool_t * mempool)310 static int cacheset_init(struct cache_set *cs, mpool_t *mempool)
311 {
312     unsigned int i;
313 
314 #ifndef USE_MPOOL
315     UNUSEDPARAM(mempool);
316 #endif
317 
318     cs->data = MPOOL_CALLOC(mempool, NODES, sizeof(*cs->data));
319     cs->root = NULL;
320 
321     if (!cs->data)
322         return 1;
323 
324     for (i = 1; i < NODES; i++) {
325         cs->data[i - 1].next = &cs->data[i];
326         cs->data[i].prev     = &cs->data[i - 1];
327     }
328 
329     cs->first = cs->data;
330     cs->last  = &cs->data[NODES - 1];
331 
332     return 0;
333 }
334 
335 /* Frees all the nodes */
cacheset_destroy(struct cache_set * cs,mpool_t * mempool)336 static inline void cacheset_destroy(struct cache_set *cs, mpool_t *mempool)
337 {
338 #ifndef USE_MPOOL
339     UNUSEDPARAM(mempool);
340 #endif
341 
342     MPOOL_FREE(mempool, cs->data);
343     cs->data = NULL;
344 }
345 
346 /* The left/right cooser for the splay tree */
cmp(int64_t * a,ssize_t sa,int64_t * b,ssize_t sb)347 static inline int cmp(int64_t *a, ssize_t sa, int64_t *b, ssize_t sb)
348 {
349     if (a[1] < b[1]) return -1;
350     if (a[1] > b[1]) return 1;
351     if (a[0] < b[0]) return -1;
352     if (a[0] > b[0]) return 1;
353     if (sa < sb) return -1;
354     if (sa > sb) return 1;
355     return 0;
356 }
357 
358 /* #define PRINT_TREE */
359 #ifdef PRINT_TREE
360 #define ptree printf
361 #else
362 #define ptree(...)
363 #endif
364 
365 /* Debug function to print the tree and check its consistency */
366 /* #define CHECK_TREE */
367 #ifdef CHECK_TREE
printtree(struct cache_set * cs,struct node * n,int d)368 static int printtree(struct cache_set *cs, struct node *n, int d)
369 {
370     int i;
371     int ab = 0;
372     if ((n == NULL) || (cs == NULL) || (cs->data == NULL)) return 0;
373     if (n == cs->root) {
374         ptree("--------------------------\n");
375     }
376     ab |= printtree(cs, n->right, d + 1);
377     if (n->right) {
378         if (cmp(n->digest, n->size, n->right->digest, n->right->size) >= 0) {
379             for (i = 0; i < d; i++) ptree("        ");
380             ptree("^^^^ %lld >= %lld\n", n->digest[1], n->right->digest[1]);
381             ab = 1;
382         }
383     }
384     for (i = 0; i < d; i++) ptree("        ");
385     ptree("%08x(%02u)\n", n->digest[1] >> 48, n - cs->data);
386     if (n->left) {
387         if (cmp(n->digest, n->size, n->left->digest, n->left->size) <= 0) {
388             for (i = 0; i < d; i++) ptree("        ");
389             ptree("vvvv %lld <= %lld\n", n->digest[1], n->left->digest[1]);
390             ab = 1;
391         }
392     }
393     if (d) {
394         if (!n->up) {
395             printf("no parent, [node %02u]!\n", n - cs->data);
396             ab = 1;
397         } else {
398             if (n->up->left != n && n->up->right != n) {
399                 printf("broken parent [node %02u, parent node %02u]\n", n - cs->data, n->up - cs->data);
400                 ab = 1;
401             }
402         }
403     } else {
404         if (n->up) {
405             printf("root with a parent, [node %02u]!\n", n - cs->data);
406             ab = 1;
407         }
408     }
409     ab |= printtree(cs, n->left, d + 1);
410     return ab;
411 }
412 #else
413 #define printtree(a, b, c) (0)
414 #endif
415 
416 /* For troubleshooting only; prints out one specific node */
417 /* #define PRINT_NODE */
418 #ifdef PRINT_NODE
printnode(const char * prefix,struct cache_set * cs,struct node * n)419 static void printnode(const char *prefix, struct cache_set *cs, struct node *n)
420 {
421     if (!prefix || !cs || !cs->data) {
422         printf("bad args!\n");
423         return;
424     }
425     if (!n) {
426         printf("no node!\n");
427         return;
428     }
429     printf("%s node [%02u]:", prefix, n - cs->data);
430     printf(" size=%lu digest=%llx,%llx\n", (unsigned long)(n->size), n->digest[0], n->digest[1]);
431     printf("\tleft=");
432     if (n->left)
433         printf("%02u ", n->left - cs->data);
434     else
435         printf("NULL ");
436     printf("right=");
437     if (n->right)
438         printf("%02u ", n->right - cs->data);
439     else
440         printf("NULL ");
441     printf("up=");
442     if (n->up)
443         printf("%02u ", n->up - cs->data);
444     else
445         printf("NULL ");
446 
447     printf("\tprev=");
448     if (n->prev)
449         printf("%02u ", n->prev - cs->data);
450     else
451         printf("NULL ");
452     printf("next=");
453     if (n->next)
454         printf("%02u\n", n->next - cs->data);
455     else
456         printf("NULL\n");
457 }
458 #else
459 #define printnode(a, b, c)
460 #endif
461 
462 /* #define PRINT_CHAINS */
463 #ifdef PRINT_CHAINS
464 /* For troubleshooting only, print the chain forwards and back */
printchain(const char * prefix,struct cache_set * cs)465 static inline void printchain(const char *prefix, struct cache_set *cs)
466 {
467     if (!cs || !cs->data) return;
468     if (prefix) printf("%s: ", prefix);
469     printf("chain by next: ");
470     {
471         unsigned int i = 0;
472         struct node *x = cs->first;
473         while (x) {
474             printf("%02d,", x - cs->data);
475             x = x->next;
476             i++;
477         }
478         printf(" [count=%u]\nchain by prev: ", i);
479         x = cs->last;
480         i = 0;
481         while (x) {
482             printf("%02d,", x - cs->data);
483             x = x->prev;
484             i++;
485         }
486         printf(" [count=%u]\n", i);
487     }
488 }
489 #else
490 #define printchain(a, b) (0)
491 #endif
492 
493 /* Looks up a node and splays it up to the root of the tree */
splay(int64_t * md5,size_t len,struct cache_set * cs)494 static int splay(int64_t *md5, size_t len, struct cache_set *cs)
495 {
496     struct node next = {{0, 0}, NULL, NULL, NULL, NULL, NULL, 0, 0}, *right = &next, *left = &next, *temp, *root = cs->root;
497     int comp, found = 0;
498 
499     if (!root)
500         return 0;
501 
502     while (1) {
503         comp = cmp(md5, len, root->digest, root->size);
504         if (comp < 0) {
505             if (!root->left) break;
506             if (cmp(md5, len, root->left->digest, root->left->size) < 0) {
507                 temp       = root->left;
508                 root->left = temp->right;
509                 if (temp->right) temp->right->up = root;
510                 temp->right = root;
511                 root->up    = temp;
512                 root        = temp;
513                 if (!root->left) break;
514             }
515             right->left = root;
516             root->up    = right;
517             right       = root;
518             root        = root->left;
519         } else if (comp > 0) {
520             if (!root->right) break;
521             if (cmp(md5, len, root->right->digest, root->right->size) > 0) {
522                 temp        = root->right;
523                 root->right = temp->left;
524                 if (temp->left) temp->left->up = root;
525                 temp->left = root;
526                 root->up   = temp;
527                 root       = temp;
528                 if (!root->right) break;
529             }
530             left->right = root;
531             root->up    = left;
532             left        = root;
533             root        = root->right;
534         } else {
535             found = 1;
536             break;
537         }
538     }
539 
540     left->right = root->left;
541     if (root->left) root->left->up = left;
542     right->left = root->right;
543     if (root->right) root->right->up = right;
544     root->left = next.right;
545     if (next.right) next.right->up = root;
546     root->right = next.left;
547     if (next.left) next.left->up = root;
548     root->up = NULL;
549     cs->root = root;
550     return found;
551 }
552 
553 /* Looks up an hash in the tree and maintains the replacement chain */
cacheset_lookup(struct cache_set * cs,unsigned char * md5,size_t size,uint32_t recursion_level)554 static inline int cacheset_lookup(struct cache_set *cs, unsigned char *md5, size_t size, uint32_t recursion_level)
555 {
556     int64_t hash[2];
557 
558     memcpy(hash, md5, 16);
559     if (splay(hash, size, cs)) {
560         struct node *o = cs->root->prev, *p = cs->root, *q = cs->root->next;
561 #ifdef PRINT_CHAINS
562         printf("promoting %02d\n", p - cs->data);
563         printchain("before", cs);
564 #endif
565         if (q) {
566             if (o)
567                 o->next = q;
568             else
569                 cs->first = q;
570             q->prev        = o;
571             cs->last->next = p;
572             p->prev        = cs->last;
573             p->next        = NULL;
574             cs->last       = p;
575         }
576 #ifdef PRINT_CHAINS
577         printchain("after", cs);
578 #endif
579 
580         // The recursion_level check here to prevent a "clean" result from exceeding max recursion from
581         // causing a false negative if the same file is scanned where the recursion depth is lower.
582         // e.g. if max-rec set to 4 and "file5" is malicious, a scan of file1 should not cause a scan of file3 to be "clean"
583         //      root
584         //      ├── file1 -> file2 -> file3 -> file4 -> file5
585         //      └── file3 -> file4 -> file5
586         // See: https://bugzilla.clamav.net/show_bug.cgi?id=1856
587         if (recursion_level >= p->minrec)
588             return 1;
589     }
590     return 0;
591 }
592 
593 /* If the hash is present nothing happens.
594    Otherwise a new node is created for the hash picking one from the begin of the chain.
595    Used nodes are moved to the end of the chain */
cacheset_add(struct cache_set * cs,unsigned char * md5,size_t size,uint32_t recursion_level)596 static inline void cacheset_add(struct cache_set *cs, unsigned char *md5, size_t size, uint32_t recursion_level)
597 {
598     struct node *newnode;
599     int64_t hash[2];
600 
601     memcpy(hash, md5, 16);
602     if (splay(hash, size, cs)) {
603         if (cs->root->minrec > recursion_level)
604             cs->root->minrec = recursion_level;
605         return; /* Already there */
606     }
607 
608     ptree("1:\n");
609     if (printtree(cs, cs->root, 0)) {
610         cli_errmsg("cacheset_add: inconsistent tree before choosing newnode, good luck\n");
611         return;
612     }
613 
614     newnode = cs->first;
615     while (newnode) {
616         if (!newnode->right && !newnode->left)
617             break;
618         if (newnode->next) {
619             if (newnode == newnode->next) {
620                 cli_errmsg("cacheset_add: cache chain in a bad state\n");
621                 return;
622             }
623             newnode = newnode->next;
624         } else {
625             cli_warnmsg("cacheset_add: end of chain reached\n");
626             return;
627         }
628     }
629     if (!newnode) {
630         cli_errmsg("cacheset_add: tree has got no end nodes\n");
631         return;
632     }
633     if (newnode->up) {
634         if (newnode->up->left == newnode)
635             newnode->up->left = NULL;
636         else
637             newnode->up->right = NULL;
638     }
639     if (newnode->prev)
640         newnode->prev->next = newnode->next;
641     if (newnode->next)
642         newnode->next->prev = newnode->prev;
643     if (cs->first == newnode)
644         cs->first = newnode->next;
645 
646     newnode->prev  = cs->last;
647     newnode->next  = NULL;
648     cs->last->next = newnode;
649     cs->last       = newnode;
650 
651     ptree("2:\n");
652     if (printtree(cs, cs->root, 0)) {
653         cli_errmsg("cacheset_add: inconsistent tree before adding newnode, good luck\n");
654         return;
655     }
656 
657     if (!cs->root) {
658         newnode->left  = NULL;
659         newnode->right = NULL;
660     } else {
661         if (cmp(hash, size, cs->root->digest, cs->root->size) < 0) {
662             newnode->left  = cs->root->left;
663             newnode->right = cs->root;
664             cs->root->left = NULL;
665         } else {
666             newnode->right  = cs->root->right;
667             newnode->left   = cs->root;
668             cs->root->right = NULL;
669         }
670         if (newnode->left) newnode->left->up = newnode;
671         if (newnode->right) newnode->right->up = newnode;
672     }
673     newnode->digest[0] = hash[0];
674     newnode->digest[1] = hash[1];
675     newnode->up        = NULL;
676     newnode->size      = size;
677     newnode->minrec    = recursion_level;
678     cs->root           = newnode;
679 
680     ptree("3: %lld\n", hash[1]);
681     if (printtree(cs, cs->root, 0)) {
682         cli_errmsg("cacheset_add: inconsistent tree after adding newnode, good luck\n");
683         return;
684     }
685     printnode("newnode", cs, newnode);
686 }
687 
688 /* If the hash is not present nothing happens other than splaying the tree.
689    Otherwise the identified node is removed from the tree and then placed back at
690    the front of the chain. */
cacheset_remove(struct cache_set * cs,unsigned char * md5,size_t size)691 static inline void cacheset_remove(struct cache_set *cs, unsigned char *md5, size_t size)
692 {
693     struct node *targetnode;
694     struct node *reattachnode;
695     int64_t hash[2];
696 
697     memcpy(hash, md5, 16);
698     if (splay(hash, size, cs) != 1) {
699         cli_dbgmsg("cacheset_remove: node not found in tree\n");
700         return; /* No op */
701     }
702 
703     ptree("cacheset_remove: node found and splayed to root\n");
704     targetnode = cs->root;
705     printnode("targetnode", cs, targetnode);
706 
707     /* First fix the tree */
708     if (targetnode->left == NULL) {
709         /* At left edge so prune */
710         cs->root = targetnode->right;
711         if (cs->root)
712             cs->root->up = NULL;
713     } else {
714         /* new root will come from leftside tree */
715         cs->root     = targetnode->left;
716         cs->root->up = NULL;
717         /* splay tree, expecting not found, bringing rightmost member to root */
718         splay(hash, size, cs);
719 
720         if (targetnode->right) {
721             /* reattach right tree to clean right-side attach point */
722             reattachnode = cs->root;
723             while (reattachnode->right)
724                 reattachnode = reattachnode->right; /* shouldn't happen, but safer in case of dupe */
725             reattachnode->right   = targetnode->right;
726             targetnode->right->up = reattachnode;
727         }
728     }
729     targetnode->size      = (size_t)0;
730     targetnode->digest[0] = 0;
731     targetnode->digest[1] = 0;
732     targetnode->up        = NULL;
733     targetnode->left      = NULL;
734     targetnode->right     = NULL;
735 
736     /* Tree is fixed, so now fix chain around targetnode */
737     if (targetnode->prev)
738         targetnode->prev->next = targetnode->next;
739     if (targetnode->next)
740         targetnode->next->prev = targetnode->prev;
741     if (cs->last == targetnode)
742         cs->last = targetnode->prev;
743 
744     /* Put targetnode at front of chain, if not there already */
745     if (cs->first != targetnode) {
746         targetnode->next = cs->first;
747         if (cs->first)
748             cs->first->prev = targetnode;
749         cs->first = targetnode;
750     }
751     targetnode->prev = NULL;
752 
753     printnode("root", cs, cs->root);
754     printnode("first", cs, cs->first);
755     printnode("last", cs, cs->last);
756 
757     printchain("remove (after)", cs);
758 }
759 #endif /* USE_SPLAY */
760 
761 /* COMMON STUFF --------------------------------------------------------------------- */
762 
763 struct CACHE {
764     struct cache_set cacheset;
765 #ifdef CL_THREAD_SAFE
766     pthread_mutex_t mutex;
767 #endif
768 };
769 
770 /* Allocates the trees for the engine cache */
cli_cache_init(struct cl_engine * engine)771 int cli_cache_init(struct cl_engine *engine)
772 {
773     struct CACHE *cache;
774     unsigned int i, j;
775 
776     if (!engine) {
777         cli_errmsg("cli_cache_init: mpool malloc fail\n");
778         return 1;
779     }
780 
781     if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
782         cli_dbgmsg("cli_cache_init: Caching disabled.\n");
783         return 0;
784     }
785 
786     if (!(cache = MPOOL_MALLOC(engine->mempool, sizeof(struct CACHE) * TREES))) {
787         cli_errmsg("cli_cache_init: mpool malloc fail\n");
788         return 1;
789     }
790 
791     for (i = 0; i < TREES; i++) {
792 #ifdef CL_THREAD_SAFE
793         if (pthread_mutex_init(&cache[i].mutex, NULL)) {
794             cli_errmsg("cli_cache_init: mutex init fail\n");
795             for (j = 0; j < i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
796             for (j = 0; j < i; j++) pthread_mutex_destroy(&cache[j].mutex);
797             MPOOL_FREE(engine->mempool, cache);
798             return 1;
799         }
800 #endif
801         if (cacheset_init(&cache[i].cacheset, engine->mempool)) {
802             for (j = 0; j < i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
803 #ifdef CL_THREAD_SAFE
804             for (j = 0; j <= i; j++) pthread_mutex_destroy(&cache[j].mutex);
805 #endif
806             MPOOL_FREE(engine->mempool, cache);
807             return 1;
808         }
809     }
810     engine->cache = cache;
811     return 0;
812 }
813 
814 /* Frees the engine cache */
cli_cache_destroy(struct cl_engine * engine)815 void cli_cache_destroy(struct cl_engine *engine)
816 {
817     struct CACHE *cache;
818     unsigned int i;
819 
820     if (!engine || !(cache = engine->cache))
821         return;
822 
823     if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
824         return;
825     }
826 
827     for (i = 0; i < TREES; i++) {
828         cacheset_destroy(&cache[i].cacheset, engine->mempool);
829 #ifdef CL_THREAD_SAFE
830         pthread_mutex_destroy(&cache[i].mutex);
831 #endif
832     }
833     MPOOL_FREE(engine->mempool, cache);
834 }
835 
836 /* Looks up an hash in the proper tree */
cache_lookup_hash(unsigned char * md5,size_t len,struct CACHE * cache,uint32_t recursion_level)837 static int cache_lookup_hash(unsigned char *md5, size_t len, struct CACHE *cache, uint32_t recursion_level)
838 {
839     unsigned int key = getkey(md5);
840     int ret          = CL_VIRUS;
841     struct CACHE *c;
842 
843     c = &cache[key];
844 #ifdef CL_THREAD_SAFE
845     if (pthread_mutex_lock(&c->mutex)) {
846         cli_errmsg("cache_lookup_hash: cache_lookup_hash: mutex lock fail\n");
847         return ret;
848     }
849 #endif
850 
851     /* cli_warnmsg("cache_lookup_hash: key is %u\n", key); */
852 
853     ret = (cacheset_lookup(&c->cacheset, md5, len, recursion_level)) ? CL_CLEAN : CL_VIRUS;
854 #ifdef CL_THREAD_SAFE
855     pthread_mutex_unlock(&c->mutex);
856     // if(ret == CL_CLEAN) cli_warnmsg("cached\n");
857 #endif
858     return ret;
859 }
860 
861 /* Adds an hash to the cache */
cache_add(unsigned char * md5,size_t size,cli_ctx * ctx)862 void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx)
863 {
864     unsigned int key = getkey(md5);
865     uint32_t level;
866     struct CACHE *c;
867 
868     if (!ctx || !ctx->engine || !ctx->engine->cache)
869         return;
870 
871     if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
872         cli_dbgmsg("cache_add: Caching disabled. Not adding sample to cache.\n");
873         return;
874     }
875 
876     level = (ctx->fmap && ctx->fmap->dont_cache_flag) ? ctx->recursion_level : 0;
877     if (ctx->found_possibly_unwanted && (level || 0 == ctx->recursion_level))
878         return;
879     if (SCAN_ALLMATCHES && (ctx->num_viruses > 0)) {
880         cli_dbgmsg("cache_add: alert found within same topfile, skipping cache\n");
881         return;
882     }
883     c = &ctx->engine->cache[key];
884 #ifdef CL_THREAD_SAFE
885     if (pthread_mutex_lock(&c->mutex)) {
886         cli_errmsg("cli_add: mutex lock fail\n");
887         return;
888     }
889 #endif
890 
891     /* cli_warnmsg("cache_add: key is %u\n", key); */
892 
893 #ifdef USE_LRUHASHCACHE
894     cacheset_add(&c->cacheset, md5, size, ctx->engine->mempool);
895 #else
896 #ifdef USE_SPLAY
897     cacheset_add(&c->cacheset, md5, size, level);
898 #else
899 #error #define USE_SPLAY or USE_LRUHASHCACHE
900 #endif
901 #endif
902 
903 #ifdef CL_THREAD_SAFE
904     pthread_mutex_unlock(&c->mutex);
905 #endif
906     cli_dbgmsg("cache_add: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (level %u)\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15], level);
907     return;
908 }
909 
910 /* Removes a hash from the cache */
cache_remove(unsigned char * md5,size_t size,const struct cl_engine * engine)911 void cache_remove(unsigned char *md5, size_t size, const struct cl_engine *engine)
912 {
913     unsigned int key = getkey(md5);
914     struct CACHE *c;
915 
916     if (!engine || !engine->cache)
917         return;
918 
919     if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
920         cli_dbgmsg("cache_remove: Caching disabled.\n");
921         return;
922     }
923 
924     /* cli_warnmsg("cache_remove: key is %u\n", key); */
925 
926     c = &engine->cache[key];
927 #ifdef CL_THREAD_SAFE
928     if (pthread_mutex_lock(&c->mutex)) {
929         cli_errmsg("cli_add: mutex lock fail\n");
930         return;
931     }
932 #endif
933 
934 #ifdef USE_LRUHASHCACHE
935     cacheset_remove(&c->cacheset, md5, size, engine->mempool);
936 #else
937 #ifdef USE_SPLAY
938     cacheset_remove(&c->cacheset, md5, size);
939 #else
940 #error #define USE_SPLAY or USE_LRUHASHCACHE
941 #endif
942 #endif
943 
944 #ifdef CL_THREAD_SAFE
945     pthread_mutex_unlock(&c->mutex);
946 #endif
947     cli_dbgmsg("cache_remove: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15]);
948     return;
949 }
950 
951 /* Hashes a file onto the provided buffer and looks it up the cache.
952    Returns CL_VIRUS if found, CL_CLEAN if not FIXME or a recoverable error,
953    and returns CL_EREAD if unrecoverable */
cache_check(unsigned char * hash,cli_ctx * ctx)954 cl_error_t cache_check(unsigned char *hash, cli_ctx *ctx)
955 {
956     fmap_t *map;
957     int ret;
958 
959     if (!ctx || !ctx->engine || !ctx->engine->cache)
960         return CL_VIRUS;
961 
962     if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
963         cli_dbgmsg("cache_check: Caching disabled. Returning CL_VIRUS.\n");
964         return CL_VIRUS;
965     }
966 
967     map = ctx->fmap;
968     ret = cache_lookup_hash(hash, map->len, ctx->engine->cache, ctx->recursion_level);
969     cli_dbgmsg("cache_check: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x is %s\n", hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7], hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15], (ret == CL_VIRUS) ? "negative" : "positive");
970     return ret;
971 }
972