1 /*
2  *  md5 based hashtab
3  *
4  *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5  *  Copyright (C) 2008-2013 Sourcefire, Inc.
6  *
7  *  Authors: aCaB <acab@clamav.net>
8  *
9  *  This program is free software; you can redistribute it and/or modify
10  *  it under the terms of the GNU General Public License version 2 as
11  *  published by the Free Software Foundation.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  *  You should have received a copy of the GNU General Public License
19  *  along with this program; if not, write to the Free Software
20  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21  *  MA 02110-1301, USA.
22  */
23 
24 #if HAVE_CONFIG_H
25 #include "clamav-config.h"
26 #endif
27 
28 #include <stdlib.h>
29 #if HAVE_STRING_H
30 #include <string.h>
31 #endif
32 
33 #include "clamav.h"
34 #include "uniq.h"
35 #include "others.h"
36 
uniq_init(uint32_t count)37 struct uniq *uniq_init(uint32_t count)
38 {
39     struct uniq *U;
40 
41     if (!count) return NULL;
42     U = cli_calloc(1, sizeof(*U));
43     if (!U) return NULL;
44 
45     U->md5s = cli_malloc(count * sizeof(*U->md5s));
46     if (!U->md5s) {
47         uniq_free(U);
48         return NULL;
49     }
50     U->max_unique_items = count;
51 
52     return U;
53 }
54 
uniq_free(struct uniq * U)55 void uniq_free(struct uniq *U)
56 {
57     free(U->md5s);
58     free(U);
59 }
60 
uniq_add(struct uniq * U,const char * item,uint32_t item_len,char ** rhash,uint32_t * count)61 cl_error_t uniq_add(struct uniq *U, const char *item, uint32_t item_len, char **rhash, uint32_t *count)
62 {
63     cl_error_t status = CL_EARG;
64     unsigned int i;
65     uint8_t digest[16];
66     struct UNIQMD5 *m = NULL;
67 
68     if (!U) {
69         /* Invalid args */
70         goto done;
71     }
72 
73     /* Uniq adds are limited by the maximum allocated in uniq_init(). */
74     if (U->cur_unique_items >= U->max_unique_items) {
75         /* Attempted to add more uniq items than may be stored. */
76         status = CL_EMAXSIZE;
77         goto done;
78     }
79 
80     /* Make a hash of the item string */
81     if (NULL == cl_hash_data("md5", item, item_len, digest, NULL)) {
82         /* Failed to create hash of item. */
83         status = CL_EFORMAT;
84         goto done;
85     }
86 
87     /* Check for md5 digest match in md5 collection */
88     if (U->items && U->md5s[U->idx[*digest]].md5[0] == *digest)
89         for (m = &U->md5s[U->idx[*digest]]; m; m = m->next)
90             if (!memcmp(&digest[1], &m->md5[1], 15)) break;
91 
92     if (!m) {
93         /* No match. Add new md5 to list */
94         const char HEX[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
95 
96         m        = &U->md5s[U->items];
97         m->count = 0;
98 
99         if (U->items && U->md5s[U->idx[*digest]].md5[0] == *digest)
100             m->next = &U->md5s[U->idx[*digest]];
101         else
102             m->next = NULL;
103 
104         U->idx[*digest] = U->items;
105 
106         for (i = 0; i < 16; i++) {
107             m->name[i * 2]     = HEX[digest[i] >> 4 & 0xf];
108             m->name[i * 2 + 1] = HEX[digest[i] & 0xf];
109             m->md5[i]          = digest[i];
110         }
111         m->name[32] = '\0';
112 
113         /* Increment # of unique items. */
114         U->cur_unique_items++;
115     }
116 
117     /* Increment total # of items. */
118     U->items++;
119 
120     /* Increment # items matching this md5 digest (probably just this 1). */
121     m->count++;
122 
123     /* Pass back the ascii hash, if requested. */
124     if (rhash) *rhash = m->name;
125 
126     /* Pass back the count, if requested. */
127     if (count) *count = m->count;
128 
129     status = CL_SUCCESS;
130 
131 done:
132     return status;
133 }
134 
uniq_get(struct uniq * U,const char * item,uint32_t item_len,char ** rhash,uint32_t * count)135 cl_error_t uniq_get(struct uniq *U, const char *item, uint32_t item_len, char **rhash, uint32_t *count)
136 {
137     cl_error_t status = CL_EARG;
138     uint8_t digest[16];
139     struct UNIQMD5 *m = NULL;
140     uint32_t idx      = 0;
141 
142     if (!U || !count) {
143         /* Invalid args */
144         goto done;
145     }
146 
147     *count = 0;
148 
149     if (!U->items) {
150         goto not_found;
151     }
152 
153     /* Make a hash of the item string */
154     if (NULL == cl_hash_data("md5", item, item_len, digest, NULL)) {
155         /* Failed to create hash of item. */
156         status = CL_EFORMAT;
157         goto done;
158     }
159 
160     /* Get the md5s array index for the bucket list head. */
161     idx = U->idx[*digest];
162     m   = &U->md5s[idx];
163 
164     if (m->md5[0] != *digest) {
165         /*
166          * If the first two bytes in the digest doesn't actually match,
167          * then the item has never been added.
168          * This is a common scenario because the idx table is initialized
169          * to 0's.
170          */
171         goto not_found;
172     }
173 
174     do {
175         if (0 == memcmp(&digest[1], &m->md5[1], 15)) {
176             /* The item-hash matched.
177              * Pass back the ascii hash value (if requested).
178              * Return the count of matching items (will be 1+).
179              */
180             if (rhash)
181                 *rhash = m->name;
182             *count = m->count;
183             break;
184         }
185         m = m->next;
186     } while (NULL != m);
187 
188 not_found:
189     status = CL_SUCCESS;
190 
191 done:
192     return status;
193 }
194