1 /* Copyright (C) 2017-2020 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  */
23 
24 #include "suricata-common.h"
25 #include "conf.h"
26 #include "datasets.h"
27 #include "datasets-string.h"
28 #include "datasets-md5.h"
29 #include "datasets-sha256.h"
30 #include "datasets-reputation.h"
31 #include "util-thash.h"
32 #include "util-print.h"
33 #include "util-crypt.h"     // encode base64
34 #include "util-base64.h"    // decode base64
35 #include "util-byte.h"
36 #include "util-misc.h"
37 
/* Taken by the functions in this file around every walk or modification of
 * the global dataset list below. */
SCMutex sets_lock = SCMUTEX_INITIALIZER;
/* Head of the singly linked list (via Dataset::next) of all known datasets. */
static Dataset *sets = NULL;
/* Next id handed out by DatasetAlloc(); incremented on every allocation. */
static uint32_t set_ids = 0;
41 
42 static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
43         DataRepType *rep);
44 
/**
 * \brief Release a hash entry returned by a lookup.
 *
 * Drops the entry's use count with THashDecrUsecnt() and then unlocks it
 * with THashDataUnlock(), in that order.
 *
 * \param d hash entry to release
 */
static inline void DatasetUnlockData(THashData *d)
{
    (void) THashDecrUsecnt(d);
    THashDataUnlock(d);
}
50 static bool DatasetIsStatic(const char *save, const char *load);
51 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize);
52 
DatasetGetTypeFromString(const char * s)53 enum DatasetTypes DatasetGetTypeFromString(const char *s)
54 {
55     if (strcasecmp("md5", s) == 0)
56         return DATASET_TYPE_MD5;
57     if (strcasecmp("sha256", s) == 0)
58         return DATASET_TYPE_SHA256;
59     if (strcasecmp("string", s) == 0)
60         return DATASET_TYPE_STRING;
61     return DATASET_TYPE_NOTSET;
62 }
63 
DatasetAlloc(const char * name)64 static Dataset *DatasetAlloc(const char *name)
65 {
66     Dataset *set = SCCalloc(1, sizeof(*set));
67     if (set) {
68         set->id = set_ids++;
69     }
70     return set;
71 }
72 
DatasetSearchByName(const char * name)73 static Dataset *DatasetSearchByName(const char *name)
74 {
75     Dataset *set = sets;
76     while (set) {
77         if (strcasecmp(name, set->name) == 0 && set->hidden == false) {
78             return set;
79         }
80         set = set->next;
81     }
82     return NULL;
83 }
84 
HexToRaw(const uint8_t * in,size_t ins,uint8_t * out,size_t outs)85 static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
86 {
87     if (ins % 2 != 0)
88         return -1;
89     if (outs != ins / 2)
90         return -1;
91 
92     uint8_t hash[outs];
93     size_t i, x;
94     for (x = 0, i = 0; i < ins; i+=2, x++) {
95         char buf[3] = { 0, 0, 0 };
96         buf[0] = in[i];
97         buf[1] = in[i+1];
98 
99         long value = strtol(buf, NULL, 16);
100         if (value >= 0 && value <= 255)
101             hash[x] = (uint8_t)value;
102         else {
103             SCLogError(SC_ERR_INVALID_HASH, "hash byte out of range %ld", value);
104             return -1;
105         }
106     }
107 
108     memcpy(out, hash, outs);
109     return 0;
110 }
111 
/**
 * \brief Parse the reputation value at the start of a text field.
 *
 * The value runs up to the first ',' or newline, or to the end of the input,
 * and must be a decimal number in the range 0-65535. Anything after that
 * terminator is ignored.
 *
 * \param in      text starting at the reputation value
 * \param ins     number of bytes of \a in to consider
 * \param rep_out receives the parsed value on success
 *
 * \retval 0 on success
 * \retval -1 if the field is not a valid reputation value
 */
static int ParseRepLine(const char *in, size_t ins, DataRepType *rep_out)
{
    SCLogDebug("in '%s'", in);

    /* private NUL-terminated copy, so the field can be cut in place */
    char raw[ins + 1];
    memcpy(raw, in, ins);
    raw[ins] = '\0';

    /* cut at the first separator; strcspn() returns the full length when
     * there is none, which leaves the string unchanged */
    raw[strcspn(raw, ",\n")] = '\0';
    SCLogDebug("line '%s'", raw);

    const size_t value_len = strlen(raw);
    uint16_t v = 0;
    int r = StringParseU16RangeCheck(&v, 10, value_len, raw, 0, USHRT_MAX);
    /* the parser has to consume the whole field */
    if (r != (int)value_len) {
        SCLogError(SC_ERR_INVALID_NUMERIC_VALUE,
                "'%s' is not a valid reputation value (0-65535)", raw);
        return -1;
    }
    SCLogDebug("v %"PRIu16" raw %s", v, raw);

    rep_out->value = v;
    return 0;
}
156 
/**
 * \brief Load MD5 hashes from the file named by set->load into the set.
 *
 * Each line is either a 32 character hex MD5, or a hex MD5 followed by ','
 * and a reputation value. The trailing newline is stripped only when it is
 * actually there, so a final line without one is handled like any other
 * line. Any line that matches neither form is fatal.
 *
 * The file is opened with "a+" when it is also the save file, otherwise
 * with "r".
 *
 * \param set dataset to fill; nothing is done when set->load is empty
 *
 * \retval 0 on success, or when there is nothing to load
 * \retval -1 if the file could not be opened
 */
static int DatasetLoadMd5(Dataset *set)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
    const char *fopen_mode = "r";
    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
        fopen_mode = "a+";
    }

    FILE *fp = fopen(set->load, fopen_mode);
    if (fp == NULL) {
        SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
                set->load, strerror(errno));
        return -1;
    }

    uint32_t cnt = 0;
    char line[1024];
    while (fgets(line, (int)sizeof(line), fp) != NULL) {
        /* fgets() keeps the newline, but the last line of a file may not
         * have one: only strip what is really a newline */
        size_t len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            line[--len] = '\0';
        }

        /* straight black/white list */
        if (len == 32) {
            SCLogDebug("line: '%s'", line);

            uint8_t hash[16];
            if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0)
                FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
                        set->name, set->load);

            if (DatasetAdd(set, (const uint8_t *)hash, 16) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;

        /* list with rep data */
        } else if (len > 32 && line[32] == ',') {
            SCLogDebug("MD5 with REP line: '%s'", line);

            uint8_t hash[16];
            if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0)
                FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
                        set->name, set->load);

            /* the rep value starts right after the ',' at offset 32 */
            DataRepType rep = { .value = 0};
            if (ParseRepLine(line+33, len-33, &rep) < 0)
                FatalError(SC_ERR_FATAL, "bad rep for dataset %s/%s",
                        set->name, set->load);

            SCLogDebug("rep v:%u", rep.value);
            if (DatasetAddwRep(set, hash, 16, &rep) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);

            cnt++;
        }
        else {
            FatalError(SC_ERR_FATAL, "MD5 bad line len %u: '%s'",
                    (uint32_t)len, line);
        }
    }
    THashConsolidateMemcap(set->hash);

    fclose(fp);
    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
226 
/**
 * \brief Load SHA-256 hashes from the file named by set->load into the set.
 *
 * Each line is either a 64 character hex SHA-256, or a hex SHA-256 followed
 * by ',' and a reputation value. The trailing newline is stripped only when
 * it is actually there, so a final line without one is loaded like any other
 * line. Lines that match neither form are skipped without an error.
 *
 * The file is opened with "a+" when it is also the save file, otherwise
 * with "r".
 *
 * \param set dataset to fill; nothing is done when set->load is empty
 *
 * \retval 0 on success, or when there is nothing to load
 * \retval -1 if the file could not be opened
 */
static int DatasetLoadSha256(Dataset *set)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
    const char *fopen_mode = "r";
    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
        fopen_mode = "a+";
    }

    FILE *fp = fopen(set->load, fopen_mode);
    if (fp == NULL) {
        SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
                set->load, strerror(errno));
        return -1;
    }

    uint32_t cnt = 0;
    char line[1024];
    while (fgets(line, (int)sizeof(line), fp) != NULL) {
        /* fgets() keeps the newline, but the last line of a file may not
         * have one: only strip what is really a newline */
        size_t len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            line[--len] = '\0';
        }

        /* straight black/white list */
        if (len == 64) {
            SCLogDebug("line: '%s'", line);

            uint8_t hash[32];
            if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0)
                FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
                        set->name, set->load);

            if (DatasetAdd(set, (const uint8_t *)hash, (uint32_t)32) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;

        /* list with rep data */
        } else if (len > 64 && line[64] == ',') {
            SCLogDebug("SHA-256 with REP line: '%s'", line);

            uint8_t hash[32];
            if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0)
                FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
                        set->name, set->load);

            /* the rep value starts right after the ',' at offset 64 */
            DataRepType rep = { .value = 0 };
            if (ParseRepLine(line+65, len-65, &rep) < 0)
                FatalError(SC_ERR_FATAL, "bad rep for dataset %s/%s",
                        set->name, set->load);

            SCLogDebug("rep %u", rep.value);

            if (DatasetAddwRep(set, hash, 32, &rep) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;
        }
        /* anything else is ignored */
    }
    THashConsolidateMemcap(set->hash);

    fclose(fp);
    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
292 
/**
 * \brief Load base64 encoded strings from the file named by set->load.
 *
 * Each non-empty line is either a base64 string, or a base64 string followed
 * by ',' and a reputation value. The trailing newline is stripped only when
 * it is actually there, so a final line without one keeps its last
 * character. A line that fails to decode, has a bad reputation value or
 * cannot be added is fatal.
 *
 * The file is opened with "a+" when it is also the save file, otherwise
 * with "r".
 *
 * \param set dataset to fill; nothing is done when set->load is empty
 *
 * \retval 0 on success, or when there is nothing to load
 * \retval -1 if the file could not be opened
 */
static int DatasetLoadString(Dataset *set)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
    const char *fopen_mode = "r";
    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
        fopen_mode = "a+";
    }

    FILE *fp = fopen(set->load, fopen_mode);
    if (fp == NULL) {
        SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
                set->load, strerror(errno));
        return -1;
    }

    uint32_t cnt = 0;
    char line[1024];
    /* Decode target. Sized like the line buffer instead of per line, which
     * is never smaller than the encoded input and avoids a zero length
     * array for a line that starts with ','. */
    uint8_t decoded[sizeof(line)];
    while (fgets(line, (int)sizeof(line), fp) != NULL) {
        /* fgets() keeps the newline, but the last line of a file may not
         * have one: only strip what is really a newline */
        size_t len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            line[--len] = '\0';
        }
        if (len == 0)
            continue;
        SCLogDebug("line: '%s'", line);

        char *r = strchr(line, ',');
        if (r == NULL) {
            uint32_t dlen = DecodeBase64(decoded, (const uint8_t *)line, len, 1);
            if (dlen == 0)
                FatalError(SC_ERR_FATAL, "bad base64 encoding %s/%s",
                        set->name, set->load);

            if (DatasetAdd(set, (const uint8_t *)decoded, dlen) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;
        } else {
            /* split in place: 'line' is the base64 part, 'r' the rep part */
            *r = '\0';
            const size_t b64_len = (size_t)(r - line);

            uint32_t dlen = DecodeBase64(decoded, (const uint8_t *)line, b64_len, 1);
            if (dlen == 0)
                FatalError(SC_ERR_FATAL, "bad base64 encoding %s/%s",
                        set->name, set->load);

            r++;
            SCLogDebug("r '%s'", r);

            DataRepType rep = { .value = 0 };
            if (ParseRepLine(r, strlen(r), &rep) < 0)
                FatalError(SC_ERR_FATAL, "die: bad rep");
            SCLogDebug("rep %u", rep.value);

            if (DatasetAddwRep(set, (const uint8_t *)decoded, dlen, &rep) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;

            SCLogDebug("line with rep %s, %s", line, r);
        }
    }
    THashConsolidateMemcap(set->hash);

    fclose(fp);
    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
368 
/* Defined outside this file. When set, DatasetGetPath() does not fall back
 * to the path as given for a load file missing from the data directory. */
extern bool g_system;

/* Tells DatasetGetPath() what the path is for. */
enum DatasetGetPathType {
    /* relative paths are always placed under the data directory */
    TYPE_STATE,
    /* like TYPE_STATE, but a path that does not exist under the data
     * directory may be used as given instead */
    TYPE_LOAD,
};
375 
DatasetGetPath(const char * in_path,char * out_path,size_t out_size,enum DatasetGetPathType type)376 static void DatasetGetPath(const char *in_path,
377         char *out_path, size_t out_size, enum DatasetGetPathType type)
378 {
379     char path[PATH_MAX];
380     struct stat st;
381 
382     if (PathIsAbsolute(in_path)) {
383         strlcpy(path, in_path, sizeof(path));
384         strlcpy(out_path, path, out_size);
385         return;
386     }
387 
388     const char *data_dir = ConfigGetDataDirectory();
389     if (stat(data_dir, &st) != 0) {
390         SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
391         return;
392     }
393 
394     snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
395 
396     if (type == TYPE_LOAD) {
397         if (stat(path, &st) != 0) {
398             SCLogDebug("path %s: %s", path, strerror(errno));
399             if (!g_system) {
400                 snprintf(path, sizeof(path), "%s", in_path);
401             }
402         }
403     }
404     strlcpy(out_path, path, out_size);
405     SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
406 }
407 
408 /** \brief look for set by name without creating it */
DatasetFind(const char * name,enum DatasetTypes type)409 Dataset *DatasetFind(const char *name, enum DatasetTypes type)
410 {
411     SCMutexLock(&sets_lock);
412     Dataset *set = DatasetSearchByName(name);
413     if (set) {
414         if (set->type != type) {
415             SCMutexUnlock(&sets_lock);
416             return NULL;
417         }
418     }
419     SCMutexUnlock(&sets_lock);
420     return set;
421 }
422 
DatasetGet(const char * name,enum DatasetTypes type,const char * save,const char * load,uint64_t memcap,uint32_t hashsize)423 Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
424         uint64_t memcap, uint32_t hashsize)
425 {
426     uint64_t default_memcap = 0;
427     uint32_t default_hashsize = 0;
428     if (strlen(name) > DATASET_NAME_MAX_LEN) {
429         return NULL;
430     }
431 
432     SCMutexLock(&sets_lock);
433     Dataset *set = DatasetSearchByName(name);
434     if (set) {
435         if (type != DATASET_TYPE_NOTSET && set->type != type) {
436             SCLogError(SC_ERR_DATASET, "dataset %s already "
437                     "exists and is of type %u",
438                 set->name, set->type);
439             SCMutexUnlock(&sets_lock);
440             return NULL;
441         }
442 
443         if ((save == NULL || strlen(save) == 0) &&
444             (load == NULL || strlen(load) == 0)) {
445             // OK, rule keyword doesn't have to set state/load,
446             // even when yaml set has set it.
447         } else {
448             if ((save == NULL && strlen(set->save) > 0) ||
449                     (save != NULL && strcmp(set->save, save) != 0)) {
450                 SCLogError(SC_ERR_DATASET, "dataset %s save mismatch: %s != %s",
451                         set->name, set->save, save);
452                 SCMutexUnlock(&sets_lock);
453                 return NULL;
454             }
455             if ((load == NULL && strlen(set->load) > 0) ||
456                     (load != NULL && strcmp(set->load, load) != 0)) {
457                 SCLogError(SC_ERR_DATASET, "dataset %s load mismatch: %s != %s",
458                         set->name, set->load, load);
459                 SCMutexUnlock(&sets_lock);
460                 return NULL;
461             }
462         }
463 
464         SCMutexUnlock(&sets_lock);
465         return set;
466     } else {
467         if (type == DATASET_TYPE_NOTSET) {
468             SCLogError(SC_ERR_DATASET, "dataset %s not defined", name);
469             goto out_err;
470         }
471     }
472 
473     set = DatasetAlloc(name);
474     if (set == NULL) {
475         goto out_err;
476     }
477 
478     strlcpy(set->name, name, sizeof(set->name));
479     set->type = type;
480     if (save && strlen(save)) {
481         strlcpy(set->save, save, sizeof(set->save));
482         SCLogDebug("name %s save '%s'", name, set->save);
483     }
484     if (load && strlen(load)) {
485         strlcpy(set->load, load, sizeof(set->load));
486         SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
487     }
488 
489     char cnf_name[128];
490     snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
491 
492     GetDefaultMemcap(&default_memcap, &default_hashsize);
493     switch (type) {
494         case DATASET_TYPE_MD5:
495             set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
496                     Md5StrCompare, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
497                     hashsize > 0 ? hashsize : default_hashsize);
498             if (set->hash == NULL)
499                 goto out_err;
500             if (DatasetLoadMd5(set) < 0)
501                 goto out_err;
502             break;
503         case DATASET_TYPE_STRING:
504             set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
505                     StringCompare, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
506                     hashsize > 0 ? hashsize : default_hashsize);
507             if (set->hash == NULL)
508                 goto out_err;
509             if (DatasetLoadString(set) < 0)
510                 goto out_err;
511             break;
512         case DATASET_TYPE_SHA256:
513             set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
514                     Sha256StrHash, Sha256StrCompare, load != NULL ? 1 : 0,
515                     memcap > 0 ? memcap : default_memcap,
516                     hashsize > 0 ? hashsize : default_hashsize);
517             if (set->hash == NULL)
518                 goto out_err;
519             if (DatasetLoadSha256(set) < 0)
520                 goto out_err;
521             break;
522     }
523 
524     SCLogDebug("set %p/%s type %u save %s load %s",
525             set, set->name, set->type, set->save, set->load);
526 
527     set->next = sets;
528     sets = set;
529 
530     SCMutexUnlock(&sets_lock);
531     return set;
532 out_err:
533     if (set) {
534         if (set->hash) {
535             THashShutdown(set->hash);
536         }
537         SCFree(set);
538     }
539     SCMutexUnlock(&sets_lock);
540     return NULL;
541 }
542 
/**
 * \brief Tell whether a set is static.
 *
 * A set is static when it has a load file but no save/state file.
 *
 * \param save save path, may be NULL or empty
 * \param load load path, may be NULL or empty
 * \retval true if \a load is non-empty and \a save is NULL or empty
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = (load != NULL) && (load[0] != '\0');
    const bool has_save = (save != NULL) && (save[0] != '\0');

    return has_load && !has_save;
}
554 
DatasetReload(void)555 void DatasetReload(void)
556 {
557     /* In order to reload the datasets, just mark the current sets as hidden
558      * and clean them up later.
559      * New datasets shall be created with the rule reload and do not require
560      * any intervention.
561      * */
562     SCMutexLock(&sets_lock);
563     Dataset *set = sets;
564     while (set) {
565         if (!DatasetIsStatic(set->save, set->load) || set->from_yaml == true) {
566             SCLogDebug("Not a static set, skipping %s", set->name);
567             set = set->next;
568             continue;
569         }
570         set->hidden = true;
571         SCLogDebug("Set %s at %p hidden successfully", set->name, set);
572         set = set->next;
573     }
574     SCMutexUnlock(&sets_lock);
575 }
576 
DatasetPostReloadCleanup(void)577 void DatasetPostReloadCleanup(void)
578 {
579     SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
580     SCMutexLock(&sets_lock);
581     Dataset *cur = sets;
582     Dataset *prev = NULL;
583     while (cur) {
584         Dataset *next = cur->next;
585         if (cur->hidden == false) {
586             prev = cur;
587             cur = next;
588             continue;
589         }
590         // Delete the set in case it was hidden
591         if (prev != NULL) {
592             prev->next = next;
593         } else {
594             sets = next;
595         }
596         THashShutdown(cur->hash);
597         SCFree(cur);
598         cur = next;
599     }
600     SCMutexUnlock(&sets_lock);
601 }
602 
GetDefaultMemcap(uint64_t * memcap,uint32_t * hashsize)603 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
604 {
605     const char *str = NULL;
606     if (ConfGetValue("datasets.defaults.memcap", &str) == 1) {
607         if (ParseSizeStringU64(str, memcap) < 0) {
608             SCLogWarning(SC_ERR_INVALID_VALUE,
609                     "memcap value cannot be deduced: %s,"
610                     " resetting to default",
611                     str);
612             *memcap = 0;
613         }
614     }
615     if (ConfGetValue("datasets.defaults.hashsize", &str) == 1) {
616         if (ParseSizeStringU32(str, hashsize) < 0) {
617             SCLogWarning(SC_ERR_INVALID_VALUE,
618                     "hashsize value cannot be deduced: %s,"
619                     " resetting to default",
620                     str);
621             *hashsize = 0;
622         }
623     }
624 }
625 
DatasetsInit(void)626 int DatasetsInit(void)
627 {
628     SCLogDebug("datasets start");
629     int n = 0;
630     ConfNode *datasets = ConfGetNode("datasets");
631     uint64_t default_memcap = 0;
632     uint32_t default_hashsize = 0;
633     GetDefaultMemcap(&default_memcap, &default_hashsize);
634     if (datasets != NULL) {
635         int list_pos = 0;
636         ConfNode *iter = NULL;
637         TAILQ_FOREACH(iter, &datasets->head, next) {
638             if (iter->name == NULL) {
639                 list_pos++;
640                 continue;
641             }
642 
643             char save[PATH_MAX] = "";
644             char load[PATH_MAX] = "";
645             uint64_t memcap = 0;
646             uint32_t hashsize = 0;
647 
648             const char *set_name = iter->name;
649             if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
650                 FatalError(SC_ERR_CONF_NAME_TOO_LONG, "set name '%s' too long, max %d chars",
651                         set_name, DATASET_NAME_MAX_LEN);
652             }
653 
654             ConfNode *set_type =
655                 ConfNodeLookupChild(iter, "type");
656             if (set_type == NULL) {
657                 list_pos++;
658                 continue;
659             }
660 
661             ConfNode *set_save =
662                 ConfNodeLookupChild(iter, "state");
663             if (set_save) {
664                 DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
665                 strlcpy(load, save, sizeof(load));
666             } else {
667                 ConfNode *set_load =
668                     ConfNodeLookupChild(iter, "load");
669                 if (set_load) {
670                     DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
671                 }
672             }
673 
674             ConfNode *set_memcap = ConfNodeLookupChild(iter, "memcap");
675             if (set_memcap) {
676                 if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
677                     SCLogWarning(SC_ERR_INVALID_VALUE,
678                             "memcap value cannot be"
679                             " deduced: %s, resetting to default",
680                             set_memcap->val);
681                     memcap = 0;
682                 }
683             }
684             ConfNode *set_hashsize = ConfNodeLookupChild(iter, "hashsize");
685             if (set_hashsize) {
686                 if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
687                     SCLogWarning(SC_ERR_INVALID_VALUE,
688                             "hashsize value cannot be"
689                             " deduced: %s, resetting to default",
690                             set_hashsize->val);
691                     hashsize = 0;
692                 }
693             }
694             char conf_str[1024];
695             snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
696 
697             SCLogDebug("(%d) set %s type %s. Conf %s", n, set_name, set_type->val, conf_str);
698 
699             if (strcmp(set_type->val, "md5") == 0) {
700                 Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
701                         memcap > 0 ? memcap : default_memcap,
702                         hashsize > 0 ? hashsize : default_hashsize);
703                 if (dset == NULL)
704                     FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
705                 SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
706                 dset->from_yaml = true;
707                 n++;
708 
709             } else if (strcmp(set_type->val, "sha256") == 0) {
710                 Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
711                         memcap > 0 ? memcap : default_memcap,
712                         hashsize > 0 ? hashsize : default_hashsize);
713                 if (dset == NULL)
714                     FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
715                 SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
716                 dset->from_yaml = true;
717                 n++;
718 
719             } else if (strcmp(set_type->val, "string") == 0) {
720                 Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
721                         memcap > 0 ? memcap : default_memcap,
722                         hashsize > 0 ? hashsize : default_hashsize);
723                 if (dset == NULL)
724                     FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
725                 SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
726                 dset->from_yaml = true;
727                 n++;
728             }
729 
730             list_pos++;
731         }
732     }
733     SCLogDebug("datasets done: %p", datasets);
734     return 0;
735 }
736 
DatasetsDestroy(void)737 void DatasetsDestroy(void)
738 {
739     SCLogDebug("destroying datasets: %p", sets);
740     SCMutexLock(&sets_lock);
741     Dataset *set = sets;
742     while (set) {
743         SCLogDebug("destroying set %s", set->name);
744         Dataset *next = set->next;
745         THashShutdown(set->hash);
746         SCFree(set);
747         set = next;
748     }
749     sets = NULL;
750     SCMutexUnlock(&sets_lock);
751     SCLogDebug("destroying datasets done: %p", sets);
752 }
753 
/**
 * \brief Output callback that writes one record to a stdio stream.
 *
 * \param ctx      the FILE to write to, or NULL to write nothing
 * \param data     bytes to write
 * \param data_len number of bytes in \a data
 *
 * \retval the result of fwrite() for one item of \a data_len bytes, i.e. 1
 *         when the record was written and 0 otherwise; 0 when \a ctx is NULL
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out = ctx;
    if (out == NULL) {
        return 0;
    }
    return fwrite(data, data_len, 1, out);
}
763 
Md5AsAscii(const void * s,char * out,size_t out_size)764 static int Md5AsAscii(const void *s, char *out, size_t out_size)
765 {
766     const Md5Type *md5 = s;
767     uint32_t x;
768     int i;
769     char str[256];
770     for (i = 0, x = 0; x < sizeof(md5->md5); x++) {
771         i += snprintf(&str[i], 255-i, "%02x", md5->md5[x]);
772     }
773     strlcat(out, str, out_size);
774     strlcat(out, "\n", out_size);
775     return strlen(out);
776 }
777 
Sha256AsAscii(const void * s,char * out,size_t out_size)778 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
779 {
780     const Sha256Type *sha = s;
781     uint32_t x;
782     int i;
783     char str[256];
784     for (i = 0, x = 0; x < sizeof(sha->sha256); x++) {
785         i += snprintf(&str[i], 255-i, "%02x", sha->sha256[x]);
786     }
787     strlcat(out, str, out_size);
788     strlcat(out, "\n", out_size);
789     return strlen(out);
790 }
791 
DatasetsSave(void)792 void DatasetsSave(void)
793 {
794     SCLogDebug("saving datasets: %p", sets);
795     SCMutexLock(&sets_lock);
796     Dataset *set = sets;
797     while (set) {
798         if (strlen(set->save) == 0)
799             goto next;
800 
801         FILE *fp = fopen(set->save, "w");
802         if (fp == NULL)
803             goto next;
804 
805         SCLogDebug("dumping %s to %s", set->name, set->save);
806 
807         switch (set->type) {
808             case DATASET_TYPE_STRING:
809                 THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
810                 break;
811             case DATASET_TYPE_MD5:
812                 THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
813                 break;
814             case DATASET_TYPE_SHA256:
815                 THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
816                 break;
817         }
818 
819         fclose(fp);
820 
821     next:
822         set = set->next;
823     }
824     SCMutexUnlock(&sets_lock);
825 }
826 
/**
 * \brief Check whether a string is present in a string dataset.
 *
 * \param set      dataset to search
 * \param data     bytes to look up
 * \param data_len number of bytes in \a data
 *
 * \retval 1 if found
 * \retval 0 if not found
 * \retval -1 if \a set is NULL
 */
static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;

    StringType key = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    /* a hit has to be released again */
    DatasetUnlockData(hit);
    return 1;
}
840 
DatasetLookupStringwRep(Dataset * set,const uint8_t * data,const uint32_t data_len,const DataRepType * rep)841 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
842         const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
843 {
844     DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
845 
846     if (set == NULL)
847         return rrep;
848 
849     StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
850     THashData *rdata = THashLookupFromHash(set->hash, &lookup);
851     if (rdata) {
852         StringType *found = rdata->data;
853         rrep.found = true;
854         rrep.rep = found->rep;
855         DatasetUnlockData(rdata);
856         return rrep;
857     }
858     return rrep;
859 }
860 
/**
 * \brief Check whether an MD5 hash is present in an MD5 dataset.
 *
 * \param set      dataset to search
 * \param data     raw hash bytes
 * \param data_len number of bytes in \a data; must be 16
 *
 * \retval 1 if found
 * \retval 0 if not found
 * \retval -1 if \a set is NULL or \a data_len is not 16
 */
static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL || data_len != 16)
        return -1;

    Md5Type key = { .rep.value = 0 };
    memcpy(key.md5, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    /* a hit has to be released again */
    DatasetUnlockData(hit);
    return 1;
}
878 
DatasetLookupMd5wRep(Dataset * set,const uint8_t * data,const uint32_t data_len,const DataRepType * rep)879 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
880         const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
881 {
882     DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
883 
884     if (set == NULL)
885         return rrep;
886 
887     if (data_len != 16)
888         return rrep;
889 
890     Md5Type lookup = { .rep.value = 0};
891     memcpy(lookup.md5, data, data_len);
892     THashData *rdata = THashLookupFromHash(set->hash, &lookup);
893     if (rdata) {
894         Md5Type *found = rdata->data;
895         rrep.found = true;
896         rrep.rep = found->rep;
897         DatasetUnlockData(rdata);
898         return rrep;
899     }
900     return rrep;
901 }
902 
DatasetLookupSha256(Dataset * set,const uint8_t * data,const uint32_t data_len)903 static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
904 {
905     if (set == NULL)
906         return -1;
907 
908     if (data_len != 32)
909         return -1;
910 
911     Sha256Type lookup = { .rep.value = 0 };
912     memcpy(lookup.sha256, data, data_len);
913     THashData *rdata = THashLookupFromHash(set->hash, &lookup);
914     if (rdata) {
915         DatasetUnlockData(rdata);
916         return 1;
917     }
918     return 0;
919 }
920 
DatasetLookupSha256wRep(Dataset * set,const uint8_t * data,const uint32_t data_len,const DataRepType * rep)921 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
922         const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
923 {
924     DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
925 
926     if (set == NULL)
927         return rrep;
928 
929     if (data_len != 32)
930         return rrep;
931 
932     Sha256Type lookup = { .rep.value = 0 };
933     memcpy(lookup.sha256, data, data_len);
934     THashData *rdata = THashLookupFromHash(set->hash, &lookup);
935     if (rdata) {
936         Sha256Type *found = rdata->data;
937         rrep.found = true;
938         rrep.rep = found->rep;
939         DatasetUnlockData(rdata);
940         return rrep;
941     }
942     return rrep;
943 }
944 
945 /**
946  *  \brief see if \a data is part of the set
947  *  \param set dataset
948  *  \param data data to look up
949  *  \param data_len length in bytes of \a data
950  *  \retval -1 error
951  *  \retval 0 not found
952  *  \retval 1 found
953  */
DatasetLookup(Dataset * set,const uint8_t * data,const uint32_t data_len)954 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
955 {
956     if (set == NULL)
957         return -1;
958 
959     switch (set->type) {
960         case DATASET_TYPE_STRING:
961             return DatasetLookupString(set, data, data_len);
962         case DATASET_TYPE_MD5:
963             return DatasetLookupMd5(set, data, data_len);
964         case DATASET_TYPE_SHA256:
965             return DatasetLookupSha256(set, data, data_len);
966     }
967     return -1;
968 }
969 
DatasetLookupwRep(Dataset * set,const uint8_t * data,const uint32_t data_len,const DataRepType * rep)970 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
971         const DataRepType *rep)
972 {
973     DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
974     if (set == NULL)
975         return rrep;
976 
977     switch (set->type) {
978         case DATASET_TYPE_STRING:
979             return DatasetLookupStringwRep(set, data, data_len, rep);
980         case DATASET_TYPE_MD5:
981             return DatasetLookupMd5wRep(set, data, data_len, rep);
982         case DATASET_TYPE_SHA256:
983             return DatasetLookupSha256wRep(set, data, data_len, rep);
984     }
985     return rrep;
986 }
987 
988 /**
989  *  \retval 1 data was added to the hash
990  *  \retval 0 data was not added to the hash as it is already there
991  *  \retval -1 failed to add data to the hash
992  */
DatasetAddString(Dataset * set,const uint8_t * data,const uint32_t data_len)993 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
994 {
995     if (set == NULL)
996         return -1;
997 
998     StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
999         .rep.value = 0 };
1000     struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1001     if (res.data) {
1002         DatasetUnlockData(res.data);
1003         return res.is_new ? 1 : 0;
1004     }
1005     return -1;
1006 }
1007 
1008 /**
1009  *  \retval 1 data was added to the hash
1010  *  \retval 0 data was not added to the hash as it is already there
1011  *  \retval -1 failed to add data to the hash
1012  */
DatasetAddStringwRep(Dataset * set,const uint8_t * data,const uint32_t data_len,DataRepType * rep)1013 static int DatasetAddStringwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1014         DataRepType *rep)
1015 {
1016     if (set == NULL)
1017         return -1;
1018 
1019     StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1020         .rep = *rep };
1021     struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1022     if (res.data) {
1023         DatasetUnlockData(res.data);
1024         return res.is_new ? 1 : 0;
1025     }
1026     return -1;
1027 }
1028 
DatasetAddMd5(Dataset * set,const uint8_t * data,const uint32_t data_len)1029 static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1030 {
1031     if (set == NULL)
1032         return -1;
1033 
1034     if (data_len != 16)
1035         return -2;
1036 
1037     Md5Type lookup = { .rep.value = 0 };
1038     memcpy(lookup.md5, data, 16);
1039     struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1040     if (res.data) {
1041         DatasetUnlockData(res.data);
1042         return res.is_new ? 1 : 0;
1043     }
1044     return -1;
1045 }
1046 
DatasetAddMd5wRep(Dataset * set,const uint8_t * data,const uint32_t data_len,DataRepType * rep)1047 static int DatasetAddMd5wRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1048         DataRepType *rep)
1049 {
1050     if (set == NULL)
1051         return -1;
1052 
1053     if (data_len != 16)
1054         return -2;
1055 
1056     Md5Type lookup = { .rep = *rep };
1057     memcpy(lookup.md5, data, 16);
1058     struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1059     if (res.data) {
1060         DatasetUnlockData(res.data);
1061         return res.is_new ? 1 : 0;
1062     }
1063     return -1;
1064 }
1065 
DatasetAddSha256wRep(Dataset * set,const uint8_t * data,const uint32_t data_len,DataRepType * rep)1066 static int DatasetAddSha256wRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1067         DataRepType *rep)
1068 {
1069     if (set == NULL)
1070         return -1;
1071 
1072     if (data_len != 32)
1073         return -2;
1074 
1075     Sha256Type lookup = { .rep = *rep };
1076     memcpy(lookup.sha256, data, 32);
1077     struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1078     if (res.data) {
1079         DatasetUnlockData(res.data);
1080         return res.is_new ? 1 : 0;
1081     }
1082     return -1;
1083 }
1084 
DatasetAddSha256(Dataset * set,const uint8_t * data,const uint32_t data_len)1085 static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1086 {
1087     if (set == NULL)
1088         return -1;
1089 
1090     if (data_len != 32)
1091         return -2;
1092 
1093     Sha256Type lookup = { .rep.value = 0 };
1094     memcpy(lookup.sha256, data, 32);
1095     struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1096     if (res.data) {
1097         DatasetUnlockData(res.data);
1098         return res.is_new ? 1 : 0;
1099     }
1100     return -1;
1101 }
1102 
DatasetAdd(Dataset * set,const uint8_t * data,const uint32_t data_len)1103 int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
1104 {
1105     if (set == NULL)
1106         return -1;
1107 
1108     switch (set->type) {
1109         case DATASET_TYPE_STRING:
1110             return DatasetAddString(set, data, data_len);
1111         case DATASET_TYPE_MD5:
1112             return DatasetAddMd5(set, data, data_len);
1113         case DATASET_TYPE_SHA256:
1114             return DatasetAddSha256(set, data, data_len);
1115     }
1116     return -1;
1117 }
1118 
DatasetAddwRep(Dataset * set,const uint8_t * data,const uint32_t data_len,DataRepType * rep)1119 static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1120         DataRepType *rep)
1121 {
1122     if (set == NULL)
1123         return -1;
1124 
1125     switch (set->type) {
1126         case DATASET_TYPE_STRING:
1127             return DatasetAddStringwRep(set, data, data_len, rep);
1128         case DATASET_TYPE_MD5:
1129             return DatasetAddMd5wRep(set, data, data_len, rep);
1130         case DATASET_TYPE_SHA256:
1131             return DatasetAddSha256wRep(set, data, data_len, rep);
1132     }
1133     return -1;
1134 }
1135 
1136 /** \brief add serialized data to set
1137  *  \retval int 1 added
1138  *  \retval int 0 already in hash
1139  *  \retval int -1 API error (not added)
1140  *  \retval int -2 DATA error
1141  */
DatasetAddSerialized(Dataset * set,const char * string)1142 int DatasetAddSerialized(Dataset *set, const char *string)
1143 {
1144     if (set == NULL)
1145         return -1;
1146 
1147     switch (set->type) {
1148         case DATASET_TYPE_STRING: {
1149             // coverity[alloc_strlen : FALSE]
1150             uint8_t decoded[strlen(string)];
1151             uint32_t len = DecodeBase64(decoded, (const uint8_t *)string, strlen(string), 1);
1152             if (len == 0) {
1153                 return -2;
1154             }
1155 
1156             return DatasetAddString(set, decoded, len);
1157         }
1158         case DATASET_TYPE_MD5: {
1159             if (strlen(string) != 32)
1160                 return -2;
1161             uint8_t hash[16];
1162             if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1163                 return -2;
1164             return DatasetAddMd5(set, hash, 16);
1165         }
1166         case DATASET_TYPE_SHA256: {
1167             if (strlen(string) != 64)
1168                 return -2;
1169             uint8_t hash[32];
1170             if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1171                 return -2;
1172             return DatasetAddSha256(set, hash, 32);
1173         }
1174     }
1175     return -1;
1176 }
1177 
1178 /**
1179  *  \retval 1 data was removed from the hash
1180  *  \retval 0 data not removed (busy)
1181  *  \retval -1 data not found
1182  */
DatasetRemoveString(Dataset * set,const uint8_t * data,const uint32_t data_len)1183 static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1184 {
1185     if (set == NULL)
1186         return -1;
1187 
1188     StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1189         .rep.value = 0 };
1190     return THashRemoveFromHash(set->hash, &lookup);
1191 }
1192 
DatasetRemoveMd5(Dataset * set,const uint8_t * data,const uint32_t data_len)1193 static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1194 {
1195     if (set == NULL)
1196         return -1;
1197 
1198     if (data_len != 16)
1199         return -2;
1200 
1201     Md5Type lookup = { .rep.value = 0 };
1202     memcpy(lookup.md5, data, 16);
1203     return THashRemoveFromHash(set->hash, &lookup);
1204 }
1205 
DatasetRemoveSha256(Dataset * set,const uint8_t * data,const uint32_t data_len)1206 static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1207 {
1208     if (set == NULL)
1209         return -1;
1210 
1211     if (data_len != 32)
1212         return -2;
1213 
1214     Sha256Type lookup = { .rep.value = 0 };
1215     memcpy(lookup.sha256, data, 32);
1216     return THashRemoveFromHash(set->hash, &lookup);
1217 }
1218 
1219 /** \brief remove serialized data from set
1220  *  \retval int 1 removed
1221  *  \retval int 0 found but busy (not removed)
1222  *  \retval int -1 API error (not removed)
1223  *  \retval int -2 DATA error */
DatasetRemoveSerialized(Dataset * set,const char * string)1224 int DatasetRemoveSerialized(Dataset *set, const char *string)
1225 {
1226     if (set == NULL)
1227         return -1;
1228 
1229     switch (set->type) {
1230         case DATASET_TYPE_STRING: {
1231             // coverity[alloc_strlen : FALSE]
1232             uint8_t decoded[strlen(string)];
1233             uint32_t len = DecodeBase64(decoded, (const uint8_t *)string, strlen(string), 1);
1234             if (len == 0) {
1235                 return -2;
1236             }
1237 
1238             return DatasetRemoveString(set, decoded, len);
1239         }
1240         case DATASET_TYPE_MD5: {
1241             if (strlen(string) != 32)
1242                 return -2;
1243             uint8_t hash[16];
1244             if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1245                 return -2;
1246             return DatasetRemoveMd5(set, hash, 16);
1247         }
1248         case DATASET_TYPE_SHA256: {
1249             if (strlen(string) != 64)
1250                 return -2;
1251             uint8_t hash[32];
1252             if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1253                 return -2;
1254             return DatasetRemoveSha256(set, hash, 32);
1255         }
1256     }
1257     return -1;
1258 }
1259