1 /* Copyright (C) 2017-2020 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18 /**
19 * \file
20 *
21 * \author Victor Julien <victor@inliniac.net>
22 */
23
24 #include "suricata-common.h"
25 #include "conf.h"
26 #include "datasets.h"
27 #include "datasets-string.h"
28 #include "datasets-md5.h"
29 #include "datasets-sha256.h"
30 #include "datasets-reputation.h"
31 #include "util-thash.h"
32 #include "util-print.h"
33 #include "util-crypt.h" // encode base64
34 #include "util-base64.h" // decode base64
35 #include "util-byte.h"
36 #include "util-misc.h"
37
/* Lock held while the global dataset list is walked or modified. */
SCMutex sets_lock = SCMUTEX_INITIALIZER;
/* Head of the singly linked list of datasets (linked through Dataset::next). */
static Dataset *sets = NULL;
/* Id that DatasetAlloc() assigns to the next dataset it creates. */
static uint32_t set_ids = 0;

/* Forward declaration: defined further down, used by the file loaders. */
static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
        DataRepType *rep);
44
/**
 * \brief hand a hash entry back after use
 *
 * Decrements the use count of \a d and then unlocks it. The code in this
 * file calls it on entries returned by THashLookupFromHash() and
 * THashGetFromHash().
 */
static inline void DatasetUnlockData(THashData *d)
{
    /* the return value of the decrement is of no interest here */
    (void)THashDecrUsecnt(d);
    THashDataUnlock(d);
}
/* Forward declarations of helpers that are defined later in this file. */
static bool DatasetIsStatic(const char *save, const char *load);
static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize);
52
DatasetGetTypeFromString(const char * s)53 enum DatasetTypes DatasetGetTypeFromString(const char *s)
54 {
55 if (strcasecmp("md5", s) == 0)
56 return DATASET_TYPE_MD5;
57 if (strcasecmp("sha256", s) == 0)
58 return DATASET_TYPE_SHA256;
59 if (strcasecmp("string", s) == 0)
60 return DATASET_TYPE_STRING;
61 return DATASET_TYPE_NOTSET;
62 }
63
DatasetAlloc(const char * name)64 static Dataset *DatasetAlloc(const char *name)
65 {
66 Dataset *set = SCCalloc(1, sizeof(*set));
67 if (set) {
68 set->id = set_ids++;
69 }
70 return set;
71 }
72
DatasetSearchByName(const char * name)73 static Dataset *DatasetSearchByName(const char *name)
74 {
75 Dataset *set = sets;
76 while (set) {
77 if (strcasecmp(name, set->name) == 0 && set->hidden == false) {
78 return set;
79 }
80 set = set->next;
81 }
82 return NULL;
83 }
84
HexToRaw(const uint8_t * in,size_t ins,uint8_t * out,size_t outs)85 static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
86 {
87 if (ins % 2 != 0)
88 return -1;
89 if (outs != ins / 2)
90 return -1;
91
92 uint8_t hash[outs];
93 size_t i, x;
94 for (x = 0, i = 0; i < ins; i+=2, x++) {
95 char buf[3] = { 0, 0, 0 };
96 buf[0] = in[i];
97 buf[1] = in[i+1];
98
99 long value = strtol(buf, NULL, 16);
100 if (value >= 0 && value <= 255)
101 hash[x] = (uint8_t)value;
102 else {
103 SCLogError(SC_ERR_INVALID_HASH, "hash byte out of range %ld", value);
104 return -1;
105 }
106 }
107
108 memcpy(out, hash, outs);
109 return 0;
110 }
111
/**
 * \brief parse the reputation value at the start of \a in
 *
 * The value is the text up to the first ',' or newline, or up to the end of
 * the input if neither is present. It has to be a decimal number in the
 * range 0-65535 with no other characters around it.
 *
 * \param in      input text, not modified
 * \param ins     number of bytes of \a in to consider
 * \param rep_out receives the parsed value on success
 *
 * \retval 0 success
 * \retval -1 the text is not a valid reputation value
 */
static int ParseRepLine(const char *in, size_t ins, DataRepType *rep_out)
{
    SCLogDebug("in '%s'", in);

    /* work on a private, NUL terminated copy */
    char copy[ins + 1];
    memcpy(copy, in, ins);
    copy[ins] = '\0';

    /* cut the copy at the first separator, if there is one */
    char *field = copy;
    field[strcspn(field, ",\n")] = '\0';
    SCLogDebug("line '%s'", field);

    const size_t field_len = strlen(field);
    uint16_t value = 0;
    int parsed = StringParseU16RangeCheck(&value, 10, field_len, field, 0, USHRT_MAX);
    if (parsed != (int)field_len) {
        SCLogError(SC_ERR_INVALID_NUMERIC_VALUE,
                "'%s' is not a valid reputation value (0-65535)", field);
        return -1;
    }
    SCLogDebug("v %"PRIu16" raw %s", value, field);

    rep_out->value = value;
    return 0;
}
156
/**
 * \brief load MD5 entries for \a set from the file named by set->load
 *
 * Accepted line formats:
 *  - 32 hex characters: plain entry
 *  - 32 hex characters, a ',' and a reputation value: entry with reputation
 *
 * A line may end in "\n" or "\r\n"; the last line of the file may have no
 * line ending at all. Any other line, including an empty one, is a fatal
 * error, as is a bad hash, a bad reputation value or a failed add.
 *
 * When set->save names the same file as set->load the file is opened with
 * mode "a+" instead of "r".
 *
 * \retval 0 nothing to load (set->load is empty) or file processed
 * \retval -1 the file could not be opened
 */
static int DatasetLoadMd5(Dataset *set)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
    const char *fopen_mode = "r";
    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
        fopen_mode = "a+";
    }

    FILE *fp = fopen(set->load, fopen_mode);
    if (fp == NULL) {
        SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
                set->load, strerror(errno));
        return -1;
    }

    uint32_t cnt = 0;
    char line[1024];
    while (fgets(line, (int)sizeof(line), fp) != NULL) {
        /* Remove the line ending only if there is one. Blindly chopping
         * the last character corrupts a final line that lacks a newline. */
        size_t len = strlen(line);
        while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
            line[--len] = '\0';
        }

        /* straight black/white list */
        if (len == 32) {
            SCLogDebug("line: '%s'", line);

            uint8_t hash[16];
            if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0)
                FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
                        set->name, set->load);

            if (DatasetAdd(set, (const uint8_t *)hash, 16) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;

        /* list with rep data */
        } else if (len > 32 && line[32] == ',') {
            SCLogDebug("MD5 with REP line: '%s'", line);

            uint8_t hash[16];
            if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0)
                FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
                        set->name, set->load);

            /* the reputation value starts right after the ',' */
            DataRepType rep = { .value = 0 };
            if (ParseRepLine(line + 33, len - 33, &rep) < 0)
                FatalError(SC_ERR_FATAL, "bad rep for dataset %s/%s",
                        set->name, set->load);

            SCLogDebug("rep v:%u", rep.value);
            if (DatasetAddwRep(set, hash, 16, &rep) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);

            cnt++;
        } else {
            /* the reported length excludes the line ending */
            FatalError(SC_ERR_FATAL, "MD5 bad line len %u: '%s'",
                    (uint32_t)len, line);
        }
    }
    THashConsolidateMemcap(set->hash);

    fclose(fp);
    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
226
/**
 * \brief load SHA-256 entries for \a set from the file named by set->load
 *
 * Accepted line formats:
 *  - 64 hex characters: plain entry
 *  - 64 hex characters, a ',' and a reputation value: entry with reputation
 *
 * A line may end in "\n" or "\r\n"; the last line of the file may have no
 * line ending at all. Lines in any other format are skipped without a
 * message. A bad hash, a bad reputation value or a failed add is fatal.
 *
 * When set->save names the same file as set->load the file is opened with
 * mode "a+" instead of "r".
 *
 * \retval 0 nothing to load (set->load is empty) or file processed
 * \retval -1 the file could not be opened
 */
static int DatasetLoadSha256(Dataset *set)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
    const char *fopen_mode = "r";
    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
        fopen_mode = "a+";
    }

    FILE *fp = fopen(set->load, fopen_mode);
    if (fp == NULL) {
        SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
                set->load, strerror(errno));
        return -1;
    }

    uint32_t cnt = 0;
    char line[1024];
    while (fgets(line, (int)sizeof(line), fp) != NULL) {
        /* Remove the line ending only if there is one. Blindly chopping
         * the last character corrupts a final line that lacks a newline. */
        size_t len = strlen(line);
        while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
            line[--len] = '\0';
        }

        /* straight black/white list */
        if (len == 64) {
            SCLogDebug("line: '%s'", line);

            uint8_t hash[32];
            if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0)
                FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
                        set->name, set->load);

            if (DatasetAdd(set, (const uint8_t *)hash, (uint32_t)32) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;

        /* list with rep data */
        } else if (len > 64 && line[64] == ',') {
            SCLogDebug("SHA-256 with REP line: '%s'", line);

            uint8_t hash[32];
            if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0)
                FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
                        set->name, set->load);

            /* the reputation value starts right after the ',' */
            DataRepType rep = { .value = 0 };
            if (ParseRepLine(line + 65, len - 65, &rep) < 0)
                FatalError(SC_ERR_FATAL, "bad rep for dataset %s/%s",
                        set->name, set->load);

            SCLogDebug("rep %u", rep.value);

            if (DatasetAddwRep(set, hash, 32, &rep) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;
        }
        /* NOTE(review): unlike the MD5 loader, lines in any other format
         * are ignored here instead of being fatal - confirm this is
         * intended. */
    }
    THashConsolidateMemcap(set->hash);

    fclose(fp);
    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
292
/**
 * \brief load string entries for \a set from the file named by set->load
 *
 * Accepted line formats:
 *  - base64 text: plain entry
 *  - base64 text, a ',' and a reputation value: entry with reputation
 *
 * A line may end in "\n" or "\r\n"; the last line of the file may have no
 * line ending at all. Empty lines are skipped. Base64 text that decodes to
 * nothing, a bad reputation value or a failed add is fatal.
 *
 * When set->save names the same file as set->load the file is opened with
 * mode "a+" instead of "r".
 *
 * \retval 0 nothing to load (set->load is empty) or file processed
 * \retval -1 the file could not be opened
 */
static int DatasetLoadString(Dataset *set)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
    const char *fopen_mode = "r";
    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
        fopen_mode = "a+";
    }

    FILE *fp = fopen(set->load, fopen_mode);
    if (fp == NULL) {
        SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
                set->load, strerror(errno));
        return -1;
    }

    uint32_t cnt = 0;
    char line[1024];
    while (fgets(line, (int)sizeof(line), fp) != NULL) {
        /* Remove the line ending only if there is one. Blindly chopping
         * the last character corrupts a final line that lacks a newline. */
        size_t line_len = strlen(line);
        while (line_len > 0 && (line[line_len - 1] == '\n' || line[line_len - 1] == '\r')) {
            line[--line_len] = '\0';
        }
        if (line_len == 0)
            continue;

        /* Fixed size buffer as large as the line buffer. A buffer sized by
         * strlen() would have zero length for a line that starts with ','
         * which is undefined behavior. */
        uint8_t decoded[sizeof(line)];

        char *r = strchr(line, ',');
        if (r == NULL) {
            SCLogDebug("line: '%s'", line);

            uint32_t len = DecodeBase64(decoded, (const uint8_t *)line, strlen(line), 1);
            if (len == 0)
                FatalError(SC_ERR_FATAL, "bad base64 encoding %s/%s",
                        set->name, set->load);

            if (DatasetAdd(set, (const uint8_t *)decoded, len) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;
        } else {
            SCLogDebug("line: '%s'", line);

            /* split the line: base64 text in line, reputation after r */
            *r = '\0';

            uint32_t len = DecodeBase64(decoded, (const uint8_t *)line, strlen(line), 1);
            if (len == 0)
                FatalError(SC_ERR_FATAL, "bad base64 encoding %s/%s",
                        set->name, set->load);

            r++;
            SCLogDebug("r '%s'", r);

            DataRepType rep = { .value = 0 };
            if (ParseRepLine(r, strlen(r), &rep) < 0)
                FatalError(SC_ERR_FATAL, "die: bad rep");
            SCLogDebug("rep %u", rep.value);

            if (DatasetAddwRep(set, (const uint8_t *)decoded, len, &rep) < 0)
                FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
                        set->name, set->load);
            cnt++;

            SCLogDebug("line with rep %s, %s", line, r);
        }
    }
    THashConsolidateMemcap(set->hash);

    fclose(fp);
    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
368
/* Defined outside this file. DatasetGetPath() falls back to the unmodified
 * input path only when this flag is false. */
extern bool g_system;

/* Tells DatasetGetPath() what kind of path it is resolving. */
enum DatasetGetPathType {
    TYPE_STATE, /* used for the "state" setting of a dataset */
    TYPE_LOAD,  /* used for the "load" setting; the resolved path is checked with stat() */
};
375
DatasetGetPath(const char * in_path,char * out_path,size_t out_size,enum DatasetGetPathType type)376 static void DatasetGetPath(const char *in_path,
377 char *out_path, size_t out_size, enum DatasetGetPathType type)
378 {
379 char path[PATH_MAX];
380 struct stat st;
381
382 if (PathIsAbsolute(in_path)) {
383 strlcpy(path, in_path, sizeof(path));
384 strlcpy(out_path, path, out_size);
385 return;
386 }
387
388 const char *data_dir = ConfigGetDataDirectory();
389 if (stat(data_dir, &st) != 0) {
390 SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
391 return;
392 }
393
394 snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
395
396 if (type == TYPE_LOAD) {
397 if (stat(path, &st) != 0) {
398 SCLogDebug("path %s: %s", path, strerror(errno));
399 if (!g_system) {
400 snprintf(path, sizeof(path), "%s", in_path);
401 }
402 }
403 }
404 strlcpy(out_path, path, out_size);
405 SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
406 }
407
408 /** \brief look for set by name without creating it */
DatasetFind(const char * name,enum DatasetTypes type)409 Dataset *DatasetFind(const char *name, enum DatasetTypes type)
410 {
411 SCMutexLock(&sets_lock);
412 Dataset *set = DatasetSearchByName(name);
413 if (set) {
414 if (set->type != type) {
415 SCMutexUnlock(&sets_lock);
416 return NULL;
417 }
418 }
419 SCMutexUnlock(&sets_lock);
420 return set;
421 }
422
DatasetGet(const char * name,enum DatasetTypes type,const char * save,const char * load,uint64_t memcap,uint32_t hashsize)423 Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
424 uint64_t memcap, uint32_t hashsize)
425 {
426 uint64_t default_memcap = 0;
427 uint32_t default_hashsize = 0;
428 if (strlen(name) > DATASET_NAME_MAX_LEN) {
429 return NULL;
430 }
431
432 SCMutexLock(&sets_lock);
433 Dataset *set = DatasetSearchByName(name);
434 if (set) {
435 if (type != DATASET_TYPE_NOTSET && set->type != type) {
436 SCLogError(SC_ERR_DATASET, "dataset %s already "
437 "exists and is of type %u",
438 set->name, set->type);
439 SCMutexUnlock(&sets_lock);
440 return NULL;
441 }
442
443 if ((save == NULL || strlen(save) == 0) &&
444 (load == NULL || strlen(load) == 0)) {
445 // OK, rule keyword doesn't have to set state/load,
446 // even when yaml set has set it.
447 } else {
448 if ((save == NULL && strlen(set->save) > 0) ||
449 (save != NULL && strcmp(set->save, save) != 0)) {
450 SCLogError(SC_ERR_DATASET, "dataset %s save mismatch: %s != %s",
451 set->name, set->save, save);
452 SCMutexUnlock(&sets_lock);
453 return NULL;
454 }
455 if ((load == NULL && strlen(set->load) > 0) ||
456 (load != NULL && strcmp(set->load, load) != 0)) {
457 SCLogError(SC_ERR_DATASET, "dataset %s load mismatch: %s != %s",
458 set->name, set->load, load);
459 SCMutexUnlock(&sets_lock);
460 return NULL;
461 }
462 }
463
464 SCMutexUnlock(&sets_lock);
465 return set;
466 } else {
467 if (type == DATASET_TYPE_NOTSET) {
468 SCLogError(SC_ERR_DATASET, "dataset %s not defined", name);
469 goto out_err;
470 }
471 }
472
473 set = DatasetAlloc(name);
474 if (set == NULL) {
475 goto out_err;
476 }
477
478 strlcpy(set->name, name, sizeof(set->name));
479 set->type = type;
480 if (save && strlen(save)) {
481 strlcpy(set->save, save, sizeof(set->save));
482 SCLogDebug("name %s save '%s'", name, set->save);
483 }
484 if (load && strlen(load)) {
485 strlcpy(set->load, load, sizeof(set->load));
486 SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
487 }
488
489 char cnf_name[128];
490 snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
491
492 GetDefaultMemcap(&default_memcap, &default_hashsize);
493 switch (type) {
494 case DATASET_TYPE_MD5:
495 set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
496 Md5StrCompare, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
497 hashsize > 0 ? hashsize : default_hashsize);
498 if (set->hash == NULL)
499 goto out_err;
500 if (DatasetLoadMd5(set) < 0)
501 goto out_err;
502 break;
503 case DATASET_TYPE_STRING:
504 set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
505 StringCompare, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
506 hashsize > 0 ? hashsize : default_hashsize);
507 if (set->hash == NULL)
508 goto out_err;
509 if (DatasetLoadString(set) < 0)
510 goto out_err;
511 break;
512 case DATASET_TYPE_SHA256:
513 set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
514 Sha256StrHash, Sha256StrCompare, load != NULL ? 1 : 0,
515 memcap > 0 ? memcap : default_memcap,
516 hashsize > 0 ? hashsize : default_hashsize);
517 if (set->hash == NULL)
518 goto out_err;
519 if (DatasetLoadSha256(set) < 0)
520 goto out_err;
521 break;
522 }
523
524 SCLogDebug("set %p/%s type %u save %s load %s",
525 set, set->name, set->type, set->save, set->load);
526
527 set->next = sets;
528 sets = set;
529
530 SCMutexUnlock(&sets_lock);
531 return set;
532 out_err:
533 if (set) {
534 if (set->hash) {
535 THashShutdown(set->hash);
536 }
537 SCFree(set);
538 }
539 SCMutexUnlock(&sets_lock);
540 return NULL;
541 }
542
/**
 * \brief check whether a save/load combination describes a static set
 *
 * A set is static when it has a non-empty \a load but no \a save, i.e. it
 * is read from a file and never written back.
 *
 * \param save save path, may be NULL or empty
 * \param load load path, may be NULL or empty
 * \retval true  load is set and save is not
 * \retval false otherwise
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = (load != NULL) && (load[0] != '\0');
    const bool has_save = (save != NULL) && (save[0] != '\0');

    return has_load && !has_save;
}
554
DatasetReload(void)555 void DatasetReload(void)
556 {
557 /* In order to reload the datasets, just mark the current sets as hidden
558 * and clean them up later.
559 * New datasets shall be created with the rule reload and do not require
560 * any intervention.
561 * */
562 SCMutexLock(&sets_lock);
563 Dataset *set = sets;
564 while (set) {
565 if (!DatasetIsStatic(set->save, set->load) || set->from_yaml == true) {
566 SCLogDebug("Not a static set, skipping %s", set->name);
567 set = set->next;
568 continue;
569 }
570 set->hidden = true;
571 SCLogDebug("Set %s at %p hidden successfully", set->name, set);
572 set = set->next;
573 }
574 SCMutexUnlock(&sets_lock);
575 }
576
DatasetPostReloadCleanup(void)577 void DatasetPostReloadCleanup(void)
578 {
579 SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
580 SCMutexLock(&sets_lock);
581 Dataset *cur = sets;
582 Dataset *prev = NULL;
583 while (cur) {
584 Dataset *next = cur->next;
585 if (cur->hidden == false) {
586 prev = cur;
587 cur = next;
588 continue;
589 }
590 // Delete the set in case it was hidden
591 if (prev != NULL) {
592 prev->next = next;
593 } else {
594 sets = next;
595 }
596 THashShutdown(cur->hash);
597 SCFree(cur);
598 cur = next;
599 }
600 SCMutexUnlock(&sets_lock);
601 }
602
GetDefaultMemcap(uint64_t * memcap,uint32_t * hashsize)603 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
604 {
605 const char *str = NULL;
606 if (ConfGetValue("datasets.defaults.memcap", &str) == 1) {
607 if (ParseSizeStringU64(str, memcap) < 0) {
608 SCLogWarning(SC_ERR_INVALID_VALUE,
609 "memcap value cannot be deduced: %s,"
610 " resetting to default",
611 str);
612 *memcap = 0;
613 }
614 }
615 if (ConfGetValue("datasets.defaults.hashsize", &str) == 1) {
616 if (ParseSizeStringU32(str, hashsize) < 0) {
617 SCLogWarning(SC_ERR_INVALID_VALUE,
618 "hashsize value cannot be deduced: %s,"
619 " resetting to default",
620 str);
621 *hashsize = 0;
622 }
623 }
624 }
625
DatasetsInit(void)626 int DatasetsInit(void)
627 {
628 SCLogDebug("datasets start");
629 int n = 0;
630 ConfNode *datasets = ConfGetNode("datasets");
631 uint64_t default_memcap = 0;
632 uint32_t default_hashsize = 0;
633 GetDefaultMemcap(&default_memcap, &default_hashsize);
634 if (datasets != NULL) {
635 int list_pos = 0;
636 ConfNode *iter = NULL;
637 TAILQ_FOREACH(iter, &datasets->head, next) {
638 if (iter->name == NULL) {
639 list_pos++;
640 continue;
641 }
642
643 char save[PATH_MAX] = "";
644 char load[PATH_MAX] = "";
645 uint64_t memcap = 0;
646 uint32_t hashsize = 0;
647
648 const char *set_name = iter->name;
649 if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
650 FatalError(SC_ERR_CONF_NAME_TOO_LONG, "set name '%s' too long, max %d chars",
651 set_name, DATASET_NAME_MAX_LEN);
652 }
653
654 ConfNode *set_type =
655 ConfNodeLookupChild(iter, "type");
656 if (set_type == NULL) {
657 list_pos++;
658 continue;
659 }
660
661 ConfNode *set_save =
662 ConfNodeLookupChild(iter, "state");
663 if (set_save) {
664 DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
665 strlcpy(load, save, sizeof(load));
666 } else {
667 ConfNode *set_load =
668 ConfNodeLookupChild(iter, "load");
669 if (set_load) {
670 DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
671 }
672 }
673
674 ConfNode *set_memcap = ConfNodeLookupChild(iter, "memcap");
675 if (set_memcap) {
676 if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
677 SCLogWarning(SC_ERR_INVALID_VALUE,
678 "memcap value cannot be"
679 " deduced: %s, resetting to default",
680 set_memcap->val);
681 memcap = 0;
682 }
683 }
684 ConfNode *set_hashsize = ConfNodeLookupChild(iter, "hashsize");
685 if (set_hashsize) {
686 if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
687 SCLogWarning(SC_ERR_INVALID_VALUE,
688 "hashsize value cannot be"
689 " deduced: %s, resetting to default",
690 set_hashsize->val);
691 hashsize = 0;
692 }
693 }
694 char conf_str[1024];
695 snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
696
697 SCLogDebug("(%d) set %s type %s. Conf %s", n, set_name, set_type->val, conf_str);
698
699 if (strcmp(set_type->val, "md5") == 0) {
700 Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
701 memcap > 0 ? memcap : default_memcap,
702 hashsize > 0 ? hashsize : default_hashsize);
703 if (dset == NULL)
704 FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
705 SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
706 dset->from_yaml = true;
707 n++;
708
709 } else if (strcmp(set_type->val, "sha256") == 0) {
710 Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
711 memcap > 0 ? memcap : default_memcap,
712 hashsize > 0 ? hashsize : default_hashsize);
713 if (dset == NULL)
714 FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
715 SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
716 dset->from_yaml = true;
717 n++;
718
719 } else if (strcmp(set_type->val, "string") == 0) {
720 Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
721 memcap > 0 ? memcap : default_memcap,
722 hashsize > 0 ? hashsize : default_hashsize);
723 if (dset == NULL)
724 FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
725 SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
726 dset->from_yaml = true;
727 n++;
728 }
729
730 list_pos++;
731 }
732 }
733 SCLogDebug("datasets done: %p", datasets);
734 return 0;
735 }
736
DatasetsDestroy(void)737 void DatasetsDestroy(void)
738 {
739 SCLogDebug("destroying datasets: %p", sets);
740 SCMutexLock(&sets_lock);
741 Dataset *set = sets;
742 while (set) {
743 SCLogDebug("destroying set %s", set->name);
744 Dataset *next = set->next;
745 THashShutdown(set->hash);
746 SCFree(set);
747 set = next;
748 }
749 sets = NULL;
750 SCMutexUnlock(&sets_lock);
751 SCLogDebug("destroying datasets done: %p", sets);
752 }
753
/**
 * \brief output callback that writes one formatted record to a file
 *
 * \param ctx      FILE pointer to write to, may be NULL
 * \param data     bytes to write
 * \param data_len number of bytes in \a data
 *
 * \retval 1 the record was written completely
 * \retval 0 \a ctx is NULL, or fwrite() did not write the record
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out = ctx;
    if (out == NULL)
        return 0;

    /* written as a single item, so fwrite() reports 0 or 1 */
    const size_t items = fwrite(data, data_len, 1, out);
    return (int)items;
}
763
Md5AsAscii(const void * s,char * out,size_t out_size)764 static int Md5AsAscii(const void *s, char *out, size_t out_size)
765 {
766 const Md5Type *md5 = s;
767 uint32_t x;
768 int i;
769 char str[256];
770 for (i = 0, x = 0; x < sizeof(md5->md5); x++) {
771 i += snprintf(&str[i], 255-i, "%02x", md5->md5[x]);
772 }
773 strlcat(out, str, out_size);
774 strlcat(out, "\n", out_size);
775 return strlen(out);
776 }
777
Sha256AsAscii(const void * s,char * out,size_t out_size)778 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
779 {
780 const Sha256Type *sha = s;
781 uint32_t x;
782 int i;
783 char str[256];
784 for (i = 0, x = 0; x < sizeof(sha->sha256); x++) {
785 i += snprintf(&str[i], 255-i, "%02x", sha->sha256[x]);
786 }
787 strlcat(out, str, out_size);
788 strlcat(out, "\n", out_size);
789 return strlen(out);
790 }
791
DatasetsSave(void)792 void DatasetsSave(void)
793 {
794 SCLogDebug("saving datasets: %p", sets);
795 SCMutexLock(&sets_lock);
796 Dataset *set = sets;
797 while (set) {
798 if (strlen(set->save) == 0)
799 goto next;
800
801 FILE *fp = fopen(set->save, "w");
802 if (fp == NULL)
803 goto next;
804
805 SCLogDebug("dumping %s to %s", set->name, set->save);
806
807 switch (set->type) {
808 case DATASET_TYPE_STRING:
809 THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
810 break;
811 case DATASET_TYPE_MD5:
812 THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
813 break;
814 case DATASET_TYPE_SHA256:
815 THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
816 break;
817 }
818
819 fclose(fp);
820
821 next:
822 set = set->next;
823 }
824 SCMutexUnlock(&sets_lock);
825 }
826
/**
 * \brief check whether a string is part of a string set
 *
 * \retval -1 \a set is NULL
 * \retval 0 not found
 * \retval 1 found
 */
static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;

    StringType key = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    DatasetUnlockData(hit);
    return 1;
}
840
DatasetLookupStringwRep(Dataset * set,const uint8_t * data,const uint32_t data_len,const DataRepType * rep)841 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
842 const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
843 {
844 DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
845
846 if (set == NULL)
847 return rrep;
848
849 StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
850 THashData *rdata = THashLookupFromHash(set->hash, &lookup);
851 if (rdata) {
852 StringType *found = rdata->data;
853 rrep.found = true;
854 rrep.rep = found->rep;
855 DatasetUnlockData(rdata);
856 return rrep;
857 }
858 return rrep;
859 }
860
/**
 * \brief check whether a raw MD5 is part of an MD5 set
 *
 * \retval -1 \a set is NULL or \a data_len is not 16
 * \retval 0 not found
 * \retval 1 found
 */
static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL || data_len != 16)
        return -1;

    Md5Type key = { .rep.value = 0 };
    memcpy(key.md5, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    DatasetUnlockData(hit);
    return 1;
}
878
DatasetLookupMd5wRep(Dataset * set,const uint8_t * data,const uint32_t data_len,const DataRepType * rep)879 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
880 const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
881 {
882 DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
883
884 if (set == NULL)
885 return rrep;
886
887 if (data_len != 16)
888 return rrep;
889
890 Md5Type lookup = { .rep.value = 0};
891 memcpy(lookup.md5, data, data_len);
892 THashData *rdata = THashLookupFromHash(set->hash, &lookup);
893 if (rdata) {
894 Md5Type *found = rdata->data;
895 rrep.found = true;
896 rrep.rep = found->rep;
897 DatasetUnlockData(rdata);
898 return rrep;
899 }
900 return rrep;
901 }
902
/**
 * \brief check whether a raw SHA-256 is part of a SHA-256 set
 *
 * \retval -1 \a set is NULL or \a data_len is not 32
 * \retval 0 not found
 * \retval 1 found
 */
static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL || data_len != 32)
        return -1;

    Sha256Type key = { .rep.value = 0 };
    memcpy(key.sha256, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    DatasetUnlockData(hit);
    return 1;
}
920
DatasetLookupSha256wRep(Dataset * set,const uint8_t * data,const uint32_t data_len,const DataRepType * rep)921 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
922 const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
923 {
924 DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
925
926 if (set == NULL)
927 return rrep;
928
929 if (data_len != 32)
930 return rrep;
931
932 Sha256Type lookup = { .rep.value = 0 };
933 memcpy(lookup.sha256, data, data_len);
934 THashData *rdata = THashLookupFromHash(set->hash, &lookup);
935 if (rdata) {
936 Sha256Type *found = rdata->data;
937 rrep.found = true;
938 rrep.rep = found->rep;
939 DatasetUnlockData(rdata);
940 return rrep;
941 }
942 return rrep;
943 }
944
945 /**
946 * \brief see if \a data is part of the set
947 * \param set dataset
948 * \param data data to look up
949 * \param data_len length in bytes of \a data
950 * \retval -1 error
951 * \retval 0 not found
952 * \retval 1 found
953 */
DatasetLookup(Dataset * set,const uint8_t * data,const uint32_t data_len)954 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
955 {
956 if (set == NULL)
957 return -1;
958
959 switch (set->type) {
960 case DATASET_TYPE_STRING:
961 return DatasetLookupString(set, data, data_len);
962 case DATASET_TYPE_MD5:
963 return DatasetLookupMd5(set, data, data_len);
964 case DATASET_TYPE_SHA256:
965 return DatasetLookupSha256(set, data, data_len);
966 }
967 return -1;
968 }
969
DatasetLookupwRep(Dataset * set,const uint8_t * data,const uint32_t data_len,const DataRepType * rep)970 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
971 const DataRepType *rep)
972 {
973 DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
974 if (set == NULL)
975 return rrep;
976
977 switch (set->type) {
978 case DATASET_TYPE_STRING:
979 return DatasetLookupStringwRep(set, data, data_len, rep);
980 case DATASET_TYPE_MD5:
981 return DatasetLookupMd5wRep(set, data, data_len, rep);
982 case DATASET_TYPE_SHA256:
983 return DatasetLookupSha256wRep(set, data, data_len, rep);
984 }
985 return rrep;
986 }
987
988 /**
989 * \retval 1 data was added to the hash
990 * \retval 0 data was not added to the hash as it is already there
991 * \retval -1 failed to add data to the hash
992 */
DatasetAddString(Dataset * set,const uint8_t * data,const uint32_t data_len)993 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
994 {
995 if (set == NULL)
996 return -1;
997
998 StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
999 .rep.value = 0 };
1000 struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1001 if (res.data) {
1002 DatasetUnlockData(res.data);
1003 return res.is_new ? 1 : 0;
1004 }
1005 return -1;
1006 }
1007
1008 /**
1009 * \retval 1 data was added to the hash
1010 * \retval 0 data was not added to the hash as it is already there
1011 * \retval -1 failed to add data to the hash
1012 */
DatasetAddStringwRep(Dataset * set,const uint8_t * data,const uint32_t data_len,DataRepType * rep)1013 static int DatasetAddStringwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1014 DataRepType *rep)
1015 {
1016 if (set == NULL)
1017 return -1;
1018
1019 StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1020 .rep = *rep };
1021 struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1022 if (res.data) {
1023 DatasetUnlockData(res.data);
1024 return res.is_new ? 1 : 0;
1025 }
1026 return -1;
1027 }
1028
/**
 * \brief add a raw MD5 to an MD5 set, with reputation 0
 *
 * \retval 1 data was added to the hash
 * \retval 0 data was not added to the hash as it is already there
 * \retval -1 failed to add data to the hash, or \a set is NULL
 * \retval -2 \a data_len is not 16
 */
static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (data_len != 16)
        return -2;

    Md5Type key = { .rep.value = 0 };
    memcpy(key.md5, data, 16);

    struct THashDataGetResult res = THashGetFromHash(set->hash, &key);
    if (res.data == NULL)
        return -1;

    DatasetUnlockData(res.data);
    return res.is_new ? 1 : 0;
}
1046
/**
 * \brief add a raw MD5 together with its reputation to an MD5 set
 *
 * \retval 1 data was added to the hash
 * \retval 0 data was not added to the hash as it is already there
 * \retval -1 failed to add data to the hash, or \a set is NULL
 * \retval -2 \a data_len is not 16
 */
static int DatasetAddMd5wRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
        DataRepType *rep)
{
    if (set == NULL)
        return -1;
    if (data_len != 16)
        return -2;

    Md5Type key = { .rep = *rep };
    memcpy(key.md5, data, 16);

    struct THashDataGetResult res = THashGetFromHash(set->hash, &key);
    if (res.data == NULL)
        return -1;

    DatasetUnlockData(res.data);
    return res.is_new ? 1 : 0;
}
1065
/**
 * \brief add a raw SHA-256 together with its reputation to a SHA-256 set
 *
 * \retval 1 data was added to the hash
 * \retval 0 data was not added to the hash as it is already there
 * \retval -1 failed to add data to the hash, or \a set is NULL
 * \retval -2 \a data_len is not 32
 */
static int DatasetAddSha256wRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
        DataRepType *rep)
{
    if (set == NULL)
        return -1;
    if (data_len != 32)
        return -2;

    Sha256Type key = { .rep = *rep };
    memcpy(key.sha256, data, 32);

    struct THashDataGetResult res = THashGetFromHash(set->hash, &key);
    if (res.data == NULL)
        return -1;

    DatasetUnlockData(res.data);
    return res.is_new ? 1 : 0;
}
1084
/**
 * \brief add a raw SHA256 digest to a set, using a zero reputation value
 *
 * \retval 1 the hash reported the entry as new
 * \retval 0 the entry was not new
 * \retval -1 no set given, or the hash returned no entry
 * \retval -2 data_len is not 32
 */
static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 32) {
        return -2;
    }

    Sha256Type key = { .rep.value = 0 };
    memcpy(key.sha256, data, 32);

    struct THashDataGetResult found = THashGetFromHash(set->hash, &key);
    if (found.data == NULL) {
        return -1;
    }

    /* drop the use count and lock held on the returned entry */
    DatasetUnlockData(found.data);

    if (found.is_new) {
        return 1;
    }
    return 0;
}
1102
/**
 * \brief add raw data to a set, dispatching on the set's type
 *
 * Forwards to the string, MD5 or SHA256 add routine and returns that
 * routine's result unchanged.
 *
 * \retval -1 no set given, or the set's type is none of the three handled
 */
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }

    if (set->type == DATASET_TYPE_STRING) {
        return DatasetAddString(set, data, data_len);
    }
    if (set->type == DATASET_TYPE_MD5) {
        return DatasetAddMd5(set, data, data_len);
    }
    if (set->type == DATASET_TYPE_SHA256) {
        return DatasetAddSha256(set, data, data_len);
    }
    return -1;
}
1118
/**
 * \brief add raw data plus a reputation value to a set, dispatching on
 *        the set's type
 *
 * Forwards to the string, MD5 or SHA256 "wRep" add routine and returns
 * that routine's result unchanged.
 *
 * \retval -1 no set given, or the set's type is none of the three handled
 */
static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
        DataRepType *rep)
{
    int ret = -1;

    if (set == NULL) {
        return ret;
    }

    switch (set->type) {
        case DATASET_TYPE_STRING:
            ret = DatasetAddStringwRep(set, data, data_len, rep);
            break;
        case DATASET_TYPE_MD5:
            ret = DatasetAddMd5wRep(set, data, data_len, rep);
            break;
        case DATASET_TYPE_SHA256:
            ret = DatasetAddSha256wRep(set, data, data_len, rep);
            break;
    }
    return ret;
}
1135
1136 /** \brief add serialized data to set
1137 * \retval int 1 added
1138 * \retval int 0 already in hash
1139 * \retval int -1 API error (not added)
1140 * \retval int -2 DATA error
1141 */
DatasetAddSerialized(Dataset * set,const char * string)1142 int DatasetAddSerialized(Dataset *set, const char *string)
1143 {
1144 if (set == NULL)
1145 return -1;
1146
1147 switch (set->type) {
1148 case DATASET_TYPE_STRING: {
1149 // coverity[alloc_strlen : FALSE]
1150 uint8_t decoded[strlen(string)];
1151 uint32_t len = DecodeBase64(decoded, (const uint8_t *)string, strlen(string), 1);
1152 if (len == 0) {
1153 return -2;
1154 }
1155
1156 return DatasetAddString(set, decoded, len);
1157 }
1158 case DATASET_TYPE_MD5: {
1159 if (strlen(string) != 32)
1160 return -2;
1161 uint8_t hash[16];
1162 if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1163 return -2;
1164 return DatasetAddMd5(set, hash, 16);
1165 }
1166 case DATASET_TYPE_SHA256: {
1167 if (strlen(string) != 64)
1168 return -2;
1169 uint8_t hash[32];
1170 if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1171 return -2;
1172 return DatasetAddSha256(set, hash, 32);
1173 }
1174 }
1175 return -1;
1176 }
1177
1178 /**
1179 * \retval 1 data was removed from the hash
1180 * \retval 0 data not removed (busy)
1181 * \retval -1 data not found
1182 */
DatasetRemoveString(Dataset * set,const uint8_t * data,const uint32_t data_len)1183 static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1184 {
1185 if (set == NULL)
1186 return -1;
1187
1188 StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1189 .rep.value = 0 };
1190 return THashRemoveFromHash(set->hash, &lookup);
1191 }
1192
/**
 * \brief remove a raw MD5 digest from a set
 *
 * Returns the result of THashRemoveFromHash() unchanged, except:
 *
 * \retval -1 no set given
 * \retval -2 data_len is not 16
 */
static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 16) {
        return -2;
    }

    Md5Type key = { .rep.value = 0 };
    memcpy(key.md5, data, 16);

    const int removed = THashRemoveFromHash(set->hash, &key);
    return removed;
}
1205
/**
 * \brief remove a raw SHA256 digest from a set
 *
 * Returns the result of THashRemoveFromHash() unchanged, except:
 *
 * \retval -1 no set given
 * \retval -2 data_len is not 32
 */
static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 32) {
        return -2;
    }

    Sha256Type key = { .rep.value = 0 };
    memcpy(key.sha256, data, 32);

    const int removed = THashRemoveFromHash(set->hash, &key);
    return removed;
}
1218
1219 /** \brief remove serialized data from set
1220 * \retval int 1 removed
1221 * \retval int 0 found but busy (not removed)
1222 * \retval int -1 API error (not removed)
1223 * \retval int -2 DATA error */
DatasetRemoveSerialized(Dataset * set,const char * string)1224 int DatasetRemoveSerialized(Dataset *set, const char *string)
1225 {
1226 if (set == NULL)
1227 return -1;
1228
1229 switch (set->type) {
1230 case DATASET_TYPE_STRING: {
1231 // coverity[alloc_strlen : FALSE]
1232 uint8_t decoded[strlen(string)];
1233 uint32_t len = DecodeBase64(decoded, (const uint8_t *)string, strlen(string), 1);
1234 if (len == 0) {
1235 return -2;
1236 }
1237
1238 return DatasetRemoveString(set, decoded, len);
1239 }
1240 case DATASET_TYPE_MD5: {
1241 if (strlen(string) != 32)
1242 return -2;
1243 uint8_t hash[16];
1244 if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1245 return -2;
1246 return DatasetRemoveMd5(set, hash, 16);
1247 }
1248 case DATASET_TYPE_SHA256: {
1249 if (strlen(string) != 64)
1250 return -2;
1251 uint8_t hash[32];
1252 if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1253 return -2;
1254 return DatasetRemoveSha256(set, hash, 32);
1255 }
1256 }
1257 return -1;
1258 }
1259