1 /*
2 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3 * Copyright (C) 2007-2013 Sourcefire, Inc.
4 *
5 * Authors: Tomasz Kojm
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "matcher.h"
23
24 #ifndef __OTHERS_H_LC
25 #define __OTHERS_H_LC
26
27 #if HAVE_CONFIG_H
28 #include "clamav-config.h"
29 #endif
30
31 #ifdef HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif
34
35 #if HAVE_PTHREAD_H
36 #include <pthread.h>
37 #endif
38
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stdbool.h>
42
43 #ifdef HAVE_JSON
44 #include <json.h>
45 #endif
46
47 #include "clamav.h"
48 #include "dconf.h"
49 #include "filetypes.h"
50 #include "fmap.h"
51 #include "regex/regex.h"
52 #include "bytecode.h"
53 #include "bytecode_api.h"
54 #include "events.h"
55 #include "crtmgr.h"
56
57 #include "unrar_iface.h"
58
59 #ifdef HAVE_YARA
60 #include "yara_clam.h"
61 #endif
62
#if HAVE_LIBXML2
/*
 * Baseline option mask for libxml2 readers: XML_PARSE_NOERROR combined with
 * XML_PARSE_NONET. Only defined when libxml2 support is compiled in.
 */
#define CLAMAV_MIN_XMLREADER_FLAGS (XML_PARSE_NOERROR | XML_PARSE_NONET)
#endif /* HAVE_LIBXML2 */
66
/*
 * Functionality levels.
 *
 * CL_FLEVEL          Signature f-level tied to the current code. It should
 *                    never be modified.
 * CL_FLEVEL_DCONF    Used by the dconf module. Distribution packagers may bump
 *                    it provided they fix *all* security issues found in the
 *                    old versions of ClamAV; updating it re-enables the
 *                    affected modules.
 * CL_FLEVEL_SIGTOOL  Defined as an alias of CL_FLEVEL.
 */
#define CL_FLEVEL 126
#define CL_FLEVEL_DCONF CL_FLEVEL
#define CL_FLEVEL_SIGTOOL CL_FLEVEL
79
/* Global flags, defined in another translation unit. */
extern uint8_t cli_debug_flag;              /* tested by the cli_dbgmsg macro before it logs */
extern uint8_t cli_always_gen_section_hash; /* NOTE(review): purpose not visible in this header */
82
/*
 * CLI_ISCONTAINED(bb, bb_size, sb, sb_size)
 *
 * Evaluates to non-zero when the small buffer [sb, sb + sb_size) lies entirely
 * inside the big buffer [bb, bb + bb_size).
 *
 * bb and sb may be pointers or offsets; bb_size and sb_size are their sizes.
 * Both sizes must be non-zero. Every operand is cast to size_t, and the
 * redundant-looking comparisons exist to reject sb + sb_size wrapping around.
 *
 * Arguments may be evaluated more than once.
 */
#define CLI_ISCONTAINED(bb, bb_size, sb, sb_size)                            \
    ((size_t)(bb_size) > 0 && (size_t)(sb_size) > 0 &&                       \
     (size_t)(sb_size) <= (size_t)(bb_size) &&                               \
     (size_t)(sb) >= (size_t)(bb) &&                                         \
     (size_t)(sb) + (size_t)(sb_size) <= (size_t)(bb) + (size_t)(bb_size) && \
     (size_t)(sb) + (size_t)(sb_size) > (size_t)(bb) &&                      \
     (size_t)(sb) < (size_t)(bb) + (size_t)(bb_size))
99
/*
 * CLI_ISCONTAINED_0_TO(bb_size, sb, sb_size)
 *
 * Offset-only variant of CLI_ISCONTAINED: the big region is assumed to start
 * at offset zero, so `bb` is dropped. Evaluates to non-zero when the range
 * [sb, sb + sb_size) lies inside [0, bb_size).
 *
 * Both sizes must be non-zero. Operands are cast to size_t and the checks
 * guard against sb + sb_size wrapping around.
 *
 * Arguments may be evaluated more than once.
 */
#define CLI_ISCONTAINED_0_TO(bb_size, sb, sb_size)            \
    ((size_t)(bb_size) > 0 && (size_t)(sb_size) > 0 &&        \
     (size_t)(sb_size) <= (size_t)(bb_size) &&                \
     (size_t)(sb) + (size_t)(sb_size) <= (size_t)(bb_size) && \
     (size_t)(sb) < (size_t)(bb_size))
117
/*
 * CLI_ISCONTAINED_2(bb, bb_size, sb, sb_size)
 *
 * Same containment test as CLI_ISCONTAINED, except that an empty small buffer
 * (sb_size == 0) is accepted. As a consequence sb may equal bb + bb_size when
 * sb_size is zero.
 *
 * Arguments may be evaluated more than once.
 */
#define CLI_ISCONTAINED_2(bb, bb_size, sb, sb_size)                          \
    ((size_t)(bb_size) > 0 &&                                                \
     (size_t)(sb_size) <= (size_t)(bb_size) &&                               \
     (size_t)(sb) >= (size_t)(bb) &&                                         \
     (size_t)(sb) + (size_t)(sb_size) <= (size_t)(bb) + (size_t)(bb_size) && \
     (size_t)(sb) + (size_t)(sb_size) >= (size_t)(bb) &&                     \
     (size_t)(sb) <= (size_t)(bb) + (size_t)(bb_size))
132
/* 182 MiB expressed in bytes. NOTE(review): presumably the cap applied by the cli_*alloc wrappers; confirm in the implementation. */
#define CLI_MAX_ALLOCATION (182 * 1024 * 1024)
134
135 #ifdef HAVE_SYS_PARAM_H
136 #include <sys/param.h> /* for NAME_MAX */
137 #endif
138
/*
 * Maximum filename length under various systems - njh.
 *
 * If the platform did not provide NAME_MAX (as e.g. Linux does), fall back to
 * MAXNAMELEN (e.g. Solaris), then FILENAME_MAX (e.g. SCO), then 256.
 */
#ifndef NAME_MAX
#if defined(MAXNAMELEN)
#define NAME_MAX MAXNAMELEN
#elif defined(FILENAME_MAX)
#define NAME_MAX FILENAME_MAX
#else
#define NAME_MAX 256
#endif
#endif /* NAME_MAX */

/* Whatever the source of the value, never let NAME_MAX be smaller than 256. */
#if NAME_MAX < 256
#undef NAME_MAX
#define NAME_MAX 256
#endif
156
/* A bit set: a byte buffer paired with a length. */
typedef struct bitset_tag {
    unsigned char *bitset; /* backing storage */
    unsigned long length;  /* NOTE(review): unit (bits vs. bytes) is not visible here; confirm in the bitset implementation */
} bitset_t;
161
162 typedef struct recursion_level_tag {
163 cli_file_t type;
164 size_t size;
165 cl_fmap_t *fmap; /* The fmap for this layer. This used to be in an array in the ctx. */
166 uint32_t recursion_level_buffer; /* Which buffer layer in scan recursion. */
167 uint32_t recursion_level_buffer_fmap; /* Which fmap layer in this buffer. */
168 bool is_normalized_layer; /* Indicates that the layer should be skipped when checking container and intermediate types. */
169 } recursion_level_t;
170 // #define CONTAINER_FLAG_VALID 0x01
171
172 /* internal clamav context */
173 typedef struct cli_ctx_tag {
174 char *target_filepath; /**< (optional) The filepath of the original scan target. */
175 const char *sub_filepath; /**< (optional) The filepath of the current file being parsed. May be a temp file. */
176 char *sub_tmpdir; /**< The directory to store tmp files at this recursion depth. */
177 const char **virname;
178 unsigned int num_viruses;
179 unsigned long int *scanned;
180 const struct cli_matcher *root;
181 const struct cl_engine *engine;
182 uint64_t scansize;
183 struct cl_scan_options *options;
184 unsigned int scannedfiles;
185 unsigned int found_possibly_unwanted;
186 unsigned int corrupted_input;
187 unsigned int img_validate;
188 recursion_level_t *recursion_stack; /* Array of recursion levels used as a stack. */
189 uint32_t recursion_stack_size; /* stack size must == engine->max_recursion_level */
190 uint32_t recursion_level; /* Index into recursion_stack; current fmap recursion level from start of scan. */
191 fmap_t *fmap; /* Pointer to current fmap in recursion_stack, varies with recursion depth. For convenience. */
192 bool next_layer_is_normalized; /* Indicate that the next fmap pushed to the stack is normalized and should be ignored when checking container/intermediate types */
193 unsigned char handlertype_hash[16];
194 struct cli_dconf *dconf;
195 bitset_t *hook_lsig_matches;
196 void *cb_ctx;
197 cli_events_t *perf;
198 #ifdef HAVE__INTERNAL__SHA_COLLECT
199 int sha_collect;
200 #endif
201 #ifdef HAVE_JSON
202 struct json_object *properties;
203 struct json_object *wrkproperty;
204 #endif
205 struct timeval time_limit;
206 bool limit_exceeded; /* To guard against alerting on limits exceeded more than once, or storing that in the JSON metadata more than once. */
207 bool abort_scan; /* So we can guarantee a scan is aborted, even if CL_ETIMEOUT/etc. status is lost in the scan recursion stack. */
208 } cli_ctx;
209
/* Defaults for statistics gathering. */
#define STATS_ANON_UUID "5b585e8f-3be5-11e3-bf0b-18037319526c"
#define STATS_MAX_SAMPLES 50
/* 1 MiB. Parenthesized so the value survives use inside larger expressions
 * such as `x / STATS_MAX_MEM` or `x % STATS_MAX_MEM`. */
#define STATS_MAX_MEM (1024 * 1024)
213
214 typedef struct cli_flagged_sample {
215 char **virus_name;
216 char md5[16];
217 uint32_t size; /* A size of zero means size is unavailable (why would this ever happen?) */
218 uint32_t hits;
219 stats_section_t *sections;
220
221 struct cli_flagged_sample *prev;
222 struct cli_flagged_sample *next;
223 } cli_flagged_sample_t;
224
225 typedef struct cli_clamav_intel {
226 char *hostid;
227 char *host_info;
228 cli_flagged_sample_t *samples;
229 uint32_t nsamples;
230 uint32_t maxsamples;
231 uint32_t maxmem;
232 uint32_t timeout;
233 time_t nextupdate;
234 struct cl_engine *engine;
235 #ifdef CL_THREAD_SAFE
236 pthread_mutex_t mutex;
237 #endif
238 } cli_intel_t;
239
/* Two rows of four 64-bit words (2 x 256 bits). NOTE(review): presumably one bitmap per icon group table; confirm in the icon matcher. */
typedef struct {
    uint64_t v[2][4];
} icon_groupset;
243
/*
 * Per-icon metrics record used by the icon matcher.
 *
 * Each feature family (color, gray, bright, dark, edge, noedge) stores three
 * entries of an "avg" value and matching x/y values. Member order is kept
 * exactly as before, so the layout is unchanged.
 */
struct icomtr {
    unsigned int group[2];
    unsigned int color_avg[3], color_x[3], color_y[3];
    unsigned int gray_avg[3], gray_x[3], gray_y[3];
    unsigned int bright_avg[3], bright_x[3], bright_y[3];
    unsigned int dark_avg[3], dark_x[3], dark_y[3];
    unsigned int edge_avg[3], edge_x[3], edge_y[3];
    unsigned int noedge_avg[3], noedge_x[3], noedge_y[3];
    unsigned int rsum, gsum, bsum;
    unsigned int ccount;
    char *name;
};
270
/* Icon matcher tables: two group-name lists and three icon lists, each with its own count. */
struct icon_matcher {
    char **group_names[2];
    unsigned int group_counts[2]; /* entry counts for group_names[] */
    struct icomtr *icons[3];
    unsigned int icon_counts[3];  /* entry counts for icons[] */
};
277
/* Singly linked list node holding database information (see cl_engine::dbinfo, loaded from .info files). */
struct cli_dbinfo {
    char *name;
    char *hash;
    size_t size;
    struct cl_cvd *cvd;
    struct cli_dbinfo *next; /* next node in the list */
};
285
/* Password database categories. CLI_PWDB_COUNT is the number of enumerators below. */
#define CLI_PWDB_COUNT 3
typedef enum {
    CLI_PWDB_ANY = 0,
    CLI_PWDB_ZIP, /* 1 */
    CLI_PWDB_RAR  /* 2 */
} cl_pwdb_t;
292
/* Singly linked list node for one stored container password. */
struct cli_pwdb {
    char *name;
    char *passwd;
    uint16_t length;       /* NOTE(review): presumably the length of passwd; confirm in the loader */
    struct cli_pwdb *next; /* next entry in the list */
};
299
300 struct cl_engine {
301 uint32_t refcount; /* reference counter */
302 uint32_t sdb;
303 uint32_t dboptions;
304 uint32_t dbversion[2];
305 uint32_t ac_only;
306 uint32_t ac_mindepth;
307 uint32_t ac_maxdepth;
308 char *tmpdir;
309 uint32_t keeptmp;
310 uint64_t engine_options;
311
312 /* Limits */
313 uint32_t maxscantime; /* Time limit (in milliseconds) */
314 uint64_t maxscansize; /* during the scanning of archives this size
315 * will never be exceeded
316 */
317 uint64_t maxfilesize; /* compressed files will only be decompressed
318 * and scanned up to this size
319 */
320 uint32_t max_recursion_level; /* maximum recursion level for archives */
321 uint32_t maxfiles; /* maximum number of files to be scanned
322 * within a single archive
323 */
324 /* This is for structured data detection. You can set the minimum
325 * number of occurrences of an CC# or SSN before the system will
326 * generate a notification.
327 */
328 uint32_t min_cc_count;
329 uint32_t min_ssn_count;
330
331 /* Roots table */
332 struct cli_matcher **root;
333
334 /* hash matcher for standard MD5 sigs */
335 struct cli_matcher *hm_hdb;
336 /* hash matcher for MD5 sigs for PE sections */
337 struct cli_matcher *hm_mdb;
338 /* hash matcher for MD5 sigs for PE import tables */
339 struct cli_matcher *hm_imp;
340 /* hash matcher for whitelist db */
341 struct cli_matcher *hm_fp;
342
343 /* Container metadata */
344 struct cli_cdb *cdb;
345
346 /* Phishing .pdb and .wdb databases*/
347 struct regex_matcher *whitelist_matcher;
348 struct regex_matcher *domainlist_matcher;
349 struct phishcheck *phishcheck;
350
351 /* Dynamic configuration */
352 struct cli_dconf *dconf;
353
354 /* Filetype definitions */
355 struct cli_ftype *ftypes;
356 struct cli_ftype *ptypes;
357
358 /* Container password storage */
359 struct cli_pwdb **pwdbs;
360
361 /* Pre-loading test matcher
362 * Test for presence before using; cleared on engine compile.
363 */
364 struct cli_matcher *test_root;
365
366 /* Ignored signatures */
367 struct cli_matcher *ignored;
368
369 /* PUA categories (to be included or excluded) */
370 char *pua_cats;
371
372 /* Icon reference storage */
373 struct icon_matcher *iconcheck;
374
375 /* Negative cache storage */
376 struct CACHE *cache;
377
378 /* Database information from .info files */
379 struct cli_dbinfo *dbinfo;
380
381 /* Used for memory pools */
382 mpool_t *mempool;
383
384 /* crtmgr stuff */
385 crtmgr cmgr;
386
387 /* Callback(s) */
388 clcb_pre_cache cb_pre_cache;
389 clcb_pre_scan cb_pre_scan;
390 clcb_post_scan cb_post_scan;
391 clcb_virus_found cb_virus_found;
392 clcb_sigload cb_sigload;
393 void *cb_sigload_ctx;
394 clcb_hash cb_hash;
395 clcb_meta cb_meta;
396 clcb_file_props cb_file_props;
397
398 /* Used for bytecode */
399 struct cli_all_bc bcs;
400 unsigned *hooks[_BC_LAST_HOOK - _BC_START_HOOKS];
401 unsigned hooks_cnt[_BC_LAST_HOOK - _BC_START_HOOKS];
402 unsigned hook_lsig_ids;
403 enum bytecode_security bytecode_security;
404 uint32_t bytecode_timeout;
405 enum bytecode_mode bytecode_mode;
406
407 /* Engine max settings */
408 uint64_t maxembeddedpe; /* max size to scan MSEXE for PE */
409 uint64_t maxhtmlnormalize; /* max size to normalize HTML */
410 uint64_t maxhtmlnotags; /* max size for scanning normalized HTML */
411 uint64_t maxscriptnormalize; /* max size to normalize scripts */
412 uint64_t maxziptypercg; /* max size to re-do zip filetype */
413
414 /* Statistics/intelligence gathering */
415 void *stats_data;
416 clcb_stats_add_sample cb_stats_add_sample;
417 clcb_stats_remove_sample cb_stats_remove_sample;
418 clcb_stats_decrement_count cb_stats_decrement_count;
419 clcb_stats_submit cb_stats_submit;
420 clcb_stats_flush cb_stats_flush;
421 clcb_stats_get_num cb_stats_get_num;
422 clcb_stats_get_size cb_stats_get_size;
423 clcb_stats_get_hostid cb_stats_get_hostid;
424
425 /* Raw disk image max settings */
426 uint32_t maxpartitions; /* max number of partitions to scan in a disk image */
427
428 /* Engine max settings */
429 uint32_t maxiconspe; /* max number of icons to scan for PE */
430 uint32_t maxrechwp3; /* max recursive calls for HWP3 parsing */
431
432 /* PCRE matching limitations */
433 uint64_t pcre_match_limit;
434 uint64_t pcre_recmatch_limit;
435 uint64_t pcre_max_filesize;
436
437 #ifdef HAVE_YARA
438 /* YARA */
439 struct _yara_global *yara_global;
440 #endif
441 };
442
443 struct cl_settings {
444 /* don't store dboptions here; it needs to be provided to cl_load() and
445 * can be optionally obtained with cl_engine_get() or from the original
446 * settings stored by the application
447 */
448 uint32_t ac_only;
449 uint32_t ac_mindepth;
450 uint32_t ac_maxdepth;
451 char *tmpdir;
452 uint32_t keeptmp;
453 uint32_t maxscantime;
454 uint64_t maxscansize;
455 uint64_t maxfilesize;
456 uint32_t max_recursion_level;
457 uint32_t maxfiles;
458 uint32_t min_cc_count;
459 uint32_t min_ssn_count;
460 enum bytecode_security bytecode_security;
461 uint32_t bytecode_timeout;
462 enum bytecode_mode bytecode_mode;
463 char *pua_cats;
464 uint64_t engine_options;
465
466 /* callbacks */
467 clcb_pre_cache cb_pre_cache;
468 clcb_pre_scan cb_pre_scan;
469 clcb_post_scan cb_post_scan;
470 clcb_virus_found cb_virus_found;
471 clcb_sigload cb_sigload;
472 void *cb_sigload_ctx;
473 clcb_msg cb_msg;
474 clcb_hash cb_hash;
475 clcb_meta cb_meta;
476 clcb_file_props cb_file_props;
477
478 /* Engine max settings */
479 uint64_t maxembeddedpe; /* max size to scan MSEXE for PE */
480 uint64_t maxhtmlnormalize; /* max size to normalize HTML */
481 uint64_t maxhtmlnotags; /* max size for scanning normalized HTML */
482 uint64_t maxscriptnormalize; /* max size to normalize scripts */
483 uint64_t maxziptypercg; /* max size to re-do zip filetype */
484
485 /* Statistics/intelligence gathering */
486 void *stats_data;
487 clcb_stats_add_sample cb_stats_add_sample;
488 clcb_stats_remove_sample cb_stats_remove_sample;
489 clcb_stats_decrement_count cb_stats_decrement_count;
490 clcb_stats_submit cb_stats_submit;
491 clcb_stats_flush cb_stats_flush;
492 clcb_stats_get_num cb_stats_get_num;
493 clcb_stats_get_size cb_stats_get_size;
494 clcb_stats_get_hostid cb_stats_get_hostid;
495
496 /* Raw disk image max settings */
497 uint32_t maxpartitions; /* max number of partitions to scan in a disk image */
498
499 /* Engine max settings */
500 uint32_t maxiconspe; /* max number of icons to scan for PE */
501 uint32_t maxrechwp3; /* max recursive calls for HWP3 parsing */
502
503 /* PCRE matching limitations */
504 uint64_t pcre_match_limit;
505 uint64_t pcre_recmatch_limit;
506 uint64_t pcre_max_filesize;
507 };
508
509 extern cl_unrar_error_t (*cli_unrar_open)(const char *filename, void **hArchive, char **comment, uint32_t *comment_size, uint8_t debug_flag);
510 extern cl_unrar_error_t (*cli_unrar_peek_file_header)(void *hArchive, unrar_metadata_t *file_metadata);
511 extern cl_unrar_error_t (*cli_unrar_extract_file)(void *hArchive, const char *destPath, char *outputBuffer);
512 extern cl_unrar_error_t (*cli_unrar_skip_file)(void *hArchive);
513 extern void (*cli_unrar_close)(void *hArchive);
514
515 extern LIBCLAMAV_EXPORT int have_rar;
516
/*
 * Scan-option tests.
 *
 * Every macro below expands to a bitwise AND against a member of
 * `ctx->options`, so each one requires a variable named `ctx` (with an
 * `options` pointer) to be in scope at the point of use. The result is the
 * masked bit value, not a normalized 0/1.
 */

/* ctx->options->general */
#define SCAN_ALLMATCHES (ctx->options->general & CL_SCAN_GENERAL_ALLMATCHES)
#define SCAN_COLLECT_METADATA (ctx->options->general & CL_SCAN_GENERAL_COLLECT_METADATA)
#define SCAN_HEURISTICS (ctx->options->general & CL_SCAN_GENERAL_HEURISTICS)
#define SCAN_HEURISTIC_PRECEDENCE (ctx->options->general & CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE)
#define SCAN_UNPRIVILEGED (ctx->options->general & CL_SCAN_GENERAL_UNPRIVILEGED)

/* ctx->options->parse */
#define SCAN_PARSE_ARCHIVE (ctx->options->parse & CL_SCAN_PARSE_ARCHIVE)
#define SCAN_PARSE_ELF (ctx->options->parse & CL_SCAN_PARSE_ELF)
#define SCAN_PARSE_PDF (ctx->options->parse & CL_SCAN_PARSE_PDF)
#define SCAN_PARSE_SWF (ctx->options->parse & CL_SCAN_PARSE_SWF)
#define SCAN_PARSE_HWP3 (ctx->options->parse & CL_SCAN_PARSE_HWP3)
#define SCAN_PARSE_XMLDOCS (ctx->options->parse & CL_SCAN_PARSE_XMLDOCS)
#define SCAN_PARSE_MAIL (ctx->options->parse & CL_SCAN_PARSE_MAIL)
#define SCAN_PARSE_OLE2 (ctx->options->parse & CL_SCAN_PARSE_OLE2)
#define SCAN_PARSE_HTML (ctx->options->parse & CL_SCAN_PARSE_HTML)
#define SCAN_PARSE_PE (ctx->options->parse & CL_SCAN_PARSE_PE)

/* ctx->options->heuristic */
#define SCAN_HEURISTIC_BROKEN (ctx->options->heuristic & CL_SCAN_HEURISTIC_BROKEN)
#define SCAN_HEURISTIC_BROKEN_MEDIA (ctx->options->heuristic & CL_SCAN_HEURISTIC_BROKEN_MEDIA)
#define SCAN_HEURISTIC_EXCEEDS_MAX (ctx->options->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX)
#define SCAN_HEURISTIC_PHISHING_SSL_MISMATCH (ctx->options->heuristic & CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH)
#define SCAN_HEURISTIC_PHISHING_CLOAK (ctx->options->heuristic & CL_SCAN_HEURISTIC_PHISHING_CLOAK)
#define SCAN_HEURISTIC_MACROS (ctx->options->heuristic & CL_SCAN_HEURISTIC_MACROS)
#define SCAN_HEURISTIC_ENCRYPTED_ARCHIVE (ctx->options->heuristic & CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE)
#define SCAN_HEURISTIC_ENCRYPTED_DOC (ctx->options->heuristic & CL_SCAN_HEURISTIC_ENCRYPTED_DOC)
#define SCAN_HEURISTIC_PARTITION_INTXN (ctx->options->heuristic & CL_SCAN_HEURISTIC_PARTITION_INTXN)
#define SCAN_HEURISTIC_STRUCTURED (ctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED)
#define SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL (ctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL)
#define SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED (ctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED)

/* ctx->options->mail */
#define SCAN_MAIL_PARTIAL_MESSAGE (ctx->options->mail & CL_SCAN_MAIL_PARTIAL_MESSAGE)

/* ctx->options->dev */
#define SCAN_DEV_COLLECT_SHA (ctx->options->dev & CL_SCAN_DEV_COLLECT_SHA)
#define SCAN_DEV_COLLECT_PERF_INFO (ctx->options->dev & CL_SCAN_DEV_COLLECT_PERFORMANCE_INFO)
551
/*
 * Byte-swap helpers, based on macros from A. Melnikoff.
 *
 * Each macro reverses the byte order of a 16-, 32- or 64-bit value.
 * The argument is evaluated more than once, so it must not have side effects.
 *
 * Every use of `v` is parenthesized so that expression arguments such as
 * `a | b` are swapped as a whole. The 32-bit masks are unsigned so that moving
 * a byte into the top position never left-shifts a signed int into its sign
 * bit; the result type is unchanged, because the original's 0xff000000 mask
 * was already unsigned.
 */
#define cbswap16(v) ((((v)&0xff) << 8) | (((v) >> 8) & 0xff))
#define cbswap32(v) ((((v)&0x000000ffU) << 24) | (((v)&0x0000ff00U) << 8) | \
                     (((v)&0x00ff0000U) >> 8) | (((v)&0xff000000U) >> 24))
#define cbswap64(v) ((((v)&0x00000000000000ffULL) << 56) | \
                     (((v)&0x000000000000ff00ULL) << 40) | \
                     (((v)&0x0000000000ff0000ULL) << 24) | \
                     (((v)&0x00000000ff000000ULL) << 8) |  \
                     (((v)&0x000000ff00000000ULL) >> 8) |  \
                     (((v)&0x0000ff0000000000ULL) >> 24) | \
                     (((v)&0x00ff000000000000ULL) >> 40) | \
                     (((v)&0xff00000000000000ULL) >> 56))
564
/*
 * Packed wrapper types used to read and write integers and pointers at
 * arbitrary addresses.
 *
 * Packing is requested three ways, depending on what the build detected:
 * the `packed` attribute, `#pragma pack(1)`, or the HPPA spelling
 * `#pragma pack 1`. When HAVE_ATTRIB_PACKED is not defined, `__attribute__`
 * is defined away to nothing; note that this affects every later use of
 * `__attribute__` in the translation unit, not only the types below.
 */
#ifndef HAVE_ATTRIB_PACKED
#define __attribute__(x)
#endif

/* open the packed region */
#ifdef HAVE_PRAGMA_PACK
#pragma pack(1)
#endif
#ifdef HAVE_PRAGMA_PACK_HPPA
#pragma pack 1
#endif

/* 64-bit value, signed or unsigned view */
union unaligned_64 {
    uint64_t una_u64;
    int64_t una_s64;
} __attribute__((packed));

/* 32-bit value, signed or unsigned view */
union unaligned_32 {
    uint32_t una_u32;
    int32_t una_s32;
} __attribute__((packed));

/* 16-bit value, signed or unsigned view */
union unaligned_16 {
    uint16_t una_u16;
    int16_t una_s16;
} __attribute__((packed));

/* pointer-sized value */
struct unaligned_ptr {
    void *ptr;
} __attribute__((packed));

/* close the packed region */
#ifdef HAVE_PRAGMA_PACK
#pragma pack()
#endif
#ifdef HAVE_PRAGMA_PACK_HPPA
#pragma pack
#endif
600
601 #if WORDS_BIGENDIAN == 0
/*
 * Little-endian host.
 *
 * le*_to_host are identity operations and be*_to_host byte-swap.
 * cli_readint64/32/16 read a signed value of that width from `buff` through
 * the packed unaligned_* unions; cli_writeint32 stores `value` (converted to
 * uint32_t) at `offset` the same way.
 */
#define le16_to_host(v) (v)
#define le32_to_host(v) (v)
#define le64_to_host(v) (v)

#define be16_to_host(v) cbswap16(v)
#define be32_to_host(v) cbswap32(v)
#define be64_to_host(v) cbswap64(v)

#define cli_readint64(buff) (((const union unaligned_64 *)(buff))->una_s64)
#define cli_readint32(buff) (((const union unaligned_32 *)(buff))->una_s32)
#define cli_readint16(buff) (((const union unaligned_16 *)(buff))->una_s16)

#define cli_writeint32(offset, value) (((union unaligned_32 *)(offset))->una_u32 = (uint32_t)(value))
613 #else
/*
 * Big-endian host.
 *
 * be*_to_host are identity operations and le*_to_host byte-swap.
 */
#define be16_to_host(v) (v)
#define be32_to_host(v) (v)
#define be64_to_host(v) (v)

#define le16_to_host(v) cbswap16(v)
#define le32_to_host(v) cbswap32(v)
#define le64_to_host(v) cbswap64(v)
621
/**
 * @brief Read a little-endian signed 64-bit integer from a buffer (big-endian host variant).
 *
 * The value is assembled byte by byte in an unsigned accumulator, so no shift
 * ever moves a bit into the sign position of a signed type. The buffer may be
 * unaligned.
 *
 * @param buff Pointer to at least 8 readable bytes.
 * @return int64_t The decoded value; buff[0] is the least significant byte.
 */
static inline int64_t cli_readint64(const void *buff)
{
    const unsigned char *bytes = (const unsigned char *)buff;
    uint64_t value             = 0;
    int i;

    /* Walk from the most significant byte (index 7) down to index 0. */
    for (i = 7; i >= 0; i--) {
        value = (value << 8) | bytes[i];
    }
    return (int64_t)value;
}
636
/**
 * @brief Read a little-endian signed 32-bit integer from a buffer (big-endian host variant).
 *
 * Bytes are combined as unsigned values, so a top byte of 0x80 or more is not
 * left-shifted into the sign bit of a signed int. The buffer may be unaligned.
 *
 * @param buff Pointer to at least 4 readable bytes.
 * @return int32_t The decoded value; buff[0] is the least significant byte.
 */
static inline int32_t cli_readint32(const void *buff)
{
    const unsigned char *bytes = (const unsigned char *)buff;
    uint32_t value;

    value = (uint32_t)bytes[0];
    value |= (uint32_t)bytes[1] << 8;
    value |= (uint32_t)bytes[2] << 16;
    value |= (uint32_t)bytes[3] << 24;
    return (int32_t)value;
}
646
/**
 * @brief Read a little-endian signed 16-bit integer from a buffer (big-endian host variant).
 *
 * The two bytes are combined as an unsigned 16-bit quantity and converted to
 * int16_t once at the end. The buffer may be unaligned.
 *
 * @param buff Pointer to at least 2 readable bytes.
 * @return int16_t The decoded value; buff[0] is the least significant byte.
 */
static inline int16_t cli_readint16(const void *buff)
{
    const unsigned char *bytes = (const unsigned char *)buff;
    uint16_t value;

    value = (uint16_t)(bytes[0] | ((unsigned int)bytes[1] << 8));
    return (int16_t)value;
}
654
/**
 * @brief Store a 32-bit value in little-endian byte order (big-endian host variant).
 *
 * Bytes are written through an `unsigned char` pointer so that values above
 * 127 are stored without relying on the conversion to a possibly signed
 * `char`. The destination may be unaligned.
 *
 * @param offset Destination; must have room for 4 bytes.
 * @param value  Value to store; its least significant byte goes to offset[0].
 */
static inline void cli_writeint32(void *offset, uint32_t value)
{
    unsigned char *out = (unsigned char *)offset;

    out[0] = (unsigned char)(value & 0xff);
    out[1] = (unsigned char)((value >> 8) & 0xff);
    out[2] = (unsigned char)((value >> 16) & 0xff);
    out[3] = (unsigned char)((value >> 24) & 0xff);
}
662 #endif
663
664 /**
665 * @brief Append an alert.
666 *
667 * An FP-check will verify that the file is not whitelisted.
668 * The whitelist check does not happen before the scan because file whitelisting
669 * is so infrequent that such action would be detrimental to performance.
670 *
671 * TODO: Replace implementation with severity scale, and severity threshold
672 * wherein signatures that do not meet the threshold are documented in JSON
673 * metadata but do not halt the scan.
674 *
675 * @param ctx The scan context.
676 * @param virname The alert name.
677 * @return cl_error_t CL_VIRUS if scan should be halted due to an alert, CL_CLEAN if scan should continue.
678 */
679 cl_error_t cli_append_virus(cli_ctx *ctx, const char *virname);
680
681 /**
682 * @brief Append a PUA (low severity) alert.
683 *
684 * This function will return CLEAN unless in all-match or Heuristic-precedence
685 * modes. The intention is for the scan to continue in case something more
686 * malicious is found.
687 *
688 * TODO: Replace implementation with severity scale, and severity threshold
689 * wherein signatures that do not meet the threshold are documented in JSON
690 * metadata but do not halt the scan.
691 *
692 * BUG: In normal scan mode (see above), the alert is not FP-checked!
693 *
694 * @param ctx The scan context.
695 * @param virname The alert name.
696 * @return cl_error_t CL_VIRUS if scan should be halted due to an alert, CL_CLEAN if scan should continue.
697 */
698 cl_error_t cli_append_possibly_unwanted(cli_ctx *ctx, const char *virname);
699
700 const char *cli_get_last_virus(const cli_ctx *ctx);
701 const char *cli_get_last_virus_str(const cli_ctx *ctx);
702 void cli_virus_found_cb(cli_ctx *ctx);
703
704 /**
705 * @brief Push a new fmap onto our scan recursion stack.
706 *
707 * May fail if we exceed max recursion depth.
708 *
709 * @param ctx The scanning context.
710 * @param map The fmap for the new layer.
711 * @param type The file type. May be CL_TYPE_ANY if unknown. Can change it later with cli_recursion_stack_change_type().
712 * @param is_new_buffer true if the fmap represents a new buffer/file, and not some window into an existing fmap.
713 * @return cl_error_t CL_SUCCESS if successful, else CL_EMAXREC if exceeding the max recursion depth.
714 */
715 cl_error_t cli_recursion_stack_push(cli_ctx *ctx, cl_fmap_t *map, cli_file_t type, bool is_new_buffer);
716
717 /**
718 * @brief Pop off a layer of our scan recursion stack.
719 *
720 * Returns the fmap for the popped layer. Does NOT funmap() the fmap for you.
721 *
722 * @param ctx The scanning context.
723 * @return cl_fmap_t* A pointer to the fmap for the popped layer, may return NULL instead if the stack is empty.
724 */
725 cl_fmap_t *cli_recursion_stack_pop(cli_ctx *ctx);
726
727 /**
728 * @brief Re-assign the type for the current layer.
729 *
730 * @param ctx The scanning context.
731 * @param type The new file type.
732 */
733 void cli_recursion_stack_change_type(cli_ctx *ctx, cli_file_t type);
734
735 /**
736 * @brief Get the type of a specific layer.
737 *
738 * Ignores normalized layers internally.
739 *
740 * For index:
741 * 0 == the outermost (bottom) layer of the stack.
742 * 1 == the first layer (probably never explicitly used).
743 * -1 == the present innermost (top) layer of the stack.
744 * -2 == the parent layer (or "container"). That is, the second from the top of the stack.
745 *
746 * @param ctx The scanning context.
747 * @param index Desired index, will be converted internally as though the normalized layers were stripped out. Don't think too had about it. Or do. ¯\_(ツ)_/¯
748 * @return cli_file_t The type of the requested layer,
749 * or returns CL_TYPE_ANY if a negative layer is requested,
750 * or returns CL_TYPE_IGNORED if requested layer too high.
751 */
752 cli_file_t cli_recursion_stack_get_type(cli_ctx *ctx, int index);
753
754 /**
755 * @brief Get the size of a specific layer.
756 *
757 * Ignores normalized layers internally.
758 *
759 * For index:
760 * 0 == the outermost (bottom) layer of the stack.
761 * 1 == the first layer (probably never explicitly used).
762 * -1 == the present innermost (top) layer of the stack.
763 * -2 == the parent layer (or "container"). That is, the second from the top of the stack.
764 *
765 * @param ctx The scanning context.
766 * @param index Desired index, will be converted internally as though the normalized layers were stripped out. Don't think too had about it. Or do. ¯\_(ツ)_/¯
767 * @return cli_file_t The size of the requested layer,
768 * or returns the size of the whole file if a negative layer is requested,
769 * or returns 0 if requested layer too high.
770 */
771 size_t cli_recursion_stack_get_size(cli_ctx *ctx, int index);
772
/*
 * In-place bit rotation; used by: spin, yc (C) aCaB.
 *
 * __SHIFTBITS(a)  width of `a` in bits (sizeof(a) * 8).
 * __SHIFTMASK(a)  mask that reduces a shift count modulo that width.
 * CLI_ROL(a, b)   rotate `a` left by `b` bits and assign the result to `a`.
 * CLI_ROR(a, b)   rotate `a` right by `b` bits and assign the result to `a`.
 *
 * Both shift counts are masked, so a rotation by 0 or by a multiple of the
 * width leaves `a` unchanged instead of shifting by the full width.
 * `a` must be a modifiable lvalue and is evaluated several times; it is
 * parenthesized at every use so that lvalue expressions expand safely.
 * These are assignment expressions: use them as statements.
 */
#define __SHIFTBITS(a) (sizeof(a) << 3)
#define __SHIFTMASK(a) (__SHIFTBITS(a) - 1)
#define CLI_ROL(a, b) (a) = ((a) << ((b)&__SHIFTMASK(a))) | ((a) >> ((__SHIFTBITS(a) - (b)) & __SHIFTMASK(a)))
#define CLI_ROR(a, b) (a) = ((a) >> ((b)&__SHIFTMASK(a))) | ((a) << ((__SHIFTBITS(a) - (b)) & __SHIFTMASK(a)))
778
/*
 * Implementation independent sign-extended signed right shift.
 *
 * CLI_SRS(n, s)  evaluates to `n` shifted right by `s` bits with sign
 *                extension.
 * CLI_SAR(n, s)  does the same and assigns the result back to `n`.
 *
 * With HAVE_SAR the native `>>` is used directly. Otherwise the result of `>>`
 * is treated as a logical shift and the sign is re-extended by XOR-ing and
 * then subtracting the bit that now holds the original sign
 * (bit `width - 1 - s`). `s` is parenthesized there so that an expression
 * argument such as `a + b` is subtracted as a whole.
 *
 * NOTE(review): the fallback builds its mask from the int constant 1, so it is
 * only suitable for operands no wider than int.
 */
#ifdef HAVE_SAR
#define CLI_SRS(n, s) ((n) >> (s))
#else
#define CLI_SRS(n, s) ((((n) >> (s)) ^ (1 << (sizeof(n) * 8 - 1 - (s)))) - (1 << (sizeof(n) * 8 - 1 - (s))))
#endif
#define CLI_SAR(n, s) n = CLI_SRS(n, s)
786
787 #ifdef __GNUC__
788 void cli_warnmsg(const char *str, ...) __attribute__((format(printf, 1, 2)));
789 #else
790 void cli_warnmsg(const char *str, ...);
791 #endif
792
793 #ifdef __GNUC__
794 void cli_errmsg(const char *str, ...) __attribute__((format(printf, 1, 2)));
795 #else
796 void cli_errmsg(const char *str, ...);
797 #endif
798
799 #ifdef __GNUC__
800 void cli_infomsg(const cli_ctx *ctx, const char *fmt, ...) __attribute__((format(printf, 2, 3)));
801 #else
802 void cli_infomsg(const cli_ctx *ctx, const char *fmt, ...);
803 #endif
804
805 void cli_logg_setup(const cli_ctx *ctx);
806 void cli_logg_unsetup(void);
807
/*
 * Branch prediction hints: tell the compiler about branches that are very
 * rarely taken, such as debug paths and error paths. Available from GCC 3.2;
 * elsewhere the macros pass the condition through unchanged.
 */
#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)
#define LIKELY(cond) __builtin_expect(!!(cond), 1)
#define UNLIKELY(cond) __builtin_expect(!!(cond), 0)
#else
#define LIKELY(cond) (cond)
#define UNLIKELY(cond) (cond)
#endif

/* Inlining control; without GCC, always_inline is plain `inline` and never_inline is empty. */
#ifdef __GNUC__
#define never_inline __attribute__((noinline))
#define always_inline inline __attribute__((always_inline))
#else
#define always_inline inline
#define never_inline
#endif

/* Hot-function marker, available from GCC 4.3; empty elsewhere. */
#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define __hot__ __attribute__((hot))
#else
#define __hot__
#endif
831
/* printf-style debug message function; call it through the cli_dbgmsg macro. */
#ifdef __GNUC__
void cli_dbgmsg_internal(const char *str, ...) __attribute__((format(printf, 1, 2)));
#else
void cli_dbgmsg_internal(const char *str, ...);
#endif

/*
 * cli_dbgmsg(fmt, ...) expands to
 *     (!UNLIKELY(cli_debug_flag)) ? (void)0 : cli_dbgmsg_internal(fmt, ...)
 * so when debugging is off neither the call nor its arguments are evaluated.
 * The expansion is deliberately left unparenthesized: the caller's argument
 * list must attach directly to cli_dbgmsg_internal.
 */
#define cli_dbgmsg (!UNLIKELY(cli_debug_flag)) ? (void)0 : cli_dbgmsg_internal
839
/*
 * cli_getpagesize(): the system page size, obtained from whichever facility
 * the platform provides.
 *
 *   Windows                   GetSystemInfo()
 *   HAVE_SYSCONF_SC_PAGESIZE  sysconf(_SC_PAGESIZE)
 *   HAVE_GETPAGESIZE          getpagesize()
 *
 * HAVE_CLI_GETPAGESIZE is reset first and then set to 1 only by the two
 * non-Windows variants; the Windows variant does not define it.
 */
#undef HAVE_CLI_GETPAGESIZE

#ifdef _WIN32
static inline int cli_getpagesize(void)
{
    SYSTEM_INFO info;

    GetSystemInfo(&info);
    return (int)info.dwPageSize;
}
#elif HAVE_SYSCONF_SC_PAGESIZE
static inline int cli_getpagesize(void)
{
    return (int)sysconf(_SC_PAGESIZE);
}
#define HAVE_CLI_GETPAGESIZE 1
#elif HAVE_GETPAGESIZE
static inline int cli_getpagesize(void)
{
    return (int)getpagesize();
}
#define HAVE_CLI_GETPAGESIZE 1
#endif /* _WIN32 / HAVE_SYSCONF_SC_PAGESIZE / HAVE_GETPAGESIZE */
868
/*
 * Allocation, filesystem and I/O helpers.
 * NOTE(review): none of these are documented in this header. The names
 * suggest wrappers around the corresponding libc calls; the exact limits and
 * failure behaviour (for example how cli_realloc2 differs from cli_realloc)
 * must be confirmed in the implementation.
 */

/* memory */
void *cli_malloc(size_t nmemb);
void *cli_calloc(size_t nmemb, size_t size);
void *cli_realloc(void *ptr, size_t size);
void *cli_realloc2(void *ptr, size_t size);
char *cli_strdup(const char *s);

/* files and hashing */
int cli_rmdirs(const char *dirname);
char *cli_hashstream(FILE *fs, unsigned char *digcpy, int type);
char *cli_hashfile(const char *filename, int type);
int cli_unlink(const char *pathname);

/* descriptor I/O */
size_t cli_readn(int fd, void *buff, size_t count);
size_t cli_writen(int fd, const void *buff, size_t count);

const char *cli_gettmpdir(void);
881
/**
 * @brief Sanitize a relative path, so it cannot have a negative depth.
 *
 * Caller is responsible for freeing the sanitized filepath.
 * The optional sanitized_filebase output param, if set, is a pointer into the
 * returned filepath and does not need to be freed separately.
 *
 * @param filepath                  The filepath to sanitize.
 * @param filepath_len              The length of the filepath.
 * @param[out] sanitized_filebase   Pointer to the basename portion of the
 *                                  sanitized filepath. (optional)
 * @return char* The sanitized filepath. NOTE(review): presumably NULL on
 *               failure; confirm in the implementation.
 */
char *cli_sanitize_filepath(const char *filepath,
                            size_t filepath_len,
                            char **sanitized_filebase);
895
/**
 * @brief Generate tempfile filename (no path) with a random MD5 hash.
 *
 * Caller is responsible for freeing the filename.
 *
 * @param prefix    Filename prefix. NOTE(review): undocumented in the original;
 *                  presumably optional and combined with the hash - confirm
 *                  against the implementation.
 * @return char*    filename or NULL.
 */
char *cli_genfname(const char *prefix);
904
/**
 * @brief Generate a full tempfile filepath with the provided name.
 *
 * Caller is responsible for freeing the filename.
 * If the dir is not provided, the engine->tmpdir will be used.
 *
 * @param dir       Alternative directory. (optional)
 * @param fname     The filename to place in that directory.
 * @return char*    filename or NULL.
 */
char *cli_newfilepath(const char *dir, const char *fname);
915
/**
 * @brief Generate a full tempfile filepath with the provided name, and pass
 *        back the filepath together with an open file descriptor.
 *
 * Caller is responsible for freeing the filename.
 * If the dir is not provided, the engine->tmpdir will be used.
 *
 * @param dir        Alternative temp directory (optional).
 * @param fname      (Optional) Base filename for new file.
 * @param[out] name  Allocated filepath, must be freed by caller.
 * @param[out] fd    File descriptor of open temp file.
 * @return cl_error_t NOTE(review): return codes are not documented in the
 *                   original; presumably CL_SUCCESS on success and a CL_E*
 *                   code otherwise - confirm against the implementation.
 */
cl_error_t cli_newfilepathfd(const char *dir, char *fname, char **name, int *fd);
928
/**
 * @brief Generate a full tempfile filepath with a random MD5 hash, prefixed
 *        with the given name if one is provided.
 *
 * Caller is responsible for freeing the filename.
 *
 * @param dir       Alternative temp directory. (optional)
 * @param prefix    Prefix for the generated name. (optional)
 * @return char*    filename or NULL.
 */
char *cli_gentemp_with_prefix(const char *dir, const char *prefix);
938
/**
 * @brief Generate a full tempfile filepath with a random MD5 hash.
 *
 * Caller is responsible for freeing the returned filepath.
 *
 * @param dir       Alternative temp directory. (optional)
 * @return char*    The generated filepath, or NULL.
 */
char *cli_gentemp(const char *dir);
948
/**
 * @brief Create a temp filename, create the file, open it, and pass back the
 *        filepath and the open file descriptor.
 *
 * @param dir        Alternative temp directory (optional).
 * @param[out] name  Allocated filepath, must be freed by caller.
 * @param[out] fd    File descriptor of open temp file.
 * @return cl_error_t CL_SUCCESS, CL_ECREAT, or CL_EMEM.
 */
cl_error_t cli_gentempfd(const char *dir, char **name, int *fd);
958
/**
 * @brief Create a temp filename, create the file, open it, and pass back the
 *        filepath and the open file descriptor.
 *
 * @param dir        Alternative temp directory (optional).
 * @param prefix     (Optional) Prefix for the new tempfile name.
 * @param[out] name  Allocated filepath, must be freed by caller.
 * @param[out] fd    File descriptor of open temp file.
 * @return cl_error_t CL_SUCCESS, CL_ECREAT, or CL_EMEM.
 */
cl_error_t cli_gentempfd_with_prefix(const char *dir, char *prefix, char **name, int *fd);
969
/*
 * Miscellaneous helpers (declarations only): random numbers, file copy,
 * bitsets, time formatting and scan-limit checks. Their exact semantics are
 * defined by the implementations, which are not part of this header.
 */
unsigned int cli_rndnum(unsigned int max);
int cli_filecopy(const char *src, const char *dest);
bitset_t *cli_bitset_init(void);
void cli_bitset_free(bitset_t *bs);
int cli_bitset_set(bitset_t *bs, unsigned long bit_offset);
int cli_bitset_test(bitset_t *bs, unsigned long bit_offset);
/* Takes a caller-supplied buffer (buf, bufsize) in addition to the time value. */
const char *cli_ctime(const time_t *timep, char *buf, const size_t bufsize);
/* NOTE(review): the two prototypes below omit parameter names; consider naming them to match the definitions. */
void cli_append_virus_if_heur_exceedsmax(cli_ctx *, char *);
cl_error_t cli_checklimits(const char *, cli_ctx *, unsigned long, unsigned long, unsigned long);
979
/**
 * @brief Call before scanning a file to determine if we should scan it, skip it, or abort the entire scanning process.
 *
 * If the verdict is CL_SUCCESS, then this function increments the number of scanned files and the amount of scanned data.
 * If the verdict is that a limit has been exceeded, then ctx-> ...
 * NOTE(review): the sentence above is truncated in the original comment;
 * complete it against the implementation.
 *
 * @param ctx           The scanning context.
 * @param needed        The size of the file we're considering scanning.
 * @return cl_error_t   CL_SUCCESS if we're good to keep scanning else an error status.
 */
cl_error_t cli_updatelimits(cli_ctx *ctx, size_t needed);
991
/* NOTE(review): parameters are unnamed in this prototype; their meaning must be taken from the definition. */
unsigned long cli_getsizelimit(cli_ctx *, unsigned long);
int cli_matchregex(const char *str, const char *regex);
/* qsort-style signature: base pointer a, element count n, element size es, comparator cmp. */
void cli_qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *));
/* Variant whose comparator takes a third pointer; presumably `arg` is what gets passed as that third argument - confirm. */
void cli_qsort_r(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *, const void *), void *arg);
cl_error_t cli_checktimelimit(cli_ctx *ctx);
997
/*
 * Bit flags for the `flags` argument of cli_ftw(). Each option occupies its
 * own bit so they can be OR-ed together.
 */

/* Symlink behaviour. */
#define CLI_FTW_FOLLOW_FILE_SYMLINK (1 << 0)
#define CLI_FTW_FOLLOW_DIR_SYMLINK (1 << 1)

/* Set if the callback needs the stat. */
#define CLI_FTW_NEED_STAT (1 << 2)

/* Remove leading/trailing slashes. */
#define CLI_FTW_TRIM_SLASHES (1 << 3)

/* Standard combination: stat needed and slashes trimmed. */
#define CLI_FTW_STD (CLI_FTW_NEED_STAT | CLI_FTW_TRIM_SLASHES)
1008
/* Reason code handed to a cli_ftw_cb callback through its `reason` argument. */
enum cli_ftw_reason {
    visit_file,
    visit_directory_toplev, /* this is a directory at toplevel of recursion */
    error_mem,              /* recommended to return CL_EMEM */
    /* for every reason listed below, returning CL_SUCCESS is recommended */
    error_stat,
    warning_skipped_link,
    warning_skipped_special,
    warning_skipped_dir
};
1019
/*
 * Callback data passed to cli_ftw() and to the cli_ftw_cb / cli_ftw_pathchk
 * callbacks. The void* is wrapped in a struct so that it cannot be mixed up
 * with some other pointer.
 */
struct cli_ftw_cbdata {
    void *data; /* caller-defined payload */
};
1024
/**
 * @brief Callback to process each file in a file tree walk (FTW).
 *
 * The callback is responsible for freeing filename when it is done using it.
 *
 * The callback decides whether directory traversal should continue after an
 * error: it is called with reason set to one of the error_* values of
 * enum cli_ftw_reason, and if it returns CL_BREAK the walk stops.
 *
 * @param stat_buf  Stat information for the entry. NOTE(review): presumably
 *                  only meaningful when CLI_FTW_NEED_STAT was requested - confirm.
 * @param filename  Name of the entry; ownership passes to the callback, which must free it.
 * @param path      NOTE(review): not described in the original; confirm how it
 *                  relates to filename against the implementation.
 * @param reason    Why the callback is being invoked (see enum cli_ftw_reason).
 * @param data      The callback data given to cli_ftw().
 *
 * Return:
 * - CL_BREAK to break out without an error,
 * - CL_SUCCESS to continue,
 * - any CL_E* to break out due to error.
 */
typedef cl_error_t (*cli_ftw_cb)(STATBUF *stat_buf, char *filename, const char *path, enum cli_ftw_reason reason, struct cli_ftw_cbdata *data);
1040
/**
 * @brief Callback to determine if a path in a file tree walk (FTW) should be skipped.
 *
 * Receives the same struct cli_ftw_cbdata pointer type as the main FTW
 * callback (cli_ftw_cb).
 *
 * @param path  The path being considered.
 * @param data  The callback data.
 *
 * Return:
 * - 1 if the path should be skipped (i.e. to not call the callback for the given path),
 * - 0 if the path should be processed (i.e. to call the callback for the given path).
 */
typedef int (*cli_ftw_pathchk)(const char *path, struct cli_ftw_cbdata *data);
1050
/**
 * @brief Traverse a file path, calling the callback function on each file
 * within if the pathchk() check allows for it. Will skip certain file types.
 * NOTE(review): the list of skipped types was left empty in the original
 * comment; the warning_skipped_link / warning_skipped_special /
 * warning_skipped_dir reasons suggest symlinks, special files and
 * directories - confirm against the implementation.
 *
 * The return value is independent of whether a virus was found; recording
 * that is the callback's job.
 * Note that the callback may dispatch the scan asynchronously, so when cli_ftw
 * returns we don't yet know the infected/not-infected status of the directory!
 *
 * Because of this, a callback that scans synchronously should store the
 * infected status in its cbdata.
 * This works for both files and directories. It stats the path to determine
 * which one it is.
 * If it is a file, it simply calls the callback once, otherwise recurses.
 *
 * @param base      The top level directory (or file) path to be processed
 * @param flags     A bitflag field for the CLI_FTW_* flag options (see above)
 * @param maxdepth  The max recursion depth.
 * @param callback  The cli_ftw_cb callback to invoke on each file AND directory.
 * @param data      Callback data for the callback function.
 * @param pathchk   A function used to determine if the callback should be run on the given file.
 * @return cl_error_t CL_SUCCESS if it traversed all files and subdirs
 * @return cl_error_t CL_BREAK if traversal has stopped at some point
 * @return cl_error_t CL_E* if error encountered during traversal and we had to break out
 */
cl_error_t cli_ftw(char *base, int flags, int maxdepth, cli_ftw_cb callback, struct cli_ftw_cbdata *data, cli_ftw_pathchk pathchk);
1077
/*
 * Declaration only. NOTE(review): the signature suggests the message for
 * errnum is written into the caller-supplied buf of len bytes and a pointer
 * to the message is returned - confirm against the definition.
 */
const char *cli_strerror(int errnum, char *buf, size_t len);
1079
/**
 * @brief Attempt to get a filename from an open file descriptor.
 *
 * Caller is responsible for freeing the filename.
 * Should work on Linux, macOS, Windows.
 *
 * @param desc           File descriptor
 * @param[out] filepath  Will be set to file path if found, or NULL.
 * @return cl_error_t    CL_SUCCESS if found, else an error code.
 */
cl_error_t cli_get_filepath_from_filedesc(int desc, char **filepath);
1091
/**
 * @brief Attempt to get the real path of a provided path (evaluating symlinks).
 *
 * Caller is responsible for freeing the file path.
 * On posix systems this just calls realpath() under the hood.
 * On Win32, it opens a handle and uses cli_get_filepath_from_filedesc()
 * to get the real path.
 *
 * @param file_name           A file path to evaluate.
 * @param[out] real_filename  A malloced string containing the real path.
 * @return cl_error_t         CL_SUCCESS if found, else an error code.
 */
cl_error_t cli_realpath(const char *file_name, char **real_filename);
1105
1106 #endif
1107