1 /*
2 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3 * Copyright (C) 2007-2013 Sourcefire, Inc.
4 *
5 * Authors: Tomasz Kojm
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #if HAVE_CONFIG_H
23 #include "clamav-config.h"
24 #endif
25
26 #ifndef _WIN32
27 #include <sys/time.h>
28 #endif
29 #include <stdio.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include <libgen.h>
33 #include <errno.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <stdbool.h>
37 #ifdef HAVE_UNISTD_H
38 #include <unistd.h>
39 #endif
40 #ifdef HAVE_SYS_PARAM_H
41 #include <sys/param.h>
42 #endif
43 #include <fcntl.h>
44 #include <dirent.h>
45 #ifdef HAVE_SYS_TIMES_H
46 #include <sys/times.h>
47 #endif
48
49 #define DCONF_ARCH ctx->dconf->archive
50 #define DCONF_DOC ctx->dconf->doc
51 #define DCONF_MAIL ctx->dconf->mail
52 #define DCONF_OTHER ctx->dconf->other
53
54 #include <zlib.h>
55
56 #include "clamav.h"
57 #include "others.h"
58 #include "dconf.h"
59 #include "scanners.h"
60 #include "matcher-ac.h"
61 #include "matcher-bm.h"
62 #include "matcher.h"
63 #include "ole2_extract.h"
64 #include "vba_extract.h"
65 #include "xlm_extract.h"
66 #include "msexpand.h"
67 #include "mbox.h"
68 #include "libmspack.h"
69 #include "pe.h"
70 #include "elf.h"
71 #include "filetypes.h"
72 #include "htmlnorm.h"
73 #include "untar.h"
74 #include "special.h"
75 #include "binhex.h"
76 /* #include "uuencode.h" */
77 #include "tnef.h"
78 #include "sis.h"
79 #include "pdf.h"
80 #include "str.h"
81 #include "entconv.h"
82 #include "rtf.h"
83 #include "unarj.h"
84 #include "nsis/nulsft.h"
85 #include "autoit.h"
86 #include "textnorm.h"
87 #include "unzip.h"
88 #include "dlp.h"
89 #include "default.h"
90 #include "cpio.h"
91 #include "macho.h"
92 #include "ishield.h"
93 #include "7z_iface.h"
94 #include "fmap.h"
95 #include "cache.h"
96 #include "events.h"
97 #include "swf.h"
98 #include "jpeg.h"
99 #include "gif.h"
100 #include "png.h"
101 #include "iso9660.h"
102 #include "dmg.h"
103 #include "xar.h"
104 #include "hfsplus.h"
105 #include "xz_iface.h"
106 #include "mbr.h"
107 #include "gpt.h"
108 #include "apm.h"
109 #include "ooxml.h"
110 #include "xdp.h"
111 #include "json_api.h"
112 #include "msxml.h"
113 #include "tiff.h"
114 #include "hwp.h"
115 #include "msdoc.h"
116 #include "execs.h"
117 #include "egg.h"
118
119 // libclamunrar_iface
120 #include "unrar_iface.h"
121
122 #ifdef HAVE_BZLIB_H
123 #include <bzlib.h>
124 #endif
125
126 #include <fcntl.h>
127 #include <string.h>
128
cli_magic_scan_dir(const char * dirname,cli_ctx * ctx)129 cl_error_t cli_magic_scan_dir(const char *dirname, cli_ctx *ctx)
130 {
131 cl_error_t status = CL_CLEAN;
132 DIR *dd = NULL;
133 struct dirent *dent;
134 STATBUF statbuf;
135 char *fname = NULL;
136 unsigned int viruses_found = 0;
137 bool processing_normalized_files = ctx->next_layer_is_normalized;
138
139 if ((dd = opendir(dirname)) != NULL) {
140 while ((dent = readdir(dd))) {
141 if (dent->d_ino) {
142 if (strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..")) {
143 /* build the full name */
144 fname = cli_malloc(strlen(dirname) + strlen(dent->d_name) + 2);
145 if (!fname) {
146 cli_dbgmsg("cli_magic_scan_dir: Unable to allocate memory for filename\n");
147 status = CL_EMEM;
148 goto done;
149 }
150
151 sprintf(fname, "%s" PATHSEP "%s", dirname, dent->d_name);
152
153 /* stat the file */
154 if (LSTAT(fname, &statbuf) != -1) {
155 if (S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode)) {
156 if (cli_magic_scan_dir(fname, ctx) == CL_VIRUS) {
157 if (SCAN_ALLMATCHES) {
158 viruses_found++;
159 continue;
160 }
161
162 status = CL_VIRUS;
163 goto done;
164 }
165 } else {
166 if (S_ISREG(statbuf.st_mode)) {
167 ctx->next_layer_is_normalized = processing_normalized_files; // This flag ingested by cli_recursion_stack_push().
168 if (cli_magic_scan_file(fname, ctx, dent->d_name) == CL_VIRUS) {
169 if (SCAN_ALLMATCHES) {
170 viruses_found++;
171 continue;
172 }
173
174 status = CL_VIRUS;
175 goto done;
176 }
177 }
178 }
179 }
180 free(fname);
181 fname = NULL;
182 }
183 }
184 }
185 } else {
186 cli_dbgmsg("cli_magic_scan_dir: Can't open directory %s.\n", dirname);
187 status = CL_EOPEN;
188 goto done;
189 }
190
191 done:
192 ctx->next_layer_is_normalized = false;
193 if (NULL != dd) {
194 closedir(dd);
195 }
196 if (NULL != fname) {
197 free(fname);
198 }
199
200 if (SCAN_ALLMATCHES && viruses_found)
201 status = CL_VIRUS;
202
203 return status;
204 }
205
206 /**
207 * @brief Scan the metadata using cli_matchmeta()
208 *
209 * @param metadata unrar metadata structure
210 * @param ctx scanning context structure
211 * @param files
212 * @return cl_error_t Returns CL_CLEAN if nothing found, CL_VIRUS if something found, CL_EUNPACK if encrypted.
213 */
cli_unrar_scanmetadata(unrar_metadata_t * metadata,cli_ctx * ctx,unsigned int files)214 static cl_error_t cli_unrar_scanmetadata(unrar_metadata_t *metadata, cli_ctx *ctx, unsigned int files)
215 {
216 cl_error_t status = CL_CLEAN;
217
218 cli_dbgmsg("RAR: %s, crc32: 0x%x, encrypted: %u, compressed: %u, normal: %u, method: %u, ratio: %u\n",
219 metadata->filename, metadata->crc, metadata->encrypted, (unsigned int)metadata->pack_size,
220 (unsigned int)metadata->unpack_size, metadata->method,
221 metadata->pack_size ? (unsigned int)(metadata->unpack_size / metadata->pack_size) : 0);
222
223 if (CL_VIRUS == cli_matchmeta(ctx, metadata->filename, metadata->pack_size, metadata->unpack_size, metadata->encrypted, files, metadata->crc, NULL)) {
224 status = CL_VIRUS;
225 } else if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE && metadata->encrypted) {
226 cli_dbgmsg("RAR: Encrypted files found in archive.\n");
227 status = CL_EUNPACK;
228 }
229
230 return status;
231 }
232
cli_scanrar_file(const char * filepath,int desc,cli_ctx * ctx)233 static cl_error_t cli_scanrar_file(const char *filepath, int desc, cli_ctx *ctx)
234 {
235 cl_error_t status = CL_EPARSE;
236 cl_unrar_error_t unrar_ret = UNRAR_ERR;
237
238 unsigned int file_count = 0;
239 unsigned int viruses_found = 0;
240
241 uint32_t nEncryptedFilesFound = 0;
242 uint32_t nTooLargeFilesFound = 0;
243
244 void *hArchive = NULL;
245
246 char *comment = NULL;
247 uint32_t comment_size = 0;
248
249 unrar_metadata_t metadata;
250 char *filename_base = NULL;
251 char *extract_fullpath = NULL;
252 char *comment_fullpath = NULL;
253
254 UNUSEDPARAM(desc);
255
256 if (filepath == NULL || ctx == NULL) {
257 cli_dbgmsg("RAR: Invalid arguments!\n");
258 return CL_EARG;
259 }
260
261 cli_dbgmsg("in scanrar()\n");
262
263 /* Zero out the metadata struct before we read the header */
264 memset(&metadata, 0, sizeof(unrar_metadata_t));
265
266 /*
267 * Open the archive.
268 */
269 if (UNRAR_OK != (unrar_ret = cli_unrar_open(filepath, &hArchive, &comment, &comment_size, cli_debug_flag))) {
270 if (unrar_ret == UNRAR_ENCRYPTED) {
271 cli_dbgmsg("RAR: Encrypted main header\n");
272 status = CL_EUNPACK;
273 goto done;
274 }
275 if (unrar_ret == UNRAR_EMEM) {
276 status = CL_EMEM;
277 goto done;
278 } else if (unrar_ret == UNRAR_EOPEN) {
279 status = CL_EOPEN;
280 goto done;
281 } else {
282 status = CL_EFORMAT;
283 goto done;
284 }
285 }
286
287 /* If the archive header had a comment, write it to the comment dir. */
288 if ((comment != NULL) && (comment_size > 0)) {
289
290 if (ctx->engine->keeptmp) {
291 int comment_fd = -1;
292 if (!(comment_fullpath = cli_gentemp_with_prefix(ctx->sub_tmpdir, "comments"))) {
293 status = CL_EMEM;
294 goto done;
295 }
296
297 comment_fd = open(comment_fullpath, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600);
298 if (comment_fd < 0) {
299 cli_dbgmsg("RAR: ERROR: Failed to open output file\n");
300 } else {
301 cli_dbgmsg("RAR: Writing the archive comment to temp file: %s\n", comment_fullpath);
302 if (0 == write(comment_fd, comment, comment_size)) {
303 cli_dbgmsg("RAR: ERROR: Failed to write to output file\n");
304 }
305 close(comment_fd);
306 }
307 }
308
309 /* Scan the comment */
310 status = cli_magic_scan_buff(comment, comment_size, ctx, NULL);
311
312 if ((status == CL_VIRUS) && SCAN_ALLMATCHES) {
313 status = CL_CLEAN;
314 viruses_found++;
315 }
316 if ((status == CL_VIRUS) || (status == CL_BREAK)) {
317 goto done;
318 }
319 }
320
321 /*
322 * Read & scan each file header.
323 * Extract & scan each file.
324 *
325 * Skip files if they will exceed max filesize or max scansize.
326 * Count the number of encrypted file headers and encrypted files.
327 * - Alert if there are encrypted files,
328 * if the Heuristic for encrypted archives is enabled,
329 * and if we have not detected a signature match.
330 */
331 do {
332 status = CL_CLEAN;
333
334 /* Zero out the metadata struct before we read the header */
335 memset(&metadata, 0, sizeof(unrar_metadata_t));
336
337 /*
338 * Get the header information for the next file in the archive.
339 */
340 unrar_ret = cli_unrar_peek_file_header(hArchive, &metadata);
341 if (unrar_ret != UNRAR_OK) {
342 if (unrar_ret == UNRAR_ENCRYPTED) {
343 /* Found an encrypted file header, must skip. */
344 cli_dbgmsg("RAR: Encrypted file header, unable to reading file metadata and file contents. Skipping file...\n");
345 nEncryptedFilesFound += 1;
346
347 if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
348 /* Failed to skip! Break extraction loop. */
349 cli_dbgmsg("RAR: Failed to skip file. RAR archive extraction has failed.\n");
350 break;
351 }
352 } else if (unrar_ret == UNRAR_BREAK) {
353 /* No more files. Break extraction loop. */
354 cli_dbgmsg("RAR: No more files in archive.\n");
355 break;
356 } else {
357 /* Memory error or some other error reading the header info. */
358 cli_dbgmsg("RAR: Error (%u) reading file header!\n", unrar_ret);
359 break;
360 }
361 } else {
362 file_count += 1;
363
364 /*
365 * Scan the metadata for the file in question since the content was clean, or we're running in all-match.
366 */
367 status = cli_unrar_scanmetadata(&metadata, ctx, file_count);
368 if ((status == CL_VIRUS) && SCAN_ALLMATCHES) {
369 status = CL_CLEAN;
370 viruses_found++;
371 }
372 if ((status == CL_VIRUS) || (status == CL_BREAK)) {
373 break;
374 }
375
376 /* Check if we've already exceeded the scan limit */
377 if (cli_checklimits("RAR", ctx, 0, 0, 0))
378 break;
379
380 if (metadata.is_dir) {
381 /* Entry is a directory. Skip. */
382 cli_dbgmsg("RAR: Found directory. Skipping to next file.\n");
383
384 if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
385 /* Failed to skip! Break extraction loop. */
386 cli_dbgmsg("RAR: Failed to skip directory. RAR archive extraction has failed.\n");
387 break;
388 }
389 } else if (cli_checklimits("RAR", ctx, metadata.unpack_size, 0, 0)) {
390 /* File size exceeds maxfilesize, must skip extraction.
391 * Although we may be able to scan the metadata */
392 nTooLargeFilesFound += 1;
393
394 cli_dbgmsg("RAR: Next file is too large (%" PRIu64 " bytes); it would exceed max scansize. Skipping to next file.\n", metadata.unpack_size);
395
396 if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
397 /* Failed to skip! Break extraction loop. */
398 cli_dbgmsg("RAR: Failed to skip file. RAR archive extraction has failed.\n");
399 break;
400 }
401 } else if (metadata.encrypted != 0) {
402 /* Found an encrypted file, must skip. */
403 cli_dbgmsg("RAR: Encrypted file, unable to extract file contents. Skipping file...\n");
404 nEncryptedFilesFound += 1;
405
406 if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
407 /* Failed to skip! Break extraction loop. */
408 cli_dbgmsg("RAR: Failed to skip file. RAR archive extraction has failed.\n");
409 break;
410 }
411 } else {
412 /*
413 * Extract the file...
414 */
415 if (NULL != metadata.filename) {
416 (void)cli_basename(metadata.filename, strlen(metadata.filename), &filename_base);
417 }
418
419 if (!(ctx->engine->keeptmp) ||
420 (NULL == filename_base)) {
421 extract_fullpath = cli_gentemp(ctx->sub_tmpdir);
422 } else {
423 extract_fullpath = cli_gentemp_with_prefix(ctx->sub_tmpdir, filename_base);
424 }
425 if (NULL == extract_fullpath) {
426 cli_dbgmsg("RAR: Memory error allocating filename for extracted file.");
427 status = CL_EMEM;
428 break;
429 }
430 cli_dbgmsg("RAR: Extracting file: %s to %s\n", metadata.filename, extract_fullpath);
431
432 unrar_ret = cli_unrar_extract_file(hArchive, extract_fullpath, NULL);
433 if (unrar_ret != UNRAR_OK) {
434 /*
435 * Some other error extracting the file
436 */
437 cli_dbgmsg("RAR: Error extracting file: %s\n", metadata.filename);
438
439 /* TODO:
440 * may need to manually skip the file depending on what, specifically, cli_unrar_extract_file() returned.
441 */
442 } else {
443 /*
444 * File should be extracted...
445 * ... make sure we have read permissions to the file.
446 */
447 #ifdef _WIN32
448 if (0 != _access_s(extract_fullpath, R_OK)) {
449 #else
450 if (0 != access(extract_fullpath, R_OK)) {
451 #endif
452 cli_dbgmsg("RAR: Don't have read permissions, attempting to change file permissions to make it readable..\n");
453 #ifdef _WIN32
454 if (0 != _chmod(extract_fullpath, _S_IREAD)) {
455 #else
456 if (0 != chmod(extract_fullpath, S_IRUSR | S_IRGRP)) {
457 #endif
458 cli_dbgmsg("RAR: Failed to change permission bits so the extracted file is readable..\n");
459 }
460 }
461
462 /*
463 * ... scan the extracted file.
464 */
465 cli_dbgmsg("RAR: Extraction complete. Scanning now...\n");
466 status = cli_magic_scan_file(extract_fullpath, ctx, filename_base);
467 if (status == CL_EOPEN) {
468 cli_dbgmsg("RAR: File not found, Extraction failed!\n");
469 status = CL_CLEAN;
470 } else {
471 /* Delete the tempfile if not --leave-temps */
472 if (!ctx->engine->keeptmp)
473 if (cli_unlink(extract_fullpath))
474 cli_dbgmsg("RAR: Failed to unlink the extracted file: %s\n", extract_fullpath);
475
476 if (status == CL_VIRUS) {
477 cli_dbgmsg("RAR: infected with %s\n", cli_get_last_virus(ctx));
478 status = CL_VIRUS;
479 viruses_found++;
480 }
481 }
482 }
483
484 /* Free up that the filepath */
485 if (NULL != extract_fullpath) {
486 free(extract_fullpath);
487 extract_fullpath = NULL;
488 }
489 }
490 }
491
492 if (status == CL_VIRUS) {
493 if (SCAN_ALLMATCHES)
494 status = CL_SUCCESS;
495 else
496 break;
497 }
498
499 if (ctx->engine->maxscansize && ctx->scansize >= ctx->engine->maxscansize) {
500 status = CL_CLEAN;
501 break;
502 }
503
504 /*
505 * Free up any malloced metadata...
506 */
507 if (metadata.filename != NULL) {
508 free(metadata.filename);
509 metadata.filename = NULL;
510 }
511 if (NULL != filename_base) {
512 free(filename_base);
513 filename_base = NULL;
514 }
515
516 } while (status == CL_CLEAN);
517
518 if (status == CL_BREAK)
519 status = CL_CLEAN;
520
521 done:
522 if (NULL != comment) {
523 free(comment);
524 comment = NULL;
525 }
526
527 if (NULL != comment_fullpath) {
528 if (!ctx->engine->keeptmp) {
529 cli_rmdirs(comment_fullpath);
530 }
531 free(comment_fullpath);
532 comment_fullpath = NULL;
533 }
534
535 if (NULL != hArchive) {
536 cli_unrar_close(hArchive);
537 hArchive = NULL;
538 }
539
540 if (NULL != filename_base) {
541 free(filename_base);
542 filename_base = NULL;
543 }
544
545 if (metadata.filename != NULL) {
546 free(metadata.filename);
547 metadata.filename = NULL;
548 }
549
550 if (NULL != extract_fullpath) {
551 free(extract_fullpath);
552 extract_fullpath = NULL;
553 }
554
555 if ((CL_VIRUS != status) && ((CL_EUNPACK == status) || (nEncryptedFilesFound > 0))) {
556 /* If user requests enabled the Heuristic for encrypted archives... */
557 if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) {
558 if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Encrypted.RAR")) {
559 status = CL_VIRUS;
560 }
561 }
562 if (status != CL_VIRUS) {
563 status = CL_CLEAN;
564 }
565 }
566
567 cli_dbgmsg("RAR: Exit code: %d\n", status);
568
569 if (SCAN_ALLMATCHES && viruses_found)
570 status = CL_VIRUS;
571
572 return status;
573 }
574
575 static cl_error_t cli_scanrar(cli_ctx *ctx)
576 {
577 cl_error_t status = CL_SUCCESS;
578
579 const char *filepath = NULL;
580 int fd = -1;
581
582 char *tmpname = NULL;
583 int tmpfd = -1;
584
585 #ifdef _WIN32
586 if ((SCAN_UNPRIVILEGED) || (NULL == ctx->sub_filepath) || (0 != _access_s(ctx->sub_filepath, R_OK))) {
587 #else
588 if ((SCAN_UNPRIVILEGED) || (NULL == ctx->sub_filepath) || (0 != access(ctx->sub_filepath, R_OK))) {
589 #endif
590 /* If map is not file-backed have to dump to file for scanrar. */
591 status = fmap_dump_to_file(ctx->fmap, ctx->sub_filepath, ctx->sub_tmpdir, &tmpname, &tmpfd, 0, SIZE_MAX);
592 if (status != CL_SUCCESS) {
593 cli_dbgmsg("cli_magic_scan: failed to generate temporary file.\n");
594 goto done;
595 }
596 filepath = tmpname;
597 fd = tmpfd;
598 } else {
599 /* Use the original file and file descriptor. */
600 filepath = ctx->sub_filepath;
601 fd = fmap_fd(ctx->fmap);
602 }
603
604 /* scan file */
605 status = cli_scanrar_file(filepath, fd, ctx);
606
607 if ((NULL == tmpname) && (CL_EOPEN == status)) {
608 /*
609 * Failed to open the file using the original filename.
610 * Try writing the file descriptor to a temp file and try again.
611 */
612 status = fmap_dump_to_file(ctx->fmap, ctx->sub_filepath, ctx->sub_tmpdir, &tmpname, &tmpfd, 0, SIZE_MAX);
613 if (status != CL_SUCCESS) {
614 cli_dbgmsg("cli_magic_scan: failed to generate temporary file.\n");
615 goto done;
616 }
617 filepath = tmpname;
618 fd = tmpfd;
619
620 /* try to scan again */
621 status = cli_scanrar_file(filepath, fd, ctx);
622 }
623
624 done:
625 if (tmpfd != -1) {
626 /* If dumped tempfile, need to cleanup */
627 close(tmpfd);
628 if (!ctx->engine->keeptmp) {
629 if (cli_unlink(tmpname)) {
630 status = CL_EUNLINK;
631 }
632 }
633 }
634
635 if (tmpname != NULL) {
636 free(tmpname);
637 }
638 return status;
639 }
640
641 /**
642 * @brief Scan the metadata using cli_matchmeta()
643 *
644 * @param metadata egg metadata structure
645 * @param ctx scanning context structure
646 * @param files number of files
647 * @return cl_error_t Returns CL_CLEAN if nothing found, CL_VIRUS if something found, CL_EUNPACK if encrypted.
648 */
649 static cl_error_t cli_egg_scanmetadata(cl_egg_metadata *metadata, cli_ctx *ctx, unsigned int files)
650 {
651 cl_error_t status = CL_CLEAN;
652
653 cli_dbgmsg("EGG: %s, encrypted: %u, compressed: %u, normal: %u, ratio: %u\n",
654 metadata->filename, metadata->encrypted, (unsigned int)metadata->pack_size,
655 (unsigned int)metadata->unpack_size,
656 metadata->pack_size ? (unsigned int)(metadata->unpack_size / metadata->pack_size) : 0);
657
658 if (CL_VIRUS == cli_matchmeta(ctx, metadata->filename, metadata->pack_size, metadata->unpack_size, metadata->encrypted, files, 0, NULL)) {
659 status = CL_VIRUS;
660 } else if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE && metadata->encrypted) {
661 cli_dbgmsg("EGG: Encrypted files found in archive.\n");
662 status = CL_EUNPACK;
663 }
664
665 return status;
666 }
667
668 static cl_error_t cli_scanegg(cli_ctx *ctx)
669 {
670 cl_error_t status = CL_EPARSE;
671 cl_error_t egg_ret = CL_EPARSE;
672
673 unsigned int file_count = 0;
674 unsigned int viruses_found = 0;
675
676 uint32_t nEncryptedFilesFound = 0;
677 uint32_t nTooLargeFilesFound = 0;
678
679 void *hArchive = NULL;
680
681 char **comments = NULL;
682 uint32_t nComments = 0;
683
684 cl_egg_metadata metadata;
685 char *filename_base = NULL;
686 char *extract_fullpath = NULL;
687 char *comment_fullpath = NULL;
688
689 if (ctx == NULL) {
690 cli_dbgmsg("EGG: Invalid arguments!\n");
691 return CL_EARG;
692 }
693
694 cli_dbgmsg("in scanegg()\n");
695
696 /* Zero out the metadata struct before we read the header */
697 memset(&metadata, 0, sizeof(cl_egg_metadata));
698
699 /*
700 * Open the archive.
701 */
702 if (CL_SUCCESS != (egg_ret = cli_egg_open(ctx->fmap, &hArchive, &comments, &nComments))) {
703 if (egg_ret == CL_EUNPACK) {
704 cli_dbgmsg("EGG: Encrypted main header\n");
705 status = CL_EUNPACK;
706 goto done;
707 }
708 if (egg_ret == CL_EMEM) {
709 status = CL_EMEM;
710 goto done;
711 } else {
712 status = CL_EFORMAT;
713 goto done;
714 }
715 }
716
717 /* If the archive header had a comment, write it to the comment dir. */
718 if (comments != NULL) {
719 uint32_t i;
720 for (i = 0; i < nComments; i++) {
721 /*
722 * Drop the comment to a temp file, if requested
723 */
724 if (ctx->engine->keeptmp) {
725 int comment_fd = -1;
726 size_t prefixLen = strlen("comments_") + 5;
727 char *prefix = (char *)malloc(prefixLen + 1);
728
729 snprintf(prefix, prefixLen, "comments_%u", i);
730 prefix[prefixLen] = '\0';
731
732 if (!(comment_fullpath = cli_gentemp_with_prefix(ctx->sub_tmpdir, prefix))) {
733 free(prefix);
734 status = CL_EMEM;
735 goto done;
736 }
737 free(prefix);
738
739 comment_fd = open(comment_fullpath, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600);
740 if (comment_fd < 0) {
741 cli_dbgmsg("EGG: ERROR: Failed to open output file\n");
742 } else {
743 cli_dbgmsg("EGG: Writing the archive comment to temp file: %s\n", comment_fullpath);
744 if (0 == write(comment_fd, comments[i], nComments)) {
745 cli_dbgmsg("EGG: ERROR: Failed to write to output file\n");
746 }
747 close(comment_fd);
748 }
749 free(comment_fullpath);
750 comment_fullpath = NULL;
751 }
752
753 /*
754 * Scan the comment.
755 */
756 status = cli_magic_scan_buff(comments[i], strlen(comments[i]), ctx, NULL);
757
758 if ((status == CL_VIRUS) && SCAN_ALLMATCHES) {
759 status = CL_CLEAN;
760 viruses_found++;
761 }
762 if ((status == CL_VIRUS) || (status == CL_BREAK)) {
763 goto done;
764 }
765 }
766 }
767
768 /*
769 * Read & scan each file header.
770 * Extract & scan each file.
771 *
772 * Skip files if they will exceed max filesize or max scansize.
773 * Count the number of encrypted file headers and encrypted files.
774 * - Alert if there are encrypted files,
775 * if the Heuristic for encrypted archives is enabled,
776 * and if we have not detected a signature match.
777 */
778 do {
779 status = CL_CLEAN;
780
781 /* Zero out the metadata struct before we read the header */
782 memset(&metadata, 0, sizeof(cl_egg_metadata));
783
784 /*
785 * Get the header information for the next file in the archive.
786 */
787 egg_ret = cli_egg_peek_file_header(hArchive, &metadata);
788 if (egg_ret != CL_SUCCESS) {
789 if (egg_ret == CL_EUNPACK) {
790 /* Found an encrypted file header, must skip. */
791 cli_dbgmsg("EGG: Encrypted file header, unable to reading file metadata and file contents. Skipping file...\n");
792 nEncryptedFilesFound += 1;
793
794 if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
795 /* Failed to skip! Break extraction loop. */
796 cli_dbgmsg("EGG: Failed to skip file. EGG archive extraction has failed.\n");
797 break;
798 }
799 } else if (egg_ret == CL_BREAK) {
800 /* No more files. Break extraction loop. */
801 cli_dbgmsg("EGG: No more files in archive.\n");
802 break;
803 } else {
804 /* Memory error or some other error reading the header info. */
805 cli_dbgmsg("EGG: Error (%u) reading file header!\n", egg_ret);
806 break;
807 }
808 } else {
809 file_count += 1;
810
811 /*
812 * Scan the metadata for the file in question since the content was clean, or we're running in all-match.
813 */
814 status = cli_egg_scanmetadata(&metadata, ctx, file_count);
815 if ((status == CL_VIRUS) && SCAN_ALLMATCHES) {
816 status = CL_CLEAN;
817 viruses_found++;
818 }
819 if ((status == CL_VIRUS) || (status == CL_BREAK)) {
820 break;
821 }
822 /* Check if we've already exceeded the scan limit */
823 if (cli_checklimits("EGG", ctx, 0, 0, 0))
824 break;
825
826 if (metadata.is_dir) {
827 /* Entry is a directory. Skip. */
828 cli_dbgmsg("EGG: Found directory. Skipping to next file.\n");
829
830 if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
831 /* Failed to skip! Break extraction loop. */
832 cli_dbgmsg("EGG: Failed to skip directory. EGG archive extraction has failed.\n");
833 break;
834 }
835 } else if (cli_checklimits("EGG", ctx, metadata.unpack_size, 0, 0)) {
836 /* File size exceeds maxfilesize, must skip extraction.
837 * Although we may be able to scan the metadata */
838 nTooLargeFilesFound += 1;
839
840 cli_dbgmsg("EGG: Next file is too large (%" PRIu64 " bytes); it would exceed max scansize. Skipping to next file.\n", metadata.unpack_size);
841
842 if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
843 /* Failed to skip! Break extraction loop. */
844 cli_dbgmsg("EGG: Failed to skip file. EGG archive extraction has failed.\n");
845 break;
846 }
847 } else if (metadata.encrypted != 0) {
848 /* Found an encrypted file, must skip. */
849 cli_dbgmsg("EGG: Encrypted file, unable to extract file contents. Skipping file...\n");
850 nEncryptedFilesFound += 1;
851
852 if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
853 /* Failed to skip! Break extraction loop. */
854 cli_dbgmsg("EGG: Failed to skip file. EGG archive extraction has failed.\n");
855 break;
856 }
857 } else {
858 /*
859 * Extract the file...
860 */
861 char *extract_filename = NULL;
862 char *extract_buffer = NULL;
863 size_t extract_buffer_len = 0;
864
865 cli_dbgmsg("EGG: Extracting file: %s\n", metadata.filename);
866
867 egg_ret = cli_egg_extract_file(hArchive, (const char **)&extract_filename, (const char **)&extract_buffer, &extract_buffer_len);
868 if (egg_ret != CL_SUCCESS) {
869 /*
870 * Some other error extracting the file
871 */
872 cli_dbgmsg("EGG: Error extracting file: %s\n", metadata.filename);
873 } else if (!extract_buffer || 0 == extract_buffer_len) {
874 /*
875 * Empty file. Skip.
876 */
877 cli_dbgmsg("EGG: Skipping empty file: %s\n", metadata.filename);
878
879 if (NULL != extract_filename) {
880 free(extract_filename);
881 extract_filename = NULL;
882 }
883 if (NULL != extract_buffer) {
884 free(extract_buffer);
885 extract_buffer = NULL;
886 }
887 } else {
888 /*
889 * Drop to a temp file, if requested.
890 */
891 if (NULL != metadata.filename) {
892 (void)cli_basename(metadata.filename, strlen(metadata.filename), &filename_base);
893 }
894
895 if (ctx->engine->keeptmp) {
896 int extracted_fd = -1;
897 if (NULL == filename_base) {
898 extract_fullpath = cli_gentemp(ctx->sub_tmpdir);
899 } else {
900 extract_fullpath = cli_gentemp_with_prefix(ctx->sub_tmpdir, filename_base);
901 }
902 if (NULL == extract_fullpath) {
903 cli_dbgmsg("EGG: Memory error allocating filename for extracted file.");
904 status = CL_EMEM;
905 break;
906 }
907
908 extracted_fd = open(extract_fullpath, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600);
909 if (extracted_fd < 0) {
910 cli_dbgmsg("EGG: ERROR: Failed to open output file\n");
911 } else {
912 cli_dbgmsg("EGG: Writing the extracted file contents to temp file: %s\n", extract_fullpath);
913 if (0 == write(extracted_fd, extract_buffer, extract_buffer_len)) {
914 cli_dbgmsg("EGG: ERROR: Failed to write to output file\n");
915 } else {
916 close(extracted_fd);
917 extracted_fd = -1;
918 }
919 }
920 }
921
922 /*
923 * Scan the extracted file...
924 */
925 cli_dbgmsg("EGG: Extraction complete. Scanning now...\n");
926 status = cli_magic_scan_buff(extract_buffer, extract_buffer_len, ctx, filename_base);
927 if (status == CL_VIRUS) {
928 cli_dbgmsg("EGG: infected with %s\n", cli_get_last_virus(ctx));
929 status = CL_VIRUS;
930 viruses_found++;
931 }
932
933 if (NULL != filename_base) {
934 free(filename_base);
935 filename_base = NULL;
936 }
937 if (NULL != extract_filename) {
938 free(extract_filename);
939 extract_filename = NULL;
940 }
941 if (NULL != extract_buffer) {
942 free(extract_buffer);
943 extract_buffer = NULL;
944 }
945 }
946
947 /* Free up that the filepath */
948 if (NULL != extract_fullpath) {
949 free(extract_fullpath);
950 extract_fullpath = NULL;
951 }
952 }
953 }
954
955 if (status == CL_VIRUS) {
956 if (SCAN_ALLMATCHES)
957 status = CL_SUCCESS;
958 else
959 break;
960 }
961
962 if (ctx->engine->maxscansize && ctx->scansize >= ctx->engine->maxscansize) {
963 status = CL_CLEAN;
964 break;
965 }
966
967 /*
968 * TODO: Free up any malloced metadata...
969 */
970 if (metadata.filename != NULL) {
971 free(metadata.filename);
972 metadata.filename = NULL;
973 }
974
975 } while (status == CL_CLEAN);
976
977 if (status == CL_BREAK)
978 status = CL_CLEAN;
979
980 done:
981
982 if (NULL != comment_fullpath) {
983 free(comment_fullpath);
984 comment_fullpath = NULL;
985 }
986
987 if (NULL != hArchive) {
988 cli_egg_close(hArchive);
989 hArchive = NULL;
990 }
991
992 if (NULL != filename_base) {
993 free(filename_base);
994 filename_base = NULL;
995 }
996
997 if (metadata.filename != NULL) {
998 free(metadata.filename);
999 metadata.filename = NULL;
1000 }
1001
1002 if (NULL != extract_fullpath) {
1003 free(extract_fullpath);
1004 extract_fullpath = NULL;
1005 }
1006
1007 if ((CL_VIRUS != status) && ((CL_EUNPACK == status) || (nEncryptedFilesFound > 0))) {
1008 /* If user requests enabled the Heuristic for encrypted archives... */
1009 if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) {
1010 if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Encrypted.EGG")) {
1011 status = CL_VIRUS;
1012 }
1013 }
1014 if (status != CL_VIRUS) {
1015 status = CL_CLEAN;
1016 }
1017 }
1018
1019 cli_dbgmsg("EGG: Exit code: %d\n", status);
1020
1021 if (SCAN_ALLMATCHES && viruses_found)
1022 status = CL_VIRUS;
1023
1024 return status;
1025 }
1026
1027 static cl_error_t cli_scanarj(cli_ctx *ctx)
1028 {
1029 cl_error_t ret = CL_CLEAN;
1030 cl_error_t status;
1031 int file = 0;
1032 arj_metadata_t metadata;
1033 char *dir;
1034 int virus_found = 0;
1035
1036 cli_dbgmsg("in cli_scanarj()\n");
1037
1038 memset(&metadata, 0, sizeof(arj_metadata_t));
1039
1040 /* generate the temporary directory */
1041 if (!(dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "arj-tmp")))
1042 return CL_EMEM;
1043
1044 if (mkdir(dir, 0700)) {
1045 cli_dbgmsg("ARJ: Can't create temporary directory %s\n", dir);
1046 free(dir);
1047 return CL_ETMPDIR;
1048 }
1049
1050 ret = cli_unarj_open(ctx->fmap, dir, &metadata);
1051 if (ret != CL_SUCCESS) {
1052 if (!ctx->engine->keeptmp)
1053 cli_rmdirs(dir);
1054 free(dir);
1055 cli_dbgmsg("ARJ: Error: %s\n", cl_strerror(ret));
1056 return ret;
1057 }
1058
1059 do {
1060
1061 metadata.filename = NULL;
1062 ret = cli_unarj_prepare_file(dir, &metadata);
1063 if (ret != CL_SUCCESS) {
1064 cli_dbgmsg("ARJ: cli_unarj_prepare_file Error: %s\n", cl_strerror(ret));
1065 break;
1066 }
1067 file++;
1068 if (cli_matchmeta(ctx, metadata.filename, metadata.comp_size, metadata.orig_size, metadata.encrypted, file, 0, NULL) == CL_VIRUS) {
1069 if (!SCAN_ALLMATCHES) {
1070 cli_rmdirs(dir);
1071 free(dir);
1072 return CL_VIRUS;
1073 }
1074 virus_found = 1;
1075 ret = CL_SUCCESS;
1076 }
1077
1078 if ((ret = cli_checklimits("ARJ", ctx, metadata.orig_size, metadata.comp_size, 0)) != CL_CLEAN) {
1079 ret = CL_SUCCESS;
1080 if (metadata.filename)
1081 free(metadata.filename);
1082 continue;
1083 }
1084 ret = cli_unarj_extract_file(dir, &metadata);
1085 if (ret != CL_SUCCESS) {
1086 cli_dbgmsg("ARJ: cli_unarj_extract_file Error: %s\n", cl_strerror(ret));
1087 }
1088 if (metadata.ofd >= 0) {
1089 if (lseek(metadata.ofd, 0, SEEK_SET) == -1) {
1090 cli_dbgmsg("ARJ: call to lseek() failed\n");
1091 }
1092 status = cli_magic_scan_desc(metadata.ofd, NULL, ctx, metadata.filename);
1093 close(metadata.ofd);
1094 if (status == CL_VIRUS) {
1095 cli_dbgmsg("ARJ: infected with %s\n", cli_get_last_virus(ctx));
1096 if (!SCAN_ALLMATCHES) {
1097 ret = CL_VIRUS;
1098 if (metadata.filename) {
1099 free(metadata.filename);
1100 metadata.filename = NULL;
1101 }
1102 break;
1103 }
1104 virus_found = 1;
1105 ret = CL_SUCCESS;
1106 }
1107 }
1108 if (metadata.filename) {
1109 free(metadata.filename);
1110 metadata.filename = NULL;
1111 }
1112
1113 } while (ret == CL_SUCCESS);
1114
1115 if (!ctx->engine->keeptmp)
1116 cli_rmdirs(dir);
1117
1118 free(dir);
1119 if (metadata.filename) {
1120 free(metadata.filename);
1121 }
1122
1123 if (virus_found != 0)
1124 ret = CL_VIRUS;
1125 cli_dbgmsg("ARJ: Exit code: %d\n", ret);
1126 if (ret == CL_BREAK)
1127 ret = CL_CLEAN;
1128
1129 return ret;
1130 }
1131
1132 static cl_error_t cli_scangzip_with_zib_from_the_80s(cli_ctx *ctx, unsigned char *buff)
1133 {
1134 int fd;
1135 cl_error_t ret;
1136 size_t outsize = 0;
1137 int bytes;
1138 fmap_t *map = ctx->fmap;
1139 char *tmpname;
1140 gzFile gz;
1141
1142 ret = fmap_fd(map);
1143 if (ret < 0)
1144 return CL_EDUP;
1145 fd = dup(ret);
1146 if (fd < 0)
1147 return CL_EDUP;
1148
1149 if (!(gz = gzdopen(fd, "rb"))) {
1150 close(fd);
1151 return CL_EOPEN;
1152 }
1153
1154 if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
1155 cli_dbgmsg("GZip: Can't generate temporary file.\n");
1156 gzclose(gz);
1157 close(fd);
1158 return ret;
1159 }
1160
1161 while ((bytes = gzread(gz, buff, FILEBUFF)) > 0) {
1162 outsize += bytes;
1163 if (cli_checklimits("GZip", ctx, outsize, 0, 0) != CL_CLEAN)
1164 break;
1165 if (cli_writen(fd, buff, (size_t)bytes) != (size_t)bytes) {
1166 close(fd);
1167 gzclose(gz);
1168 if (cli_unlink(tmpname)) {
1169 free(tmpname);
1170 return CL_EUNLINK;
1171 }
1172 free(tmpname);
1173 return CL_EWRITE;
1174 }
1175 }
1176
1177 gzclose(gz);
1178
1179 if ((ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL)) == CL_VIRUS) {
1180 cli_dbgmsg("GZip: Infected with %s\n", cli_get_last_virus(ctx));
1181 close(fd);
1182 if (!ctx->engine->keeptmp) {
1183 if (cli_unlink(tmpname)) {
1184 free(tmpname);
1185 return CL_EUNLINK;
1186 }
1187 }
1188 free(tmpname);
1189 return CL_VIRUS;
1190 }
1191 close(fd);
1192 if (!ctx->engine->keeptmp)
1193 if (cli_unlink(tmpname))
1194 ret = CL_EUNLINK;
1195 free(tmpname);
1196 return ret;
1197 }
1198
1199 static cl_error_t cli_scangzip(cli_ctx *ctx)
1200 {
1201 int fd;
1202 cl_error_t ret = CL_CLEAN;
1203 unsigned char buff[FILEBUFF];
1204 char *tmpname;
1205 z_stream z;
1206 size_t at = 0, outsize = 0;
1207 fmap_t *map = ctx->fmap;
1208
1209 cli_dbgmsg("in cli_scangzip()\n");
1210
1211 memset(&z, 0, sizeof(z));
1212 if ((ret = inflateInit2(&z, MAX_WBITS + 16)) != Z_OK) {
1213 cli_dbgmsg("GZip: InflateInit failed: %d\n", ret);
1214 return cli_scangzip_with_zib_from_the_80s(ctx, buff);
1215 }
1216
1217 if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
1218 cli_dbgmsg("GZip: Can't generate temporary file.\n");
1219 inflateEnd(&z);
1220 return ret;
1221 }
1222
1223 while (at < map->len) {
1224 unsigned int bytes = MIN(map->len - at, map->pgsz);
1225 if (!(z.next_in = (void *)fmap_need_off_once(map, at, bytes))) {
1226 cli_dbgmsg("GZip: Can't read %u bytes @ %lu.\n", bytes, (long unsigned)at);
1227 inflateEnd(&z);
1228 close(fd);
1229 if (cli_unlink(tmpname)) {
1230 free(tmpname);
1231 return CL_EUNLINK;
1232 }
1233 free(tmpname);
1234 return CL_EREAD;
1235 }
1236 at += bytes;
1237 z.avail_in = bytes;
1238 do {
1239 int inf;
1240 z.avail_out = sizeof(buff);
1241 z.next_out = buff;
1242 inf = inflate(&z, Z_NO_FLUSH);
1243 if (inf != Z_OK && inf != Z_STREAM_END && inf != Z_BUF_ERROR) {
1244 if (sizeof(buff) == z.avail_out) {
1245 cli_dbgmsg("GZip: Bad stream, nothing in output buffer.\n");
1246 at = map->len;
1247 break;
1248 } else {
1249 cli_dbgmsg("GZip: Bad stream, data in output buffer.\n");
1250 /* no break yet, flush extracted bytes to file */
1251 }
1252 }
1253 if (cli_writen(fd, buff, sizeof(buff) - z.avail_out) == (size_t)-1) {
1254 inflateEnd(&z);
1255 close(fd);
1256 if (cli_unlink(tmpname)) {
1257 free(tmpname);
1258 return CL_EUNLINK;
1259 }
1260 free(tmpname);
1261 return CL_EWRITE;
1262 }
1263 outsize += sizeof(buff) - z.avail_out;
1264 if (cli_checklimits("GZip", ctx, outsize, 0, 0) != CL_CLEAN) {
1265 at = map->len;
1266 break;
1267 }
1268 if (inf == Z_STREAM_END) {
1269 at -= z.avail_in;
1270 inflateReset(&z);
1271 break;
1272 } else if (inf != Z_OK && inf != Z_BUF_ERROR) {
1273 at = map->len;
1274 break;
1275 }
1276 } while (z.avail_out == 0);
1277 }
1278
1279 inflateEnd(&z);
1280
1281 if ((ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL)) == CL_VIRUS) {
1282 cli_dbgmsg("GZip: Infected with %s\n", cli_get_last_virus(ctx));
1283 close(fd);
1284 if (!ctx->engine->keeptmp) {
1285 if (cli_unlink(tmpname)) {
1286 free(tmpname);
1287 return CL_EUNLINK;
1288 }
1289 }
1290 free(tmpname);
1291 return CL_VIRUS;
1292 }
1293 close(fd);
1294 if (!ctx->engine->keeptmp)
1295 if (cli_unlink(tmpname))
1296 ret = CL_EUNLINK;
1297 free(tmpname);
1298 return ret;
1299 }
1300
1301 #ifndef HAVE_BZLIB_H
1302 static cl_error_t cli_scanbzip(cli_ctx *ctx)
1303 {
1304 cli_warnmsg("cli_scanbzip: bzip2 support not compiled in\n");
1305 return CL_CLEAN;
1306 }
1307
1308 #else
1309
1310 #ifdef NOBZ2PREFIX
1311 #define BZ2_bzDecompressInit bzDecompressInit
1312 #define BZ2_bzDecompress bzDecompress
1313 #define BZ2_bzDecompressEnd bzDecompressEnd
1314 #endif
1315
1316 static cl_error_t cli_scanbzip(cli_ctx *ctx)
1317 {
1318 cl_error_t ret = CL_CLEAN;
1319 int fd, rc;
1320 uint64_t size = 0;
1321 char *tmpname;
1322 bz_stream strm;
1323 size_t off = 0;
1324 size_t avail;
1325 char buf[FILEBUFF];
1326
1327 memset(&strm, 0, sizeof(strm));
1328 strm.next_out = buf;
1329 strm.avail_out = sizeof(buf);
1330 rc = BZ2_bzDecompressInit(&strm, 0, 0);
1331 if (BZ_OK != rc) {
1332 cli_dbgmsg("Bzip: DecompressInit failed: %d\n", rc);
1333 return CL_EOPEN;
1334 }
1335
1336 if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &fd))) {
1337 cli_dbgmsg("Bzip: Can't generate temporary file.\n");
1338 BZ2_bzDecompressEnd(&strm);
1339 return ret;
1340 }
1341
1342 do {
1343 if (!strm.avail_in) {
1344 strm.next_in = (void *)fmap_need_off_once_len(ctx->fmap, off, FILEBUFF, &avail);
1345 strm.avail_in = avail;
1346 off += avail;
1347 if (!strm.avail_in) {
1348 cli_dbgmsg("Bzip: premature end of compressed stream\n");
1349 break;
1350 }
1351 }
1352
1353 rc = BZ2_bzDecompress(&strm);
1354 if (BZ_OK != rc && BZ_STREAM_END != rc) {
1355 cli_dbgmsg("Bzip: decompress error: %d\n", rc);
1356 break;
1357 }
1358
1359 if (!strm.avail_out || BZ_STREAM_END == rc) {
1360
1361 size += sizeof(buf) - strm.avail_out;
1362
1363 if (cli_writen(fd, buf, sizeof(buf) - strm.avail_out) != sizeof(buf) - strm.avail_out) {
1364 cli_dbgmsg("Bzip: Can't write to file.\n");
1365 BZ2_bzDecompressEnd(&strm);
1366 close(fd);
1367 if (!ctx->engine->keeptmp) {
1368 if (cli_unlink(tmpname)) {
1369 free(tmpname);
1370 return CL_EUNLINK;
1371 }
1372 }
1373 free(tmpname);
1374 return CL_EWRITE;
1375 }
1376
1377 if (cli_checklimits("Bzip", ctx, size, 0, 0) != CL_CLEAN)
1378 break;
1379
1380 strm.next_out = buf;
1381 strm.avail_out = sizeof(buf);
1382 }
1383 } while (BZ_STREAM_END != rc);
1384
1385 BZ2_bzDecompressEnd(&strm);
1386
1387 if ((ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL)) == CL_VIRUS) {
1388 cli_dbgmsg("Bzip: Infected with %s\n", cli_get_last_virus(ctx));
1389 close(fd);
1390 if (!ctx->engine->keeptmp) {
1391 if (cli_unlink(tmpname)) {
1392 ret = CL_EUNLINK;
1393 free(tmpname);
1394 return ret;
1395 }
1396 }
1397 free(tmpname);
1398 return CL_VIRUS;
1399 }
1400 close(fd);
1401 if (!ctx->engine->keeptmp)
1402 if (cli_unlink(tmpname))
1403 ret = CL_EUNLINK;
1404 free(tmpname);
1405
1406 return ret;
1407 }
1408 #endif
1409
1410 static cl_error_t cli_scanxz(cli_ctx *ctx)
1411 {
1412 cl_error_t ret = CL_CLEAN;
1413 int fd, rc;
1414 unsigned long int size = 0;
1415 char *tmpname;
1416 struct CLI_XZ strm;
1417 size_t off = 0;
1418 size_t avail;
1419 unsigned char *buf;
1420
1421 buf = cli_malloc(CLI_XZ_OBUF_SIZE);
1422 if (buf == NULL) {
1423 cli_errmsg("cli_scanxz: nomemory for decompress buffer.\n");
1424 return CL_EMEM;
1425 }
1426 memset(&strm, 0x00, sizeof(struct CLI_XZ));
1427 strm.next_out = buf;
1428 strm.avail_out = CLI_XZ_OBUF_SIZE;
1429 rc = cli_XzInit(&strm);
1430 if (rc != XZ_RESULT_OK) {
1431 cli_errmsg("cli_scanxz: DecompressInit failed: %i\n", rc);
1432 free(buf);
1433 return CL_EOPEN;
1434 }
1435
1436 if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &fd))) {
1437 cli_errmsg("cli_scanxz: Can't generate temporary file.\n");
1438 cli_XzShutdown(&strm);
1439 free(buf);
1440 return ret;
1441 }
1442 cli_dbgmsg("cli_scanxz: decompressing to file %s\n", tmpname);
1443
1444 do {
1445 /* set up input buffer */
1446 if (!strm.avail_in) {
1447 strm.next_in = (void *)fmap_need_off_once_len(ctx->fmap, off, CLI_XZ_IBUF_SIZE, &avail);
1448 strm.avail_in = avail;
1449 off += avail;
1450 if (!strm.avail_in) {
1451 cli_errmsg("cli_scanxz: premature end of compressed stream\n");
1452 ret = CL_EFORMAT;
1453 goto xz_exit;
1454 }
1455 }
1456
1457 /* xz decompress a chunk */
1458 rc = cli_XzDecode(&strm);
1459 if (XZ_RESULT_OK != rc && XZ_STREAM_END != rc) {
1460 if (rc == XZ_DIC_HEURISTIC) {
1461 ret = cli_append_virus(ctx, "Heuristics.XZ.DicSizeLimit");
1462 goto xz_exit;
1463 }
1464 cli_errmsg("cli_scanxz: decompress error: %d\n", rc);
1465 ret = CL_EFORMAT;
1466 goto xz_exit;
1467 }
1468 //cli_dbgmsg("cli_scanxz: xz decompressed %li of %li available bytes\n",
1469 // avail - strm.avail_in, avail);
1470
1471 /* write decompress buffer */
1472 if (!strm.avail_out || rc == XZ_STREAM_END) {
1473 size_t towrite = CLI_XZ_OBUF_SIZE - strm.avail_out;
1474 size += towrite;
1475
1476 //cli_dbgmsg("Writing %li bytes to XZ decompress temp file(%li byte total)\n",
1477 // towrite, size);
1478
1479 if (cli_writen(fd, buf, towrite) != towrite) {
1480 cli_errmsg("cli_scanxz: Can't write to file.\n");
1481 ret = CL_EWRITE;
1482 goto xz_exit;
1483 }
1484 if (cli_checklimits("cli_scanxz", ctx, size, 0, 0) != CL_CLEAN) {
1485 cli_warnmsg("cli_scanxz: decompress file size exceeds limits - "
1486 "only scanning %li bytes\n",
1487 size);
1488 break;
1489 }
1490 strm.next_out = buf;
1491 strm.avail_out = CLI_XZ_OBUF_SIZE;
1492 }
1493 } while (XZ_STREAM_END != rc);
1494
1495 /* scan decompressed file */
1496 if ((ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL)) == CL_VIRUS) {
1497 cli_dbgmsg("cli_scanxz: Infected with %s\n", cli_get_last_virus(ctx));
1498 }
1499
1500 xz_exit:
1501 cli_XzShutdown(&strm);
1502 close(fd);
1503 if (!ctx->engine->keeptmp)
1504 if (cli_unlink(tmpname) && ret == CL_CLEAN)
1505 ret = CL_EUNLINK;
1506 free(tmpname);
1507 free(buf);
1508 return ret;
1509 }
1510
1511 static cl_error_t cli_scanszdd(cli_ctx *ctx)
1512 {
1513 int ofd;
1514 cl_error_t ret;
1515 char *tmpname;
1516
1517 cli_dbgmsg("in cli_scanszdd()\n");
1518
1519 if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &ofd))) {
1520 cli_dbgmsg("MSEXPAND: Can't generate temporary file/descriptor\n");
1521 return ret;
1522 }
1523
1524 ret = cli_msexpand(ctx, ofd);
1525
1526 if (ret != CL_SUCCESS) { /* CL_VIRUS or some error */
1527 close(ofd);
1528 if (!ctx->engine->keeptmp)
1529 if (cli_unlink(tmpname))
1530 ret = CL_EUNLINK;
1531 free(tmpname);
1532 return ret;
1533 }
1534
1535 cli_dbgmsg("MSEXPAND: Decompressed into %s\n", tmpname);
1536 ret = cli_magic_scan_desc(ofd, tmpname, ctx, NULL);
1537 close(ofd);
1538 if (!ctx->engine->keeptmp)
1539 if (cli_unlink(tmpname))
1540 ret = CL_EUNLINK;
1541 free(tmpname);
1542
1543 return ret;
1544 }
1545
1546 static cl_error_t vba_scandata(const unsigned char *data, size_t len, cli_ctx *ctx)
1547 {
1548 cl_error_t ret = CL_SUCCESS;
1549 struct cli_matcher *groot = ctx->engine->root[0];
1550 struct cli_matcher *troot = ctx->engine->root[2];
1551 struct cli_ac_data gmdata, tmdata;
1552 bool gmdata_initialized = false;
1553 bool tmdata_initialized = false;
1554 struct cli_ac_data *mdata[2];
1555 unsigned int viruses_found = 0;
1556
1557 cl_fmap_t *new_map = NULL;
1558
1559 if ((ret = cli_ac_initdata(&tmdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
1560 goto done;
1561 }
1562 tmdata_initialized = true;
1563
1564 if ((ret = cli_ac_initdata(&gmdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
1565 goto done;
1566 }
1567 gmdata_initialized = true;
1568
1569 mdata[0] = &tmdata;
1570 mdata[1] = &gmdata;
1571
1572 ret = cli_scan_buff(data, len, 0, ctx, CL_TYPE_MSOLE2, mdata);
1573 if (ret == CL_VIRUS) {
1574 viruses_found++;
1575 }
1576
1577 if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
1578 /*
1579 * Evaluate logical & yara rules given the new matches to see if anything alerts.
1580 */
1581 new_map = fmap_open_memory(data, len, NULL);
1582 if (new_map == NULL) {
1583 cli_dbgmsg("Failed to create fmap for evaluating logical/yara rules after call to cli_scan_buff()\n");
1584 ret = CL_EMEM;
1585 goto done;
1586 }
1587
1588 ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
1589
1590 ret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_MSOLE2, true); /* Perform exp_eval with child fmap */
1591 if (CL_SUCCESS != ret) {
1592 cli_dbgmsg("Failed to scan fmap.\n");
1593 goto done;
1594 }
1595
1596 ret = cli_exp_eval(ctx, troot, &tmdata, NULL, NULL);
1597 if (ret == CL_VIRUS) {
1598 viruses_found++;
1599 }
1600
1601 if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
1602 ret = cli_exp_eval(ctx, groot, &gmdata, NULL, NULL);
1603 }
1604
1605 (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
1606 }
1607
1608 done:
1609 if (NULL != new_map) {
1610 funmap(new_map);
1611 }
1612
1613 if (tmdata_initialized) {
1614 cli_ac_freedata(&tmdata);
1615 }
1616
1617 if (gmdata_initialized) {
1618 cli_ac_freedata(&gmdata);
1619 }
1620
1621 if (ret == CL_CLEAN && viruses_found) {
1622 ret = CL_VIRUS;
1623 }
1624 return ret;
1625 }
1626
1627 #define min(x, y) ((x) < (y) ? (x) : (y))
1628
1629 /**
1630 * Find a file in a directory tree.
1631 * \param filename Name of the file to find
1632 * \param dir Directory path where to find the file
1633 * \param A pointer to the string to store the result into
1634 * \param Size of the string to store the result in
1635 */
1636 cl_error_t find_file(const char *filename, const char *dir, char *result, size_t result_size)
1637 {
1638 DIR *dd;
1639 struct dirent *dent;
1640 char fullname[PATH_MAX];
1641 cl_error_t ret;
1642 size_t len;
1643 STATBUF statbuf;
1644
1645 if (!result) {
1646 return CL_ENULLARG;
1647 }
1648
1649 if ((dd = opendir(dir)) != NULL) {
1650 while ((dent = readdir(dd))) {
1651 if (dent->d_ino) {
1652 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) {
1653
1654 snprintf(fullname, sizeof(fullname), "%s" PATHSEP "%s", dir, dent->d_name);
1655 fullname[sizeof(fullname) - 1] = '\0';
1656
1657 /* stat the file */
1658 if (LSTAT(fullname, &statbuf) != -1) {
1659 if (S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode)) {
1660 ret = find_file(filename, fullname, result, result_size);
1661 if (ret == CL_SUCCESS) {
1662 closedir(dd);
1663 return ret;
1664 }
1665 } else if (S_ISREG(statbuf.st_mode)) {
1666 if (strcmp(dent->d_name, filename) == 0) {
1667 len = min(strlen(dir) + 1, result_size);
1668 memcpy(result, dir, len);
1669 result[len - 1] = '\0';
1670 closedir(dd);
1671 return CL_SUCCESS;
1672 }
1673 }
1674 }
1675 }
1676 }
1677 }
1678 closedir(dd);
1679 }
1680
1681 return CL_EOPEN;
1682 }
1683
1684 /**
1685 * Scan an OLE directory for a VBA project.
1686 * Contrary to cli_vba_scandir, this function uses the dir file to locate VBA modules.
1687 */
1688 static cl_error_t cli_vba_scandir_new(const char *dirname, cli_ctx *ctx, struct uniq *U, int *has_macros)
1689 {
1690 cl_error_t ret = CL_SUCCESS;
1691 uint32_t hashcnt = 0;
1692 char *hash = NULL;
1693 char path[PATH_MAX];
1694 char filename[PATH_MAX];
1695 int tempfd = -1;
1696 int viruses_found = 0;
1697
1698 if (CL_SUCCESS != (ret = uniq_get(U, "dir", 3, &hash, &hashcnt))) {
1699 cli_dbgmsg("cli_vba_scandir_new: uniq_get('dir') failed with ret code (%d)!\n", ret);
1700 return ret;
1701 }
1702
1703 while (hashcnt) {
1704 //Find the directory containing the extracted dir file. This is complicated
1705 //because ClamAV doesn't use the file names from the OLE file, but temporary names,
1706 //and we have neither the complete path of the dir file in the OLE container,
1707 //nor the mapping of the temporary directory names to their OLE names.
1708 snprintf(filename, sizeof(filename), "%s_%u", hash, hashcnt);
1709 filename[sizeof(filename) - 1] = '\0';
1710
1711 if (CL_SUCCESS == find_file(filename, dirname, path, sizeof(path))) {
1712 cli_dbgmsg("cli_vba_scandir_new: Found dir file: %s\n", path);
1713 if ((ret = cli_vba_readdir_new(ctx, path, U, hash, hashcnt, &tempfd, has_macros)) != CL_SUCCESS) {
1714 //FIXME: Since we only know the stream name of the OLE2 stream, but not its path inside the
1715 // OLE2 archive, we don't know if we have the right file. The only thing we can do is
1716 // iterate all of them until one succeeds.
1717 cli_dbgmsg("cli_vba_scandir_new: Failed to read dir from %s, trying others (error: %s (%d))\n", path, cl_strerror(ret), (int)ret);
1718 ret = CL_SUCCESS;
1719 hashcnt--;
1720 continue;
1721 }
1722
1723 #if HAVE_JSON
1724 if (*has_macros && SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
1725 cli_jsonbool(ctx->wrkproperty, "HasMacros", 1);
1726 json_object *macro_languages = cli_jsonarray(ctx->wrkproperty, "MacroLanguages");
1727 if (macro_languages) {
1728 cli_jsonstr(macro_languages, NULL, "VBA");
1729 } else {
1730 cli_dbgmsg("[cli_vba_scandir_new] Failed to add \"VBA\" entry to MacroLanguages JSON array\n");
1731 }
1732 }
1733 #endif
1734 if (SCAN_HEURISTIC_MACROS && *has_macros) {
1735 ret = cli_append_virus(ctx, "Heuristics.OLE2.ContainsMacros.VBA");
1736 if (ret == CL_VIRUS) {
1737 viruses_found++;
1738 if (!SCAN_ALLMATCHES) {
1739 goto done;
1740 }
1741 }
1742 }
1743
1744 /*
1745 * Now rewind the extracted vba-project output FD and scan it!
1746 */
1747 if (lseek(tempfd, 0, SEEK_SET) != 0) {
1748 cli_dbgmsg("cli_vba_scandir_new: Failed to seek to beginning of temporary VBA project file\n");
1749 ret = CL_ESEEK;
1750 goto done;
1751 }
1752
1753 ret = cli_scan_desc(tempfd, ctx, CL_TYPE_SCRIPT, 0, NULL, AC_SCAN_VIR, NULL, NULL);
1754
1755 close(tempfd);
1756 tempfd = -1;
1757
1758 if (CL_VIRUS == ret) {
1759 viruses_found++;
1760 if (!SCAN_ALLMATCHES) {
1761 goto done;
1762 }
1763 }
1764 }
1765
1766 hashcnt--;
1767 }
1768
1769 done:
1770 if (tempfd != -1) {
1771 close(tempfd);
1772 tempfd = -1;
1773 }
1774
1775 if (viruses_found > 0)
1776 ret = CL_VIRUS;
1777 return ret;
1778 }
1779
1780 static cl_error_t cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U, int *has_macros)
1781 {
1782 cl_error_t status = CL_CLEAN;
1783 cl_error_t ret;
1784 int i, j;
1785 size_t data_len;
1786 vba_project_t *vba_project;
1787 DIR *dd = NULL;
1788 struct dirent *dent;
1789 STATBUF statbuf;
1790 char *fullname, vbaname[1024];
1791 unsigned char *data;
1792 char *hash;
1793 uint32_t hashcnt = 0;
1794 unsigned int viruses_found = 0;
1795
1796 cli_dbgmsg("VBADir: %s\n", dirname);
1797 if (CL_SUCCESS != (ret = uniq_get(U, "_vba_project", 12, NULL, &hashcnt))) {
1798 cli_dbgmsg("VBADir: uniq_get('_vba_project') failed with ret code (%d)!\n", ret);
1799 status = ret;
1800 goto done;
1801 }
1802 while (hashcnt) {
1803 if (!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt))) {
1804 hashcnt--;
1805 continue;
1806 }
1807
1808 for (i = 0; i < vba_project->count; i++) {
1809 for (j = 1; (unsigned int)j <= vba_project->colls[i]; j++) {
1810 int fd = -1;
1811
1812 snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", vba_project->dir, vba_project->name[i], j);
1813 vbaname[sizeof(vbaname) - 1] = '\0';
1814
1815 fd = open(vbaname, O_RDONLY | O_BINARY);
1816 if (fd == -1) {
1817 continue;
1818 }
1819 cli_dbgmsg("VBADir: Decompress VBA project '%s_%u'\n", vba_project->name[i], j);
1820 data = (unsigned char *)cli_vba_inflate(fd, vba_project->offset[i], &data_len);
1821 close(fd);
1822 *has_macros = *has_macros + 1;
1823 if (!data) {
1824 } else {
1825 /* cli_dbgmsg("Project content:\n%s", data); */
1826 if (ctx->scanned)
1827 *ctx->scanned += data_len / CL_COUNT_PRECISION;
1828 if (ctx->engine->keeptmp) {
1829 char *tempfile;
1830 int of;
1831
1832 if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tempfile, &of)) != CL_SUCCESS) {
1833 cli_warnmsg("VBADir: WARNING: VBA project '%s_%u' cannot be dumped to file\n", vba_project->name[i], j);
1834 status = ret;
1835 goto done;
1836 }
1837 if (cli_writen(of, data, data_len) != data_len) {
1838 cli_warnmsg("VBADir: WARNING: VBA project '%s_%u' failed to write to file\n", vba_project->name[i], j);
1839 close(of);
1840 free(tempfile);
1841 status = CL_EWRITE;
1842 goto done;
1843 }
1844
1845 cli_dbgmsg("VBADir: VBA project '%s_%u' dumped to %s\n", vba_project->name[i], j, tempfile);
1846 free(tempfile);
1847 }
1848
1849 if (vba_scandata(data, data_len, ctx) == CL_VIRUS) {
1850 viruses_found++;
1851 if (!SCAN_ALLMATCHES) {
1852 free(data);
1853 status = CL_VIRUS;
1854 break;
1855 }
1856 }
1857 free(data);
1858 }
1859 }
1860
1861 if (status == CL_VIRUS)
1862 break;
1863 }
1864
1865 cli_free_vba_project(vba_project);
1866 vba_project = NULL;
1867
1868 if (status == CL_VIRUS)
1869 break;
1870
1871 hashcnt--;
1872 }
1873
1874 if (status == CL_CLEAN || (status == CL_VIRUS && SCAN_ALLMATCHES)) {
1875 if (CL_SUCCESS != (ret = uniq_get(U, "powerpoint document", 19, &hash, &hashcnt))) {
1876 cli_dbgmsg("VBADir: uniq_get('powerpoint document') failed with ret code (%d)!\n", ret);
1877 status = ret;
1878 goto done;
1879 }
1880 while (hashcnt) {
1881 int fd = -1;
1882
1883 snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1884 vbaname[sizeof(vbaname) - 1] = '\0';
1885
1886 fd = open(vbaname, O_RDONLY | O_BINARY);
1887 if (fd == -1) {
1888 hashcnt--;
1889 continue;
1890 }
1891 if ((fullname = cli_ppt_vba_read(fd, ctx))) {
1892 ret = cli_magic_scan_dir(fullname, ctx);
1893
1894 if (!ctx->engine->keeptmp)
1895 cli_rmdirs(fullname);
1896 free(fullname);
1897
1898 if (ret == CL_VIRUS) {
1899 status = CL_VIRUS;
1900 viruses_found++;
1901 if (!SCAN_ALLMATCHES) {
1902 close(fd);
1903 break;
1904 }
1905 }
1906 }
1907 close(fd);
1908 hashcnt--;
1909 }
1910 }
1911
1912 if (status == CL_CLEAN || (status == CL_VIRUS && SCAN_ALLMATCHES)) {
1913 if (CL_SUCCESS != (ret = uniq_get(U, "worddocument", 12, &hash, &hashcnt))) {
1914 cli_dbgmsg("VBADir: uniq_get('worddocument') failed with ret code (%d)!\n", ret);
1915 status = ret;
1916 goto done;
1917 }
1918 while (hashcnt) {
1919 int fd = -1;
1920
1921 snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1922 vbaname[sizeof(vbaname) - 1] = '\0';
1923
1924 fd = open(vbaname, O_RDONLY | O_BINARY);
1925 if (fd == -1) {
1926 hashcnt--;
1927 continue;
1928 }
1929
1930 if (!(vba_project = (vba_project_t *)cli_wm_readdir(fd))) {
1931 close(fd);
1932 hashcnt--;
1933 continue;
1934 }
1935
1936 for (i = 0; i < vba_project->count; i++) {
1937 cli_dbgmsg("VBADir: Decompress WM project macro:%d key:%d length:%d\n", i, vba_project->key[i], vba_project->length[i]);
1938 data = (unsigned char *)cli_wm_decrypt_macro(fd, vba_project->offset[i], vba_project->length[i], vba_project->key[i]);
1939 if (!data) {
1940 cli_dbgmsg("VBADir: WARNING: WM project '%s' macro %d decrypted to NULL\n", vba_project->name[i], i);
1941 } else {
1942 cli_dbgmsg("Project content:\n%s", data);
1943 if (ctx->scanned)
1944 *ctx->scanned += vba_project->length[i] / CL_COUNT_PRECISION;
1945 if (vba_scandata(data, vba_project->length[i], ctx) == CL_VIRUS) {
1946 viruses_found++;
1947 if (!SCAN_ALLMATCHES) {
1948 free(data);
1949 status = CL_VIRUS;
1950 break;
1951 }
1952 }
1953 free(data);
1954 }
1955 }
1956
1957 close(fd);
1958 cli_free_vba_project(vba_project);
1959 vba_project = NULL;
1960
1961 if (status == CL_VIRUS && !SCAN_ALLMATCHES) {
1962 break;
1963 }
1964 hashcnt--;
1965 }
1966 }
1967
1968 #if HAVE_JSON
1969 /* JSON Output Summary Information */
1970 if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
1971 if (CL_SUCCESS != (ret = uniq_get(U, "_5_summaryinformation", 21, &hash, &hashcnt))) {
1972 cli_dbgmsg("VBADir: uniq_get('_5_summaryinformation') failed with ret code (%d)!\n", ret);
1973 status = ret;
1974 goto done;
1975 }
1976 while (hashcnt) {
1977 int fd = -1;
1978
1979 snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1980 vbaname[sizeof(vbaname) - 1] = '\0';
1981
1982 fd = open(vbaname, O_RDONLY | O_BINARY);
1983 if (fd >= 0) {
1984 cli_dbgmsg("VBADir: detected a '_5_summaryinformation' stream\n");
1985 /* JSONOLE2 - what to do if something breaks? */
1986 cli_ole2_summary_json(ctx, fd, 0);
1987 close(fd);
1988 }
1989 hashcnt--;
1990 }
1991
1992 if (CL_SUCCESS != (ret = uniq_get(U, "_5_documentsummaryinformation", 29, &hash, &hashcnt))) {
1993 cli_dbgmsg("VBADir: uniq_get('_5_documentsummaryinformation') failed with ret code (%d)!\n", ret);
1994 status = ret;
1995 goto done;
1996 }
1997 while (hashcnt) {
1998 int fd = -1;
1999
2000 snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
2001 vbaname[sizeof(vbaname) - 1] = '\0';
2002
2003 fd = open(vbaname, O_RDONLY | O_BINARY);
2004 if (fd >= 0) {
2005 cli_dbgmsg("VBADir: detected a '_5_documentsummaryinformation' stream\n");
2006 /* JSONOLE2 - what to do if something breaks? */
2007 cli_ole2_summary_json(ctx, fd, 1);
2008 close(fd);
2009 }
2010 hashcnt--;
2011 }
2012 }
2013 #endif
2014
2015 if (status != CL_CLEAN && !(status == CL_VIRUS && SCAN_ALLMATCHES)) {
2016 goto done;
2017 }
2018
2019 /* Check directory for embedded OLE objects */
2020 if (CL_SUCCESS != (ret = uniq_get(U, "_1_ole10native", 14, &hash, &hashcnt))) {
2021 cli_dbgmsg("VBADir: uniq_get('_1_ole10native') failed with ret code (%d)!\n", ret);
2022 status = ret;
2023 goto done;
2024 }
2025 while (hashcnt) {
2026 int fd = -1;
2027
2028 snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
2029 vbaname[sizeof(vbaname) - 1] = '\0';
2030
2031 fd = open(vbaname, O_RDONLY | O_BINARY);
2032 if (fd >= 0) {
2033 ret = cli_scan_ole10(fd, ctx);
2034 close(fd);
2035 if (CL_VIRUS == ret) {
2036 viruses_found++;
2037 if (!SCAN_ALLMATCHES) {
2038 status = ret;
2039 goto done;
2040 }
2041 }
2042 }
2043 hashcnt--;
2044 }
2045
2046 /* ACAB: since we now hash filenames and handle collisions we
2047 * could avoid recursion by removing the block below and by
2048 * flattening the paths in ole2_walk_property_tree (case 1) */
2049
2050 if ((dd = opendir(dirname)) != NULL) {
2051 while ((dent = readdir(dd))) {
2052 if (dent->d_ino) {
2053 if (strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..")) {
2054 /* build the full name */
2055 fullname = cli_malloc(strlen(dirname) + strlen(dent->d_name) + 2);
2056 if (!fullname) {
2057 cli_dbgmsg("cli_vba_scandir: Unable to allocate memory for fullname\n");
2058 status = CL_EMEM;
2059 break;
2060 }
2061 sprintf(fullname, "%s" PATHSEP "%s", dirname, dent->d_name);
2062
2063 /* stat the file */
2064 if (LSTAT(fullname, &statbuf) != -1) {
2065 if (S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode))
2066 if (cli_vba_scandir(fullname, ctx, U, has_macros) == CL_VIRUS) {
2067 viruses_found++;
2068 if (!SCAN_ALLMATCHES) {
2069 status = CL_VIRUS;
2070 free(fullname);
2071 break;
2072 }
2073 }
2074 }
2075 free(fullname);
2076 }
2077 }
2078 }
2079 } else {
2080 cli_dbgmsg("VBADir: Can't open directory %s.\n", dirname);
2081 status = CL_EOPEN;
2082 goto done;
2083 }
2084
2085 done:
2086 if (NULL != dd) {
2087 closedir(dd);
2088 }
2089
2090 #if HAVE_JSON
2091 if (*has_macros && SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
2092 cli_jsonbool(ctx->wrkproperty, "HasMacros", 1);
2093 json_object *macro_languages = cli_jsonarray(ctx->wrkproperty, "MacroLanguages");
2094 if (macro_languages) {
2095 cli_jsonstr(macro_languages, NULL, "VBA");
2096 } else {
2097 cli_dbgmsg("[cli_scan_vbadir] Failed to add \"VBA\" entry to MacroLanguages JSON array\n");
2098 }
2099 }
2100 #endif
2101 if (SCAN_HEURISTIC_MACROS && *has_macros) {
2102 ret = cli_append_virus(ctx, "Heuristics.OLE2.ContainsMacros.VBA");
2103 if (ret == CL_VIRUS)
2104 viruses_found++;
2105 }
2106
2107 if (viruses_found > 0) {
2108 status = CL_VIRUS;
2109 }
2110 return status;
2111 }
2112
2113 static cl_error_t cli_xlm_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
2114 {
2115 cl_error_t ret = CL_CLEAN;
2116 char *hash = NULL;
2117 uint32_t hashcnt = 0;
2118 unsigned int viruses_found = 0;
2119 char STR_WORKBOOK[] = "workbook";
2120 char STR_BOOK[] = "book";
2121
2122 cli_dbgmsg("XLMDir: %s\n", dirname);
2123
2124 if (CL_SUCCESS != (ret = uniq_get(U, STR_WORKBOOK, sizeof(STR_WORKBOOK) - 1, &hash, &hashcnt))) {
2125 if (CL_SUCCESS != (ret = uniq_get(U, STR_BOOK, sizeof(STR_BOOK) - 1, &hash, &hashcnt))) {
2126 cli_dbgmsg("XLMDir: uniq_get('%s') failed with ret code (%d)!\n", STR_BOOK, ret);
2127 return ret;
2128 }
2129 }
2130
2131 for (; hashcnt > 0; hashcnt--) {
2132 if ((ret = cli_xlm_extract_macros(dirname, ctx, U, hash, hashcnt)) != CL_SUCCESS) {
2133 switch (ret) {
2134 case CL_VIRUS:
2135 case CL_EMEM:
2136 return ret;
2137 default:
2138 cli_dbgmsg("XLMDir: An error occured when parsing XLM BIFF temp file, skipping to next file.\n");
2139 }
2140 }
2141 }
2142
2143 if (SCAN_HEURISTIC_MACROS) {
2144 ret = cli_append_virus(ctx, "Heuristics.OLE2.ContainsMacros.XLM");
2145 if (ret == CL_VIRUS)
2146 viruses_found++;
2147 }
2148 if (SCAN_ALLMATCHES && viruses_found)
2149 return CL_VIRUS;
2150 return ret;
2151 }
2152
2153 static cl_error_t cli_scanhtml(cli_ctx *ctx)
2154 {
2155 char *tempname, fullname[1024];
2156 cl_error_t ret = CL_CLEAN;
2157 int fd;
2158 fmap_t *map = ctx->fmap;
2159 unsigned int viruses_found = 0;
2160 uint64_t curr_len = map->len;
2161
2162 cli_dbgmsg("in cli_scanhtml()\n");
2163
2164 /* CL_ENGINE_MAX_HTMLNORMALIZE */
2165 if (curr_len > ctx->engine->maxhtmlnormalize) {
2166 cli_dbgmsg("cli_scanhtml: exiting (file larger than MaxHTMLNormalize)\n");
2167 return CL_CLEAN;
2168 }
2169
2170 if (!(tempname = cli_gentemp_with_prefix(ctx->sub_tmpdir, "html-tmp")))
2171 return CL_EMEM;
2172
2173 if (mkdir(tempname, 0700)) {
2174 cli_errmsg("cli_scanhtml: Can't create temporary directory %s\n", tempname);
2175 free(tempname);
2176 return CL_ETMPDIR;
2177 }
2178
2179 cli_dbgmsg("cli_scanhtml: using tempdir %s\n", tempname);
2180
2181 html_normalise_map(map, tempname, NULL, ctx->dconf);
2182 snprintf(fullname, 1024, "%s" PATHSEP "nocomment.html", tempname);
2183 fd = open(fullname, O_RDONLY | O_BINARY);
2184 if (fd >= 0) {
2185 ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
2186 if ((ret = cli_scan_desc(fd, ctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR, NULL, NULL)) == CL_VIRUS)
2187 viruses_found++;
2188 close(fd);
2189 }
2190
2191 if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
2192 /* CL_ENGINE_MAX_HTMLNOTAGS */
2193 curr_len = map->len;
2194 if (curr_len > ctx->engine->maxhtmlnotags) {
2195 /* we're not interested in scanning large files in notags form */
2196 /* TODO: don't even create notags if file is over limit */
2197 cli_dbgmsg("cli_scanhtml: skipping notags (normalized size over MaxHTMLNoTags)\n");
2198 } else {
2199 snprintf(fullname, 1024, "%s" PATHSEP "notags.html", tempname);
2200 fd = open(fullname, O_RDONLY | O_BINARY);
2201 if (fd >= 0) {
2202 ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
2203 if ((ret = cli_scan_desc(fd, ctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR, NULL, NULL)) == CL_VIRUS)
2204 viruses_found++;
2205 close(fd);
2206 }
2207 }
2208 }
2209
2210 if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
2211 snprintf(fullname, 1024, "%s" PATHSEP "javascript", tempname);
2212 fd = open(fullname, O_RDONLY | O_BINARY);
2213 if (fd >= 0) {
2214 ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
2215 if ((ret = cli_scan_desc(fd, ctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR, NULL, NULL)) == CL_VIRUS)
2216 viruses_found++;
2217 if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
2218 ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
2219 if ((ret = cli_scan_desc(fd, ctx, CL_TYPE_TEXT_ASCII, 0, NULL, AC_SCAN_VIR, NULL, NULL)) == CL_VIRUS)
2220 viruses_found++;
2221 }
2222 close(fd);
2223 }
2224 }
2225
2226 if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
2227 ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push() or cleared when cli_magic_scan_dir() is done.
2228 snprintf(fullname, 1024, "%s" PATHSEP "rfc2397", tempname);
2229 ret = cli_magic_scan_dir(fullname, ctx);
2230 if (CL_EOPEN == ret) {
2231 /* If the directory doesn't exist, that's fine */
2232 ret = CL_CLEAN;
2233 }
2234 }
2235
2236 if (!ctx->engine->keeptmp)
2237 cli_rmdirs(tempname);
2238
2239 free(tempname);
2240 if (SCAN_ALLMATCHES && viruses_found)
2241 return CL_VIRUS;
2242 return ret;
2243 }
2244
/**
 * @brief Normalize the current fmap as text and scan the normalized output.
 *
 * Two strategies are used, selected by the matcher root at engine->root[7] (troot):
 *  - If troot has relative offsets (ac_reloff_num > 0) or linked bytecodes, the whole
 *    map is normalized into a temp file, the file is mapped, pushed onto the recursion
 *    stack as CL_TYPE_TEXT_ASCII and scanned with cli_scan_fmap().
 *  - Otherwise the map is normalized one page at a time into an in-memory buffer, which
 *    is scanned with cli_scan_buff() every time it is flushed. The last maxpatlen bytes
 *    are carried over between flushes.
 *
 * After either path, cli_exp_eval() is run for troot and for engine->root[0] (groot)
 * unless a virus was already found and all-match mode is off.
 *
 * @param ctx   Current scan context.
 * @return cl_error_t   CL_VIRUS if anything matched; CL_ENULLARG if ctx or the engine
 *                      root array is NULL; CL_CLEAN if the map is larger than
 *                      engine->maxscriptnormalize; CL_EMEM / CL_EWRITE or another
 *                      error code from the failing step; otherwise the last status.
 */
static cl_error_t cli_scanscript(cli_ctx *ctx)
{
    const unsigned char *buff;
    unsigned char *normalized = NULL;
    struct text_norm_state state;
    char *tmpname = NULL;
    int ofd = -1;
    cl_error_t ret;
    struct cli_matcher *troot;
    uint32_t maxpatlen, offset = 0;
    struct cli_matcher *groot;
    struct cli_ac_data gmdata, tmdata;
    int gmdata_initialized = 0;
    int tmdata_initialized = 0;
    struct cli_ac_data *mdata[2];
    cl_fmap_t *new_map = NULL;
    fmap_t *map;
    size_t at = 0;
    unsigned int viruses_found = 0;
    uint64_t curr_len;
    struct cli_target_info info;

    /* Returns directly (not via done:) because nothing has been acquired yet. */
    if (!ctx || !ctx->engine->root)
        return CL_ENULLARG;

    map = ctx->fmap;
    curr_len = map->len;
    /* NOTE(review): root[0] / root[7] are presumably the generic and the
     * normalized-text signature roots -- confirm against the engine's target indexing. */
    groot = ctx->engine->root[0];
    troot = ctx->engine->root[7];
    maxpatlen = troot ? troot->maxpatlen : 0;

    // Initialize info so it's safe to pass to destroy later
    cli_targetinfo_init(&info);

    cli_dbgmsg("in cli_scanscript()\n");

    /* CL_ENGINE_MAX_SCRIPTNORMALIZE */
    if (curr_len > ctx->engine->maxscriptnormalize) {
        cli_dbgmsg("cli_scanscript: exiting (file larger than MaxScriptSize)\n");
        ret = CL_CLEAN;
        goto done;
    }

    /* The buffer holds SCANBUFF bytes of fresh output plus maxpatlen bytes of carry-over. */
    if (!(normalized = cli_malloc(SCANBUFF + maxpatlen))) {
        cli_dbgmsg("cli_scanscript: Unable to malloc %u bytes\n", SCANBUFF);
        ret = CL_EMEM;
        goto done;
    }
    text_normalize_init(&state, normalized, SCANBUFF + maxpatlen);

    /* The *_initialized flags tell the cleanup code which matcher data to free. */
    if ((ret = cli_ac_initdata(&tmdata, troot ? troot->ac_partsigs : 0, troot ? troot->ac_lsigs : 0, troot ? troot->ac_reloff_num : 0, CLI_DEFAULT_AC_TRACKLEN))) {
        goto done;
    }
    tmdata_initialized = 1;

    if ((ret = cli_ac_initdata(&gmdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
        goto done;
    }
    gmdata_initialized = 1;

    /* dump to disk only if explicitly asked to
     * or if necessary to check relative offsets,
     * otherwise we can process just in-memory */
    if (ctx->engine->keeptmp || (troot && (troot->ac_reloff_num > 0 || troot->linked_bcs))) {
        if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &ofd))) {
            cli_dbgmsg("cli_scanscript: Can't generate temporary file/descriptor\n");
            goto done;
        }
        if (ctx->engine->keeptmp)
            cli_dbgmsg("cli_scanscript: saving normalized file to %s\n", tmpname);
    }

    /* Matcher data handed to cli_scan_buff() in the in-memory path. */
    mdata[0] = &tmdata;
    mdata[1] = &gmdata;

    /* If there's a relative offset in troot or triggered bytecodes, normalize to file.*/
    if (troot && (troot->ac_reloff_num > 0 || troot->linked_bcs)) {
        size_t map_off = 0;
        while (map_off < map->len) {
            size_t written;
            /* A zero return from text_normalize_map() ends the loop. */
            if (!(written = text_normalize_map(&state, map, map_off)))
                break;
            map_off += written;

            /* Only a -1 return is treated as failure; short writes are not detected. */
            if (write(ofd, state.out, state.out_pos) == -1) {
                cli_errmsg("cli_scanscript: can't write to file %s\n", tmpname);
                ret = CL_EWRITE;
                goto done;
            }
            text_normalize_reset(&state);
        }

        /* Temporarily store the normalized file map in the context. */
        new_map = fmap(ofd, 0, 0, NULL);
        if (new_map == NULL) {
            /* NOTE(review): ret is not updated here, so it still holds the successful
             * cli_gentempfd() result and the function reports a clean status.
             * Confirm this is intended (e.g. for empty normalized output). */
            cli_dbgmsg("cli_scanscript: could not map file %s\n", tmpname);
            goto done;
        }

        ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().

        ret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_TEXT_ASCII, true); /* Perform cli_scan_fmap with child fmap */
        if (CL_SUCCESS != ret) {
            cli_dbgmsg("Failed to scan fmap.\n");
            goto done;
        }

        /* scan map */
        ret = cli_scan_fmap(ctx, CL_TYPE_TEXT_ASCII, 0, NULL, AC_SCAN_VIR, NULL, NULL);
        if (ret == CL_VIRUS) {
            viruses_found++;
        }

        (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
    } else {
        /* Since the above is moderately costly all in all,
         * do the old stuff if there's no relative offsets. */

        if (troot) {
            cli_targetinfo(&info, 7, ctx);
            ret = cli_ac_caloff(troot, &tmdata, &info);
            if (ret)
                goto done;
        }

        while (1) {
            /* Read at most one page per iteration; len becomes 0 at the end of the map. */
            size_t len = MIN(map->pgsz, map->len - at);
            buff = fmap_need_off_once(map, at, len);
            at += len;
            if (!buff || !len || state.out_pos + len > state.out_len) {
                /* flush if error/EOF, or too little buffer space left */
                if ((ofd != -1) && (write(ofd, state.out, state.out_pos) == -1)) {
                    cli_errmsg("cli_scanscript: can't write to file %s\n", tmpname);
                    close(ofd);
                    ofd = -1;
                    /* we can continue to scan in memory */
                }
                /* when we flush the buffer also scan */
                if (cli_scan_buff(state.out, state.out_pos, offset, ctx, CL_TYPE_TEXT_ASCII, mdata) == CL_VIRUS) {
                    if (SCAN_ALLMATCHES)
                        viruses_found++;
                    else {
                        ret = CL_VIRUS;
                        break;
                    }
                }
                if (ctx->scanned)
                    *ctx->scanned += state.out_pos / CL_COUNT_PRECISION;
                offset += state.out_pos;
                /* carry over maxpatlen from previous buffer */
                if (state.out_pos > maxpatlen)
                    memmove(state.out, state.out + state.out_pos - maxpatlen, maxpatlen);
                text_normalize_reset(&state);
                /* The next normalized bytes are appended after the carried-over tail. */
                state.out_pos = maxpatlen;
            }
            if (!len)
                break;
            /* A failed read or partial normalization is only logged; the loop continues. */
            if (!buff || text_normalize_buffer(&state, buff, len) != len) {
                cli_dbgmsg("cli_scanscript: short read during normalizing\n");
            }
        }
    }

    /* Evaluate troot first, then groot; a virus stops the sequence unless all-match is on. */
    if (ret != CL_VIRUS || SCAN_ALLMATCHES) {
        if ((ret = cli_exp_eval(ctx, troot, &tmdata, NULL, NULL)) == CL_VIRUS)
            viruses_found++;
        if (ret != CL_VIRUS || SCAN_ALLMATCHES)
            if ((ret = cli_exp_eval(ctx, groot, &gmdata, NULL, NULL)) == CL_VIRUS)
                viruses_found++;
    }

done:
    /* Common cleanup: every resource below is released only if it was acquired. */
    if (NULL != new_map) {
        funmap(new_map);
    }

    cli_targetinfo_destroy(&info);

    if (NULL != normalized) {
        free(normalized);
    }

    if (tmdata_initialized) {
        cli_ac_freedata(&tmdata);
    }

    if (gmdata_initialized) {
        cli_ac_freedata(&gmdata);
    }

    if (ofd != -1)
        close(ofd);
    if (tmpname != NULL) {
        if (!ctx->engine->keeptmp)
            cli_unlink(tmpname);
        free(tmpname);
    }

    /* Any recorded match takes precedence over the last status code. */
    if (viruses_found)
        return CL_VIRUS;

    return ret;
}
2448
2449 static cl_error_t cli_scanhtml_utf16(cli_ctx *ctx)
2450 {
2451 cl_error_t status = CL_ERROR;
2452 char *tempname = NULL;
2453 char *decoded = NULL;
2454 const char *buff;
2455 int fd = -1;
2456 int bytes;
2457 size_t at = 0;
2458 fmap_t *new_map = NULL;
2459
2460 cli_dbgmsg("in cli_scanhtml_utf16()\n");
2461
2462 if (!(tempname = cli_gentemp_with_prefix(ctx->sub_tmpdir, "html-utf16-tmp"))) {
2463 status = CL_EMEM;
2464 goto done;
2465 }
2466
2467 if ((fd = open(tempname, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
2468 cli_errmsg("cli_scanhtml_utf16: Can't create file %s\n", tempname);
2469 status = CL_EOPEN;
2470 goto done;
2471 }
2472
2473 cli_dbgmsg("cli_scanhtml_utf16: using tempfile %s\n", tempname);
2474
2475 while (at < ctx->fmap->len) {
2476 bytes = MIN(ctx->fmap->len - at, ctx->fmap->pgsz * 16);
2477 if (!(buff = fmap_need_off_once(ctx->fmap, at, bytes))) {
2478 status = CL_EREAD;
2479 goto done;
2480 }
2481 at += bytes;
2482 decoded = cli_utf16toascii(buff, bytes);
2483 if (decoded) {
2484 if (write(fd, decoded, bytes / 2) == -1) {
2485 cli_errmsg("cli_scanhtml_utf16: Can't write to file %s\n", tempname);
2486 status = CL_EWRITE;
2487 goto done;
2488 }
2489 free(decoded);
2490 decoded = NULL;
2491 }
2492 }
2493
2494 new_map = fmap(fd, 0, 0, NULL);
2495 if (NULL == new_map) {
2496 cli_errmsg("cli_scanhtml_utf16: failed to create fmap for ascii HTML file decoded from utf16: %s\n.", tempname);
2497 status = CL_EMEM;
2498 goto done;
2499 }
2500
2501 ctx->next_layer_is_normalized = true; // s/normalized/transcoded, practically the same thing.
2502
2503 status = cli_recursion_stack_push(ctx, new_map, CL_TYPE_HTML, true); /* Perform exp_eval with child fmap */
2504 if (CL_SUCCESS != status) {
2505 cli_dbgmsg("Failed to scan fmap.\n");
2506 goto done;
2507 }
2508
2509 status = cli_scanhtml(ctx);
2510
2511 (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
2512
2513 status = CL_SUCCESS;
2514
2515 done:
2516 if (NULL != new_map) {
2517 funmap(new_map);
2518 }
2519 if (-1 != fd) {
2520 close(fd);
2521 }
2522
2523 if (NULL != decoded) {
2524 free(decoded);
2525 }
2526
2527 if (NULL != tempname) {
2528 if (!ctx->engine->keeptmp) {
2529 (void)cli_unlink(tempname);
2530 } else {
2531 cli_dbgmsg("cli_scanhtml_utf16: Decoded HTML data saved in %s\n", tempname);
2532 }
2533
2534 free(tempname);
2535 }
2536
2537 return status;
2538 }
2539
2540 static cl_error_t cli_scanole2(cli_ctx *ctx)
2541 {
2542 char *dir = NULL;
2543 cl_error_t ret = CL_CLEAN;
2544 struct uniq *files = NULL;
2545 int has_vba = 0, has_xlm = 0, has_macros = 0, viruses_found = 0;
2546
2547 cli_dbgmsg("in cli_scanole2()\n");
2548
2549 /* generate the temporary directory */
2550 if (NULL == (dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "ole2-tmp"))) {
2551 ret = CL_EMEM;
2552 goto done;
2553 }
2554
2555 if (mkdir(dir, 0700)) {
2556 cli_dbgmsg("OLE2: Can't create temporary directory %s\n", dir);
2557 free(dir);
2558 dir = NULL;
2559 ret = CL_ETMPDIR;
2560 goto done;
2561 }
2562
2563 ret = cli_ole2_extract(dir, ctx, &files, &has_vba, &has_xlm);
2564 if (ret != CL_CLEAN && ret != CL_VIRUS) {
2565 cli_dbgmsg("OLE2: %s\n", cl_strerror(ret));
2566 goto done;
2567 }
2568 if (CL_VIRUS == ret) {
2569 viruses_found++;
2570 if (!SCAN_ALLMATCHES) {
2571 goto done;
2572 }
2573 }
2574
2575 if (has_vba && files) {
2576 ret = cli_vba_scandir(dir, ctx, files, &has_macros);
2577 if (CL_VIRUS == ret) {
2578 viruses_found++;
2579 if (!SCAN_ALLMATCHES) {
2580 goto done;
2581 }
2582 }
2583
2584 ret = cli_vba_scandir_new(dir, ctx, files, &has_macros);
2585 if (CL_VIRUS == ret) {
2586 viruses_found++;
2587 if (!SCAN_ALLMATCHES) {
2588 goto done;
2589 }
2590 }
2591 }
2592
2593 if (CL_VIRUS == ret) {
2594 viruses_found++;
2595 if (!SCAN_ALLMATCHES) {
2596 goto done;
2597 }
2598 }
2599
2600 if (has_xlm && files) {
2601 ret = cli_xlm_scandir(dir, ctx, files);
2602 if (CL_VIRUS == ret) {
2603 if (!SCAN_ALLMATCHES) {
2604 goto done;
2605 }
2606 }
2607 }
2608
2609 if ((has_xlm || has_vba) && files) {
2610 if (CL_VIRUS == cli_magic_scan_dir(dir, ctx)) {
2611 if (!SCAN_ALLMATCHES) {
2612 goto done;
2613 }
2614 }
2615 }
2616
2617 done:
2618 if (files) {
2619 uniq_free(files);
2620 }
2621
2622 if (NULL != dir) {
2623 if (!ctx->engine->keeptmp)
2624 cli_rmdirs(dir);
2625 free(dir);
2626 }
2627
2628 if (viruses_found > 0) {
2629 ret = CL_VIRUS;
2630 }
2631
2632 return ret;
2633 }
2634
2635 static cl_error_t cli_scantar(cli_ctx *ctx, unsigned int posix)
2636 {
2637 char *dir;
2638 cl_error_t ret = CL_CLEAN;
2639
2640 cli_dbgmsg("in cli_scantar()\n");
2641
2642 /* generate temporary directory */
2643 if (!(dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "tar-tmp")))
2644 return CL_EMEM;
2645
2646 if (mkdir(dir, 0700)) {
2647 cli_errmsg("Tar: Can't create temporary directory %s\n", dir);
2648 free(dir);
2649 return CL_ETMPDIR;
2650 }
2651
2652 ret = cli_untar(dir, posix, ctx);
2653
2654 if (!ctx->engine->keeptmp)
2655 cli_rmdirs(dir);
2656
2657 free(dir);
2658 return ret;
2659 }
2660
2661 static cl_error_t cli_scanscrenc(cli_ctx *ctx)
2662 {
2663 char *tempname;
2664 cl_error_t ret = CL_CLEAN;
2665
2666 cli_dbgmsg("in cli_scanscrenc()\n");
2667
2668 if (!(tempname = cli_gentemp_with_prefix(ctx->sub_tmpdir, "screnc-tmp")))
2669 return CL_EMEM;
2670
2671 if (mkdir(tempname, 0700)) {
2672 cli_dbgmsg("CHM: Can't create temporary directory %s\n", tempname);
2673 free(tempname);
2674 return CL_ETMPDIR;
2675 }
2676
2677 if (html_screnc_decode(ctx->fmap, tempname))
2678 ret = cli_magic_scan_dir(tempname, ctx);
2679
2680 if (!ctx->engine->keeptmp)
2681 cli_rmdirs(tempname);
2682
2683 free(tempname);
2684 return ret;
2685 }
2686
2687 static cl_error_t cli_scanriff(cli_ctx *ctx)
2688 {
2689 cl_error_t ret = CL_CLEAN;
2690
2691 if (cli_check_riff_exploit(ctx) == 2)
2692 ret = cli_append_virus(ctx, "Heuristics.Exploit.W32.MS05-002");
2693
2694 return ret;
2695 }
2696
2697 static cl_error_t cli_scancryptff(cli_ctx *ctx)
2698 {
2699 cl_error_t ret = CL_CLEAN, ndesc;
2700 unsigned int i;
2701 const unsigned char *src;
2702 unsigned char *dest = NULL;
2703 char *tempfile;
2704 size_t pos;
2705 size_t bread;
2706
2707 /* Skip the CryptFF file header */
2708 pos = 0x10;
2709
2710 if ((dest = (unsigned char *)cli_malloc(FILEBUFF)) == NULL) {
2711 cli_dbgmsg("CryptFF: Can't allocate memory\n");
2712 return CL_EMEM;
2713 }
2714
2715 if (!(tempfile = cli_gentemp_with_prefix(ctx->sub_tmpdir, "cryptff"))) {
2716 free(dest);
2717 return CL_EMEM;
2718 }
2719
2720 if ((ndesc = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
2721 cli_errmsg("CryptFF: Can't create file %s\n", tempfile);
2722 free(dest);
2723 free(tempfile);
2724 return CL_ECREAT;
2725 }
2726
2727 for (; (src = fmap_need_off_once_len(ctx->fmap, pos, FILEBUFF, &bread)) && bread; pos += bread) {
2728 for (i = 0; i < bread; i++)
2729 dest[i] = src[i] ^ (unsigned char)0xff;
2730 if (cli_writen(ndesc, dest, bread) == (size_t)-1) {
2731 cli_dbgmsg("CryptFF: Can't write to descriptor %d\n", ndesc);
2732 free(dest);
2733 close(ndesc);
2734 free(tempfile);
2735 return CL_EWRITE;
2736 }
2737 }
2738
2739 free(dest);
2740
2741 cli_dbgmsg("CryptFF: Scanning decrypted data\n");
2742
2743 if ((ret = cli_magic_scan_desc(ndesc, tempfile, ctx, NULL)) == CL_VIRUS)
2744 cli_dbgmsg("CryptFF: Infected with %s\n", cli_get_last_virus(ctx));
2745
2746 close(ndesc);
2747
2748 if (ctx->engine->keeptmp)
2749 cli_dbgmsg("CryptFF: Decompressed data saved in %s\n", tempfile);
2750 else if (cli_unlink(tempfile))
2751 ret = CL_EUNLINK;
2752
2753 free(tempfile);
2754 return ret;
2755 }
2756
2757 static cl_error_t cli_scanpdf(cli_ctx *ctx, off_t offset)
2758 {
2759 cl_error_t ret;
2760 char *dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "pdf-tmp");
2761
2762 if (!dir)
2763 return CL_EMEM;
2764
2765 if (mkdir(dir, 0700)) {
2766 cli_dbgmsg("Can't create temporary directory for PDF file %s\n", dir);
2767 free(dir);
2768 return CL_ETMPDIR;
2769 }
2770
2771 ret = cli_pdf(dir, ctx, offset);
2772
2773 if (!ctx->engine->keeptmp)
2774 cli_rmdirs(dir);
2775
2776 free(dir);
2777 return ret;
2778 }
2779
2780 static cl_error_t cli_scantnef(cli_ctx *ctx)
2781 {
2782 cl_error_t ret;
2783 char *dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "tnef-tmp");
2784
2785 if (!dir)
2786 return CL_EMEM;
2787
2788 if (mkdir(dir, 0700)) {
2789 cli_dbgmsg("Can't create temporary directory for tnef file %s\n", dir);
2790 free(dir);
2791 return CL_ETMPDIR;
2792 }
2793
2794 ret = cli_tnef(dir, ctx);
2795
2796 if (ret == CL_CLEAN)
2797 ret = cli_magic_scan_dir(dir, ctx);
2798
2799 if (!ctx->engine->keeptmp)
2800 cli_rmdirs(dir);
2801
2802 free(dir);
2803 return ret;
2804 }
2805
2806 static cl_error_t cli_scanuuencoded(cli_ctx *ctx)
2807 {
2808 cl_error_t ret;
2809 char *dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "uuencoded-tmp");
2810
2811 if (!dir)
2812 return CL_EMEM;
2813
2814 if (mkdir(dir, 0700)) {
2815 cli_dbgmsg("Can't create temporary directory for uuencoded file %s\n", dir);
2816 free(dir);
2817 return CL_ETMPDIR;
2818 }
2819
2820 ret = cli_uuencode(dir, ctx->fmap);
2821
2822 if (ret == CL_CLEAN)
2823 ret = cli_magic_scan_dir(dir, ctx);
2824
2825 if (!ctx->engine->keeptmp)
2826 cli_rmdirs(dir);
2827
2828 free(dir);
2829 return ret;
2830 }
2831
2832 static cl_error_t cli_scanmail(cli_ctx *ctx)
2833 {
2834 char *dir;
2835 cl_error_t ret;
2836 unsigned int viruses_found = 0;
2837
2838 cli_dbgmsg("Starting cli_scanmail()\n");
2839
2840 /* generate the temporary directory */
2841 if (!(dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "mail-tmp")))
2842 return CL_EMEM;
2843
2844 if (mkdir(dir, 0700)) {
2845 cli_dbgmsg("Mail: Can't create temporary directory %s\n", dir);
2846 free(dir);
2847 return CL_ETMPDIR;
2848 }
2849
2850 /*
2851 * Extract the attachments into the temporary directory
2852 */
2853 if ((ret = cli_mbox(dir, ctx))) {
2854 if (ret == CL_VIRUS && SCAN_ALLMATCHES)
2855 viruses_found++;
2856 else {
2857 if (!ctx->engine->keeptmp)
2858 cli_rmdirs(dir);
2859 free(dir);
2860 return ret;
2861 }
2862 }
2863
2864 ret = cli_magic_scan_dir(dir, ctx);
2865
2866 if (!ctx->engine->keeptmp)
2867 cli_rmdirs(dir);
2868
2869 free(dir);
2870 if (viruses_found)
2871 return CL_VIRUS;
2872 return ret;
2873 }
2874
2875 static cl_error_t cli_scan_structured(cli_ctx *ctx)
2876 {
2877 char buf[8192];
2878 size_t result = 0;
2879 unsigned int cc_count = 0;
2880 unsigned int ssn_count = 0;
2881 int done = 0;
2882 fmap_t *map;
2883 size_t pos = 0;
2884 int (*ccfunc)(const unsigned char *buffer, size_t length, int cc_only);
2885 int (*ssnfunc)(const unsigned char *buffer, size_t length);
2886 unsigned int viruses_found = 0;
2887
2888 if (ctx == NULL)
2889 return CL_ENULLARG;
2890
2891 map = ctx->fmap;
2892
2893 if (ctx->engine->min_cc_count == 1)
2894 ccfunc = dlp_has_cc;
2895 else
2896 ccfunc = dlp_get_cc_count;
2897
2898 switch (SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL | SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED) {
2899 case (CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL | CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED):
2900 if (ctx->engine->min_ssn_count == 1)
2901 ssnfunc = dlp_has_ssn;
2902 else
2903 ssnfunc = dlp_get_ssn_count;
2904 break;
2905
2906 case CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL:
2907 if (ctx->engine->min_ssn_count == 1)
2908 ssnfunc = dlp_has_normal_ssn;
2909 else
2910 ssnfunc = dlp_get_normal_ssn_count;
2911 break;
2912
2913 case CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED:
2914 if (ctx->engine->min_ssn_count == 1)
2915 ssnfunc = dlp_has_stripped_ssn;
2916 else
2917 ssnfunc = dlp_get_stripped_ssn_count;
2918 break;
2919
2920 default:
2921 ssnfunc = NULL;
2922 }
2923
2924 while (!done && ((result = fmap_readn(map, buf, pos, 8191)) > 0) && (result != (size_t)-1)) {
2925 pos += result;
2926 if ((cc_count += ccfunc((const unsigned char *)buf, result,
2927 (ctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED_CC) ? 1 : 0)) >= ctx->engine->min_cc_count) {
2928 done = 1;
2929 }
2930
2931 if (ssnfunc && ((ssn_count += ssnfunc((const unsigned char *)buf, result)) >= ctx->engine->min_ssn_count)) {
2932 done = 1;
2933 }
2934 }
2935
2936 if (cc_count != 0 && cc_count >= ctx->engine->min_cc_count) {
2937 cli_dbgmsg("cli_scan_structured: %u credit card numbers detected\n", cc_count);
2938 if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Structured.CreditCardNumber")) {
2939 if (SCAN_ALLMATCHES) {
2940 viruses_found++;
2941 } else {
2942 return CL_VIRUS;
2943 }
2944 }
2945 }
2946
2947 if (ssn_count != 0 && ssn_count >= ctx->engine->min_ssn_count) {
2948 cli_dbgmsg("cli_scan_structured: %u social security numbers detected\n", ssn_count);
2949 if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Structured.SSN")) {
2950 if (SCAN_ALLMATCHES) {
2951 viruses_found++;
2952 } else {
2953 return CL_VIRUS;
2954 }
2955 }
2956 }
2957
2958 if (viruses_found)
2959 return CL_VIRUS;
2960 return CL_CLEAN;
2961 }
2962
2963 static cl_error_t cli_scanembpe(cli_ctx *ctx, off_t offset)
2964 {
2965 cl_error_t ret = CL_CLEAN;
2966 int fd;
2967 size_t bytes;
2968 size_t size = 0;
2969 size_t todo;
2970 const char *buff;
2971 char *tmpname;
2972 fmap_t *map = ctx->fmap;
2973 unsigned int corrupted_input;
2974
2975 tmpname = cli_gentemp_with_prefix(ctx->sub_tmpdir, "embedded-pe");
2976 if (!tmpname)
2977 return CL_EMEM;
2978
2979 if ((fd = open(tmpname, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
2980 cli_errmsg("cli_scanembpe: Can't create file %s\n", tmpname);
2981 free(tmpname);
2982 return CL_ECREAT;
2983 }
2984
2985 todo = map->len - offset;
2986 while (1) {
2987 bytes = MIN(todo, map->pgsz);
2988 if (!bytes)
2989 break;
2990
2991 if (!(buff = fmap_need_off_once(map, offset + size, bytes))) {
2992 close(fd);
2993 if (!ctx->engine->keeptmp) {
2994 if (cli_unlink(tmpname)) {
2995 free(tmpname);
2996 return CL_EUNLINK;
2997 }
2998 }
2999 free(tmpname);
3000 return CL_EREAD;
3001 }
3002 size += bytes;
3003 todo -= bytes;
3004
3005 if (cli_checklimits("cli_scanembpe", ctx, size, 0, 0) != CL_CLEAN)
3006 break;
3007
3008 if (cli_writen(fd, buff, bytes) != bytes) {
3009 cli_dbgmsg("cli_scanembpe: Can't write to temporary file\n");
3010 close(fd);
3011 if (!ctx->engine->keeptmp) {
3012 if (cli_unlink(tmpname)) {
3013 free(tmpname);
3014 return CL_EUNLINK;
3015 }
3016 }
3017 free(tmpname);
3018 return CL_EWRITE;
3019 }
3020 }
3021
3022 corrupted_input = ctx->corrupted_input;
3023 ctx->corrupted_input = 1;
3024 ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL);
3025 ctx->corrupted_input = corrupted_input;
3026 if (ret == CL_VIRUS) {
3027 cli_dbgmsg("cli_scanembpe: Infected with %s\n", cli_get_last_virus(ctx));
3028 close(fd);
3029 if (!ctx->engine->keeptmp) {
3030 if (cli_unlink(tmpname)) {
3031 free(tmpname);
3032 return CL_EUNLINK;
3033 }
3034 }
3035 free(tmpname);
3036 return CL_VIRUS;
3037 }
3038
3039 close(fd);
3040 if (!ctx->engine->keeptmp) {
3041 if (cli_unlink(tmpname)) {
3042 free(tmpname);
3043 return CL_EUNLINK;
3044 }
3045 }
3046 free(tmpname);
3047
3048 /* intentionally ignore possible errors from cli_magic_scan_desc */
3049 return CL_CLEAN;
3050 }
3051
3052 #if defined(_WIN32) || defined(C_LINUX) || defined(C_DARWIN)
3053 #define PERF_MEASURE
3054 #endif
3055
3056 #ifdef PERF_MEASURE
3057
/*
 * Table of the performance events that perf_init() defines and perf_done() reports.
 * perf_done() prints the entries in table order.
 */
static struct
{
    enum perfev id;    /* event id passed to the cli_event_* calls */
    const char *name;  /* label passed to cli_event_define() and printed by perf_done() */
    enum ev_type type; /* value type passed to cli_event_define() */
} perf_events[] = {
    {PERFT_SCAN, "full scan", ev_time},
    {PERFT_PRECB, "prescan cb", ev_time},
    {PERFT_POSTCB, "postscan cb", ev_time},
    {PERFT_CACHE, "cache", ev_time},
    {PERFT_FT, "filetype", ev_time},
    {PERFT_CONTAINER, "container", ev_time},
    {PERFT_SCRIPT, "script", ev_time},
    {PERFT_PE, "pe", ev_time},
    {PERFT_RAW, "raw", ev_time},
    {PERFT_RAWTYPENO, "raw container", ev_time},
    {PERFT_MAP, "map", ev_time},
    {PERFT_BYTECODE, "bytecode", ev_time},
    /* The two CPU-time counters are integer events fed from get_thread_times(). */
    {PERFT_KTIME, "kernel", ev_int},
    {PERFT_UTIME, "user", ev_int}};
3078
/**
 * @brief Fetch kernel and user CPU time.
 *
 * On Windows the values come from GetThreadTimes() for the current thread, divided
 * by 10. Elsewhere they come from times(), scaled by 1000000 / sysconf(_SC_CLK_TCK).
 * Both outputs are set to 0 if the underlying call fails.
 *
 * @param kt [out] Kernel (system) time.
 * @param ut [out] User time.
 */
static void get_thread_times(uint64_t *kt, uint64_t *ut)
{
#ifdef _WIN32
    FILETIME created, exited, kernel, user;
    ULARGE_INTEGER kernel_ticks, user_ticks;

    if (GetThreadTimes(GetCurrentThread(), &created, &exited, &kernel, &user)) {
        kernel_ticks.LowPart  = kernel.dwLowDateTime;
        kernel_ticks.HighPart = kernel.dwHighDateTime;
        user_ticks.LowPart    = user.dwLowDateTime;
        user_ticks.HighPart   = user.dwHighDateTime;

        *kt = kernel_ticks.QuadPart / 10;
        *ut = user_ticks.QuadPart / 10;
        return;
    }

    *ut = 0;
    *kt = 0;
#else
    struct tms cpu;
    clock_t ticks_per_second;

    if (times(&cpu) == ((clock_t)-1)) {
        *ut = 0;
        *kt = 0;
        return;
    }

    /* Convert clock ticks to the scaled value using the system tick rate. */
    ticks_per_second = sysconf(_SC_CLK_TCK);

    *kt = ((uint64_t)1000000) * cpu.tms_stime / ticks_per_second;
    *ut = ((uint64_t)1000000) * cpu.tms_utime / ticks_per_second;
#endif
}
3105
3106 static inline void perf_init(cli_ctx *ctx)
3107 {
3108 uint64_t kt, ut;
3109 unsigned i;
3110
3111 if (!SCAN_DEV_COLLECT_PERF_INFO)
3112 return;
3113
3114 ctx->perf = cli_events_new(PERFT_LAST);
3115 for (i = 0; i < sizeof(perf_events) / sizeof(perf_events[0]); i++) {
3116 if (cli_event_define(ctx->perf, perf_events[i].id, perf_events[i].name,
3117 perf_events[i].type, multiple_sum) == -1)
3118 continue;
3119 }
3120 cli_event_time_start(ctx->perf, PERFT_SCAN);
3121 get_thread_times(&kt, &ut);
3122 cli_event_int(ctx->perf, PERFT_KTIME, -kt);
3123 cli_event_int(ctx->perf, PERFT_UTIME, -ut);
3124 }
3125
3126 static inline void perf_done(cli_ctx *ctx)
3127 {
3128 char timestr[512];
3129 char *p;
3130 unsigned i;
3131 uint64_t kt, ut;
3132 char *pend;
3133 cli_events_t *perf = ctx->perf;
3134
3135 if (!perf)
3136 return;
3137
3138 p = timestr;
3139 pend = timestr + sizeof(timestr) - 1;
3140 *pend = 0;
3141
3142 cli_event_time_stop(perf, PERFT_SCAN);
3143 get_thread_times(&kt, &ut);
3144 cli_event_int(perf, PERFT_KTIME, kt);
3145 cli_event_int(perf, PERFT_UTIME, ut);
3146
3147 for (i = 0; i < sizeof(perf_events) / sizeof(perf_events[0]); i++) {
3148 union ev_val val;
3149 unsigned count;
3150
3151 cli_event_get(perf, perf_events[i].id, &val, &count);
3152 if (p < pend)
3153 p += snprintf(p, pend - p, "%s: %d.%03ums, ", perf_events[i].name,
3154 (signed)(val.v_int / 1000),
3155 (unsigned)(val.v_int % 1000));
3156 }
3157 *p = 0;
3158 cli_infomsg(ctx, "performance: %s\n", timestr);
3159
3160 cli_events_free(perf);
3161 ctx->perf = NULL;
3162 }
3163
/* Start the timer for event `id` on ctx->perf (forwards to cli_event_time_start()). */
static inline void perf_start(cli_ctx *ctx, int id)
{
    cli_event_time_start(ctx->perf, id);
}
3168
/* Stop the timer for event `id` on ctx->perf (forwards to cli_event_time_stop()). */
static inline void perf_stop(cli_ctx *ctx, int id)
{
    cli_event_time_stop(ctx->perf, id);
}
3173
/* Forward `id` and `nestedid` to cli_event_time_nested_start() on ctx->perf. */
static inline void perf_nested_start(cli_ctx *ctx, int id, int nestedid)
{
    cli_event_time_nested_start(ctx->perf, id, nestedid);
}
3178
/* Forward `id` and `nestedid` to cli_event_time_nested_stop() on ctx->perf. */
static inline void perf_nested_stop(cli_ctx *ctx, int id, int nestedid)
{
    cli_event_time_nested_stop(ctx->perf, id, nestedid);
}
3183
3184 #else
/* No-op perf_init() used when PERF_MEASURE is not defined. */
static inline void perf_init(cli_ctx *ctx)
{
    UNUSEDPARAM(ctx);
}
/* No-op perf_start() used when PERF_MEASURE is not defined. */
static inline void perf_start(cli_ctx *ctx, int id)
{
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(id);
}
/* No-op perf_stop() used when PERF_MEASURE is not defined. */
static inline void perf_stop(cli_ctx *ctx, int id)
{
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(id);
}
/* No-op perf_nested_start() used when PERF_MEASURE is not defined. */
static inline void perf_nested_start(cli_ctx *ctx, int id, int nestedid)
{
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(id);
    UNUSEDPARAM(nestedid);
}
/* No-op perf_nested_stop() used when PERF_MEASURE is not defined. */
static inline void perf_nested_stop(cli_ctx *ctx, int id, int nestedid)
{
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(id);
    UNUSEDPARAM(nestedid);
}
/* No-op perf_done() used when PERF_MEASURE is not defined. */
static inline void perf_done(cli_ctx *ctx)
{
    UNUSEDPARAM(ctx);
}
3215 #endif
3216
3217 /**
3218 * @brief Perform raw scan of current fmap.
3219 *
3220 * @param ctx Current scan context.
3221 * @param type File type
3222 * @param typercg Enable type recognition (file typing scan results).
3223 * If 0, will be a regular ac-mode scan.
3224 * @param dettype [out] If typercg enabled and scan detects HTML or MAIL types,
3225 * will output HTML or MAIL types after performing HTML/MAIL scans
3226 * @param refhash Hash of current fmap
3227 * @return cl_error_t
3228 */
3229 static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_file_t *dettype, unsigned char *refhash)
3230 {
3231 cl_error_t ret = CL_CLEAN, nret = CL_CLEAN;
3232 struct cli_matched_type *ftoffset = NULL, *fpt;
3233 struct cli_exe_info peinfo;
3234 unsigned int acmode = AC_SCAN_VIR, break_loop = 0;
3235
3236 #if HAVE_JSON
3237 struct json_object *parent_property = NULL;
3238 #else
3239 void *parent_property = NULL;
3240 #endif
3241
3242 if ((typercg) &&
3243 // We should also omit bzips, but DMG's may be detected in bzips. (type != CL_TYPE_BZ) && /* Omit BZ files because they can contain portions of original files like zip file entries that cause invalid extractions and lots of warnings. Decompress first, then scan! */
3244 (type != CL_TYPE_GZ) && /* Omit GZ files because they can contain portions of original files like zip file entries that cause invalid extractions and lots of warnings. Decompress first, then scan! */
3245 (type != CL_TYPE_CPIO_OLD) && /* Omit CPIO_OLD files because it's an image format that we can extract and scan manually. */
3246 (type != CL_TYPE_ZIP) && /* Omit ZIP files because it'll detect each zip file entry as SFXZIP, which is a waste. We'll extract it and then scan. */
3247 (type != CL_TYPE_ZIPSFX) && /* Omit SFX archive types from being checked for embedded content. They should only be parsed for contained files. Those contained files could be EXE's with more SFX, but that's the nature of containers. */
3248 (type != CL_TYPE_ARJSFX) && /* " */
3249 (type != CL_TYPE_RARSFX) && /* " */
3250 (type != CL_TYPE_EGGSFX) && /* " */
3251 (type != CL_TYPE_CABSFX) && /* " */
3252 (type != CL_TYPE_7ZSFX) && /* " */
3253 (type != CL_TYPE_OLD_TAR) && /* Omit OLD TAR files because it's a raw archive format that we can extract and scan manually. */
3254 (type != CL_TYPE_POSIX_TAR)) { /* Omit POSIX TAR files because it's a raw archive format that we can extract and scan manually. */
3255 /*
         * Enable file type recognition scan mode if requested, except for some problematic types (above).
3257 */
3258 acmode |= AC_SCAN_FT;
3259 }
3260
3261 perf_start(ctx, PERFT_RAW);
3262 ret = cli_scan_fmap(ctx, type == CL_TYPE_TEXT_ASCII ? CL_TYPE_ANY : type, 0, &ftoffset, acmode, NULL, refhash);
3263 perf_stop(ctx, PERFT_RAW);
3264
3265 // I think this (CL_TYPENO business) causes embedded file extraction to stop when a
3266 // signature has matched in cli_scan_fmap, which wouldn't be what
3267 // we want if allmatch is specified.
3268 //
3269 // TODO: find a way to return type matches separately from malware matches
3270 if (ret >= CL_TYPENO) {
3271 perf_nested_start(ctx, PERFT_RAWTYPENO, PERFT_SCAN);
3272 fpt = ftoffset;
3273
3274 while (fpt) {
3275 if (fpt->offset > 0) {
3276 bool type_has_been_handled = true;
3277
3278 #if HAVE_JSON
3279 if (SCAN_COLLECT_METADATA && ctx->wrkproperty) {
3280 json_object *arrobj;
3281
3282 parent_property = ctx->wrkproperty;
3283 if (!json_object_object_get_ex(parent_property, "EmbeddedObjects", &arrobj)) {
3284 arrobj = json_object_new_array();
3285 if (NULL == arrobj) {
3286 cli_errmsg("scanraw: no memory for json properties object\n");
3287 nret = CL_EMEM;
3288 break;
3289 }
3290 json_object_object_add(parent_property, "EmbeddedObjects", arrobj);
3291 }
3292 ctx->wrkproperty = json_object_new_object();
3293 if (NULL == ctx->wrkproperty) {
3294 cli_errmsg("scanraw: no memory for json properties object\n");
3295 nret = CL_EMEM;
3296 break;
3297 }
3298 json_object_array_add(arrobj, ctx->wrkproperty);
3299
3300 ret = cli_jsonstr(ctx->wrkproperty, "FileType", cli_ftname(fpt->type));
3301 if (ret != CL_SUCCESS) {
3302 cli_errmsg("scanraw: failed to add string to json object\n");
3303 nret = CL_EMEM;
3304 break;
3305 }
3306
3307 ret = cli_jsonint64(ctx->wrkproperty, "Offset", (int64_t)fpt->offset);
3308 if (ret != CL_SUCCESS) {
3309 cli_errmsg("scanraw: failed to add int to json object\n");
3310 nret = CL_EMEM;
3311 break;
3312 }
3313 }
3314 #endif
3315 /*
3316 * First, use "embedded type recognition" to identify a file's actual type.
3317 * (a.k.a. not embedded files, but file type detection corrections)
3318 *
3319 * Do this at all fmap layers. Though we should only reassign the types
3320 * if the current type makes sense for the reassignment.
3321 */
3322 switch (fpt->type) {
3323 case CL_TYPE_MHTML:
3324 if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX)) {
3325 if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3326 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3327 // HTML files may contain special characters and could be
3328 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3329
3330 // Reassign type of current layer based on what we discovered
3331 cli_recursion_stack_change_type(ctx, fpt->type);
3332
3333 cli_dbgmsg("MHTML signature found at %u\n", (unsigned int)fpt->offset);
3334 nret = ret = cli_scanmail(ctx);
3335 }
3336 }
3337 break;
3338
3339 case CL_TYPE_XDP:
3340 if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF)) {
3341 if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3342 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3343 // XML files may contain special characters and could be
3344 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3345
3346 // Reassign type of current layer based on what we discovered
3347 cli_recursion_stack_change_type(ctx, fpt->type);
3348
3349 cli_dbgmsg("XDP signature found at %u\n", (unsigned int)fpt->offset);
3350 nret = ret = cli_scanxdp(ctx);
3351 }
3352 }
3353 break;
3354
3355 case CL_TYPE_XML_WORD:
3356 if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML)) {
3357 if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3358 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3359 // XML files may contain special characters and could be
3360 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3361
3362 // Reassign type of current layer based on what we discovered
3363 cli_recursion_stack_change_type(ctx, fpt->type);
3364
3365 cli_dbgmsg("XML-WORD signature found at %u\n", (unsigned int)fpt->offset);
3366 nret = ret = cli_scanmsxml(ctx);
3367 }
3368 }
3369 break;
3370 case CL_TYPE_XML_XL:
3371 if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML)) {
3372 if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3373 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3374 // XML files may contain special characters and could be
3375 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3376
3377 // Reassign type of current layer based on what we discovered
3378 cli_recursion_stack_change_type(ctx, fpt->type);
3379
3380 cli_dbgmsg("XML-XL signature found at %u\n", (unsigned int)fpt->offset);
3381 nret = ret = cli_scanmsxml(ctx);
3382 }
3383 }
3384 break;
3385 case CL_TYPE_XML_HWP:
3386 if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_HWP)) {
3387 if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3388 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3389 // XML files may contain special characters and could be
3390 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3391
3392 // Reassign type of current layer based on what we discovered
3393 cli_recursion_stack_change_type(ctx, fpt->type);
3394
3395 cli_dbgmsg("XML-HWP signature found at %u\n", (unsigned int)fpt->offset);
3396 nret = ret = cli_scanhwpml(ctx);
3397 }
3398 }
3399 break;
3400
3401 case CL_TYPE_DMG:
3402 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_DMG)) {
3403 // TODO: determine all types that DMG may start with
3404 // if ((ctx->recursion_stack[ctx->recursion_level].type == CL_TYPE_BZIP2) || ...))
3405 {
3406 // Reassign type of current layer based on what we discovered
3407 cli_recursion_stack_change_type(ctx, fpt->type);
3408
3409 cli_dbgmsg("DMG signature found at %u\n", (unsigned int)fpt->offset);
3410 nret = cli_scandmg(ctx);
3411 }
3412 }
3413 break;
3414
3415 case CL_TYPE_ISO9660:
3416 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ISO9660)) {
3417 // TODO: determine all types that ISO9660 may start with
3418 // if ((ctx->recursion_stack[ctx->recursion_level].type == CL_TYPE_ANY) || ...))
3419 {
3420 // Reassign type of current layer based on what we discovered
3421 cli_recursion_stack_change_type(ctx, fpt->type);
3422
3423 cli_dbgmsg("DMG signature found at %u\n", (unsigned int)fpt->offset);
3424 nret = cli_scaniso(ctx, fpt->offset);
3425 }
3426 }
3427 break;
3428
3429 case CL_TYPE_MBR:
3430 if (SCAN_PARSE_ARCHIVE) {
3431 // TODO: determine all types that GPT or MBR may start with
3432 // if ((ctx->recursion_stack[ctx->recursion_level].type == CL_TYPE_???) || ...))
3433 {
3434 // First check if actually a GPT, not MBR.
3435 int iret = cli_mbr_check2(ctx, 0);
3436
3437 if ((iret == CL_TYPE_GPT) && (DCONF_ARCH & ARCH_CONF_GPT)) {
3438 // Reassign type of current layer based on what we discovered
3439 cli_recursion_stack_change_type(ctx, CL_TYPE_GPT);
3440
3441 cli_dbgmsg("Recognized GUID Partition Table file\n");
3442 cli_dbgmsg("GPT signature found at %u\n", (unsigned int)fpt->offset);
3443 nret = cli_scangpt(ctx, 0);
3444 } else if ((iret == CL_CLEAN) && (DCONF_ARCH & ARCH_CONF_MBR)) {
3445 // Reassign type of current layer based on what we discovered
3446 cli_recursion_stack_change_type(ctx, CL_TYPE_MBR);
3447
3448 cli_dbgmsg("MBR signature found at %u\n", (unsigned int)fpt->offset);
3449 nret = cli_scanmbr(ctx, 0);
3450 }
3451 }
3452 }
3453 break;
3454
3455 default:
3456 type_has_been_handled = false;
3457 }
3458
3459 /*
3460 * Next, check for actual embedded files.
3461 */
3462 if ((ctx->recursion_stack[ctx->recursion_level].recursion_level_buffer_fmap == 0) &&
3463 (false == type_has_been_handled)) {
3464
3465 fmap_t *new_map = NULL;
3466
3467 /*
3468 * Only do this though if we're at the top fmap layer of a buffer.
3469 *
3470 * This restriction will prevent detecting the same embedded content
3471 * more than once when recursing with embedded file type recognition
3472 * deeper within the same buffer.
3473 */
3474 cli_dbgmsg("%s signature found at %u\n", cli_ftname(fpt->type), (unsigned int)fpt->offset);
3475
3476 type_has_been_handled = true;
3477
3478 switch (fpt->type) {
3479 case CL_TYPE_RARSFX:
3480 if (type != CL_TYPE_RAR && have_rar && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR)) {
3481
3482 /// TODO: This is extremely expensive because it has to hash the fpt->offset -> len!
3483 /// We need to find a way to not hash every time!!!!
3484 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3485 if (NULL == new_map) {
3486 ret = nret = CL_EMEM;
3487 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3488 break;
3489 }
3490
3491 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_RAR, false); /* Perform scan with child fmap */
3492 if (CL_SUCCESS != nret) {
3493 ret = nret;
3494 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3495 break;
3496 }
3497
3498 nret = cli_scanrar(ctx);
3499
3500 (void)cli_recursion_stack_pop(ctx);
3501 }
3502 break;
3503
3504 case CL_TYPE_EGGSFX:
3505 if (type != CL_TYPE_EGG && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_EGG)) {
3506
3507 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3508 if (NULL == new_map) {
3509 ret = nret = CL_EMEM;
3510 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3511 break;
3512 }
3513
3514 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_EGG, false); /* Perform scan with child fmap */
3515 if (CL_SUCCESS != nret) {
3516 ret = nret;
3517 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3518 break;
3519 }
3520
3521 nret = cli_scanegg(ctx);
3522
3523 (void)cli_recursion_stack_pop(ctx);
3524 }
3525 break;
3526
3527 case CL_TYPE_ZIPSFX:
3528 if (type != CL_TYPE_ZIP && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP)) {
3529
3530 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3531 if (NULL == new_map) {
3532 ret = nret = CL_EMEM;
3533 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3534 break;
3535 }
3536
3537 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_ZIP, false); /* Perform scan with child fmap */
3538 if (CL_SUCCESS != nret) {
3539 ret = nret;
3540 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3541 break;
3542 }
3543
3544 nret = cli_unzip_single(ctx, 0);
3545
3546 (void)cli_recursion_stack_pop(ctx);
3547 }
3548 break;
3549
3550 case CL_TYPE_CABSFX:
3551 if (type != CL_TYPE_MSCAB && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CAB)) {
3552
3553 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3554 if (NULL == new_map) {
3555 ret = nret = CL_EMEM;
3556 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3557 break;
3558 }
3559
3560 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_MSCAB, false); /* Perform scan with child fmap */
3561 if (CL_SUCCESS != nret) {
3562 ret = nret;
3563 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3564 break;
3565 }
3566
3567 nret = cli_scanmscab(ctx, 0);
3568
3569 (void)cli_recursion_stack_pop(ctx);
3570 }
3571 break;
3572
3573 case CL_TYPE_ARJSFX:
3574 if (type != CL_TYPE_ARJ && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ARJ)) {
3575
3576 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3577 if (NULL == new_map) {
3578 ret = nret = CL_EMEM;
3579 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3580 break;
3581 }
3582
3583 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_ARJ, false); /* Perform scan with child fmap */
3584 if (CL_SUCCESS != nret) {
3585 ret = nret;
3586 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3587 break;
3588 }
3589
3590 nret = cli_scanarj(ctx);
3591
3592 (void)cli_recursion_stack_pop(ctx);
3593 }
3594 break;
3595
3596 case CL_TYPE_7ZSFX:
3597 if (type != CL_TYPE_7Z && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_7Z)) {
3598
3599 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3600 if (NULL == new_map) {
3601 ret = nret = CL_EMEM;
3602 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3603 break;
3604 }
3605
3606 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_7Z, false); /* Perform scan with child fmap */
3607 if (CL_SUCCESS != nret) {
3608 ret = nret;
3609 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3610 break;
3611 }
3612
3613 nret = cli_7unz(ctx, 0);
3614
3615 (void)cli_recursion_stack_pop(ctx);
3616 }
3617 break;
3618
3619 case CL_TYPE_NULSFT:
3620 if (SCAN_PARSE_ARCHIVE && type == CL_TYPE_MSEXE && (DCONF_ARCH & ARCH_CONF_NSIS) && fpt->offset > 4) {
3621 // Note: CL_TYPE_NULSFT is special, because the file actually starts 4 bytes before the start of the signature match
3622 new_map = fmap_duplicate(ctx->fmap, fpt->offset - 4, ctx->fmap->len - (fpt->offset - 4), NULL);
3623 if (NULL == new_map) {
3624 ret = nret = CL_EMEM;
3625 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3626 break;
3627 }
3628
3629 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_NULSFT, false); /* Perform scan with child fmap */
3630 if (CL_SUCCESS != nret) {
3631 ret = nret;
3632 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3633 break;
3634 }
3635
3636 nret = cli_scannulsft(ctx, 0);
3637
3638 (void)cli_recursion_stack_pop(ctx);
3639 }
3640 break;
3641
3642 case CL_TYPE_AUTOIT:
3643 if (SCAN_PARSE_ARCHIVE && type == CL_TYPE_MSEXE && (DCONF_ARCH & ARCH_CONF_AUTOIT)) {
3644
3645 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3646 if (NULL == new_map) {
3647 ret = nret = CL_EMEM;
3648 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3649 break;
3650 }
3651
3652 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_AUTOIT, false); /* Perform scan with child fmap */
3653 if (CL_SUCCESS != nret) {
3654 ret = nret;
3655 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3656 break;
3657 }
3658
3659 nret = cli_scanautoit(ctx, 23);
3660
3661 (void)cli_recursion_stack_pop(ctx);
3662 }
3663 break;
3664
3665 case CL_TYPE_ISHIELD_MSI:
3666 if (SCAN_PARSE_ARCHIVE && type == CL_TYPE_MSEXE && (DCONF_ARCH & ARCH_CONF_ISHIELD)) {
3667
3668 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3669 if (NULL == new_map) {
3670 ret = nret = CL_EMEM;
3671 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3672 break;
3673 }
3674
3675 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_ISHIELD_MSI, false); /* Perform scan with child fmap */
3676 if (CL_SUCCESS != nret) {
3677 ret = nret;
3678 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3679 break;
3680 }
3681
3682 nret = cli_scanishield_msi(ctx, 14);
3683
3684 (void)cli_recursion_stack_pop(ctx);
3685 }
3686 break;
3687
3688 case CL_TYPE_PDF:
3689 if (type != CL_TYPE_PDF && SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF)) {
3690
3691 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3692 if (NULL == new_map) {
3693 ret = nret = CL_EMEM;
3694 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3695 break;
3696 }
3697
3698 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_PDF, false); /* Perform scan with child fmap */
3699 if (CL_SUCCESS != nret) {
3700 ret = nret;
3701 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3702 break;
3703 }
3704
3705 nret = cli_scanpdf(ctx, 0);
3706
3707 (void)cli_recursion_stack_pop(ctx);
3708 }
3709 break;
3710
3711 case CL_TYPE_MSEXE:
3712 if (SCAN_PARSE_PE && (type == CL_TYPE_MSEXE || type == CL_TYPE_ZIP || type == CL_TYPE_MSOLE2) && ctx->dconf->pe) {
3713
3714 if ((uint64_t)(ctx->fmap->len - fpt->offset) > ctx->engine->maxembeddedpe) {
3715 cli_dbgmsg("scanraw: MaxEmbeddedPE exceeded\n");
3716 break;
3717 }
3718
3719 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3720 if (NULL == new_map) {
3721 ret = nret = CL_EMEM;
3722 cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3723 break;
3724 }
3725
3726 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_MSEXE, false); /* Perform scan with child fmap */
3727 if (CL_SUCCESS != nret) {
3728 ret = nret;
3729 cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3730 break;
3731 }
3732 // IMPORTANT: Must not break or return before cli_recursion_stack_pop!
3733
3734 cli_exe_info_init(&peinfo, 0);
3735
3736 // TODO We could probably substitute in a quicker
3737 // method of determining whether a PE file exists
3738 // at this offset.
3739 if (cli_peheader(ctx->fmap, &peinfo, CLI_PEHEADER_OPT_NONE, NULL) != 0) {
3740 cli_dbgmsg("Header check for MSEXE detection failed, probably not actually an embedded PE file.\n");
3741
3742 /* Despite failing, peinfo memory may have been allocated and must be freed. */
3743 cli_exe_info_destroy(&peinfo);
3744
3745 } else {
3746 cli_dbgmsg("*** Detected embedded PE file at %u ***\n", (unsigned int)fpt->offset);
3747
3748 /* Immediately free up peinfo allocated memory, prior to any recursion */
3749 cli_exe_info_destroy(&peinfo);
3750
3751 nret = cli_scanembpe(ctx, 0);
3752 break_loop = 1; /* we can stop here and other
3753 * embedded executables will
3754 * be found recursively
3755 * through the above call
3756 */
3757
3758 // TODO This method of embedded PE extraction
3759 // is kinda gross in that:
3760 // - if you have an executable that contains
3761 // 20 other exes, the bytes associated with
3762 // the last exe will have been included in
3763 // hash computations and things 20 times
3764 // (as overlay data to the previously
3765 // extracted exes).
3766 // - if you have a signed embedded exe, it
3767 // will fail to validate after extraction
3768 // bc it has overlay data, which is a
3769 // violation of the Authenticode spec.
3770 // - this method of extraction is subject to
3771 // the recursion limit, which is fairly
3772 // low by default (I think 16)
3773 //
3774 // It'd be awesome if we could compute the PE
3775 // size from the PE header and just extract
3776 // that.
3777 }
3778
3779 (void)cli_recursion_stack_pop(ctx);
3780 }
3781 break;
3782
3783 default:
3784 type_has_been_handled = false;
3785 cli_dbgmsg("scanraw: Type %u not handled in fpt loop\n", fpt->type);
3786 }
3787
3788 if (NULL != new_map) {
3789 free_duplicate_fmap(new_map);
3790 }
3791 }
3792 }
3793
3794 if ((nret == CL_VIRUS && !SCAN_ALLMATCHES) ||
3795 (nret == CL_EMEM) ||
3796 (ctx->abort_scan) ||
3797 (break_loop)) {
3798 break;
3799 }
3800
3801 fpt = fpt->next;
3802
3803 #if HAVE_JSON
3804 if (NULL != parent_property) {
3805 ctx->wrkproperty = (struct json_object *)(parent_property);
3806 parent_property = NULL;
3807 }
3808 #endif
3809 }
3810
3811 if (nret != CL_VIRUS) {
3812 /*
3813 * Now run the other file type parsers that may rely on file type
3814 * recognition to determine the actual file type.
3815 */
3816 switch (ret) {
3817 case CL_TYPE_HTML:
3818 /* bb#11196 - autoit script file misclassified as HTML */
3819 if (cli_recursion_stack_get_type(ctx, -2) == CL_TYPE_AUTOIT) {
3820 ret = CL_TYPE_TEXT_ASCII;
3821 } else if (SCAN_PARSE_HTML &&
3822 (type == CL_TYPE_TEXT_ASCII ||
3823 type == CL_TYPE_GIF) && /* Scan GIFs for embedded HTML/Javascript */
3824 (DCONF_DOC & DOC_CONF_HTML)) {
3825 *dettype = CL_TYPE_HTML;
3826 cli_recursion_stack_change_type(ctx, CL_TYPE_HTML);
3827 nret = cli_scanhtml(ctx);
3828 }
3829 break;
3830
3831 case CL_TYPE_MAIL:
3832 if (SCAN_PARSE_MAIL && type == CL_TYPE_TEXT_ASCII && (DCONF_MAIL & MAIL_CONF_MBOX)) {
3833 *dettype = CL_TYPE_MAIL;
3834 cli_recursion_stack_change_type(ctx, CL_TYPE_MAIL);
3835 nret = cli_scanmail(ctx);
3836 }
3837 break;
3838
3839 default:
3840 break;
3841 }
3842 }
3843
3844 perf_nested_stop(ctx, PERFT_RAWTYPENO, PERFT_SCAN);
3845 ret = nret;
3846 }
3847
3848 #if HAVE_JSON
3849 if (NULL != parent_property) {
3850 ctx->wrkproperty = (struct json_object *)(parent_property);
3851 }
3852 #endif
3853
3854 while (ftoffset) {
3855 fpt = ftoffset;
3856 ftoffset = ftoffset->next;
3857 free(fpt);
3858 }
3859
3860 if (ret == CL_VIRUS)
3861 cli_dbgmsg("%s found\n", cli_get_last_virus(ctx));
3862
3863 return ret;
3864 }
3865
3866 void emax_reached(cli_ctx *ctx)
3867 {
3868 int32_t stack_index;
3869
3870 if (NULL == ctx || NULL == ctx->recursion_stack) {
3871 return;
3872 }
3873
3874 stack_index = (int32_t)ctx->recursion_level;
3875
3876 while (stack_index >= 0) {
3877 fmap_t *map = ctx->recursion_stack[stack_index].fmap;
3878
3879 if (NULL != map) {
3880 map->dont_cache_flag = 1;
3881 }
3882
3883 stack_index -= 1;
3884 }
3885
3886 cli_dbgmsg("emax_reached: marked parents as non cacheable\n");
3887 }
3888
/*
 * Helpers for embedding the current source line number in a string literal.
 *
 * LINESTR   stringifies its argument exactly as written.
 * LINESTR2  adds one level of indirection so that the argument (here
 *           __LINE__) is macro-expanded before being stringified.
 * __AT__    expands to the string literal " at line <N>", where <N> is the
 *           line on which __AT__ itself is used.
 */
#define LINESTR(x) #x
#define LINESTR2(x) LINESTR(x)
#define __AT__ " at line " LINESTR2(__LINE__)
3892
3893 static cl_error_t dispatch_prescan_callback(clcb_pre_scan cb, cli_ctx *ctx, const char *filetype)
3894 {
3895 cl_error_t status = CL_CLEAN;
3896
3897 if (cb) {
3898 perf_start(ctx, PERFT_PRECB);
3899
3900 status = cb(fmap_fd(ctx->fmap), filetype, ctx->cb_ctx);
3901 switch (status) {
3902 case CL_BREAK:
3903 cli_dbgmsg("dispatch_prescan_callback: file whitelisted by callback\n");
3904 perf_stop(ctx, PERFT_PRECB);
3905 status = CL_BREAK;
3906 break;
3907 case CL_VIRUS:
3908 cli_dbgmsg("dispatch_prescan_callback: file blacklisted by callback\n");
3909 cli_append_virus(ctx, "Detected.By.Callback");
3910 perf_stop(ctx, PERFT_PRECB);
3911 status = CL_VIRUS;
3912 break;
3913 case CL_CLEAN:
3914 break;
3915 default:
3916 status = CL_CLEAN;
3917 cli_warnmsg("dispatch_prescan_callback: ignoring bad return code from callback\n");
3918 }
3919
3920 perf_stop(ctx, PERFT_PRECB);
3921 }
3922
3923 return status;
3924 }
3925
3926 cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
3927 {
3928 cl_error_t ret = CL_CLEAN;
3929 cl_error_t res;
3930 cl_error_t cb_retcode;
3931 cli_file_t dettype = 0;
3932 uint8_t typercg = 1;
3933 size_t hashed_size;
3934 unsigned char *hash = NULL;
3935 bitset_t *old_hook_lsig_matches = NULL;
3936 const char *filetype;
3937 int cache_clean = 0;
3938 #if HAVE_JSON
3939 struct json_object *parent_property = NULL;
3940 #else
3941 void *parent_property = NULL;
3942 #endif
3943
3944 char *old_temp_path = NULL;
3945 char *new_temp_path = NULL;
3946
3947 if (!ctx->engine) {
3948 cli_errmsg("CRITICAL: engine == NULL\n");
3949 ret = CL_ENULLARG;
3950 goto early_ret;
3951 }
3952
3953 if (!(ctx->engine->dboptions & CL_DB_COMPILED)) {
3954 cli_errmsg("CRITICAL: engine not compiled\n");
3955 ret = CL_EMALFDB;
3956 goto early_ret;
3957 }
3958
3959 if (ctx->fmap->len <= 5) {
3960 cli_dbgmsg("cli_magic_scandesc: File is too too small (%zu bytes), ignoring.\n", ctx->fmap->len);
3961 ret = CL_CLEAN;
3962 goto early_ret;
3963 }
3964
3965 if (cli_updatelimits(ctx, ctx->fmap->len) != CL_CLEAN) {
3966 emax_reached(ctx);
3967 ret = CL_CLEAN;
3968 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
3969 goto early_ret;
3970 }
3971
3972 if (ctx->engine->keeptmp) {
3973 char *fmap_basename = NULL;
3974 /*
3975 * Keep-temp enabled, so create a sub-directory to provide extraction directory recursion.
3976 */
3977 if ((NULL != ctx->fmap->name) &&
3978 (CL_SUCCESS == cli_basename(ctx->fmap->name, strlen(ctx->fmap->name), &fmap_basename))) {
3979 /*
3980 * The fmap has a name, lets include it in the new sub-directory.
3981 */
3982 new_temp_path = cli_gentemp_with_prefix(ctx->sub_tmpdir, fmap_basename);
3983 free(fmap_basename);
3984 if (NULL == new_temp_path) {
3985 cli_errmsg("cli_magic_scan: Failed to generate temp directory name.\n");
3986 ret = CL_EMEM;
3987 goto early_ret;
3988 }
3989 } else {
3990 /*
3991 * The fmap has no name or we failed to get the basename.
3992 */
3993 new_temp_path = cli_gentemp(ctx->sub_tmpdir);
3994 if (NULL == new_temp_path) {
3995 cli_errmsg("cli_magic_scan: Failed to generate temp directory name.\n");
3996 ret = CL_EMEM;
3997 goto early_ret;
3998 }
3999 }
4000
4001 old_temp_path = ctx->sub_tmpdir;
4002 ctx->sub_tmpdir = new_temp_path;
4003
4004 if (mkdir(ctx->sub_tmpdir, 0700)) {
4005 cli_errmsg("cli_magic_scan: Can't create tmp sub-directory for scan: %s.\n", ctx->sub_tmpdir);
4006 ret = CL_EACCES;
4007 goto early_ret;
4008 }
4009 }
4010
4011 if (type == CL_TYPE_PART_ANY) {
4012 typercg = 0;
4013 }
4014
4015 /*
4016 * Perform file typing from the start of the file.
4017 */
4018 perf_start(ctx, PERFT_FT);
4019 if ((type == CL_TYPE_ANY) || type == CL_TYPE_PART_ANY) {
4020 type = cli_determine_fmap_type(ctx->fmap, ctx->engine, type);
4021 }
4022 perf_stop(ctx, PERFT_FT);
4023 if (type == CL_TYPE_ERROR) {
4024 cli_dbgmsg("cli_magic_scan: cli_determine_fmap_type returned CL_TYPE_ERROR\n");
4025 ret = CL_EREAD;
4026 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4027 goto early_ret;
4028 }
4029 filetype = cli_ftname(type);
4030
4031 /* set current layer to the type we found */
4032 cli_recursion_stack_change_type(ctx, type);
4033
4034 #if HAVE_JSON
4035 if (SCAN_COLLECT_METADATA) {
4036 /*
4037 * Create JSON object to record metadata during the scan.
4038 */
4039 if (NULL == ctx->properties) {
4040 ctx->properties = json_object_new_object();
4041 if (NULL == ctx->properties) {
4042 cli_errmsg("cli_magic_scan: no memory for json properties object\n");
4043 ret = CL_EMEM;
4044 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4045 goto early_ret;
4046 }
4047 ctx->wrkproperty = ctx->properties;
4048
4049 ret = cli_jsonstr(ctx->properties, "Magic", "CLAMJSONv0");
4050 if (ret != CL_SUCCESS) {
4051 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4052 goto early_ret;
4053 }
4054 ret = cli_jsonstr(ctx->properties, "RootFileType", filetype);
4055 if (ret != CL_SUCCESS) {
4056 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4057 goto early_ret;
4058 }
4059
4060 } else {
4061 json_object *arrobj;
4062
4063 parent_property = ctx->wrkproperty;
4064 if (!json_object_object_get_ex(parent_property, "ContainedObjects", &arrobj)) {
4065 arrobj = json_object_new_array();
4066 if (NULL == arrobj) {
4067 cli_errmsg("cli_magic_scan: no memory for json properties object\n");
4068 ret = CL_EMEM;
4069 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4070 goto early_ret;
4071 }
4072 json_object_object_add(parent_property, "ContainedObjects", arrobj);
4073 }
4074 ctx->wrkproperty = json_object_new_object();
4075 if (NULL == ctx->wrkproperty) {
4076 cli_errmsg("cli_magic_scan: no memory for json properties object\n");
4077 ret = CL_EMEM;
4078 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4079 goto early_ret;
4080 }
4081 json_object_array_add(arrobj, ctx->wrkproperty);
4082 }
4083
4084 if (ctx->fmap->name) {
4085 ret = cli_jsonstr(ctx->wrkproperty, "FileName", ctx->fmap->name);
4086 if (ret != CL_SUCCESS) {
4087 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4088 goto early_ret;
4089 }
4090 }
4091 if (ctx->sub_filepath) {
4092 ret = cli_jsonstr(ctx->wrkproperty, "FilePath", ctx->sub_filepath);
4093 if (ret != CL_SUCCESS) {
4094 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4095 goto early_ret;
4096 }
4097 }
4098 ret = cli_jsonstr(ctx->wrkproperty, "FileType", filetype);
4099 if (ret != CL_SUCCESS) {
4100 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4101 goto early_ret;
4102 }
4103 ret = cli_jsonint(ctx->wrkproperty, "FileSize", ctx->fmap->len);
4104 if (ret != CL_SUCCESS) {
4105 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4106 goto early_ret;
4107 }
4108 }
4109 #endif
4110
4111 ret = dispatch_prescan_callback(ctx->engine->cb_pre_cache, ctx, filetype);
4112 if (CL_CLEAN != ret) {
4113 if (ret == CL_VIRUS) {
4114 ret = cli_check_fp(ctx, NULL);
4115 } else {
4116 ret = CL_CLEAN;
4117 }
4118 goto done;
4119 }
4120
4121 /*
4122 * Get the maphash
4123 */
4124 if (CL_SUCCESS != fmap_get_MD5(ctx->fmap, &hash)) {
4125 cli_dbgmsg("cli_magic_scan: Failed to get a hash for the current fmap.\n");
4126 goto done;
4127 }
4128 hashed_size = ctx->fmap->len;
4129
4130 /*
4131 * Check if we've already scanned this file before.
4132 */
4133 perf_start(ctx, PERFT_CACHE);
4134
4135 if (!(SCAN_COLLECT_METADATA))
4136 res = cache_check(hash, ctx);
4137 else
4138 res = CL_VIRUS;
4139
4140 #if HAVE_JSON
4141 if (SCAN_COLLECT_METADATA /* ctx.options->general & CL_SCAN_GENERAL_COLLECT_METADATA && ctx->wrkproperty != NULL */) {
4142 char hashstr[33];
4143 snprintf(hashstr, 33, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
4144 hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7],
4145 hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15]);
4146
4147 ret = cli_jsonstr(ctx->wrkproperty, "FileMD5", hashstr);
4148 if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE)
4149 memset(hash, 0, 16);
4150 if (ret != CL_SUCCESS) {
4151 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4152 goto early_ret;
4153 }
4154 }
4155 #endif
4156
4157 perf_stop(ctx, PERFT_CACHE);
4158
4159 if (res != CL_VIRUS) {
4160 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4161 goto early_ret;
4162 }
4163
4164 old_hook_lsig_matches = ctx->hook_lsig_matches;
4165 ctx->hook_lsig_matches = NULL;
4166
4167 if (!((ctx->options->general & ~CL_SCAN_GENERAL_ALLMATCHES) || (ctx->options->parse) || (ctx->options->heuristic) || (ctx->options->mail) || (ctx->options->dev))) {
4168 /*
4169 * Scanning in raw mode (stdin, etc.)
4170 */
4171 ret = dispatch_prescan_callback(ctx->engine->cb_pre_scan, ctx, filetype);
4172 if (CL_CLEAN != ret) {
4173 if (ret == CL_VIRUS) {
4174 ret = cli_check_fp(ctx, NULL);
4175 } else if (ret == CL_BREAK) {
4176 ret = CL_CLEAN;
4177 }
4178 goto done;
4179 }
4180
4181 if (CL_VIRUS == (ret = cli_scan_fmap(ctx, CL_TYPE_ANY, 0, NULL, AC_SCAN_VIR, NULL, hash)))
4182 cli_dbgmsg("cli_magic_scan: %s found in descriptor %d\n", cli_get_last_virus(ctx), fmap_fd(ctx->fmap));
4183
4184 goto done;
4185 }
4186
4187 ret = dispatch_prescan_callback(ctx->engine->cb_pre_scan, ctx, filetype);
4188 if (CL_CLEAN != ret) {
4189 if (ret == CL_VIRUS) {
4190 ret = cli_check_fp(ctx, NULL);
4191 } else if (ret == CL_BREAK) {
4192 ret = CL_CLEAN;
4193 }
4194 goto done;
4195 }
4196
4197 #ifdef HAVE__INTERNAL__SHA_COLLECT
4198 if (!ctx->sha_collect && type == CL_TYPE_MSEXE)
4199 ctx->sha_collect = 1;
4200 #endif
4201
4202 // We already saved the hook_lsig_matches (above)
4203 // The ctx one is NULL at present.
4204 ctx->hook_lsig_matches = cli_bitset_init();
4205 if (!ctx->hook_lsig_matches) {
4206 ret = CL_EMEM;
4207 goto done;
4208 }
4209
4210 if (type != CL_TYPE_IGNORED && ctx->engine->sdb) {
4211 /*
4212 * If self protection mechanism enabled, do the scanraw() scan first
4213 * before extracting with a file type parser.
4214 */
4215 ret = scanraw(ctx, type, 0, &dettype, (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) ? NULL : hash);
4216 if (ret == CL_EMEM || ret == CL_VIRUS) {
4217 ret = cli_check_fp(ctx, NULL);
4218 goto done;
4219 }
4220 }
4221
4222 /*
4223 * Run the file type parsers that we normally use before the raw scan.
4224 */
4225 perf_nested_start(ctx, PERFT_CONTAINER, PERFT_SCAN);
4226 switch (type) {
4227 case CL_TYPE_IGNORED:
4228 break;
4229
4230 case CL_TYPE_HWP3:
4231 if (SCAN_PARSE_HWP3 && (DCONF_DOC & DOC_CONF_HWP))
4232 ret = cli_scanhwp3(ctx);
4233 break;
4234
4235 case CL_TYPE_HWPOLE2:
4236 if (SCAN_PARSE_OLE2 && (DCONF_ARCH & ARCH_CONF_OLE2))
4237 ret = cli_scanhwpole2(ctx);
4238 break;
4239
4240 case CL_TYPE_XML_WORD:
4241 if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML))
4242 ret = cli_scanmsxml(ctx);
4243 break;
4244
4245 case CL_TYPE_XML_XL:
4246 if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML))
4247 ret = cli_scanmsxml(ctx);
4248 break;
4249
4250 case CL_TYPE_XML_HWP:
4251 if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_HWP))
4252 ret = cli_scanhwpml(ctx);
4253 break;
4254
4255 case CL_TYPE_XDP:
4256 if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF))
4257 ret = cli_scanxdp(ctx);
4258 break;
4259
4260 case CL_TYPE_RAR:
4261 if (have_rar && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR))
4262 ret = cli_scanrar(ctx);
4263 break;
4264
4265 case CL_TYPE_EGG:
4266 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_EGG))
4267 ret = cli_scanegg(ctx);
4268 break;
4269
4270 case CL_TYPE_OOXML_WORD:
4271 case CL_TYPE_OOXML_PPT:
4272 case CL_TYPE_OOXML_XL:
4273 case CL_TYPE_OOXML_HWP:
4274 #if HAVE_JSON
4275 if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_OOXML)) {
4276 if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
4277 ret = cli_process_ooxml(ctx, type);
4278
4279 if (ret == CL_EMEM || ret == CL_ENULLARG) {
4280 /* critical error */
4281 break;
4282 } else if (ret != CL_SUCCESS) {
4283 /*
4284 * non-critical return => allow for the CL_TYPE_ZIP scan to occur
4285 * cli_process_ooxml other possible returns:
4286 * CL_ETIMEOUT, CL_EMAXSIZE, CL_EMAXFILES, CL_EPARSE,
4287 * CL_EFORMAT, CL_BREAK, CL_ESTAT
4288 */
4289 ret = CL_SUCCESS;
4290 }
4291 }
4292 }
4293 #endif
4294 /* fall-through */
4295 case CL_TYPE_ZIP:
4296 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP))
4297 ret = cli_unzip(ctx);
4298 break;
4299
4300 case CL_TYPE_GZ:
4301 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_GZ))
4302 ret = cli_scangzip(ctx);
4303 break;
4304
4305 case CL_TYPE_BZ:
4306 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_BZ))
4307 ret = cli_scanbzip(ctx);
4308 break;
4309
4310 case CL_TYPE_XZ:
4311 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_XZ))
4312 ret = cli_scanxz(ctx);
4313 break;
4314
4315 case CL_TYPE_GPT:
4316 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_GPT))
4317 ret = cli_scangpt(ctx, 0);
4318 break;
4319
4320 case CL_TYPE_APM:
4321 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_APM))
4322 ret = cli_scanapm(ctx);
4323 break;
4324
4325 case CL_TYPE_ARJ:
4326 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ARJ))
4327 ret = cli_scanarj(ctx);
4328 break;
4329
4330 case CL_TYPE_NULSFT:
4331 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_NSIS))
4332 ret = cli_scannulsft(ctx, 0);
4333 break;
4334
4335 case CL_TYPE_AUTOIT:
4336 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_AUTOIT))
4337 ret = cli_scanautoit(ctx, 23);
4338 break;
4339
4340 case CL_TYPE_MSSZDD:
4341 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_SZDD))
4342 ret = cli_scanszdd(ctx);
4343 break;
4344
4345 case CL_TYPE_MSCAB:
4346 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CAB))
4347 ret = cli_scanmscab(ctx, 0);
4348 break;
4349
4350 case CL_TYPE_HTML:
4351 if (SCAN_PARSE_HTML && (DCONF_DOC & DOC_CONF_HTML))
4352 ret = cli_scanhtml(ctx);
4353 break;
4354
4355 case CL_TYPE_HTML_UTF16:
4356 if (SCAN_PARSE_HTML && (DCONF_DOC & DOC_CONF_HTML))
4357 ret = cli_scanhtml_utf16(ctx);
4358 break;
4359
4360 case CL_TYPE_SCRIPT:
4361 if ((DCONF_DOC & DOC_CONF_SCRIPT) && dettype != CL_TYPE_HTML)
4362 ret = cli_scanscript(ctx);
4363 break;
4364
4365 case CL_TYPE_SWF:
4366 if (SCAN_PARSE_SWF && (DCONF_DOC & DOC_CONF_SWF))
4367 ret = cli_scanswf(ctx);
4368 break;
4369
4370 case CL_TYPE_RTF:
4371 if (SCAN_PARSE_ARCHIVE && (DCONF_DOC & DOC_CONF_RTF))
4372 ret = cli_scanrtf(ctx);
4373 break;
4374
4375 case CL_TYPE_MAIL:
4376 if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX))
4377 ret = cli_scanmail(ctx);
4378 break;
4379
4380 case CL_TYPE_MHTML:
4381 if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX))
4382 ret = cli_scanmail(ctx);
4383 break;
4384
4385 case CL_TYPE_TNEF:
4386 if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_TNEF))
4387 ret = cli_scantnef(ctx);
4388 break;
4389
4390 case CL_TYPE_UUENCODED:
4391 if (DCONF_OTHER & OTHER_CONF_UUENC)
4392 ret = cli_scanuuencoded(ctx);
4393 break;
4394
4395 case CL_TYPE_MSCHM:
4396 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CHM))
4397 ret = cli_scanmschm(ctx);
4398 break;
4399
4400 case CL_TYPE_MSOLE2:
4401 if (SCAN_PARSE_OLE2 && (DCONF_ARCH & ARCH_CONF_OLE2))
4402 ret = cli_scanole2(ctx);
4403 break;
4404
4405 case CL_TYPE_7Z:
4406 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_7Z))
4407 ret = cli_7unz(ctx, 0);
4408 break;
4409
4410 case CL_TYPE_POSIX_TAR:
4411 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_TAR))
4412 ret = cli_scantar(ctx, 1);
4413 break;
4414
4415 case CL_TYPE_OLD_TAR:
4416 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_TAR))
4417 ret = cli_scantar(ctx, 0);
4418 break;
4419
4420 case CL_TYPE_CPIO_OLD:
4421 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
4422 ret = cli_scancpio_old(ctx);
4423 break;
4424
4425 case CL_TYPE_CPIO_ODC:
4426 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
4427 ret = cli_scancpio_odc(ctx);
4428 break;
4429
4430 case CL_TYPE_CPIO_NEWC:
4431 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
4432 ret = cli_scancpio_newc(ctx, 0);
4433 break;
4434
4435 case CL_TYPE_CPIO_CRC:
4436 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
4437 ret = cli_scancpio_newc(ctx, 1);
4438 break;
4439
4440 case CL_TYPE_BINHEX:
4441 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_BINHEX))
4442 ret = cli_binhex(ctx);
4443 break;
4444
4445 case CL_TYPE_SCRENC:
4446 if (DCONF_OTHER & OTHER_CONF_SCRENC)
4447 ret = cli_scanscrenc(ctx);
4448 break;
4449
4450 case CL_TYPE_RIFF:
4451 if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_RIFF))
4452 ret = cli_scanriff(ctx);
4453 break;
4454
4455 case CL_TYPE_GRAPHICS:
4456 /*
4457 * This case is for unhandled graphics types such as BMP, JPEG 2000, etc.
4458 *
4459 * Note: JPEG 2000 is a very different format from JPEG, JPEG/JFIF, JPEG/Exif, JPEG/SPIFF (1994, 1997)
4460 * JPEG 2000 is not handled by cli_scanjpeg or cli_parsejpeg.
4461 */
4462 break;
4463
4464 case CL_TYPE_GIF:
4465 if (SCAN_HEURISTICS && SCAN_HEURISTIC_BROKEN_MEDIA && (DCONF_OTHER & OTHER_CONF_GIF))
4466 ret = cli_parsegif(ctx);
4467 break;
4468
4469 case CL_TYPE_PNG:
4470 if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_PNG))
4471 ret = cli_parsepng(ctx); /* PNG parser detects a couple CVE's as well as Broken.Media */
4472 break;
4473
4474 case CL_TYPE_JPEG:
4475 if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_JPEG))
4476 ret = cli_parsejpeg(ctx); /* JPG parser detects MS04-028 exploits as well as Broken.Media */
4477 break;
4478
4479 case CL_TYPE_TIFF:
4480 if (SCAN_HEURISTICS && SCAN_HEURISTIC_BROKEN_MEDIA && (DCONF_OTHER & OTHER_CONF_TIFF) && ret != CL_VIRUS)
4481 ret = cli_parsetiff(ctx);
4482 break;
4483
4484 case CL_TYPE_PDF: /* FIXMELIMITS: pdf should be an archive! */
4485 if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF))
4486 ret = cli_scanpdf(ctx, 0);
4487 break;
4488
4489 case CL_TYPE_CRYPTFF:
4490 if (DCONF_OTHER & OTHER_CONF_CRYPTFF)
4491 ret = cli_scancryptff(ctx);
4492 break;
4493
4494 case CL_TYPE_ELF:
4495 if (SCAN_PARSE_ELF && ctx->dconf->elf)
4496 ret = cli_scanelf(ctx);
4497 break;
4498
4499 case CL_TYPE_MACHO:
4500 if (ctx->dconf->macho)
4501 ret = cli_scanmacho(ctx, NULL);
4502 break;
4503
4504 case CL_TYPE_MACHO_UNIBIN:
4505 if (ctx->dconf->macho)
4506 ret = cli_scanmacho_unibin(ctx);
4507 break;
4508
4509 case CL_TYPE_SIS:
4510 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_SIS))
4511 ret = cli_scansis(ctx);
4512 break;
4513
4514 case CL_TYPE_XAR:
4515 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_XAR))
4516 ret = cli_scanxar(ctx);
4517 break;
4518
4519 case CL_TYPE_PART_HFSPLUS:
4520 if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_HFSPLUS))
4521 ret = cli_scanhfsplus(ctx);
4522 break;
4523
4524 case CL_TYPE_BINARY_DATA:
4525 case CL_TYPE_TEXT_UTF16BE:
4526 if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_MYDOOMLOG))
4527 ret = cli_check_mydoom_log(ctx);
4528 break;
4529
4530 case CL_TYPE_TEXT_ASCII:
4531 if (SCAN_HEURISTIC_STRUCTURED && (DCONF_OTHER & OTHER_CONF_DLP))
4532 /* TODO: consider calling this from cli_scanscript() for
4533 * a normalised text
4534 */
4535
4536 ret = cli_scan_structured(ctx);
4537 break;
4538
4539 default:
4540 break;
4541 }
4542 perf_nested_stop(ctx, PERFT_CONTAINER, PERFT_SCAN);
4543
4544 /*
4545 * Perform the raw scan, which may include file type recognition signatures.
4546 */
4547 if ((ret == CL_VIRUS && !SCAN_ALLMATCHES) ||
4548 (ctx->abort_scan)) {
4549 goto done;
4550 }
4551
4552 /* Disable type recognition for the raw scan for zip files larger than maxziptypercg */
4553 if (type == CL_TYPE_ZIP && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP)) {
4554 /* CL_ENGINE_MAX_ZIPTYPERCG */
4555 uint64_t curr_len = ctx->fmap->len;
4556 if (curr_len > ctx->engine->maxziptypercg) {
4557 cli_dbgmsg("cli_magic_scan_desc: Not checking for embedded PEs (zip file > MaxZipTypeRcg)\n");
4558 typercg = 0;
4559 }
4560 }
4561
4562 /* CL_TYPE_HTML: raw HTML files are not scanned, unless safety measure activated via DCONF */
4563 if (type != CL_TYPE_IGNORED && (type != CL_TYPE_HTML || !(SCAN_PARSE_HTML) || !(DCONF_DOC & DOC_CONF_HTML_SKIPRAW)) && !ctx->engine->sdb) {
4564 res = scanraw(ctx, type, typercg, &dettype, (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) ? NULL : hash);
4565 if (res != CL_CLEAN) {
4566 switch (res) {
4567 /* List of scan halts, runtime errors only! */
4568 case CL_EUNLINK:
4569 case CL_ESTAT:
4570 case CL_ESEEK:
4571 case CL_EWRITE:
4572 case CL_EDUP:
4573 case CL_ETMPFILE:
4574 case CL_ETMPDIR:
4575 case CL_EMEM:
4576 cli_dbgmsg("Descriptor[%d]: scanraw error %s\n", fmap_fd(ctx->fmap), cl_strerror(res));
4577 ret = res;
4578 goto done;
4579 /* CL_VIRUS = malware found, check FP and report.
4580 * Likewise, if the file was determined to be trusted, then we
4581 * can also finish with the scan. (Ex: EXE with a valid
4582 * Authenticode sig.) */
4583 case CL_VERIFIED:
4584 // For now just conver CL_VERIFIED to CL_CLEAN, since
4585 // CL_VERIFIED isn't used elsewhere
4586 res = CL_CLEAN;
4587 // Fall through
4588 case CL_VIRUS:
4589 ret = res;
4590 if (SCAN_ALLMATCHES)
4591 break;
4592 goto done;
4593 /* All other "MAX" conditions should still fully scan the current file */
4594 case CL_ETIMEOUT:
4595 case CL_EMAXREC:
4596 case CL_EMAXSIZE:
4597 case CL_EMAXFILES:
4598 ret = res;
4599 cli_dbgmsg("Descriptor[%d]: Continuing after scanraw reached %s\n",
4600 fmap_fd(ctx->fmap), cl_strerror(res));
4601 break;
4602 /* Other errors must not block further scans below
4603 * This specifically includes CL_EFORMAT & CL_EREAD & CL_EUNPACK
4604 * Malformed/truncated files could report as any of these three.
4605 */
4606 default:
4607 ret = res;
4608 cli_dbgmsg("Descriptor[%d]: Continuing after scanraw error %s\n",
4609 fmap_fd(ctx->fmap), cl_strerror(res));
4610 }
4611 }
4612 }
4613
4614 /* Make sure we bail out if required. */
4615 if (ctx->abort_scan) {
4616 goto done;
4617 }
4618
4619 /*
4620 * Now run the rest of the file type parsers.
4621 */
4622 switch (type) {
4623 /* bytecode hooks triggered by a lsig must be a hook
4624 * called from one of the functions here */
4625 case CL_TYPE_TEXT_ASCII:
4626 case CL_TYPE_TEXT_UTF16BE:
4627 case CL_TYPE_TEXT_UTF16LE:
4628 case CL_TYPE_TEXT_UTF8:
4629 perf_nested_start(ctx, PERFT_SCRIPT, PERFT_SCAN);
4630 if ((DCONF_DOC & DOC_CONF_SCRIPT) && dettype != CL_TYPE_HTML && (ret != CL_VIRUS || SCAN_ALLMATCHES) && SCAN_PARSE_HTML)
4631 ret = cli_scanscript(ctx);
4632 if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX) && ret != CL_VIRUS && (cli_recursion_stack_get_type(ctx, -1) == CL_TYPE_MAIL || dettype == CL_TYPE_MAIL)) {
4633 ret = cli_scan_fmap(ctx, CL_TYPE_MAIL, 0, NULL, AC_SCAN_VIR, NULL, NULL);
4634 }
4635 perf_nested_stop(ctx, PERFT_SCRIPT, PERFT_SCAN);
4636 break;
4637 /* Due to performance reasons all executables were first scanned
4638 * in raw mode. Now we will try to unpack them
4639 */
4640 case CL_TYPE_MSEXE:
4641 perf_nested_start(ctx, PERFT_PE, PERFT_SCAN);
4642 if (SCAN_PARSE_PE && ctx->dconf->pe) {
4643 unsigned int corrupted_input = ctx->corrupted_input;
4644 ret = cli_scanpe(ctx);
4645 ctx->corrupted_input = corrupted_input;
4646 }
4647 perf_nested_stop(ctx, PERFT_PE, PERFT_SCAN);
4648 break;
4649 case CL_TYPE_ELF:
4650 perf_nested_start(ctx, PERFT_ELF, PERFT_SCAN);
4651 ret = cli_unpackelf(ctx);
4652 perf_nested_stop(ctx, PERFT_ELF, PERFT_SCAN);
4653 break;
4654 case CL_TYPE_MACHO:
4655 case CL_TYPE_MACHO_UNIBIN:
4656 perf_nested_start(ctx, PERFT_MACHO, PERFT_SCAN);
4657 ret = cli_unpackmacho(ctx);
4658 perf_nested_stop(ctx, PERFT_MACHO, PERFT_SCAN);
4659 break;
4660 case CL_TYPE_BINARY_DATA:
4661 ret = cli_scan_fmap(ctx, CL_TYPE_OTHER, 0, NULL, AC_SCAN_VIR, NULL, NULL);
4662 break;
4663 default:
4664 break;
4665 }
4666
4667 done:
4668 switch (ret) {
4669 /*
4670 * Limits exceeded
4671 */
4672 // Exceeding these maximums means we have to stop scanning:
4673 case CL_ETIMEOUT:
4674 case CL_EMAXFILES:
4675 ctx->abort_scan = true;
4676 cli_dbgmsg("Descriptor[%d]: %s\n", fmap_fd(ctx->fmap), cl_strerror(ret));
4677 ret = CL_CLEAN;
4678 break;
4679 // Exceeding these maximums means we had to skip an embedded file:
4680 case CL_EMAXREC:
4681 case CL_EMAXSIZE:
4682 cli_dbgmsg("Descriptor[%d]: %s\n", fmap_fd(ctx->fmap), cl_strerror(ret));
4683 ret = CL_CLEAN;
4684 break;
4685
4686 /*
4687 * Malformed file cases
4688 */
4689 case CL_EFORMAT:
4690 case CL_EREAD:
4691 case CL_EUNPACK:
4692 cli_dbgmsg("Descriptor[%d]: %s\n", fmap_fd(ctx->fmap), cl_strerror(ret));
4693 ret = CL_CLEAN;
4694 break;
4695
4696 case CL_CLEAN:
4697 cache_clean = 1;
4698 break;
4699
4700 default:
4701 break;
4702 }
4703
4704 if (old_hook_lsig_matches) {
4705 /* We need to restore the old hook_lsig_matches */
4706 cli_bitset_free(ctx->hook_lsig_matches); // safe to call, even if NULL
4707 ctx->hook_lsig_matches = old_hook_lsig_matches;
4708 }
4709
4710 #if HAVE_JSON
4711 ctx->wrkproperty = (struct json_object *)(parent_property);
4712 #endif
4713
4714 if (ret == CL_CLEAN && ctx->found_possibly_unwanted) {
4715 cb_retcode = CL_VIRUS;
4716 } else {
4717 if (ret == CL_CLEAN && ctx->num_viruses != 0)
4718 cb_retcode = CL_VIRUS;
4719 else
4720 cb_retcode = ret;
4721 }
4722
4723 cli_dbgmsg("cli_magic_scan_desc: returning %d %s\n", ret, __AT__);
4724 if (ctx->engine->cb_post_scan) {
4725 const char *virusname = NULL;
4726 perf_start(ctx, PERFT_POSTCB);
4727 if (cb_retcode == CL_VIRUS)
4728 virusname = cli_get_last_virus(ctx);
4729 switch (ctx->engine->cb_post_scan(fmap_fd(ctx->fmap), cb_retcode, virusname, ctx->cb_ctx)) {
4730 case CL_BREAK:
4731 cli_dbgmsg("cli_magic_scan_desc: file whitelisted by post_scan callback\n");
4732 perf_stop(ctx, PERFT_POSTCB);
4733 ret = CL_CLEAN;
4734 break;
4735 case CL_VIRUS:
4736 cli_dbgmsg("cli_magic_scan_desc: file blacklisted by post_scan callback\n");
4737 cli_append_virus(ctx, "Detected.By.Callback");
4738 perf_stop(ctx, PERFT_POSTCB);
4739 if (ret != CL_VIRUS) {
4740 ret = cli_check_fp(ctx, NULL);
4741 }
4742 break;
4743 case CL_CLEAN:
4744 break;
4745 default:
4746 cli_warnmsg("cli_magic_scan_desc: ignoring bad return code from post_scan callback\n");
4747 }
4748 perf_stop(ctx, PERFT_POSTCB);
4749 }
4750
4751 if (cb_retcode == CL_CLEAN && cache_clean && !ctx->fmap->dont_cache_flag && !SCAN_COLLECT_METADATA) {
4752 perf_start(ctx, PERFT_CACHE);
4753 cache_add(hash, hashed_size, ctx);
4754 perf_stop(ctx, PERFT_CACHE);
4755 }
4756
4757 if (ret == CL_VIRUS && SCAN_ALLMATCHES) {
4758 ret = CL_CLEAN;
4759 }
4760
4761 early_ret:
4762
4763 if ((ctx->engine->keeptmp) && (NULL != old_temp_path)) {
4764 /* Use rmdir to remove empty tmp subdirectories. If rmdir fails, it wasn't empty. */
4765 (void)rmdir(ctx->sub_tmpdir);
4766
4767 free((void *)ctx->sub_tmpdir);
4768 ctx->sub_tmpdir = old_temp_path;
4769 }
4770
4771 #if HAVE_JSON
4772 if (NULL != parent_property) {
4773 ctx->wrkproperty = (struct json_object *)(parent_property);
4774 }
4775 #endif
4776
4777 return ret;
4778 }
4779
4780 cl_error_t cli_magic_scan_desc_type(int desc, const char *filepath, cli_ctx *ctx, cli_file_t type, const char *name)
4781 {
4782 STATBUF sb;
4783 cl_error_t status = CL_CLEAN;
4784 fmap_t *new_map = NULL;
4785
4786 if (!ctx) {
4787 return CL_EARG;
4788 }
4789
4790 const char *parent_filepath = ctx->sub_filepath;
4791 ctx->sub_filepath = filepath;
4792
4793 #ifdef HAVE__INTERNAL__SHA_COLLECT
4794 if (ctx->sha_collect > 0)
4795 ctx->sha_collect = 0;
4796 #endif
4797
4798 cli_dbgmsg("in cli_magic_scan_desc_type (recursion_level: %u/%u)\n", ctx->recursion_level, ctx->engine->max_recursion_level);
4799
4800 if (FSTAT(desc, &sb) == -1) {
4801 cli_errmsg("cli_magic_scan: Can't fstat descriptor %d\n", desc);
4802
4803 status = CL_ESTAT;
4804 cli_dbgmsg("cli_magic_scan_desc_type: returning %d %s (no post, no cache)\n", status, __AT__);
4805 goto done;
4806 }
4807 if (sb.st_size <= 5) {
4808 cli_dbgmsg("Small data (%u bytes)\n", (unsigned int)sb.st_size);
4809
4810 status = CL_CLEAN;
4811 cli_dbgmsg("cli_magic_scan_desc_type: returning %d %s (no post, no cache)\n", status, __AT__);
4812 goto done;
4813 }
4814
4815 perf_start(ctx, PERFT_MAP);
4816 new_map = fmap(desc, 0, sb.st_size, name);
4817 perf_stop(ctx, PERFT_MAP);
4818 if (NULL == new_map) {
4819 cli_errmsg("CRITICAL: fmap() failed\n");
4820 status = CL_EMEM;
4821 cli_dbgmsg("cli_magic_scan_desc_type: returning %d %s (no post, no cache)\n", status, __AT__);
4822 goto done;
4823 }
4824
4825 status = cli_recursion_stack_push(ctx, new_map, type, true); /* Perform scan with child fmap */
4826 if (CL_SUCCESS != status) {
4827 cli_dbgmsg("Failed to scan fmap.\n");
4828 goto done;
4829 }
4830
4831 status = cli_magic_scan(ctx, type);
4832
4833 (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
4834
4835 done:
4836 if (NULL != new_map) {
4837 funmap(new_map);
4838 }
4839
4840 ctx->sub_filepath = parent_filepath;
4841
4842 return status;
4843 }
4844
4845 cl_error_t cli_magic_scan_desc(int desc, const char *filepath, cli_ctx *ctx, const char *name)
4846 {
4847 return cli_magic_scan_desc_type(desc, filepath, ctx, CL_TYPE_ANY, name);
4848 }
4849
4850 cl_error_t cl_scandesc(int desc, const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions)
4851 {
4852 return cl_scandesc_callback(desc, filename, virname, scanned, engine, scanoptions, NULL);
4853 }
4854
4855 /**
4856 * @brief Scan an offset/length into a file map.
4857 *
4858 * Magic-scan some portion of an existing fmap.
4859 *
4860 * @param map File map.
4861 * @param offset Offset into file map.
4862 * @param length Length from offset.
4863 * @param ctx Scanning context structure.
4864 * @param type CL_TYPE of data to be scanned.
4865 * @param name (optional) Original name of the file (to set fmap name metadata)
4866 * @return int CL_SUCCESS, or an error code.
4867 */
4868 static cl_error_t magic_scan_nested_fmap_type(cl_fmap_t *map, size_t offset, size_t length, cli_ctx *ctx, cli_file_t type, const char *name)
4869 {
4870 cl_error_t status = CL_CLEAN;
4871 fmap_t *new_map = NULL;
4872
4873 cli_dbgmsg("magic_scan_nested_fmap_type: [0, +%zu), [%zu, +%zu)\n",
4874 map->len, offset, length);
4875
4876 if (offset >= map->len) {
4877 cli_dbgmsg("magic_scan_nested_fmap_type: Invalid offset: %zu\n", offset);
4878 goto done;
4879 }
4880
4881 if (!length)
4882 length = map->len - offset;
4883
4884 if (length > map->len - offset) {
4885 cli_dbgmsg("magic_scan_nested_fmap_type: Data truncated: %zu -> %zu\n",
4886 length, map->len - offset);
4887 length = map->len - offset;
4888 }
4889
4890 if (length <= 5) {
4891 cli_dbgmsg("magic_scan_nested_fmap_type: Small data (%zu bytes)\n", length);
4892 goto done;
4893 }
4894
4895 new_map = fmap_duplicate(map, offset, length, name);
4896 if (NULL == new_map) {
4897 cli_dbgmsg("magic_scan_nested_fmap_type: Failed to duplicate fmap for scan of fmap subsection\n");
4898 goto done;
4899 }
4900
4901 status = cli_recursion_stack_push(ctx, new_map, type, false); /* Perform scan with child fmap */
4902 if (CL_SUCCESS != status) {
4903 cli_dbgmsg("magic_scan_nested_fmap_type: Failed to add map to recursion stack for magic scan.\n");
4904 goto done;
4905 }
4906
4907 status = cli_magic_scan(ctx, type);
4908
4909 (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
4910
4911 done:
4912 if (NULL != new_map) {
4913 free_duplicate_fmap(new_map); /* This fmap is just a duplicate. */
4914 }
4915
4916 return status;
4917 }
4918
4919 /* For map scans that may be forced to disk */
4920 cl_error_t cli_magic_scan_nested_fmap_type(cl_fmap_t *map, size_t offset, size_t length, cli_ctx *ctx, cli_file_t type, const char *name)
4921 {
4922 cl_error_t ret = CL_CLEAN;
4923
4924 cli_dbgmsg("cli_magic_scan_nested_fmap_type: [%zu, +%zu)\n", offset, length);
4925 if (offset >= map->len) {
4926 cli_dbgmsg("Invalid offset: %zu\n", offset);
4927 return CL_CLEAN;
4928 }
4929
4930 if (ctx->engine->engine_options & ENGINE_OPTIONS_FORCE_TO_DISK) {
4931 /*
4932 * Force to disk!
4933 *
4934 * Write the offset + length section of the fmap to disk, and scan it.
4935 */
4936 const uint8_t *mapdata = NULL;
4937 char *tempfile = NULL;
4938 int fd = -1;
4939 size_t nread = 0;
4940
4941 /* Then check length */
4942 if (!length) {
4943 /* Caller didn't specify len, use rest of the map */
4944 length = map->len - offset;
4945 }
4946 if (length > map->len - offset) {
4947 cli_dbgmsg("cli_magic_scan_nested_fmap_type: Data truncated: %zu -> %zu\n", length, map->len - offset);
4948 length = map->len - offset;
4949 }
4950 if (length <= 5) {
4951 cli_dbgmsg("cli_magic_scan_nested_fmap_type: Small data (%u bytes)\n", (unsigned int)length);
4952 return CL_CLEAN;
4953 }
4954 if (!CLI_ISCONTAINED_0_TO(map->len, offset, length)) {
4955 cli_dbgmsg("cli_magic_scan_nested_fmap_type: map error occurred [%zu, %zu] not within [0, %zu]\n", offset, length, map->len);
4956 return CL_CLEAN;
4957 }
4958
4959 /* Length checked, now get map */
4960 mapdata = fmap_need_off_once_len(map, offset, length, &nread);
4961 if (!mapdata || (nread != length)) {
4962 cli_errmsg("cli_magic_scan_nested_fmap_type: could not map sub-file\n");
4963 return CL_EMAP;
4964 }
4965
4966 ret = cli_gentempfd(ctx->sub_tmpdir, &tempfile, &fd);
4967 if (ret != CL_SUCCESS) {
4968 return ret;
4969 }
4970
4971 cli_dbgmsg("cli_magic_scan_nested_fmap_type: writing nested map content to temp file %s\n", tempfile);
4972 if (cli_writen(fd, mapdata, length) == (size_t)-1) {
4973 cli_errmsg("cli_magic_scan_nested_fmap_type: cli_writen error writing subdoc temporary file.\n");
4974 ret = CL_EWRITE;
4975 }
4976
4977 /* scan the temp file */
4978 ret = cli_magic_scan_desc_type(fd, tempfile, ctx, type, name);
4979
4980 /* remove the temp file, if needed */
4981 if (fd >= 0) {
4982 close(fd);
4983 }
4984 if (!ctx->engine->keeptmp) {
4985 if (cli_unlink(tempfile)) {
4986 cli_errmsg("cli_magic_scan_nested_fmap_type: error unlinking tempfile %s\n", tempfile);
4987 ret = CL_EUNLINK;
4988 }
4989 }
4990 free(tempfile);
4991 } else {
4992 /*
4993 * Not forced to disk.
4994 *
4995 * Just use nested map by scanning given fmap at offset + length.
4996 */
4997 ret = magic_scan_nested_fmap_type(map, offset, length, ctx, type, name);
4998 }
4999 return ret;
5000 }
5001
5002 cl_error_t cli_magic_scan_buff(const void *buffer, size_t length, cli_ctx *ctx, const char *name)
5003 {
5004 cl_error_t ret;
5005 fmap_t *map = NULL;
5006
5007 map = fmap_open_memory(buffer, length, name);
5008 if (!map) {
5009 return CL_EMAP;
5010 }
5011
5012 ret = cli_magic_scan_nested_fmap_type(map, 0, length, ctx, CL_TYPE_ANY, name);
5013
5014 funmap(map);
5015
5016 return ret;
5017 }
5018
5019 /**
5020 * @brief The main function to initiate a scan of an fmap.
5021 *
5022 * @param map File map.
5023 * @param filepath (optional, recommended) filepath of the open file descriptor or file map.
5024 * @param[out] virname Will be set to a statically allocated (i.e. needs not be freed) signature name if the scan matches against a signature.
5025 * @param[out] scanned The number of bytes scanned.
5026 * @param engine The scanning engine.
5027 * @param scanoptions Scanning options.
5028 * @param[in,out] context An opaque context structure allowing the caller to record details about the sample being scanned.
5029 * @return int CL_CLEAN, CL_VIRUS, or an error code if an error occured during the scan.
5030 */
5031 static cl_error_t scan_common(cl_fmap_t *map, const char *filepath, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
5032 {
5033 cl_error_t status;
5034 cli_ctx ctx = {0};
5035
5036 char *target_basename = NULL;
5037 char *new_temp_prefix = NULL;
5038 size_t new_temp_prefix_len;
5039 char *new_temp_path = NULL;
5040
5041 time_t current_time;
5042 struct tm tm_struct;
5043
5044 if (NULL == map) {
5045 return CL_ENULLARG;
5046 }
5047
5048 ctx.engine = engine;
5049 ctx.virname = virname;
5050 ctx.scanned = scanned;
5051 ctx.options = malloc(sizeof(struct cl_scan_options));
5052 memcpy(ctx.options, scanoptions, sizeof(struct cl_scan_options));
5053 ctx.found_possibly_unwanted = 0;
5054
5055 ctx.dconf = (struct cli_dconf *)engine->dconf;
5056 ctx.cb_ctx = context;
5057
5058 if (!(ctx.hook_lsig_matches = cli_bitset_init())) {
5059 status = CL_EMEM;
5060 goto done;
5061 }
5062
5063 ctx.recursion_stack_size = ctx.engine->max_recursion_level;
5064 ctx.recursion_stack = cli_calloc(sizeof(recursion_level_t), ctx.recursion_stack_size);
5065 if (!ctx.recursion_stack) {
5066 status = CL_EMEM;
5067 goto done;
5068 }
5069
5070 // ctx was memset, so recursion_level starts at 0.
5071 ctx.recursion_stack[ctx.recursion_level].fmap = map;
5072 ctx.recursion_stack[ctx.recursion_level].type = CL_TYPE_ANY; // ANY for the top level, because we don't yet know the type.
5073 ctx.recursion_stack[ctx.recursion_level].size = map->len;
5074
5075 ctx.fmap = ctx.recursion_stack[ctx.recursion_level].fmap;
5076
5077 perf_init(&ctx);
5078
5079 if (ctx.engine->maxscantime != 0) {
5080 if (gettimeofday(&ctx.time_limit, NULL) == 0) {
5081 uint32_t secs = ctx.engine->maxscantime / 1000;
5082 uint32_t usecs = (ctx.engine->maxscantime % 1000) * 1000;
5083 ctx.time_limit.tv_sec += secs;
5084 ctx.time_limit.tv_usec += usecs;
5085 if (ctx.time_limit.tv_usec >= 1000000) {
5086 ctx.time_limit.tv_usec -= 1000000;
5087 ctx.time_limit.tv_sec++;
5088 }
5089 } else {
5090 char buf[64];
5091 cli_dbgmsg("scan_common: gettimeofday error: %s\n", cli_strerror(errno, buf, 64));
5092 }
5093 }
5094
5095 if (filepath != NULL) {
5096 ctx.target_filepath = strdup(filepath);
5097 }
5098
5099 /*
5100 * Create a tmp sub-directory for the temp files generated by this scan.
5101 *
5102 * If keeptmp (LeaveTemporaryFiles / --leave-temps) is enabled, we'll include the
5103 * basename in the tmp directory.
5104 * If keeptmp is not enabled, we'll just call it "scantemp".
5105 */
5106 current_time = time(NULL);
5107
5108 #ifdef _WIN32
5109 if (0 != localtime_s(&tm_struct, ¤t_time)) {
5110 #else
5111 if (!localtime_r(¤t_time, &tm_struct)) {
5112 #endif
5113 cli_errmsg("scan_common: Failed to get local time.\n");
5114 status = CL_ESTAT;
5115 goto done;
5116 }
5117
5118 if ((ctx.engine->keeptmp) &&
5119 (NULL != ctx.target_filepath) &&
5120 (CL_SUCCESS == cli_basename(ctx.target_filepath, strlen(ctx.target_filepath), &target_basename))) {
5121 /* Include the basename in the temp directory */
5122 new_temp_prefix_len = strlen("YYYYMMDD_HHMMSS-") + strlen(target_basename);
5123 new_temp_prefix = cli_calloc(1, new_temp_prefix_len + 1);
5124 if (!new_temp_prefix) {
5125 cli_errmsg("scan_common: Failed to allocate memory for temp directory name.\n");
5126 status = CL_EMEM;
5127 goto done;
5128 }
5129 strftime(new_temp_prefix, new_temp_prefix_len, "%Y%m%d_%H%M%S-", &tm_struct);
5130 strcpy(new_temp_prefix + strlen("YYYYMMDD_HHMMSS-"), target_basename);
5131 } else {
5132 /* Just use date */
5133 new_temp_prefix_len = strlen("YYYYMMDD_HHMMSS-scantemp");
5134 new_temp_prefix = cli_calloc(1, new_temp_prefix_len + 1);
5135 if (!new_temp_prefix) {
5136 cli_errmsg("scan_common: Failed to allocate memory for temp directory name.\n");
5137 status = CL_EMEM;
5138 goto done;
5139 }
5140 strftime(new_temp_prefix, new_temp_prefix_len, "%Y%m%d_%H%M%S-scantemp", &tm_struct);
5141 }
5142
5143 /* Place the new temp sub-directory within the configured temp directory */
5144 new_temp_path = cli_gentemp_with_prefix(ctx.engine->tmpdir, new_temp_prefix);
5145 free(new_temp_prefix);
5146 if (NULL == new_temp_path) {
5147 cli_errmsg("scan_common: Failed to generate temp directory name.\n");
5148 status = CL_EMEM;
5149 goto done;
5150 }
5151
5152 ctx.sub_tmpdir = new_temp_path;
5153
5154 if (mkdir(ctx.sub_tmpdir, 0700)) {
5155 cli_errmsg("Can't create temporary directory for scan: %s.\n", ctx.sub_tmpdir);
5156 status = CL_EACCES;
5157 goto done;
5158 }
5159
5160 cli_logg_setup(&ctx);
5161
5162 /* We have a limit of around 2GB (INT_MAX - 2). Enforce it here. */
5163 /* TODO: Large file support is large-ly untested. Remove this restriction
5164 * and test with a large set of large files of various types. libclamav's
5165 * integer type safety has come a long way since 2014, so it's possible
5166 * we could lift this restriction, but at least one of the parsers is
5167 * bound to behave badly with large files. */
5168 if (map->len > INT_MAX - 2) {
5169 if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
5170 status = cli_append_virus(&ctx, "Heuristics.Limits.Exceeded.MaxFileSize");
5171 } else {
5172 status = CL_CLEAN;
5173 }
5174 goto done;
5175 }
5176
5177 status = cli_magic_scan(&ctx, CL_TYPE_ANY);
5178
5179 if (status == CL_CLEAN && ctx.found_possibly_unwanted) {
5180 cli_virus_found_cb(&ctx);
5181 }
5182
5183 #if HAVE_JSON
5184 if (ctx.options->general & CL_SCAN_GENERAL_COLLECT_METADATA && (ctx.properties != NULL)) {
5185 json_object *jobj;
5186 const char *jstring;
5187
5188 /* set value of unique root object tag */
5189 if (json_object_object_get_ex(ctx.properties, "FileType", &jobj)) {
5190 enum json_type type;
5191 const char *jstr;
5192
5193 type = json_object_get_type(jobj);
5194 if (type == json_type_string) {
5195 jstr = json_object_get_string(jobj);
5196 cli_jsonstr(ctx.properties, "RootFileType", jstr);
5197 }
5198 }
5199
5200 /* serialize json properties to string */
5201 #ifdef JSON_C_TO_STRING_NOSLASHESCAPE
5202 jstring = json_object_to_json_string_ext(ctx.properties, JSON_C_TO_STRING_PRETTY | JSON_C_TO_STRING_NOSLASHESCAPE);
5203 #else
5204 jstring = json_object_to_json_string_ext(ctx.properties, JSON_C_TO_STRING_PRETTY);
5205 #endif
5206 if (NULL == jstring) {
5207 cli_errmsg("scan_common: no memory for json serialization.\n");
5208 status = CL_EMEM;
5209 } else {
5210 int ret = CL_SUCCESS;
5211 struct cli_matcher *iroot = ctx.engine->root[13];
5212 cli_dbgmsg("%s\n", jstring);
5213
5214 if ((status != CL_VIRUS) || (ctx.options->general & CL_SCAN_GENERAL_ALLMATCHES)) {
5215 /* run bytecode preclass hook; generate fmap if needed for running hook */
5216 struct cli_bc_ctx *bc_ctx = cli_bytecode_context_alloc();
5217 if (!bc_ctx) {
5218 cli_errmsg("scan_common: can't allocate memory for bc_ctx\n");
5219 status = CL_EMEM;
5220 } else {
5221 cli_bytecode_context_setctx(bc_ctx, &ctx);
5222 status = cli_bytecode_runhook(&ctx, ctx.engine, bc_ctx, BC_PRECLASS, map);
5223 cli_bytecode_context_destroy(bc_ctx);
5224 }
5225
5226 /* backwards compatibility: scan the json string unless a virus was detected */
5227 if (status != CL_VIRUS && (iroot->ac_lsigs || iroot->ac_patterns
5228 #ifdef HAVE_PCRE
5229 || iroot->pcre_metas
5230 #endif // HAVE_PCRE
5231 )) {
5232 cli_dbgmsg("scan_common: running deprecated preclass bytecodes for target type 13\n");
5233 ctx.options->general &= ~CL_SCAN_GENERAL_COLLECT_METADATA;
5234 status = cli_magic_scan_buff(jstring, strlen(jstring), &ctx, NULL);
5235 }
5236 }
5237
5238 /* Invoke file props callback */
5239 if (ctx.engine->cb_file_props != NULL) {
5240 ret = ctx.engine->cb_file_props(jstring, status, ctx.cb_ctx);
5241 if (ret != CL_SUCCESS)
5242 status = ret;
5243 }
5244
5245 /* keeptmp file processing for file properties json string */
5246 if (ctx.engine->keeptmp) {
5247 int fd = -1;
5248 char *tmpname = NULL;
5249
5250 if ((ret = cli_newfilepathfd(ctx.sub_tmpdir, "metadata.json", &tmpname, &fd)) != CL_SUCCESS) {
5251 cli_dbgmsg("scan_common: Can't create json properties file, ret = %i.\n", ret);
5252 } else {
5253 if (cli_writen(fd, jstring, strlen(jstring)) == (size_t)-1)
5254 cli_dbgmsg("scan_common: cli_writen error writing json properties file.\n");
5255 else
5256 cli_dbgmsg("json written to: %s\n", tmpname);
5257 }
5258 if (fd != -1)
5259 close(fd);
5260 if (NULL != tmpname)
5261 free(tmpname);
5262 }
5263 }
5264 cli_json_delobj(ctx.properties); /* frees all json memory */
5265 }
5266 #endif // HAVE_JSON
5267
5268 if (status == CL_CLEAN) {
5269 if ((ctx.found_possibly_unwanted) ||
5270 ((ctx.num_viruses != 0) &&
5271 ((ctx.options->general & CL_SCAN_GENERAL_ALLMATCHES) ||
5272 (ctx.options->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX)))) {
5273 status = CL_VIRUS;
5274 }
5275 }
5276
5277 cli_logg_unsetup();
5278
5279 done:
5280 if (NULL != ctx.sub_tmpdir) {
5281 if (!ctx.engine->keeptmp) {
5282 (void)cli_rmdirs(ctx.sub_tmpdir);
5283 }
5284 free(ctx.sub_tmpdir);
5285 }
5286
5287 if (NULL != target_basename) {
5288 free(target_basename);
5289 }
5290
5291 if (NULL != ctx.target_filepath) {
5292 free(ctx.target_filepath);
5293 }
5294
5295 if (NULL != ctx.perf) {
5296 perf_done(&ctx);
5297 }
5298
5299 if (NULL != ctx.hook_lsig_matches) {
5300 cli_bitset_free(ctx.hook_lsig_matches);
5301 }
5302
5303 if (NULL != ctx.recursion_stack) {
5304 free(ctx.recursion_stack);
5305 }
5306
5307 if (NULL != ctx.options) {
5308 free(ctx.options);
5309 }
5310
5311 return status;
5312 }
5313
5314 cl_error_t cl_scandesc_callback(int desc, const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
5315 {
5316 cl_error_t status = CL_SUCCESS;
5317 cl_fmap_t *map = NULL;
5318 STATBUF sb;
5319 char *filename_base = NULL;
5320
5321 if (FSTAT(desc, &sb) == -1) {
5322 cli_errmsg("cl_scandesc_callback: Can't fstat descriptor %d\n", desc);
5323 status = CL_ESTAT;
5324 goto done;
5325 }
5326 if (sb.st_size <= 5) {
5327 cli_dbgmsg("cl_scandesc_callback: File too small (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
5328 status = CL_CLEAN;
5329 goto done;
5330 }
5331 if ((engine->maxfilesize > 0) && ((uint64_t)sb.st_size > engine->maxfilesize)) {
5332 cli_dbgmsg("cl_scandesc_callback: File too large (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
5333 if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
5334 if (engine->cb_virus_found)
5335 engine->cb_virus_found(desc, "Heuristics.Limits.Exceeded.MaxFileSize", context);
5336 status = CL_VIRUS;
5337 } else {
5338 status = CL_CLEAN;
5339 }
5340 goto done;
5341 }
5342
5343 if (NULL != filename) {
5344 (void)cli_basename(filename, strlen(filename), &filename_base);
5345 }
5346
5347 if (NULL == (map = fmap(desc, 0, sb.st_size, filename_base))) {
5348 cli_errmsg("CRITICAL: fmap() failed\n");
5349 status = CL_EMEM;
5350 goto done;
5351 }
5352
5353 status = scan_common(map, filename, virname, scanned, engine, scanoptions, context);
5354
5355 done:
5356 if (NULL != map) {
5357 funmap(map);
5358 }
5359 if (NULL != filename_base) {
5360 free(filename_base);
5361 }
5362
5363 return status;
5364 }
5365
5366 cl_error_t cl_scanmap_callback(cl_fmap_t *map, const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
5367 {
5368 if ((engine->maxfilesize > 0) && (map->len > engine->maxfilesize)) {
5369 cli_dbgmsg("cl_scandesc_callback: File too large (%zu bytes), ignoring\n", map->len);
5370 if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
5371 if (engine->cb_virus_found)
5372 engine->cb_virus_found(fmap_fd(map), "Heuristics.Limits.Exceeded.MaxFileSize", context);
5373 return CL_VIRUS;
5374 }
5375 return CL_CLEAN;
5376 }
5377
5378 return scan_common(map, filename, virname, scanned, engine, scanoptions, context);
5379 }
5380
5381 cl_error_t cli_found_possibly_unwanted(cli_ctx *ctx)
5382 {
5383 if (cli_get_last_virus(ctx)) {
5384 cli_dbgmsg("found Possibly Unwanted: %s\n", cli_get_last_virus(ctx));
5385 if (SCAN_HEURISTIC_PRECEDENCE) {
5386 /* we found a heuristic match, don't scan further,
5387 * but consider it a virus. */
5388 cli_dbgmsg("cli_found_possibly_unwanted: CL_VIRUS\n");
5389 return CL_VIRUS;
5390 }
5391 /* heuristic scan isn't taking precedence, keep scanning.
5392 * If this is part of an archive, and
5393 * we find a real malware we report that instead of the
5394 * heuristic match */
5395 ctx->found_possibly_unwanted = 1;
5396 } else {
5397 cli_warnmsg("cli_found_possibly_unwanted called, but virname is not set\n");
5398 }
5399 emax_reached(ctx);
5400 return CL_CLEAN;
5401 }
5402
5403 cl_error_t cli_magic_scan_file(const char *filename, cli_ctx *ctx, const char *original_name)
5404 {
5405 int fd = -1;
5406 cl_error_t ret = CL_EOPEN;
5407
5408 /* internal version of cl_scanfile with arec/mrec preserved */
5409 fd = safe_open(filename, O_RDONLY | O_BINARY);
5410 if (fd < 0) {
5411 goto done;
5412 }
5413
5414 ret = cli_magic_scan_desc(fd, filename, ctx, original_name);
5415
5416 done:
5417 if (fd >= 0) {
5418 close(fd);
5419 }
5420 return ret;
5421 }
5422
5423 cl_error_t cl_scanfile(const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions)
5424 {
5425 return cl_scanfile_callback(filename, virname, scanned, engine, scanoptions, NULL);
5426 }
5427
5428 cl_error_t cl_scanfile_callback(const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
5429 {
5430 int fd;
5431 cl_error_t ret;
5432 const char *fname = cli_to_utf8_maybe_alloc(filename);
5433
5434 if (!fname)
5435 return CL_EARG;
5436
5437 if ((fd = safe_open(fname, O_RDONLY | O_BINARY)) == -1)
5438 return CL_EOPEN;
5439
5440 if (fname != filename)
5441 free((char *)fname);
5442
5443 ret = cl_scandesc_callback(fd, filename, virname, scanned, engine, scanoptions, context);
5444 close(fd);
5445
5446 return ret;
5447 }
5448
5449 /*
5450 Local Variables:
5451 c-basic-offset: 4
5452 End:
5453 */
5454